#!/usr/bin/perl -w

# notes2html: Convert inline notes to HTML displayed as popups or footnotes.
#
# This program takes a Perl program that has been annotated with inline
# comments starting with '##', '###', etc., and generates an HTML-ized
# version of the program, with note references that are either internal
# links to footnotes or links to open a new popup window for each note.
# See the documentation below for more information.
#
# Author: Frank Hecker
# Version: 0.94
# License: MIT license (see below).

use strict;
use warnings;

package notes2html;

use Getopt::Long qw/GetOptions HelpMessage/;   # command-line option parsing
use Pod::Usage;                                # print usage info from the Pod
use Text::Tabs;                                # to expand tabs in input
use Text::Textile qw/textile/;                 # to generate HTML from note text

# Global variables, used in variable interpolation of templates.
# (These must stay package globals: the templates refer to them by name.)
use vars qw/$html_title $link_title $link_note_num/;

my $cmd_name = $0;              # name of program for error messages
$cmd_name =~ s!^.*/([^\.]*)(?:\..*)?$!$1!;
my $cmd_version = "0.94";

my %template = ();              # hash for HTML/JavaScript templates
my @notes = ();                 # stores footnotes as needed
my $note_num = 0;               # last note stored (counting from 1)
my $pending_ref = 0;            # earliest note not yet referenced
my @pending_titles = ();        # titles for pending links
my $ref_col = 73;               # print references starting this column
my $ifh;                        # filehandle to read program from
my $ofh;                        # filehandle to write HTML to
my $html_written = 0;           # Any HTML written yet?

my $textile = Text::Textile->new;
$textile->flavor('html');       # instead of the default XHTML

$tabstop = 8;                   # Text::Tabs setting: use standard tab stops

# Parse argument list and set options appropriately.
my ($do_popups, $line_num, $fold, $title, $outfile, $infile) = parse_args();
$html_title = $title;           # save in global for interpolation

# If we're folding adjust the column to start note reference comments in.
$ref_col = $fold - 7 if $fold;

# Open input file (or use stdin if not specified).
if (length $infile) {
    # Three-argument open, so that a file name beginning with '>' or '|'
    # is treated as a name and never as an open mode.
    open($ifh, '<', $infile)
        or die "$cmd_name: cannot open $infile: $!";

    # With no explicit title, fall back to the input file's base name.
    unless ($html_title) {
        ($html_title = $infile) =~ s!^.*/!!;
    }
} else {
    $ifh = \*STDIN;
}

# Open output file (or use stdout if not specified).
if (length $outfile) {
    open($ofh, '>', $outfile)
        or die "$cmd_name: cannot open $outfile: $!";
} else {
    $ofh = \*STDOUT;
}

# Read in templates to use in generating HTML and JavaScript.
# (This code and the idea of using templates were taken from Blosxom).
# Each template is one line of the form "name contents", where a literal
# '\n' in the contents stands for a newline; a blank line or '__END__'
# ends the template section.
while (my $template_line = <DATA>) {
    last if $template_line =~ /^(__END__)?$/;
    my ($template_name, $template_contents) =
        $template_line =~ /^(\S+)\s(.*)$/;
    next unless defined $template_name;     # ignore malformed lines
    $template_contents =~ s/\\n/\n/g;
    $template{$template_name} = $template_contents;
}

# Read the file a line at a time and look for an inline note.  Convert notes
# to HTML and either output them as JavaScript variables (for popups) or
# save them ourselves (for footnotes).  Add note reference links to the next
# nonblank code line.
LINE:
while (<$ifh>) {
    # Expand tabs so that note references go in the right column.
    $_ = expand($_);

    # Start generating HTML only when we know we have output.
    # Interpolate Perl variables to customize the output.
    unless ($html_written) {
        print {$ofh} interpolate($template{'html_begin'});
        print {$ofh} interpolate($template{'js_popup'}) if $do_popups;
        print {$ofh} interpolate($template{'code_begin'});
        $html_written = 1;
    }

    # Inline notes start with '## ', '### ', and so on.
    if (/^(\#\#+)/) {                       # first line of an inline note
        # Mark this note for future reference and look for more note lines.
        my $note_marker = $1;               # save to match further lines
        $note_num++;
        $pending_ref = $note_num unless $pending_ref;

        # Strip off the note marker and the trailing newline.
        # (We leave leading whitespace because it might be intentional.)
        s/^\Q$note_marker\E\s(.*)\s*$/$1/;
        my $note = $_;

        # Read the next lines to see if they are part of the note as well.
        # A line belongs to this note only if it starts with exactly the
        # same marker followed by whitespace.
        my $read_too_far = 0;
        while (<$ifh>) {
            unless (/^\Q$note_marker\E\s/) {
                $read_too_far = 1;          # not part of this note
                last;
            }
            # Add this line to the current note after removing note marker.
            s/^\Q$note_marker\E\s(.*)\s*$/$1/;
            $note .= "\n" . $_;
        }

        # Remove blank lines at the beginning and end of the note.
        $note =~ s/\A\n+//;
        $note =~ s/\n+\z//;
        $note .= "\n";                      # put final newline back on

        # Convert note to HTML and save part of it for link title.
        $note = note_html($note);
        push @pending_titles, escape_html_specials(link_title($note));

        # For popup notes, generate JavaScript code to save the note in a
        # JavaScript array that the popup routine can read later.
        # Otherwise save the note ourselves to print footnotes later.
        if ($do_popups) {
            print {$ofh} interpolate($template{'js_begin'});
            print {$ofh} " notes.push('" . escape_string($note) . "');\n";
            print {$ofh} interpolate($template{'js_end'});
        } else {
            push @notes, $note;
        }

        # If we read one line too many, handle it in the normal way
        # without trying to read another line first.  ($_ still holds
        # that line; redo re-runs the loop body without reading.)
        redo LINE if $read_too_far;
    } elsif (/^\s*$/ || !$pending_ref) {    # blank line, or no references
        # Print the code line, with line numbering and folding if specified.
        # Blank (including whitespace-only) lines never carry references;
        # pending references wait for the next nonblank code line.
        print_line($ofh, $_, $line_num, $fold);
        $line_num++ if $line_num;
    } else {                                # code line with note references
        # Print the code line, including links for all pending references,
        # with line numbering and folding if specified.
        print_line($ofh, $_, $line_num, $fold, $pending_ref, $note_num);
        $line_num++ if $line_num;
        $pending_ref = 0;
        @pending_titles = ();
    }
}

# If we wrote out any code then output closing HTML tag(s) for it.
print {$ofh} interpolate($template{'code_end'}) if $html_written;

# Write notes as footnotes if specified.
unless ($do_popups) {
    $link_note_num = 1;                     # interpolated into the header
    foreach my $note (@notes) {
        print {$ofh} interpolate($template{'footnote_header'});
        print {$ofh} $note . "\n";
        $link_note_num++;
    }
}

# Write final HTML tags if needed.
print {$ofh} interpolate($template{'html_end'}) if $html_written;

# All done.  Buffered write errors only surface at close, so check it.
close $ifh;
close $ofh or die "$cmd_name: error writing output: $!";

exit 0;


# Parse @ARGV arguments to program.
#
# Returns the list ($popups, $line_nums, $fold, $title, $outfile, $infile):
# $popups is true for popup notes and false for footnotes, $line_nums is
# true if line numbers were requested, $fold is the fold column (0 for no
# folding), and $outfile/$infile are "" when stdout/stdin should be used.
# Prints usage and exits on invalid options.
sub parse_args {
    my $popups = 1;             # do popup notes?
    my $line_nums = 0;          # print line numbers?
    my $fold = 0;               # fold long lines?
    my $title = "";             # use in titles, note headers
    my $outfile = "";           # file to open for output
    my $infile = "";            # file to open for input

    GetOptions(
        'popups|p'       => \$popups,
        'footnotes|f'    => sub { $popups = 0 },
        'line-numbers|l' => \$line_nums,
        'fold=n'         => \$fold,
        'version|V'      => sub { print "$cmd_name $cmd_version\n"; exit 0; },
        'help|?'         => sub { HelpMessage() },
        'man|m'          => sub { pod2usage(-exitstatus => 0, -verbose => 2) },
        'title|t=s'      => \$title,
        'outfile|o=s'    => \$outfile,
    ) or pod2usage(2);

    # At this point we should just have the one (optional) input file.
    $infile = shift @ARGV if @ARGV;
    if (@ARGV) {
        # Diagnostics go to stderr so they never end up in the HTML output.
        print STDERR "Too many arguments\n";
        pod2usage(1);
    }

    # '-' means read from stdin (or write to stdout if used with --outfile).
    $infile  = "" if $infile  eq '-';
    $outfile = "" if $outfile eq '-';

    # Enforce a minimum limit on how much you can fold code lines.
    if ($fold and $fold < 24) {
        print STDERR "$cmd_name: Cannot fold to less than 24 columns\n";
        pod2usage(1);
    }

    return $popups, $line_nums, $fold, $title, $outfile, $infile;
}


# Convert the note contents to HTML for use in a popup or footnote.
sub note_html {
    # Takes the plain text of one note and returns it as an HTML fragment.
    # The note is processed a paragraph at a time (paragraphs are separated
    # by blank lines): indented paragraphs are emitted as preformatted text,
    # paragraphs starting with '*' or '#' are handed to Textile as lists,
    # and everything else is handed to Textile as a normal text paragraph.
    my ($note_text) = @_;
    my $note_html = "";

    my @uri_schemes =           # Types of URLs we create links for
        qw/ftp http gopher mailto news telnet wais ldap https/;
    my $scheme_re =             # alternation regexp for URL types
        join '|', @uri_schemes;

    my @browser_schemes =       # URLs typically handled by browser
        qw/ftp http gopher wais ldap https/;
    my $bs_re = join '|', @browser_schemes;

    # We convert the note's contents a paragraph at a time.
    foreach my $note_para (split /\n\s*\n/, $note_text) {
        chomp($note_para);
        if ($note_para =~ /^\s/) {                  # indented
            # The paragraph represents preformatted content; we process it
            # ourselves by escaping characters special to HTML and converting
            # newlines to <br> tags (in that order!).
            $note_para = escape_html_specials($note_para);
            $note_para =~ s/\n/<br>/g;

            # If the line contains URLs (of known types) we generate links.
            $note_para =~
                s!((?:$scheme_re):[^\s><"{}|\\^[\]`]+)!<a href="$1">$1</a>!ig;

            # NOTE(review): the HTML literals in this branch (<br>, the
            # link markup and the enclosing <pre> element) were stripped
            # from the copy of the file this was restored from; they are
            # rebuilt from the comments above -- confirm against upstream.
            $note_html .= "\n<pre>\n" . $note_para . "\n</pre>\n";
        } elsif ($note_para =~ /^[*\#]+\s/) {       # E.g., "# ..." or "* ..."
            # The paragraph represents a bulleted or numbered list; we
            # convert each list item to a single line and process the whole
            # list using Textile.
            $note_para =~ s/\n([^*\#])/ $1/g;
            $note_html .= $textile->process($note_para) . "\n";
        } else {
            # The paragraph represents a normal block of text; we convert
            # the paragraph to a single line and process the line using
            # Textile to produce HTML enclosed in <p>...</p> tags.
            $note_para =~ s/\n/ /g;
            $note_html .= "\n" . $textile->process($note_para) . "\n";
        }
    }

    # Fix up popup notes to open links in new windows, for those links
    # that would typically be displayed by the browser in the current window.
    # NOTE(review): pattern and replacement were both empty in the damaged
    # copy (s///ig); rebuilt from the comment above and the otherwise unused
    # $bs_re -- confirm the exact markup against upstream.
    $note_html =~ s!<a\s+href="((?:$bs_re):)!<a target="_blank" href="$1!ig
        if $do_popups;

    return $note_html;
}


# Print a code line with references if any, possibly folded and/or numbered.
#
#   $ofh          filehandle to print to
#   $line         the code line (newline-terminated, tabs already expanded)
#   $line_num     line number to print, or 0 for no line numbers
#   $fold         column to fold at, or 0 for no folding
#   $pending_ref  number of the first note to reference (false if none)
#   $current_ref  number of the last note to reference
sub print_line {
    my ($ofh, $line, $line_num, $fold, $pending_ref, $current_ref) = @_;

    # We assume that $line is a single newline-terminated line with no tabs,
    # backspaces, or other control characters, and no multi-byte characters.
    # This is a valid assumption in the context of this program.
    chomp $line;                # don't count newline when folding

    # Build the string of note references, and keep track of its length
    # as it will be displayed, so we can fold if necessary.  The length is
    # counted from the '#' (which sits in column $ref_col), so the two
    # separator spaces in front of it are not included.
    my $ref_str = "";
    my $ref_str_len = 0;
    if ($pending_ref) {
        $ref_str = "  \#";      # two spaces keep the comment off the code
        $ref_str_len += 1;
        while ($pending_ref <= $current_ref) {
            # See how long reference will display, and if we are folding
            # start a new line if necessary.
            $ref_str_len += length($pending_ref) + 3;       # ' [nn]'
            if ($fold && $ref_col + $ref_str_len - 1 > $fold) {
                $ref_str .= "\n" . ' ' x ($ref_col - 1) . "\#";
                $ref_str_len = length($pending_ref) + 4;    # reset
            }

            # Add links (of the proper type) pointing to the notes,
            # with title and note number interpolated as global variables.
            $link_title = shift @pending_titles;
            $link_note_num = $pending_ref;
            $ref_str .= " ["
                . interpolate($template{$do_popups ? 'popup_link'
                                                   : 'footnote_link'})
                . "]";
            $pending_ref++;
        }
    }

    # Keep track of the length of the last line we will display for this code
    # line so we can properly compute where to put the note references.
    my $last_line_len = length($line);

    # If folding is requested, do it, breaking on spaces where possible.
    if ($fold) {
        my $folded_line = "";
        while (length($line) > $fold) {
            my $last_space = rindex(substr($line, 0, $fold), ' ');
            if ($last_space < 0) {          # no space prior to fold point
                $folded_line .= substr($line, 0, $fold) . "\n";
                $line = substr($line, $fold);
            } else {                        # break at (and drop) the space
                $folded_line .= substr($line, 0, $last_space) . "\n";
                $line = substr($line, $last_space + 1);
            }
        }
        $folded_line .= $line;              # what's left over after folding
        $last_line_len = length($line);
        $line = $folded_line;
    }

    # Replace HTML special characters with character entities.
    # Note: This must be done *before* adding the note reference links.
    $line = escape_html_specials($line);

    # Add any references to the last line displayed if there's room,
    # otherwise if we're folding display another line with just the references.
    # Note that $ref_col is the column in which we wish to start the comment
    # containing the references (i.e., the '#' character).  However we want
    # to separate the comment by at least two spaces from the code itself;
    # these extra spaces are already included in $ref_str.
    if ($ref_str) {
        if ($fold && $last_line_len >= $ref_col - 2) {
            $line .= "\n";                  # start new line just for references
            $last_line_len = 0;
        }
        my $padding = $last_line_len >= $ref_col - 2
                    ? 0
                    : $ref_col - $last_line_len - 3;
        $line .= ' ' x $padding;
        $line .= $ref_str;
    }

    # Add line numbers if requested.  Every displayed line is indented by a
    # six-column gutter (a five-column number plus one separating space);
    # the number itself replaces the gutter of the first line only.
    if ($line_num) {
        my $line_num_str = sprintf("%5d", $line_num);
        my $gutter = ' ' x 6;
        $line =~ s/^/$gutter/gm;            # put in enough space for line number
        $line =~ s/^ {5}/$line_num_str/;
    }

    print {$ofh} $line . "\n";
}


# Create a note link title given a note (in HTML form).
#
# Returns the text of the first line of the note that still has visible
# content once its HTML tags are removed, cut down to its first 10 words
# (followed by '...'), or "" if there is no such line.
sub link_title {
    my ($note) = @_;

    foreach my $html_line (split "\n", $note) {
        # Strip tags before testing for content, so that a line holding
        # only markup (e.g., '<pre>' or '<ul>') does not yield an empty title.
        (my $text = $html_line) =~ s/<[^>]*>//g;
        next unless $text =~ /\S/;

        $text =~ s/^((?:\S+\s){10}).*$/$1.../;
        return $text;
    }

    return "";
}


# Make sure string is suitable for HTML output by replacing special
# characters like '&' with the corresponding HTML character entities.
sub escape_html_specials {
    my ($string) = @_;

    my %entity_for = (          # Character entities for HTML specials
        '<' => '&lt;',
        '>' => '&gt;',
        '&' => '&amp;',
        '"' => '&quot;',
    );

    # A single pass, so the '&' of a freshly inserted entity is never
    # escaped a second time.
    $string =~ s/([<>&"])/$entity_for{$1}/g;

    return $string;
}


# Make sure string is suitable for a JavaScript string literal.
#
# NOTE(review): this copy of the file is truncated in the middle of the
# subroutine below.  Its final statement(s) and closing brace, whatever
# followed it, and the start of the template data are missing and must be
# restored from upstream; the remnant is kept verbatim as the last comment
# line.
sub escape_string {
    my ($string) = @_;

    # Escape any quote and backslash characters in the string.
    # (Since the string may be used in JavaScript delimited with either
    # a single quote or a double quote, make sure we escape both.)
    $string =~ s/(\\|\'|\")/\\$1/g;

    # Escape any newlines.
    $string =~ s/\n/\\n/g;

    # Escape '/' in '\n\n\n\n$title\n\n\n

$title

\n html_end \n\n code_begin
\n
code_end 
\n js_begin js_popup \n popup_link
$link_note_num footnote_link $link_note_num footnote_header \n

Note $link_note_num

\n __END__ =head1 NAME notes2html - Converts annotated Perl code to HTML with popup notes or footnotes =head1 SYNOPSIS notes2html [options] [file] Options: --popups, -p display notes as popup windows --footnotes, -f display notes as footnotes at end of document --line-numbers, -l print line numbers for each line of code --fold=n fold code lines longer than n columns (n >= 24) --title, -t specify title for HTML document and popup notes --outfile=file specify output file (otherwise stdout) --help, -? brief help message --man, -m documentation --version, -V print program version =head1 OPTIONS =over 8 =item B<--popups>, B<-p> Display notes as popup windows. This option may not be specified together with --footnotes. =item B<--footnotes>, B<-f> Display notes as footnotes at the end of the HTML document. This option may not be specified together with --popups. =item B<--line-numbers>, B<-l> Print a line number at the beginning of each line of code. =item B<--fold=n> Fold code lines that are longer than I<n> columns and print them on multiple lines of not more than I<n> columns each, breaking at space characters where possible. Note that line numbers are not included in the folding calculations, so code lines will fold the same way whether you use the --line-numbers option or not. =item B<--title=string>, B<-t=string> Specifies a string to be used for the HTML document's C<< <TITLE> >> and C<< <H1> >> header; the string is also used in constructing the title and header for popup notes. If this option is not specified then the input file's filename will be used in titles and headers. =item B<--outfile=file>, B<-o=file> Specifies a file to which the HTML document will be written. If this option is not specified then the HTML document will be written to standard output. =item B<--help>, B<-?> Prints a brief help message and exits. =item B<--man>, B<-m> Prints the program's documentation and exits. =item B<--version>, B<-V> Prints the program's version information and exits. 
=back =head1 DESCRIPTION B<notes2html> will read a Perl program annotated with inline notes and will create an HTML document that will display the original code with added note references that link to the notes themselves. The notes can be displayed either in separate popup windows or as footnotes at the end of the HTML document. The syntax of the inline notes is intended to be reasonably simple yet allow for special features where desired. It is also designed to be unobtrusive: if you'd like you can leave the inline notes in the program and they won't affect its execution; otherwise you can use C<grep -v '^##'> to produce a version of the program without the notes. Here are some example inline notes to illustrate the syntax: ## This is an inline note referring to the Perl code line below ## that opens a file; inline notes are just Perl comments starting ## with two or more '#' characters and followed by at least one space. ## (You can also add an inline note to normal Perl comments as well.) open ($ifh, $input_fn) or die "$cmd: cannot open $input_fn: $!"; ## You can also provide more than one note reference for a given ## line of code. ### All you have to do is change the number of initial '#' characters. while (<$ifh>) { ## Text paragraphs in notes are processed using Textile, so you can ## use simple markup, e.g., to emphasize *important things* or to ## cite documents like ??Programming in Perl??, or full HTML markup ## for such things as Perl expressions (like <code>$foo + 2</code>). ## A text paragraph is concluded with a "blank" comment as follows ## (or by a code line). ## ## You can also include preformatted text to refer to one or more ## lines of Perl code (or other content intended to be included ## verbatim) as part of the inline note; as with Pod, preformatted ## text is marked by indenting it more than a regular text paragraph. 
## ## $str =~ s/($escape_re)/$escape{$1}/g; ## ## You can include URLs in the preformatted text and they will be ## automatically converted to links; for example, see ## ## http://www.example.com/an-example.html ## ftp://ftp.example.com/pub/foo.tar.gz ## ## You can also include HTML links in regular text paragraphs, ## either by using the standard HTML <A> tag or by using the ## Textile syntax "text of link":http://www.example.com/foo.html. ## ## You can include bulleted lists or numbered lists using ## the standard Textile markup conventions: ## ## * Bulleted list items start with '*'. ## * Don't indent them, or they'll be taken as preformatted text! ## ** You can have sublists... ## *** ...or even sub-sublists ## ## # Mark numbered list items with an initial '#'. ## # Textile will add numbers automatically. ## ## h3. A Final Note ## ## Last but not least, you can use "h3.", "h4.", etc., to provide ## a header for the beginning of an inline note and subheaders ## within it. The inline notes do not appear as part of the Perl code in the generated HTML. Instead each code line with an inline note (or notes) preceding it is displayed with a righthand Perl comment including note references in the form '[123]'; the note number is either an internal link pointing to a footnote at the end of the document, or a link to display a popup window containing the note. The generated HTML document and the popup note windows contain only standard HTML, and should properly validate unless you've included non-standard HTML markup in your inline notes. In particular, the main HTML document should validate as HTML 4.01 Strict, while the popup notes should validate as HTML 4.01 Transitional. (In popup notes we use the C<TARGET> attribute of the C<< <A> >> tag, which is not permitted under the HTML 4.01 Strict DTD.) 
One common problem for HTML validation is including something in a text paragraph that looks like an HTML tag, e.g., referring to a filehandle C<< <FOO> >>; in cases like that you'll need to use the HTML character entities C<&lt;> and C<&gt;>, e.g., write C<&lt;FOO&gt;> instead. (Note that this is not a problem in preformatted text, where the escaping of HTML special characters is done automatically.) =head1 IMPLEMENTATION NOTES We employ two separate approaches to storing note-related information and generating HTML for notes. When notes are to be displayed as footnotes (i.e., at the end of the HTML document) the approach is straightforward: save all notes in a Perl array as we encounter them in the input, and then iterate over the array to generate HTML for the notes after we have generated HTML for all the code lines. The approach for handling popup notes is more complicated: As notes are encountered we do I<not> save them in Perl; rather we generate JavaScript code to save the HTML for the notes in a JavaScript array in the context of the HTML document itself. That array can then be referenced in a JavaScript function C<popup_note> invoked by the C<onClick> handler for the links associated with note references. Another possible approach would be to pass the HTML for the note as a JavaScript string literal directly to the C<popup_note> function. This would eliminate the need to use a JavaScript array but would introduce a great deal of complexity in terms of how to properly escape characters in the string literal, since the string literal would be interpreted in a mixed HTML/JavaScript context (i.e., the C<onClick> attribute of an C<< <A> >> tag). Using a JavaScript variable reduces the complexity, since the literal is interpreted in more of a pure JavaScript context (i.e., within an HTML comment between C<< <SCRIPT> >> and C<< </SCRIPT> >> tags). =head1 BUGS The content of popup notes does not display when using Microsoft Internet Explorer for Mac OS X. 
(Instead the word "undefined" is displayed where the note contents should be.) It is not possible to specify the --fold option and let the number of columns default to some reasonable value (e.g., to 80 columns); you must always explicitly specify the number of columns. (This is due to the limitations of the Getopt::Long module.) The code to recognize URLs in the preformatted parts of inline notes will not recognize URLs with embedded whitespace. Also, the code recognizes only a limited subset of the URI schemes recognized by the IANA (see L<http://www.iana.org/assignments/uri-schemes>). If a note is included at the very end of the program (i.e., there are no program lines after the note) then a reference to the note will never be printed. The workaround is to put a standard comment after the note. For consistency of style B<notes2html> generates an initial header for each popup note using an C<< <H2> >> tag, not an C<< <H1> >> tag, to match the C<< <H2> >> tag used to introduce a footnote. (An C<< <H1> >> header is generated as a title for the code itself.) Using the Textile markup C<h1.> or C<h2.> within inline notes will produce headers which look odd and don't properly reflect structural markup within the generated HTML; you should use only C<h3.> (or higher) headers within inline notes. The C<interpolate> subroutine within B<notes2html> does not check to see whether interpolated variables in the HTML template strings are defined or not. This is an issue only if you modify B<notes2html> to change the template strings. In several places (e.g., the folding algorithm) the code is somewhat kludgy and doesn't necessarily evince proper Perl style. Suggestions for improvement are welcome. =head1 TO DO Here are some ideas for ways in which this program could be enhanced and extended: =over =item Allow more control over popup note size and placement, either at the time the HTML document is generated or (ideally) at runtime. 
For example, the size and placement of the popup window could be computed taking into account the size of the user's screen and the size and placement of the main document window; if there's enough room the popup window should go to the immediate right of the note reference in the main window, placed far enough over so as not to obscure the code in the main window. It would be even nicer to let users move and resize a popup note to their liking, and then to reuse that size and placement for subsequent popup notes. =item Allow a note to refer to another note using a symbolic name that would be converted to a note number in the generated HTML. Such references should also produce properly behaving links; i.e., a link in a footnote to another note should be an internal link, while a link in a popup note to another note should pop up a new window. In the meantime you can use the C<--line-numbers> option and have a note refer to, e.g., the note for line 123. =item Support other languages by changing the particular comment markers that are recognized and the comments for note references that are generated. In the meantime you can annotate Unix shell scripts and other files that use '#' to mark comments. =item Support multiple files as input, producing a single file as output, including appropriate HTML headers to separate the code for the various files. In the meantime you can cat the files together before processing them with B<notes2html>, adding some intermediate files that contain blank lines and comments to provide spacing between the code files. =item Support specifying your own CSS styles for the generated HTML document and/or for popup note windows. (At present the CSS styles are hard-coded in the HTML template strings; the styles are loosely based on the styles used on the Perl documentation website.) =item Support marking up inline notes using Pod instead of Textile. 
Note that this would I<not> be the same as using Pod in the standard way, i.e., including Pod commands within the source file and then using a converter like B<pod2html> to process the entire file as a whole. Rather this refers to using B<notes2html> to recognize inline notes as described above, with each inline note (after stripping the initial '#' characters) containing a section of Pod markup to be processed independently of the Pod markup in any other note. =back =head1 SEE ALSO For more information on Textile markup see http://www.textism.com/tools/textile Note that Text::Textile does not implement the full Textile specification; for example, it does not support using Textile markup for mailto: URLs. =head1 VERSION Version 0.94 (June 27, 2004). =head1 AUTHOR Created by Frank Hecker <hecker@hecker.org>. A few bits of code were inspired by and/or adapted from Rael Dornfest's Blosxom code. =head1 COPYRIGHT AND LICENSE Copyright 2004 Frank Hecker Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. =cut