Annotated blosxom.cgi

    1  #!/usr/bin/perl
    2  
    3  # Blosxom                                                               # [1] [2] [3]
    4  # Author: Rael Dornfest <rael@oreilly.com>
    5  # Version: 2.0
    6  # Home/Docs/Licensing: http://www.raelity.org/apps/blosxom/
    7  
    8  package blosxom;                                                        # [4]
    9  
   10  # --- Configurable variables -----                                      # [5]
   11  
   12  # What's this blog's title?
   13  $blog_title = "My Weblog";                                              # [6]
   14  
   15  # What's this blog's description (for outgoing RSS feed)?
   16  $blog_description = "Yet another Blosxom weblog.";
   17  
   18  # What's this blog's primary language (for outgoing RSS feed)?
   19  $blog_language = "en";
   20  
   21  # Where are this blog's entries kept?
   22  $datadir = "/Library/WebServer/Documents/blosxom";
   23  
   24  # What's my preferred base URL for this blog (leave blank for automatic)?
   25  $url = "";
   26  
   27  # Should I stick only to the datadir for items or travel down the
   28  # directory hierarchy looking for items?  If so, to what depth?
   29  # 0 = infinite depth (aka grab everything), 1 = datadir only, n = n levels down
   30  $depth = 0;
   31  
   32  # How many entries should I show on the home page?
   33  $num_entries = 40;
   34  
   35  # What file extension signifies a blosxom entry?
   36  $file_extension = "txt";
   37  
   38  # What is the default flavour?
   39  $default_flavour = "html";
   40  
   41  # Should I show entries from the future (i.e. dated after now)?
   42  $show_future_entries = 0;
   43  
   44  # --- Plugins (Optional) -----
   45  
   46  # Where are my plugins kept?
   47  $plugin_dir = "";
   48  
   49  # Where should my modules keep their state information?
   50  $plugin_state_dir = "$plugin_dir/state";
   51  
   52  # --- Static Rendering -----
   53  
   54  # Where are this blog's static files to be created?
   55  $static_dir = "/Library/WebServer/Documents/blog";
   56  
   57  # What's my administrative password (you must set this for static rendering)?
   58  $static_password = "";
   59  
   60  # What flavours should I generate statically?
   61  @static_flavours = qw/html rss/;                                        # [7]
   62  
   63  # Should I statically generate individual entries?
   64  # 0 = no, 1 = yes
   65  $static_entries = 0;
   66  
   67  # --------------------------------
   68  
   69  use vars qw! $version $blog_title $blog_description $blog_language $datadir $url %template $template $depth $num_entries $file_extension $default_flavour $static_or_dynamic $plugin_dir $plugin_state_dir @plugins %plugins $static_dir $static_password @static_flavours $static_entries $path_info $path_info_yr $path_info_mo $path_info_da $path_info_mo_num $flavour $static_or_dynamic %month2num @num2month $interpolate $entries $output $header $show_future_entries %files %indexes %others !;  # [8]
   70  
   71  use strict;                                                             # [9]
   72  use FileHandle;                                                         # [10] [11]
   73  use File::Find;                                                         # [12]
   74  use File::stat;                                                         # [13]
   75  use Time::localtime;                                                    # [14]
   76  use CGI qw/:standard :netscape/;                                        # [15]
   77  
   78  $version = "2.0";                                                       # [16]
   79  
   80  my $fh = new FileHandle;                                                # [17] [18]
   81  
   82  %month2num = (nil=>'00', Jan=>'01', Feb=>'02', Mar=>'03', Apr=>'04', May=>'05', Jun=>'06', Jul=>'07', Aug=>'08', Sep=>'09', Oct=>'10', Nov=>'11', Dec=>'12');  # [19]
   83  @num2month = sort { $month2num{$a} <=> $month2num{$b} } keys %month2num;  # [20]
   84  
   85  # Use the stated preferred URL or figure it out automatically
   86  $url ||= url();                                                         # [21] [22]
   87  $url =~ s/^included:/http:/; # Fix for Server Side Includes (SSI)       # [23] [24]
   88  $url =~ s!/$!!;                                                         # [25]
   89  
   90  # Drop ending any / from dir settings
   91  $datadir =~ s!/$!!; $plugin_dir =~ s!/$!!; $static_dir =~ s!/$!!;
   92    
   93  # Fix depth to take into account datadir's path
   94  $depth and $depth += ($datadir =~ tr[/][]) - 1;                         # [26] [27]
   95  
   96  # Global variable to be used in head/foot.{flavour} templates
   97  $path_info = '';
   98  
   99  $static_or_dynamic = (!$ENV{GATEWAY_INTERFACE} and param('-password') and $static_password and param('-password') eq $static_password) ? 'static' : 'dynamic';  # [28] [29]
  100  $static_or_dynamic eq 'dynamic' and param(-name=>'-quiet', -value=>1);  # [30]
  101  
  102  # Path Info Magic
  103  # Take a gander at HTTP's PATH_INFO for optional blog name, archive yr/mo/day
  104  my @path_info = split m{/}, path_info() || param('path');               # [31] [32]
  105  shift @path_info;                                                       # [33]
  106  
  107  while ($path_info[0] and $path_info[0] =~ /^[a-zA-Z].*$/ and $path_info[0] !~ /(.*)\.(.*)/) { $path_info .= '/' . shift @path_info; }  # [34] [35]
  108  
  109  # Flavour specified by ?flav={flav} or index.{flav}
  110  $flavour = '';
  111  
  112  if ( $path_info[$#path_info] =~ /(.+)\.(.+)$/ ) {                       # [36] [37]
  113    $flavour = $2;                                                        # [38]
  114    $1 ne 'index' and $path_info .= "/$1.$2";                             # [39]
  115    pop @path_info;                                                       # [40]
  116  } else {
  117    $flavour = param('flav') || $default_flavour;                         # [41]
  118  }
  119  
  120  # Strip spurious slashes
  121  $path_info =~ s!(^/*)|(/*$)!!g;                                         # [42]
  122  
  123  # Date fiddling
  124  ($path_info_yr,$path_info_mo,$path_info_da) = @path_info;               # [43] [44]
  125  $path_info_mo_num = $path_info_mo ? ( $path_info_mo =~ /\d{2}/ ? $path_info_mo : ($month2num{ucfirst(lc $path_info_mo)} || undef) ) : undef;  # [45] [46]
  126  
   127  # Define standard template subroutine, plugin-overridable at Plugins: Template; called as ($path, $chunk, $flavour), it returns that template chunk as a single string
   128  $template =                                                             # [47] [48]
   129    sub {
   130      my ($path, $chunk, $flavour) = @_;                                  # [49] [50]
   131  # Try $datadir/$path/$chunk.$flavour first, then the same file name in each parent directory in turn, ending with $datadir itself; the first file that opens is returned whole
   132      do {                                                                # [51]
   133        return join '', <$fh> if $fh->open("< $datadir/$path/$chunk.$flavour");  # [52]
   134      } while ($path =~ s/(\/*[^\/]*)$// and $1);                         # [53]
   135  # No template file was found: fall back to the built-in %template entry for this flavour, then to the one for the error flavour, then to an empty string
   136      return join '', ($template{$flavour}{$chunk} || $template{error}{$chunk} || '');  # [54]
   137    };
  138  # Bring in the templates
  139  %template = ();                                                         # [55]
  140  while (<DATA>) {                                                        # [56] [57]
  141    last if /^(__END__)?$/;                                               # [58]
  142    my($ct, $comp, $txt) = /^(\S+)\s(\S+)\s(.*)$/;                        # [59]
  143    $txt =~ s/\\n/\n/mg;                                                  # [60]
  144    $template{$ct}{$comp} = $txt;                                         # [61]
  145  }
  146  
  147  # Plugins: Start
  148  if ( $plugin_dir and opendir PLUGINS, $plugin_dir ) {                   # [62]
  149    foreach my $plugin ( grep { /^\w+$/ && -f "$plugin_dir/$_"  } sort readdir(PLUGINS) ) {  # [63]
  150      my($plugin_name, $off) = $plugin =~ /^\d*(\w+?)(_?)$/;              # [64]
  151      my $on_off = $off eq '_' ? -1 : 1;                                  # [65]
  152      require "$plugin_dir/$plugin";                                      # [66]
  153      $plugin_name->start() and ( $plugins{$plugin_name} = $on_off ) and push @plugins, $plugin_name;  # [67]
  154    }
  155    closedir PLUGINS;                                                     # [68]
  156  }
  157  
  158  # Plugins: Template
  159  # Allow for the first encountered plugin::template subroutine to override the
  160  # default built-in template subroutine
  161  my $tmp; foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('template') and defined($tmp = $plugin->template()) and $template = $tmp and last; }  # [69] [70] [71]
  162  
   163  # Provide backward compatibility for Blosxom < 2.0rc1 plug-ins: passes all of its arguments to the sub currently referenced by $template and returns that sub's result
   164  sub load_template {                                                     # [72]
   165    return &$template(@_);
   166  }
  167  
   168  # Define default find subroutine: walks $datadir with find() and returns references to the %files, %indexes and %others hashes it builds
   169  $entries =                                                              # [73]
   170    sub {
   171      my(%files, %indexes, %others);                                      # [74]
   172      find(                                                               # [75]
   173        sub {
   174          my $d; 
   175          my $curr_depth = $File::Find::dir =~ tr[/][];                   # [76]
   176          return if $depth and $curr_depth > $depth;                      # [77]
   177       # The check above ignores anything whose directory has more slashes than $depth allows; a $depth of 0 disables it
   178          if (                                                            # [78]
   179            # a match
   180            $File::Find::name =~ m!^$datadir/(?:(.*)/)?(.+)\.$file_extension$!  # [79]
   181            # not an index, .file, and is readable
   182            and $2 ne 'index' and $2 !~ /^\./ and (-r $File::Find::name)  # [80]
   183          ) {
   184  # From here on $1 is the entry's directory relative to $datadir (undefined for entries directly in $datadir) and $2 is its file name without the extension
   185              # to show or not to show future entries                     # [81]
   186              (                                                           # [82]
   187                $show_future_entries
   188                or stat($File::Find::name)->mtime < time 
   189              )
   190  
   191                # add the file and its associated mtime to the list of files
   192                and $files{$File::Find::name} = stat($File::Find::name)->mtime  # [83]
   193  
   194                  # static rendering bits
   195                  and (                                                   # [84]
   196                    param('-all')                                         # [85]
   197                    or !-f "$static_dir/$1/index." . $static_flavours[0]  # [86]
   198                    or stat("$static_dir/$1/index." . $static_flavours[0])->mtime < stat($File::Find::name)->mtime  # [87]
   199                  )
   200                    and $indexes{$1} = 1                                  # [88]
   201                      and $d = join('/', (nice_date($files{$File::Find::name}))[5,2,3])  # [89]
   202    # $d is the entry date as year/month-number/day; it is stored with itself as the value (not 1) so the static rendering loop can tell date keys from path keys
   203                        and $indexes{$d} = $d                             # [90]
   204                          and $static_entries and $indexes{ ($1 ? "$1/" : '') . "$2.$file_extension" } = 1  # [91]
   205  
   206              } 
   207              else {
   208                !-d $File::Find::name and -r $File::Find::name and $others{$File::Find::name} = stat($File::Find::name)->mtime  # [92]
   209              }
   210        }, $datadir                                                       # [93]
   211      );
   212  # %files and %others map full path names to modification times; %indexes holds the paths and dates that static rendering should (re)build
   213      return (\%files, \%indexes, \%others);                              # [94]
   214    };
  215  
  216  # Plugins: Entries
  217  # Allow for the first encountered plugin::entries subroutine to override the
  218  # default built-in entries subroutine
  219  my $tmp; foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('entries') and defined($tmp = $plugin->entries()) and $entries = $tmp and last; }  # [95]
  220  
  221  my ($files, $indexes, $others) = &$entries();                           # [96]
  222  %files = %$files; %indexes = %$indexes; %others = ref $others ? %$others : ();  # [97]
  223  
  224  # Plugins: Filter
  225  foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('filter') and $entries = $plugin->filter(\%files, \%others) }  # [98]
  226  
  227  # Static
  228  if (!$ENV{GATEWAY_INTERFACE} and param('-password') and $static_password and param('-password') eq $static_password) {  # [99]
  229  
  230    param('-quiet') or print "Blosxom is generating static index pages...\n";  # [100]
  231  
  232    # Home Page and Directory Indexes
  233    my %done;                                                             # [101]
  234    foreach my $path ( sort keys %indexes) {                              # [102]
  235      my $p = '';                                                         # [103]
  236      foreach ( ('', split /\//, $path) ) {                               # [104]
  237        $p .= "/$_";                                                      # [105]
  238        $p =~ s!^/!!;
  239        $path_info = $p;                                                  # [106]
  240        $done{$p}++ and next;                                             # [107] [108]
  241        (-d "$static_dir/$p" or $p =~ /\.$file_extension$/) or mkdir "$static_dir/$p", 0755;  # [109]
  242        foreach $flavour ( @static_flavours ) {                           # [110]
  243          my $content_type = (&$template($p,'content_type',$flavour));    # [111]
  244          $content_type =~ s!\n.*!!s;
  245          my $fn = $p =~ m!^(.+)\.$file_extension$! ? $1 : "$p/index";    # [112]
  246          param('-quiet') or print "$fn.$flavour\n";
  247          my $fh_w = new FileHandle "> $static_dir/$fn.$flavour" or die "Couldn't open $static_dir/$p for writing: $!";    # [113]
  248          $output = '';                                                   # [114]
  249          print $fh_w                                                     # [115]
  250            $indexes{$path} == 1
  251              ? &generate('static', $p, '', $flavour, $content_type)      # [116]
  252              : &generate('static', '', $p, $flavour, $content_type);
  253          $fh_w->close;                                                   # [117]
  254        }
  255      }
  256    }
  257  }
  258  
  259  # Dynamic
  260  else {                                                                  # [118]
  261    my $content_type = (&$template($path_info,'content_type',$flavour));  # [119]
  262    $content_type =~ s!\n.*!!s;
  263  
  264    $header = {-type=>$content_type};                                     # [120] [121]
  265  
  266    print generate('dynamic', $path_info, "$path_info_yr/$path_info_mo_num/$path_info_da", $flavour, $content_type);  # [122]
  267  }
  268  
  269  # Plugins: End
  270  foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('end') and $entries = $plugin->end() }  # [123]
  271  
  272  # Generate                                                              # [124]
   273  sub generate {                                                          # [125]
   274    my($static_or_dynamic, $currentdir, $date, $flavour, $content_type) = @_;  # [126]
   275  # Arguments: the string static or dynamic, the category path or single entry to show, a year/month-number/day filter (parts may be empty), the flavour, and its content type. Appends to the global $output and returns it.
   276    my %f = %files;                                                       # [127]
   277  # %f is a private copy of %files so that a single-entry view can narrow the candidate list without altering the global hash
   278    # Plugins: Skip
   279    # Allow plugins to decide if we can cut short story generation
   280    my $skip; foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('skip') and defined($tmp = $plugin->skip()) and $skip = $tmp and last; }  # [128] [129]
   281    # NOTE: the $tmp used above is the file-scoped lexical declared earlier in the script; this sub only declares its own $tmp further down
   282    # Define default interpolation subroutine
   283    $interpolate =                                                        # [130]
   284      sub {
   285        package blosxom;                                                  # [131]
   286        my $template = shift;
   287        $template =~                                                      # [132] [133]
   288          s/(\$\w+(?:::)?\w*)/"defined $1 ? $1 : ''"/gee;
   289        return $template;
   290      };  
   291  # The default above replaces each $name or $package::name found in a template with that variable's value, or with an empty string when the variable is undefined
   292    unless (defined($skip) and $skip) {                                   # [134]
   293  
   294      # Plugins: Interpolate
   295      # Allow for the first encountered plugin::interpolate subroutine to 
   296      # override the default built-in interpolate subroutine
   297      my $tmp; foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('interpolate') and defined($tmp = $plugin->interpolate()) and $interpolate = $tmp and last; }  # [135]
   298          
   299      # Head
   300      my $head = (&$template($currentdir,'head',$flavour));               # [136]
   301    
   302      # Plugins: Head
   303      foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('head') and $entries = $plugin->head($currentdir, \$head) }  # [137] [138]
   304    
   305      $head = &$interpolate($head);                                       # [139]
   306    
   307      $output .= $head;
   308      
   309      # Stories
   310      my $curdate = '';
   311      my $ne = $num_entries;                                              # [140]
   312  # A $currentdir ending in name.ext (other than index.ext) selects a single entry: it is rewritten to use $file_extension, and %f is cut down to that one file when it is a known entry
   313      if ( $currentdir =~ /(.*?)([^\/]+)\.(.+)$/ and $2 ne 'index' ) {    # [141] [142]
   314        $currentdir = "$1$2.$file_extension";                             # [143]
   315        $files{"$datadir/$1$2.$file_extension"} and %f = ( "$datadir/$1$2.$file_extension" => $files{"$datadir/$1$2.$file_extension"} );  # [144]
   316      } 
   317      else { 
   318        $currentdir =~ s!/index\..+$!!;                                   # [145]
   319      }
   320  # The default sort below orders path names by descending hash value, i.e. newest modification time first
   321      # Define a default sort subroutine
   322      my $sort = sub {                                                    # [146]
   323        my($files_ref) = @_;
   324        return sort { $files_ref->{$b} <=> $files_ref->{$a} } keys %$files_ref;
   325      };
   326    
   327      # Plugins: Sort
   328      # Allow for the first encountered plugin::sort subroutine to override the
   329      # default built-in sort subroutine
   330      my $tmp; foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('sort') and defined($tmp = $plugin->sort()) and $sort = $tmp and last; }  # [147]
   331    # The entry cap ($ne, taken from $num_entries) ends the loop below only when the $date argument contains no digits, i.e. when no date filter was passed
   332      foreach my $path_file ( &$sort(\%f, \%others) ) {                   # [148]
   333        last if $ne <= 0 && $date !~ /\d/;                                # [149] [150]
   334        use vars qw/ $path $fn /;                                         # [151]
   335        ($path,$fn) = $path_file =~ m!^$datadir/(?:(.*)/)?(.*)\.$file_extension!;  # [152]
   336    
   337        # Only stories in the right hierarchy
   338        $path =~ /^$currentdir/ or $path_file eq "$datadir/$currentdir" or next;  # [153]
   339    
   340        # Prepend a slash for use in templates only if a path exists
   341        $path &&= "/$path";                                               # [154]
   342  
   343        # Date fiddling for by-{year,month,day} archive views
   344        use vars qw/ $dw $mo $mo_num $da $ti $yr $hr $min $hr12 $ampm /;  # [155]
   345        ($dw,$mo,$mo_num,$da,$ti,$yr) = nice_date($files{"$path_file"});
   346        ($hr,$min) = split /:/, $ti;
   347        ($hr12, $ampm) = $hr >= 12 ? ($hr - 12,'pm') : ($hr, 'am'); 
   348        $hr12 =~ s/^0//; $hr12 == 0 and $hr12 = 12;
   349    
   350        # Only stories from the right date
   351        my($path_info_yr,$path_info_mo_num, $path_info_da) = split /\//, $date;  # [156]
   352        next if $path_info_yr && $yr != $path_info_yr; last if $path_info_yr && $yr < $path_info_yr;   # [157]
   353        next if $path_info_mo_num && $mo ne $num2month[$path_info_mo_num];  # [158]
   354        next if $path_info_da && $da != $path_info_da; last if $path_info_da && $da < $path_info_da;   # [159]
   355    
   356        # Date 
   357        my $date = (&$template($path,'date',$flavour));                   # [160] [161]
   358        
   359        # Plugins: Date
   360        foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('date') and $entries = $plugin->date($currentdir, \$date, $files{$path_file}, $dw,$mo,$mo_num,$da,$ti,$yr) }  # [162]
   361    
   362        $date = &$interpolate($date);                                     # [163]
   363    # The date heading is appended only when it differs from the heading of the previous story
   364        $curdate ne $date and $curdate = $date and $output .= $date;      # [164]
   365        
   366        use vars qw/ $title $body $raw /;                                 # [165]
   367        if (-f "$path_file" && $fh->open("< $path_file")) {               # [166]
   368          chomp($title = <$fh>);
   369          chomp($body = join '', <$fh>);
   370          $fh->close;
   371          $raw = "$title\n$body";                                         # [167]
   372        }
   373        my $story = (&$template($path,'story',$flavour));                 # [168]
   374    
   375        # Plugins: Story
   376        foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('story') and $entries = $plugin->story($path, $fn, \$story, \$title, \$body) }  # [169]
   377        
   378        if ($content_type =~ m{\Wxml$}) {                                 # [170]
   379          # Escape <, >, and &, and to produce valid RSS
   380          my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');    # [171]
   381          my $escape_re  = join '|' => keys %escape;                      # [172]
   382          $title =~ s/($escape_re)/$escape{$1}/g;                         # [173]
   383          $body =~ s/($escape_re)/$escape{$1}/g;
   384        }
   385    # $title and $body are package variables (declared with use vars above), which is what lets the interpolation below substitute them into the story template
   386        $story = &$interpolate($story);                                   # [174]
   387      
   388        $output .= $story;
   389        $fh->close;                                                       # [175]
   390    
   391        $ne--;
   392      }
   393    
   394      # Foot
   395      my $foot = (&$template($currentdir,'foot',$flavour));               # [176]
   396    
   397      # Plugins: Foot
   398      foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('foot') and $entries = $plugin->foot($currentdir, \$foot) }  # [177]
   399    
   400      $foot = &$interpolate($foot);                                       # [178]
   401      $output .= $foot;
   402  
   403      # Plugins: Last
   404      foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('last') and $entries = $plugin->last() }  # [179]
   405  
   406    } # End skip
   407  
   408    # Finally, add the header, if any and running dynamically
   409    $static_or_dynamic eq 'dynamic' and $header and $output = header($header) . $output;  # [180]
   410    
   411    $output;                                                              # [181]
   412  }
   413  # nice_date($unixtime): format the given time with ctime() and split the resulting string into display fields
   414  # Returns the list ($dw, $mo, $mo_num, $da, $ti, $yr): weekday and month abbreviations, month number looked up in %month2num, day of month, hh:mm time, and four-digit year
   415  sub nice_date {                                                         # [182]
   416    my($unixtime) = @_;                                                   # [183]
   417    # The pattern below matches the seconds but does not capture them; the day is then zero-padded to two digits
   418    my $c_time = ctime($unixtime);                                        # [184]
   419    my($dw,$mo,$da,$ti,$yr) = ( $c_time =~ /(\w{3}) +(\w{3}) +(\d{1,2}) +(\d{2}:\d{2}):\d{2} +(\d{4})$/ );  # [185]
   420    $da = sprintf("%02d", $da);                                           # [186]
   421    my $mo_num = $month2num{$mo};                                         # [187]
   422    # Callers rely on this field order, e.g. the [5,2,3] slice that builds a year/month-number/day string
   423    return ($dw,$mo,$mo_num,$da,$ti,$yr);
   424  }
  425  
  426  
  427  # Default HTML and RSS template bits                                    # [188]
  428  __DATA__
  429  html content_type text/html
  430  html head <html><head><link rel="alternate" type="type="application/rss+xml" title="RSS" href="$url/index.rss" /><title>$blog_title $path_info_da $path_info_mo $path_info_yr</title></head><body><center><font size="+3">$blog_title</font><br />$path_info_da $path_info_mo $path_info_yr</center><p />
  431  html story <p><a name="$fn"><b>$title</b></a><br />$body<br /><br />posted at: $ti | path: <a href="$url$path">$path</a> | <a href="$url/$yr/$mo_num/$da#$fn">permanent link to this entry</a></p>\n
  432  html date <h3>$dw, $da $mo $yr</h3>\n
  433  html foot <p /><center><a href="http://www.blosxom.com/"><img src="http://www.blosxom.com/images/pb_blosxom.gif" border="0" /></a></body></html>
  434  rss content_type text/xml
  435  rss head <?xml version="1.0"?>\n<!-- name="generator" content="blosxom/$version" -->\n<!DOCTYPE rss PUBLIC "-//Netscape Communications//DTD RSS 0.91//EN" "http://my.netscape.com/publish/formats/rss-0.91.dtd">\n\n<rss version="0.91">\n  <channel>\n    <title>$blog_title $path_info_da $path_info_mo $path_info_yr</title>\n    <link>$url</link>\n    <description>$blog_description</description>\n    <language>$blog_language</language>\n
  436  rss story   <item>\n    <title>$title</title>\n    <link>$url/$yr/$mo_num/$da#$fn</link>\n    <description>$body</description>\n  </item>\n
  437  rss date \n
  438  rss foot   </channel>\n</rss>
  439  error content_type text/html
  440  error head <html><body><p><font color="red">Error: I'm afraid this is the first I've heard of a "$flavour" flavoured Blosxom.  Try dropping the "/+$flavour" bit from the end of the URL.</font>\n\n
  441  error story <p><b>$title</b><br />$body <a href="$url/$yr/$mo_num/$da#fn.$default_flavour">#</a></p>\n
  442  error date <h3>$dw, $da $mo $yr</h3>\n
  443  error foot </body></html>
  444  __END__

Note 1

Annotation credits

This is version 0.9 of the Blosxom 2 annotations, by Frank Hecker <hecker@hecker.org>. These annotations to the blosxom.cgi source code are made available under the same license terms as Blosxom itself. Comments and suggestions for changes are welcome.

The online Perl documentation was indispensable in creating these notes. The notes2html script was used to create HTML documents from the inline notes.

For more information see the following URLs:

  http://www.blosxom.com/downloads/blosxom.zip
  http://www.blosxom.com/license.html
  http://www.hecker.org/blosxom/

Note 2

For people learning Perl: Comments and #!

All lines starting with '#' are comments, not part of the code itself. The first line uses the standard Unix #! convention to identify the location of the Perl interpreter. You would need to change this line if for some reason the Perl interpreter were in a different directory or had a different name.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/perlrun.html

Note 3

Overview of the Blosxom 2 source code

The Blosxom source file is divided into three major sections:

initial code (lines 1-8) and the configurable variable section (lines 12-65) containing variables that need to be modified in order to set up a new weblog (as described in the Blosxom user documentation).
the main Blosxom code (lines 69-424)
the data section defining the default html and rss flavours (lines 429-443)

The main Blosxom code itself is further divided into various sections as discussed below.

Blosxom initialization (lines 69-214)

In this section the following tasks are carried out:

declare various global variables (line 69)
import needed Perl modules (lines 71-76)
set the Blosxom version number (line 78)
define additional variables needed (lines 80-83):
- $fh: a FileHandle for reading files (line 80)
- %month2num: hash to convert a month abbreviation to a month number (line 82)
- @num2month: array to convert a month number to a month abbreviation (line 83)
tweak the values of selected configuration variables (lines 86-94):
- $url: if a value for the base URL wasn't defined in the configurable variables section, set a new value as described below (lines 86-88)
- $datadir: strip any trailing slash if present (line 91)
- $depth: adjust to account for the number of path components in $datadir (line 94)
extract information about the current requested page (lines 86-88, 97-125):
- $url: the part of the requested URL corresponding to the Blosxom script itself (e.g., http://www.example.com/cgi-bin/blosxom.cgi) (lines 86-88)
- $static_or_dynamic: set to 'static' if Blosxom is running in static mode, 'dynamic' if Blosxom is running in dynamic mode, i.e., through CGI (line 99)
- $path_info: the part of the requested URL identifying a particular category or individual entry to be displayed, e.g., /society/literature or /cooking/italian/bruschetta.html (lines 97, 104-107, 121)
- $flavour: the particular flavour of data being requested, e.g., 'html' or 'rss' (lines 110-118)
- $path_info_yr, $path_info_mo, and $path_info_da: the dates for which we are requesting that entries be displayed (lines 124-125)
template initialization (lines 128-145, 161)
1. define a default template routine and store a reference to it in $template (lines 128-137)
2. read the default templates from the data section and store them in the %template hash keyed by the content type (e.g., 'html' or 'rss') and template component (e.g., 'head' or 'foot') (lines 139-145)
3. override the default template subroutine with the template subroutine provided by the first plugin that defines one, if any do (line 161)
plugin initialization (lines 148-155)
1. find all plugins in the plugin directory (lines 148-149)
2. sort the plugins, taking into account prefixes like '00', '01', etc. (line 149)
3. determine whether a given plugin is enabled or disabled (lines 150-151)
4. import the plugin packages (line 152)
5. populate the @plugins array (a list of plugin names, minus prefixes) and the %plugins hash (which stores the enabled/disabled status for each plugin, keyed by the plugin name) (line 153)
define a default subroutine to find entries in the data directory (storing a reference to the subroutine in $entries) (lines 169-214) and then allow overriding it by the first plugin that defines an alternate entries subroutine (line 219)

Finding (and filtering) Blosxom entries (lines 221-226)

This section of the code looks for Blosxom entries and related items of interest, performing the following tasks:

search for entries using the subroutine referenced by $entries, and build up three hashes (lines 221-222):
- %files: files representing individual Blosxom entries (e.g., foo.txt if '.txt' is the standard Blosxom file extension)
- %indexes: directories for which index files might need to be created or updated as part of static page generation, as well as individual entry files for which static pages might need to be generated
- %others: all other files not falling into the above two categories
filter the list of files in %files and %others by invoking the filter subroutine for each and every plugin that defines one (line 225)

The hashes %files and %others are keyed by the name of the entry file (for %files) or other item (for %others), in the form of an absolute pathname; the value for each element in %files or %others is the date/time last modified for the corresponding entry file or other item.

The hash %indexes is keyed by the name of the directory or entry file for which static page generation should be done, expressed as a relative pathname relative to the Blosxom data directory (e.g., 'a/b' or '2004/05/22'); the value for elements in %indexes is 1 for elements corresponding to category directories or individual entries, and for elements corresponding to date directories is the same as the key (e.g., '2004/05/22').

Generating output (lines 228-270)

The next section of the Blosxom code generates HTML or other output. For dynamic invocation of Blosxom this is relatively simple, since we need to generate only one page in response to the requested URL (lines 260-267):

determine the content type for the requested flavour and create the appropriate HTTP header (lines 261-264)
call the generate subroutine to create the page output, based on the category, date, entry, and flavour information from the requested URL (line 266)
print the output returned by generate (which includes the HTTP header for the appropriate content type) (line 266)

For static invocation of Blosxom page generation is more complex, since we may need to generate several pages (lines 230-257):

loop through each element of %indexes (lines 234-256) and then for each element loop through each directory component of the item (directory or entry file) corresponding to the element (lines 236-255)
1. create new directories wherever needed in order to hold index pages (line 241)
2. for each of the required flavours specified by @static_flavours (lines 242-254)
  1. create (or rewrite) the required index page or static entry page (lines 245-247)
  2. call the generate subroutine to create the output for the page (lines 250-252)
  3. write the output to the static file and then close the file (lines 249-253)

Finally, we loop through the plugins and call each plugin's end subroutine in order to do any final processing (line 270).

The `generate` subroutine (lines 273-412)

The generate subroutine creates the actual output for a page of the desired flavour, taking as input the path information for the category, entry file, and/or date, along with the flavour and content type, and an indication of whether static or dynamic page generation is desired. The generate subroutine also uses the hashes %files, %indexes, and %others previously populated.

The specific tasks performed by the generate subroutine are as follows:

define a default interpolate subroutine for variable interpolation in templates (lines 283-290)
call each plugin's skip subroutine and decide if page generation should be skipped, otherwise proceeding as described below (lines 280, 292)
allow one of the plugins to override the default interpolate subroutine (line 297)
generate output for the 'head' section (lines 300-307):
1. determine the proper 'head' template to use, based on the default template subroutine or one provided by a plugin (line 300)
2. allow the plugins to modify the 'head' template (line 303)
3. interpolate the values of variables (e.g., $blog_title) in the 'head' template and add the result to the output (lines 305-307)
tweak the $currentdir argument, which holds information on the category and/or individual entry for which a page needs to be generated (lines 313-319)
if a page for an individual entry is to be generated, tweak the %f hash (a copy of %files) so that it contains information for just that entry (line 315)
define a default subroutine for sorting entries (by file modification times) and then allow a plugin to override it (lines 322-330)
loop through the (sorted) elements of the hashes %f and %others, each representing an entry to be added to the generated page (lines 332-392)
1. for category index pages and the main index page, stop looking at entries once we've reached the maximum entries per page configured using $num_entries (line 333)
2. skip entries that are in categories other than the one for which a page is being generated (line 338)
3. skip entries whose dates don't match the date(s) for which a page is being generated (lines 344-354)
4. do date processing (lines 357-364):
  1. get the appropriate template for the 'date' section (line 357)
  2. allow plugins to modify the template (line 360)
  3. interpolate variables into the template, including the actual date values (line 362)
  4. generate output for the date if needed, e.g., at the beginning of a set of entries for the same date (line 364)
5. read the entry file to obtain the entry title (the first line of the file) and body (the rest of the file), and generate output for the entry (lines 366-389):
  1. get the appropriate template for the 'story' section (line 373)
  2. allow plugins to modify the story template (line 376)
  3. for RSS and similar types of XML-based content, replace problematic characters in the story template with the corresponding character entities (lines 378-384)
  4. interpolate variables into the story template (line 386)
  5. generate output for the story (line 388) and prepare to process the next entry file, if any (lines 389-391)
generate output for the 'foot' section (lines 395-401)
1. determine the proper 'foot' template to use, based on the default template subroutine or one provided by a plugin (line 395)
2. allow the plugins to modify the 'foot' template (line 398)
3. interpolate the values of variables in the 'foot' template and add the result to the output (lines 400-401)
call the last subroutine of each and every plugin that defines one, to do any final processing for the page (line 404)
prepend the HTTP header if needed, and return the generated output (lines 409-411)

The `nice_date` subroutine (lines 415-424)

The nice_date subroutine converts OS-provided time values (expressed as the number of seconds since some fixed date) into year, month, day, etc., values that we can use for printing date/times and creating date-based URLs. For more information see the notes for lines 415-424.

Note 4

For people learning Perl: Packages

package defines a namespace for variables, subroutines, etc., so that their names won't conflict with names defined in other Perl code used by Blosxom and pulled in from other places.

See the following URL for more information:

  http://www.perldoc.com/perl5.8.4/pod/perlmod.html#Packages

Note 5

For people learning Perl: Package "global" variables

The scope of the configurable variables is within the blosxom package. We put "global" in quotes because, as the Perl documentation notes, "there's really no such thing as a global variable in Perl", in the sense of global variables as used in C and similar languages. However the configurable variables are like global variables in that their values are visible anywhere in the Blosxom code (unless "hidden" by other variable declarations as described in the notes to line 171). See also the notes to line 69.

The configurable variables can be referenced from Blosxom plugins as $blosxom::foo where $foo is a variable. Alternatively, a Blosxom plugin can include a package blosxom statement prior to a section of code to allow Blosxom configurable variables to be referenced within that code section without having to preface the variables' names with "blosxom::". (For example, a plugin would do this when defining its own version of the interpolate subroutine; see the notes to lines 283 and 285 for more information.)

See the following URL for more information on variable scope:

  http://www.perldoc.com/perl5.8.4/pod/perlmod.html

Note when reading the documentation that the configurable variables are considered to be "dynamic" (as opposed to "lexical") variables.

Note 6

For people learning Perl: Scalar variables

In Perl a variable starting with '$' is a scalar (i.e., single-valued) variable. Note that unlike shell syntax the '$' is used when assigning to the variable as well as when using its value.

The $blog_title variable is used to hold a string. Like shell variables Perl scalar variables can have either string or numeric values. String values can be delimited by either single quotes or double quotes; like the Unix shell, if the string is within double quotes then it can include references to other Perl variables (e.g., "A Blog by $author") and the values of those variables will be interpolated into the string, replacing the variable references.

Because of this variable interpolation, if you want to use a '$' in your blog title or description then you need to either precede the '$' with a '\' ("My \$64,000 Blog") or use single quotes to delimit the string ('My $64,000 Blog'). (If you use single quotes for your string delimiter then you will also need to escape any single quote character in the string itself by preceding it with a '\', e.g., 'John\'s $64,000 Blog'; a similar rule holds when you want to include a double quote in a string delimited by double quotes.)

For more information on Perl scalar variables see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/perldata.html#Scalar-values

Note 7

For people learning Perl: Array variables and `qw`

In Perl a variable starting with '@' is an array variable that holds an ordered list of values indexed by array position (starting from 0 as the first position).

Here we define a 2-element array with the string values 'html' and 'rss'. qw is a function that returns a list of words extracted out of a string enclosed within delimiters, e.g., qw/a b/ is the same as 'a', 'b'. (Alternately you could use qw(a b) or qw! a b ! or whatever.) This is a very common Perl idiom, as it eliminates the need to quote each and every word within the list.

For more information see the following URLs:

  http://www.perldoc.com/perl5.8.4/pod/perldata.html
  http://www.perldoc.com/perl5.8.4/pod/perlop.html#Regexp-Quote-Like-Operators

Note 8

For people learning Perl: `use vars`

Here we declare global variables used in this package (actually, within the file, but the file just contains a single package). Note that use vars was deemed obsolete as of Perl 5.6, being replaced by our, but as used here supports use of Blosxom with earlier Perl 5.x versions.

For more information see the following URLs:

  http://www.perldoc.com/perl5.8.4/pod/perlmodlib.html#Pragmatic-Modules
  http://search.cpan.org/~nwclark/perl-5.8.4/lib/vars.pm

Note 9

For people learning Perl: `use strict`

use strict tells Perl to raise errors (not merely warnings) for all sorts of unsafe constructs, such as references to variables that were not previously defined or declared.

For more information see the following URL:

  http://search.cpan.org/~nwclark/perl-5.8.4/lib/strict.pm

Note 10

For people learning Perl: Importing modules with the `use` function

The next few lines import various Perl modules, making their functions and global variables available without needing to qualify the names with package names. (In other words, we can refer to bar() rather than foo::bar() where bar is a function in the package foo.)

On packages vs. modules: per the documentation, "A module is just a set of related functions in a library file, i.e., a Perl package with the same name as the file." Strictly speaking Blosxom 2.0 is a package but not a module; however Blosxom 3.0 will be a full-fledged module.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/func/use.html

Note 11

FileHandle

The FileHandle module contains functions for basic file I/O operations: open, new, getc, gets, seek, close, etc.

For more information see the following URL:

  http://search.cpan.org/~nwclark/perl-5.8.4/lib/FileHandle.pm

Note 12

File::Find

The File::Find module contains functions to traverse a directory tree in the file system, analogous to the Unix find command. Blosxom uses File::Find functions and variables in its own find subroutine below.

For more information see the following URL:

  http://search.cpan.org/~nwclark/perl-5.8.4/lib/File/Find.pm

Note 13

File::stat

The File::stat module gets a file's attributes, like the Unix stat kernel routine. Blosxom uses File::stat functions and variables to get the date/time modified for entry files and related information.

For more information see the following URL:

  http://search.cpan.org/~nwclark/perl-5.8.4/lib/File/stat.pm

Note 14

Time::localtime

The Time::localtime module gets the current date and time and performs other date/time-related operations, like the corresponding Unix functions. Blosxom uses Time::localtime functions in the subroutine nice_date and elsewhere.

For more information see the following URL:

  http://search.cpan.org/~nwclark/perl-5.8.4/lib/Time/localtime.pm

Note 15

CGI

The CGI module is used to parse incoming HTTP requests (e.g., to get the URL being requested) and to create HTTP headers and HTML pages sent in response (see the subroutine generate for an example).

Note that :standard imports a standard set of functions and :netscape imports optional functions for Netscape-specific HTML extensions.

For more information see the following URL:

  http://search.cpan.org/~lds/CGI.pm-3.05/CGI.pm

Note 16

The Blosxom version

Blosxom 2.0 is considered stable. Blosxom 3.0 is currently in development.

Note 17

For people learning Perl: `my` variables

my creates a private variable visible only within the lexical scope within which it is defined (e.g., within a given code block enclosed by curly braces), and not visible anywhere else (including subroutines called from a given code block). In this case the lexical scope is considered to be the entire blosxom package within the blosxom.cgi source file.

For more information see

  http://www.perldoc.com/perl5.8.4/pod/perlintro.html#Variable-scoping
  http://www.perldoc.com/perl5.8.4/pod/perlsub.html#Private-Variables-via-my()

Note 18

For people learning Perl: Creating objects with `new`

The FileHandle module presents an object-oriented interface, so new in this context produces a new instance of the FileHandle class.

In object-oriented terms new is a "constructor", i.e., a so-called "class method" that creates and initializes new objects. Unlike object-oriented languages like C++, in Perl a constructor could be called something other than "new", but it's a common convention.

For more information see

  http://search.cpan.org/~nwclark/perl-5.8.4/lib/FileHandle.pm
  http://www.perldoc.com/perl5.8.4/pod/perlobj.html

Note 19

For people learning Perl: Hashes

We create a hash table (or plain "hash" in Perl jargon) with month names being the keys and month numbers (as strings) being the values. Hashes are initialized by providing a list in which the odd entries are the key values and the even entries are the corresponding values, e.g., ('key1', 'value1', 'key2', 'value2'). The syntax (a=>'b', c=>'d') is equivalent to ('a','b','c','d') and is intended to make hash initialization more understandable.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/perldata.html

Note 20

For people learning Perl: The `keys` and `sort` functions

This takes the list of keys in the previously-defined hash table, i.e., the list ('nil', 'Jan', 'Feb', ..., 'Dec'), sorts it using a comparison function that compares the corresponding values in the hash table for each key, i.e., the values '00', '01', etc., and then assigns the resulting sorted list of keys to an array indexed by month number.

This is equivalent to defining the array as follows:

  @num2month = ('nil', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec');

(Note that the 'nil' value is included because Perl arrays are indexed from 0 but month numbers start at 1.)

keys is a function that takes as an argument a hash and returns either a list consisting of all the keys in the hash (if used in list context) or the number of keys in the hash (if used in scalar context). Here we're using keys in list context, because as noted below the sort function expects a list as an argument.

sort is a function that takes as arguments the list of items to be sorted and (as an optional first argument) a subroutine defining how sort comparisons are to be done; in this case that subroutine is an "anonymous" inline routine enclosed in curly braces. $a and $b are special global variables used to hold the values being sorted at each step of the sort algorithm; <=> is a comparison operator that returns -1, 0, or 1 depending on whether the first item is respectively less than, equal to, or greater than the second. (This is a numeric comparison.)

For more information see the following URLs:

  http://www.perldoc.com/perl5.8.4/pod/func/keys.html
  http://www.perldoc.com/perl5.8.4/pod/func/sort.html
  http://www.perldoc.com/perl5.8.4/pod/perlop.html#Equality-Operators

Note 21

For people learning Perl: The `||=` operator

This defines $url to be its existing value (if it has one) or (if it has no value) the value returned by the url function (part of the CGI module) as described in the next note. (Perl has different namespaces for variables and functions, which is why we can name the variable the same as the function.)

The || operator is a logical "or" operator similar (but not identical) to that used in shell or C programming; $url ||= url(); is equivalent to $url = $url || url(); where the original value of $url is considered false if it is undefined, the empty string '', or zero (0 or '0'), and true otherwise. So if $url already has a (true) value then the second part of the conditional expression (after ||) is not executed, and that existing value is (re)assigned to $url; otherwise the second part is executed to obtain the returned value from url(), and that value is assigned to $url.

For more information see the following URLs:

  http://www.perldoc.com/perl5.8.4/pod/perlop.html#C-style-Logical-Or
  http://www.perldoc.com/perl5.8.4/pod/perlop.html#Assignment-Operators

Note 22

The value of `url()`

Note that url() returns only the URL of the Blosxom CGI script itself, not the full URL being requested. Thus (for example) if the HTTP request were for the URL

  http://www.example.com/cgi-bin/blosxom.cgi/2004/05/22

then url() would return (and $url would be set to) the URL

  http://www.example.com/cgi-bin/blosxom.cgi

If you have configured the web server to hide the blosxom.cgi part of the URL (as described in the FAQ referenced below) then the value of url() will be that part of the full URL which was translated into the script location. For example, if you configured Apache using the ScriptAlias directive as follows:

  ScriptAlias /blog "/var/www/cgi-bin/blosxom.cgi"

then if the requested URL were

  http://www.example.com/blog/2004/05/22

then url() would return (and $url would be set to) the URL

  http://www.example.com/blog

For more information see the following URLs:

  http://search.cpan.org/~lds/CGI.pm-3.05/CGI.pm#OBTAINING_THE_SCRIPT'S_URL
  http://www.blosxom.com/faq/cgi/hide_cgi_bit.htm

Note 23

Server Side Includes and Blosxom

We assign $url a new value consisting of its previous value with the initial string "included:" (if present) replaced with "http:". This is intended for the case when output from blosxom.cgi is included in an HTML file by a Server Side Include directive like the following:

   <!--#include virtual="/cgi-bin/blosxom.cgi/2004/05/19" -->

When invoked in this way the URL returned by url() above would be (for example)

  included://www.example.com/cgi-bin/blosxom.cgi

instead of

  http://www.example.com/cgi-bin/blosxom.cgi

For more information see the following URL:

  http://httpd.apache.org/docs-2.0/howto/ssi.html

Note 24

For people learning Perl: The `=~` operator and regular expression matching

=~ is a special operator that takes the left side ($url) and applies to it a pattern match specified on the right side (s/^included:/http:/), in this case a pattern match that actually does substitution, using regular expressions modeled on those used in the Unix shell and utilities. (So, for example, in this case the '^' tells Perl to look for a match starting at the beginning of the string.) The result is that the value of $url is modified if the match succeeds.

For more information see the following URLs:

  http://www.perldoc.com/perl5.8.4/pod/perlop.html#Binding-Operators
  http://www.perldoc.com/perl5.8.4/pod/perlop.html#Regexp-Quote-Like-Operators

Note 25

For people learning Perl: More on regular expressions and substitution

This statement strips off a trailing slash from the URL value if present; the '$' in the regular expression tells Perl to look for a match at the end of the string.

Note that the value returned by the url() function doesn't have a trailing slash, but the person configuring Blosxom may have included a trailing slash when specifying a non-default value for the $url variable.

Note 26

Adjusting `$depth`

If $depth is non-zero (i.e., limiting search to n directories deep) then we take the $datadir path, count the number of path components, subtract 1, and add that to $depth to get the new value. For example, if the value of $datadir is /a/b/c then a $depth value of 2 would get changed to a value of 4.

The new value of $depth can be interpreted as follows: Search through a directory only if the number of components in its path is $depth or less. So in the previous example the directory /a/b/c/d would be searched but the directory /a/b/c/d/e would not.

Note 27

For people learning Perl: The `tr` and `and` operators

Counting the number of path components is done using the tr operator, which is typically used to modify a string by transliterating one set of characters with another, e.g., $s =~ tr[a-z][A-Z] to change lowercase characters in $s to uppercase. However in this case the set of replacement characters is empty ([]) so no replacement is done; instead we simply use the standard return value from tr, namely the number of times the character(s) in the search list (i.e., the '/' character in this case) was found.

Since the value of $datadir is an absolute path (i.e., it starts with '/') and we trimmed any trailing '/' characters (see above) the number of '/' characters will be equal to the number of components in the path.

[Note: There is a minor potential bug here: If the value of $datadir were specified with multiple trailing slashes, e.g., /a/b/c//, then the code above would remove only a single trailing '/', leaving one extra '/' at the end, and the number of directory components would be miscounted as being higher than it actually is. The fix is simple: Replace the existing statement $datadir =~ s!/$!!; with the statement $datadir =~ s!/*$!!; to look for zero or more '/' characters at the end of the string and remove any found; even safer would be $datadir =~ s!/*\s*$!!; to remove trailing whitespace as well.]

The and operator here is used to conditionally change $depth only if it is non-zero; if $depth is zero then it is interpreted as false and the expression after the and is not executed. However any non-zero value will be interpreted as true and $depth modified as described above.

For more information see the following URLs:

  http://www.perldoc.com/perl5.8.4/pod/perlop.html#Quote-and-Quote-like-Operators
  http://www.perldoc.com/perl5.8.4/pod/perlreref.html
  http://www.perldoc.com/perl5.8.4/pod/perlop.html#Logical-And

Note 28

Static vs. dynamic mode

We set the variable $static_or_dynamic to 'static' or 'dynamic' to reflect the mode we're in. We're in static mode if all the following are true:

the GATEWAY_INTERFACE environment variable is not set (i.e., we are not executing as a CGI routine)

the parameter -password has a value

the variable $static_password is defined (see above)

the value of the -password parameter is the same as the value of $static_password

Otherwise we're in dynamic mode.

Note 29

For people learning Perl: The `eq` operator and `CGI::param()` function

We saw the and operator above. The eq operator tests for string equality. The expression $a ? $b : $c is like that used in C: if $a is true then return $b, otherwise return $c.

param() is a CGI function, but it can also return values when the Perl script is invoked from the command line, e.g.

  perl blosxom.cgi -password='secret'

would assign the string value 'secret' to the parameter -password.

For more information see the following URLs:

  http://www.perldoc.com/perl5.8.4/pod/perlop.html#Equality-Operators
  http://www.perldoc.com/perl5.8.4/pod/perlop.html#Conditional-Operator
  http://search.cpan.org/~lds/CGI.pm-3.05/CGI.pm#DEBUGGING
  http://www.blosxom.com/documentation/users/configure/static.html

Note 30

For people learning Perl: Setting parameters using the `CGI::param()` function

If we're in dynamic mode then we set the value of the -quiet parameter to be 1. When setting parameters the param() function takes an argument list similar in syntax to the way hashes are initialized, e.g., param(-name=>'a', -value=>'b') would set the parameter a to the value b.

For more information see the following URL:

  http://search.cpan.org/~lds/CGI.pm-3.05/CGI.pm#SETTING_THE_VALUE(S)_OF_A_NAMED_PARAMETER:

Note 31

`PATH_INFO`

PATH_INFO (the CGI environment variable whose value is returned by the path_info function) contains any path information in the URL after the part of the URL that identifies the CGI script. For example, if the requested URL were http://www.example.com/cgi-bin/blosxom.cgi/2004/05 then the value returned by path_info() would be /2004/05.

For more information see the following URL:

  http://search.cpan.org/~lds/CGI.pm-3.05/CGI.pm#FETCHING_ENVIRONMENT_VARIABLES

Note 32

For people learning Perl: The `split` function

We use my to define a private array variable @path_info. To set this variable we first use the split function on the string returned by the path_info() function (if it's non-empty), splitting that value into different components separated by the '/' character. (m{/} is a regular expression that will match a single '/'.) The split function returns a list of strings, which is why we use an array to hold the result.

If for some reason path_info() returns an empty string then we split the value of the path CGI parameter instead. This would allow you to use a URL like

  http://www.example.com/cgi-bin/blosxom.cgi?path=/2004/05/22

if you wished to do so. Note that the || operator has a higher precedence than the comma operator, so the decision whether to use the value of path_info() or param('path') is made before that value is passed to the split function.

For more information see the following URLs:

  http://www.perldoc.com/perl5.8.4/pod/func/split.html
  http://www.perldoc.com/perl5.8.4/pod/perlop.html#Operator-Precedence-and-Associativity

Note 33

For people learning Perl: The `shift` function

The shift function removes the first element of an array and returns it; here we simply discard the returned value. We do this because path_info() returns a path with an initial '/', and the split function as used above on that path will produce an empty string as the first element of the returned array; for example, the expression split m{/}, "/a/b/c" will return the list ('', 'a', 'b', 'c'). We don't want the initial empty string so we use shift to get the list ('a', 'b', 'c') instead.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/func/shift.html

Note 34

Interpreting Blosxom URLs

Recall that after the part of the URL that references the Blosxom script itself (stored in $url), a Blosxom URL can contain an additional path consisting of three possible parts: an optional set of categories, an optional set of year, month, and day values, and an optional reference to an individual entry. For example, the following are values that might be returned by the path_info() function as applied to Blosxom URLs:

  /society/literature
  /2004/05/19
  /music/index.rss
  /personal/resolutions/2003/07
  /cooking/italian/bruschetta.html

From the path returned by path_info() we end up setting the following variables:

$path_info: either an individual entry path including categories, subcategories, and entry name (e.g., /cooking/italian/bruschetta.html) or a category/subcategory path for which we wish to see all entries (e.g., /society/literature or /music)
$flavour: the desired flavour, whether explicitly specified in the URL (e.g., 'html' for /cooking/italian/bruschetta.html or 'rss' for /music/index.rss) or defaulted (e.g., as in /society/literature/)
$path_info_yr, $path_info_mo_num, and $path_info_da: the year, month, and day if present in the URL (e.g., for /personal/resolutions/2003/07 the year and month would be '2003' and '07' respectively while the day would be undefined)

Our first task is to extract the path information relating to categories; since we know that category names can't begin with a digit we can simply look for path components starting with alphabetic characters. However we have to stop before we get to any reference to an individual entry; we identify such entries by the presence of a '.' character in their names.

[Note: This implies two additional restrictions in Blosxom as currently designed: you can't have a category name containing a '.', and you can't reference individual entries using URLs that don't have a file extension at the end (as recommended by the W3C, among others).]

For more information see the following URLs:

  http://www.blosxom.com/documentation/users/view.html
  http://www.w3.org/Provider/Style/URI

Note 35

For people learning Perl: The `while` loop

A while loop executes a block of code (in curly braces) as long as a given condition (in parentheses) is true. In this case before executing the code block we first check to see if the first element of @path_info is defined and non-empty; otherwise there are no more components and we're done. ($a[i] is the i'th element of the array @a; note that it's distinct from the scalar variable $a.)

If we have a further component, we then check to see if its value starts with an alphabetic character, by trying to match it against the regular expression character class [a-zA-Z] starting at the beginning of the string ('^'); otherwise the component represents a date and not a category, and we're done.

Finally we check to verify that the component's value does not have a literal period (\.) in it; otherwise the component represents an individual entry (e.g., "a.html") and we're done. (The operator !~ is the reverse of =~, returning a true value when the pattern match fails.)

See the notes for line 112 below for the meaning of the parentheses in the regular expression /(.*)\.(.*)/ used to check for a period in the path component. For now we simply note that as used here the regular expression could have been replaced with the simpler regular expression /.*\..*/ without affecting things.

If the first element of @path_info looks like a category then we append it to the scalar variable $path_info, preceded by a '/', and remove the element from the @path_info array. ($path_info was defined above, with its initial value set to the empty string.) Note that shift @path_info both does the removal and returns the removed element as a result. The . operator concatenates two strings, in this case '/' and the removed first element. The .= assignment operator is like the ||= and += operators seen above, so that $a .= 'b' is the same as $a = $a . 'b', where the . operator concatenates two strings.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/perlsyn.html#Compound-Statements
  http://www.perldoc.com/perl5.8.4/pod/perlreref.html#CHARACTER-CLASSES
  http://www.perldoc.com/perl5.8.4/pod/perlop.html#Additive-Operators

Note 36

Determining the flavour, part 1

If the flavour is specified by index.{flav}, as in

  http://www.example.com/cgi-bin/blosxom.cgi/music/index.rss

then it must be parsed from the PATH_INFO value stored in @path_info. However if the flavour is specified by ?flav={flav}, as in

  http://www.example.com/cgi-bin/blosxom.cgi/music?flav=rss

then its value must be obtained using param(), since anything in the URL after a '?' is considered a CGI parameter and not part of PATH_INFO.

Note 37

For people learning Perl: `$#path_info`

$#path_info returns the index of the last element of the array @path_info. We match the value of that last element against a regular expression consisting of one or more characters followed by a literal '.' character followed by one or more characters to the end of the string. This match will succeed when the last element looks like, e.g., 'a.b', where we'll interpret 'b' as the flavour.

(Note that this regular expression is slightly different from the one used in the while loop on line 107; the previous expression matched zero or more characters followed by a '.' followed by zero or more characters. In other words, the test at line 107 will match . by itself, .a, a., and so on, while the test here will not. In practice this doesn't matter: the first test was simply intended to reject path components that weren't categories, which can't contain '.'; the second test is intended to find flavour values, and for that purpose we need a component that actually has something after the '.', as well as before.)

The regular expression uses parentheses to save parts of the component that are matched, for later use. In particular, the regular expression /(.+)\.(.+)/ is used (instead of the simpler /.+\..+/) to save the flavour value (matched by the expression in the second set of parentheses) and the entry name (matched by the expression in the first set of parentheses). The saved values can then be referenced by the special variables $1 (first part matched, the entry name) and $2 (second part matched, the flavour).

For more information see the following URLs:

  http://www.perldoc.com/perl5.8.4/pod/perldata.html#Variable-names
  http://www.perldoc.com/perl5.8.4/pod/perlreref.html#SYNTAX

Note 38

For people learning Perl: "Greedy" matching

If the last value in @path_info does contain a '.' character then as noted above the value of the variable $2 will be the string to the right of the '.', and we save that value in $flavour.

Because of the way regular expression matching works, if the final component actually has two or more periods, e.g., "example.com-news.html", $2 will be set to the string after the final '.', not the string after the first one. This "greedy" matching (i.e., match as many characters as you can) is exactly what we want to happen.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/perlreref.html#QUANTIFIERS

Note 39

Indexes vs. individual entries

If the first part (before the '.') of the last path component is not equal to 'index' then that component points to an individual entry, and we save both the entry name and flavour by appending them to the $path_info variable that stores the category components of the path.

On the other hand, if the first part is 'index' then the original URL was not a request for an individual entry but rather a request for all entries in a particular category or for a particular day, month, or year, displayed using a specified flavour. For such requests the path might be something like /a/b/index.rss or /2004/05/index.rss. In this case we don't need to save the value 'index.rss' (or whatever) as part of $path_info, since all we need is the flavour value.

Note 40

For people learning Perl: The `pop` function

Now that we've extracted the needed information from the last element of @path_info we use the pop function to remove it.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/func/pop.html

Note 41

Determining the flavour, part 2

If the final component of the path does not contain a period then either the flavour was specified using the flav parameter, as in the URL

  http://www.example.com/cgi-bin/blosxom.cgi/a/b?flav=rss

or the flavour was omitted entirely. In the latter case we set $flavour to the default flavour defined in the configurable variables section.

Note 42

For people learning Perl: Alternative patterns in regular expressions

Using | in a regular expression lets you search for (and in this case replace) two or more alternative patterns, in this case zero or more '/' characters at the beginning of $path_info and zero or more at the end. The 'g' option replaces all matches found, so we remove both any '/' characters found at the beginning and any found at the end.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/perlretut.html#Matching-this-or-that

Note 43

Date references

At this point we've extracted from @path_info any category names (at the beginning of the path) and any final path component associated with either an individual entry or an index.{flav} reference. So the only components left in @path_info should be date references (if any) from URLs containing sequences like /2004/05/19, /2004/05, or /2004.

Note 44

For people learning Perl: Assigning into a list

This statement assigns $path_info[0] (i.e., the first element in the array @path_info) to $path_info_yr, $path_info[1] to $path_info_mo, and $path_info[2] to $path_info_da. If @path_info doesn't have three elements then some or all of the three variables may end up undefined (starting with $path_info_da).

In general you can assign a list of scalar values into a list of scalar variables:

  ($a, $b, $c) = (1, 2, 3);
  ($a, $b, $c) = @d;

where the righthand side could be a constructed list (using ','), an array, a function returning a list, or any other expression returning a list.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/perldata.html#List-value-constructors

Note 45

Month abbreviations in Blosxom URLs

[Note: Although I don't believe the online documentation mentions this, based on this code it appears that you can use Blosxom URLs that identify months by their three-letter abbreviations instead of month numbers; so, for example, rather than identifying the date as /2004/01/31 it appears that you could request it as /2004/Jan/31.

If so, there's no danger in mistaking a month abbreviation for a category name since the month must be preceded by a four-digit year, and Blosxom stops parsing the URL for categories as soon as it hits a component starting with a digit.]

Note 46

For people learning Perl: The `lc`, `ucfirst`, and `undef` functions

This statement can be paraphrased as follows: if $path_info_mo has a (non-empty) value, then check to see if that value is a string with (at least) two digits (i.e., it matches the regular expression \d{2}); if so, assign the value of $path_info_mo to $path_info_mo_num. If $path_info_mo has a value that doesn't contain two digits, then put the value in "initial cap" form and look it up in the %month2num hash to see if the value is a month abbreviation; if so, assign the month number from the hash to $path_info_mo_num.

If the value of $path_info_mo doesn't look like a month number or month abbreviation, or if it's empty or undefined, then $path_info_mo_num is undefined as well.

The function lc returns the lower-case equivalent of its string argument, and the ucfirst function returns a copy of its argument with the first letter (only) capitalized. Hence ucfirst(lc 'jaN') returns the value 'Jan', which is the capitalization style used in %month2num.

The function undef returns an undefined value that (as in this case) can be assigned to a variable.

For more information see the following URLs:

  http://www.perldoc.com/perl5.8.4/pod/perlreref.html#CHARACTER-CLASSES
  http://www.perldoc.com/perl5.8.4/pod/perlreref.html#QUANTIFIERS
  http://www.perldoc.com/perl5.8.4/pod/func/lc.html
  http://www.perldoc.com/perl5.8.4/pod/func/ucfirst.html
  http://www.perldoc.com/perl5.8.4/pod/func/undef.html

Note 47

The template subroutine

The template subroutine is used to look for and return the contents of flavour template files (e.g., head.html, foot.html, etc.). It can be overridden by a plugin that defines its own template subroutine; see the notes for line 161.

Note 48

For people learning Perl: Anonymous subroutines and references

sub { ... } defines an "anonymous" (i.e., not named) subroutine, a reference to which is then assigned to the variable $template. (References are basically names that can be used to refer to variables and subroutines, and are the third type of value that a scalar variable can have, along with numbers and strings.) The subroutine can then be called using the syntax &$template() where you can put subroutine arguments inside the parentheses.

The template subroutine is defined in this way (using a reference stored in a variable rather than a named subroutine) so that the subroutine can be overridden; a plugin can define its own template subroutine, and a reference to that can be assigned to $template, replacing the reference to the original subroutine defined here.

For more information see the following URLs:

  http://www.perldoc.com/perl5.8.4/pod/perlsub.html
  http://www.perldoc.com/perl5.8.4/pod/perlref.html

Note 49

Template subroutine arguments

$flavour is the flavour for which we are looking, e.g., 'html', 'rss', etc. $chunk is the type of template we are looking for, e.g., 'head', 'foot', 'story', etc. $path is the directory at which we should start our search, expressed as a pathname relative to the Blosxom data directory.

Note 50

For people learning Perl: Argument passing using `@_`

Arguments to the subroutine are passed in a special array variable @_, with the first three elements of that assigned to the private variables $path, $chunk, and $flavour respectively.

Note 51

For people learning Perl: The `do while` loop

A do while loop is like a while loop except that the condition is checked at the bottom (after the loop is executed at least once) instead of at the top.

(The similarity between while and do while loops is only superficial, since in Perl the do {...} while construct isn't considered to be a true loop. In particular, you can't put next and last statements within a do {...} while; see the notes for lines 141 and 240.)

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/func/do.html

Note 52

For people learning Perl: The `join` and `open` functions

The following statement is basically a backwards if statement: First we use the FileHandle $fh (created above) and try to open a template file for read access ("<"), constructing a template pathname from the values of $datadir, $path, $chunk, and $flavour. So, for example, if $datadir is '/blosxom', $path is '/a/b', $chunk is 'head', and $flavour is 'html', we look for a flavour template file '/blosxom/a/b/head.html'.

(Because the FileHandle module provides an object-oriented interface, we use the method invocation $fh->open(...) rather than the function call open($fh, ...). Also note that if we have already opened a file using the FileHandle $fh that file will be closed first before we open a new one.)

If the open succeeds (i.e., the template file exists and is readable) then we read in all the lines of the template file using the $fh FileHandle and return a string containing all those lines concatenated together.

(<$fh> would normally read only one line of the file, but using the join function causes <$fh> to be used in a list context -- because join expects a list as its second argument -- and that causes <$fh> to read all lines and return them as an array, with each array element being a newline-terminated line. The join function then returns a string consisting of all the array elements concatenated together separated by the join function's first argument, which in this case happens to be the empty string. So the returned result is a single string containing all the lines in the flavour template file, each terminated by a newline, e.g.,

  <html>\n<body>\n<h1>A Blog</h1>\n...

for a typical head section.)

For more information see the following URLs:

  http://search.cpan.org/~nwclark/perl-5.8.4/lib/FileHandle.pm
  http://www.perldoc.com/perl5.8.4/pod/perlopentut.html
  http://www.perldoc.com/perl5.8.4/pod/func/open.html
  http://www.perldoc.com/perl5.8.4/pod/func/join.html
  http://www.perldoc.com/perl5.8.4/pod/func/return.html

Note 53

For people learning Perl: Retrying the template file search

If the open fails (e.g., there was no file at the location we looked) then we modify the value of $path by stripping off the last path component (e.g., if $path has the value /a/b we change it to /a) and then we go back to the top of the loop and try the open again. (In other words, we search for the template file in the parent directory of the directory we just looked in.)

(To explain the regular expression a bit: '\/' matches a literal '/' and '[^\/]' matches anything but a slash, so '\/*[^\/]*' matches zero or more '/' characters followed by zero or more other characters. The regular expression \/*[^\/]*$ means look for this pattern at the end of the string, so that when the substitution is done -- replacing the matched pattern by an empty string -- it removes the last component of $path. Finally, we use parentheses to save the matched pattern in the $1 variable for later checking, hence (\/*[^\/]*)$ is the final regular expression used.)

If we never succeed in opening a template file then the loop ends once all the path components have been removed: at that point the matched pattern is an empty string, so $1 is empty (and hence false) and the and test fails.

Note 54

For people learning Perl: Values in nested hashes

If we never succeed in opening a template file (i.e., we drop out of the do while loop) then we return a string consisting of lines from a flavour template already stored in a multidimensional hash, using $flavour and $chunk as keys. (This hash is defined below; recall that right now we are defining the subroutine, not executing it. See the notes for line 144 for more information.)

Note 55

For people learning Perl: Initializing a hash to be empty

We set the %template hash variable to contain nothing, i.e., no keys and no values.

Note 56

Default templates

Read in and store the default templates defined in the data section of this file, saving them in %template.

Note 57

For people learning Perl: `<DATA>`

<DATA> causes lines to be read from the data section of this file (i.e., blosxom.cgi). The data section starts after a line consisting of __DATA__ by itself. In this context <DATA> returns a line at a time, returning an undefined value (and thus ending the while loop) when we reach the end of the file.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/perldata.html

Note 58

For people learning Perl: `__END__` and `last`

Using <DATA> would continue to read lines after __DATA__ until the end of the blosxom.cgi file. However in our case we may want to put some additional text after the __END__ line (which marks the end of what the Perl compiler parses). We therefore explicitly check for the presence of __END__ on a line by itself, and if we find it we use the last command to exit the while loop immediately.

Note that since we are not using the =~ operator the string pattern match is done against the special variable $_ that holds the line just read from the data section using <DATA>.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/perlsyn.html#Loop-Control

Note that there are a couple of subtle points about the test for __END__. First, the test is actually for either zero or one occurrence of __END__, so the test would succeed (and reading of data end) if the __DATA__ section contained a blank line at some point. Second, the pattern match requested is for __END__ starting from the beginning of the line (^) and ending at the end of the line ($), with nothing else present. But the string being tested against (the value of the $_ variable) does in fact have something else in it, namely a newline at the end of the string.

Why then does the test work? Because as noted in the Perl online documentation, "the '^' character is guaranteed to match only the beginning of the string, the '$' character only the end (or before the newline at the end), ..." (emphasis added). In other words, the newline at the end of $_ is ignored for the purpose of matching the specified pattern /^(__END__)?$/.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/perlre.html#Regular-Expressions

Note 59

For people learning Perl: Parsing template lines and pattern matching in a list context

As can be seen by looking at the data sections below, the default templates are each defined as a single line containing the flavour, the type of template, and the template data itself, each field separated by whitespace. We therefore parse each line of the data section into three whitespace-separated fields, and then assign the values to the private variables $ct, $comp, and $txt respectively.

In the code thus far we have seen pattern matching done in a scalar context; in that context a pattern match will return the number of matches found, or zero if no match exists. However here the pattern match is being done in a list context because of the assignment to ($ct, $comp, $txt). (Recall that this is comparable to an assignment of the form @a = ... where @a is an array variable.)

When done in a list context a pattern match will return an array ($1, $2, ...) containing the parts of the string that were matched. Hence in this context $ct will be assigned the value of $1, $comp will be assigned the value of $2, and $txt will be assigned the value of $3.

To expand a bit on the regular expression: \s matches a whitespace character (space, tab, etc.) and \S matches a non-whitespace character. The first field gets matched by ^(\S+), the second field gets matched by (\S+), and the third field (which can contain spaces) gets matched by (.*)$; the field patterns are then separated by the \s pattern.

[Note: The regular expression looks for a single whitespace character between the fields. On each line in the data section there is in fact only a single space between the flavour specifier and the template type specifier, so this works out OK. However on some lines there is more than one space between the template type specifier and the template content. This does not cause any problem in practice, since the pattern for the third field can match spaces; the extra spaces are simply included as leading whitespace in the value matched for the third field and then assigned to $txt.]

Note 60

For people learning Perl: Multiline mode in regular expressions

We modify $txt to change literal occurrences of '\n' (i.e., the '\' character followed by the character 'n') to occurrences of the newline character.

'\\' in the pattern being searched for matches a literal '\', and '\n' in the replacement string is interpreted as a newline character. The g option does a global search and replace as noted above while the m option searches in multiline mode.

Multiline mode treats the string as a multiline buffer, so you can use '^' and '$' to match at the beginning and end of newline terminated substrings within the string as a whole.

[Note: It's not exactly clear why multiline mode is used in this context, particularly since the regular expression doesn't use either '^' or '$'; in testing the substitution seemed to work fine even without the m option.]

Note 61

For people learning Perl: Nested hashes

We store the default flavour template text read from the data section, indexing it by the flavour and type of content.

The usage $a{$b}{$c} is an example of the use of Perl references to simulate multi-dimensional arrays or nested hashes. To expand on this: the syntax $a{$b}{$c} is equivalent to $a{$b}->{$c}, which in turn is equivalent to ${$a{$b}}{$c}. Here %a is a hash, the value of $b is a key for that hash, and the hash value $a{$b} is a reference that points to another hash. (The second hash is anonymous, i.e., it has no name of its own.) To refer to a value in the second hash we use ${$a{$b}}{$c} where the value of $c is a key in the second hash. As noted above we can also use the syntax $a{$b}->{$c} instead, and can in turn shorten that to $a{$b}{$c}.

When we make an assignment like $a{$b}{$c} = "def" Perl automagically creates the anonymous hash and stores a reference to it in $a{$b}. If Perl didn't do this then you'd have to go through the following machinations to make the same assignment (assuming that the hash %a already existed):

  %h = ();                # Create an empty hash %h
  $h{$c} = "def";         # Store value "def" in %h at key $c
  $a{$b} = \%h;           # Store reference to %h in hash %a at key $b

In this example the value could then be referenced as either $h{$c} or ${$a{$b}}{$c}. Per the online Perl documentation, "Anywhere you'd put an identifier ... as part of a variable ... name, you can replace the identifier with a simple scalar variable containing a reference of the correct type". So we are replacing the identifier "h" in $h{$c} with the scalar variable $a{$b} that contains a hash reference. We could actually use the syntax $$a{$b}{$c} for this but we use the extra pair of curly braces to clarify what's going on. ${$a{$b}}{$c} then becomes $a{$b}{$c} through the alternative syntax discussed above.

For more information see the following URLs:

  http://www.perldoc.com/perl5.8.4/pod/perlreftut.html
  http://www.perldoc.com/perl5.8.4/pod/perlref.html#Using-References

Note 62

For people learning Perl: The `opendir` function

If there's a plugin directory defined we open it and look for plugins, using the file handle PLUGINS; we use the opendir function instead of open because we are opening a directory, not a regular file.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/func/opendir.html

Note 63

For people learning Perl: The `readdir`, `grep`, and `-f` functions and the `foreach` loop

Working backwards from the end of the statement: We use the readdir function to return a list of all the entries in the plugin directory, and then use the sort function to sort those entries in the default (alphabetical) order. (readdir returns all directory entries because it's being executed in a list context, since sort expects a list argument; otherwise readdir would return one directory entry at a time.)

We then use the grep function to test each of the sorted directory entries against the specified expression (in curly braces) and return a list consisting of only those entries for which the expression is true. In this case the expression for grep is a compound expression consisting of a regular expression and a file test function anded together.

We first test using /^\w+$/ to make sure that the directory entry starts with and contains only alphanumeric characters or '_'; this eliminates directory entries for . (the current directory), .. (the parent directory), and hidden files (e.g., .a). (Note that we don't use the =~ operator here because we are matching against the special variable $_ that grep sets in turn to hold the value of each element of the list passed to it.)

We then test using the file test function -f "$plugin_dir/$_" to verify that the directory entry actually is a file and not something else; this eliminates directory entries for the plugin state directory and other subdirectories that might be present, as well as directory entries for special files like device files, named pipes, and the like. (Again we reference the special $_ variable set by grep.)

[Note: Symbolic links do pass the -f test (at least on Unix and Unix-like systems) if (and only if) they point to regular files. Unless other considerations apply, this should allow you to put a plugin file in another directory and put a symlink in the plugin directory itself.]

Finally, we use a foreach loop to iterate over each element in the list of plugins, assigning the value of each element to the variable $plugin in turn and executing the statements in the following code block.

For more information see the following URLs:

  http://www.perldoc.com/perl5.8.4/pod/func/readdir.html
  http://www.perldoc.com/perl5.8.4/pod/func/grep.html
  http://www.perldoc.com/perl5.8.4/pod/func/-X.html
  http://www.perldoc.com/perl5.8.4/pod/perlsyn.html#Foreach-Loops
  http://www.perldoc.com/perl5.8.4/pod/perlvar.html

Note 64

For people learning Perl: Parsing plugin names

Recall that plugins can have a (normally two-digit) number at the beginning of their names (to enforce a particular plugin order) and can also have an underscore character ('_') at the end of their names to disable them from being used.

Here we use a regular expression to match and save the actual plugin name and look for a concluding '_' if present. (We no longer need the numeric prefix since we are now processing the plugins in the proper sort order.) Note that the regular expression as written allows underscores to be used as part of the plugin name itself; only an underscore at the end is special.

The plugin name and the (optional) trailing underscore are saved in the special variables $1 and $2 and then assigned to the private variables $plugin_name and $off respectively. (See the note to line 142 for more information on pattern matching in a list context.)

Note 65

For people learning Perl: Determining if a plugin is disabled

If the final underscore is present ($off has the value '_') we set $on_off to -1 to indicate that the plugin is disabled; otherwise $on_off is set to 1 to indicate an active plugin.

Note 66

For people learning Perl: The `require` function

We include the code for the current plugin. (This is somewhat analogous to #include in C.) Note that since we are supplying a pathname the require function will look for the plugin at the pathname (instead of looking in the directories specified by @INC, the Perl search path analogous to LD_LIBRARY_PATH and similar environment variables in Unix.)

[Note: The Perl online documentation for require mentions only searching in @INC directories for a filename, and does not explicitly address using a full pathname. This is presumably just an oversight.]

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/func/require.html

Note 67

For people learning Perl: Calling a plugin's start routine

Now that the code for this plugin has been loaded we can call subroutines defined in the plugin. We first call the plugin's start routine, using the plugin's name in a method invocation (see below). Assuming that the start routine exists and returns a true value, we then use the plugin's name as a key to put the plugin's $on_off value into the %plugins hash. Finally, we create a new element in the @plugins array and set its value to the plugin name. (Recall that %plugins and @plugins are entirely different variables that just happen to share the same name.)

Note that we set $on_off to the value -1 for off instead of 0 because otherwise the middle expression (between the two and's) would have evaluated false, and we would never have executed the third expression to set @plugins.

Start routine invocation

For those wanting a more in-depth explanation, calling the start routine works as follows:

A plugin "abc" has to define a package abc, as noted in the Blosxom plugin developer documentation. So as a result of the "abc" plugin being loaded (by require) we can now refer to subroutines and variables defined by the package. (Strictly speaking we can't refer to everything defined by the package, but let's ignore that for now.) For example, if a scalar variable $foo is defined by plugin "abc" (i.e., package abc) then we could refer to it as $abc::foo to obtain its value. Similarly we could call the start subroutine in package abc using the notation abc::start().

However we have a problem: the Blosxom code doesn't know beforehand that there's going to be a plugin "abc" (or "foo", or whatever), so the Blosxom code can't use abc::start() to invoke package abc's start subroutine. The solution is to use a different way to call a routine defined in a plugin: Blosxom invokes abc::start as a method rather than calling it as a subroutine.

Methods are a concept from object-oriented (OO) programming, in which (in theory) everything of interest is an "object", objects can belong to "classes", classes can have "methods" that operate on objects of that class, classes can be "subclasses" of higher-level classes, and so on.

For Blosxom (at least Blosxom 2.0) we don't need to worry about the full OO story, we simply need to know that in Perl terms an object is just a reference, a class is simply a package and a method is a subroutine defined by a package. So in our example rather than using abc::start() to call the start subroutine in package abc, we can use the method invocation notation abc->start() instead. (Method invocation doesn't work exactly like subroutine calling, particularly in terms of which arguments are passed, but we can ignore that for now.)

However we still have the problem of Blosxom not knowing about package abc beforehand, so using abc->start() won't work either. Fortunately in method invocation instead of a package identifier to the left of the -> we can substitute a scalar variable whose value is a string representing a valid package name. In particular, rather than using abc->start() to invoke the start subroutine (using the package identifier abc), we can set a scalar variable $foo to the value "abc", and then use $foo->start() to invoke the subroutine. (We're using $foo as an example; Blosxom actually uses the variable $plugin_name previously assigned.)

For more information see the following URLs:

  http://www.blosxom.com/documentation/developers/plugins.html
  http://www.perldoc.com/perl5.8.4/pod/perlobj.html#Method-Invocation

Method invocation vs. symbolic references

[Note: (This is for people like me who get led astray reading Perl documentation.) The usage $foo->start() looks similar to the use of -> with Perl references as previously discussed, and it's tempting to think of $foo in this context as a kind of reference, in particular a symbolic reference, a Perl concept where a scalar variable containing the name of a variable or subroutine can get interpreted as a (real) reference to that (second) variable or subroutine.

However as far as I can tell there is no connection between symbolic references and use of a scalar variable to specify the package (class) name in method invocation. This is supported by the fact that Blosxom does a use strict, which flags use of symbolic references as an error; however this doesn't affect the use of scalar variables in method invocation.]

Note 68

For people learning Perl: The `closedir` function

Having cycled through all the plugins, we now close the PLUGINS file handle we used to open the plugins directory.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/func/closedir.html

Note 69

Overriding the default template subroutine

We loop through the @plugins array, which now contains a list of plugin names for both active and disabled plugins. For each plugin we look up its name in the %plugins hash and determine whether the plugin is enabled (1) or disabled (-1). If a plugin is enabled we then use the plugin name to invoke the can method to see if a template subroutine is defined by the plugin's package.

If so then we invoke the plugin's template subroutine, which returns a reference to a new (anonymous) subroutine to handle templates; we save the reference to that subroutine in the $template variable (overriding the value set earlier, representing the default template subroutine), and then we exit the loop (and don't bother to look at the other plugins).

Note 70

For people learning Perl: The `can` method

Earlier we saw a method invocation used to call a plugin's start subroutine, using the expression $plugin_name->start() where the value of the scalar variable $plugin_name was a string with the plugin's name (which is the same name as its package). The expression $plugin->can('template') looks similar, except for the addition of an argument to be passed to the method.

However plugins don't actually define a can method; where then does it come from? Here we see more of the object-oriented features of Perl: When doing method invocation (but not when doing a standard subroutine call) Perl will look for a method not only in that package/class (recall that they are the same in Perl), but also in higher-level classes from which the class in question inherits methods.

In particular, Perl has a package UNIVERSAL from which all packages inherit the can method. The expression abc->can('foo') will invoke the can method and check to see if the package abc has the foo method defined; if so, it returns a reference to the method, or an undefined value if no such method exists. The Blosxom code uses a similar expression but using a scalar variable holding the package name instead of the package identifier (which it can't know a priori).

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/perlobj.html#Default-UNIVERSAL-methods

Note 71

For people learning Perl: Inside the `can` method

By knowing a little bit about how Perl represents subroutines internally we can get a general idea of how the can method works. Internally Perl identifiers for variables, subroutines, etc., are stored in a special hash known as a "symbol table". Every package has its own symbol table, among other things to support the Perl feature that different packages can have different variables that happen to have the same names. A package's symbol table has entries for variables and subroutines defined in that package (except for lexically-scoped items, which we ignore here).

So if package abc (corresponding to the "abc" plugin) has defined a template subroutine/method, then in the symbol table for package abc (which can be accessed from Perl as the hash variable %abc::) there will be a hash element with key 'template' that will have as its value a special data object called a "typeglob" (the typeglob value is accessible from Perl as $abc::{'template'} or *abc::template); that typeglob in turn can be used to find a reference to the template subroutine (accessible as *abc::template{CODE} using a hash-like notation).

Given a package and a string with the name of the desired method, the can method looks in the package's symbol table to find an entry for that name, and then looks at the typeglob to see if there's actually a subroutine defined with that name. (After all, the package might have a scalar variable, hash, or array with the same name as the subroutine.) The can method then returns the subroutine reference obtained from the typeglob, or an undefined value if no such reference was found.

One question remains: How does the can method know the package for which it's searching for a method? Because when the can method is invoked Perl passes it an extra argument containing the name of the package/class on which the can method was originally invoked (the abc package in our example).

Such an extra argument is passed as the first argument to any subroutine invoked as a method (although in some types of method invocation the first argument is a reference and not a class/package name). The presence of this additional argument is another way in which method invocation is different than a subroutine call.

If you happen to read code for plugins, this is why some subroutines have an argument $pkg (or whatever) that's not shown in the Blosxom code invoking that subroutine. The $pkg argument is present only for plugin subroutines that take arguments in the first place, since in that case the subroutine has to skip over the $pkg argument before getting to the "real" arguments. Plugin subroutines that don't take arguments (like the start and template subroutines) don't worry about this; they just ignore any arguments passed, including the package name argument.

For more information see the following URLs:

  http://www.perldoc.com/perl5.8.4/pod/perlmod.html#Symbol-Tables
  http://www.perldoc.com/perl5.8.4/pod/perlref.html

Note 72

The `load_template` subroutine

[Note: Since I've never seen code for Blosxom versions earlier than 2.0 I'm just going to ignore this code and not worry about it. It doesn't seem relevant for current 2.0-based plugins.]

Note 73

Default entries subroutine

We define a default subroutine to find entries, just as we previously defined a default subroutine to handle flavour templates. We define this as an anonymous subroutine and then store a reference to that subroutine in the variable $entries. A plugin can then have a subroutine entries (not to be confused with $entries) that defines a new anonymous subroutine and returns its reference as a replacement for the reference in $entries.

Note 74

The return values from the entries subroutine

The entries subroutine returns a list containing three things: a hash of all the files representing individual entries (%files), (for static rendering only) a hash of all directories needing index pages generated and individual entry files needing static pages generated (%indexes), and a hash of all other files found (%others).

Note that the private lexical variables %files, %indexes, and %others declared here are entirely distinct from the global variables of the same names declared on line 69. In general using my to declare a private variable within a given lexical scope will "hide" any global variables of the same name, as well as private variables of the same name declared at a higher-level lexical scope. (See the notes to lines 333 and 357 for another example of such hiding.)

Note 75

For people learning Perl: The `find` subroutine

The default entries subroutine uses the find subroutine from File::Find to do all the work. The find subroutine is analogous to the Unix find command and takes two arguments, a list of directories in which to search (here just $datadir, the Blosxom data directory) and a reference to a subroutine that will be called by find for each directory entry (e.g., file, subdirectory, symlink, etc.) found in the search. (Here we define that subroutine as an anonymous subroutine, which automatically produces the reference to be passed in.)

To help clarify how find is used, if we wanted to mimic the operation of the simple Unix command

  find /blosxom/data -name 'index.*' -print

(find all items whose filenames start with "index.", and print their pathnames) we could call find as follows:

  find( sub { /^index\..*\z/s && print "$name\n"; }, '/blosxom/data');

Here $name (also known as $File::Find::name) is a variable that find sets to the current pathname being processed.

For more information see the following URL:

  http://search.cpan.org/~nwclark/perl-5.8.4/lib/File/Find.pm

Note 76

For people learning Perl: `$File::Find::dir`

The value of $File::Find::dir is the absolute path of the directory currently being searched. We count '/' characters in the path using tr to obtain the number of directory components in the path.

Note 77

Interpretation of `$depth`

We don't process the entries in a directory if it exceeds a specified search depth limit. Recall that if $depth was originally set to a non-zero value (i.e., to limit the depth of search) then that value was adjusted to account for the number of components in the path to the Blosxom data directory. See the notes for line 94 for more information.

Note 78

Recognizing a Blosxom entry file

As noted above, $File::Find::name contains the absolute pathname of the item we are currently processing. We check to see if the current item appears to be a Blosxom entry: Its filename has the proper extension (.txt by default), it's not an index file or hidden file, and it's readable as a file (e.g., as opposed to being a directory with a name that looks like a blosxom entry). If so, we do further processing on the item as described below to build the %files list. Otherwise we consider adding the item to the %others list, as described in the notes for line 208.

Note 79

For people learning Perl: Regexp matching for entries

The regular expression matching here has some subtleties worth exploring. Items that are entries are going to look like, e.g., /blosxom/data/foo.txt (for an entry in the Blosxom data directory itself) or /blosxom/data/a/b/bar.txt (for an entry in a subdirectory somewhere below the Blosxom data directory). For reasons that will become more clear below, we want to save the basename of the item's filename (e.g., foo or bar respectively in our example) as well as the sequence of subdirectories between the data directory and the filename (e.g., '' and a/b respectively in our example).

With that in mind let's look more closely at the pattern match. First, we use m!...! to delimit the pattern to be matched, as opposed to the usual /.../, because the '/' character is part of the pattern itself and we don't want to have to escape it (i.e., as "\/"). We then match the beginning of the path against the data directory with ^$datadir/; this would match /blosxom/data/ in our example above.

To match the subdirectory components we use the pattern (?:(.*)/)?. The pattern .*/ by itself would match subdirectories up to the final '/' (e.g., in the example item ...a/b/bar.txt above), and in order to save the subdirectory components (minus the trailing '/') we could use the pattern (.*)/. However we also have to account for the possibility that the entry might be in the data directory itself, in which case there wouldn't be any subdirectory names and no second '/' character; we could handle this case using the pattern ((.*)/)? (i.e., match either one or zero occurrences of (.*)/).

However now we're capturing the subdirectory part of the path twice: once without the trailing '/' (e.g., a/b) and once with it (e.g., a/b/); to avoid this redundancy we instead use the pattern (?:(.*)/)?. (?:...) is like (...) except that it doesn't capture the matched string; as a result (?:(.*)/)? captures only the string matched by (.*), and puts it into $1.

To match the item's filename we would use a pattern like (.+)\.txt$ if we knew the extension would always be .txt: we look for one or more characters, then a literal '.', then the extension at the end of the string, and we capture the basename (i.e., the characters before the '.') for later use. In the case of Blosxom the value of the extension we're looking for is in a variable, so we use the pattern (.+)\.$file_extension$ instead, where the value of $file_extension gets interpolated into the pattern as it would into a double-quoted string.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/perlre.html#Extended-Patterns

Note 80

Skipping faux entries

We skip over faux entries like index.txt, .foo.txt, and foo.txt where foo.txt can't be read as a file (e.g., it's a directory instead). Note that the last test also implies that Blosxom will silently ignore entry files if the web server userid (e.g., "http") does not have permission to read them (but does have permission to search the directory in which they're located).

[Note: I need more information on the treatment of symlinks by Blosxom. A symlink can pass the -r test if it points to a readable file. Are there any other considerations that come into play here?]

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/func/-X.html

Note 81

For people learning Perl: Chaining `and` operators

If the current item passes our initial test to see if it might be an entry, we do a series of additional tests and operations, in the form of a series of expressions a and b and c and ... f where each expression is evaluated and we stop if any expression evaluates to a false value. Note that in this case some of the expressions anded together are parenthesized expressions of the form (x or y or ... z).

Note 82

For people learning Perl: File modification times

If we're showing future entries (i.e., $show_future_entries is true) then we proceed to the next test, otherwise the "last modified" time of the current item must be less than the current time. Note that stat(...)->mtime is a method call where the left hand side of the -> operator is an object reference (as opposed to a class/package name), in this case a File::stat object returned by stat(...). Both mtime and time are expressed in seconds since some fixed date ("the epoch") and hence are directly comparable.

For more information see the following URLs:

  http://search.cpan.org/~nwclark/perl-5.8.4/lib/File/stat.pm
  http://www.perldoc.com/perl5.8.4/pod/func/stat.html
  http://www.perldoc.com/perl5.8.4/pod/func/time.html

Note 83

The `%files` hash

The %files hash has the entry's absolute pathname as a key and its modification time as a value. If we didn't have to worry about modification times we could just use an array of entry pathnames but we need to keep a record of the entry modification times, in particular to do sorting of entries and to display the dates for entries.

Note 84

Regenerating index files when static rendering

Here we figure out which index.* files we will need to generate (or regenerate) when we're doing static rendering, based on the presence of new and/or updated entries.

[Note: It appears that the %indexes hash will be populated even if we are doing dynamic rendering, although it's not clear that %indexes will be used in that case.]

In general we will have two types of index files that need to be generated: index files for directories corresponding to categories (e.g., a/b for an entry foo.txt in that directory) and index files corresponding to dates (e.g., 2004, 2004/05, and 2004/05/22 for an entry foo.txt last modified on May 22, 2004). (Note that the main Blosxom data directory is a special case of a category directory.)

As we determine which index files need to be (re)generated we build up a list (in %indexes) of the directories in which they need to be created. We also use %indexes to build up a list of individual entries for which static pages need to be generated.

Note 85

The `-all` parameter for static rendering

If the -all parameter was passed in with value 1 (i.e., -all=1 on the command line) then we (re)generate all index.* files.

For more information see the following URL:

  http://www.blosxom.com/documentation/users/configure/static.html

Note 86

Creating new index files

If there is no index file of the default flavour (e.g., index.html) for the directory in which the entry is located ($static_dir/$1) then we generate a new one. Recall that @static_flavours is the list of flavours to be generated statically; $static_flavours[0] is 'html' by default.

(Note that we actually end up generating index files for all the flavours in @static_flavours, not just the first flavour. It's just more convenient to check for only one flavour, assuming that if its index file needs to be generated then the index files for the other static flavours do too.)

Note 87

Updating old index files for new or updated entries

If the default index file (e.g., index.html) is older than the entry being processed then we update the index.* files in the entry's directory.

(Again, we're checking the index file for one flavour and extrapolating the results for the other static flavours.)

Note 88

The `%indexes` hash and category directories

The %indexes hash uses the directory pathname relative to the Blosxom data directory (e.g., a/b) as a key. This relative pathname can also be thought of as a relative URL, with the base URL being the URL that resolves to the Blosxom script.

For an %indexes element corresponding to a category directory (e.g., a/b) we set the value of the element (e.g., $indexes{'a/b'}) to 1. (See the note for line 203 for the value of %indexes entries for date directories, e.g., 2004/05/22.)

Note 89

For people learning Perl: Date directories and array slices

If an entry was created on a certain date (e.g., May 22, 2004) then we need to create index files in a subdirectory corresponding to that date (e.g., 2004/05/22/index.html) so that date-based Blosxom URLs will work properly.

Note that the nice_date subroutine (defined below) takes a time in seconds since the epoch (here the entry's "last modified" time as stored in %files) and returns a list containing the various parts of the date/time broken out.

Here we need only the year, month number, and day, so rather than using the entire list returned by nice_date we just use the elements we need, using Perl slice notation: @a[5,2,3] means a list consisting of $a[5], $a[2], and $a[3], where here @a is replaced by (nice_date(...)). (The parentheses around nice_date(...) are needed for proper Perl syntax.)

We then take the slice, e.g., ("2004", "05", "22"), and join the elements with '/' to get the relative path we need, e.g., 2004/05/22.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/perldata.html#Slices

Note 90

The `%indexes` hash and date directories

We add the date subdirectory (e.g., 2004/05/22) to the %indexes hash.

For an %indexes element corresponding to a date directory we set the value of the element (e.g., $indexes{'2004/05/22'}) to the relative pathname of the date directory itself (e.g., 2004/05/22, same as the key). (Recall from the notes for line 200 that for %indexes elements corresponding to category directories we set the values of the elements to 1.)

Note 91

The `%indexes` hash and static entry pages

If we are generating static entry pages then we also add the entry's relative pathname (e.g., a/b/foo.txt) to %indexes. (We can't just use $File::Find::name as the key here, as we did in %files, because that's an absolute pathname that includes the Blosxom data directory.)

We use the conditional expression ($1 ? "$1/" : "") because we have to handle specially the case when the entry is in the Blosxom data directory itself and not in a subdirectory somewhere underneath it; in that case the subdirectory part of the entry's pathname (the middle part stored in $1) will be empty, and we don't want to add an extra '/' we don't need.

For a %indexes element corresponding to an individual entry we set the value of the element (e.g., $indexes{'a/b/foo.txt'}) to 1, the same as for %indexes elements for category directories.

Note 92

The `%others` hash and non-entry files

As noted above, we come to the else block when the item being processed does not appear to be a Blosxom entry file (e.g., it might be an existing file like foo.html).

If the item is not a directory and it's readable then we add it to the %others hash, using its absolute pathname as the key and its "last modified" time as the value. (This is the same way the %files hash is structured.)

Note 93

Completing the arguments to `find`

We've finally come to the end of the first argument to find, the anonymous subroutine to process items, and we include $datadir as the second argument, the directory at which we wish to start searching for items.

Note 94

For people learning Perl: Returning reference values

At the end of the default entries subroutine we return a list of references to the %files, %indexes, and %others hashes.

Recall that %files, %indexes, and %others were defined as private variables of this anonymous subroutine using my. This makes them so-called "lexical" variables whose scope is limited to the subroutine, i.e., they would not normally be visible outside this subroutine. However by passing back references we make it possible for other parts of the Blosxom code to use the values of the %files, %indexes, and %others variables, and we ensure that the values of the variables stick around as long as we need to access them. As the Perl online documentation puts it, "So long as something else references a lexical, that lexical won't be freed... This means that you can pass back or save away references to lexical variables".

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/perlsub.html#Persistent-Private-Variables

Note 95

For people learning Perl: Overriding the default entries subroutine

This is exactly the same approach we used earlier to allow plugins to override the default template subroutine. See the notes for line 161 for more information concerning how this code works.

Note 96

For people learning Perl: Calling the entries subroutine

We invoke the entries subroutine to search for entries and build the %files, %indexes, and %others hashes. Because $entries is a reference to an anonymous subroutine (either the one we defined above or one defined by a plugin to override the default) we use & to dereference the reference and actually call the subroutine.

Also recall that the entries subroutine returns a list of references to hashes, not the hashes themselves. That's why we assign the list of returned values into scalar variables, e.g., $files will now have as its value a reference to the %files hash created in the subroutine.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/perlref.html#Using-References

Note 97

For people learning Perl: Using references to create hash copies

We dereference the returned references in $files, $indexes, and $others to set our own hash variables %files, %indexes, and %others.

Note that despite having the same names these are separate and distinct variables from the %files, %indexes, and %others variables defined in the default entries subroutine: the %files, etc., variables in the entries subroutine are private lexical variables while the %files, etc., variables here are global variables for the Blosxom code (originally defined above at line 69 with use vars).

Also note that the assignments %files = %$files, etc., actually create copies of the original hashes constructed by the entries subroutine, just like an assignment %files = %h where %h is some existing hash.

Finally, the code used to assign %others reflects the fact that (as shown in the example in the Blosxom plugin developer's documentation) a plugin might not actually build a %others list and return a reference to it; in that case $others would be undefined or empty. We therefore use the ref function to verify that $others is a proper reference before we attempt to dereference it, otherwise we assign %others to be an empty hash.

For more information see the following URLs:

  http://www.blosxom.com/documentation/developers/plugins.html
  http://www.perldoc.com/perl5.8.4/pod/func/ref.html

Note 98

For people learning Perl: Calling plugin filter subroutines

Having constructed the lists of entries and other files, we allow plugins to modify those lists themselves by defining a filter subroutine. The code here is similar to the code used for the template and entries subroutines: we iterate over all plugins, check to see that the plugin is enabled and it defines a filter method, and if so then we invoke the method, passing references to the %files and %others hashes. The major difference here is that we do not terminate the loop early as soon as we find a plugin with a filter subroutine; instead we call the filter subroutine for each and every plugin that defines one.

Note that although the code passes references to both %files and %others the Blosxom developer documentation mentions passing only the %files reference. (One more reason to read the actual code :-) Since the filter subroutine is passed references to the hashes it can modify them directly by deleting, modifying, or even adding hash elements.

Finally, note that although the return value from the filter subroutine is assigned into the $entries variable used earlier to hold a reference to the entries subroutine, the return value is simply a 0 or 1 return code indicating whether the filter subroutine for a given plugin succeeded or failed. Since we already called the entries subroutine and won't do so again, we no longer need the original value of $entries; from this point on in the code $entries is used simply to save return values from plugin subroutines.

[Note: Arguably it's bad coding style to re-use $entries in this potentially confusing way. As it happens it doesn't appear to be necessary to use a variable for this purpose anyway, since $entries is assigned to but never referenced -- why not just use code like

  foreach my $plugin ( @plugins ) { ... and $plugin->filter(...) }

where we just test the return value directly and don't save it?]

For more information see the following URL:

  http://www.blosxom.com/documentation/developers/plugins.html

Note 99

Deciding whether to generate static or dynamic pages

We check to see if we are generating static pages or dynamic pages, and execute the appropriate code.

[Note: The conditional expression below exactly duplicates the expression used at line 99 above to set the variable $static_or_dynamic; why not just use the code

  if ($static_or_dynamic eq 'static') {

instead?]

Note 100

Generating static pages: The `-quiet` option

We print a status message (on stdout) unless the option -quiet was passed on the command line.

Note 101

Generating static pages: The `%done` hash

The %done hash is used to keep track of whether we've done static page generation for a particular directory; see the notes for line 240 below.

Note 102

Generating static pages: Iterating over `%indexes`

We iterate over all index-related items stored in the %indexes hash.

Recall that if we are generating static pages then %indexes will contain three types of items, all expressed as relative pathnames (relative to the Blosxom data directory): category directories for which index.* pages need to be generated (e.g., a/b), date directories that need to be created with index.* pages to support date-based URLs (e.g., 2004/05/22), and individual entries for which static pages need to be generated (e.g., a/b/foo.txt).

For each item we will need to create not only static pages for those items, but also the directories needed to contain those static pages, the higher-level directories containing those directories (e.g., subdirectory a under the data directory for a category directory a/b, or directories 2004 and 2004/05 for a date directory 2004/05/22), and index pages for those higher-level directories.

Note 103

Generating static pages: Keeping track of parent directories

As noted above we have to worry not only about static pages corresponding directly to each %indexes key (e.g., the index page a/b/index.html where $path is the %indexes item a/b), but also static pages for any higher-level directories (e.g., the index page a/index.html for directory a as well as the index page index.html for the data directory itself, the parent directory of a).

The variable $p is used to iterate over all directory components in $path and make sure that the necessary directories are created and index pages generated. We start off at $p = '', representing the Blosxom data directory itself.

Note 104

Generating static pages: Iterating over `$path` components

We iterate over each component of the relative pathname stored in $path, in order to create higher-level directories and their corresponding static pages where appropriate. We include the empty string '' as the first element of the foreach list in order to handle index files at the level of the Blosxom data directory.

Thus, for example, if $path is a/b then we will iterate over '', a, and b. If $path is 2004/05/22 then we will iterate over '', 2004, 05, and 22.

Note 105

Generating static pages: Building the relative pathname `$p`

We add the current subdirectory component to the relative pathname being built up. Since $p is initially the empty string '' and the first element of the foreach loop is '' as well, $p will be set to '/' the first time through the loop, and we'll then need to remove the leading '/'. On subsequent iterations $p will end up being set to, e.g., 2004, 2004/05, etc., assuming a value for $path of 2004/05/22.

Note 106

Generating static pages: Make relative path available to plugins

We save the current relative pathname (in $p) as $path_info so that plugins will have access to it (as a global variable in the blosxom package).

Note 107

Generating static pages: Relative path already processed?

We keep track of whether we have seen this relative path before. If not (i.e., if $done{$p} is false) then we increment $done{$p} by 1 and proceed to process it. Otherwise we skip to the next item in the foreach loop.

Note 108

For people learning Perl: `$a++` vs. `++$a`

Note that the check here works because ++ is used as a suffix operator, and hence $done{$p} is incremented after its value is checked. Also, if $done{$p} is undefined (which would be the case initially, since %done is not otherwise initialized) then its value will be converted to zero prior to incrementing it.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/perlop.html#Auto-increment-and-Auto-decrement

Note 109

Generating static pages: Creating directories as needed

We check to see if there is already an existing directory corresponding to the path we're working on, or if the path represents an individual entry. Otherwise the path represents a directory that needs to be created, and we use the mkdir function to create the directory. (We attempt to set the directory's access permissions to "rwxr-xr-x" so that anyone can look up files in the directory, but this may be made more restrictive by the umask setting of the user executing blosxom.cgi in static mode. Note that at a minimum the userid associated with the web server, e.g., "httpd", needs "r" access to the static pages and "rx" access to the directories containing them.)

For more information see the following URLs:

  http://www.perldoc.com/perl5.8.4/pod/func/-X.html
  http://www.perldoc.com/perl5.8.4/pod/func/mkdir.html

Note 110

Generating static pages: Creating pages for all needed flavours

We iterate over all the flavours for which we need to create static pages.

Note 111

Generating static pages: Determining the content type for a flavour

We use the reference stored in $template to call a subroutine to determine what content type we should pass to the generate subroutine. (Recall that $template was set to a reference to an anonymous subroutine returned by a template subroutine, either the default defined in blosxom.cgi or one provided by a plugin to override the default.)

In the case of the default subroutine we look for a file with filename content_type.<i>$flavour</i> (e.g., content_type.html) in the directory specified by $p or in its parent directories (up to and including the Blosxom data directory) and, if found, use the value of content_type that it defines. Otherwise we use the default value of content_type found in the %templates hash; for example, for the 'html' flavour we would use a content type of 'text/html'.

We look for a newline in the content_type value and delete it and anything after it. This might be the case if the content type were defined in a file; we only need the first line of the file (prior to the first newline) and can ignore the rest.

Note 112

Generating static pages: Relative pathname for the page to be created

We determine the relative pathname for the static page we need to create, up to but not including the extension. If the path $p represents an individual entry (e.g., a/b/foo.txt) then $fn will be, e.g., a/b/foo; otherwise $p represents a directory in which index files need to be created and $fn will be, e.g., a/b/index.

[Note: Unlike $content_type (which depends on the specific flavour for which we need to create a static page), the value of $fn could have been determined before entering the static flavour foreach loop, since it will be the same no matter what the flavour happens to be.]

Note 113

For people learning Perl: Opening files for writing

We attempt to create (or rewrite, i.e., open and truncate) the static page for this flavour.

For more information see the following URLs:

  http://search.cpan.org/~nwclark/perl-5.8.4/lib/FileHandle.pm
  http://www.perldoc.com/perl5.8.4/pod/func/open.html

Note 114

Generating static pages: Beginning output for a page

$output is the global variable used by the generate subroutine to build up the data for the page.

Note 115

Generating static pages: Generating a page

We call the generate subroutine to generate the data for the static page and then write it to the just-opened file.

If the current %indexes element has the value 1 then it corresponds to a category directory or individual entry, and we pass $p to the generate subroutine as its $currentdir argument and the empty string '' as the $date argument. Otherwise the %indexes element represents a date-related index page and we pass $p as the date and the empty string '' as the $currentdir argument.

See the notes for lines 266 (generating a dynamic page) and 273 and 274 (the generate subroutine) for more information about the arguments passed.

Note 116

For people learning Perl: Referencing the `generate` subroutine

Note: In this expression we use

  &generate(...)

instead of

  generate(...)

as one might expect. According to the online Perl documentation the initial '&' is typically optional and may be omitted. Using '&' does disable checking of prototypes, but the generate subroutine doesn't use prototypes. Is there some other reason for using '&' here?

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/perlsub.html

Note 117

Generating static pages: Finished with a page

We have finished writing this static page; close the FileHandle and go on to the next static flavour.

Note 118

Generating a dynamic page

The code for a dynamic page is much simpler than that for static pages; all it has to do is to create the content type header and call the generate subroutine to create the content to be returned in the HTTP response.

Note 119

Generating a dynamic page: Content type

See the notes for line 243 in the code for static pages for more information about $content_type.

Note 120

Generating a dynamic page: HTTP headers

We store the content type in an anonymous hash referenced by $header, under the key '-type'.

Note that this variable will later be passed to the CGI::header function as its single argument. (See the notes to line 409.) By using a hash to store the arguments we allow Blosxom plugins to cause additional HTTP headers to be output (i.e., other than the Content-type header) by adding additional key/value pairs to the hash referenced by $blosxom::header. For example, this is done by the cookies, lastmodified, and xhtml plugins.

For more information see the following URL:

  http://www.blosxom.com/plugins/headers/index.html

Note 121

For people learning Perl: Arguments to the `CGI::header` function

The CGI::header function can be called in three different ways. First, it can be called with a single argument that is the content type in the form of a string, e.g.,

  header('text/html')

It can also be called with multiple named arguments, of which the content type might be only one. When using multiple arguments there are two possible styles of argument passing, e.g.,

  header(-type => 'text/html', -expires => '+3d')

  header({-type => "text/html", -expires => "+3d"})

The former style is basically passing an array of arguments and is equivalent to, e.g.,

  header('-type', 'text/html', '-expires', '+3d')

while the latter is passing a reference to an anonymous hash.

The Blosxom code assumes the third style of argument passing shown above, using the variable $header to store a reference to an anonymous hash (as discussed in the previous note).

Note that older versions of the CGI module do not support all styles of argument passing. In particular, versions prior to 2.0 do not support passing multiple arguments to the CGI::header function, and versions prior to 2.37b7 do not support putting curly braces around the argument list (i.e., passing the argument list as a hash reference).

If your hosting service does not support a recent version of the CGI module then you may need to patch Blosxom to fix the way the content type is handled. The simplest patch is to revert to the original style of passing a single string argument to CGI::header:

  $header = $content_type;

Note that if you use this patch then you will not be able to use Blosxom plugins that add their own HTTP headers. (See the previous note.)

For more information see the following URLs:

  http://search.cpan.org/~lds/CGI.pm-3.05/CGI.pm#CALLING_CGI.PM_ROUTINES
  http://stein.cshl.org/WWW/software/CGI/#named_param

Note 122

Generating a dynamic page: Page generation

We call the generate subroutine to do the actual work of generating the page, passing the following arguments:

'dynamic': Generate a dynamic page.
$path_info: A relative path containing the category and individual entry information we found in the originally-requested URL. Note that $path_info will be empty if the URL contained date references only or the URL was requesting a top-level index, i.e., at the level of the Blosxom data directory. See the notes for lines 102 through 121 for more information.
"$path_info_yr/$path_info_mo_num/$path_info_da": We pass a string containing whatever date information we found in the originally requested URL. Note that the date string passed may be partial (e.g., '2004//' or '2004/05/') or "empty" ('//') if the original URL didn't reference a date. See the notes for lines 124 and 125 for more information.
$flavour: The flavour of page requested; see the notes for lines 110 through 118 for more information.
$content_type: The content type (determined by the flavour); see the notes for lines 261 and 262 above.

Note 123

Calling plugin end subroutines

We call the end subroutine for each plugin (if it defined one). The approach here is identical to that used for calling the filter subroutine and other plugin subroutines that are called for each plugin; see the notes for line 225 for an explanation of the code.

Note that the end subroutine is not passed any arguments; however it could use information previously stored in global variables in the plugin's package.

Once the end subroutines have been called we are done with processing.

Note 124

Blosxom subroutines

This marks the end of the main Blosxom code section. The remainder of the code defines two subroutines:

generate: does the main work of page generation
nice_date: parses Perl date/time values for easier processing

Note 125

The `generate` subroutine

The generate subroutine does the actual work of generating a page, and returns a (multiline) string containing the generated output for the page.

Note 126

Arguments to the `generate` subroutine

The generate subroutine takes the following arguments:

$static_or_dynamic has the value 'static' if we are generating a static page and 'dynamic' if we are generating a dynamic page.
$currentdir contains category and individual entry information, in the form of a relative path, e.g., a/b or a/b/foo.html. Note that $currentdir will be empty if the request being processed or the static page being generated is for the index page at the top level (i.e., the Blosxom data directory) or is for an index of all entries for a particular date.
$date contains date information, in the form of a string 'yyyy/mm/dd'. Note that $date may be "empty" ('//') if no date information is associated with the request or the static page being generated; $date may also have a partial value (e.g., '2004//' or '2004/05/').
$flavour is the flavour for which a page is being generated, e.g., 'html' or 'rss'.
$content_type is the MIME type for the page, e.g., 'text/html' for an HTML page or 'text/xml' for an RSS page.

Note 127

For people learning Perl: Global variables in the `generate` subroutine

Besides its arguments, the generate subroutine also has access to the global variables %files, etc. We make a local copy of the %files hash containing information about all the entries we might need to include on this page.

Note 128

Calling the plugin skip subroutines

We loop through all enabled plugins to see if any of them define a skip subroutine; if so we invoke it, assign the returned value to $skip, and if the returned value is true end the loop. Otherwise $skip will end up set to false (i.e., we are not skipping story generation).

Note 129

For people learning Perl: Calling the plugin skip subroutines

For more information on how this code works see the earlier notes for lines 161 and 225 discussing the template and filter subroutines.

Note 130

The default interpolate subroutine

The default interpolate subroutine is used to replace occurrences of Blosxom variables with the values of those variables. (For example, in the head section it would replace occurrences of the string '$blog_title' with the value of the $blog_title variable.) It is called with one argument, the template string in which interpolation is to be done.

The interpolate subroutine is defined as an anonymous subroutine whose reference is stored in $interpolate. This is the same approach used with the template subroutine, etc.

[Note: The default interpolate subroutine is newly defined each time we call the generate subroutine. This is presumably necessary because overriding the default interpolate subroutine (unlike overriding the template or entries subroutines) is not necessarily a one-time decision: different plugins might choose to define their own interpolate subroutine at different times for different reasons.]

Note 131

For people learning Perl: Variables used for interpolation

We (re)specify the blosxom package here to ensure that we are using the blosxom package namespace when we execute this subroutine. This is necessary for us to properly interpolate Blosxom global variables when we call this subroutine from a plugin.

[Note: I need to double-check that this is the actual reason. Also, I presume that for a variable to be interpolated it must be a global variable in the blosxom package, i.e., no lexical variables can be interpolated even if they are of file scope.]

Note 132

How the interpolate subroutine works

$template (the first and only argument to the interpolate subroutine) is the template contents to be processed.

We look for substrings in $template that appear to be Perl identifiers, either unqualified identifiers like $foo or package-qualified identifiers like $abc::foo (but not, for example, variable references like ${foo} or $abc::def::foo). For any strings we find that are actual defined Perl variables we replace the string with the result of evaluating the variable, otherwise we replace it with the empty string ('').

Note 133

For people learning Perl: Recognizing and evaluating a Perl scalar variable

The regular expression here matches a literal '$' followed by one or more "word" characters (alphanumeric or underscore) followed by an optional '::' if the identifier is package-qualified, followed by zero or more word characters (for any identifier after the '::'). We use (?:::)? instead of (::)? because we don't need to or want to capture the '::' string for later reference.

Note that the regular expression used will not match package-qualified identifiers of the form $abc::def::foo. This is not a problem in practice because neither the blosxom package nor the plugin packages use package names of the form abc::def. The regular expression also will not match variables of the form ${foo}. Again, this should not be a problem in practice because the braces are normally used to disambiguate variable references in a context where they would be ambiguous, e.g., "$foo's blog" where the single quote could be interpreted as an old-style Perl package qualifier; however in our case we are looking at the variable in isolation, without any context, and that eliminates (or at least reduces) any possible ambiguities.

(The only problem would be if we wanted to put alphanumeric characters immediately after the variable reference, e.g., if the template contents contained a string like "$blog_titlexxx" then we would try to interpolate the variable $blog_titlexxx (and fail) instead of concatenating the value of $blog_title with three 'x' characters.)

In the replacement string we use the ee option to treat the string as a Perl expression and evaluate it as such at runtime. So, for example, if the pattern matched is $blog_title (i.e., a literal '$' followed by the word 'blog_title') then the replacement string becomes the Perl expression

  defined $blog_title ? $blog_title : ''

In other words, if evaluating $blog_title produces a defined value (e.g., 'My First Blog'), then use that value to replace the matched pattern ($blog_title), otherwise use the empty string '' as the replacement string. This Perl expression is then evaluated at run-time to produce the desired result.

The g option matches all occurrences of things that look like Perl scalar variables. The net effect is that all apparent Perl scalar variables in the template contents are replaced with the value of the variables in question, or with the empty string if no such variables exist (or they exist but do not have defined values).

[Note: I have two open questions here: First, why is the replacement string enclosed in double-quotes, i.e.,

  $template =~ s/(\$\w+(?:::)?\w*)/"defined $1 ? $1 : ''"/gee;

instead of

  $template =~ s/(\$\w+(?:::)?\w*)/defined $1 ? $1 : ''/gee;

Second, why use the defined function at all? In testing the following code seemed to work identically and without any errors:

  $template =~ s/(\$\w+(?:::)?\w*)/$1/gee;

In what circumstances might an error occur with this simpler code?]

For more information see the following URLs:

  http://www.perldoc.com/perl5.8.4/pod/perlre.html
  http://www.perldoc.com/perl5.8.4/pod/perlop.html#Regexp-Quote-Like-Operators
  http://www.perldoc.com/perl5.8.4/pod/func/defined.html

Note 134

Skipping page generation

Now begins the bulk of the work, which we skip entirely if some plugin has told us to skip it.

Note 135

For people learning Perl: Overriding the default interpolate subroutine

For more information on how this code works see the previous notes for line 161 concerning overriding the default template subroutine.

Note 136

Obtaining the template for the top of the generated page

We dereference $template to invoke the template subroutine being used (either the default subroutine or one supplied by a plugin) and ask it to return the template used for the top of the page ('head'), for the specific flavour.

(For the 'html' flavour this template would typically include the DOCTYPE, <html> tag, <head> section, and the beginning of the <body> section, including the blog title.)

As noted previously, the default template subroutine will start looking for template files (e.g., head.html for the 'html' flavour) in the directory $currentdir (a relative pathname relative to the Blosxom data directory) and will continue looking in the parent directories of $currentdir, up to and including the Blosxom data directory itself. (If $currentdir is the empty string '' then the search will start and end in the Blosxom data directory.) If no template files are found then the default template subroutine will return the appropriate template previously read from the data section of blosxom.cgi.

Note 137

Calling the plugin head subroutines

We loop over the plugins and call the head subroutine for any plugins that have defined one, passing the subroutine the path for which we are generating a page, as well as a reference to $head so that the subroutine can modify it in place.

Note 138

For people learning Perl: The package name argument

For more information on how this code works see the notes for line 225 concerning calling the plugin filter subroutines.

Note that since we are actually invoking head as a method the head subroutine will be passed the package name as its added first argument. The subroutine can simply ignore this.

Note 139

Interpolate Perl variables in the head section

Now that all plugins have had a chance to modify the contents of $head, interpolate any variable references in the contents of $head and then append the contents of $head to the output being generated for the page.

Note 140

Keeping track of the number of entries per page

Recall that $num_entries is the default number of entries to show on a page.

Note 141

Checking for individual entry pages

We check to see if $currentdir actually refers to an individual entry as opposed to an index page.

Note 142

For people learning Perl: Identifying an individual entry reference

In the regular expression we attempt to look for patterns of the form .../foo.bar where the final component of the path has a file extension of some sort. If we find such a match then $1 will be everything up to (but not including) the final '/', $2 will be the basename of the final component, and $3 will be the file extension. If $2 is not 'index' then we assume this is a reference to an individual entry (e.g., a/b/foo.html).

Note 143

Handling individual entries, part 1

For an individual entry we reassign $currentdir to have the standard Blosxom file extension instead of whatever file extension it happened to have had. This has the effect of converting, e.g., a/b/foo.html to a/b/foo.txt.

This is necessary because we need to find the actual entry file from which to generate the output for whatever flavour was associated with the original request for the entry.

Note 144

Handling individual entries, part 2

If we are generating a page for an individual entry then we do not need to worry about all the other entries. Thus we modify %f (our local copy of %files) to be a new hash with a single element; we take the key and value for the sole element of %f from the element in %files for this entry.

(This is assuming that we did in fact find an entry file corresponding to the entry being requested, as evidenced by there being an associated element in %files. What happens if we have a request for an individual entry, e.g., a/b/foo.html and there is no entry file a/b/foo.txt? In that case %f will retain its prior value as a copy of %files. We will then proceed to iterate over the elements of %f in the foreach loop from lines 332 to 392, but the test on line 338 will always fail, i.e., no element will ever be matched. The result will be a page with a 'head' and 'foot' section but no content in between.)

Note 145

Handling index pages

If $currentdir does not correspond to an individual entry then we remove the final path component if it is of the form index.foo. It should now have the form of a pure directory reference, e.g., a/b/index.html gets changed to a/b/ and 2004/05/index.rss gets changed to 2004/05/.

Note 146

For people learning Perl: Sorting entries in chronological order

We define an anonymous subroutine to do sorting of entries, and store a reference to it in $sort.

The sort subroutine is passed a reference to a hash assumed to be of the same form as %files (hence the name $files_ref for the argument).

We get a list of all keys in the hash referenced by $files_ref, and then sort that list by comparing the values corresponding to each key, returning the sorted list.

In the %files hash (and hashes modelled on it) the keys are (absolute) pathnames for files and the values are the "last modified" times for those files, so this returns a list of file pathnames in chronological order based on when the files were last modified.

Note 147

For people learning Perl: Overriding the default sort subroutine

For more information on how this code works see the previous notes for line 161 concerning overriding the default template subroutine.

Note 148

Iterating over all entries of interest

We iterate over the sorted list of entry files (which may contain just one entry file if we are generating a page for an individual entry) and the list of other files. The files are sorted by file modification time if we're using the default sort subroutine, or possibly by some other criteria if a plugin has defined a different sort subroutine.

Note that although the code passes references to both the hashes %f (the generate subroutine's local copy of %files) and %others as arguments to the sort subroutine, in the default version of the sort subroutine the second argument (\%others) is ignored.

Note 149

No more entries for this page

We end the loop if we have exceeded the maximum number of entries per page and we are not requesting entries for a specific date. On the other hand, if we are requesting entries for a specific date (i.e., the $date argument contains at least a year value, e.g., /2004//) then we display all entries regardless of the value of the $num_entries configuration variable.

[Note: Although the Blosxom documentation mentions the use of $num_entries only in connection with a weblog's home page, it is also used to limit the number of entries displayed on category pages.]

For more information see the following URL:

  http://www.blosxom.com/documentation/users/configure/

Note 150

For people learning Perl: The case of the two `$date` variables

The check here on line 333 and the statement on line 351 use the value of the $date argument passed to the generate subroutine, i.e., the $date variable declared on line 274 using my.

This variable is not the same as the $date variable declared on line 357 and referenced on lines 360 through 364, even though all the references to $date from line 333 to line 364 appear within the same foreach loop. For more information see the notes for line 357.

Note 151

The global variables `$path` and `$fn`

[Note: Presumably $path and $fn are declared as global variables because they are referenced in the default templates (in particular, the story templates) and thus will need to be visible to the interpolate subroutine; this would not be the case if the variables were declared with my as being of lexical scope.]

Note 152

Parsing the directory path and basename for an entry file

The value of the loop variable $path_file is an absolute pathname as used in %f (%files) and %others. We extract from that pathname the directory path relative to the data directory ($path) and the filename minus the file extension ($fn).

Note that the directory path may be empty if the file is in the data directory itself (e.g., $datadir/foo.txt). That accounts for the use of the regular expression (?:(.*)/)? when matching the directory path.

Note 153

Determining which entries to process

We want to generate output only for entries whose files are in the current directory (whose value is in $currentdir as a pathname relative to the Blosxom data directory) or in subdirectories underneath the current directory. So, for example, if the current directory is a/b then the entry files a/b/foo.txt or a/b/c/bar.txt should be processed, but the entry file a/baz.txt should not. In the latter case $path will be a, which does not contain a/b ($currentdir) at the beginning, and hence the test will fail.

We also have to take into account the possibility that the value of $currentdir represents an individual entry file and not a directory. In that case we check to verify that $path_file (the full absolute pathname of the file being processed) matches the absolute pathname $datadir/$currentdir.

If both of these tests fail (the current file is not in the right hierarchy, nor does it match the individual entry we want) then we skip to processing the next item.

Note 154

For people learning Perl: The `&&=` operator

"If a path exists" here means "if $path is non-empty". As noted above $path will be empty if the file in question is in the data directory itself.

The &&= operator works similarly to the ||= operator discussed in the notes for line 86: $a &&= $b is equivalent to $a = $a && $b, i.e., if the value of $a is true (defined and non-empty) then assign the value of $b to $a. So in this case if $path is non-empty then we assign it the value "/$path", the effect of which is to prepend a '/' to the original value of $path.

There are a number of other ways to express this; for example,

  $path and $path = "/$path";

would have worked as well, as would

  $path and $path = '/' . $path;

However the syntax chosen is the most economical, if not necessarily the most understandable to Perl newbies.

For more information see the following URLs:

  http://www.perldoc.com/perl5.8.4/pod/perlop.html#C-style-Logical-And
  http://www.perldoc.com/perl5.8.4/pod/perlop.html#Assignment-Operators

Note 155

Parsing the entry file's modification time.

In the next series of lines we call the nice_date subroutine to extract individual year, month, date, etc., values for the entry file's modification time and store them in global variables.

[Note: Presumably we use global variables here so that plugins can have access to the date information without having to reparse the files' modification time values.]

The nice_date subroutine doesn't return the hours and minutes as individual values, so we split those out of $ti using ':' as the separator.

Finally, the nice_date subroutine returns a 24-hour time value, so we convert that to a 12-hour value (with am/pm indicator) for convenience. We also remove any zero on the left of the hour (e.g., change '02' to '2') and adjust "zero hour" values to the normal am/pm notation where hour values range from 1 to 12.

Note 156

Determining the date(s) for which we want to display entries

$date will either be "empty" ('//') or will be a full or partial date of the form yyyy/mm/dd, e.g., 2004/05/22, 2004/05/, or 2004//. We split this up (using '/' as the separator) to obtain individual year, month, day values (some of which may end up being empty for a given value of $date).

Note 157

Checking for entries for the year we want

If we are generating output for a particular year (i.e., $path_info_yr has a non-empty value) then we skip to processing the next item if the year in which the entry file being processed was last modified ($yr) is not the same as the year we want ($path_info_yr).

We exit the loop entirely if the file was last modified in a year earlier than the year from the URL.

[Note: How would this second check ever succeed, given the previous check? We would get to the second check only if $path_info_yr were empty or undefined, or if $yr were equal to $path_info_yr. So it would seem that the second check is guaranteed to always fail.]

Note 158

Checking for entries for the month we want

If we are generating output for a particular month (i.e., $path_info_mo_num has a non-empty value) then we skip to processing the next item if the month in which the file being processed was last modified is not the same as the month we want.

Note that $mo as returned from nice_date is actually a three-letter month abbreviation, while $path_info_mo_num is a two-digit month number; hence we have to convert $path_info_mo_num before comparing it to $mo.

[Note: Why not just use $mo_num as returned by nice_date, instead of $mo?]

Note 159

Checking for entries for the day we want

If we are generating output for a particular day of the month (i.e., $path_info_da has a non-empty value) then we skip to processing the next item if the day on which the file being processed was last modified ($da) is not the same as the day we want ($path_info_da).

We exit the loop entirely if the file was last modified on a day earlier than the day from the URL.

[Note: How would this second check ever succeed, given the previous check? We would get to the second check only if $path_info_da were empty or undefined, or if $da were equal to $path_info_da. So it would seem that the second check is guaranteed to always fail.]

Note 160

Obtaining the date template

We invoke the template subroutine being used (either the default subroutine or one supplied by a plugin) to return the template used for date formats for the specific flavour. (For example, for the 'html' flavour the date template will be

  <h3>$dw, $da $mo $yr</h3>\n

if the default template subroutine is used and no date.html file is found.)

Note 161

For people learning Perl: The new `$date` variable

Previous to this line references to $date referred to the $date variable declared on line 274 and used to store the date argument passed to the generate subroutine. By using my here to declare a new private variable $date, we cause the previous $date variable to be hidden, i.e., its value is no longer accessible to us.

The scope of lexical variables declared with my is limited to the code block in which they were declared; in this case the enclosing code block is the foreach loop used to loop over entries, which extends from line 332 to line 392. However the scope of this new $date variable does not extend over the entire foreach loop; rather it extends only from the point at which it was declared on line 357 to the end of the loop at line 392. References to $date in the foreach loop prior to line 357 refer to the original $date argument declared on line 274.

Note 162

Calling the plugin date subroutines

We let each and every plugin have a chance to modify the contents of the date template if desired (e.g., by rearranging the order in which the date-related variables appear in the template). We pass in a number of date-related variables that the plugin subroutine(s) can use in making decisions about what to change.

Note 163

Filling in the date

We interpolate any variable references in $date; among other things this will replace date-related variables such as $yr, $mo, and $da with the actual date values.

Note 164

Output the date section for the first entry on that date

If we have a number of entries for the same date then we want to output the date section only once. If we haven't previously output a date section for this date ($curdate is empty or otherwise not equal to the date section we're now processing) then we set $curdate to the current date string and append the date section to the page output.

Note 165

The global variables `$title`, `$body`, and `$raw`

[Note: Here we declare $title, $body, and $raw as global variables. Presumably this is because these variables (or at least $title and $body) are referenced in the default story template and hence have to be visible in the interpolate subroutine; if the variables were of lexical scope (i.e., using my) then they would not be visible to be interpolated.]

Note 166

For people learning Perl: Reading an entry file

If the file identified by $path_file exists then we attempt to open it to read it. If this is successful we read the first line of the file and assign it to $title; the chomp function removes any trailing newline in $title after it gets assigned. We then read the rest of the file and assign the lines to $body, removing any trailing newline. (Within $body the multiple lines from the file are separated by newlines.)

The join function is used to cause <$fh> to be evaluated in list context instead of scalar context, so that it will read the entire file (or more precisely, what's left of the file after reading the first line) instead of just reading a single line.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/func/chomp.html

Note 167

The raw contents of the entry file

Note that given the way that $title and $body were assigned, $raw essentially recovers the raw contents of the entry file into a single multi-line string, minus any final newline in the file.

[Note: The global variable $raw is not otherwise used in the main Blosxom code, and is not included in any of the default templates. Perhaps it's defined for the benefit of any plugins that might want to use it?]

Note 168

Obtaining the story template

We invoke the template subroutine being used (either the default subroutine or one supplied by a plugin) to return the template used for the story section for the specific flavour.

Note 169

Calling the plugin story subroutines

We let each and every plugin have a chance to modify the story section if desired. Note that the plugins' story subroutines have access both to the story template ($story) and to the story contents ($title and $body).

Note 170

Escaping characters special to XML

If the content_type is an XML-based content type (e.g., 'text/xml') then we need to modify the generated output so that characters like '<' are properly escaped by replacing them with XML character entities.

[Note: The escaping is not done if the content_type is 'text/html', so HTML markup in the story (i.e., for the 'html' flavour) is not affected. However, if your blog pages were in XHTML and you had specified the content type to be 'application/xhtml+xml' (as recommended by Ian Hickson and the W3C) then your XHTML markup would presumably be mangled. (Of course, almost everybody with "XHTML" pages is actually serving them as 'text/html' because MSIE doesn't support the 'application/xhtml+xml' content type.)]

Note 171

For people learning Perl: The `%escape` hash

The %escape hash maps problematic characters to their respective character entities.

Note 172

For people learning Perl: The `$escape_re` regular expression

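The $escape_re variable holds a string value that can be used in a regular expression to look for alternate strings to match. In particular, the value of $escape_re will be something like <|>|&|" given the definition of %escape (the alternatives may appear in a different order, since keys returns the hash keys in no particular order; the order does not affect what the pattern matches).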

Note that the expression

  join '|' => keys %escape

as used in this context is equivalent to

  join '|', keys %escape

The latter is the form typically used in the Blosxom code.

[Note: I wonder if there was a particular reason for using => here?]

Note 173

Escaping the title and body

We modify both $title and $body to replace the problematic characters with the equivalent character entities. Note that this takes advantage of the Perl feature where the search string and replacement string undergo interpolation to replace references to Perl variables ($escape_re and $escape{...} in this case) with the values of those variables.

Note that this is different from the approach used by the interpolate subroutine. There we did not know what the actual variable names were going to be, so we had to "eval" the replacement string using the ee option to the pattern match. Here we know what the variable names are at compile time, so we don't need the ee option, just the normal variable interpolation that Perl does as a matter of course.

[Note: I need to double-check the ordering of variable interpolation. In order for the replacement string to work properly the value of $1 has to be known prior to interpolation taking place.]

Note 174

Interpolating variables in the story and generating story output

We interpolate any variables present in the contents of the story section, and then append the resulting content to the output string being built.

Note 175

Finished processing an entry

We're now done with the file for this entry, and so can close its FileHandle.

Having added another entry to the output, we decrement $ne to keep track of how many more entries we can add before hitting the specified maximum entries per page. (We don't check the number of entries when generating date-based pages, but we decrement $ne anyway.)

Note 176

Obtaining the template for the foot section

We invoke the template subroutine being used (either the default subroutine or one supplied by a plugin) to return the template used for the foot section for the specified flavour. For example, for the 'html' flavour this would typically include the </body> and </html> closing tags.

Note 177

Calling the plugin foot subroutines

We let each and every plugin have a chance to modify the contents of the foot section if desired.

Note 178

Interpolating variables in the foot section and generating output

We interpolate any variables present in the contents of the foot section, and then append the resulting content to the output string being built.

Note 179

Calling the plugin last subroutines

We let each and every plugin have a chance to do any final processing for this page. Note that the last subroutine is not passed arguments, but it has access to Blosxom global variables, including $output.

Note 180

Adding the HTTP header

Note that the HTTP header (if any) goes at the front of the generated output, and is needed only for dynamic page generation. The variable $header contains the content type value as well as the values for any other HTTP headers added by plugins; see the notes to line 264.

Note 181

For people learning Perl: The `generate` subroutine return value

We return the $output string as the value of the generate subroutine. (The return keyword is optional in Perl.)

Note 182

The `nice_date` subroutine

The subroutine nice_date converts OS-provided time values (expressed as the number of seconds since some fixed date) into year, month, day, etc., values that we can use for printing date/times and creating date-based URLs.

Note 183

For people learning Perl: The `$unixtime` argument

The subroutine argument is called $unixtime but it's not specific to Unix; it's the time value returned by the Perl function time and similar routines. The value is always a number in seconds, but its interpretation can differ slightly from OS to OS (for example, between Unix and Mac OS).

Note 184

For people learning Perl: The `ctime` function

We don't use the value of $unixtime directly; we just pass it to the Time::localtime::ctime routine, which returns a human-readable string representing the date/time, e.g., "Sat May 29 06:58:29 2004".

For more information see the following URLs:

  http://search.cpan.org/~nwclark/perl-5.8.4/lib/Time/localtime.pm
  http://www.perldoc.com/perl5.8.4/pod/func/localtime.html

Note 185

For people learning Perl: Interpreting the time value

The value returned by ctime has the format "Dow Mon dd hh:mm:ss yyyy" where "Dow" is the three letter abbreviation for the day of the week (e.g., "Sun", "Mon", etc.) and "Mon" is the three letter abbreviation for the month (e.g., "Jan", "Feb", etc.). The other fields have the usual numeric values for day of the month, hours, minutes, and seconds, and the (four-digit) year; however note that the day of the month may be expressed as either one or two digits.

We use a pattern to match the $c_time string, capture the values of interest, and assign them to individual variables. Note that the regular expression \w{3} matches exactly three alphanumeric characters (actually, alphanumeric plus underscore), while \d{1,2} matches either one or two digits (no more, no less), to account for the way that ctime returns the day of the month.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/perlre.html#Regular-Expressions

Note 186

For people learning Perl: The `sprintf` function

Since the day of the month may be returned as either a one or two digit value (e.g., "1" or "10") we use the sprintf function (like the corresponding function in C) to modify the value of $da to be two digits, zero-padded on the left (e.g., "01" instead of "1").

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/func/sprintf.html

Note 187

Converting month name to month number

Since we also want the month number as well as the name, we use the month name as a key to look up the month number in the %month2num hash defined above.

Note 188

For people learning Perl: `__DATA__` and `__END__`

The __DATA__ line marks the end of the Blosxom code and the beginning of the flavour data. The __END__ line marks the end of the program as a whole. Anything after that is considered a comment; this area is used by some Perl developers to include a change history for the program, program documentation, license terms, and/or other useful information.

For more information see the following URL:

  http://www.perldoc.com/perl5.8.4/pod/perldata.html

Annotated blosxom.cgi

Annotation credits

For people learning Perl: Comments and #!

Overview of the Blosxom 2 source code

Blosxom initialization (lines 69-214)

Finding (and filtering) Blosxom entries (lines 221-226)

Generating output (lines 228-270)

The generate subroutine (lines 273-412)

The nice_date subroutine (lines 415-424)

For people learning Perl: Packages

For people learning Perl: Package "global" variables

For people learning Perl: Scalar variables

For people learning Perl: Array variables and qw

For people learning Perl: use vars

For people learning Perl: use strict

For people learning Perl: Importing modules with the use function

FileHandle

File::Find

File::stat

Time::localtime

CGI

The Blosxom version

For people learning Perl: my variables

For people learning Perl: Creating objects with new

For people learning Perl: Hashes

For people learning Perl: The keys and sort functions

For people learning Perl: The ||= operator

The value of url()

Server Side Includes and Blosxom

For people learning Perl: The =~ operator and regular expression matching

For people learning Perl: More on regular expressions and substitution

Adjusting $depth

For people learning Perl: The tr and and operators

Static vs. dynamic mode

For people learning Perl: The eq operator and CGI::param() function

For people learning Perl: Setting parameters using the CGI::param() function

PATH_INFO

For people learning Perl: The split function

For people learning Perl: The shift function

Interpreting Blosxom URLs

For people learning Perl: The while loop

Determining the flavour, part 1

For people learning Perl: $#path_info

For people learning Perl: "Greedy" matching

Indexes vs. individual entries

For people learning Perl: The pop function

Determining the flavour, part 2

For people learning Perl: Alternative patterns in regular expressions

Date references

For people learning Perl: Assigning into a list

Month abbreviations in Blosxom URLs

For people learning Perl: The lc, ucfirst, and undef functions

The template subroutine

For people learning Perl: Anonymous subroutines and references

Template subroutine arguments

For people learning Perl: Argument passing using @_

For people learning Perl: The do while loop

For people learning Perl: The join and open functions

For people learning Perl: Retrying the template file search

For people learning Perl: Values in nested hashes

For people learning Perl: Initializing a hash to be empty

Default templates

For people learning Perl: <DATA>

For people learning Perl: __END__ and last

For people learning Perl: Parsing template lines and pattern matching in a list context

For people learning Perl: Multiline mode in regular expressions

For people learning Perl: Nested hashes

For people learning Perl: The opendir function

For people learning Perl: The readdir, grep, and -f functions and the foreach loop

For people learning Perl: Parsing plugin names

For people learning Perl: Determining if a plugin is disabled

For people learning Perl: The require function

For people learning Perl: Calling a plugin's start routine

Start routine invocation

Method invocation vs. symbolic references

For people learning Perl: The closedir function

Overriding the default template subroutine

For people learning Perl: The can method

For people learning Perl: Inside the can method

The `generate` subroutine (lines 273-412)

The `nice_date` subroutine (lines 415-424)

For people learning Perl: Array variables and `qw`

For people learning Perl: `use vars`

For people learning Perl: `use strict`

For people learning Perl: Importing modules with the `use` function

For people learning Perl: `my` variables

For people learning Perl: Creating objects with `new`

For people learning Perl: The `keys` and `sort` functions

For people learning Perl: The `||=` operator

The value of `url()`

For people learning Perl: The `=~` operator and regular expression matching

Adjusting `$depth`

For people learning Perl: The `tr` and `and` operators

For people learning Perl: The `eq` operator and `CGI::param()` function

For people learning Perl: Setting parameters using the `CGI::param()` function

`PATH_INFO`

For people learning Perl: The `split` function

For people learning Perl: The `shift` function

For people learning Perl: The `while` loop

For people learning Perl: `$#path_info`

For people learning Perl: The `pop` function

For people learning Perl: The `lc`, `ucfirst`, and `undef` functions

For people learning Perl: Argument passing using `@_`

For people learning Perl: The `do while` loop

For people learning Perl: The `join` and `open` functions

For people learning Perl: `<DATA>`

For people learning Perl: `__END__` and `last`

For people learning Perl: The `opendir` function

For people learning Perl: The `readdir`, `grep`, and `-f` functions and the `foreach` loop

For people learning Perl: The `require` function

For people learning Perl: The `closedir` function

For people learning Perl: The `can` method

For people learning Perl: Inside the `can` method