#!/usr/bin/perl

# Blosxom
# Author: Rael Dornfest <rael@oreilly.com>
# Version: 2.0
# Home/Docs/Licensing: http://www.raelity.org/apps/blosxom/
#
# Overview of this file:
#   1. Configurable variables (edit these for your installation).
#   2. Start-up: derive $url, $path_info, $flavour and the requested date
#      from the CGI request; load the built-in templates and any plugins.
#   3. Entry discovery ($entries), then static or dynamic page generation
#      via generate().
#   4. Built-in flavour templates after __DATA__.

package blosxom;

# --- Configurable variables -----

# What's this blog's title?
$blog_title = "My Weblog";

# What's this blog's description (for outgoing RSS feed)?
$blog_description = "Yet another Blosxom weblog.";

# What's this blog's primary language (for outgoing RSS feed)?
$blog_language = "en";

# Where are this blog's entries kept?
$datadir = "/Library/WebServer/Documents/blosxom";

# What's my preferred base URL for this blog (leave blank for automatic)?
$url = "";

# Should I stick only to the datadir for items or travel down the
# directory hierarchy looking for items?  If so, to what depth?
# 0 = infinite depth (aka grab everything), 1 = datadir only, n = n levels down
$depth = 0;

# How many entries should I show on the home page?
$num_entries = 40;

# What file extension signifies a blosxom entry?
$file_extension = "txt";

# What is the default flavour?
$default_flavour = "html";

# Should I show entries from the future (i.e. dated after now)?
$show_future_entries = 0;

# --- Plugins (Optional) -----

# Where are my plugins kept?
$plugin_dir = "";

# Where should my modules keep their state information?
$plugin_state_dir = "$plugin_dir/state";

# --- Static Rendering -----

# Where are this blog's static files to be created?
$static_dir = "/Library/WebServer/Documents/blog";

# What's my administrative password (you must set this for static rendering)?
$static_password = "";

# What flavours should I generate statically?
@static_flavours = qw/html rss/;

# Should I statically generate individual entries?
# 0 = no, 1 = yes
$static_entries = 0;

# --------------------------------

# Package variables shared with plugins and templates (as $blosxom::name).
# "use" runs at compile time, so these declarations also cover the
# assignments in the configuration section above.
use vars qw!
  $version $blog_title $blog_description $blog_language $datadir $url
  %template $template $depth $num_entries $file_extension $default_flavour
  $static_or_dynamic $plugin_dir $plugin_state_dir @plugins %plugins
  $static_dir $static_password @static_flavours $static_entries
  $path_info $path_info_yr $path_info_mo $path_info_da $path_info_mo_num
  $flavour %month2num @num2month $interpolate $entries $output $header
  $show_future_entries %files %indexes %others
!;

use strict;
use FileHandle;
use File::Find;
use File::stat;
use Time::localtime;
use CGI qw/:standard :netscape/;

$version = "2.0";

# Shared read handle used for template files and entry files.
my $fh = FileHandle->new;

# Month abbreviation -> two-digit number, and the inverse array indexed by
# month number ('nil' occupies index 0 so that Jan is index 1).
%month2num = (nil=>'00', Jan=>'01', Feb=>'02', Mar=>'03', Apr=>'04', May=>'05', Jun=>'06', Jul=>'07', Aug=>'08', Sep=>'09', Oct=>'10', Nov=>'11', Dec=>'12');
@num2month = sort { $month2num{$a} <=> $month2num{$b} } keys %month2num;

# Use the stated preferred URL or figure it out automatically
$url ||= url();
$url =~ s/^included:/http:/; # Fix for Server Side Includes (SSI)
$url =~ s!/$!!;

# Drop ending any / from dir settings
$datadir =~ s!/$!!; $plugin_dir =~ s!/$!!; $static_dir =~ s!/$!!;

# Fix depth to take into account datadir's path
$depth and $depth += ($datadir =~ tr[/][]) - 1;

# Global variable to be used in head/foot.{flavour} templates
$path_info = '';

# Static mode requires: not running under CGI, and a -password parameter
# that matches a non-empty $static_password.  Anything else is dynamic.
$static_or_dynamic = (!$ENV{GATEWAY_INTERFACE} and param('-password') and $static_password and param('-password') eq $static_password) ? 'static' : 'dynamic';
$static_or_dynamic eq 'dynamic' and param(-name=>'-quiet', -value=>1);

# Path Info Magic
# Take a gander at HTTP's PATH_INFO for optional blog name, archive yr/mo/day
my @path_info = split m{/}, path_info() || param('path');
shift @path_info;

# Leading components that start with a letter and contain no '.' are
# category path; whatever remains is date components and/or a file name.
while ($path_info[0] and $path_info[0] =~ /^[a-zA-Z].*$/ and $path_info[0] !~ /(.*)\.(.*)/) { $path_info .= '/' . shift @path_info; }

# Flavour specified by ?flav={flav} or index.{flav}
$flavour = '';

if ( $path_info[$#path_info] =~ /(.+)\.(.+)$/ ) {
  $flavour = $2;
  $1 ne 'index' and $path_info .= "/$1.$2";
  pop @path_info;
} else {
  $flavour = param('flav') || $default_flavour;
}

# Strip spurious slashes
$path_info =~ s!(^/*)|(/*$)!!g;

# Date fiddling
($path_info_yr,$path_info_mo,$path_info_da) = @path_info;
$path_info_mo_num = $path_info_mo ? ( $path_info_mo =~ /\d{2}/ ? $path_info_mo : ($month2num{ucfirst(lc $path_info_mo)} || undef) ) : undef;

# Define standard template subroutine, plugin-overridable at Plugins: Template
#
# Arguments: ($path, $chunk, $flavour).
# Looks for "$datadir/$path/$chunk.$flavour", then retries with one trailing
# path component removed each time, up to $datadir itself.  If no file can be
# opened, falls back to the built-in %template entry for the flavour, then
# to the 'error' flavour, then to the empty string.
$template =
  sub {
    my ($path, $chunk, $flavour) = @_;

    do {
      # Explicit read mode: the file name is never parsed for a mode prefix.
      if ($fh->open("$datadir/$path/$chunk.$flavour", '<')) {
        my $text = join '', <$fh>;
        $fh->close;   # do not leave the shared handle open between calls
        return $text;
      }
    } while ($path =~ s/(\/*[^\/]*)$// and $1);

    return join '', ($template{$flavour}{$chunk} || $template{error}{$chunk} || '');
  };

# Bring in the templates
# Each DATA line is "<flavour> <component> <text>"; a literal \n in the text
# becomes a newline.  Reading stops at the first blank line or at __END__.
%template = ();
while (<DATA>) {
  last if /^(__END__)?$/;
  my($ct, $comp, $txt) = /^(\S+)\s(\S+)\s(.*)$/;
  $txt =~ s/\\n/\n/mg;
  $template{$ct}{$comp} = $txt;
}

# Plugins: Start
# Plugin files are loaded in sorted order.  Leading digits in the file name
# only affect ordering; a trailing underscore loads the plugin but records
# it as disabled (-1) in %plugins.
if ( $plugin_dir and opendir PLUGINS, $plugin_dir ) {
  foreach my $plugin ( grep { /^\w+$/ && -f "$plugin_dir/$_" } sort readdir(PLUGINS) ) {
    my($plugin_name, $off) = $plugin =~ /^\d*(\w+?)(_?)$/;
    my $on_off = $off eq '_' ? -1 : 1;
    require "$plugin_dir/$plugin";
    $plugin_name->start() and ( $plugins{$plugin_name} = $on_off ) and push @plugins, $plugin_name;
  }
  closedir PLUGINS;
}

# Plugins: Template
# Allow for the first encountered plugin::template subroutine to override the
# default built-in template subroutine
my $tmp; foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('template') and defined($tmp = $plugin->template()) and $template = $tmp and last; }

# Provide backward compatibility for Blosxom < 2.0rc1 plug-ins
sub load_template {
  return &$template(@_);
}

# Define default find subroutine
#
# Walks $datadir and returns three hash references:
#   \%files   - entry file (absolute path) => mtime
#   \%indexes - static pages to (re)generate: category paths and entry paths
#               map to 1, date paths (yyyy/mm/dd) map to themselves
#   \%others  - every other readable non-directory => mtime
$entries =
  sub {
    my(%files, %indexes, %others);
    find(
      sub {
        my $d;
        my $curr_depth = $File::Find::dir =~ tr[/][];
        return if $depth and $curr_depth > $depth;

        if (
          # a match
          $File::Find::name =~ m!^$datadir/(?:(.*)/)?(.+)\.$file_extension$!
          # not an index, .file, and is readable
          # ($1 and $2 stay set below because the failing !~ match does not
          # reset the capture variables)
          and $2 ne 'index' and $2 !~ /^\./ and (-r $File::Find::name)
        ) {

            # to show or not to show future entries
            (
              $show_future_entries
              or stat($File::Find::name)->mtime < time
            )

              # add the file and its associated mtime to the list of files
              and $files{$File::Find::name} = stat($File::Find::name)->mtime

              # static rendering bits
              and (
                param('-all')
                or !-f "$static_dir/$1/index." . $static_flavours[0]
                or stat("$static_dir/$1/index." . $static_flavours[0])->mtime < stat($File::Find::name)->mtime
              )
                and $indexes{$1} = 1
                and $d = join('/', (nice_date($files{$File::Find::name}))[5,2,3])

                and $indexes{$d} = $d
                and $static_entries and $indexes{ ($1 ? "$1/" : '') . "$2.$file_extension" } = 1

            }
            else {
              !-d $File::Find::name and -r $File::Find::name and $others{$File::Find::name} = stat($File::Find::name)->mtime
            }
      }, $datadir
    );

    return (\%files, \%indexes, \%others);
  };

# Plugins: Entries
# Allow for the first encountered plugin::entries subroutine to override the
# default built-in entries subroutine
foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('entries') and defined($tmp = $plugin->entries()) and $entries = $tmp and last; }

my ($files, $indexes, $others) = &$entries();
%files = %$files; %indexes = %$indexes; %others = ref $others ? %$others : ();

# Plugins: Filter
# (Here and in the other callback loops below, the callback's return value
# is assigned to $entries, exactly as in the original code.)
foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('filter') and $entries = $plugin->filter(\%files, \%others) }

# Static
if (!$ENV{GATEWAY_INTERFACE} and param('-password') and $static_password and param('-password') eq $static_password) {

  param('-quiet') or print "Blosxom is generating static index pages...\n";

  # Home Page and Directory Indexes
  my %done;
  foreach my $path ( sort keys %indexes) {
    my $p = '';
    # Visit '' (the home page) and then every prefix of $path in turn.
    foreach ( ('', split /\//, $path) ) {
      $p .= "/$_";
      $p =~ s!^/!!;
      $path_info = $p;
      $done{$p}++ and next;
      (-d "$static_dir/$p" or $p =~ /\.$file_extension$/) or mkdir "$static_dir/$p", 0755;
      foreach $flavour ( @static_flavours ) {
        my $content_type = (&$template($p,'content_type',$flavour));
        $content_type =~ s!\n.*!!s;
        my $fn = $p =~ m!^(.+)\.$file_extension$! ? $1 : "$p/index";
        param('-quiet') or print "$fn.$flavour\n";
        # The error message names the file that actually failed to open.
        my $fh_w = FileHandle->new("$static_dir/$fn.$flavour", '>') or die "Couldn't open $static_dir/$fn.$flavour for writing: $!";
        $output = '';
        # A value of 1 marks a category or entry path; anything else is a
        # date path, which is passed to generate() as the date argument.
        print $fh_w
          $indexes{$path} == 1
            ? &generate('static', $p, '', $flavour, $content_type)
            : &generate('static', '', $p, $flavour, $content_type);
        # Buffered write errors only surface at close time.
        $fh_w->close or warn "Couldn't close $static_dir/$fn.$flavour: $!";
      }
    }
  }
}

# Dynamic
else {
  my $content_type = (&$template($path_info,'content_type',$flavour));
  $content_type =~ s!\n.*!!s;

  $header = {-type=>$content_type};

  print generate('dynamic', $path_info, "$path_info_yr/$path_info_mo_num/$path_info_da", $flavour, $content_type);
}

# Plugins: End
foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('end') and $entries = $plugin->end() }

# Generate
#
# Build one page and return it as a string (the text is also accumulated in
# the package variable $output).
#
# Arguments:
#   $static_or_dynamic - 'static' or 'dynamic'; the HTTP header is prepended
#                        only for 'dynamic' and only when $header is set
#   $currentdir        - category path or entry path (e.g. "a/b" or
#                        "a/b/foo.html"), relative to $datadir; may be ''
#   $date              - "yyyy/mm/dd" filter; any component may be empty
#   $flavour           - flavour whose templates are used
#   $content_type      - content type; one ending in "xml" switches on
#                        escaping of $title and $body
#
# NOTE: the default interpolation subroutine evaluates template variables
# with a string eval compiled inside this subroutine, so the lexical names
# declared here can be visible to templates.  Do not rename them casually.
sub generate {
  my($static_or_dynamic, $currentdir, $date, $flavour, $content_type) = @_;

  my %f = %files;

  # Plugins: Skip
  # Allow plugins to decide if we can cut short story generation
  my $skip; foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('skip') and defined($tmp = $plugin->skip()) and $skip = $tmp and last; }

  # Define default interpolation subroutine
  # Replaces each $name or $pkg::name in the template text with the value of
  # that variable, or with '' when it is undefined.  Template text is
  # evaluated as Perl, so templates must come from a trusted source.
  $interpolate =
    sub {
      package blosxom;
      my $template = shift;
      $template =~
        s/(\$\w+(?:::)?\w*)/"defined $1 ? $1 : ''"/gee;
      return $template;
    };

  unless (defined($skip) and $skip) {

    # Plugins: Interpolate
    # Allow for the first encountered plugin::interpolate subroutine to
    # override the default built-in interpolate subroutine
    my $tmp; foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('interpolate') and defined($tmp = $plugin->interpolate()) and $interpolate = $tmp and last; }

    # Head
    my $head = (&$template($currentdir,'head',$flavour));

    # Plugins: Head
    foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('head') and $entries = $plugin->head($currentdir, \$head) }

    $head = &$interpolate($head);

    $output .= $head;

    # Stories
    my $curdate = '';
    my $ne = $num_entries;

    # A request for "dir/name.flavour" (other than index.*) selects the
    # single entry dir/name.$file_extension, when that entry exists.
    if ( $currentdir =~ /(.*?)([^\/]+)\.(.+)$/ and $2 ne 'index' ) {
      $currentdir = "$1$2.$file_extension";
      $files{"$datadir/$1$2.$file_extension"} and %f = ( "$datadir/$1$2.$file_extension" => $files{"$datadir/$1$2.$file_extension"} );
    }
    else {
      $currentdir =~ s!/index\..+$!!;
    }

    # Define a default sort subroutine
    # (newest first, by the mtime stored as the hash value)
    my $sort = sub {
      my($files_ref) = @_;
      return sort { $files_ref->{$b} <=> $files_ref->{$a} } keys %$files_ref;
    };

    # Plugins: Sort
    # Allow for the first encountered plugin::sort subroutine to override the
    # default built-in sort subroutine
    foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('sort') and defined($tmp = $plugin->sort()) and $sort = $tmp and last; }

    foreach my $path_file ( &$sort(\%f, \%others) ) {
      # The entry limit applies only when no date was requested.
      last if $ne <= 0 && $date !~ /\d/;
      use vars qw/ $path $fn /;
      ($path,$fn) = $path_file =~ m!^$datadir/(?:(.*)/)?(.*)\.$file_extension!;

      # Only stories in the right hierarchy
      # $currentdir is derived from the request, so it is quoted with \Q..\E:
      # a metacharacter in the URL must not be able to break the pattern.
      $path =~ /^\Q$currentdir\E/ or $path_file eq "$datadir/$currentdir" or next;

      # Prepend a slash for use in templates only if a path exists
      $path &&= "/$path";

      # Date fiddling for by-{year,month,day} archive views
      use vars qw/ $dw $mo $mo_num $da $ti $yr $hr $min $hr12 $ampm /;
      ($dw,$mo,$mo_num,$da,$ti,$yr) = nice_date($files{"$path_file"});
      ($hr,$min) = split /:/, $ti;
      ($hr12, $ampm) = $hr >= 12 ? ($hr - 12,'pm') : ($hr, 'am');
      $hr12 =~ s/^0//; $hr12 == 0 and $hr12 = 12;

      # Only stories from the right date
      # (The original followed the year and day checks with
      # "last if ... < ..." statements; they could never run after the
      # "next if ... != ..." on the same value, so they are omitted.)
      my($path_info_yr,$path_info_mo_num, $path_info_da) = split /\//, $date;
      next if $path_info_yr && $yr != $path_info_yr;
      next if $path_info_mo_num && $mo ne $num2month[$path_info_mo_num];
      next if $path_info_da && $da != $path_info_da;

      # Date
      my $date = (&$template($path,'date',$flavour));

      # Plugins: Date
      foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('date') and $entries = $plugin->date($currentdir, \$date, $files{$path_file}, $dw,$mo,$mo_num,$da,$ti,$yr) }

      $date = &$interpolate($date);

      # Emit the date chunk only when it differs from the previous entry's.
      $curdate ne $date and $curdate = $date and $output .= $date;

      # First line of the entry file is the title, the rest is the body.
      use vars qw/ $title $body $raw /;
      if (-f "$path_file" && $fh->open($path_file, '<')) {
        chomp($title = <$fh>);
        chomp($body = join '', <$fh>);
        $fh->close;
        $raw = "$title\n$body";
      }
      my $story = (&$template($path,'story',$flavour));

      # Plugins: Story
      foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('story') and $entries = $plugin->story($path, $fn, \$story, \$title, \$body) }

      if ($content_type =~ m{\Wxml$}) {
        # Escape <, >, and &, and to produce valid RSS
        my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
        my $escape_re  = join '|' => keys %escape;
        $title =~ s/($escape_re)/$escape{$1}/g;
        $body =~ s/($escape_re)/$escape{$1}/g;
      }

      $story = &$interpolate($story);

      $output .= $story;

      $ne--;
    }

    # Foot
    my $foot = (&$template($currentdir,'foot',$flavour));

    # Plugins: Foot
    foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('foot') and $entries = $plugin->foot($currentdir, \$foot) }

    $foot = &$interpolate($foot);
    $output .= $foot;

    # Plugins: Last
    foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('last') and $entries = $plugin->last() }

  } # End skip

  # Finally, add the header, if any and running dynamically
  $static_or_dynamic eq 'dynamic' and $header and $output = header($header) . $output;

  $output;
}


# nice_date($unixtime)
#
# Split the ctime() string for $unixtime into its parts and return the list
# ($dw, $mo, $mo_num, $da, $ti, $yr): weekday abbreviation, month
# abbreviation, two-digit month number, two-digit day, "hh:mm", and
# four-digit year.
sub nice_date {
  my($unixtime) = @_;

  my $c_time = ctime($unixtime);
  my($dw,$mo,$da,$ti,$yr) = ( $c_time =~ /(\w{3}) +(\w{3}) +(\d{1,2}) +(\d{2}:\d{2}):\d{2} +(\d{4})$/ );
  $da = sprintf("%02d", $da);
  my $mo_num = $month2num{$mo};

  return ($dw,$mo,$mo_num,$da,$ti,$yr);
}


# Default HTML and RSS template bits
__DATA__
html content_type text/html
html head <html><head><link rel="alternate" type="application/rss+xml" title="RSS" href="$url/index.rss" /><title>$blog_title $path_info_da $path_info_mo $path_info_yr</title></head><body><center><font size="+3">$blog_title</font><br />$path_info_da $path_info_mo $path_info_yr</center><p />
html story <p><a name="$fn"><b>$title</b></a><br />$body<br /><br />posted at: $ti | path: <a href="$url$path">$path</a> | <a href="$url/$yr/$mo_num/$da#$fn">permanent link to this entry</a></p>\n
html date <h3>$dw, $da $mo $yr</h3>\n
html foot <p /><center><a href="http://www.blosxom.com/"><img src="http://www.blosxom.com/images/pb_blosxom.gif" border="0" /></a></center></body></html>
rss content_type text/xml
rss head <?xml version="1.0"?>\n<!-- name="generator" content="blosxom/$version" -->\n<!DOCTYPE rss PUBLIC "-//Netscape Communications//DTD RSS 0.91//EN" "http://my.netscape.com/publish/formats/rss-0.91.dtd">\n\n<rss version="0.91">\n <channel>\n <title>$blog_title $path_info_da $path_info_mo $path_info_yr</title>\n <link>$url</link>\n <description>$blog_description</description>\n <language>$blog_language</language>\n
rss story <item>\n <title>$title</title>\n <link>$url/$yr/$mo_num/$da#$fn</link>\n <description>$body</description>\n </item>\n
rss date \n
rss foot </channel>\n</rss>
error content_type text/html
error head <html><body><p><font color="red">Error: I'm afraid this is the first I've heard of a "$flavour" flavoured Blosxom. Try dropping the "/+$flavour" bit from the end of the URL.</font>\n\n
error story <p><b>$title</b><br />$body <a href="$url/$yr/$mo_num/$da#fn.$default_flavour">#</a></p>\n
error date <h3>$dw, $da $mo $yr</h3>\n
error foot </body></html>
__END__
This is version 0.9 of the Blosxom 2 annotations, by Frank Hecker <hecker@hecker.org>. These annotations to the blosxom.cgi
source code are made available under the same license terms as Blosxom itself. Comments and suggestions for changes are welcome.
The online Perl documentation was indispensable in creating these notes. The notes2html
script was used to create HTML documents from the inline notes.
For more information see the following URLs:
http://www.blosxom.com/downloads/blosxom.zip
http://www.blosxom.com/license.html
http://www.hecker.org/blosxom/
All lines starting with '#' are comments, not part of the code itself. The first line uses the standard Unix #! convention to identify the location of the Perl interpreter. You would need to change this line if for some reason the Perl interpreter were in a different directory or had a different name.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perlrun.html
The Blosxom source file is divided into three major sections:
The main Blosxom code itself is further divided into various sections as discussed below.
In this section the following tasks are carried out:
$fh: a FileHandle for reading files (line 80)
%month2num: hash to convert a month abbreviation to a month number (line 82)
@num2month: array to convert a month number to a month abbreviation (line 83)
$url: if a value for the base URL wasn't defined in the configurable variables section, set a new value as described below (lines 86-88)
$datadir: strip any trailing slash if present (line 91)
$depth: adjust to account for the number of path components in $datadir (line 94)
$url: the part of the requested URL corresponding to the Blosxom script itself (e.g., http://www.example.com/cgi-bin/blosxom.cgi) (lines 86-88)
$static_or_dynamic: set to 'static' if Blosxom is running in static mode, 'dynamic' if Blosxom is running in dynamic mode, i.e., through CGI (line 99)
$path_info: the part of the requested URL identifying a particular category or individual entry to be displayed, e.g., /society/literature or /cooking/italian/bruschetta.html (lines 97, 104-107, 121)
$flavour: the particular flavour of data being requested, e.g., 'html' or 'rss' (lines 110-118)
$path_info_yr, $path_info_mo, and $path_info_da: the dates for which we are requesting that entries be displayed (lines 124-125)
$template
(lines 128-137)%template
hash keyed by the content type (e.g., 'html' or 'rss') and template component (e.g., 'head' or 'foot') (lines 139-145)@plugins
array (a list of plugin names, minus prefixes) and the %plugins
hash (which stores the enabled/disabled status for each plugin, keyed by the plugin name) (line 153)$entries
) (lines 169-214) and then allow overriding it by the first plugin that defines an alternate entries subroutine (line 219)This section of the code looks for Blosxom entries and related items of interest, performing the following tasks:
$entries
, and build up three hashes (lines 221-222):
%files
: files representing individual Blosxom entries (e.g., foo.txt
if '.txt' is the standard Blosxom file extension)%indexes
: directories for which index files might need to be created or updated as part of static page generation, as well as individual entry files for which static pages might need to be generated%others
: all other files not falling into the above two categories%files
and %others
by invoking the filter
subroutine for each and every plugin that defines one (line 225)The hashes %files
and %others
are keyed by the name of the entry file (for %files
) or other item (for %others
), in the form of an absolute pathname; the value for each element in %files
or %others
is the date/time last modified for the corresponding entry file or other item.
The hash %indexes
is keyed by the name of the directory or entry file for which static page generation should be done, expressed as a pathname relative to the Blosxom data directory (e.g., 'a/b' or '2004/05/22'); the value for elements in %indexes
is 1 for elements corresponding to category directories or individual entries, and for elements corresponding to date directories is the same as the key (e.g., '2004/05/22').
The next section of the Blosxom code generates HTML or other output. For dynamic invocation of Blosxom this is relatively simple, since we need to generate only one page in response to the requested URL (lines 260-267):
generate
subroutine to create the page output, based on the category, date, entry, and flavour information from the requested URL (line 266)generate
(which includes the HTTP header for the appropriate content type) (line 266)For static invocation of Blosxom page generation is more complex, since we may need to generate several pages (lines 230-257):
%indexes
(lines 234-256) and then for each element loop through each directory component of the item (directory or entry file) corresponding to the element (lines 236-255)
@static_flavours
(lines 242-254)
generate
subroutine to create the output for the page (lines 250-252)Finally, we loop through the plugins and call each plugin's end subroutine in order to do any final processing (line 270).
generate
subroutine (lines 273-412)The generate
subroutine creates the actual output for a page of the desired flavour, taking as input the path information for the category, entry file, and/or date, along with the flavour and content type, and an indication of whether static or dynamic page generation is desired. The generate
subroutine also uses the hashes %files
, %indexes
, and %others
previously populated.
The specific tasks performed by the generate
subroutine are as follows:
interpolate
subroutine for variable interpolation in templates (lines 283-290)interpolate
subroutine (line 297)$blog_title
) in the 'head' template and add the result to the output (lines 305-307)$currentdir
argument, which holds information on the category and/or individual entry for which a page needs to be generated (lines 313-319)%f
hash (a copy of %files
) so that it contains information for just that entry (line 315)%f
and %others
, each representing an entry to be added to the generated page (lines 332-392)
$num_entries
(line 333)nice_date
subroutine (lines 415-424)The nice_date
subroutine converts OS-provided time values (expressed as the number of seconds since some fixed date) into year, month, day, etc., values that we can use for printing date/times and creating date-based URLs. For more information see the notes for lines 415-424.
package
defines a namespace for variables, subroutines, etc., so that their names won't conflict with names defined in other Perl code used by Blosxom and pulled in from other places.
See the following URL for more information:
http://www.perldoc.com/perl5.8.4/pod/perlmod.html#Packages
The scope of the configurable variables is within the blosxom package. We put "global" in quotes because, as the Perl documentation notes, "there's really no such thing as a global variable in Perl", in the sense of global variables as used in C and similar languages. However the configurable variables are like global variables in that their values are visible anywhere in the Blosxom code (unless "hidden" by other variable declarations as described in the notes to line 171). See also the notes to line 69.
The configurable variables can be referenced from Blosxom plugins as $blosxom::foo
where $foo
is a variable. Alternatively, a Blosxom plugin can include a package blosxom
statement prior to a section of code to allow Blosxom configurable variables to be referenced within that code section without having to preface the variables' names with "blosxom::". (For example, a plugin would do this when defining its own version of the interpolate
subroutine; see the notes to lines 283 and 285 for more information.)
See the following URL for more information on variable scope:
http://www.perldoc.com/perl5.8.4/pod/perlmod.html
Note when reading the documentation that the configurable variables are considered to be "dynamic" (as opposed to "lexical") variables.
In Perl a variable starting with '$' is a scalar (i.e., single-valued) variable. Note that unlike shell syntax the '$' is used when assigning to the variable as well as when using its value.
The $blog_title
variable is used to hold a string. Like shell variables Perl scalar variables can have either string or numeric values. String values can be delimited by either single quotes or double quotes; like the Unix shell, if the string is within double quotes then it can include references to other Perl variables (e.g., "A Blog by $author"
) and the values of those variables will be interpolated into the string, replacing the variable references.
Because of this variable interpolation, if you want to use a '$' in your blog title or description then you need to either precede the '$' with a '\' ("My \$64,000 Blog"
) or use single quotes to delimit the string ('My $64,000 Blog'
). (If you use single quotes for your string delimiter then you will also need to escape any single quote character in the string itself by preceding it with a '\', e.g., 'John\'s $64,000 Blog'
; a similar rule holds when you want to include a double quote in a string delimited by double quotes.)
For more information on Perl scalar variables see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perldata.html#Scalar-values
qw
In Perl a variable starting with '@' is an array variable that holds an ordered list of values indexed by array position (starting from 0 as the first position).
Here we define a 2-element array with the string values 'html' and 'rss'. qw
is a function that returns a list of words extracted out of a string enclosed within delimiters, e.g., qw/a b/
is the same as 'a', 'b'
. (Alternately you could use qw(a b)
or qw! a b !
or whatever.) This is a very common Perl idiom, as it eliminates the need to quote each and every word within the list.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perldata.html
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Regexp-Quote-Like-Operators
use vars
Here we declare global variables used in this package (actually, within the file, but the file just contains a single package). Note that use vars
was deemed obsolete as of Perl 5.6, being replaced by our
, but as used here supports use of Blosxom with earlier Perl 5.x versions.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perlmodlib.html#Pragmatic-Modules
http://search.cpan.org/~nwclark/perl-5.8.4/lib/vars.pm
use strict
use strict
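tells Perl to treat a number of unsafe constructs as errors instead of silently accepting them, such as references to variables that were not previously declared, symbolic references, and ambiguous barewords. (Warnings, by contrast, are enabled separately with use warnings or the -w switch.)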
For more information see the following URL:
http://search.cpan.org/~nwclark/perl-5.8.4/lib/strict.pm
use function
The next few lines import various Perl modules, making their functions and global variables available without needing to qualify the names with package names. (In other words, we can refer to bar()
rather than foo::bar()
where bar
is a function in the package foo
.)
On packages vs. modules: per the documentation, "A module is just a set of related functions in a library file, i.e., a Perl package with the same name as the file." Strictly speaking Blosxom 2.0 is a package but not a module; however Blosxom 3.0 will be a full-fledged module.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/func/use.html
The FileHandle module contains functions for basic file I/O operations: open
, new
, getc
, gets
, seek
, close
, etc.
For more information see the following URL:
http://search.cpan.org/~nwclark/perl-5.8.4/lib/FileHandle.pm
The File::Find module contains functions to traverse a directory tree in the file system, analogous to the Unix find
command. Blosxom uses File::Find functions and variables in its own find subroutine below.
For more information see the following URL:
http://search.cpan.org/~nwclark/perl-5.8.4/lib/File/Find.pm
The File::stat module gets a file's attributes, like the Unix stat
kernel routine. Blosxom uses File::stat functions and variables to get the date/time modified for entry files and related information.
For more information see the following URL:
http://search.cpan.org/~nwclark/perl-5.8.4/lib/File/stat.pm
The Time::localtime module gets the current date and time and performs other date/time-related operations, like the corresponding Unix functions. Blosxom uses Time::localtime functions in the subroutine nice_date
and elsewhere.
For more information see the following URL:
http://search.cpan.org/~nwclark/perl-5.8.4/lib/Time/localtime.pm
The CGI module is used to parse incoming HTTP requests (e.g., to get the URL being requested) and to create HTTP headers and HTML pages sent in response (see the subroutine generate
for an example).
Note that :standard
imports a standard set of functions and :netscape
imports optional functions for Netscape-specific HTML extensions.
For more information see the following URL:
http://search.cpan.org/~lds/CGI.pm-3.05/CGI.pm
Blosxom 2.0 is considered stable. Blosxom 3.0 is currently in development.
my variables
my creates a private variable visible only within the lexical scope within which it is defined (e.g., within a given code block enclosed by curly braces), and not visible anywhere else (including subroutines called from a given code block). In this case the lexical scope is considered to be the entire blosxom
package within the blosxom.cgi
source file.
For more information see
http://www.perldoc.com/perl5.8.4/pod/perlintro.html#Variable-scoping
http://www.perldoc.com/perl5.8.4/pod/perlsub.html#Private-Variables-via-my()
new
The FileHandle module presents an object-oriented interface, so new
in this context produces a new instance of the FileHandle class.
In object-oriented terms new
is a "constructor", i.e., a so-called "class method" that creates and initializes new objects. Unlike in object-oriented languages such as C++, in Perl a constructor can be called something other than "new", but using that name is a common convention.
For more information see
http://search.cpan.org/~nwclark/perl-5.8.4/lib/FileHandle.pm
http://www.perldoc.com/perl5.8.4/pod/perlobj.html
We create a hash table (or plain "hash" in Perl jargon) with month names being the keys and month numbers (as strings) being the values. Hashes are initialized by providing a list in which the odd entries are the key values and the even entries are the corresponding values, e.g., ('key1', 'value1', 'key2', 'value2')
. The syntax (a=>'b', c=>'d')
is equivalent to ('a','b','c','d')
and is intended to make hash initialization more understandable.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perldata.html
keys
and sort
functionsThis takes the list of keys in the previously-defined hash table, i.e., the list ('nil', 'Jan', 'Feb', ..., 'Dec')
, sorts it using a comparison function that compares the corresponding values in the hash table for each key, i.e., the values '00', '01', etc., and then assigns the resulting sorted list of keys to an array indexed by month number.
This is equivalent to defining the array as follows:
@num2month = ('nil', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec');
(Note that the 'nil' value is included because Perl arrays are indexed from 0 but month numbers start at 1.)
keys
is a function that takes as an argument a hash and returns either a list consisting of all the keys in the hash (if used in list context) or the number of keys in the hash (if used in scalar context). Here we're using keys
in list context, because as noted below the sort
function expects a list as an argument.
sort
is a function that takes as arguments the list of items to be sorted and (as an optional first argument) a subroutine defining how sort comparisons are to be done; in this case that subroutine is an "anonymous" inline routine enclosed in curly braces. $a
and $b
are special global variables used to hold the values being sorted at each step of the sort algorithm; <=>
is a comparison operator that returns -1, 0, or 1 depending on whether the first item is respectively less than, equal to, or greater than the second. (This is a numeric comparison.)
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/func/keys.html
http://www.perldoc.com/perl5.8.4/pod/func/sort.html
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Equality-Operators
||=
operatorThis defines $url
to be its existing value (if it has one) or (if it has no value) the value returned by the url
function (part of the CGI module) as described in the next note. (Perl has different namespaces for variables and functions, which is why we can name the variable the same as the function.)
The ||
operator is a logical "or" operator similar (but not identical) to that used in shell or C programming; $url ||= url();
is equivalent to $url = $url || url();
where the original value of $url
is considered false if it is undefined or its value is the empty string '' (or the number 0 or the string '0', which Perl also treats as false), and true otherwise. So if $url
already has a value then the second part of the conditional expression (after ||
) is not executed, and that existing value is (re)assigned to $url
; otherwise the second part is executed to obtain the returned value from url()
, and that value is assigned to $url
.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perlop.html#C-style-Logical-Or
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Assignment-Operators
url()
Note that url()
returns only the URL of the Blosxom CGI script itself, not the full URL being requested. Thus (for example) if the HTTP request were for the URL
http://www.example.com/cgi-bin/blosxom.cgi/2004/05/22
then url()
would return (and $url
would be set to) the URL
http://www.example.com/cgi-bin/blosxom.cgi
If you have configured the web server to hide the blosxom.cgi
part of the URL (as described in the FAQ referenced below) then the value of url()
will be that part of the full URL which was translated into the script location. For example, if you configured Apache using the ScriptAlias
directive as follows:
ScriptAlias /blog "/var/www/cgi-bin/blosxom.cgi"
then if the requested URL were
http://www.example.com/blog/2004/05/22
then url()
would return (and $url
would be set to) the URL
http://www.example.com/blog
For more information see the following URLs:
http://search.cpan.org/~lds/CGI.pm-3.05/CGI.pm#OBTAINING_THE_SCRIPT'S_URL
http://www.blosxom.com/faq/cgi/hide_cgi_bit.htm
We assign $url
a new value consisting of its previous value with the initial string "included:" (if present) replaced with "http:". This is intended for the case when output from blosxom.cgi
is included in an HTML file by a Server Side Include directive like the following:
<!--#include virtual="/cgi-bin/blosxom.cgi/2004/05/19" -->
When invoked in this way the URL returned by url()
above would be (for example)
included://www.example.com/cgi-bin/blosxom.cgi
instead of
http://www.example.com/cgi-bin/blosxom.cgi
For more information see the following URL:
http://httpd.apache.org/docs-2.0/howto/ssi.html
=~
operator and regular expression matching=~
is a special operator that takes the left side ($url
) and applies to it a pattern match specified on the right side (s/^included:/http:/
), in this case a pattern match that actually does substitution, using regular expressions modeled on those used in the Unix shell and utilities. (So, for example, in this case the '^' tells Perl to look for a match starting at the beginning of the string.) The result is that the value of $url
is modified if the match succeeds.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Binding-Operators
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Regexp-Quote-Like-Operators
This statement strips off a trailing slash from the URL value if present; the '$' in the regular expression tells Perl to look for a match at the end of the string.
Note that the value returned by the url()
function doesn't have a trailing slash, but the person configuring Blosxom may have included a trailing slash when specifying a non-default value for the $url
variable.
$depth
If $depth
is non-zero (i.e., limiting search to n directories deep) then we take the $datadir
path, count the number of path components, subtract 1, and add that to $depth
to get the new value. For example, if the value of $datadir
is /a/b/c
then a $depth
value of 2 would get changed to a value of 4.
The new value of $depth
can be interpreted as follows: Search through a directory only if the number of components in its path is $depth
or less. So in the previous example the directory /a/b/c/d
would be searched but the directory /a/b/c/d/e
would not.
tr
and and
operatorsCounting the number of path components is done using the tr
operator, which is typically used to modify a string by transliterating one set of characters with another, e.g., $s =~ tr[a-z][A-Z]
to change lowercase characters in $s
to uppercase. However in this case the set of replacement characters is empty ([]
) so no replacement is done; instead we simply use the standard return value from tr
, namely the number of times the character(s) in the search list (i.e., the '/' character in this case) was found.
Since the value of $datadir
is an absolute path (i.e., it starts with '/') and we trimmed a trailing '/' character if one was present (see above), the number of '/' characters will be equal to the number of components in the path.
[Note: There is a minor potential bug here: If the value of $datadir
were specified with multiple trailing slashes, e.g., /a/b/c//
, then the code above would remove only a single trailing '/', leaving one extra '/' at the end, and the number of directory components would be miscounted as being higher than it actually is. The fix is simple: Replace the existing statement $datadir =~ s!/$!!;
with the statement $datadir =~ s!/*$!!;
to look for zero or more '/' characters at the end of the string and remove any found; even safer would be $datadir =~ s!/*\s*$!!;
to remove trailing whitespace as well.]
The and
operator here is used to conditionally change $depth
only if it is non-zero; if $depth
is zero then it is interpreted as false and the expression after the and
is not executed. However any non-zero value will be interpreted as true and $depth
modified as described above.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Quote-and-Quote-like-Operators
http://www.perldoc.com/perl5.8.4/pod/perlreref.html
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Logical-And
We set the variable $static_or_dynamic
to 'static' or 'dynamic' to reflect the mode we're in. We're in static mode if all the following are true:
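the script is not being run as a CGI script (i.e., the GATEWAY_INTERFACE environment variable is not set)
the -password parameter has a value
$static_password is defined (see above)
the value of the -password parameter is the same as the value of $static_password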
Otherwise we're in dynamic mode.
eq
operator and CGI::param()
functionWe saw the and
operator above. The eq
operator tests for string equality. The expression $a ? $b : $c
is like that used in C: if $a
is true then return $b
, otherwise return $c
.
param()
is a CGI function, but it can also return values when the Perl script is invoked from the command line, e.g.
perl blosxom.cgi -password='secret'
would assign the string value 'secret' to the parameter -password
.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Equality-Operators
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Conditional-Operator
http://search.cpan.org/~lds/CGI.pm-3.05/CGI.pm#DEBUGGING
http://www.blosxom.com/documentation/users/configure/static.html
CGI::param()
functionIf we're in dynamic mode then we set the value of the -quiet
parameter to be 1. When setting parameters the param()
function takes an argument list similar in syntax to the way hashes are initialized, e.g., param(-name=>'a', -value=>'b')
would set the parameter a
to the value b
.
For more information see the following URL:
http://search.cpan.org/~lds/CGI.pm-3.05/CGI.pm#SETTING_THE_VALUE(S)_OF_A_NAMED_PARAMETER:
PATH_INFO
PATH_INFO
(the CGI environment variable whose value is returned by the path_info
function) contains any path information in the URL after the part of the URL that identifies the CGI script. For example, if the requested URL were http://www.example.com/cgi-bin/blosxom.cgi/2004/05
then the value returned by path_info()
would be /2004/05
.
For more information see the following URL:
http://search.cpan.org/~lds/CGI.pm-3.05/CGI.pm#FETCHING_ENVIRONMENT_VARIABLES
split
functionWe use my
to define a private array variable @path_info
. To set this variable we first use the split
function on the string returned by the path_info()
function (if it's non-empty), splitting that value into different components separated by the '/' character. (m{/}
is a regular expression that will match a single '/'.) The split
function returns a list of strings, which is why we use an array to hold the result.
If for some reason path_info()
returns an empty string then we split the value of the path
CGI parameter instead. This would allow you to use a URL like
http://www.example.com/cgi-bin/blosxom.cgi?path=/2004/05/22
if you wished to do so. Note that the ||
operator has a higher precedence than the comma operator, so the decision whether to use the value of path_info()
or param('path')
is made before that value is passed to the split
function.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/func/split.html
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Operator-Precedence-and-Associativity
shift
functionThe shift
function removes the first element of an array (and returns it, although here we simply discard the returned value). We do this because path_info()
returns a path with an initial '/', and the split
function as used above on that path will produce an empty string as the first element of the returned array; for example, the expression split m{/}, "/a/b/c"
will return the list ('', 'a', 'b', 'c')
. We don't want the initial empty string so we use shift
to get the list ('a', 'b', 'c')
instead.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/func/shift.html
Recall that after the part of the URL that references the Blosxom script itself (stored in $url
), a Blosxom URL can contain an additional path consisting of three possible parts: an optional set of categories, an optional set of year, month, and day values, and an optional reference to an individual entry. For example, the following are values that might be returned by the path_info()
function as applied to Blosxom URLs:
/society/literature
/2004/05/19
/music/index.rss
/personal/resolutions/2003/07
/cooking/italian/bruschetta.html
From the path returned by path_info()
we end up setting the following variables:
$path_info
: either an individual entry path including categories, subcategories, and entry name (e.g., /cooking/italian/bruschetta.html
) or a category/subcategory path for which we wish to see all entries (e.g., /society/literature
or /music
)
$flavour
: the desired flavour, whether explicitly specified in the URL (e.g., 'html' for /cooking/italian/bruschetta.html
or 'rss' for /music/index.rss
) or defaulted (e.g., as in /society/literature/
)
$path_info_yr
, $path_info_mo_num
, and $path_info_da
: the year, month, and day if present in the URL (e.g., for /personal/resolutions/2003/07
the year and month would be '2003' and '07' respectively while the day would be undefined)
Our first task is to extract the path information relating to categories; since we know that category names can't begin with a digit we can simply look for path components starting with alphabetic characters. However we have to stop before we get to any reference to an individual entry; we identify such entries by the presence of a '.' character in their names.
[Note: This implies two additional restrictions in Blosxom as currently designed: you can't have a category name containing a '.', and you can't reference individual entries using URLs that don't have a file extension at the end (as recommended by the W3C, among others).]
For more information see the following URLs:
http://www.blosxom.com/documentation/users/view.html
http://www.w3.org/Provider/Style/URI
while
loopA while
loop executes a block of code (in curly braces) as long as a given condition (in parentheses) is true. In this case before executing the code block we first check to see if the first element of @path_info
is defined and non-empty; otherwise there are no more components and we're done. ($a[i]
is the i'th element of the array @a
; note that it's distinct from the scalar variable $a
.)
If we have a further component, we then check to see if its value starts with an alphabetic character, by trying to match it against the regular expression character class [a-zA-Z]
starting at the beginning of the string ('^'); otherwise the component represents a date and not a category, and we're done.
Finally we check to verify that the component's value does not have a literal period (\.
) in it; otherwise the component represents an individual entry (e.g., "a.html") and we're done. (The operator !~
is the reverse of =~
, returning a true value when the pattern match fails.)
See the notes for line 112 below for the meaning of the parentheses in the regular expression /(.*)\.(.*)/
used to check for a period in the path component. For now we simply note that as used here the regular expression could have been replaced with the simpler regular expression /.*\..*/
without affecting things.
If the first element of @path_info
looks like a category then we append it to the scalar variable $path_info
, preceded by a '/', and remove the element from the @path_info array
. ($path_info
was defined above, with its initial value set to the empty string.) Note that shift @path_info
both does the removal and returns the removed element as a result. The .
operator concatenates two strings, in this case '/' and the removed first element. The .=
assignment operator is like the ||=
and +=
operators seen above, so that $a .= 'b'
is the same as $a = $a . 'b'
, where the .
operator concatenates two strings.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perlsyn.html#Compound-Statements
http://www.perldoc.com/perl5.8.4/pod/perlreref.html#CHARACTER-CLASSES
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Additive-Operators
If the flavour is specified by index.{flav}
, as in
http://www.example.com/cgi-bin/blosxom.cgi/music/index.rss
then it must be parsed from the PATH_INFO
value stored in @path_info
. However if the flavour is specified by ?flav={flav}
, as in
http://www.example.com/cgi-bin/blosxom.cgi/music?flav=rss
then its value must be obtained using param()
, since anything in the URL after a '?' is considered a CGI parameter and not part of PATH_INFO
.
$#path_info
$#path_info
returns the index of the last element of the array @path_info
. We match the value of that last element against a regular expression consisting of one or more characters followed by a literal '.' character followed by one or more characters to the end of the string. This match will succeed when the last element looks like, e.g., 'a.b', where we'll interpret 'b' as the flavour.
(Note that this regular expression is slightly different from the one used in the while
loop on line 107; the previous expression matched zero or more characters followed by a '.' followed by zero or more characters. In other words, the test at line 107 will match .
by itself, .a
, a.
, and so on, while the test here will not. In practice this doesn't matter: the first test was simply intended to reject path components that weren't categories, which can't contain '.'; the second test is intended to find flavour values, and for that purpose we need a component that actually has something after the '.', as well as before.)
The regular expression uses parentheses to save parts of the component that are matched, for later use. In particular, the regular expression /(.+)\.(.+)/
is used (instead of the simpler /.+\..+/)
to save the flavour value (matched by the expression in the second set of parentheses) and the entry name (matched by the expression in the first set of parentheses). The saved values can then be referenced by the special variables $1
(first part matched, the entry name) and $2
(second part matched, the flavour).
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perldata.html#Variable-names
http://www.perldoc.com/perl5.8.4/pod/perlreref.html#SYNTAX
If the last value in @path_info
does contain a '.' character then as noted above the value of the variable $2
will be the string to the right of the '.', and we save that value in $flavour
.
Because of the way regular expression matching works, if the final component actually has two or more periods, e.g., "example.com-news.html", $2
will be set to the string after the final '.', not the string after the first one. This "greedy" matching (i.e., match as many characters as you can) is exactly what we want to happen.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perlreref.html#QUANTIFIERS
If the first part (before the '.') of the last path component is not equal to 'index' then that component points to an individual entry, and we save both the entry name and flavour by appending them to the $path_info
variable that stores the category components of the path.
On the other hand, if the first part is 'index' then the original URL was not a request for an individual entry but rather a request for all entries in a particular category or for a particular day, month, or year, displayed using a specified flavour. For such requests the path might be something like /a/b/index.rss
or /2004/05/index.rss
. In this case we don't need to save the value 'index.rss' (or whatever) as part of $path_info
, since all we need is the flavour value.
pop
functionNow that we've extracted the needed information from the last element of @path_info
we use the pop
function to remove it.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/func/pop.html
If the final component of the path does not contain a period then either the flavour was specified using the flav
parameter, as in the URL
http://www.example.com/cgi-bin/blosxom.cgi/a/b?flav=rss
or the flavour was omitted entirely. In the latter case we set $flavour
to the default flavour defined in the configurable variables section.
Using |
in a regular expression lets you search for (and in this case replace) two or more alternative patterns, in this case zero or more '/' characters at the beginning of $path_info
and zero or more at the end. The 'g' option replaces all matches found, so we remove any '/' characters found at the beginning as well as any found at the end.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perlretut.html#Matching-this-or-that
At this point we've extracted from @path_info
any category names (at the beginning of the path) and any final path component associated with either an individual entry or an index.{flav}
reference. So the only components left in @path_info
should be date references (if any) from URLs containing sequences like /2004/05/19
, /2004/05
, or /2004
.
This statement assigns $path_info[0]
(i.e., the first element in the array @path_info
) to $path_info_yr
, $path_info[1]
to $path_info_mo
, and $path_info[2]
to $path_info_da
. If @path_info
doesn't have three elements then some or all of the three variables may end up undefined (starting with $path_info_da
).
In general you can assign a list of scalar values into a list of scalar variables:
($a, $b, $c) = (1, 2, 3);
($a, $b, $c) = @d;
where the righthand side could be a constructed list (using ','), an array, a function returning a list, or any other expression returning a list.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perldata.html#List-value-constructors
[Note: Although I don't believe the online documentation mentions this, based on this code it appears that you can use Blosxom URLs that identify months by their three-letter abbreviations instead of month numbers; so, for example, rather than identifying the date as /2004/01/31
it appears that you could request it as /2004/Jan/31
.
If so, there's no danger in mistaking a month abbreviation for a category name since the month must be preceded by a four-digit year, and Blosxom stops parsing the URL for categories as soon as it hits a component starting with a digit.]
lc
, ucfirst
, and undef
functions
This statement can be paraphrased as follows: if $path_info_mo
has a (non-empty) value, then check to see if that value is a string containing (at least) two consecutive digits (i.e., it matches the regular expression \d{2}
); if so, assign the value of $path_info_mo
to $path_info_mo_num
. If $path_info_mo
has a value that doesn't contain two digits, then put the value in "initial cap" form and look it up in the %month2num
hash to see if the value is a month abbreviation; if so, assign the month number from the hash to $path_info_mo_num
.
If the value of $path_info_mo
doesn't look like a month number or month abbreviation, or if it's empty or undefined, then $path_info_mo_num
is undefined as well.
The function lc
returns the lower-case equivalent of its string argument, and the ucfirst
function returns a copy of its argument with the first letter (only) capitalized. Hence ucfirst(lc 'jaN')
returns the value 'Jan', which is the capitalization style used in %month2num
.
The function undef
returns an undefined value that (as in this case) can be assigned to a variable.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perlreref.html#CHARACTER-CLASSES
http://www.perldoc.com/perl5.8.4/pod/perlreref.html#QUANTIFIERS
http://www.perldoc.com/perl5.8.4/pod/func/lc.html
http://www.perldoc.com/perl5.8.4/pod/func/ucfirst.html
http://www.perldoc.com/perl5.8.4/pod/func/undef.html
The template subroutine is used to look for and return the contents of flavour template files (e.g., head.html
, foot.html
, etc.). It can be overridden by a plugin that defines its own template subroutine; see the notes for line 161.
sub { ... }
defines an "anonymous" (i.e., not named) subroutine, a reference to which is then assigned to the variable $template
. (References are basically names that can be used to refer to variables and subroutines, and are the third type of value that a scalar variable can have, along with numbers and strings.) The subroutine can then be called using the syntax &$template()
where you can put subroutine arguments inside the parentheses.
The template subroutine is defined in this way (using a reference stored in a variable rather than a named subroutine) so that the subroutine can be overridden; a plugin can define its own template subroutine, and a reference to that can be assigned to $template
, replacing the reference to the original subroutine defined here.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perlsub.html
http://www.perldoc.com/perl5.8.4/pod/perlref.html
$flavour
is the flavour for which we are looking, e.g., 'html', 'rss', etc. $chunk
is the type of template we are looking for, e.g., 'head', 'foot', 'story', etc. $path
is the directory at which we should start our search, expressed as a pathname relative to the Blosxom data directory.
@_
Arguments to the subroutine are passed in a special array variable @_
, with the first three elements of that assigned to the private variables $path
, $chunk
, and $flavour
respectively.
do while
loopA do while
loop is like a while
loop except that the condition is checked at the bottom (after the loop is executed at least once) instead of at the top.
(The similarity between while
and do while
loops is only superficial, since in Perl the do {...} while
construct isn't considered to be a true loop. In particular, you can't put next
and last
statements within a do {...} while
; see the notes for lines 141 and 240.)
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/func/do.html
join
and open
functionsThe following statement is basically a backwards if statement: First we use the FileHandle $fh
(created above) and try to open a template file for read access ("<"), constructing a template pathname from the values of $datadir
, $path
, $chunk
, and $flavour
. So, for example, if $datadir
is '/blosxom', $path
is '/a/b', $chunk
is 'head', and $flavour is 'html', we look for a flavour template file '/blosxom/a/b/head.html'.
(Because the FileHandle module provides an object-oriented interface, we use the method invocation $fh->open(...)
rather than the function call open($fh, ...)
. Also note that if we have already opened a file using the FileHandle $fh
that file will be closed first before we open a new one.)
If the open succeeds (i.e., the template file exists and is readable) then we read in all the lines of the template file using the $fh
FileHandle and return a string containing all those lines concatenated together.
(<$fh>
would normally read only one line of the file, but using the join
function causes <$fh>
to be used in a list context -- because join
expects a list as its second argument -- and that causes <$fh>
to read all lines and return them as an array, with each array element being a newline-terminated line. The join
function then returns a string consisting of all the array elements concatenated together separated by the join
function's first argument, which in this case happens to be the empty string. So the returned result is a single string containing all the lines in the flavour template file, each terminated by a newline, e.g.,
<html>\n<body>\n<h1>A Blog</h1>\n...
for a typical head section.)
For more information see the following URLs:
http://search.cpan.org/~nwclark/perl-5.8.4/lib/FileHandle.pm
http://www.perldoc.com/perl5.8.4/pod/perlopentut.html
http://www.perldoc.com/perl5.8.4/pod/func/open.html
http://www.perldoc.com/perl5.8.4/pod/func/join.html
http://www.perldoc.com/perl5.8.4/pod/func/return.html
If the open fails (e.g., there was no file at the location we looked) then we modify the value of $path
by stripping off the last path component (e.g., if $path
has the value /a/b
we change it to /a
) and then we go back to the top of the loop and try the open again. (In other words, we search for the template file in the parent directory of the directory we just looked in.)
(To explain the regular expression a bit: '\/' matches a literal '/' and '[^\/]' matches anything but a slash, so '\/*[^\/]*' matches zero or more '/' characters followed by zero or more other characters. The regular expression \/*[^\/]*$
means look for this pattern at the end of the string, so that when the substitution is done -- replacing the matched pattern by an empty string -- it removes the last component of $path
. Finally, we use parentheses to save the matched pattern in the $1
variable for later checking, hence (\/*[^\/]*)$
is the final regular expression used.)
If we never succeed in opening a template file then the loop ends when all the path components have been removed, the matched pattern is an empty string so that $1
is empty and hence false, and the and
test fails.
If we never succeed in opening a template file (i.e., we drop out of the do while
loop) then we return a string consisting of lines from a flavour template already stored in a multidimensional hash, using $flavour
and $chunk
as keys. (This hash is defined below; recall that right now we are defining the subroutine, not executing it. See the notes for line 144 for more information.)
We set the %template
hash variable to contain nothing, i.e., no keys and no values.
Read in and store the default templates defined in the data section of this file, saving them in %template
.
<DATA>
<DATA>
causes lines to be read from the data section of this file (i.e., blosxom.cgi
). The data section starts after a line consisting of __DATA__
by itself. In this context <DATA>
returns a line at a time, returning an undefined value (and thus ending the while
loop) when we reach the end of the file.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perldata.html
__END__
and last
Using <DATA>
would continue to read lines after __DATA__
until the end of the blosxom.cgi
file. However in our case we may want to put some additional text after the __END__
line (which marks the end of what the Perl compiler parses). We therefore explicitly check for the presence of __END__
on a line by itself, and if we find it we use the last
command to exit the while
loop immediately.
Note that since we are not using the =~
operator the string pattern match is done against the special variable $_
that holds the line just read from the data section using <DATA>
.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perlsyn.html#Loop-Control
Note that there are a couple of subtle points about the test for __END__
. First, the test is actually for either zero or one occurrence of __END__
, so the test would succeed (and reading of data end) if the __DATA__
section contained a blank line at some point. Second, the pattern match requested is for __END__
starting from the beginning of the line (^) and ending at the end of the line ($), with nothing else present. But the string being tested against (the value of the $_
) variable does in fact have something else in it, namely a newline at the end of the string.
Why then does the test work? Because as noted in the Perl online documentation, "the '^' character is guaranteed to match only the beginning of the string, the '$' character only the end (or before the newline at the end), ..." (emphasis added). In other words, the newline at the end of $_
is ignored for the purpose of matching the specified pattern /^(__END__)?$/
.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perlre.html#Regular-Expressions
As can be seen by looking at the data sections below, the default templates are each defined as a single line containing the flavour, the type of template, and the template data itself, each field separated by whitespace. We therefore parse each line of the data section into three whitespace-separated fields, and then assign the values to the private variables $ct
, $comp
, and $txt
respectively.
In the code thus far we have seen pattern matching done in a scalar context; in that context a pattern match returns a true value if the match succeeds and a false value if it fails. However here the pattern match is being done in a list context because of the assignment to ($ct, $comp, $txt)
. (Recall that this is comparable to an assignment of the form @a = ...
where @a
is an array variable.)
When done in a list context a pattern match will return an array ($1, $2, ...)
containing the parts of the string that were matched. Hence in this context $ct
will be assigned the value of $1
, $comp
will be assigned the value of $2
, and $txt
will be assigned the value of $3
.
To expand a bit on the regular expression: \s
matches a whitespace character (space, tab, etc.) and \S
matches a non-whitespace character. The first field gets matched by ^(\S+)
, the second field gets matched by (\S+)
, and the third field (which can contain spaces) gets matched by (.*)$
; the field patterns are then separated by the \s
pattern.
[Note: The regular expression looks for a single whitespace character between the fields. On each line in the data section there is in fact only a single space between the flavour specifier and the template type specifier, so this works out OK. However on some lines there is more than one space between the template type specifier and the template content. This does not cause any problem in practice, since the pattern for the third field can match spaces; the extra spaces are simply included as leading whitespace in the value matched for the third field and then assigned to $txt
.]
We modify $txt
to change literal occurrences of '\n' (i.e., the '\' character followed by the character 'n') to occurrences of the newline character.
'\\' in the pattern being searched for matches for a literal '\', and '\n' in the replacement string is interpreted as a newline character. The g
option does a global search and replace as noted above while the m
option searches in multiline mode.
Multiline mode treats the string as a multiline buffer, so you can use '^' and '$' to match at the beginning and end of newline terminated substrings within the string as a whole.
[Note: It's not exactly clear why multiline mode is used in this context, particularly since the regular expression doesn't use either '^' or '$'; in testing the substitution seemed to work fine even without the m
option.]
We store the default flavour template text read from the data section, indexing it by the flavour and type of content.
The usage $a{$b}{$c}
is an example of the use of Perl references to simulate multi-dimensional arrays or nested hashes. To expand on this: the syntax $a{$b}{$c}
is equivalent to $a{$b}->{$c}
, which in turn is equivalent to ${$a{$b}}{$c}
. Here %a
is a hash, the value of $b
is a key for that hash, and the hash value $a{$b}
is a reference that points to another hash. (The second hash is anonymous, i.e., it has no name of its own.) To refer to a value in the second hash we use ${$a{$b}}{$c}
where the value of $c
is a key in the second hash. As noted above we can also use the syntax $a{$b}->{$c}
instead, and can in turn shorten that to $a{$b}{$c}
.
When we make an assignment like $a{$b}{$c} = "def"
Perl automagically creates the anonymous hash and stores a reference to it in $a{$b}
. If Perl didn't do this then you'd have to go through the following machinations to make the same assignment (assuming that the hash %a
already existed):
%h = (); # Create an empty hash %h
$h{$c} = "def"; # Store value "def" in %h at key $c
$a{$b} = \%h; # Store reference to %h in hash %a at key $b
In this example the value could then be referenced as either $h{$c}
or ${$a{$b}}{$c}
. Per the online Perl documentation, "Anywhere you'd put an identifier ... as part of a variable ... name, you can replace the identifier with a simple scalar variable containing a reference of the correct type". So we are replacing the identifier "h" in $h{$c}
with the scalar variable $a{$b}
that contains a hash reference. We could actually use the syntax $$a{$b}{$c}
for this but we use the extra pair of curly braces to clarify what's going on. ${$a{$b}}{$c}
then becomes $a{$b}{$c}
through the alternative syntax discussed above.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perlreftut.html
http://www.perldoc.com/perl5.8.4/pod/perlref.html#Using-References
opendir
function
If there's a plugin directory defined we open it and look for plugins, using the file handle PLUGINS
; we use the opendir
function instead of open
because we are opening a directory, not a regular file.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/func/opendir.html
readdir
, grep
, and -f
functions and the foreach
loop
Working backwards from the end of the statement: We use the readdir
function to return a list of all the entries in the plugin directory, and then use the sort
function to sort those entries in the default (alphabetical) order. (readdir
returns all directory entries because it's being executed in a list context, since sort
expects a list argument; otherwise readdir
would return one directory entry at a time.)
We then use the grep
function to test each of the sorted directory entries against the specified expression (in curly braces) and return a list consisting of only those entries for which the expression is true. In this case the expression for grep
is a compound expression consisting of a regular expression and a file test function anded together.
We first test using /^\w+$/
to make sure that the directory entry starts with and contains only alphanumeric characters or '_'; this eliminates directory entries for .
(the current directory), ..
(the parent directory), and hidden files (e.g., .a
). (Note that we don't use the =~
operator here because we are matching against the special variable $_
that grep
sets in turn to hold the value of each element of the list passed to it.)
We then test using the file test function -f "$plugin_dir/$_"
to verify that the directory entry actually is a file and not something else; this eliminates directory entries for the plugin state directory and other subdirectories that might be present, as well as directory entries for special files like device files, named pipes, and the like. (Again we reference the special $_
variable set by grep
.)
[Note: Symbolic links do pass the -f
test (at least on Unix and Unix-like systems) if (and only if) they point to regular files. Unless other considerations apply, this should allow you to put a plugin file in another directory and put a symlink in the plugin directory itself.]
Finally, we use a foreach
loop to iterate over each element in the list of plugins, assigning the value of each element to the variable $plugin
in turn and executing the statements in the following code block.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/func/readdir.html
http://www.perldoc.com/perl5.8.4/pod/func/grep.html
http://www.perldoc.com/perl5.8.4/pod/func/-X.html
http://www.perldoc.com/perl5.8.4/pod/perlsyn.html#Foreach-Loops
http://www.perldoc.com/perl5.8.4/pod/perlvar.html
Recall that plugins can have a (normally two-digit) number at the beginning of their names (to enforce a particular plugin order) and can also have an underscore character ('_') at the end of their names to disable them from being used.
Here we use a regular expression to match and save the actual plugin name and look for a concluding '_' if present. (We no longer need the numeric prefix since we are now processing the plugins in the proper sort order.) Note that the regular expression as written allows underscores to be used as part of the plugin name itself; only an underscore at the end is special.
The plugin name and the (optional) trailing underscore are saved in the special variables $1
and $2
and then assigned to the private variables $plugin_name
and $off
respectively. (See the note to line 142 for more information on pattern matching in a list context.)
If the final underscore is present ($off
has the value '_') we set $on_off
to -1 to indicate that the plugin is disabled; otherwise $on_off
is set to 1 to indicate an active plugin.
require
function
We include the code for the current plugin. (This is somewhat analogous to #include
in C.) Note that since we are supplying a pathname the require
function will look for the plugin at the pathname (instead of looking in the directories specified by @INC
, the Perl search path analogous to LD_LIBRARY_PATH
and similar environment variables in Unix.)
[Note: The Perl online documentation for require
mentions only searching in @INC
directories for a filename, and does not explicitly address using a full pathname. This is presumably just an oversight.]
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/func/require.html
Now that the code for this plugin has been loaded we can call subroutines defined in the plugin. We first call the plugin's start routine, using the plugin's name in a method invocation (see below). Assuming that the start routine exists and returns a true value, we then use the plugin's name as a key to put the plugin's $on_off
value into the %plugins
hash. Finally, we create a new element in the @plugins
array and set its value to the plugin name. (Recall that %plugins
and @plugins
are entirely different variables that just happen to share the same name.)
Note that we set $on_off
to the value -1 for off instead of 0 because otherwise the middle expression (between the two and
's) would have evaluated false, and we would never have executed the third expression to set @plugins
.
For those wanting a more in-depth explanation, calling the start routine works as follows:
A plugin "abc" has to define a package abc
, as noted in the Blosxom plugin developer documentation. So as a result of the "abc" plugin being loaded (by require
) we can now refer to subroutines and variables defined by the package. (Strictly speaking we can't refer to everything defined by the package, but let's ignore that for now.) For example, if a scalar variable $foo
is defined by plugin "abc" (i.e., package abc
) then we could refer to it as $abc::foo
to obtain its value. Similarly we could call the start subroutine in package abc
using the notation abc::start()
.
However we have a problem: the Blosxom code doesn't know beforehand that there's going to be a plugin "abc" (or "foo", or whatever), so the Blosxom code can't use abc::start()
to invoke package abc
's start subroutine. The solution is to use a different way to call a routine defined in a plugin: Blosxom invokes abc::start
as a method rather than calling it as a subroutine.
Methods are a concept from object-oriented (OO) programming, in which (in theory) everything of interest is an "object", objects can belong to "classes", classes can have "methods" that operate on objects of that class, classes can be "subclasses" of higher-level classes, and so on.
For Blosxom (at least Blosxom 2.0) we don't need to worry about the full OO story, we simply need to know that in Perl terms an object is just a reference, a class is simply a package and a method is a subroutine defined by a package. So in our example rather than using abc::start()
to call the start subroutine in package abc
, we can use the method invocation notation abc->start()
instead. (Method invocation doesn't work exactly like subroutine calling, particularly in terms of which arguments are passed, but we can ignore that for now.)
However we still have the problem of Blosxom not knowing about package abc
beforehand, so using abc->start()
won't work either. Fortunately in method invocation instead of a package identifier to the left of the ->
we can substitute a scalar variable whose value is a string representing a valid package name. In particular, rather than using abc->start()
to invoke the start subroutine (using the package identifier abc
), we can set a scalar variable $foo
to the value "abc", and then use $foo->start()
to invoke the subroutine. (We're using $foo
as an example; Blosxom actually uses the variable $plugin_name
previously assigned.)
For more information see the following URLs:
http://www.blosxom.com/documentation/developers/plugins.html
http://www.perldoc.com/perl5.8.4/pod/perlobj.html#Method-Invocation
[Note: (This is for people like me who get led astray reading Perl documentation.) The usage $foo->start()
looks similar to the use of ->
with Perl references as previously discussed, and it's tempting to think of $foo
in this context as a kind of reference, in particular a symbolic reference, a Perl concept where a scalar variable containing the name of a variable or subroutine can get interpreted as a (real) reference to that (second) variable or subroutine.
However as far as I can tell there is no connection between symbolic references and use of a scalar variable to specify the package (class) name in method invocation. This is supported by the fact that Blosxom does a use strict
, which flags use of symbolic references as an error; however this doesn't affect the use of scalar variables in method invocation.]
closedir
function
Having cycled through all the plugins, we now close the PLUGINS
file handle we used to open the plugins directory.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/func/closedir.html
We loop through the @plugins
array, which now contains a list of plugin names for both active and disabled plugins. For each plugin we look up its name in the %plugins
hash and determine whether the plugin is enabled (1) or disabled (-1). If a plugin is enabled we then use the plugin name to invoke the can
method to see if a template subroutine is defined by the plugin's package.
If so then we invoke the plugin's template subroutine, which returns a reference to a new (anonymous) subroutine to handle templates; we save the reference to that subroutine in the $template
variable (overriding the value set earlier, representing the default template subroutine), and then we exit the loop (and don't bother to look at the other plugins).
can
method
Earlier we saw a method invocation used to call a plugin's start subroutine, using the expression $plugin_name->start()
where the value of the scalar variable $plugin_name
was a string with the plugin's name (which is the same name as its package). The expression $plugin->can('template')
looks similar, except for the addition of an argument to be passed to the method.
However plugins don't actually define a can
method; where then does it come from? Here we see more of the object-oriented features of Perl: When doing method invocation (but not when doing a standard subroutine call) Perl will look for a method not only in that package/class (recall that they are the same in Perl), but also in higher-level classes from which the class in question inherits methods.
In particular, Perl has a package UNIVERSAL
from which all packages inherit the can
method. The expression abc->can('foo')
will invoke the can
method and check to see if the package abc
has the foo
method defined; if so, it returns a reference to the method, and otherwise it returns an undefined value. The Blosxom code uses a similar expression, but with a scalar variable holding the package name in place of the package identifier (which it can't know a priori).
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perlobj.html#Default-UNIVERSAL-methods
can
method
By knowing a little bit about how Perl represents subroutines internally we can get a general idea of how the can
method works. Internally Perl identifiers for variables, subroutines, etc., are stored in a special hash known as a "symbol table". Every package has its own symbol table, among other things to support the Perl feature that different packages can have different variables that happen to have the same names. A package's symbol table has entries for variables and subroutines defined in that package (except for lexically-scoped items, which we ignore here).
So if package abc
(corresponding to the "abc" plugin) has defined a template subroutine/method, then in the symbol table for package abc
(which can be accessed from Perl as the hash variable %abc::
) there will be a hash element with key 'template' that will have as its value a special data object called a "typeglob" (the typeglob value is accessible from Perl as $abc::{'template'}
or *abc::template
); that typeglob in turn can be used to find a reference to the template subroutine (accessible as *abc::template{CODE}
using a hash-like notation).
Given a package and a string with the name of the desired method, the can
method looks in the package's symbol table to find an entry for that name, and then looks at the typeglob to see if there's actually a subroutine defined with that name. (After all, the package might have a scalar variable, hash, or array with the same name as the subroutine.) The can
method then returns the subroutine reference obtained from the typeglob, or an undefined value if no such reference was found.
One question remains: How does the can
method know the package for which it's searching for a method? Because when the can
method is invoked Perl passes it an extra argument containing the name of the package/class on which the can
method was originally invoked (the abc
package in our example).
Such an extra argument is passed as the first argument to any subroutine invoked as a method (although in some types of method invocation the first argument is a reference and not a class/package name). The presence of this additional argument is another way in which method invocation is different than a subroutine call.
If you happen to read code for plugins, this is why some subroutines have an argument $pkg
(or whatever) that's not shown in the Blosxom code invoking that subroutine. The $pkg
argument is present only for plugin subroutines that take arguments in the first place, since in that case the subroutine has to skip over the $pkg
argument before getting to the "real" arguments. Plugin subroutines that don't take arguments (like the start and template subroutines) don't worry about this; they just ignore any arguments passed, including the package name argument.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perlmod.html#Symbol-Tables
http://www.perldoc.com/perl5.8.4/pod/perlref.html
load_template
subroutine
[Note: Since I've never seen code for Blosxom versions earlier than 2.0 I'm just going to ignore this code and not worry about it. It doesn't seem relevant for current 2.0-based plugins.]
We define a default subroutine to find entries, just as we previously defined a default subroutine to handle flavour templates. We define this as an anonymous subroutine and then store a reference to that subroutine in the variable $entries
. A plugin can then have a subroutine entries
(not to be confused with $entries
) that defines a new anonymous subroutine and returns its reference as a replacement for the reference in $entries
.
The entries subroutine returns a list containing three things: a hash of all the files representing individual entries (%files
), (for static rendering only) a hash of all directories needing index pages generated and individual entry files needing static pages generated (%indexes
), and a hash of all other files found (%others
).
Note that the private lexical variables %files
, %indexes
, and %others
declared here are entirely distinct from the global variables of the same names declared on line 69. In general using my
to declare a private variable within a given lexical scope will "hide" any global variables of the same name, as well as private variables of the same name declared at a higher-level lexical scope. (See the notes to lines 333 and 357 for another example of such hiding.)
find
subroutine
The default entries subroutine uses the find
subroutine from File::Find to do all the work. The find
subroutine is analogous to the Unix find
command and takes two arguments, a list of directories in which to search (here just $datadir
, the Blosxom data directory) and a reference to a subroutine that will be called by find
for each directory entry (e.g., file, subdirectory, symlink, etc.) found in the search. (Here we define that subroutine as an anonymous subroutine, which automatically produces the reference to be passed in.)
To help clarify how find
is used, if we wanted to mimic the operation of the simple Unix command
find /blosxom/data -name 'index.*' -print
(find all items whose filenames start with "index.", and print their pathnames) we could call find
as follows:
find( sub { /^index\..*\z/s && print "$name\n"; }, '/blosxom/data');
Here $name
(also known as $File::Find::name
) is a variable that find
sets to the current pathname being processed.
For more information see the following URL:
http://search.cpan.org/~nwclark/perl-5.8.4/lib/File/Find.pm
$File::Find::dir
The value of $File::Find::dir
is the absolute path of the directory currently being searched. We count '/' characters in the path using tr
to obtain the number of directory components in the path.
$depth
We don't process the entries in a directory if it exceeds a specified search depth limit. Recall that if $depth
was originally set to a non-zero value (i.e., to limit the depth of search) then that value was adjusted to account for the number of components in the path to the Blosxom data directory. See the notes for line 94 for more information.
As noted above, $File::Find::name
contains the absolute pathname of the item we are currently processing. We check to see if the current item appears to be a Blosxom entry: Its filename has the proper extension (.txt
by default), it's not an index file or hidden file, and it's readable as a file (e.g., as opposed to being a directory with a name that looks like a blosxom entry). If so, we do further processing on the item as described below to build the %files
list. Otherwise we consider adding the item to the %others
list, as described in the notes for line 208.
The regular expression matching here has some subtleties worth exploring. Items that are entries are going to look like, e.g., /blosxom/data/foo.txt
(for an entry in the Blosxom data directory itself) or /blosxom/data/a/b/bar.txt
(for an entry in a subdirectory somewhere below the Blosxom data directory). For reasons that will become more clear below, we want to save the basename of the item's filename (e.g., foo
or bar
respectively in our example) as well as the sequence of subdirectories between the data directory and the filename (e.g., '' and a/b
respectively in our example).
With that in mind let's look more closely at the pattern match. First, we use m!...!
to delimit the pattern to be matched, as opposed to the usual /.../
, because the '/' character is part of the pattern itself and we don't want to have to escape it (i.e., as "\/"). We then match the beginning of the path against the data directory with ^$datadir/
; this would match /blosxom/data/
in our example above.
To match the subdirectory components we use the pattern (?:(.*)/)?
. The pattern .*/
by itself would match subdirectories up to the final '/' (e.g., in the example item ...a/b/bar.txt
above), and in order to save the subdirectory components (minus the trailing '/') we could use the pattern (.*)/
. However we also have to account for the possibility that the entry might be in the data directory itself, in which case there wouldn't be any subdirectory names and no second '/' character; we could handle this case using the pattern ((.*)/)?
(i.e., match either one or zero occurrences of (.*)/
).
However now we're capturing the subdirectory part of the path twice: once without the trailing '/' (e.g., a/b
) and once with it (e.g., a/b/
); to avoid this redundancy we instead use the pattern (?:(.*)/)?
. (?:...)
is like (...)
except that it doesn't capture the matched string; as a result (?:(.*)/)?
captures only the string matched by (.*)
, and puts it into $1
.
To match the item's filename we would use a pattern like (.+)\.txt$
if we knew the extension would always be .txt
: we look for one or more characters, then a literal '.', then the extension at the end of the string, and we capture the basename (i.e., the characters before the '.') for later use. In the case of Blosxom the value of the extension we're looking for is in a variable, so we use the pattern (.+)\.$file_extension$
instead, where the value of $file_extension
gets interpolated into the pattern as it would into a double-quoted string.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perlre.html#Extended-Patterns
We skip over faux entries like index.txt
, .foo.txt
, and foo.txt
where foo.txt
can't be read as a file (e.g., it's a directory instead). Note that the last test also implies that Blosxom will silently ignore entry files if the web server userid (e.g., "http") does not have permission to read them (but does have permission to search the directory in which they're located).
[Note: I need more information on the treatment of symlinks by Blosxom. A symlink can pass the -r
test if it points to a readable file. Are there any other considerations that come into play here?]
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/func/-X.html
and
operators
If the current item passes our initial test to see if it might be an entry, we do a series of additional tests and operations, in the form of a series of expressions a and b and c and ... f
where each expression is evaluated and we stop if any expression evaluates to a false value. Note that in this case some of the expressions anded together are parenthesized expressions of the form (x or y or ... z)
.
If we're showing future entries (i.e., $show_future_entries
is true) then we proceed to the next test, otherwise the "last modified" time of the current item must be less than the current time. Note that stat(...)->mtime
is a method call where the left hand side of the ->
operator is an object reference (as opposed to a class/package name), in this case a File::stat object returned by stat(...)
. Both mtime
and time
are expressed in seconds since some fixed date ("the epoch") and hence are directly comparable.
For more information see the following URLs:
http://search.cpan.org/~nwclark/perl-5.8.4/lib/File/stat.pm
http://www.perldoc.com/perl5.8.4/pod/func/stat.html
http://www.perldoc.com/perl5.8.4/pod/func/time.html
%files
hash
The %files
hash has the entry's absolute pathname as a key and its modification time as a value. If we didn't have to worry about modification times we could just use an array of entry pathnames but we need to keep a record of the entry modification times, in particular to do sorting of entries and to display the dates for entries.
Here we figure out which index.*
files we will need to generate (or regenerate) when we're doing static rendering, based on the presence of new and/or updated entries.
[Note: It appears that the %indexes
hash will be populated even if we are doing dynamic rendering, although it's not clear that %indexes
will be used in that case.]
In general we will have two types of index files that need to be generated: index files for directories corresponding to categories (e.g., a/b
for an entry foo.txt
in that directory) and index files corresponding to dates (e.g., 2004
, 2004/05
, and 2004/05/22
for an entry foo.txt
last modified on May 22, 2004). (Note that the main Blosxom data directory is a special case of a category directory.)
As we determine which index files need to be (re)generated we build up a list (in %indexes
) of the directories in which they need to be created. We also use %indexes
to build up a list of individual entries for which static pages need to be generated.
-all
parameter for static rendering
If the -all
parameter was passed in with value 1 (i.e., -all=1
on the command line) then we (re)generate all index.*
files.
For more information see the following URL:
http://www.blosxom.com/documentation/users/configure/static.html
If there is no index file of the default flavour (e.g., index.html
) for the directory in which the entry is located ($static_dir/$1
) then we generate a new one. Recall that @static_flavours
is the list of flavours to be generated statically; $static_flavours[0]
is 'html' by default.
(Note that we actually end up generating index files for all the flavours in @static_flavours
, not just the first flavour. It's just more convenient to check for only one flavour, assuming that if its index file needs to be generated then the index files for the other static flavours do too.)
If the default index file (e.g., index.html
) is older than the entry being processed then we update the index.*
files in the entry's directory.
(Again, we're checking the index file for one flavour and extrapolating the results for the other static flavours.)
%indexes
hash and category directories
The %indexes
hash uses the directory pathname relative to the Blosxom data directory (e.g., a/b
) as a key. This relative pathname can also be thought of as a relative URL, with the base URL being the URL that resolves to the Blosxom script.
For an %indexes
element corresponding to a category directory (e.g., a/b
) we set the value of the element (e.g., $indexes{'a/b'}
) to 1. (See the note for line 203 for the value of %indexes
entries for date directories, e.g., 2004/05/22
.)
If an entry was created on a certain date (e.g., May 22, 2004) then we need to create index files in a subdirectory corresponding to that date (e.g., 2004/05/22/index.html
) so that date-based Blosxom URLs will work properly.
Note that the nice_date
subroutine (defined below) takes a time in seconds since the epoch (here the entry's "last modified" time as stored in %files
) and returns a list containing the various parts of the date/time broken out.
Here we need only the year, month number, and day, so rather than using the entire list returned by nice_date
we just use the elements we need, using Perl slice notation: @a[5,2,3]
means a list consisting of $a[5]
, $a[2]
, and $a[3]
, where here @a
is replaced by (nice_date(...))
. (The parentheses around nice_date(...)
are needed for proper Perl syntax.)
We then take the slice, e.g., ("2004", "05", "22")
, and join the elements with '/' to get the relative path we need, e.g., 2004/05/22
.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perldata.html#Slices
%indexes
hash and date directories
We add the date subdirectory (e.g., 2004/05/22
) to the %indexes
hash.
For an %indexes
element corresponding to a date directory we set the value of the element (e.g., $indexes{'2004/05/22'}
) to the relative pathname of the date directory itself (e.g., 2004/05/22
, same as the key). (Recall from the notes for line 200 that for %indexes
elements corresponding to category directories we set the values of the elements to 1.)
%indexes
hash and static entry pages
If we are generating static entry pages then we also add the entry's relative pathname (e.g., a/b/foo.txt
) to %indexes
. (We can't just use $File::Find::name
as the key here, as we did in %files
, because that's an absolute pathname that includes the Blosxom data directory.)
We use the conditional expression ($1 ? "$1/" : "")
because we have to handle specially the case when the entry is in the Blosxom data directory itself and not in a subdirectory somewhere underneath it; in that case the subdirectory part of the entry's pathname (the middle part stored in $1
) will be empty, and we don't want to add an extra '/' we don't need.
For a %indexes
element corresponding to an individual entry we set the value of the element (e.g., $indexes{'a/b/foo.txt'}
) to 1, the same as for %indexes
elements for category directories.
%others
hash and non-entry files
As noted above, we come to the else
block when the item being processed does not appear to be a Blosxom entry file (e.g., it might be an existing file like foo.html
).
If the item is not a directory and it's readable then we add it to the %others
hash, using its absolute pathname as the key and its "last modified" time as the value. (This is the same way the %files
hash is structured.)
find
We've finally come to the end of the first argument to find
, the anonymous subroutine to process items, and we include $datadir
as the second argument, the directory at which we wish to start searching for items.
At the end of the default entries subroutine we return a list of references to the %files
, %indexes
, and %others
hashes.
Recall that %files
, %indexes
, and %others
were defined as private variables of this anonymous subroutine using my
. This makes them so-called "lexical" variables whose scope is limited to the subroutine, i.e., they would not normally be visible outside this subroutine. However by passing back references we make it possible for other parts of the Blosxom code to use the values of the %files
, %indexes
, and %others
variables, and we ensure that the values of the variables stick around as long as we need to access them. As the Perl online documentation puts it, "So long as something else references a lexical, that lexical won't be freed... This means that you can pass back or save away references to lexical variables".
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perlsub.html#Persistent-Private-Variables
This is exactly the same approach we used earlier to allow plugins to override the default template subroutine. See the notes for line 161 for more information concerning how this code works.
We invoke the entries subroutine to search for entries and build the %files
, %indexes
, and %others
hashes. Because $entries
is a reference to an anonymous subroutine (either the one we defined above or one defined by a plugin to override the default) we use &
to dereference the reference and actually call the subroutine.
Also recall that the entries subroutine returns a list of references to hashes, not the hashes themselves. That's why we assign the list of returned values into scalar variables, e.g., $files
will now have as its value a reference to the %files
hash created in the subroutine.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perlref.html#Using-References
We dereference the returned references in $files
, $indexes
, and $others
to set our own hash variables %files
, %indexes
, and %others
.
Note that despite having the same names these are separate and distinct variables from the %files
, %indexes
, and %others
variables defined in the default entries subroutine: the %files
, etc., variables in the entries subroutine are private lexical variables while the %files
, etc., variables here are global variables for the Blosxom code (originally defined above at line 69 with use vars
).
Also note that the assignments %files = %$files
, etc., actually create copies of the original hashes constructed by the entries subroutine, just like an assignment %files = %h
where %h
is some existing hash.
Finally, the code used to assign %others
reflects the fact that (as shown in the example in the Blosxom plugin developer's documentation) a plugin might not actually build a %others
list and return a reference to it; in that case $others
would be undefined or empty. We therefore use the ref
function to verify that $others
is a proper reference before we attempt to dereference it, otherwise we assign %others
to be an empty hash.
For more information see the following URLs:
http://www.blosxom.com/documentation/developers/plugins.html
http://www.perldoc.com/perl5.8.4/pod/func/ref.html
Having constructed the lists of entries and other files, we allow plugins to modify that list themselves by defining a filter subroutine. The code here is similar to the code used for the template and entries subroutines: we iterate over all plugins, check to see that the plugin is enabled and it defines a filter method, and if so then we invoke the method, passing references to the %files
and %others
hashes. The major difference here is that we do not terminate the loop early as soon as we find a plugin with a filter subroutine; instead we call the filter subroutine for each and every plugin that defines one.
Note that although the code passes references to both %files
and %others
the Blosxom developer documentation mentions passing only the %files
reference. (One more reason to read the actual code :-) Since the filter subroutine is passed references to the hashes it can modify them directly by deleting, modifying, or even adding hash elements.
Finally, note that although the return value from the filter subroutine is assigned into the $entries
variable used earlier to hold a reference to the entries subroutine, the return value is simply a 0 or 1 return code indicating whether the filter subroutine for a given plugin succeeded or failed. Since we already called the entries subroutine and won't do so again, we no longer need the original value of $entries
; from this point on in the code $entries
is used simply to save return values from plugin subroutines.
[Note: Arguably it's bad coding style to re-use $entries
in this potentially confusing way. As it happens it doesn't appear to be necessary to use a variable for this purpose anyway, since $entries
is assigned to but never referenced -- why not just use code like
foreach my $plugin ( @plugins ) { ... and $plugin->filter(...) }
where we just test the return value directly and don't save it?]
For more information see the following URL:
http://www.blosxom.com/documentation/developers/plugins.html
We check to see if we are generating static pages or dynamic pages, and execute the appropriate code.
[Note: The conditional expression below exactly duplicates the expression used at line 99 above to set the variable $static_or_dynamic
; why not just use the code
if ($static_or_dynamic eq 'static') {
instead?]
-quiet
option
We print a status message (on stdout) unless the option -quiet
was passed on the command line.
%done
hash
The %done
hash is used to keep track of whether we've done static page generation for a particular directory; see the notes for line 240 below.
%indexes
We iterate over all index-related items stored in the %indexes
hash.
Recall that if we are generating static pages then %indexes
will contain three types of items, all expressed as relative pathnames (relative to the Blosxom data directory): category directories for which index.*
pages need to be generated (e.g., a/b
), date directories that need to be created with index.*
pages to support date-based URLs (e.g., 2004/05/22
), and individual entries for which static pages need to be generated (e.g., a/b/foo.txt
).
For each item we will need to create not only static pages for those items, but also the directories needed to contain those static pages, the higher-level directories containing those directories (e.g., subdirectory a
under the data directory for a category directory a/b
, or directories 2004
and 2004/05
for a date directory 2004/05/22
), and index pages for those higher-level directories.
As noted above we have to worry not only about static pages corresponding directly to each %indexes
key (e.g., the index page a/b/index.html
where $path
is the %indexes
item a/b
), but also static pages for any higher-level directories (e.g., the index page a/index.html
for directory a
as well as the index page index.html
for the data directory itself, the parent directory of a
).
The variable $p
is used to iterate over all directory components in $path
and make sure that the necessary directories are created and index pages generated. We start off at $p = ''
, representing the Blosxom data directory itself.
$path
components
We iterate over each component of the relative pathname stored in $path
, in order to create higher-level directories and their corresponding static pages where appropriate. We include the empty string '' as the first element of the foreach
list in order to handle index files at the level of the Blosxom data directory.
Thus, for example, if $path
is a/b
then we will iterate over '', a
, and b
. If $path
is 2004/05/22
then we will iterate over '', 2004
, 05
, and 22
.
$p
We add the current subdirectory component to the relative pathname being built up. Since $p
is initially the empty string '' and the first element of the foreach
loop is '' as well, $p
will be set to '/' the first time through the loop, and we'll then need to remove the leading '/'. On subsequent iterations $p
will end up being set to, e.g., 2004
, 2004/05
, etc., assuming a value for $path
of 2004/05/22
.
We save the current relative pathname (in $p
) as $path_info
so that plugins will have access to it (as a global variable in the blosxom
package).
We keep track of whether we have seen this relative path before. If not (i.e., if $done{$p}
is false) then we increment $done{$p}
by 1 and proceed to process it. Otherwise we skip to the next item in the foreach
loop.
$a++
vs. ++$a
Note that the check here works because ++
is used as a suffix operator, and hence $done{$p}
is incremented after its value is checked. Also, if $done{$p}
is undefined (which would be the case initially, since %done
is not otherwise initialized) then its value will be converted to zero prior to incrementing it.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Auto-increment-and-Auto-decrement
We check to see if there is already an existing directory corresponding to the path we're working on, or if the path represents an individual entry. Otherwise the path represents a directory that needs to be created, and we use the mkdir
function to create the directory. (We attempt to set the directory's access permissions to "rwxr-xr-x" so that anyone can look up files in the directory, but this may be made more restrictive by the umask
setting of the user executing blosxom.cgi
in static mode. Note that at a minimum the userid associated with the web server, e.g., "httpd", needs "r" access to the static pages and "rx" access to the directories containing them.)
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/func/-X.html
http://www.perldoc.com/perl5.8.4/pod/func/mkdir.html
We iterate over all the flavours for which we need to create static pages.
We use the reference stored in $template
to call a subroutine to determine what content type we should pass to the generate subroutine. (Recall that $template
was set to a reference to an anonymous subroutine returned by a template subroutine, either the default defined in blosxom.cgi
or one provided by a plugin to override the default.)
In the case of the default subroutine we look for a file with filename content_type.<i>$flavour</i>
(e.g., content_type.html
) in the directory specified by $p
or in its parent directories (up to and including the Blosxom data directory) and, if found, use the value of content_type
that it defines. Otherwise we use the default value of content_type
found in the %template
hash; for example, for the 'html' flavour we would use a content type of 'text/html'.
We look for a newline in the content_type
value and delete it and anything after it. This might be the case if the content type were defined in a file; we only need the first line of the file (prior to the first newline) and can ignore the rest.
We determine the relative pathname for the static page we need to create, up to but not including the extension. If the path $p
represents an individual entry (e.g., a/b/foo.txt
) then $fn
will be, e.g., a/b/foo
; otherwise $p
represents a directory in which index files need to be created and $fn
will be, e.g., a/b/index
.
[Note: Unlike $content_type
(which depends on the specific flavour for which we need to create a static page), the value of $fn
could have been determined before entering the static flavour foreach
loop, since it will be the same no matter what the flavour happens to be.]
We attempt to create (or rewrite, i.e., open and truncate) the static page for this flavour.
For more information see the following URLs:
http://search.cpan.org/~nwclark/perl-5.8.4/lib/FileHandle.pm
http://www.perldoc.com/perl5.8.4/pod/func/open.html
$output
is the global variable used by the generate
subroutine to build up the data for the page.
We call the generate
subroutine to generate the data for the static page and then write it to the just-opened file.
If the current %indexes
element has the value 1 then it corresponds to a category directory or individual entry, and we pass $p
to the generate
subroutine as its $currentdir
argument and the empty string '' as the $date
argument. Otherwise the %indexes
element represents a date-related index page and we pass $p
as the date and the empty string '' as the $currentdir
argument.
See the notes for lines 266 (generating a dynamic page) and 273 and 274 (the generate
subroutine) for more information about the arguments passed.
generate
subroutine
Note: In this expression we use
&generate(...)
instead of
generate(...)
as one might expect. According to the online Perl documentation the initial '&' is typically optional and may be omitted. Using '&' does disable checking of prototypes, but the generate
subroutine doesn't use prototypes. Is there some other reason for using '&' here?
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perlsub.html
We have finished writing this static page; close the FileHandle and go on to the next static flavour.
The code for a dynamic page is much simpler than that for static pages; all it has to do is to create the content type header and call the generate
subroutine to create the content to be returned in the HTTP response.
See the notes for line 243 in the code for static pages for more information about $content_type
.
We store the content type in an anonymous hash referenced by $header
, under the key '-type'.
Note that this variable will later be passed to the CGI::header
function as its single argument. (See the notes to line 409.) By using a hash to store the arguments we allow Blosxom plugins to cause additional HTTP headers to be output (i.e., other than the Content-type header) by adding additional key/value pairs to the hash referenced by $blosxom::header
. For example, this is done by the cookies, lastmodified, and xhtml plugins.
For more information see the following URL:
http://www.blosxom.com/plugins/headers/index.html
CGI::header
function
The CGI::header
function can be called in three different ways. First, it can be called with a single argument that is the content type in the form of a string, e.g.,
header('text/html')
It can also be called with multiple named arguments, of which the content type might be only one. When using multiple arguments there are two possible styles of argument passing, e.g.,
header(-type => 'text/html', -expires => '+3d')
or
header({-type => "text/html", -expires => "+3d"})
The former style is basically passing an array of arguments and is equivalent to, e.g.,
header('-type', 'text/html', '-expires', '+3d')
while the latter is passing a reference to an anonymous hash.
The Blosxom code assumes the third style of argument passing shown above, using the variable $header
to store a reference to an anonymous hash (as discussed in the previous note).
Note that older versions of the CGI module do not support all styles of argument passing. In particular, versions prior to 2.0 do not support passing multiple arguments to the CGI::header
function, and versions prior to 2.37b7 do not support putting curly braces around the argument list (i.e., passing the argument list as a hash reference).
If your hosting service does not support a recent version of the CGI module then you may need to patch Blosxom to fix the way the content type is handled. The simplest patch is to revert to the original style of passing a single string argument to CGI::header
:
$header = $content_type;
Note that if you use this patch then you will not be able to use Blosxom plugins that add their own HTTP headers. (See the previous note.)
For more information see the following URLs:
http://search.cpan.org/~lds/CGI.pm-3.05/CGI.pm#CALLING_CGI.PM_ROUTINES
http://stein.cshl.org/WWW/software/CGI/#named_param
We call the generate
subroutine to do the actual work of generating the page, passing the following arguments:
'dynamic'
: Generate a dynamic page.
$path_info
: A relative path containing the category and individual entry information we found in the originally-requested URL. Note that $path_info
will be empty if the URL contained date references only or the URL was requesting a top-level index, i.e., at the level of the Blosxom data directory. See the notes for lines 102 through 121 for more information.
"$path_info_yr/$path_info_mo_num/$path_info_da"
: We pass a string containing whatever date information we found in the originally requested URL. Note that the date string passed may be partial (e.g., '2004//' or '2004/05/') or "empty" ('//') if the original URL didn't reference a date. See the notes for lines 124 and 125 for more information.
$flavour
: The flavour of page requested; see the notes for lines 110 through 118 for more information.
$content_type
: The content type (determined by the flavour); see the notes for lines 261 and 262 above.
We call the end subroutine for each plugin (if it defined one). The approach here is identical to that used for calling the filter subroutine and other plugin subroutines that are called for each plugin; see the notes for line 225 for an explanation of the code.
Note that the end subroutine is not passed any arguments; however it could use information previously stored in global variables in the plugin's package.
Once the end subroutines have been called we are done with processing.
This marks the end of the main Blosxom code section. The remainder of the code defines two subroutines:
generate
: does the main work of page generation
nice_date
: parses Perl date/time values for easier processing
generate
subroutine
The generate
subroutine does the actual work of generating a page, and returns a (multiline) string containing the generated output for the page.
generate
subroutine
The generate subroutine takes the following arguments:
$static_or_dynamic
has the value 'static' if we are generating a static page and 'dynamic' if we are generating a dynamic page.
$currentdir
contains category and individual entry information, in the form of a relative path, e.g., a/b
or a/b/foo.html
. Note that $currentdir
will be empty if the request being processed or the static page being generated is for the index page at the top level (i.e., the Blosxom data directory) or is for an index of all entries for a particular date.
$date
contains date information, in the form of a string 'yyyy/mm/dd'. Note that $date
may be "empty" ('//') if no date information is associated with the request or the static page being generated; $date
may also have a partial value (e.g., '2004//' or '2004/05/').
$flavour
is the flavour for which a page is being generated, e.g., 'html' or 'rss'.
$content_type
is the MIME type for the page, e.g., 'text/html' for an HTML page or 'text/xml' for an RSS page.
generate
subroutine
Besides its arguments, the generate
subroutine also has access to the global variables %files
, etc. We make a local copy of the %files
hash containing information about all the entries we might need to include on this page.
We loop through all enabled plugins to see if any of them define a skip subroutine; if so we invoke it, set the returned value to $skip
, and if the returned value is true end the loop. Otherwise $skip
will end up set to false (i.e., we are not skipping story generation).
For more information on how this code works see the earlier notes for lines 161 and 225 discussing the template and filter subroutines.
The default interpolate subroutine is used to replace occurrences of Blosxom variables with the values of those variables. (For example, in the head section it would replace occurrences of the string '$blog_title
' with the value of the $blog_title
variable.) It is called with one argument, the template string in which interpolation is to be done.
The interpolate subroutine is defined as an anonymous subroutine whose reference is stored in $interpolate
. This is the same approach used with the template subroutine, etc.
[Note: The default interpolate subroutine is newly defined each time we call the generate
subroutine. This is presumably necessary because overriding the default interpolate subroutine (unlike overriding the template or entries subroutines) is not necessarily a one-time decision: different plugins might choose to define their own interpolate subroutine at different times for different reasons.]
We (re)specify the blosxom
package here to ensure that we are using the blosxom
package namespace when we execute this subroutine. This is necessary for us to properly interpolate Blosxom global variables when we call this subroutine from a plugin.
[Note: I need to double-check that this is the actual reason. Also, I presume that for a variable to be interpolated it must be a global variable in the blosxom
package, i.e., no lexical variables can be interpolated even if they are of file scope.]
$template
(the first and only argument to the interpolate subroutine) is the template contents to be processed.
We look for substrings in $template
that appear to be Perl identifiers, either unqualified identifiers like $foo
or package-qualified identifiers like $abc::foo
(but not, for example, variable references like ${foo}
or $abc::def::foo
). For any strings we find that are actual defined Perl variables we replace the string with the result of evaluating the variable, otherwise we replace it with the empty string ('').
The regular expression here matches a literal '$' followed by one or more "word" characters (alphanumeric or underscore) followed by an optional '::' if the identifier is package-qualified, followed by zero or more word characters (for any identifier after the '::'). We use (?:::)?
instead of (::)?
because we don't need to or want to capture the '::' string for later reference.
Note that the regular expression used will not match package-qualified identifiers of the form $abc::def::foo
. This is not a problem in practice because neither the blosxom
package nor the plugin packages use package names of the form abc::def
. The regular expression also will not match variables of the form ${foo}
. Again, this should not be a problem in practice because the braces are normally used to disambiguate variable references in a context where they would be ambiguous, e.g., "$foo's blog"
where the single quote could be interpreted as an old-style Perl package qualifier; however in our case we are looking at the variable in isolation, without any context, and that eliminates (or at least reduces) any possible ambiguities.
(The only problem would be if we wanted to put alphanumeric characters immediately after the variable reference, e.g., if the template contents contained a string like "$blog_titlexxx"
then we would try to interpolate the variable $blog_titlexxx
(and fail) instead of concatenating the value of $blog_title
with three 'x' characters.)
In the replacement string we use the ee
option to treat the string as a Perl expression and evaluate it as such at runtime. So, for example, if the pattern matched is $blog_title
(i.e., a literal '$' followed by the word 'blog_title') then the replacement string becomes the Perl expression
defined $blog_title ? $blog_title : ''
In other words, if evaluating $blog_title
produces a defined value (e.g., 'My First Blog'), then use that value to replace the matched pattern ($blog_title
), otherwise use the empty string '' as the replacement string. This Perl expression is then evaluated at run-time to produce the desired result.
The g
option matches all occurrences of things that look like Perl scalar variables. The net effect is that all apparent Perl scalar variables in the template contents are replaced with the value of the variables in question, or with the empty string if no such variables exist (or they exist but do not have defined values).
[Note: I have two open questions here: First, why is the replacement string enclosed in double-quotes, i.e.,
$template =~ s/(\$\w+(?:::)?\w*)/"defined $1 ? $1 : ''"/gee;
instead of
$template =~ s/(\$\w+(?:::)?\w*)/defined $1 ? $1 : ''/gee;
Second, why use the defined
function at all? In testing the following code seemed to work identically and without any errors:
$template =~ s/(\$\w+(?:::)?\w*)/$1/gee;
In what circumstances might an error occur with this simpler code?]
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perlre.html
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Regexp-Quote-Like-Operators
http://www.perldoc.com/perl5.8.4/pod/func/defined.html
Now begins the bulk of the work, which we skip entirely if some plugin has told us to skip it.
For more information on how this code works see the previous notes for line 161 concerning overriding the default template subroutine.
We dereference $template
to invoke the template subroutine being used (either the default subroutine or one supplied by a plugin) and ask it to return the template used for the top of the page ('head'), for the specific flavour.
(For the 'html' flavour this template would typically include the DOCTYPE, <html> tag, <head> section, and the beginning of the <body> section, including the blog title.)
As noted previously, the default template subroutine will start looking for template files (e.g., head.html
for the 'html' flavour) in the directory $currentdir
(a relative pathname relative to the Blosxom data directory) and will continue looking in the parent directories of $currentdir
, up to and including the Blosxom data directory itself. (If $currentdir
is the empty string '' then the search will start and end in the Blosxom data directory.) If no template files are found then the default template subroutine will return the appropriate template previously read from the data section of blosxom.cgi
.
We loop over the plugins and call the head subroutine for any plugins that have defined one, passing the subroutine the path for which we are generating a page, as well as a reference to $head
so that the subroutine can modify it in place.
For more information on how this code works see the notes for line 225 concerning calling the plugin filter subroutines.
Note that since we are actually invoking head
as a method the head subroutine will be passed the package name as its added first argument. The subroutine can simply ignore this.
Now that all plugins have had a chance to modify the contents of $head
, interpolate any variable references in the contents of $head
and then append the contents of $head
to the output being generated for the page.
Recall that $num_entries
is the default number of entries to show on a page.
We check to see if $currentdir
actually refers to an individual entry as opposed to an index page.
In the regular expression we attempt to look for patterns of the form .../foo.bar
where the final component of the path has a file extension of some sort. If we find such a match then $1
will be everything up to (but not including) the final '/', $2
will be the basename of the final component, and $3
will be the file extension. If $2
is not 'index' then we assume this is a reference to an individual entry (e.g., a/b/foo.html
).
For an individual entry we reassign $currentdir
to have the standard Blosxom file extension instead of whatever file extension it happened to have had. This has the effect of converting, e.g., a/b/foo.html
to a/b/foo.txt
.
This is necessary because we need to find the actual entry file from which to generate the output for whatever flavour was associated with the original request for the entry.
If we are generating a page for an individual entry then we do not need to worry about all the other entries. Thus we modify %f
(our local copy of %files
) to be a new hash with a single element; we take the key and value for the sole element of %f
from the element in %files
for this entry.
(This is assuming that we did in fact find an entry file corresponding to the entry being requested, as evidenced by there being an associated element in %files
. What happens if we have a request for an individual entry, e.g., a/b/foo.html
and there is no entry file a/b/foo.txt
? In that case %f
will retain its prior value as a copy of %files
. We will then proceed to iterate over the elements of %f
in the foreach
loop from lines 332 to 392, but the test on line 338 will always fail, i.e., no element will ever be matched. The result will be a page with a 'head' and 'foot' section but no content in between.)
If $currentdir
does not correspond to an individual entry then we remove the final path component if it is of the form index.foo
. It should now have the form of a pure directory reference, e.g., a/b/index.html
gets changed to a/b/
and 2004/05/index.rss
gets changed to 2004/05/
.
We define an anonymous subroutine to do sorting of entries, and store a reference to it in $sort
.
The sort subroutine is passed a reference to a hash assumed to be of the same form as %files
(hence the name $files_ref
for the argument).
We get a list of all keys in the hash referenced by $files_ref
, and then sort that list by comparing the values corresponding to each key, returning the sorted list.
In the %files
hash (and hashes modelled on it) the keys are (absolute) pathnames for files and the values are the "last modified" times for those files, so this returns a list of file pathnames in chronological order based on when the files were last modified.
For more information on how this code works see the previous notes for line 161 concerning overriding the default template subroutine.
We iterate over the sorted list of entry files (which may contain just one entry file if we are generating a page for an individual entry) and the list of other files. The files are sorted by file modification time if we're using the default sort subroutine, or possibly by some other criteria if a plugin has defined a different sort subroutine.
Note that although the code passes references to both the hashes %f
(the generate
subroutine's local copy of %files
) and %others
as arguments to the sort subroutine, in the default version of the sort subroutine the second argument (\%others
) is ignored.
We end the loop if we have exceeded the maximum number of entries per page and we are not requesting entries for a specific date. On the other hand, if we are requesting entries for a specific date (i.e., the $date
argument contains at least a year value, e.g., /2004//
) then we display all entries regardless of the value of the $num_entries
configuration variable.
[Note: Although the Blosxom documentation mentions the use of $num_entries
only in connection with a weblog's home page, it is also used to limit the number of entries displayed on category pages.]
For more information see the following URL:
http://www.blosxom.com/documentation/users/configure/
$date
variables
The check here on line 333 and the statement on line 351 use the value of the $date
argument passed to the generate
subroutine, i.e., the $date
variable declared on line 274 using my
.
This variable is not the same as the $date
variable declared on line 357 and referenced on lines 360 through 364, even though all the references to $date
from line 333 to line 364 appear within the same foreach
loop. For more information see the notes for line 357.
$path
and $fn
[Note: Presumably $path
and $fn
are declared as global variables because they are referenced in the default templates (in particular, the story templates) and thus will need to be visible to the interpolate
subroutine; this would not be the case if the variables were declared with my
as being of lexical scope.]
The value of the loop variable $path_file
is an absolute pathname as used in %f
(%files
) and %others
. We extract from that pathname the directory path relative to the data directory ($path
) and the filename minus the file extension ($fn
).
Note that the directory path may be empty if the file is in the data directory itself (e.g., $datadir/foo.txt
). That accounts for the use of the regular expression (?:(.*)/)?
when matching the directory path.
We want to generate output only for entries whose files are in the current directory (whose value is in $currentdir
as a pathname relative to the Blosxom data directory) or in subdirectories underneath the current directory. So, for example, if the current directory is a/b
then the entry files a/b/foo.txt
or a/b/c/bar.txt
should be processed, but the entry file a/baz.txt
should not. In the latter case $path
will be a
, which does not contain a/b
($currentdir
) at the beginning, and hence the test will fail.
We also have to take into account the possibility that the value of $currentdir
represents an individual entry file and not a directory. In that case we check to verify that $path_file
(the full absolute pathname of the file being processed) matches the absolute pathname $datadir/$currentdir
.
If both of these tests fail (the current file is not in the right hierarchy, nor does it match the individual entry we want) then we skip to processing the next item.
&&=
operator
"If a path exists" here means "if $path
is non-empty". As noted above $path
will be empty if the file in question is in the data directory itself.
The &&=
operator works similarly to the ||=
operator discussed in the notes for line 86: $a &&= $b
is equivalent to $a = $a && $b
, i.e., if the value of $a
is true (defined and non-empty) then assign the value of $b
to $a
. So in this case if $path
is non-empty then we assign it the value "/$path"
, the effect of which is to prepend a '/' to the original value of $path
.
There are a number of other ways to express this; for example,
$path and $path = "/$path";
would have worked as well, as would
$path and $path = '/' . $path;
However the syntax chosen is the most economical, if not necessarily the most understandable to Perl newbies.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perlop.html#C-style-Logical-And
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Assignment-Operators
In the next series of lines we call the nice_date
subroutine to extract individual year, month, date, etc., values for the entry file's modification time and store them in global variables.
[Note: Presumably we use global variables here so that plugins can have access to the date information without having to reparse the files' modification time values.]
The nice_date
subroutine doesn't return the hours and minutes as individual values, so we split those out of $ti
using ':' as the separator.
Finally, the nice_date
subroutine returns a 24-hour time value, so we convert that to a 12-hour value (with am/pm indicator) for convenience. We also remove any zero on the left of the hour (e.g., change '02' to '2') and adjust "zero hour" values to the normal am/pm notation where hour values range from 1 to 12.
$date
will either be "empty" ('//') or will be a full or partial date of the form yyyy/mm/dd, e.g., 2004/05/22
, 2004/05/
, or 2004//
. We split this up (using '/' as the separator) to obtain individual year, month, day values (some of which may end up being empty for a given value of $date
).
If we are generating output for a particular year (i.e., $path_info_yr
has a non-empty value) then we skip to processing the next item if the year in which the entry file being processed was last modified ($yr
) is not the same as the year we want ($path_info_yr
).
We exit the loop entirely if the file was last modified in a year earlier than the year from the URL.
[Note: How would this second check ever succeed, given the previous check? We would get to the second check only if $path_info_yr
were empty or undefined, or if $yr
were equal to $path_info_yr
. So it would seem that the second check is guaranteed to always fail.]
If we are generating output for a particular month (i.e., $path_info_mo_num
has a non-empty value) then we skip to processing the next item if the month in which the file being processed was last modified is not the same as the month we want.
Note that $mo
as returned from nice_date
is actually a three-letter month abbreviation, while $path_info_mo_num
is a two-digit month number; hence we have to convert $path_info_mo_num
before comparing it to $mo
.
[Note: Why not just use $mo_num
as returned by nice_date
, instead of $mo
?]
If we are generating output for a particular day of the month (i.e., $path_info_da
has a non-empty value) then we skip to processing the next item if the day on which the file being processed was last modified ($da
) is not the same as the day we want ($path_info_da
).
We exit the loop entirely if the file was last modified on a day earlier than the day from the URL.
[Note: How would this second check ever succeed, given the previous check? We would get to the second check only if $path_info_da
were empty or undefined, or if $da
were equal to $path_info_da
. So it would seem that the second check is guaranteed to always fail.]
We invoke the template subroutine being used (either the default subroutine or one supplied by a plugin) to return the template used for date formats for the specific flavour. (For example, for the 'html' flavour the date template will be
<h3>$dw, $da $mo $yr</h3>\n
if the default template subroutine is used and no date.html
file is found.)
$date
variable
Previous to this line, references to $date
referred to the $date
variable declared on line 274 and used to store the date argument passed to the generate
subroutine. By using my
here to declare a new private variable $date
, we cause the previous $date
variable to be hidden, i.e., its value is no longer accessible to us.
The scope of lexical variables declared with my
is limited to the code block in which they were declared; in this case the enclosing code block is the foreach
loop used to loop over entries, which extends from line 332 to line 392. However the scope of this new $date
variable does not extend over the entire foreach
loop; rather it extends only from the point at which it was declared on line 357 to the end of the loop at line 392. References to $date
in the foreach
loop prior to line 357 refer to the original $date
argument declared on line 274.
We let each and every plugin have a chance to modify the contents of the date template if desired (e.g., by rearranging the order in which the date-related variables appear in the template). We pass in a number of date-related variables that the plugin subroutine(s) can use in making decisions about what to change.
We interpolate any variable references in $date
; among other things this will replace date-related variables such as $yr
, $mo
, and $da
with the actual date values.
If we have a number of entries for the same date then we want to output the date section only once. If we haven't previously output a date section for this page ($curdate
is empty or otherwise not equal to the date section we're now processing) then we set $curdate
to the current date string and append the date section to the page output.
$title
, $body
, and $raw
[Note: Here we declare $title
, $body
, and $raw
as global variables. Presumably this is because these variables (or at least $title
and $body
) are referenced in the default story template and hence have to be visible in the interpolate
subroutine; if the variables were of lexical scope (i.e., using my
) then they would not be visible to be interpolated.]
If the file identified by $path_file
exists then we attempt to open it to read it. If this is successful we read the first line of the file and assign it to $title
; the chomp
function removes any trailing newline in $title
after it gets assigned. We then read the rest of the file and assign the lines to $body
, removing any trailing newline. (Within $body
the multiple lines from the file are separated by newlines.)
The join
function is used to cause <$fh>
to be evaluated in list context instead of scalar context, so that it will read the entire file (or more precisely, what's left of the file after reading the first line) instead of just reading a single line.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/func/chomp.html
Note that given the way that $title
and $body
were assigned, $raw
essentially recovers the raw contents of the entry file into a single multi-line string, minus any final newline in the file.
[Note: The global variable $raw
is not otherwise used in the main Blosxom code, and is not included in any of the default templates. Perhaps it's defined for the benefit of any plugins that might want to use it?]
We invoke the template subroutine being used (either the default subroutine or one supplied by a plugin) to return the template used for the story section for the specific flavour.
We let each and every plugin have a chance to modify the story section if desired. Note that the plugins' story subroutines have access both to the story template ($story
) and to the story contents ($title
and $body
).
If the content_type is an XML-based content type (e.g., 'text/xml') then we need to modify the generated output so that characters like '<' are properly escaped by replacing them with XML character entities.
[Note: The escaping is not done if the content_type is 'text/html', so HTML markup in the story (i.e., for the 'html' flavour) is not affected. However, if your blog pages were in XHTML and you had specified the content type to be 'application/xhtml+xml' (as recommended by Ian Hickson and the W3C) then your XHTML markup would presumably be mangled. (Of course, almost everybody with "XHTML" pages is actually serving them as 'text/html' because MSIE doesn't support the 'application/xhtml+xml' content type.)]
%escape
hash
The %escape
hash maps problematic characters to their respective character entities.
$escape_re
regular expression
The $escape_re
variable holds a string value that can be used in a regular expression to look for alternate strings to match. In particular, the value of $escape_re
will be <|>|&|"
given the definition of %escape
.
Note that the expression
join '|' => keys %escape
as used in this context is equivalent to
join '|', keys %escape
The latter is the form typically used in the Blosxom code.
[Note: I wonder if there was a particular reason for using =>
here?]
We modify both $title
and $body
to replace the problematic characters with the equivalent character entities. Note that this takes advantage of the Perl feature where the search string and replacement string undergo interpolation to replace references to Perl variables ($escape_re
and $escape{...}
in this case) with the values of those variables.
Note that this is different from the approach used by the interpolate
subroutine. There we did not know what the actual variable names were going to be, so we had to "eval" the replacement string using the ee
option to the pattern match. Here we know what the variable names are at compile time, so we don't need the ee
option, just the normal variable interpolation that Perl does as a matter of course.
[Note: I need to double-check the ordering of variable interpolation. In order for the replacement string to work properly the value of $1
has to be known prior to interpolation taking place.]
We interpolate any variables present in the contents of the story section, and then append the resulting content to the output string being built.
We're now done with the file for this entry, and so can close its FileHandle.
Having added another entry to the output, we decrement $ne
to keep track of how many more entries we can add before hitting the specified maximum entries per page. (We don't check the number of entries when generating date-based pages, but we decrement $ne
anyway.)
We invoke the template subroutine being used (either the default subroutine or one supplied by a plugin) to return the template used for the foot section for the specified flavour. For example, for the 'html' flavour this would typically include the </body> and </html> closing tags.
We let each and every plugin have a chance to modify the contents of the foot section if desired.
We interpolate any variables present in the contents of the foot section, and then append the resulting content to the output string being built.
We let each and every plugin have a chance to do any final processing for this page. Note that the last subroutine is not passed arguments, but it has access to Blosxom global variables, including $output
.
Note that the HTTP header (if any) goes at the front of the generated output, and is needed only for dynamic page generation. The variable $header
contains the content type value as well as the values for any other HTTP headers added by plugins; see the notes to line 264.
generate
subroutine return value
We return the $output
string as the value of the generate
subroutine. (The return
keyword is optional in Perl.)
nice_date
subroutine
The subroutine nice_date
converts OS-provided time values (expressed as the number of seconds since some fixed date) into year, month, day, etc., values that we can use for printing date/times and creating date-based URLs.
$unixtime
argument
The subroutine argument is called $unixtime
but it's not specific to Unix; it's the time value returned by the Perl function time
and similar routines. The value is always a number in seconds, but its interpretation can differ slightly from OS to OS (for example, between Unix and Mac OS).
ctime
function
We don't use the value of $unixtime
directly, we just pass it to the Time::localtime::ctime
routine to return a human-readable string representing the date/time, e.g., "Sat May 29 06:58:29 2004".
For more information see the following URLs:
http://search.cpan.org/~nwclark/perl-5.8.4/lib/Time/localtime.pm
http://www.perldoc.com/perl5.8.4/pod/func/localtime.html
The value returned by ctime
has the format "Dow Mon dd hh:mm:ss yyyy" where "Dow" is the three letter abbreviation for the day of the week (e.g., "Sun", "Mon", etc.) and "Mon" is the three letter abbreviation for the month (e.g., "Jan", "Feb", etc.). The other fields have the usual numeric values for day of the month, hours, minutes, and seconds, and the (four-digit) year; however note that the day of the month may be expressed as either one or two digits.
We use a pattern to match the $c_time
string, capture the values of interest, and assign them to individual variables. Note that the regular expression \w{3}
matches exactly three alphanumeric characters (actually, alphanumeric plus underscore), while \d{1,2}
matches either one or two digits (no more, no less), to account for the way that ctime
returns the day of the month.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perlre.html#Regular-Expressions
sprintf
function
Since the day of the month may be returned as either a one or two digit value (e.g., "1" or "10") we use the sprintf
function (like the corresponding function in C) to modify the value of $da
to be two digits, zero-padded on the left (e.g., "01" instead of "1").
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/func/sprintf.html
Since we also want the month number as well as the name, we use the month name as a key to look up the month number in the %month2num
hash defined above.
__DATA__
and __END__
The __DATA__
line marks the end of the Blosxom code and the beginning of the flavour data. The __END__
line marks the end of the program as a whole. Anything after that is considered a comment; this area is used by some Perl developers to include a change history for the program, program documentation, license terms, and/or other useful information.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perldata.html