require 5; package Pod::Simple::HTML; use strict; use Pod::Simple::PullParser (); use vars qw( @ISA %Tagmap $Computerese $LamePad $Linearization_Limit $VERSION $Perldoc_URL_Prefix $Perldoc_URL_Postfix $Title_Prefix $Title_Postfix $HTML_EXTENSION %ToIndex $Doctype_decl $Content_decl ); @ISA = ('Pod::Simple::PullParser'); $VERSION = '3.03'; use UNIVERSAL (); BEGIN { if(defined &DEBUG) { } # no-op elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG } else { *DEBUG = sub () {0}; } } $Doctype_decl ||= ''; # No. Just No. Don't even ask me for it. # qq{\n}; $Content_decl ||= q{}; $HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION; $Computerese = "" unless defined $Computerese; $LamePad = '' unless defined $LamePad; $Linearization_Limit = 120 unless defined $Linearization_Limit; # headings/items longer than that won't get an $Perldoc_URL_Prefix = 'http://search.cpan.org/perldoc?' unless defined $Perldoc_URL_Prefix; $Perldoc_URL_Postfix = '' unless defined $Perldoc_URL_Postfix; $Title_Prefix = '' unless defined $Title_Prefix; $Title_Postfix = '' unless defined $Title_Postfix; %ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text # 'item-text' stuff in the index doesn't quite work, and may # not be a good idea anyhow. __PACKAGE__->_accessorize( 'perldoc_url_prefix', # In turning L into http://whatever/Foo%3a%3aBar, what # to put before the "Foo%3a%3aBar". # (for singleton mode only?) 'perldoc_url_postfix', # what to put after "Foo%3a%3aBar" in the URL. Normally "". 'batch_mode', # whether we're in batch mode 'batch_mode_current_level', # When in batch mode, how deep the current module is: 1 for "LWP", # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc 'title_prefix', 'title_postfix', # What to put before and after the title in the head. # Should already be &-escaped 'html_header_before_title', 'html_header_after_title', 'html_footer', 'index', # whether to add an index at the top of each page # (actually it's a table-of-contents, but we'll call it an index, # out of apparently longstanding habit) 'html_css', # URL of CSS file to point to 'html_javascript', # URL of CSS file to point to 'force_title', # should already be &-escaped 'default_title', # should already be &-escaped ); #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ my @_to_accept; %Tagmap = ( 'Verbatim' => "\n", '/Verbatim' => "\n", 'VerbatimFormatted' => "\n", '/VerbatimFormatted' => "\n", 'VerbatimB' => "", '/VerbatimB' => "", 'VerbatimI' => "", '/VerbatimI' => "", 'VerbatimBI' => "", '/VerbatimBI' => "", 'Data' => "\n", '/Data' => "\n", 'head1' => "\n

", # And also stick in an 'head2' => "\n
", # '' 'head3' => "\n
", # '' 'head4' => "\n
", # '' '/head1' => "
\n", '/head2' => "

\n", '/head3' => "\n", '/head4' => "\n", 'X' => "", changes(qw( Para=p B=b I=i over-bullet=ul over-number=ol over-text=dl over-block=blockquote item-bullet=li item-number=li item-text=dt )), changes2( map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ } qw[ sample=samp definition=dfn kbd=keyboard variable=var citation=cite abbreviation=abbr acronym=acronym subscript=sub superscript=sup big=big small=small underline=u strikethrough=s ] # no point in providing a way to get ..., I think ), '/item-bullet' => "$LamePad\n", '/item-number' => "$LamePad\n", '/item-text' => "$LamePad\n", 'item-body' => "\n

", '/item-body' => "

\n", 'B' => "", '/B' => "", 'I' => "", '/I' => "", 'F' => "", '/F' => "", 'C' => "", '/C' => "", 'L' => "", # ideally never used! '/L' => "", ); sub changes { return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s ? ( $1, => "\n<$2>", "/$1", => "\n" ) : die "Funky $_" } @_; } sub changes2 { return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s ? ( $1, => "<$2>", "/$1", => "" ) : die "Funky $_" } @_; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub go { exit Pod::Simple::HTML->parse_from_file(@ARGV) } # Just so we can run from the command line. No options. # For that, use perldoc! #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub new { my $new = shift->SUPER::new(@_); #$new->nix_X_codes(1); $new->nbsp_for_S(1); $new->accept_targets( 'html', 'HTML' ); $new->accept_codes('VerbatimFormatted'); $new->accept_codes(@_to_accept); DEBUG > 2 and print "To accept: ", join(' ',@_to_accept), "\n"; $new->perldoc_url_prefix( $Perldoc_URL_Prefix ); $new->perldoc_url_postfix( $Perldoc_URL_Postfix ); $new->title_prefix( $Title_Prefix ); $new->title_postfix( $Title_Postfix ); $new->html_header_before_title( qq[$Doctype_decl] ); $new->html_header_after_title( join "\n" => "", $Content_decl, "\n", $new->version_tag_comment, "\n", ); $new->html_footer( qq[\n\n\n\n] ); $new->{'Tagmap'} = {%Tagmap}; return $new; } sub batch_mode_page_object_init { my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_; DEBUG and print "Initting $self\n for $module\n", " in $infile\n out $outfile\n depth $depth\n"; $self->batch_mode(1); $self->batch_mode_current_level($depth); return $self; } sub run { my $self = $_[0]; return $self->do_middle if $self->bare_output; return $self->do_beginning && $self->do_middle && $self->do_end; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub do_beginning { my $self = $_[0]; my $title; if(defined $self->force_title) { $title = $self->force_title; DEBUG and print "Forcing title to be $title\n"; } else { # Actually try looking for the title in the document: $title = $self->get_short_title(); unless($self->content_seen) { DEBUG and print "No content seen in search for title.\n"; return; } $self->{'Title'} = $title; if(defined $title and $title =~ m/\S/) { $title = $self->title_prefix . esc($title) . $self->title_postfix; } else { $title = $self->default_title; $title = '' unless defined $title; DEBUG and print "Title defaults to $title\n"; } } my $after = $self->html_header_after_title || ''; if($self->html_css) { my $link = $self->html_css =~ m/html_css # It's a big blob of markup, let's drop it in : sprintf( # It's just a URL, so let's wrap it up qq[\n], $self->html_css, ); $after =~ s{()}{$link\n$1}i; # otherwise nevermind } $self->_add_top_anchor(\$after); if($self->html_javascript) { my $link = $self->html_javascript =~ m/html_javascript # It's a big blob of markup, let's drop it in : sprintf( # It's just a URL, so let's wrap it up qq[\n], $self->html_javascript, ); $after =~ s{()}{$link\n$1}i; # otherwise nevermind } print {$self->{'output_fh'}} $self->html_header_before_title || '', $title, # already escaped $after, ; DEBUG and print "Returning from do_beginning...\n"; return 1; } sub _add_top_anchor { my($self, $text_r) = @_; unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack $$text_r .= "\n"; } return; } sub version_tag_comment { my $self = shift; return sprintf "\n", esc( ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(), $], scalar(gmtime), ), $self->_modnote(), ; } sub _modnote { my $class = ref($_[0]) || $_[0]; return join "\n " => grep m/\S/, split "\n", qq{ If you want to change this HTML document, you probably shouldn't do that by changing it directly. Instead, see about changing the calling options to $class, and/or subclassing $class, then reconverting this document from the Pod source. When in doubt, email the author of $class for advice. See 'perldoc $class' for more info. }; } sub do_end { my $self = $_[0]; print {$self->{'output_fh'}} $self->html_footer || ''; return 1; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Normally this would just be a call to _do_middle_main_loop -- but we # have to do some elaborate things to emit all the content and then # summarize it and output it /before/ the content that it's a summary of. sub do_middle { my $self = $_[0]; return $self->_do_middle_main_loop unless $self->index; if( $self->output_string ) { # An efficiency hack my $out = $self->output_string; #it's a reference to it my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n"; $$out .= $sneakytag; $self->_do_middle_main_loop; $sneakytag = quotemeta($sneakytag); my $index = $self->index_as_html(); if( $$out =~ s/$sneakytag/$index/s ) { # Expected case DEBUG and print "Inserted ", length($index), " bytes of index HTML into $out.\n"; } else { DEBUG and print "Odd, couldn't find where to insert the index in the output!\n"; # I don't think this should ever happen. } return 1; } unless( $self->output_fh ) { require Carp; Carp::confess("Parser object \$p doesn't seem to have any output object! I don't know how to deal with that."); } # If we get here, we're outputting to a FH. So we need to do some magic. # Namely, divert all content to a string, which we output after the index. my $fh = $self->output_fh; my $content = ''; { # Our horrible bait and switch: $self->output_string( \$content ); $self->_do_middle_main_loop; $self->abandon_output_string(); $self->output_fh($fh); } print $fh $self->index_as_html(); print $fh $content; return 1; } ########################################################################### sub index_as_html { my $self = $_[0]; # This is meant to be called AFTER the input document has been parsed! my $points = $self->{'PSHTML_index_points'} || []; @$points > 1 or return qq[

\n]; # There's no point in having a 0-item or 1-item index, I dare say. my(@out) = qq{\n

}; my $level = 0; my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent); foreach my $p (@$points, ['head0', '(end)']) { ($tagname, $text) = @$p; $anchorname = $self->section_escape($text); if( $tagname =~ m{^head(\d+)$} ) { $target_level = 0 + $1; } else { # must be some kinda list item if($previous_tagname =~ m{^head\d+$} ) { $target_level = $level + 1; } else { $target_level = $level; # no change needed } } # Get to target_level by opening or closing ULs while($level > $target_level) { --$level; push @out, (" " x $level) . ""; } while($level < $target_level) { ++$level; push @out, (" " x ($level-1)) . "

%s", $indent, $level, $anchorname, esc($text) ; } push @out, "

", # And also stick in an 'head2' => "\n", # '' 'head3' => "\n", # '' 'head4' => "\n", # '' '/head1' => "\n", '/head2' => "

", # '' 'head3' => "\n

", # '' 'head4' => "\n

", # '' '/head1' => "

", # And also stick in an 'head2' => "\n
", # '' 'head3' => "\n
", # '' 'head4' => "\n
", # '' '/head1' => "
\n", '/head2' => "