X-Git-Url: http://git.maemo.org/git/?p=dh-make-perl;a=blobdiff_plain;f=dev%2Farm%2Flibpod-simple-perl%2Flibpod-simple-perl-3.07%2Flib%2FPod%2FSimple%2FBlackBox.pm;fp=dev%2Farm%2Flibpod-simple-perl%2Flibpod-simple-perl-3.07%2Flib%2FPod%2FSimple%2FBlackBox.pm;h=4804973a2e65d6e82d2ee40b0f9b7942153740de;hp=0000000000000000000000000000000000000000;hb=f477fa73365d491991707e7ed9217b48d6994551;hpb=da95c414033799c3a62606f299c3c00b5c77ca11 diff --git a/dev/arm/libpod-simple-perl/libpod-simple-perl-3.07/lib/Pod/Simple/BlackBox.pm b/dev/arm/libpod-simple-perl/libpod-simple-perl-3.07/lib/Pod/Simple/BlackBox.pm new file mode 100644 index 0000000..4804973 --- /dev/null +++ b/dev/arm/libpod-simple-perl/libpod-simple-perl-3.07/lib/Pod/Simple/BlackBox.pm @@ -0,0 +1,1919 @@ + +package Pod::Simple::BlackBox; +# +# "What's in the box?" "Pain." +# +########################################################################### +# +# This is where all the scary things happen: parsing lines into +# paragraphs; and then into directives, verbatims, and then also +# turning formatting sequences into treelets. +# +# Are you really sure you want to read this code? +# +#----------------------------------------------------------------------------- +# +# The basic work of this module Pod::Simple::BlackBox is doing the dirty work +# of parsing Pod into treelets (generally one per non-verbatim paragraph), and +# to call the proper callbacks on the treelets. +# +# Every node in a treelet is a ['name', {attrhash}, ...children...] + +use integer; # vroom! +use strict; +use Carp (); +BEGIN { + require Pod::Simple; + *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG +} + +#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + +sub parse_line { shift->parse_lines(@_) } # alias + +# - - - Turn back now! Run away! - - - + +sub parse_lines { # Usage: $parser->parse_lines(@lines) + # an undef means end-of-stream + my $self = shift; + + my $code_handler = $self->{'code_handler'}; + my $cut_handler = $self->{'cut_handler'}; + $self->{'line_count'} ||= 0; + + my $scratch; + + DEBUG > 4 and + print "# Parsing starting at line ", $self->{'line_count'}, ".\n"; + + DEBUG > 5 and + print "# About to parse lines: ", + join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n"; + + my $paras = ($self->{'paras'} ||= []); + # paragraph buffer. Because we need to defer processing of =over + # directives and verbatim paragraphs. We call _ponder_paragraph_buffer + # to process this. + + $self->{'pod_para_count'} ||= 0; + + my $line; + foreach my $source_line (@_) { + if( $self->{'source_dead'} ) { + DEBUG > 4 and print "# Source is dead.\n"; + last; + } + + unless( defined $source_line ) { + DEBUG > 4 and print "# Undef-line seen.\n"; + + push @$paras, ['~end', {'start_line' => $self->{'line_count'}}]; + push @$paras, $paras->[-1], $paras->[-1]; + # So that it definitely fills the buffer. + $self->{'source_dead'} = 1; + $self->_ponder_paragraph_buffer; + next; + } + + + if( $self->{'line_count'}++ ) { + ($line = $source_line) =~ tr/\n\r//d; + # If we don't have two vars, we'll end up with that there + # tr/// modding the (potentially read-only) original source line! + + } else { + DEBUG > 2 and print "First line: [$source_line]\n"; + + if( ($line = $source_line) =~ s/^\xEF\xBB\xBF//s ) { + DEBUG and print "UTF-8 BOM seen. Faking a '=encode utf8'.\n"; + $self->_handle_encoding_line( "=encode utf8" ); + $line =~ tr/\n\r//d; + + } elsif( $line =~ s/^\xFE\xFF//s ) { + DEBUG and print "Big-endian UTF-16 BOM seen. Aborting parsing.\n"; + $self->scream( + $self->{'line_count'}, + "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." + ); + splice @_; + push @_, undef; + next; + + # TODO: implement somehow? + + } elsif( $line =~ s/^\xFF\xFE//s ) { + DEBUG and print "Little-endian UTF-16 BOM seen. Aborting parsing.\n"; + $self->scream( + $self->{'line_count'}, + "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." + ); + splice @_; + push @_, undef; + next; + + # TODO: implement somehow? + + } else { + DEBUG > 2 and print "First line is BOM-less.\n"; + ($line = $source_line) =~ tr/\n\r//d; + } + } + + + DEBUG > 5 and print "# Parsing line: [$line]\n"; + + if(!$self->{'in_pod'}) { + if($line =~ m/^=([a-zA-Z]+)/s) { + if($1 eq 'cut') { + $self->scream( + $self->{'line_count'}, + "=cut found outside a pod block. Skipping to next block." + ); + + ## Before there were errata sections in the world, it was + ## least-pessimal to abort processing the file. But now we can + ## just barrel on thru (but still not start a pod block). + #splice @_; + #push @_, undef; + + next; + } else { + $self->{'in_pod'} = $self->{'start_of_pod_block'} + = $self->{'last_was_blank'} = 1; + # And fall thru to the pod-mode block further down + } + } else { + DEBUG > 5 and print "# It's a code-line.\n"; + $code_handler->(map $_, $line, $self->{'line_count'}, $self) + if $code_handler; + # Note: this may cause code to be processed out of order relative + # to pods, but in order relative to cuts. + + # Note also that we haven't yet applied the transcoding to $line + # by time we call $code_handler! + + if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) { + # That RE is from perlsyn, section "Plain Old Comments (Not!)", + #$fname = $2 if defined $2; + #DEBUG > 1 and defined $2 and print "# Setting fname to \"$fname\"\n"; + DEBUG > 1 and print "# Setting nextline to $1\n"; + $self->{'line_count'} = $1 - 1; + } + + next; + } + } + + # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . + # Else we're in pod mode: + + # Apply any necessary transcoding: + $self->{'_transcoder'} && $self->{'_transcoder'}->($line); + + # HERE WE CATCH =encoding EARLY! + if( $line =~ m/^=encoding\s+\S+\s*$/s ) { + $line = $self->_handle_encoding_line( $line ); + } + + if($line =~ m/^=cut/s) { + # here ends the pod block, and therefore the previous pod para + DEBUG > 1 and print "Noting =cut at line ${$self}{'line_count'}\n"; + $self->{'in_pod'} = 0; + # ++$self->{'pod_para_count'}; + $self->_ponder_paragraph_buffer(); + # by now it's safe to consider the previous paragraph as done. + $cut_handler->(map $_, $line, $self->{'line_count'}, $self) + if $cut_handler; + + # TODO: add to docs: Note: this may cause cuts to be processed out + # of order relative to pods, but in order relative to code. + + } elsif($line =~ m/^\s*$/s) { # it's a blank line + if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { + DEBUG > 1 and print "Saving blank line at line ${$self}{'line_count'}\n"; + push @{$paras->[-1]}, $line; + } # otherwise it's not interesting + + if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) { + DEBUG > 1 and print "Noting para ends with blank line at ${$self}{'line_count'}\n"; + } + + $self->{'last_was_blank'} = 1; + + } elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para... + + if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) { + # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS + my $new = [$1, {'start_line' => $self->{'line_count'}}, $2]; + # Note that in "=head1 foo", the WS is lost. + # Example: ['=head1', {'start_line' => 123}, ' foo'] + + ++$self->{'pod_para_count'}; + + $self->_ponder_paragraph_buffer(); + # by now it's safe to consider the previous paragraph as done. + + push @$paras, $new; # the new incipient paragraph + DEBUG > 1 and print "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n"; + + } elsif($line =~ m/^\s/s) { + + if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { + DEBUG > 1 and print "Resuming verbatim para at line ${$self}{'line_count'}\n"; + push @{$paras->[-1]}, $line; + } else { + ++$self->{'pod_para_count'}; + $self->_ponder_paragraph_buffer(); + # by now it's safe to consider the previous paragraph as done. + DEBUG > 1 and print "Starting verbatim para at line ${$self}{'line_count'}\n"; + push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line]; + } + } else { + ++$self->{'pod_para_count'}; + $self->_ponder_paragraph_buffer(); + # by now it's safe to consider the previous paragraph as done. + push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line]; + DEBUG > 1 and print "Starting plain para at line ${$self}{'line_count'}\n"; + } + $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; + + } else { + # It's a non-blank line /continuing/ the current para + if(@$paras) { + DEBUG > 2 and print "Line ${$self}{'line_count'} continues current paragraph\n"; + push @{$paras->[-1]}, $line; + } else { + # Unexpected case! + die "Continuing a paragraph but \@\$paras is empty?"; + } + $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; + } + + } # ends the big while loop + + DEBUG > 1 and print(pretty(@$paras), "\n"); + return $self; +} + +#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + +sub _handle_encoding_line { + my($self, $line) = @_; + + # The point of this routine is to set $self->{'_transcoder'} as indicated. + + return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s; + DEBUG > 1 and print "Found an encoding line \"=encoding $1\"\n"; + + my $e = $1; + my $orig = $e; + push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig"; + + my $enc_error; + + # Cf. perldoc Encode and perldoc Encode::Supported + + require Pod::Simple::Transcode; + + if( $self->{'encoding'} ) { + my $norm_current = $self->{'encoding'}; + my $norm_e = $e; + foreach my $that ($norm_current, $norm_e) { + $that = lc($that); + $that =~ s/[-_]//g; + } + if($norm_current eq $norm_e) { + DEBUG > 1 and print "The '=encoding $orig' line is ", + "redundant. ($norm_current eq $norm_e). Ignoring.\n"; + $enc_error = ''; + # But that doesn't necessarily mean that the earlier one went okay + } else { + $enc_error = "Encoding is already set to " . $self->{'encoding'}; + DEBUG > 1 and print $enc_error; + } + } elsif ( + # OK, let's turn on the encoding + do { + DEBUG > 1 and print " Setting encoding to $e\n"; + $self->{'encoding'} = $e; + 1; + } + and $e eq 'HACKRAW' + ) { + DEBUG and print " Putting in HACKRAW (no-op) encoding mode.\n"; + + } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) { + + die($enc_error = "WHAT? _transcoder is already set?!") + if $self->{'_transcoder'}; # should never happen + require Pod::Simple::Transcode; + $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e); + eval { + my @x = ('', "abc", "123"); + $self->{'_transcoder'}->(@x); + }; + $@ && die( $enc_error = + "Really unexpected error setting up encoding $e: $@\nAborting" + ); + + } else { + my @supported = Pod::Simple::Transcode::->all_encodings; + + # Note unsupported, and complain + DEBUG and print " Encoding [$e] is unsupported.", + "\nSupporteds: @supported\n"; + my $suggestion = ''; + + # Look for a near match: + my $norm = lc($e); + $norm =~ tr[-_][]d; + my $n; + foreach my $enc (@supported) { + $n = lc($enc); + $n =~ tr[-_][]d; + next unless $n eq $norm; + $suggestion = " (Maybe \"$e\" should be \"$enc\"?)"; + last; + } + my $encmodver = Pod::Simple::Transcode::->encmodver; + $enc_error = join '' => + "This document probably does not appear as it should, because its ", + "\"=encoding $e\" line calls for an unsupported encoding.", + $suggestion, " [$encmodver\'s supported encodings are: @supported]" + ; + + $self->scream( $self->{'line_count'}, $enc_error ); + } + push @{ $self->{'encoding_command_statuses'} }, $enc_error; + + return '=encoding ALREADYDONE'; +} + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +sub _handle_encoding_second_level { + # By time this is called, the encoding (if well formed) will already + # have been acted one. + my($self, $para) = @_; + my @x = @$para; + my $content = join ' ', splice @x, 2; + $content =~ s/^\s+//s; + $content =~ s/\s+$//s; + + DEBUG > 2 and print "Ogling encoding directive: =encoding $content\n"; + + if($content eq 'ALREADYDONE') { + # It's already been handled. Check for errors. + if(! $self->{'encoding_command_statuses'} ) { + DEBUG > 2 and print " CRAZY ERROR: It wasn't really handled?!\n"; + } elsif( $self->{'encoding_command_statuses'}[-1] ) { + $self->whine( $para->[1]{'start_line'}, + sprintf "Couldn't do %s: %s", + $self->{'encoding_command_reqs' }[-1], + $self->{'encoding_command_statuses'}[-1], + ); + } else { + DEBUG > 2 and print " (Yup, it was successfully handled already.)\n"; + } + + } else { + # Otherwise it's a syntax error + $self->whine( $para->[1]{'start_line'}, + "Invalid =encoding syntax: $content" + ); + } + + return; +} + +#~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~` + +{ +my $m = -321; # magic line number + +sub _gen_errata { + my $self = $_[0]; + # Return 0 or more fake-o paragraphs explaining the accumulated + # errors on this document. + + return() unless $self->{'errata'} and keys %{$self->{'errata'}}; + + my @out; + + foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) { + push @out, + ['=item', {'start_line' => $m}, "Around line $line:"], + map( ['~Para', {'start_line' => $m, '~cooked' => 1}, + #['~Top', {'start_line' => $m}, + $_ + #] + ], + @{$self->{'errata'}{$line}} + ) + ; + } + + # TODO: report of unknown entities? unrenderable characters? + + unshift @out, + ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'], + ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1}, + "Hey! ", + ['B', {}, + 'The above document had some coding errors, which are explained below:' + ] + ], + ['=over', {'start_line' => $m, 'errata' => 1}, ''], + ; + + push @out, + ['=back', {'start_line' => $m, 'errata' => 1}, ''], + ; + + DEBUG and print "\n<<\n", pretty(\@out), "\n>>\n\n"; + + return @out; +} + +} + +#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + +############################################################################## +## +## stop reading now stop reading now stop reading now stop reading now stop +## +## HERE IT BECOMES REALLY SCARY +## +## stop reading now stop reading now stop reading now stop reading now stop +## +############################################################################## + +sub _ponder_paragraph_buffer { + + # Para-token types as found in the buffer. + # ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end, + # =over, =back, =item + # and the null =pod (to be complained about if over one line) + # + # "~data" paragraphs are something we generate at this level, depending on + # a currently open =over region + + # Events fired: Begin and end for: + # directivename (like head1 .. head4), item, extend, + # for (from =begin...=end, =for), + # over-bullet, over-number, over-text, over-block, + # item-bullet, item-number, item-text, + # Document, + # Data, Para, Verbatim + # B, C, longdirname (TODO -- wha?), etc. for all directives + # + + my $self = $_[0]; + my $paras; + return unless @{$paras = $self->{'paras'}}; + my $curr_open = ($self->{'curr_open'} ||= []); + + my $scratch; + + DEBUG > 10 and print "# Paragraph buffer: <<", pretty($paras), ">>\n"; + + # We have something in our buffer. So apparently the document has started. + unless($self->{'doc_has_started'}) { + $self->{'doc_has_started'} = 1; + + my $starting_contentless; + $starting_contentless = + ( + !@$curr_open + and @$paras and ! grep $_->[0] ne '~end', @$paras + # i.e., if the paras is all ~ends + ) + ; + DEBUG and print "# Starting ", + $starting_contentless ? 'contentless' : 'contentful', + " document\n" + ; + + $self->_handle_element_start( + ($scratch = 'Document'), + { + 'start_line' => $paras->[0][1]{'start_line'}, + $starting_contentless ? ( 'contentless' => 1 ) : (), + }, + ); + } + + my($para, $para_type); + while(@$paras) { + last if @$paras == 1 and + ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim' + or $paras->[0][0] eq '=item' ) + ; + # Those're the three kinds of paragraphs that require lookahead. + # Actually, an "=item Foo" inside an region + # and any =item inside an region (rare) + # don't require any lookahead, but all others (bullets + # and numbers) do. + +# TODO: winge about many kinds of directives in non-resolving =for regions? +# TODO: many? like what? =head1 etc? + + $para = shift @$paras; + $para_type = $para->[0]; + + DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (", + $self->_dump_curr_open(), ")\n"; + + if($para_type eq '=for') { + next if $self->_ponder_for($para,$curr_open,$paras); + + } elsif($para_type eq '=begin') { + next if $self->_ponder_begin($para,$curr_open,$paras); + + } elsif($para_type eq '=end') { + next if $self->_ponder_end($para,$curr_open,$paras); + + } elsif($para_type eq '~end') { # The virtual end-document signal + next if $self->_ponder_doc_end($para,$curr_open,$paras); + } + + + # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ + #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ + if(grep $_->[1]{'~ignore'}, @$curr_open) { + DEBUG > 1 and + print "Skipping $para_type paragraph because in ignore mode.\n"; + next; + } + #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ + # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ + + if($para_type eq '=pod') { + $self->_ponder_pod($para,$curr_open,$paras); + + } elsif($para_type eq '=over') { + next if $self->_ponder_over($para,$curr_open,$paras); + + } elsif($para_type eq '=back') { + next if $self->_ponder_back($para,$curr_open,$paras); + + } else { + + # All non-magical codes!!! + + # Here we start using $para_type for our own twisted purposes, to + # mean how it should get treated, not as what the element name + # should be. + + DEBUG > 1 and print "Pondering non-magical $para_type\n"; + + my $i; + + # Enforce some =headN discipline + if($para_type =~ m/^=head\d$/s + and ! $self->{'accept_heads_anywhere'} + and @$curr_open + and $curr_open->[-1][0] eq '=over' + ) { + DEBUG > 2 and print "'=$para_type' inside an '=over'!\n"; + $self->whine( + $para->[1]{'start_line'}, + "You forgot a '=back' before '$para_type'" + ); + unshift @$paras, ['=back', {}, ''], $para; # close the =over + next; + } + + + if($para_type eq '=item') { + + my $over; + unless(@$curr_open and ($over = $curr_open->[-1])->[0] eq '=over') { + $self->whine( + $para->[1]{'start_line'}, + "'=item' outside of any '=over'" + ); + unshift @$paras, + ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], + $para + ; + next; + } + + + my $over_type = $over->[1]{'~type'}; + + if(!$over_type) { + # Shouldn't happen1 + die "Typeless over in stack, starting at line " + . $over->[1]{'start_line'}; + + } elsif($over_type eq 'block') { + unless($curr_open->[-1][1]{'~bitched_about'}) { + $curr_open->[-1][1]{'~bitched_about'} = 1; + $self->whine( + $curr_open->[-1][1]{'start_line'}, + "You can't have =items (as at line " + . $para->[1]{'start_line'} + . ") unless the first thing after the =over is an =item" + ); + } + # Just turn it into a paragraph and reconsider it + $para->[0] = '~Para'; + unshift @$paras, $para; + next; + + } elsif($over_type eq 'text') { + my $item_type = $self->_get_item_type($para); + # That kills the content of the item if it's a number or bullet. + DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; + + if($item_type eq 'text') { + # Nothing special needs doing for 'text' + } elsif($item_type eq 'number' or $item_type eq 'bullet') { + die "Unknown item type $item_type" + unless $item_type eq 'number' or $item_type eq 'bullet'; + # Undo our clobbering: + push @$para, $para->[1]{'~orig_content'}; + delete $para->[1]{'number'}; + # Only a PROPER item-number element is allowed + # to have a number attribute. + } else { + die "Unhandled item type $item_type"; # should never happen + } + + # =item-text thingies don't need any assimilation, it seems. + + } elsif($over_type eq 'number') { + my $item_type = $self->_get_item_type($para); + # That kills the content of the item if it's a number or bullet. + DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; + + my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; + + if($item_type eq 'bullet') { + # Hm, it's not numeric. Correct for this. + $para->[1]{'number'} = $expected_value; + $self->whine( + $para->[1]{'start_line'}, + "Expected '=item $expected_value'" + ); + push @$para, $para->[1]{'~orig_content'}; + # restore the bullet, blocking the assimilation of next para + + } elsif($item_type eq 'text') { + # Hm, it's not numeric. Correct for this. + $para->[1]{'number'} = $expected_value; + $self->whine( + $para->[1]{'start_line'}, + "Expected '=item $expected_value'" + ); + # Text content will still be there and will block next ~Para + + } elsif($item_type ne 'number') { + die "Unknown item type $item_type"; # should never happen + + } elsif($expected_value == $para->[1]{'number'}) { + DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n"; + + } else { + DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'}, + " instead of the expected value of $expected_value\n"; + $self->whine( + $para->[1]{'start_line'}, + "You have '=item " . $para->[1]{'number'} . + "' instead of the expected '=item $expected_value'" + ); + $para->[1]{'number'} = $expected_value; # correcting!! + } + + if(@$para == 2) { + # For the cases where we /didn't/ push to @$para + if($paras->[0][0] eq '~Para') { + DEBUG and print "Assimilating following ~Para content into $over_type item\n"; + push @$para, splice @{shift @$paras},2; + } else { + DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; + push @$para, ''; # Just so it's not contentless + } + } + + + } elsif($over_type eq 'bullet') { + my $item_type = $self->_get_item_type($para); + # That kills the content of the item if it's a number or bullet. + DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; + + if($item_type eq 'bullet') { + # as expected! + + if( $para->[1]{'~_freaky_para_hack'} ) { + DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n"; + push @$para, delete $para->[1]{'~_freaky_para_hack'}; + } + + } elsif($item_type eq 'number') { + $self->whine( + $para->[1]{'start_line'}, + "Expected '=item *'" + ); + push @$para, $para->[1]{'~orig_content'}; + # and block assimilation of the next paragraph + delete $para->[1]{'number'}; + # Only a PROPER item-number element is allowed + # to have a number attribute. + } elsif($item_type eq 'text') { + $self->whine( + $para->[1]{'start_line'}, + "Expected '=item *'" + ); + # But doesn't need processing. But it'll block assimilation + # of the next para. + } else { + die "Unhandled item type $item_type"; # should never happen + } + + if(@$para == 2) { + # For the cases where we /didn't/ push to @$para + if($paras->[0][0] eq '~Para') { + DEBUG and print "Assimilating following ~Para content into $over_type item\n"; + push @$para, splice @{shift @$paras},2; + } else { + DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; + push @$para, ''; # Just so it's not contentless + } + } + + } else { + die "Unhandled =over type \"$over_type\"?"; + # Shouldn't happen! + } + + $para_type = 'Plain'; + $para->[0] .= '-' . $over_type; + # Whew. Now fall thru and process it. + + + } elsif($para_type eq '=extend') { + # Well, might as well implement it here. + $self->_ponder_extend($para); + next; # and skip + } elsif($para_type eq '=encoding') { + # Not actually acted on here, but we catch errors here. + $self->_handle_encoding_second_level($para); + + next; # and skip + } elsif($para_type eq '~Verbatim') { + $para->[0] = 'Verbatim'; + $para_type = '?Verbatim'; + } elsif($para_type eq '~Para') { + $para->[0] = 'Para'; + $para_type = '?Plain'; + } elsif($para_type eq 'Data') { + $para->[0] = 'Data'; + $para_type = '?Data'; + } elsif( $para_type =~ s/^=//s + and defined( $para_type = $self->{'accept_directives'}{$para_type} ) + ) { + DEBUG > 1 and print " Pondering known directive ${$para}[0] as $para_type\n"; + } else { + # An unknown directive! + DEBUG > 1 and printf "Unhandled directive %s (Handled: %s)\n", + $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} ) + ; + $self->whine( + $para->[1]{'start_line'}, + "Unknown directive: $para->[0]" + ); + + # And maybe treat it as text instead of just letting it go? + next; + } + + if($para_type =~ s/^\?//s) { + if(! @$curr_open) { # usual case + DEBUG and print "Treating $para_type paragraph as such because stack is empty.\n"; + } else { + my @fors = grep $_->[0] eq '=for', @$curr_open; + DEBUG > 1 and print "Containing fors: ", + join(',', map $_->[1]{'target'}, @fors), "\n"; + + if(! @fors) { + DEBUG and print "Treating $para_type paragraph as such because stack has no =for's\n"; + + #} elsif(grep $_->[1]{'~resolve'}, @fors) { + #} elsif(not grep !$_->[1]{'~resolve'}, @fors) { + } elsif( $fors[-1][1]{'~resolve'} ) { + # Look to the immediately containing for + + if($para_type eq 'Data') { + DEBUG and print "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; + $para->[0] = 'Para'; + $para_type = 'Plain'; + } else { + DEBUG and print "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; + } + } else { + DEBUG and print "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n"; + $para->[0] = $para_type = 'Data'; + } + } + } + + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + if($para_type eq 'Plain') { + $self->_ponder_Plain($para); + } elsif($para_type eq 'Verbatim') { + $self->_ponder_Verbatim($para); + } elsif($para_type eq 'Data') { + $self->_ponder_Data($para); + } else { + die "\$para type is $para_type -- how did that happen?"; + # Shouldn't happen. + } + + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + $para->[0] =~ s/^[~=]//s; + + DEBUG and print "\n", pretty($para), "\n"; + + # traverse the treelet (which might well be just one string scalar) + $self->{'content_seen'} ||= 1; + $self->_traverse_treelet_bit(@$para); + } + } + + return; +} + +########################################################################### +# The sub-ponderers... + + + +sub _ponder_for { + my ($self,$para,$curr_open,$paras) = @_; + + # Fake it out as a begin/end + my $target; + + if(grep $_->[1]{'~ignore'}, @$curr_open) { + DEBUG > 1 and print "Ignoring ignorable =for\n"; + return 1; + } + + for(my $i = 2; $i < @$para; ++$i) { + if($para->[$i] =~ s/^\s*(\S+)\s*//s) { + $target = $1; + last; + } + } + unless(defined $target) { + $self->whine( + $para->[1]{'start_line'}, + "=for without a target?" + ); + return 1; + } + DEBUG > 1 and + print "Faking out a =for $target as a =begin $target / =end $target\n"; + + $para->[0] = 'Data'; + + unshift @$paras, + ['=begin', + {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, + $target, + ], + $para, + ['=end', + {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, + $target, + ], + ; + + return 1; +} + +sub _ponder_begin { + my ($self,$para,$curr_open,$paras) = @_; + my $content = join ' ', splice @$para, 2; + $content =~ s/^\s+//s; + $content =~ s/\s+$//s; + unless(length($content)) { + $self->whine( + $para->[1]{'start_line'}, + "=begin without a target?" + ); + DEBUG and print "Ignoring targetless =begin\n"; + return 1; + } + + my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/; + $para->[1]{'title'} = $title if ($title); + $para->[1]{'target'} = $target; # without any ':' + $content = $target; # strip off the title + + $content =~ s/^:!/!:/s; + my $neg; # whether this is a negation-match + $neg = 1 if $content =~ s/^!//s; + my $to_resolve; # whether to process formatting codes + $to_resolve = 1 if $content =~ s/^://s; + + my $dont_ignore; # whether this target matches us + + foreach my $target_name ( + split(',', $content, -1), + $neg ? () : '*' + ) { + DEBUG > 2 and + print " Considering whether =begin $content matches $target_name\n"; + next unless $self->{'accept_targets'}{$target_name}; + + DEBUG > 2 and + print " It DOES match the acceptable target $target_name!\n"; + $to_resolve = 1 + if $self->{'accept_targets'}{$target_name} eq 'force_resolve'; + $dont_ignore = 1; + $para->[1]{'target_matching'} = $target_name; + last; # stop looking at other target names + } + + if($neg) { + if( $dont_ignore ) { + $dont_ignore = ''; + delete $para->[1]{'target_matching'}; + DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n"; + } else { + $dont_ignore = 1; + $para->[1]{'target_matching'} = '!'; + DEBUG > 2 and print " But the leading ! means that this IS a match!\n"; + } + } + + $para->[0] = '=for'; # Just what we happen to call these, internally + $para->[1]{'~really'} ||= '=begin'; + $para->[1]{'~ignore'} = (! $dont_ignore) || 0; + $para->[1]{'~resolve'} = $to_resolve || 0; + + DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '', + "ignore contents of this region\n"; + DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ", + ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n"; + DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n"; + + push @$curr_open, $para; + if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) { + DEBUG > 1 and print "Ignoring ignorable =begin\n"; + } else { + $self->{'content_seen'} ||= 1; + $self->_handle_element_start((my $scratch='for'), $para->[1]); + } + + return 1; +} + +sub _ponder_end { + my ($self,$para,$curr_open,$paras) = @_; + my $content = join ' ', splice @$para, 2; + $content =~ s/^\s+//s; + $content =~ s/\s+$//s; + DEBUG and print "Ogling '=end $content' directive\n"; + + unless(length($content)) { + $self->whine( + $para->[1]{'start_line'}, + "'=end' without a target?" . ( + ( @$curr_open and $curr_open->[-1][0] eq '=for' ) + ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' ) + : '' + ) + ); + DEBUG and print "Ignoring targetless =end\n"; + return 1; + } + + unless($content =~ m/^\S+$/) { # i.e., unless it's one word + $self->whine( + $para->[1]{'start_line'}, + "'=end $content' is invalid. (Stack: " + . $self->_dump_curr_open() . ')' + ); + DEBUG and print "Ignoring mistargetted =end $content\n"; + return 1; + } + + unless(@$curr_open and $curr_open->[-1][0] eq '=for') { + $self->whine( + $para->[1]{'start_line'}, + "=end $content without matching =begin. (Stack: " + . $self->_dump_curr_open() . ')' + ); + DEBUG and print "Ignoring mistargetted =end $content\n"; + return 1; + } + + unless($content eq $curr_open->[-1][1]{'target'}) { + $self->whine( + $para->[1]{'start_line'}, + "=end $content doesn't match =begin " + . $curr_open->[-1][1]{'target'} + . ". (Stack: " + . $self->_dump_curr_open() . ')' + ); + DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n"; + return 1; + } + + # Else it's okay to close... + if(grep $_->[1]{'~ignore'}, @$curr_open) { + DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n"; + # And that may be because of this to-be-closed =for region, or some + # other one, but it doesn't matter. + } else { + $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'}; + # what's that for? + + $self->{'content_seen'} ||= 1; + $self->_handle_element_end( my $scratch = 'for' ); + } + DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n"; + pop @$curr_open; + + return 1; +} + +sub _ponder_doc_end { + my ($self,$para,$curr_open,$paras) = @_; + if(@$curr_open) { # Deal with things left open + DEBUG and print "Stack is nonempty at end-document: (", + $self->_dump_curr_open(), ")\n"; + + DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n"; + unshift @$paras, $self->_closers_for_all_curr_open; + # Make sure there is exactly one ~end in the parastack, at the end: + @$paras = grep $_->[0] ne '~end', @$paras; + push @$paras, $para, $para; + # We need two -- once for the next cycle where we + # generate errata, and then another to be at the end + # when that loop back around to process the errata. + return 1; + + } else { + DEBUG and print "Okay, stack is empty now.\n"; + } + + # Try generating errata section, if applicable + unless($self->{'~tried_gen_errata'}) { + $self->{'~tried_gen_errata'} = 1; + my @extras = $self->_gen_errata(); + if(@extras) { + unshift @$paras, @extras; + DEBUG and print "Generated errata... relooping...\n"; + return 1; # I.e., loop around again to process these fake-o paragraphs + } + } + + splice @$paras; # Well, that's that for this paragraph buffer. + DEBUG and print "Throwing end-document event.\n"; + + $self->_handle_element_end( my $scratch = 'Document' ); + return 1; # Hasta la byebye +} + +sub _ponder_pod { + my ($self,$para,$curr_open,$paras) = @_; + $self->whine( + $para->[1]{'start_line'}, + "=pod directives shouldn't be over one line long! Ignoring all " + . (@$para - 2) . " lines of content" + ) if @$para > 3; + # Content is always ignored. + return; +} + +sub _ponder_over { + my ($self,$para,$curr_open,$paras) = @_; + return 1 unless @$paras; + my $list_type; + + if($paras->[0][0] eq '=item') { # most common case + $list_type = $self->_get_initial_item_type($paras->[0]); + + } elsif($paras->[0][0] eq '=back') { + # Ignore empty lists. TODO: make this an option? + shift @$paras; + return 1; + + } elsif($paras->[0][0] eq '~end') { + $self->whine( + $para->[1]{'start_line'}, + "=over is the last thing in the document?!" + ); + return 1; # But feh, ignore it. + } else { + $list_type = 'block'; + } + $para->[1]{'~type'} = $list_type; + push @$curr_open, $para; + # yes, we reuse the paragraph as a stack item + + my $content = join ' ', splice @$para, 2; + my $overness; + if($content =~ m/^\s*$/s) { + $para->[1]{'indent'} = 4; + } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) { + no integer; + $para->[1]{'indent'} = $1; + if($1 == 0) { + $self->whine( + $para->[1]{'start_line'}, + "Can't have a 0 in =over $content" + ); + $para->[1]{'indent'} = 4; + } + } else { + $self->whine( + $para->[1]{'start_line'}, + "=over should be: '=over' or '=over positive_number'" + ); + $para->[1]{'indent'} = 4; + } + DEBUG > 1 and print "=over found of type $list_type\n"; + + $self->{'content_seen'} ||= 1; + $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]); + + return; +} + +sub _ponder_back { + my ($self,$para,$curr_open,$paras) = @_; + # TODO: fire off or or ?? + + my $content = join ' ', splice @$para, 2; + if($content =~ m/\S/) { + $self->whine( + $para->[1]{'start_line'}, + "=back doesn't take any parameters, but you said =back $content" + ); + } + + if(@$curr_open and $curr_open->[-1][0] eq '=over') { + DEBUG > 1 and print "=back happily closes matching =over\n"; + # Expected case: we're closing the most recently opened thing + #my $over = pop @$curr_open; + $self->{'content_seen'} ||= 1; + $self->_handle_element_end( my $scratch = + 'over-' . ( (pop @$curr_open)->[1]{'~type'} ) + ); + } else { + DEBUG > 1 and print "=back found without a matching =over. Stack: (", + join(', ', map $_->[0], @$curr_open), ").\n"; + $self->whine( + $para->[1]{'start_line'}, + '=back without =over' + ); + return 1; # and ignore it + } +} + +sub _ponder_item { + my ($self,$para,$curr_open,$paras) = @_; + my $over; + unless(@$curr_open and ($over = $curr_open->[-1])->[0] eq '=over') { + $self->whine( + $para->[1]{'start_line'}, + "'=item' outside of any '=over'" + ); + unshift @$paras, + ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], + $para + ; + return 1; + } + + + my $over_type = $over->[1]{'~type'}; + + if(!$over_type) { + # Shouldn't happen1 + die "Typeless over in stack, starting at line " + . $over->[1]{'start_line'}; + + } elsif($over_type eq 'block') { + unless($curr_open->[-1][1]{'~bitched_about'}) { + $curr_open->[-1][1]{'~bitched_about'} = 1; + $self->whine( + $curr_open->[-1][1]{'start_line'}, + "You can't have =items (as at line " + . $para->[1]{'start_line'} + . ") unless the first thing after the =over is an =item" + ); + } + # Just turn it into a paragraph and reconsider it + $para->[0] = '~Para'; + unshift @$paras, $para; + return 1; + + } elsif($over_type eq 'text') { + my $item_type = $self->_get_item_type($para); + # That kills the content of the item if it's a number or bullet. + DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; + + if($item_type eq 'text') { + # Nothing special needs doing for 'text' + } elsif($item_type eq 'number' or $item_type eq 'bullet') { + die "Unknown item type $item_type" + unless $item_type eq 'number' or $item_type eq 'bullet'; + # Undo our clobbering: + push @$para, $para->[1]{'~orig_content'}; + delete $para->[1]{'number'}; + # Only a PROPER item-number element is allowed + # to have a number attribute. + } else { + die "Unhandled item type $item_type"; # should never happen + } + + # =item-text thingies don't need any assimilation, it seems. + + } elsif($over_type eq 'number') { + my $item_type = $self->_get_item_type($para); + # That kills the content of the item if it's a number or bullet. + DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; + + my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; + + if($item_type eq 'bullet') { + # Hm, it's not numeric. Correct for this. + $para->[1]{'number'} = $expected_value; + $self->whine( + $para->[1]{'start_line'}, + "Expected '=item $expected_value'" + ); + push @$para, $para->[1]{'~orig_content'}; + # restore the bullet, blocking the assimilation of next para + + } elsif($item_type eq 'text') { + # Hm, it's not numeric. Correct for this. + $para->[1]{'number'} = $expected_value; + $self->whine( + $para->[1]{'start_line'}, + "Expected '=item $expected_value'" + ); + # Text content will still be there and will block next ~Para + + } elsif($item_type ne 'number') { + die "Unknown item type $item_type"; # should never happen + + } elsif($expected_value == $para->[1]{'number'}) { + DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n"; + + } else { + DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'}, + " instead of the expected value of $expected_value\n"; + $self->whine( + $para->[1]{'start_line'}, + "You have '=item " . $para->[1]{'number'} . + "' instead of the expected '=item $expected_value'" + ); + $para->[1]{'number'} = $expected_value; # correcting!! + } + + if(@$para == 2) { + # For the cases where we /didn't/ push to @$para + if($paras->[0][0] eq '~Para') { + DEBUG and print "Assimilating following ~Para content into $over_type item\n"; + push @$para, splice @{shift @$paras},2; + } else { + DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; + push @$para, ''; # Just so it's not contentless + } + } + + + } elsif($over_type eq 'bullet') { + my $item_type = $self->_get_item_type($para); + # That kills the content of the item if it's a number or bullet. + DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; + + if($item_type eq 'bullet') { + # as expected! + + if( $para->[1]{'~_freaky_para_hack'} ) { + DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n"; + push @$para, delete $para->[1]{'~_freaky_para_hack'}; + } + + } elsif($item_type eq 'number') { + $self->whine( + $para->[1]{'start_line'}, + "Expected '=item *'" + ); + push @$para, $para->[1]{'~orig_content'}; + # and block assimilation of the next paragraph + delete $para->[1]{'number'}; + # Only a PROPER item-number element is allowed + # to have a number attribute. + } elsif($item_type eq 'text') { + $self->whine( + $para->[1]{'start_line'}, + "Expected '=item *'" + ); + # But doesn't need processing. But it'll block assimilation + # of the next para. + } else { + die "Unhandled item type $item_type"; # should never happen + } + + if(@$para == 2) { + # For the cases where we /didn't/ push to @$para + if($paras->[0][0] eq '~Para') { + DEBUG and print "Assimilating following ~Para content into $over_type item\n"; + push @$para, splice @{shift @$paras},2; + } else { + DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; + push @$para, ''; # Just so it's not contentless + } + } + + } else { + die "Unhandled =over type \"$over_type\"?"; + # Shouldn't happen! + } + $para->[0] .= '-' . $over_type; + + return; +} + +sub _ponder_Plain { + my ($self,$para) = @_; + DEBUG and print " giving plain treatment...\n"; + unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' ) + or $para->[1]{'~cooked'} + ) { + push @$para, + @{$self->_make_treelet( + join("\n", splice(@$para, 2)), + $para->[1]{'start_line'} + )}; + } + # Empty paragraphs don't need a treelet for any reason I can see. + # And precooked paragraphs already have a treelet. + return; +} + +sub _ponder_Verbatim { + my ($self,$para) = @_; + DEBUG and print " giving verbatim treatment...\n"; + + $para->[1]{'xml:space'} = 'preserve'; + for(my $i = 2; $i < @$para; $i++) { + foreach my $line ($para->[$i]) { # just for aliasing + while( $line =~ + # Sort of adapted from Text::Tabs -- yes, it's hardwired in that + # tabs are at every EIGHTH column. For portability, it has to be + # one setting everywhere, and 8th wins. + s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e + ) {} + + # TODO: whinge about (or otherwise treat) unindented or overlong lines + + } + } + + # Now the VerbatimFormatted hoodoo... + if( $self->{'accept_codes'} and + $self->{'accept_codes'}{'VerbatimFormatted'} + ) { + while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para } + # Kill any number of terminal newlines + $self->_verbatim_format($para); + } elsif ($self->{'codes_in_verbatim'}) { + push @$para, + @{$self->_make_treelet( + join("\n", splice(@$para, 2)), + $para->[1]{'start_line'}, $para->[1]{'xml:space'} + )}; + $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines + } else { + push @$para, join "\n", splice(@$para, 2) if @$para > 3; + $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines + } + return; +} + +sub _ponder_Data { + my ($self,$para) = @_; + DEBUG and print " giving data treatment...\n"; + $para->[1]{'xml:space'} = 'preserve'; + push @$para, join "\n", splice(@$para, 2) if @$para > 3; + return; +} + + + + +########################################################################### + +sub _traverse_treelet_bit { # for use only by the routine above + my($self, $name) = splice @_,0,2; + + my $scratch; + $self->_handle_element_start(($scratch=$name), shift @_); + + foreach my $x (@_) { + if(ref($x)) { + &_traverse_treelet_bit($self, @$x); + } else { + $self->_handle_text($x); + } + } + + $self->_handle_element_end($scratch=$name); + return; +} + +#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + +sub _closers_for_all_curr_open { + my $self = $_[0]; + my @closers; + foreach my $still_open (@{ $self->{'curr_open'} || return }) { + my @copy = @$still_open; + $copy[1] = {%{ $copy[1] }}; + #$copy[1]{'start_line'} = -1; + if($copy[0] eq '=for') { + $copy[0] = '=end'; + } elsif($copy[0] eq '=over') { + $copy[0] = '=back'; + } else { + die "I don't know how to auto-close an open $copy[0] region"; + } + + unless( @copy > 2 ) { + push @copy, $copy[1]{'target'}; + $copy[-1] = '' unless defined $copy[-1]; + # since =over's don't have targets + } + + DEBUG and print "Queuing up fake-o event: ", pretty(\@copy), "\n"; + unshift @closers, \@copy; + } + return @closers; +} + +#-------------------------------------------------------------------------- + +sub _verbatim_format { + my($it, $p) = @_; + + my $formatting; + + for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines + DEBUG and print "_verbatim_format appends a newline to $i: $p->[$i]\n"; + $p->[$i] .= "\n"; + # Unlike with simple Verbatim blocks, we don't end up just doing + # a join("\n", ...) on the contents, so we have to append a + # newline to ever line, and then nix the last one later. + } + + if( DEBUG > 4 ) { + print "<<\n"; + for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines + print "_verbatim_format $i: $p->[$i]"; + } + print ">>\n"; + } + + for(my $i = $#$p; $i > 2; $i--) { + # work backwards over the lines, except the first (#2) + + #next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s + # and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s; + # look at a formatty line preceding a nonformatty one + DEBUG > 5 and print "Scrutinizing line $i: $$p[$i]\n"; + if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) { + DEBUG > 5 and print " It's a formatty line. ", + "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n"; + + if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) { + DEBUG > 5 and print " Previous line is formatty! Skipping this one.\n"; + next; + } else { + DEBUG > 5 and print " Previous line is non-formatty! Yay!\n"; + } + } else { + DEBUG > 5 and print " It's not a formatty line. Ignoring\n"; + next; + } + + # A formatty line has to have #: in the first two columns, and uses + # "^" to mean bold, "/" to mean underline, and "%" to mean bold italic. + # Example: + # What do you want? i like pie. [or whatever] + # #:^^^^^^^^^^^^^^^^^ ///////////// + + + DEBUG > 4 and print "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n"; + + $formatting = ' ' . $1; + $formatting =~ s/\s+$//s; # nix trailing whitespace + unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op + splice @$p,$i,1; # remove this line + $i--; # don't consider next line + next; + } + + if( length($formatting) >= length($p->[$i-1]) ) { + $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' '; + } else { + $formatting .= ' ' x (length($p->[$i-1]) - length($formatting)); + } + # Make $formatting and the previous line be exactly the same length, + # with $formatting having a " " as the last character. + + DEBUG > 4 and print "Formatting <$formatting> on <", $p->[$i-1], ">\n"; + + + my @new_line; + while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) { + #print "Format matches $1\n"; + + if($2) { + #print "SKIPPING <$2>\n"; + push @new_line, + substr($p->[$i-1], pos($formatting)-length($1), length($1)); + } else { + #print "SNARING $+\n"; + push @new_line, [ + ( + $3 ? 'VerbatimB' : + $4 ? 'VerbatimI' : + $5 ? 'VerbatimBI' : die("Should never get called") + ), {}, + substr($p->[$i-1], pos($formatting)-length($1), length($1)) + ]; + #print "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n"; + } + } + my @nixed = + splice @$p, $i-1, 2, @new_line; # replace myself and the next line + DEBUG > 10 and print "Nixed count: ", scalar(@nixed), "\n"; + + DEBUG > 6 and print "New version of the above line is these tokens (", + scalar(@new_line), "):", + map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n"; + $i--; # So the next line we scrutinize is the line before the one + # that we just went and formatted + } + + $p->[0] = 'VerbatimFormatted'; + + # Collapse adjacent text nodes, just for kicks. + for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last + if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) { + DEBUG > 5 and print "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n"; + $p->[$i] .= splice @$p, $i+1, 1; # merge + --$i; # and back up + } + } + + # Now look for the last text token, and remove the terminal newline + for( my $i = $#$p; $i >= 2; $i-- ) { + # work backwards over the tokens, even the first + if( !ref($p->[$i]) ) { + if($p->[$i] =~ s/\n$//s) { + DEBUG > 5 and print "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n"; + } else { + DEBUG > 5 and print + "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n"; + } + last; # we only want the next one + } + } + + return; +} + + +#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + + +sub _treelet_from_formatting_codes { + # Given a paragraph, returns a treelet. Full of scary tokenizing code. + # Like [ '~Top', {'start_line' => $start_line}, + # "I like ", + # [ 'B', {}, "pie" ], + # "!" + # ] + + my($self, $para, $start_line, $preserve_space) = @_; + + my $treelet = ['~Top', {'start_line' => $start_line},]; + + unless ($preserve_space || $self->{'preserve_whitespace'}) { + $para =~ s/\. /\.\xA0 /g if $self->{'fullstop_space_harden'}; + + $para =~ s/\s+/ /g; # collapse and trim all whitespace first. + $para =~ s/ $//; + $para =~ s/^ //; + } + + # Only apparent problem the above code is that N<< >> turns into + # N<< >>. But then, word wrapping does that too! So don't do that! + + my @stack; + my @lineage = ($treelet); + + DEBUG > 4 and print "Paragraph:\n$para\n\n"; + + # Here begins our frightening tokenizer RE. The following regex matches + # text in four main parts: + # + # * Start-codes. The first alternative matches C< or C<<, the latter + # followed by some whitespace. $1 will hold the entire start code + # (including any space following a multiple-angle-bracket delimiter), + # and $2 will hold only the additional brackets past the first in a + # multiple-bracket delimiter. length($2) + 1 will be the number of + # closing brackets we have to find. + # + # * Closing brackets. Match some amount of whitespace followed by + # multiple close brackets. The logic to see if this closes anything + # is down below. Note that in order to parse C<< >> correctly, we + # have to use look-behind (?<=\s\s), since the match of the starting + # code will have consumed the whitespace. + # + # * A single closing bracket, to close a simple code like C<>. + # + # * Something that isn't a start or end code. We have to be careful + # about accepting whitespace, since perlpodspec says that any whitespace + # before a multiple-bracket closing delimiter should be ignored. + # + while($para =~ + m/\G + (?: + # Match starting codes, including the whitespace following a + # multiple-delimiter start code. $1 gets the whole start code and + # $2 gets all but one of the {2,}) + | + (\s?>) # $5: simple end-codes + | + ( # $6: stuff containing no start-codes or end-codes + (?: + [^A-Z\s>] + | + (?: + [A-Z](?!<) + ) + | + # whitespace is ok, but we don't want to eat the whitespace before + # a multiple-bracket end code. + # NOTE: we may still have problems with e.g. S<< >> + (?: + \s(?!\s*>{2,}) + ) + )+ + ) + ) + /xgo + ) { + DEBUG > 4 and print "\nParagraphic tokenstack = (@stack)\n"; + if(defined $1) { + if(defined $2) { + DEBUG > 3 and print "Found complex start-text code \"$1\"\n"; + push @stack, length($2) + 1; + # length of the necessary complex end-code string + } else { + DEBUG > 3 and print "Found simple start-text code \"$1\"\n"; + push @stack, 0; # signal that we're looking for simple + } + push @lineage, [ substr($1,0,1), {}, ]; # new node object + push @{ $lineage[-2] }, $lineage[-1]; + + } elsif(defined $4) { + DEBUG > 3 and print "Found apparent complex end-text code \"$3$4\"\n"; + # This is where it gets messy... + if(! @stack) { + # We saw " >>>>" but needed nothing. This is ALL just stuff then. + DEBUG > 4 and print " But it's really just stuff.\n"; + push @{ $lineage[-1] }, $3, $4; + next; + } elsif(!$stack[-1]) { + # We saw " >>>>" but needed only ">". Back pos up. + DEBUG > 4 and print " And that's more than we needed to close simple.\n"; + push @{ $lineage[-1] }, $3; # That was a for-real space, too. + pos($para) = pos($para) - length($4) + 1; + } elsif($stack[-1] == length($4)) { + # We found " >>>>", and it was exactly what we needed. Commonest case. + DEBUG > 4 and print " And that's exactly what we needed to close complex.\n"; + } elsif($stack[-1] < length($4)) { + # We saw " >>>>" but needed only " >>". Back pos up. + DEBUG > 4 and print " And that's more than we needed to close complex.\n"; + pos($para) = pos($para) - length($4) + $stack[-1]; + } else { + # We saw " >>>>" but needed " >>>>>>". So this is all just stuff! + DEBUG > 4 and print " But it's really just stuff, because we needed more.\n"; + push @{ $lineage[-1] }, $3, $4; + next; + } + #print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n"; + + push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; + # Keep the element from being childless + + pop @stack; + pop @lineage; + + } elsif(defined $5) { + DEBUG > 3 and print "Found apparent simple end-text code \"$4\"\n"; + + if(@stack and ! $stack[-1]) { + # We're indeed expecting a simple end-code + DEBUG > 4 and print " It's indeed an end-code.\n"; + + if(length($5) == 2) { # There was a space there: " >" + push @{ $lineage[-1] }, ' '; + } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element + push @{ $lineage[-1] }, ''; # keep it from being really childless + } + + pop @stack; + pop @lineage; + } else { + DEBUG > 4 and print " It's just stuff.\n"; + push @{ $lineage[-1] }, $5; + } + + } elsif(defined $6) { + DEBUG > 3 and print "Found stuff \"$6\"\n"; + push @{ $lineage[-1] }, $6; + + } else { + # should never ever ever ever happen + DEBUG and print "AYYAYAAAAA at line ", __LINE__, "\n"; + die "SPORK 512512!"; + } + } + + if(@stack) { # Uhoh, some sequences weren't closed. + my $x= "..."; + while(@stack) { + push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; + # Hmmmmm! + + my $code = (pop @lineage)->[0]; + my $ender_length = pop @stack; + if($ender_length) { + --$ender_length; + $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length); + } else { + $x = $code . "<$x>"; + } + } + DEBUG > 1 and print "Unterminated $x sequence\n"; + $self->whine($start_line, + "Unterminated $x sequence", + ); + } + + return $treelet; +} + +#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + +sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol) + return stringify_lol($_[1]); +} + +sub stringify_lol { # function: stringify_lol($lol) + my $string_form = ''; + _stringify_lol( $_[0] => \$string_form ); + return $string_form; +} + +sub _stringify_lol { # the real recursor + my($lol, $to) = @_; + use UNIVERSAL (); + for(my $i = 2; $i < @$lol; ++$i) { + if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) { + _stringify_lol( $lol->[$i], $to); # recurse! + } else { + $$to .= $lol->[$i]; + } + } + return; +} + +#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + +sub _dump_curr_open { # return a string representation of the stack + my $curr_open = $_[0]{'curr_open'}; + + return '[empty]' unless @$curr_open; + return join '; ', + map {; + ($_->[0] eq '=for') + ? ( ($_->[1]{'~really'} || '=over') + . ' ' . $_->[1]{'target'}) + : $_->[0] + } + @$curr_open + ; +} + +########################################################################### +my %pretty_form = ( + "\a" => '\a', # ding! + "\b" => '\b', # BS + "\e" => '\e', # ESC + "\f" => '\f', # FF + "\t" => '\t', # tab + "\cm" => '\cm', + "\cj" => '\cj', + "\n" => '\n', # probably overrides one of either \cm or \cj + '"' => '\"', + '\\' => '\\\\', + '$' => '\\$', + '@' => '\\@', + '%' => '\\%', + '#' => '\\#', +); + +sub pretty { # adopted from Class::Classless + # Not the most brilliant routine, but passable. + # Don't give it a cyclic data structure! + my @stuff = @_; # copy + my $x; + my $out = + # join ",\n" . + join ", ", + map {; + if(!defined($_)) { + "undef"; + } elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') { + $x = "[ " . pretty(@$_) . " ]" ; + $x; + } elsif(ref($_) eq 'SCALAR') { + $x = "\\" . pretty($$_) ; + $x; + } elsif(ref($_) eq 'HASH') { + my $hr = $_; + $x = "{" . join(", ", + map(pretty($_) . '=>' . pretty($hr->{$_}), + sort keys %$hr ) ) . "}" ; + $x; + } elsif(!length($_)) { q{''} # empty string + } elsif( + $_ eq '0' # very common case + or( + m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s + and $_ ne '-0' # the strange case that that RE lets thru + ) + ) { $_; + } else { + if( chr(65) eq 'A' ) { + s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])> + #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; + <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; + } else { + # We're in some crazy non-ASCII world! + s<([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])> + #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; + <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; + } + qq{"$_"}; + } + } @stuff; + # $out =~ s/\n */ /g if length($out) < 75; + return $out; +} + +#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + +# A rather unsubtle method of blowing away all the state information +# from a parser object so it can be reused. Provided as a utility for +# backward compatibilty in Pod::Man, etc. but not recommended for +# general use. + +sub reinit { + my $self = shift; + foreach (qw(source_dead source_filename doc_has_started +start_of_pod_block content_seen last_was_blank paras curr_open +line_count pod_para_count in_pod ~tried_gen_errata errata errors_seen +Title)) { + + delete $self->{$_}; + } +} + +#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +1; +