X-Git-Url: http://git.maemo.org/git/?p=dh-make-perl;a=blobdiff_plain;f=dev%2Fi386%2Flibhtml-tree-perl%2Flibhtml-tree-perl-3.23%2Flib%2FHTML%2FParse.pm;fp=dev%2Fi386%2Flibhtml-tree-perl%2Flibhtml-tree-perl-3.23%2Flib%2FHTML%2FParse.pm;h=1575b4465d304d1725d0a68c9ffed82c86c6f0a1;hp=0000000000000000000000000000000000000000;hb=8977e561d8a9eae6959218b0306c9df2056a38a9;hpb=df794b845212301ea0d267c919232538bfef356a diff --git a/dev/i386/libhtml-tree-perl/libhtml-tree-perl-3.23/lib/HTML/Parse.pm b/dev/i386/libhtml-tree-perl/libhtml-tree-perl-3.23/lib/HTML/Parse.pm new file mode 100644 index 0000000..1575b44 --- /dev/null +++ b/dev/i386/libhtml-tree-perl/libhtml-tree-perl-3.23/lib/HTML/Parse.pm @@ -0,0 +1,155 @@ +package HTML::Parse; + +=head1 NAME + +HTML::Parse - Deprecated, a wrapper around HTML::TreeBuilder + +=head1 SYNOPSIS + + See the documentation for HTML::TreeBuilder + +=head1 DESCRIPTION + +Disclaimer: This module is provided only for backwards compatibility +with earlier versions of this library. New code should I<not> use +this module, and should really use the HTML::Parser and +HTML::TreeBuilder modules directly, instead. + +The C<HTML::Parse> module provides functions to parse HTML documents. +There are two functions exported by this module: + +=over 4 + +=item parse_html($html) or parse_html($html, $obj) + +This function is really just a synonym for $obj->parse($html) and $obj +is assumed to be a subclass of C<HTML::Parser>. Refer to +L<HTML::Parser> for more documentation. + +If $obj is not specified, the $obj will default to an internally +created new C<HTML::TreeBuilder> object configured with strict_comment() +turned on. That class implements a parser that builds (and is) an HTML +syntax tree with HTML::Element objects as nodes. + +The return value from parse_html() is $obj. + +=item parse_htmlfile($file, [$obj]) + +Same as parse_html(), but pulls the HTML to parse from the named file. + +Returns C<undef> if the file could not be opened, or $obj otherwise. 
+ +=back + +When a C<HTML::TreeBuilder> object is created, the following variables +control how parsing takes place: + +=over 4 + +=item $HTML::Parse::IMPLICIT_TAGS + +Setting this variable to true will instruct the parser to try to +deduce implicit elements and implicit end tags. If this variable is +false you get a parse tree that just reflects the text as it stands. +Might be useful for quick & dirty parsing. Default is true. + +Implicit elements have the implicit() attribute set. + +=item $HTML::Parse::IGNORE_UNKNOWN + +This variable controls whether unknown tags should be represented as +elements in the parse tree. Default is true. + +=item $HTML::Parse::IGNORE_TEXT + +Do not represent the text content of elements. This saves space if +all you want is to examine the structure of the document. Default is +false. + +=item $HTML::Parse::WARN + +Call warn() with an appropriate message for syntax errors. Default is +false. + +=back + +=head1 REMEMBER! + +HTML::TreeBuilder objects should be explicitly destroyed when you're +finished with them. See L<HTML::TreeBuilder>. + +=head1 SEE ALSO + +L<HTML::TreeBuilder>, L<HTML::Element>, L<HTML::Parser> + +=head1 COPYRIGHT + +Copyright 1995-1998 Gisle Aas, 1999-2004 Sean M. Burke, 2005 Andy Lester, +2006 Pete Krawczyk. + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +This program is distributed in the hope that it will be useful, but +without any warranty; without even the implied warranty of +merchantability or fitness for a particular purpose. + +=head1 AUTHOR + +Currently maintained by Pete Krawczyk C<< <petek@cpan.org> >> + +Original authors: Gisle Aas, Sean Burke and Andy Lester. 
=pod

=cut

# End of the module's POD; executable code starts here.

use strict;
use vars qw(@ISA @EXPORT $VERSION
            $IMPLICIT_TAGS $IGNORE_UNKNOWN $IGNORE_TEXT $WARN
           );

require Exporter;
@ISA    = qw(Exporter);
@EXPORT = qw(parse_html parse_htmlfile);

# Backwards compatibility: these package variables are read every time
# _new_tree_maker() builds a default parser, so callers may change them
# (e.g. $HTML::Parse::IGNORE_TEXT = 1) before calling parse_html() or
# parse_htmlfile().
$IMPLICIT_TAGS  = 1;
$IGNORE_UNKNOWN = 1;
$IGNORE_TEXT    = 0;
$WARN           = 0;

require HTML::TreeBuilder;

$VERSION = '2.71';


# parse_html($html)
# parse_html($html, $obj)
#
# Feeds the string $html to $obj->parse() and returns whatever that call
# returns.  When $obj is missing (or false) a parser is created with
# _new_tree_maker().
#
# The ($;$) prototype is part of the historical interface and is kept so
# that existing call sites keep parsing the same way.
sub parse_html ($;$)
{
    my $p = $_[1];
    $p = _new_tree_maker() unless $p;

    # Pass $_[0] directly instead of copying it into a lexical first, so a
    # large document is not duplicated in memory.
    return $p->parse($_[0]);
}


# parse_htmlfile($file)
# parse_htmlfile($file, $obj)
#
# Opens the file named $file for reading and hands the open handle to
# $obj->parse_file(), returning whatever that call returns.  When $obj is
# missing (or false) a parser is created with _new_tree_maker().
#
# Returns undef if the file cannot be opened.
sub parse_htmlfile ($;$)
{
    my ($file, $p) = @_;

    # Three-argument open with an explicit read mode: $file is always taken
    # as a literal file name, so values such as "cmd |" or ">path" can no
    # longer be interpreted as a pipe or as a write mode.
    # "return undef" (rather than a bare return) is kept on purpose; it is
    # the documented failure value of this function.
    open(my $fh, '<', $file) or return undef;

    $p = _new_tree_maker() unless $p;
    my $result = $p->parse_file($fh);

    # The handle was opened read-only, so a failing close() cannot lose
    # data; close it explicitly instead of waiting for scope exit.
    close($fh);

    return $result;
}


# _new_tree_maker()
#
# Private helper.  Builds the default parser: an HTML::TreeBuilder object
# configured from the current values of $IMPLICIT_TAGS, $IGNORE_UNKNOWN,
# $IGNORE_TEXT and $WARN, with strict_comment() switched on.
sub _new_tree_maker
{
    my $p = HTML::TreeBuilder->new(
        implicit_tags  => $IMPLICIT_TAGS,
        ignore_unknown => $IGNORE_UNKNOWN,
        ignore_text    => $IGNORE_TEXT,
        'warn'         => $WARN,
    );
    $p->strict_comment(1);
    return $p;
}

1;