1 use Test::More tests => 17;
6 # First we create an HTML document to test
8 my $file = "ttest$$.htm";
9 die "$file already exists" if -e $file;
11 open(F, ">$file") or die "Can't create $file: $!";
12 print F <<'EOT'; close(F);
16 This is the <title>
19 <base href="http://www.perl.com">
22 <body background="bg.gif">
24 <h1>This is the <b>title</b> again
27 And this is a link to the <a href="http://www.perl.com"><img src="camel.gif" alt="Perl"> <!--nice isn't it-->Institute</a>
29 <br/><? process instruction >
36 END { unlink($file) || warn "Can't unlink $file: $!"; }
42 $p = HTML::TokeParser->new($file) || die "Can't open $file: $!";
43 ok($p->unbroken_text);
44 if ($p->get_tag("foo", "title")) {
45 my $title = $p->get_trimmed_text;
46 #diag "Title: $title";
47 is($title, "This is the <title>");
51 # Test with reference to glob
52 open(F, $file) || die "Can't open $file: $!";
53 $p = HTML::TokeParser->new(\*F);
58 while (my $token = $p->get_token) {
59 $scount++ if $token->[0] eq "S";
60 $ecount++ if $token->[0] eq "E";
61 $pcount++ if $token->[0] eq "PI";
67 open(F, $file) || die "Can't open $file: $!";
68 $p = HTML::TokeParser->new(*F);
69 $tcount++ while $p->get_tag;
73 # Test with plain file name
74 $p = HTML::TokeParser->new($file) || die;
75 $tcount++ while $p->get_tag;
78 #diag "Number of tokens found: $tcount/2 = $scount + $ecount";
83 is($tcount/2, $scount + $ecount);
85 ok(!HTML::TokeParser->new("/noT/thEre/$$"));
88 $p = HTML::TokeParser->new($file) || die;
90 my $atext = $p->get_text;
93 is($atext, "Perl\240Institute");
95 # test parsing of embedded document
96 $p = HTML::TokeParser->new(\<<HTML);
103 ok($p->get_tag("h1"));
104 is($p->get_trimmed_text, "Heading");
107 # test parsing of large embedded documents
108 my $doc = "<a href='foo'>foo is bar</a>\n\n\n" x 2022;
110 #use Time::HiRes qw(time);
112 $p = HTML::TokeParser->new(\$doc);
113 #diag "Construction time: ", time - $start;
116 while (my $t = $p->get_token) {
117 $count++ if $t->[0] eq "S";
119 #diag "Parse time: ", time - $start;
123 $p = HTML::TokeParser->new(\<<'EOT');
124 <H1>This is a heading</H1>
125 This is s<b>o</b>me<hr>text.
127 This is some more text.
129 This is even some more.
134 my $t = $p->get_trimmed_text("br", "p");
135 is($t, "This is some text.");
139 $t = $p->get_trimmed_text("br", "p");
140 is($t,"This is some more text.");
144 $p = HTML::TokeParser->new(\<<'EOT');
145 <H1>This is a <b>bold</b> heading</H1>
146 This is some <i>italic</i> text.<br />This is some <span id=x>more text</span>.
148 This is even some more.
154 is($t, "This is a bold heading");
162 is($t, "This is some italic text. This is some more text.");