Add the original source packages to maemo, source lenny
[dh-make-perl] / dev / i386 / libwww-perl / libwww-perl-5.813 / t / robot / rules.t
diff --git a/dev/i386/libwww-perl/libwww-perl-5.813/t/robot/rules.t b/dev/i386/libwww-perl/libwww-perl-5.813/t/robot/rules.t
new file mode 100644 (file)
index 0000000..6125e85
--- /dev/null
@@ -0,0 +1,228 @@
+#!/local/bin/perl
+
+=head1 NAME
+
+robot-rules.t
+
+=head1 DESCRIPTION
+
+Test a number of different A</robots.txt> files against a number
+of different User-agents.
+
+=cut
+
+require WWW::RobotRules;
+use Carp;
+use strict;
+
+print "1..50\n"; # for Test::Harness
+
+# We test a number of different /robots.txt files,
+#
+
+my $content1 = <<EOM;
+# http://foo/robots.txt
+User-agent: *
+Disallow: /private
+Disallow: http://foo/also_private
+
+User-agent: MOMspider
+Disallow:
+EOM
+
+my $content2 = <<EOM;
+# http://foo/robots.txt
+User-agent: MOMspider
+ # comment which should be ignored
+Disallow: /private
+EOM
+
+my $content3 = <<EOM;
+# http://foo/robots.txt
+EOM
+
+my $content4 = <<EOM;
+# http://foo/robots.txt
+User-agent: *
+Disallow: /private
+Disallow: mailto:foo
+
+User-agent: MOMspider
+Disallow: /this
+
+User-agent: Another
+Disallow: /that
+
+
+User-agent: SvartEnke1
+Disallow: http://fOO
+Disallow: http://bar
+
+User-Agent: SvartEnke2
+Disallow: ftp://foo
+Disallow: http://foo:8080/
+Disallow: http://bar/
+EOM
+
+my $content5 = <<EOM;
+# I've locked myself away
+User-agent: *
+Disallow: /
+# The castle is your home now, so you can go anywhere you like.
+User-agent: Belle
+Disallow: /west-wing/ # except the west wing!
+# It's good to be the Prince...
+User-agent: Beast
+Disallow: 
+EOM
+
+# same thing backwards
+my $content6 = <<EOM;
+# It's good to be the Prince...
+User-agent: Beast
+Disallow: 
+# The castle is your home now, so you can go anywhere you like.
+User-agent: Belle
+Disallow: /west-wing/ # except the west wing!
+# I've locked myself away
+User-agent: *
+Disallow: /
+EOM
+
+# and a number of different robots:
+
+my @tests1 = (
+          [$content1, 'MOMspider' =>
+           1 => 'http://foo/private' => 1,
+           2 => 'http://foo/also_private' => 1,
+          ],
+
+          [$content1, 'Wubble' =>
+           3 => 'http://foo/private' => 0,
+           4 => 'http://foo/also_private' => 0,
+           5 => 'http://foo/other' => 1,
+          ],
+
+          [$content2, 'MOMspider' =>
+           6 => 'http://foo/private' => 0,
+           7 => 'http://foo/other' => 1,
+          ],
+
+          [$content2, 'Wubble' =>
+           8  => 'http://foo/private' => 1,
+           9  => 'http://foo/also_private' => 1,
+           10 => 'http://foo/other' => 1,
+          ],
+
+          [$content3, 'MOMspider' =>
+           11 => 'http://foo/private' => 1,
+           12 => 'http://foo/other' => 1,
+          ],
+
+          [$content3, 'Wubble' =>
+           13 => 'http://foo/private' => 1,
+           14 => 'http://foo/other' => 1,
+          ],
+
+          [$content4, 'MOMspider' =>
+           15 => 'http://foo/private' => 1,
+           16 => 'http://foo/this' => 0,
+           17 => 'http://foo/that' => 1,
+          ],
+
+          [$content4, 'Another' =>
+           18 => 'http://foo/private' => 1,
+           19 => 'http://foo/this' => 1,
+           20 => 'http://foo/that' => 0,
+          ],
+
+          [$content4, 'Wubble' =>
+           21 => 'http://foo/private' => 0,
+           22 => 'http://foo/this' => 1,
+           23 => 'http://foo/that' => 1,
+          ],
+
+          [$content4, 'Another/1.0' =>
+           24 => 'http://foo/private' => 1,
+           25 => 'http://foo/this' => 1,
+           26 => 'http://foo/that' => 0,
+          ],
+
+          [$content4, "SvartEnke1" =>
+           27 => "http://foo/" => 0,
+           28 => "http://foo/this" => 0,
+           29 => "http://bar/" => 1,
+          ],
+
+          [$content4, "SvartEnke2" =>
+           30 => "http://foo/" => 1,
+           31 => "http://foo/this" => 1,
+           32 => "http://bar/" => 1,
+          ],
+
+          [$content4, "MomSpiderJr" =>   # should match "MomSpider"
+           33 => 'http://foo/private' => 1,
+           34 => 'http://foo/also_private' => 1,
+           35 => 'http://foo/this/' => 0,
+          ],
+
+          [$content4, "SvartEnk" =>      # should match "*"
+           36 => "http://foo/" => 1,
+           37 => "http://foo/private/" => 0,
+           38 => "http://bar/" => 1,
+          ],
+
+          [$content5, 'Villager/1.0' =>
+           39 => 'http://foo/west-wing/' => 0,
+           40 => 'http://foo/' => 0,
+          ],
+
+          [$content5, 'Belle/2.0' =>
+           41 => 'http://foo/west-wing/' => 0,
+           42 => 'http://foo/' => 1,
+          ],
+
+          [$content5, 'Beast/3.0' =>
+           43 => 'http://foo/west-wing/' => 1,
+           44 => 'http://foo/' => 1,
+          ],
+
+          [$content6, 'Villager/1.0' =>
+           45 => 'http://foo/west-wing/' => 0,
+           46 => 'http://foo/' => 0,
+          ],
+
+          [$content6, 'Belle/2.0' =>
+           47 => 'http://foo/west-wing/' => 0,
+           48 => 'http://foo/' => 1,
+          ],
+
+          [$content6, 'Beast/3.0' =>
+           49 => 'http://foo/west-wing/' => 1,
+           50 => 'http://foo/' => 1,
+          ],
+
+          # when adding tests, remember to increase
+          # the maximum at the top
+
+         );
+
+my $t;
+
+for $t (@tests1) {
+    my ($content, $ua) = splice(@$t, 0, 2);
+
+    my $robotsrules = new WWW::RobotRules($ua);
+    $robotsrules->parse('http://foo/robots.txt', $content);
+
+    my ($num, $path, $expected);
+    while(($num, $path, $expected) = splice(@$t, 0, 3)) {
+       my $allowed = $robotsrules->allowed($path);
+       $allowed = 1 if $allowed;
+       if($allowed != $expected) {
+           $robotsrules->dump;
+           confess "Test Failed: $ua => $path ($allowed != $expected)";
+       }
+       print "ok $num\n";
+    }
+}