git.maemo.org Git - pkg-perl/blob - deb-src/libwww-mechanize-perl/libwww-mechanize-perl-1.34/t/find_link.t

   1 #!perl -Tw
   2
   3 use warnings;
   4 use strict;
   5 use Test::More tests => 59;
   6 use URI::file;
   7
   8 BEGIN {
   9     delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)};  # Placates taint-unsafe Cwd.pm in 5.6.1
  10     use_ok( 'WWW::Mechanize' );
  11 }
  12
  13 my $mech = WWW::Mechanize->new( cookie_jar => undef );
  14 isa_ok( $mech, 'WWW::Mechanize' );
  15
  16 my $uri = URI::file->new_abs( 't/find_link.html' )->as_string;
  17
  18 $mech->get( $uri );
  19 ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page};
  20
  21 my $x;
  22 $x = $mech->find_link();
  23 isa_ok( $x, 'WWW::Mechanize::Link' );
  24 is( $x->[0], 'http://www.drphil.com/', 'First link on the page' );
  25 is( $x->url, 'http://www.drphil.com/', 'First link on the page' );
  26
  27 $x = $mech->find_link( url_regex => qr/upcase/i );
  28 isa_ok( $x, 'WWW::Mechanize::Link' );
  29 like( $x->url, qr/upcase.com/i, 'found link in uppercase meta tag' );
  30
  31 $x = $mech->find_link( text => 'CPAN A' );
  32 isa_ok( $x, 'WWW::Mechanize::Link' );
  33 is( $x->[0], 'http://a.cpan.org/', 'First CPAN link' );
  34 is( $x->url, 'http://a.cpan.org/', 'First CPAN link' );
  35
  36 $x = $mech->find_link( url => 'CPAN' );
  37 ok( !defined $x, 'No url matching CPAN' );
  38
  39 $x = $mech->find_link( text_regex => qr/CPAN/, n=>3 );
  40 isa_ok( $x, 'WWW::Mechanize::Link' );
  41 is( $x->[0], 'http://c.cpan.org/', '3rd CPAN text' );
  42 is( $x->url, 'http://c.cpan.org/', '3rd CPAN text' );
  43
  44 $x = $mech->find_link( text => 'CPAN', n=>34 );
  45 ok( !defined $x, 'No 34th CPAN text' );
  46
  47 $x = $mech->find_link( text_regex => qr/(?i:cpan)/ );
  48 isa_ok( $x, 'WWW::Mechanize::Link' );
  49 is( $x->[0], 'http://a.cpan.org/', 'Got 1st cpan via regex' );
  50 is( $x->url, 'http://a.cpan.org/', 'Got 1st cpan via regex' );
  51
  52 $x = $mech->find_link( text_regex => qr/cpan/i );
  53 isa_ok( $x, 'WWW::Mechanize::Link' );
  54 is( $x->[0], 'http://a.cpan.org/', 'Got 1st cpan via regex' );
  55 is( $x->url, 'http://a.cpan.org/', 'Got 1st cpan via regex' );
  56
  57 $x = $mech->find_link( text_regex => qr/cpan/i, n=>153 );
  58 ok( !defined $x, 'No 153rd cpan link' );
  59
  60 $x = $mech->find_link( url => 'http://b.cpan.org/' );
  61 isa_ok( $x, 'WWW::Mechanize::Link' );
  62 is( $x->[0], 'http://b.cpan.org/', 'Got b.cpan.org' );
  63 is( $x->url, 'http://b.cpan.org/', 'Got b.cpan.org' );
  64
  65 $x = $mech->find_link( url => 'http://b.cpan.org', n=>2 );
  66 ok( !defined $x, 'Not a second b.cpan.org' );
  67
  68 $x = $mech->find_link( url_regex => qr/[b-d]\.cpan\.org/, n=>2 );
  69 isa_ok( $x, 'WWW::Mechanize::Link' );
  70 is( $x->[0], 'http://c.cpan.org/', 'Got c.cpan.org' );
  71 is( $x->url, 'http://c.cpan.org/', 'Got c.cpan.org' );
  72
  73 my @wanted_links= (
  74    [ 'http://a.cpan.org/', 'CPAN A', undef, 'a' ],
  75    [ 'http://b.cpan.org/', 'CPAN B', undef, 'a' ],
  76    [ 'http://c.cpan.org/', 'CPAN C', 'bongo', 'a' ],
  77    [ 'http://d.cpan.org/', 'CPAN D', undef, 'a' ],
  78 );
  79 my @links = $mech->find_all_links( text_regex => qr/CPAN/ );
  80 @{$_} = @{$_}[0..3] for @links;
  81 is_deeply( \@links, \@wanted_links, 'Correct links came back' );
  82
  83 my $linkref = $mech->find_all_links( text_regex => qr/CPAN/ );
  84 is_deeply( $linkref, \@wanted_links, 'Correct links came back' );
  85
  86 # Check combinations of links
  87 $x = $mech->find_link( text => 'News' );
  88 isa_ok( $x, 'WWW::Mechanize::Link' );
  89 is( $x->[0], 'http://www.msnbc.com/', 'First News is MSNBC' );
  90 is( $x->url, 'http://www.msnbc.com/', 'First News is MSNBC' );
  91
  92 $x = $mech->find_link( text => 'News', url_regex => qr/bbc/ );
  93 isa_ok( $x, 'WWW::Mechanize::Link' );
  94 is( $x->[0], 'http://www.bbc.co.uk/', 'First BBC news link' );
  95 is( $x->url, 'http://www.bbc.co.uk/', 'First BBC news link' );
  96 is( $x->[1], 'News', 'First BBC news text' );
  97 is( $x->text, 'News', 'First BBC news text' );
  98
  99 $x = $mech->find_link( text => 'News', url_regex => qr/cnn/ );
 100 isa_ok( $x, 'WWW::Mechanize::Link' );
 101 is( $x->[0], 'http://www.cnn.com/', 'First CNN news link' );
 102 is( $x->url, 'http://www.cnn.com/', 'First CNN news link' );
 103 is( $x->[1], 'News', 'First CNN news text' );
 104 is( $x->text, 'News', 'First CNN news text' );
 105
 106 AREA_CHECKS: {
 107     my @wanted_links = (
 108         [ 'http://www.cnn.com/', 'CNN', undef, 'a' ],
 109         [ 'http://www.cnn.com/', 'News', 'Fred', 'a' ],
 110         # Can someone confirm that I just fixed a bug here, and
 111         # area tags /should/ have names? -mls
 112         [ 'http://www.cnn.com/area', undef, 'Marty', 'area' ],
 113     );
 114     my @links = $mech->find_all_links( url_regex => qr/cnn\.com/ );
 115     @{$_} = @{$_}[0..3] for @links;
 116     is_deeply( \@links, \@wanted_links, 'Correct links came back' );
 117 }
 118
 119 $x = $mech->find_link( name => 'bongo' );
 120 isa_ok( $x, 'WWW::Mechanize::Link' );
 121 is_deeply( $x, [ 'http://c.cpan.org/', 'CPAN C', 'bongo', 'a' ], 'Got the CPAN C link' );
 122
 123 $x = $mech->find_link( name_regex => qr/^[A-Z]/, n => 2 );
 124 isa_ok( $x, 'WWW::Mechanize::Link' );
 125 is_deeply( $x, [ 'http://www.cnn.com/', 'News', 'Fred', 'a' ], 'Got 2nd link that begins with a capital' );
 126
 127 $x = $mech->find_link( tag => 'a', n => 3 );
 128 isa_ok( $x, 'WWW::Mechanize::Link' );
 129 is_deeply( $x, [ 'http://b.cpan.org/', 'CPAN B', undef, 'a' ], 'Got 3rd <A> tag' );
 130
 131 $x = $mech->find_link( tag_regex => qr/^(a|frame)$/, n => 7 );
 132 isa_ok( $x, 'WWW::Mechanize::Link' );
 133 is_deeply( $x, [ 'http://d.cpan.org/', 'CPAN D', undef, 'a' ], 'Got 7th <A> or <FRAME> tag' );
 134
 135 $x = $mech->find_link( text => 'Rebuild Index' );
 136 isa_ok( $x, 'WWW::Mechanize::Link' );
 137 is_deeply( [@{$x}[0..3]], [ '/cgi-bin/MT/mt.cgi', 'Rebuild Index', undef, 'a' ], 'Got the JavaScript link' );
 138
 139 $x = $mech->find_link( url => 'blongo.html' );
 140 isa_ok( $x, 'WWW::Mechanize::Link' );
 141
 142 $x = $mech->find_link( url_abs => 'blongo.html' );
 143 ok( !defined $x, 'No match' );
 144
 145 $x = $mech->find_link( url_abs_regex => qr[t/blongo\.html$] );
 146 isa_ok( $x, 'WWW::Mechanize::Link' );