3 WWW::Mechanize::Examples - Sample programs that use WWW::Mechanize
7 Plenty of people have learned WWW::Mechanize, and now, you can too!
9 Following are user-supplied samples of WWW::Mechanize in action.
10 If you have samples you'd like to contribute, please send 'em to
11 C<< <andy@petdance.com> >>.
13 You can also look at the F<t/*.t> files in the distribution.
15 Please note that these examples are not intended to do any specific task.
16 For all I know, they're no longer functional because the sites they
17 hit have changed. They're here to give examples of how people have
20 Note that the examples are in reverse order of my having received them,
21 so the freshest examples are always at the top.
23 =head2 Starbucks Density Calculator, by Nat Torkington
25 Here's a pair of scripts from Nat Torkington, editor for O'Reilly Media
26 and co-author of the I<Perl Cookbook>.
30 Rael [Dornfest] discovered that you can easily find out how many Starbucks
31 there are in an area by searching for "Starbucks". So I wrote a silly
32 scraper for some old census data and came up with some Starbucks density
33 figures. There's no meaning to these numbers thanks to errors from using
34 old census data coupled with false positives in Yahoo search (e.g.,
35 "Dodie Starbuck-Your Style Desgn" in Portland OR). But it was fun to
38 Here are the top twenty cities in descending order of population,
39 with the amount of territory each Starbucks has. E.g., A New York NY
40 Starbucks covers 1.7 square miles of ground.
72 $url = 'http://www.census.gov/population/www/documentation/twps0027.html';
73 $m = WWW::Mechanize->new();
78 $c =~ m{<A NAME=.tabA.>(.*?)</TABLE>}s
79 or die "Can't find the population table\n";
81 @outer = $t =~ m{<TR.*?>(.*?)</TR>}gs;
84 @bits = $r =~ m{<TD.*?>(.*?)</TD>}gs;
85 for ($x = 0; $x < @bits; $x++) {
87 @v = split /\s*<BR>\s*/, $b;
88 foreach (@v) { s/^\s+//; s/\s+$// }
89 push @{$data[$x]}, @v;
93 for ($y = 0; $y < @{$data[0]}; $y++) {
94 $data{$data[1][$y]} = {
97 POP => comma_free($data[2][$y]),
98 AREA => comma_free($data[3][$y]),
99 DENS => comma_free($data[4][$y]),
103 store(\%data, "cities.dat");
120 $SIG{__WARN__} = sub {} ; # ssssssh
122 my $Cities = retrieve("cities.dat");
124 my $m = WWW::Mechanize->new();
125 $m->get("http://local.yahoo.com/");
127 my @cities = sort { $Cities->{$a}{RANK} <=> $Cities->{$b}{RANK} } keys %$Cities;
128 foreach my $c ( @cities ) {
130 'stx' => "starbucks",
134 my $r = $m->submit_form(form_number => 2,
136 die "Couldn't submit form" unless $r->is_success;
138 my $hits = number_of_hits($r);
139 # my $ppl = sprintf("%d", 1000 * $Cities->{$c}{POP} / $hits);
140 # print "$c has $hits Starbucks. That's one for every $ppl people.\n";
141 my $density = sprintf("%.1f", $Cities->{$c}{AREA} / $hits);
142 print "$c : $density\n";
148 if ($c =~ m{\d+ out of <b>(\d+)</b> total results for}) {
151 if ($c =~ m{Sorry, no .*? found in or near}) {
154 if ($c =~ m{Your search matched multiple cities}) {
155 warn "Your search matched multiple cities\n";
158 if ($c =~ m{Sorry we couldn.t find that location}) {
162 if ($c =~ m{Could not find.*?, showing results for}) {
166 die "Unknown response\n$c\n";
171 =head2 pb-upload, by John Beppu
173 This script takes filenames of images from the command line and
174 uploads them to a www.photobucket.com folder. John Beppu, the author, says:
178 I had 92 pictures I wanted to upload, and doing it through a browser
179 would've been torture. But thanks to mech, all I had to do was
180 `./pb.upload *.jpg` and watch it do its thing. It felt good.
181 If I had more time, I'd implement WWW::Photobucket on top of
186 #!/usr/bin/perl -w -T
191 my $login = "login_name";
192 my $password = "password";
193 my $folder = "folder";
195 my $url = "http://img78.photobucket.com/albums/v281/$login/$folder/";
197 # login to your photobucket.com account
198 my $mech = WWW::Mechanize->new();
202 fields => { password => $password },
204 die unless ($mech->success);
206 # upload image files specified on command line
209 $mech->form_number(2);
210 $mech->field('the_file[]' => $_);
214 =head2 listmod, by Ian Langworth
216 Ian Langworth contributes this little gem that will bring joy to
217 beleagured mailing list admins. It discards spam messages through
218 mailman's web interface.
221 #!/arch/unix/bin/perl
225 # listmod - fast alternative to mailman list interface
227 # usage: listmod crew XXXXXXXX
230 die "usage: $0 <listname> <password>\n" unless @ARGV == 2;
231 my ($listname, $password) = @ARGV;
233 use CGI qw(unescape);
236 my $m = WWW::Mechanize->new( autocheck => 1 );
239 my $term = Term::ReadLine->new($0);
241 # submit the form, get the cookie, go to the list admin page
242 $m->get("https://lists.ccs.neu.edu/bin/admindb/$listname");
243 $m->set_visible( $password );
246 # exit if nothing to do
247 print "There are no pending requests.\n" and exit
248 if $m->content =~ /There are no pending requests/;
250 # select the first form and examine its contents
252 my $f = $m->current_form or die "Couldn't get first form!\n";
254 # get me the base form element for each email item
255 my @items = map {m/^.+?-(.+)/} grep {m/senderbanp/} $f->param
256 or die "Couldn't get items in first form!\n";
258 # iterate through items, prompt user, commit actions
259 foreach my $item (@items) {
262 my $sender = unescape($item);
263 my ($subject) = [$f->find_input("senderbanp-$item")->value_names]->[1]
264 =~ /Subject:\s+(.+?)\s+Size:/g;
268 while ( $choice !~ /^[DAX]$/ ) {
269 print "$sender\: '$subject'\n";
270 $choice = uc $term->readline("Action: defer/accept/discard [dax]: ");
275 $m->field("senderaction-$item" => {D=>0,A=>1,X=>3}->{$choice});
281 =head2 ccdl, by Andy Lester
283 Steve McConnell, author of the landmark I<Code Complete> has put
284 up the chapters for the 2nd edition in PDF format on his website.
285 I needed to download them to take to Kinko's to have printed. This
286 little script did it for me.
294 my $start = "http://www.stevemcconnell.com/cc2/cc.htm";
296 my $mech = WWW::Mechanize->new( autocheck => 1 );
297 $mech->get( $start );
299 my @links = $mech->find_all_links( url_regex => qr/\d+.+\.pdf$/ );
301 for my $link ( @links ) {
302 my $url = $link->url_abs;
304 $filename =~ s[^.+/][];
306 print "Fetching $url";
307 $mech->get( $url, ':content_file' => $filename );
309 print " ", -s $filename, " bytes\n";
312 =head2 quotes.pl, by Andy Lester
314 This was a script that was going to get a hack in I<Spidering Hacks>,
315 but got cut at the last minute, probably because it's against IMDB's TOS
316 to scrape from it. I present it here as an example, not a suggestion
317 that you break their TOS.
319 Last I checked, it didn't work because their HTML didn't match, but it's
320 still good as sample code.
333 "match=s" => \$match,
334 "random" => \$random,
337 my $movie = shift @ARGV or die "Must specify a movie\n";
339 my $quotes_page = get_quotes_page( $movie );
340 my @quotes = extract_quotes( $quotes_page );
343 $match = quotemeta($match);
344 @quotes = grep /$match/i, @quotes;
348 print $quotes[rand @quotes];
350 print join( "\n", @quotes );
354 sub get_quotes_page {
357 my $mech = new WWW::Mechanize;
358 $mech->get( "http://www.imdb.com/search" );
359 $mech->success or die "Can't get the search page";
365 restrict => "Movies only",
369 my @links = $mech->find_all_links( url_regex => qr[^/Title] )
370 or die "No matches for \"$movie\" were found.\n";
373 my ( $url, $title ) = @{$links[0]};
375 warn "Checking $title...\n";
378 my $link = $mech->find_link( text_regex => qr/Memorable Quotes/i )
379 or die qq{"$title" has no quotes in IMDB!\n};
381 warn "Fetching quotes...\n\n";
382 $mech->get( $link->[0] );
384 return $mech->content;
391 # Nibble away at the unwanted HTML at the beginnning...
392 $page =~ s/.+Memorable Quotes//si;
393 $page =~ s/.+?(<a name)/$1/si;
395 # ... and the end of the page
396 $page =~ s/Browse titles in the movie quotes.+$//si;
399 # Quotes separated by an <HR> tag
400 my @quotes = split( /<hr.+?>/, $page );
402 for my $quote ( @quotes ) {
403 my @lines = split( /<br>/, $quote );
405 s/<[^>]+>//g; # Strip HTML tags
406 s/\s+/ /g; # Squash whitespace
407 s/^ //; # Strip leading space
408 s/ $//; # Strip trailing space
409 s/"/"/g; # Replace HTML entity quotes
411 # Word-wrap to fit in 72 columns
412 $Text::Wrap::columns = 72;
413 $_ = wrap( '', ' ', $_ );
415 $quote = join( "\n", @lines );
421 =head2 cpansearch.pl, by Ed Silva
423 A quick little utility to search the CPAN and fire up a browser
428 # turn on perl's safety features
432 # work out the name of the module we're looking for
433 my $module_name = $ARGV[0]
434 or die "Must specify module name on command line";
436 # create a new browser
438 my $browser = WWW::Mechanize->new();
440 # tell it to get the main page
441 $browser->get("http://search.cpan.org/");
443 # okay, fill in the box with the name of the
444 # module we want to look up
445 $browser->form_number(1);
446 $browser->field("query", $module_name);
449 # click on the link that matches the module name
450 $browser->follow_link( text_regex => $module_name );
452 my $url = $browser->uri;
454 # launch a browser...
455 system('galeon', $url);
460 =head2 lj_friends.cgi, by Matt Cashner
464 # Provides an rss feed of a paid user's LiveJournal friends list
465 # Full entries, protected entries, etc.
466 # Add to your favorite rss reader as
467 # http://your.site.com/cgi-bin/lj_friends.cgi?user=USER&password=PASSWORD
475 my $cgi = CGI->new();
476 my $form = $cgi->Vars;
478 my $agent = WWW::Mechanize->new();
480 $agent->get('http://www.livejournal.com/login.bml');
481 $agent->form_number('3');
482 $agent->field('user',$form->{user});
483 $agent->field('password',$form->{password});
485 $agent->get('http://www.livejournal.com/customview.cgi?user='.$form->{user}.'&styleid=225596&checkcookies=1');
486 print "Content-type: text/plain\n\n";
487 print $agent->content();
489 =head2 Hacking Movable Type, by Dan Rinzel
493 # a tool to automatically post entries to a moveable type weblog, and set arbitary creation dates
495 my $mech = WWW::Mechanize->new();
497 $entry->{title} = "Test AutoEntry Title";
498 $entry->{btext} = "Test AutoEntry Body";
499 $entry->{date} = '2002-04-15 14:18:00';
500 my $start = qq|http://my.blog.site/mt.cgi|;
503 $mech->field('username','und3f1n3d');
504 $mech->field('password','obscur3d');
505 $mech->submit(); # to get login cookie
506 $mech->get(qq|$start?__mode=view&_type=entry&blog_id=1|);
507 $mech->form_name('entry_form');
508 $mech->field('title',$entry->{title});
509 $mech->field('category_id',1); # adjust as needed
510 $mech->field('text',$entry->{btext});
511 $mech->field('status',2); # publish, or 1 = draft
512 $results = $mech->submit();
514 # if we're ok with this entry being datestamped "NOW" (no {date} in %entry)
515 # we're done. Otherwise, time to be tricksy
516 # MT returns a 302 redirect from this form. the redirect itself contains a <body onload=""> handler
517 # which takes the user to an editable version of the form where the create date can be edited
518 # MT date format of YYYY-MM-DD HH:MI:SS is the only one that won't error out
520 if ($entry->{date} && $entry->{date} =~ /^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}/) {
521 # travel the redirect
522 $results = $mech->get($results->{_headers}->{location});
523 $results->{_content} =~ /<body onLoad="([^\"]+)"/is;
525 $js =~ /\'([^']+)\'/;
526 $results = $mech->get($start.$1);
527 $mech->form_name('entry_form');
528 $mech->field('created_on_manual',$entry->{date});
532 =head2 get-despair, by Randal Schwartz
534 Randal submitted this bot that walks the despair.com site sucking down
543 my $m = WWW::Mechanize->new;
545 $m->get("http://www.despair.com/indem.html");
547 my @top_links = @{$m->links};
549 for my $top_link_num (0..$#top_links) {
550 next unless $top_links[$top_link_num][0] =~ /^http:/;
552 $m->follow_link( n=>$top_link_num ) or die "can't follow $top_link_num";
555 for my $image (grep m{^http://store4}, map $_->[0], @{$m->links}) {
556 my $local = basename $image;
557 print " $image...", $m->mirror($image, $local)->message, "\n"
560 $m->back or die "can't go back";