1 package WWW::RobotRules::AnyDBM_File;
3 require WWW::RobotRules;
4 @ISA = qw(WWW::RobotRules);
14 WWW::RobotRules::AnyDBM_File - Persistent RobotRules
18 require WWW::RobotRules::AnyDBM_File;
21 # Create a robot useragent that uses a diskcaching RobotRules
22 my $rules = new WWW::RobotRules::AnyDBM_File 'my-robot/1.0', 'cachefile';
23 my $ua = new WWW::RobotUA 'my-robot/1.0', 'me@foo.com', $rules;
25 # Then just use $ua as usual
26 $res = $ua->request($req);
30 This is a subclass of I<WWW::RobotRules> that uses the AnyDBM_File
31 package to implement persistent diskcaching of F<robots.txt> and host
34 The constructor (the new() method) takes an extra argument specifying
35 the name of the DBM file to use. If the DBM file already exists, then
36 you can specify undef as agent name as the name can be obtained from
43 my ($class, $ua, $file) = @_;
44 Carp::croak('WWW::RobotRules::AnyDBM_File filename required') unless $file;
46 my $self = bless { }, $class;
47 $self->{'filename'} = $file;
48 tie %{$self->{'dbm'}}, 'AnyDBM_File', $file, O_CREAT|O_RDWR, 0640
49 or Carp::croak("Can't open $file: $!");
55 # Try to obtain name from DBM file
56 $ua = $self->{'dbm'}{"|ua-name|"};
57 Carp::croak("No agent name specified") unless $ua;
64 my($self, $newname) = @_;
65 my $old = $self->{'dbm'}{"|ua-name|"};
66 if (defined $newname) {
67 $newname =~ s!/?\s*\d+.\d+\s*$!!; # loose version
68 unless ($old && $old eq $newname) {
69 # Old info is now stale.
70 my $file = $self->{'filename'};
71 untie %{$self->{'dbm'}};
72 tie %{$self->{'dbm'}}, 'AnyDBM_File', $file, O_TRUNC|O_RDWR, 0640;
73 %{$self->{'dbm'}} = ();
74 $self->{'dbm'}{"|ua-name|"} = $newname;
81 my ($self, $netloc) = @_;
82 my $t = $self->{'dbm'}{"$netloc|vis"};
84 (split(/;\s*/, $t))[0];
88 my ($self, $netloc) = @_;
89 my $t = $self->{'dbm'}{"$netloc|vis"};
90 return undef unless $t;
91 (split(/;\s*/, $t))[1];
95 my ($self, $netloc, $fresh) = @_;
96 my $old = $self->{'dbm'}{"$netloc|exp"};
98 $old =~ s/;.*//; # remove cleartext
100 if (defined $fresh) {
101 $fresh .= "; " . localtime($fresh);
102 $self->{'dbm'}{"$netloc|exp"} = $fresh;
108 my($self, $netloc, $time) = @_;
112 my $old = $self->{'dbm'}{"$netloc|vis"};
115 ($count,$last) = split(/;\s*/, $old);
116 $time = $last if $last > $time;
119 $self->{'dbm'}{"$netloc|vis"} = "$count; $time; " . localtime($time);
123 my($self, $netloc, @rules) = @_;
125 $cnt++ while $self->{'dbm'}{"$netloc|r$cnt"};
128 $self->{'dbm'}{"$netloc|r$cnt"} = $_;
134 my($self, $netloc) = @_;
136 while ($self->{'dbm'}{"$netloc|r$cnt"}) {
137 delete $self->{'dbm'}{"$netloc|r$cnt"};
143 my($self, $netloc) = @_;
147 my $rule = $self->{'dbm'}{"$netloc|r$cnt"};
163 L<WWW::RobotRules>, L<LWP::RobotUA>
167 Hakan Ardo E<lt>hakan@munin.ub2.lu.se>, Gisle Aas E<lt>aas@sn.no>