3 sub Version { $VERSION; }
8 @EXPORT = qw(parse_dir);
13 use HTTP::Date qw(str2time);
19 my($dir, $tz, $fstype, $error) = @_;
22 $fstype = "File::Listing::" . lc $fstype;
25 push(@args, $tz) if(@_ >= 2);
26 push(@args, $error) if(@_ >= 4);
28 $fstype->parse(@args);
32 sub line { Carp::croak("Not implemented yet"); }
33 sub init { } # Dummy sub
38 # This routine was originally borrowed from Graham Barr's
45 s/^(.)// and $type = $1;
49 $mode |= 1 if $1 ne "-" &&
55 $type eq "d" and $mode |= 0040000 or # Directory
56 $type eq "l" and $mode |= 0120000 or # Symbolic Link
57 $mode |= 0100000; # Regular File
59 $mode |= 0004000 if /^...s....../i;
60 $mode |= 0002000 if /^......s.../i;
61 $mode |= 0001000 if /^.........t/i;
69 my($pkg, $dir, $tz, $error) = @_;
71 # First let's try to determine what kind of dir parameter we have
72 # received. We allow both listings, reference to arrays and
73 # file handles to read from.
75 if (ref($dir) eq 'ARRAY') {
78 elsif (ref($dir) eq 'GLOB') {
82 Carp::croak("Illegal argument to parse_dir()");
84 elsif ($dir =~ /^\*\w+(::\w+)+$/) {
85 # This scalar looks like a file handle, so we assume it is
88 # A normal scalar listing
89 $dir = [ split(/\n/, $dir) ];
95 if (ref($dir) eq 'ARRAY') {
97 push(@files, $pkg->line($_, $tz, $error));
104 push(@files, $pkg->line($_, $tz, $error));
107 wantarray ? @files : \@files;
112 package File::Listing::unix;
114 use HTTP::Date qw(str2time);
116 # A place to remember current directory from last line parsed.
117 use vars qw($curdir);
120 @ISA = qw(File::Listing);
132 shift; # package name
134 my($tz, $error) = @_;
139 my ($kind, $size, $date, $name);
140 if (($kind, $size, $date, $name) =
141 /^([\-FlrwxsStTdD]{10}) # Type and permission bits
145 (\w{3}\s+\d+\s+(?:\d{1,2}:\d{2}|\d{4})) # Date
146 \s+ # Some more space
151 return if $name eq '.' || $name eq '..';
152 $name = "$curdir/$name" if length $curdir;
154 if ($kind =~ /^l/ && $name =~ /(.*) -> (.*)/ ) {
158 elsif ($kind =~ /^[\-F]/) { # (hopefully) a regular file
161 elsif ($kind =~ /^[dD]/) {
163 $size = undef; # Don't believe the reported size
165 return [$name, $type, $size, str2time($date, $tz),
166 File::Listing::file_mode($kind)];
169 elsif (/^(.+):$/ && !/^[dcbsp].*\s.*\s.*:$/ ) {
171 return () if $dir eq '.';
175 elsif (/^[Tt]otal\s+(\d+)$/ || /^\s*$/) {
178 elsif (/not found/ || # OSF1, HPUX, and SunOS return
180 /No such file/ || # IRIX returns
181 # "UX:ls: ERROR: Cannot access $file: No such file or directory"
183 # "$file: No such file or directory"
184 /cannot find/ # Windows NT returns
185 # "The system cannot find the path specified."
187 return () unless defined $error;
188 &$error($_) if ref($error) eq 'CODE';
189 warn "Error: $_\n" if $error eq 'warn';
192 elsif ($_ eq '') { # AIX, and Linux return nothing
193 return () unless defined $error;
194 &$error("No such file or directory") if ref($error) eq 'CODE';
195 warn "Warning: No such file or directory\n" if $error eq 'warn';
199 # parse failed, check if the dosftp parse understands it
200 return(File::Listing::dosftp->line($_,$tz,$error));
207 package File::Listing::dosftp;
209 use HTTP::Date qw(str2time);
211 # A place to remember current directory from last line parsed.
212 use vars qw($curdir);
215 @ISA = qw(File::Listing);
227 shift; # package name
229 my($tz, $error) = @_;
233 my ($kind, $size, $date, $name);
235 # 02-05-96 10:48AM 1415 src.slf
236 # 09-10-96 09:18AM <DIR> sl_util
237 if (($date,$size_or_dir,$name) =
238 /^(\d\d-\d\d-\d\d\s+\d\d:\d\d\wM) # Date and time info
240 (<\w{3}>|\d+) # Dir or Size
241 \s+ # Some more space
245 return if $name eq '.' || $name eq '..';
246 $name = "$curdir/$name" if length $curdir;
248 if ($size_or_dir eq '<DIR>') {
250 $size = ""; # directories have no size in the pc listing
254 $size = $size_or_dir;
256 return [$name, $type, $size, str2time($date, $tz),
257 File::Listing::file_mode($kind)];
261 return () unless defined $error;
262 &$error($_) if ref($error) eq 'CODE';
263 warn "Can't parse: $_\n" if $error eq 'warn';
271 package File::Listing::vms;
272 @File::Listing::vms::ISA = qw(File::Listing);
274 package File::Listing::netware;
275 @File::Listing::netware::ISA = qw(File::Listing);
279 package File::Listing::apache;
281 @ISA = qw(File::Listing);
288 shift; # package name
290 my($tz, $error) = @_; # ignored for now...
292 if (m!<A\s+HREF=\"([^\"]+)\">.*</A>.*?(\d+)-([a-zA-Z]+)-(\d+)\s+(\d+):(\d+)\s+(?:([\d\.]+[kM]?|-))!i) {
293 my($filename, $filesize) = ($1, $7);
294 my($d,$m,$y, $H,$M) = ($2,$3,$4,$5,$6);
296 $filesize = 0 if $filesize eq '-';
297 if ($filesize =~ s/k$//i) {
300 elsif ($filesize =~ s/M$//) {
301 $filesize *= 1024*1024;
303 elsif ($filesize =~ s/G$//) {
304 $filesize *= 1024*1024*1024;
306 $filesize = int $filesize;
309 my $filetime = Time::Local::timelocal(0,$M,$H,$d,_monthabbrev_number($m)-1,_guess_year($y)-1900);
310 my $filetype = ($filename =~ s|/$|| ? "d" : "f");
311 return [$filename, $filetype, $filesize, $filetime, undef];
330 sub _monthabbrev_number {
354 File::Listing - parse directory listing
358 use File::Listing qw(parse_dir);
359 for (parse_dir(`ls -l`)) {
360 ($name, $type, $size, $mtime, $mode) = @$_;
361 next if $type ne 'f'; # plain file
365 # directory listing can also be read from a file
366 open(LISTING, "zcat ls-lR.gz|");
367 $dir = parse_dir(\*LISTING, '+0000');
371 This module exports a single function called parse_dir(), which can be
372 used to parse directory listings. Currently it only understand Unix
373 C<'ls -l'> and C<'ls -lR'> format. It should eventually be able to
374 most things you might get back from a ftp server file listing (LIST
375 command), i.e. VMS listings, NT listings, DOS listings,...
377 The first parameter to parse_dir() is the directory listing to parse.
378 It can be a scalar, a reference to an array of directory lines or a
379 glob representing a filehandle to read the directory listing from.
381 The second parameter is the time zone to use when parsing time stamps
382 in the listing. If this value is undefined, then the local time zone is
385 The third parameter is the type of listing to assume. The values will
386 be strings like 'unix', 'vms', 'dos'. Currently only 'unix' is
387 implemented and this is also the default value. Ideally, the listing
388 type should be determined automatically.
390 The fourth parameter specifies how unparseable lines should be treated.
391 Values can be 'ignore', 'warn' or a code reference. Warn means that
392 the perl warn() function will be called. If a code reference is
393 passed, then this routine will be called and the return value from it
394 will be incorporated in the listing. The default is 'ignore'.
396 Only the first parameter is mandatory.
398 The return value from parse_dir() is a list of directory entries. In
399 a scalar context the return value is a reference to the list. The
400 directory entries are represented by an array consisting of [
401 $filename, $filetype, $filesize, $filetime, $filemode ]. The
402 $filetype value is one of the letters 'f', 'd', 'l' or '?'. The
403 $filetime value is the seconds since Jan 1, 1970. The
404 $filemode is a bitmask like the mode returned by stat().
408 Based on lsparse.pl (from Lee McLoughlin's ftp mirror package) and
409 Net::FTP's parse_dir (Graham Barr).