1 package URI::Heuristic;
3 # $Id: Heuristic.pm,v 4.17 2004/01/14 13:33:44 gisle Exp $
7 URI::Heuristic - Expand URI using heuristics
11 use URI::Heuristic qw(uf_uristr);
12 $u = uf_uristr("perl"); # http://www.perl.com
13 $u = uf_uristr("www.sol.no/sol"); # http://www.sol.no/sol
14 $u = uf_uristr("aas"); # http://www.aas.no
15 $u = uf_uristr("ftp.funet.fi"); # ftp://ftp.funet.fi
16 $u = uf_uristr("/etc/passwd"); # file:/etc/passwd
20 This module provides functions that expand strings into real absolute
21 URIs using some built-in heuristics. Strings that already represent
22 absolute URIs (i.e. that start with a C<scheme:> part) are never modified
23 and are returned unchanged. The main use of these functions is to
24 allow abbreviated URIs similar to what many web browsers allow for URIs
27 The following functions are provided:
33 Tries to make the argument string
34 into a proper absolute URI string. The "uf_" prefix stands for "User
35 Friendly". Under MacOS, it assumes that any string with a common URL
36 scheme (http, ftp, etc.) is a URL rather than a local path. So don't name
37 your volumes after common URL schemes and expect uf_uristr() to construct
38 valid file: URL's on those volumes for you, because it won't.
42 Works the same way as uf_uristr() but
43 returns a C<URI> object.
49 If the hostname portion of a URI does not contain any dots, then
50 certain qualified guesses are made. These guesses are governed by
51 the following two environment variables:
57 The two-letter country code (ISO 3166) for your location. If
58 the domain name of your host ends with two letters, then it is taken
59 to be the default country. See also L<Locale::Country>.
61 =item URL_GUESS_PATTERN
63 Contains a space-separated list of URL patterns to try. The string
64 "ACME" is for some reason used as a placeholder for the host name in
65 the URL provided. Example:
67 URL_GUESS_PATTERN="www.ACME.no www.ACME.se www.ACME.com"
68 export URL_GUESS_PATTERN
70 Specifying URL_GUESS_PATTERN disables any guessing rules based on
71 country. An empty URL_GUESS_PATTERN disables any guessing that
72 involves host name lookups.
78 Copyright 1997-1998, Gisle Aas
80 This library is free software; you can redistribute it and/or
81 modify it under the same terms as Perl itself.
87 use vars qw(@EXPORT_OK $VERSION $MY_COUNTRY %LOCAL_GUESSING $DEBUG);
90 *import = \&Exporter::import;
91 @EXPORT_OK = qw(uf_uri uf_uristr uf_url uf_urlstr);
92 $VERSION = sprintf("%d.%02d", q$Revision: 4.17 $ =~ /(\d+)\.(\d+)/);
98 # First try the environment.
100 return $_ if defined;
102 # Could use LANG, LC_ALL, etc at this point, but probably too
103 # much of a wild guess. (Catalan != Canada, etc.)
106 # Last bit of domain name. This may access the network.
108 my $fqdn = Net::Domain::hostfqdn();
109 $_ = lc($1) if $fqdn =~ /\.([a-zA-Z]{2})$/;
110 return $_ if defined;
112 # Give up. Defined but false.
119 'us' => [qw(www.ACME.gov www.ACME.mil)],
120 'uk' => [qw(www.ACME.co.uk www.ACME.org.uk www.ACME.ac.uk)],
121 'au' => [qw(www.ACME.com.au www.ACME.org.au www.ACME.edu.au)],
122 'il' => [qw(www.ACME.co.il www.ACME.org.il www.ACME.net.il)],
123 # send corrections and new entries to <gisle@aas.no>
130 print STDERR "uf_uristr: resolving $_\n" if $DEBUG;
131 return unless defined;
136 if (/^(www|web|home)\./) {
139 } elsif (/^(ftp|gopher|news|wais|http|https)\./) {
142 } elsif ($^O ne "MacOS" &&
143 (m,^/, || # absolute file name
144 m,^\.\.?/, || # relative file name
145 m,^[a-zA-Z]:[/\\],) # dosish file name
150 } elsif ($^O eq "MacOS" && m/:/) {
151 # potential MacOS file name
152 unless (m/^(ftp|gopher|news|wais|http|https|mailto):/) {
154 my $a = URI::file->new($_)->as_string;
155 $_ = ($a =~ m/^file:/) ? $a : "file:$a";
157 } elsif (/^\w+([\.\-]\w+)*\@(\w+\.)+\w{2,3}$/) {
160 } elsif (!/^[a-zA-Z][a-zA-Z0-9.+\-]*:/) { # no scheme specified
161 if (s/^([-\w]+(?:\.[-\w]+)*)([\/:\?\#]|$)/$2/) {
164 if ($host !~ /\./ && $host ne "localhost") {
166 if (exists $ENV{URL_GUESS_PATTERN}) {
167 @guess = map { s/\bACME\b/$host/; $_ }
168 split(' ', $ENV{URL_GUESS_PATTERN});
171 my $special = $LOCAL_GUESSING{MY_COUNTRY()};
173 my @special = @$special;
174 push(@guess, map { s/\bACME\b/$host/; $_ }
177 push(@guess, 'www.$host.' . MY_COUNTRY());
180 push(@guess, map "www.$host.$_",
181 "com", "org", "net", "edu", "int");
186 for $guess (@guess) {
187 print STDERR "uf_uristr: gethostbyname('$guess.')..."
189 if (gethostbyname("$guess.")) {
190 print STDERR "yes\n" if $DEBUG;
194 print STDERR "no\n" if $DEBUG;
197 $_ = "http://$host$_";
200 # pure junk, just return it unchanged...
204 print STDERR "uf_uristr: ==> $_\n" if $DEBUG;
212 URI->new(uf_uristr($_[0]));
216 *uf_urlstr = \*uf_uristr;
221 URI::URL->new(uf_uristr($_[0]));