#!/usr/bin/perl -w # # Lintian HTML reporting tool -- Create Lintian web reports # # Copyright (C) 1998 Christian Schwarz and Richard Braakman # Copyright (C) 2007 Russ Allbery # # This program is free software. It is distributed under the terms of # the GNU General Public License as published by the Free Software # Foundation; either version 2 of the License, or (at your option) any # later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, you can find it on the World Wide # Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, # MA 02110-1301, USA. use strict; use File::Copy qw(copy); use URI::Escape; use Text::Template (); # ------------------------------ # Global variables and configuration # Maximum number of identical tags per package to display. Any remaining tags # will be compressed into a "... reported %d more times" line. our $MAX_TAGS = 8; # These have no default and must be set in the configuration file. # FIXME: $statistics_file should be in all caps as well. our ($LINTIAN_ROOT, $LINTIAN_LAB, $LINTIAN_ARCHIVEDIR, $LINTIAN_DIST, $LINTIAN_SECTION, $LINTIAN_ARCH, $HTML_TMP_DIR, $statistics_file, $LINTIAN_AREA); # Read the configuration. require './config'; if (defined $LINTIAN_SECTION and not defined $LINTIAN_AREA) { $LINTIAN_AREA = $LINTIAN_SECTION; } # The path to the mirror timestamp. our $LINTIAN_TIMESTAMP = "$LINTIAN_ARCHIVEDIR/project/trace/ftp-master.debian.org"; # FIXME: At least the lab should be a parameter to Read_pkglists rather # than an environment variable. $ENV{'LINTIAN_LAB'} = $LINTIAN_LAB; $ENV{'LINTIAN_ROOT'} = $LINTIAN_ROOT; # Import Lintian Perl libraries. use lib "$ENV{LINTIAN_ROOT}/lib"; use Lintian::Tag::Info (); use Read_pkglists; use Text_utils; use Util; # Global variables from Read_pkglists. Ugh. # FIXME: Read_pkglists should return this information instead. our (%binary_info, %source_info, %udeb_info, %bin_src_ref); # Get additional tag information. our %tag_extra = (); opendir(CHECKDIR, "$LINTIAN_ROOT/checks") or fail("cannot read directory $LINTIAN_ROOT/checks"); for my $check (readdir CHECKDIR) { next unless $check =~ /\.desc$/; my @tags = read_dpkg_control("$LINTIAN_ROOT/checks/$check"); shift(@tags); foreach my $tag (@tags) { next unless $tag->{severity} and $tag->{certainty}; my $name = $tag->{tag}; $tag_extra{$name}{severity} = $tag->{severity}; $tag_extra{$name}{certainty} = $tag->{certainty}; } } closedir(CHECKDIR); # Set the Lintian version, current timestamp, and archive timestamp. our $LINTIAN_VERSION = `$LINTIAN_ROOT/frontend/lintian --print-version`; our $timestamp = `date -u --rfc-822`; our $mirror_timestamp = slurp_entire_file($LINTIAN_TIMESTAMP); chomp ($LINTIAN_VERSION, $timestamp); $mirror_timestamp =~ s/\n.*//s; # ------------------------------ # Initialize templates # The path to our templates. our $TEMPLATES = "$LINTIAN_ROOT/reporting/templates"; # This only has to be done once, so do it at the start and then reuse the same # templates throughout. our %templates; for my $template (qw/head foot clean index maintainer maintainers packages tag tags tags-severity/) { my %options = (TYPE => 'FILE', SOURCE => "$TEMPLATES/$template.tmpl"); $templates{$template} = Text::Template->new (%options) or die "cannot load template $template: $Text::Template::ERROR\n"; } # ------------------------------ # Main routine # Read the package lists. # # FIXME: get_bin_src_ref runs read_src_list unconditionally so we can't call # it directly, which is confusing. read_bin_list; read_udeb_list; get_bin_src_ref; # Create output directories. mkdir($HTML_TMP_DIR, 0777) or die "cannot create output directory $HTML_TMP_DIR: $!\n"; mkdir("$HTML_TMP_DIR/full", 0777) or die "cannot create output directory $HTML_TMP_DIR/full: $!\n"; mkdir("$HTML_TMP_DIR/maintainer", 0777) or die "cannot create output directory $HTML_TMP_DIR/maintainer: $!\n"; mkdir("$HTML_TMP_DIR/tags", 0777) or die "cannot create output directory $HTML_TMP_DIR/tags: $!\n"; symlink(".", "$HTML_TMP_DIR/reports") or die "cannot create symlink $HTML_TMP_DIR/reports: $!\n"; symlink("$LINTIAN_ROOT/doc/lintian.html", "$HTML_TMP_DIR/manual") or die "cannot create symlink $HTML_TMP_DIR/manual: $!\n"; if ($ARGV[0]) { symlink($ARGV[0], "$HTML_TMP_DIR/lintian.log") or die "cannot create symlink $HTML_TMP_DIR/lintian.log: $!\n"; } copy("$LINTIAN_ROOT/reporting/lintian.css", "$HTML_TMP_DIR/lintian.css") or die "cannot copy lintian.css to $HTML_TMP_DIR: $!\n"; for my $image (qw/ico.png l.png logo-small.png/) { copy("$LINTIAN_ROOT/reporting/images/$image", "$HTML_TMP_DIR/$image") or die "cannot copy images/$image to $HTML_TMP_DIR: $!\n"; } # This variable will accumulate statistics. For tags: errors, warnings, # experimental, overridden, and info are the keys holding the count of tags of # that sort. For packages: binary, udeb, and source are the number of # packages of each type with Lintian errors or warnings. For maintainers: # maintainers is the number of maintainers with Lintian errors or warnings. my %statistics; # %by_maint holds a hash of maintainer names to packages and tags. Each # maintainer is a key. The value is a hash of package names to hashes. Each # package hash is in turn a hash of versions to an anonymous array of hashes, # with each hash having keys code, package, type, tag, severity, certainty, # extra, and xref. xref gets the partial URL of the maintainer page for that # source package. # # In other words, the lintian output line: # # W: gnubg source: substvar-source-version-is-deprecated gnubg-data # # for gnubg 0.15~20061120-1 maintained by Russ Allbery is # turned into the following structure: # # { 'gnubg' => { # '0.15~20061120-1' => [ # { code => 'W', # package => 'gnubg', # type => 'source', # tag => 'substvar-source-version-is-deprecated', # severity => 'normal', # certainty => 'certain', # extra => 'gnubg-data' # xref => 'rra@debian.org.html#gnubg' } ] } } # # and then stored under the key 'Russ Allbery ' # # %by_uploader holds the same thing except for packages for which the person # is only an uploader. # # %by_tag is a hash of tag names to an anonymous array of tag information # hashes just like the inside-most data structure above. my (%by_maint, %by_uploader, %by_tag); # We take a lintian log file on either standard input or as the first # argument. This log file contains all the tags lintian found, plus N: tags # with informational messages. Ignore all the N: tags and load everything # else into the hashes we use for all web page generation. # # We keep track of a hash from maintainer page URLs to maintainer values so # that we don't have two maintainers who map to the same page and overwrite # each other's pages. If we find two maintainers who map to the same URL, # just assume that the second maintainer is the same as the first (but warn # about it). my (%seen, %saw_maintainer); while (<>) { chomp; next unless m/^([EWIXO]): (\S+)(?: (\S+))?: (\S+)(?:\s+(.*))?/; my ($code, $package, $type, $tag, $extra) = ($1, $2, $3, $4, $5); $type = 'binary' unless (defined $type); next unless ($type eq 'source' || $type eq 'binary' || $type eq 'udeb'); # Update statistics. my $key = { E => 'errors', W => 'warnings', I => 'info', X => 'experimental', O => 'overridden' }->{$code}; $statistics{$key}++; unless ($seen{"$package $type"}) { $statistics{"$type-packages"}++; $seen{"$package $type"} = 1; } # Determine the source package for this package and warn if there appears # to be no source package in the archive. Determine the maintainer and # version. Work around a missing source package by pulling information # from a binary package or udeb of the same name if there is any. my ($source, $version, $source_version, $maintainer, $uploaders); if ($type eq 'source') { $source = $package; if (exists $source_info{$source}) { $version = $source_info{$source}->{version}; $maintainer = $source_info{$source}->{maintainer}; $uploaders = $source_info{$source}->{uploaders}; } else { warn "source package $package not listed!\n"; } } else { $source = $bin_src_ref{$package}; if ($source and exists $source_info{$source}) { $maintainer = $source_info{$source}->{maintainer}; $uploaders = $source_info{$source}->{uploaders}; } else { warn "source for package $package not found!\n"; $source = $package; if ($type eq 'binary') { $maintainer = $binary_info{$package}->{maintainer}; } elsif ($type eq 'udeb') { $maintainer = $udeb_info{$package}->{maintainer}; } } if ($type eq 'binary') { $version = $binary_info{$package}->{version}; $source_version = $binary_info{$package}->{'source-version'}; } elsif ($type eq 'udeb') { $version = $udeb_info{$package}->{version}; $source_version = $udeb_info{$package}->{'source-version'}; } } $maintainer ||= '(unknown)'; $version ||= 'unknown'; $source_version ||= $version; # Check if we've seen the URL for this maintainer before and, if so, map # them to the same person as the previous one. $maintainer = map_maintainer ($maintainer); $saw_maintainer{$maintainer} = 1; # Update maintainer statistics. $statistics{maintainers}++ unless defined $by_maint{$maintainer}; # Sanitize, just out of paranoia. $source =~ s/[^a-zA-Z0-9.+-]/_/g; $version =~ s/[^a-zA-Z0-9.+:~-]/_/g; # Add the tag information to our hashes. Share the data between the # hashes to save space (which means we can't later do destructive tricks # with it). my $info = { code => html_quote ($code), package => html_quote ($package), version => html_quote ($version), type => html_quote ($type), tag => html_quote ($tag), severity => html_quote ($tag_extra{$tag}{severity}), certainty => html_quote ($tag_extra{$tag}{certainty}), extra => html_quote ($extra), xref => maintainer_url ($maintainer) . "#$source" }; $by_maint{$maintainer}{$source}{$source_version} ||= []; push(@{ $by_maint{$maintainer}{$source}{$source_version} }, $info); $by_tag{$tag} ||= []; push(@{ $by_tag{$tag} }, $info); # If the package had uploaders listed, also add the information to # %by_uploaders (still sharing the data between hashes). if ($uploaders) { my @uploaders = split (/\s*,\s*/, $uploaders); for (@uploaders) { my $uploader = map_maintainer ($_); next if $uploader eq $maintainer; $saw_maintainer{$uploader} = 1; $by_uploader{$uploader}{$source}{$source_version} ||= []; push(@{ $by_uploader{$uploader}{$source}{$source_version} }, $info); } } } # Build a hash of all maintainers, not just those with Lintian tags. We use # this later to generate stub pages for maintainers whose packages are all # Lintian-clean. my %clean; for my $source (keys %source_info) { my $maintainer = $source_info{$source}->{maintainer}; my $id = maintainer_url ($maintainer); $clean{$id} = $maintainer; } # Now, walk through the tags by source package (sorted by maintainer). Output # a summary page of errors and warnings for each maintainer, output a full # page that includes info, experimental, and overriden tags, and assemble the # maintainer index and the QA package list as we go. my (%qa, %maintainers, %packages); my @maintainers; { my %seen; @maintainers = sort grep { !$seen{$_}++ } keys (%by_maint), keys (%by_uploader); } for my $maintainer (@maintainers) { my $id = maintainer_url ($maintainer); delete $clean{$id}; # For each of this maintainer's packages, add statistical information # about warnings and errors to the QA list and build the packages hash # used for the package index. We only do this for the maintainer # packages, not the uploader packages, to avoid double-counting. for my $source (keys %{ $by_maint{$maintainer} }) { my ($errors, $warnings) = (0, 0); for my $version (keys %{ $by_maint{$maintainer}{$source} }) { my $tags = $by_maint{$maintainer}{$source}{$version}; for my $tag (@$tags) { $errors++ if $tag->{code} eq 'E'; $warnings++ if $tag->{code} eq 'W'; $packages{$tag->{package}} = $tag->{xref}; } } $qa{$source} = [ $errors, $warnings ]; } # Determine if the maintainer's page is clean. Check all packages for # which they're either maintainer or uploader and set $error_clean if # they have no errors or warnings. my $error_clean = 1; for my $source (keys %{ $by_maint{$maintainer} }, keys %{ $by_uploader{$maintainer} }) { my $versions = $by_maint{$maintainer}{$source} || $by_uploader{$maintainer}{$source}; for my $version (keys %$versions) { my $tags = $versions->{$version}; for my $tag (@$tags) { $error_clean = 0 if ($tag->{code} eq 'E'); $error_clean = 0 if ($tag->{code} eq 'W'); } } } # Determine the parts of the maintainer and the file name for the # maintainer page. my ($name, $email) = ($maintainer =~ /^(.*) <([^>]+)>/); $name = 'Unknown Maintainer' unless $name; $email = 'unknown' unless $email; my $regular = "maintainer/$id"; my $full = "full/$id"; # Create the regular maintainer page (only errors and warnings) and the # full maintainer page (all tags, including overrides and info tags). print "Generating page for $id\n"; my %data = ( email => html_quote (uri_escape ($email)), errors => 1, id => $id, maintainer => html_quote ($maintainer), name => html_quote ($name), packages => $by_maint{$maintainer}, uploads => $by_uploader{$maintainer}, ); my $template; if ($error_clean) { $template = $templates{clean}; } else { $template = $templates{maintainer}; } output_template ($regular, $template, \%data); $template = $templates{maintainer}; $data{errors} = 0; output_template ($full, $template, \%data); # Add this maintainer to the hash of maintainer to URL mappings. $maintainers{$maintainer} = $id; } # Write out the maintainer index. my %data = ( maintainers => \%maintainers, ); output_template ('maintainers.html', $templates{maintainers}, \%data); # Write out the QA package list. open (QA, '>', "$HTML_TMP_DIR/qa-list.txt") or die "cannot create qa-list.txt: $!\n"; for my $source (sort keys %qa) { print QA "$source $qa{$source}[0] $qa{$source}[1]\n"; } close QA or die "cannot write to qa-list: $!\n"; # Now, generate stub pages for every maintainer who has only clean packages. for my $id (keys %clean) { my $maintainer = $clean{$id}; my ($name, $email) = ($maintainer =~ /^(.*) <([^>]+)>/); $email = 'unknown' unless $email; my %data = ( email => html_quote (uri_escape ($email)), maintainer => html_quote ($maintainer), name => html_quote ($name), ); print "Generating clean page for $id\n"; output_template ("maintainer/$id", $templates{clean}, \%data); output_template ("full/$id", $templates{clean}, \%data); } # Create the pages for each tag. Each page shows the extended description for # the tag and all the packages for which that tag was issued. for my $tag (sort keys %by_tag) { my $info = Lintian::Tag::Info->new($tag); my $description; if ($info) { $description = $info->description('html', ' '); } else { $description = "

Can't find description of tag $tag.

"; } my $code = 'O'; foreach (@{$by_tag{$tag}}) { if ($_->{code} ne 'O') { $code = $_->{code}; last; } } my %data = ( description => $description, tag => html_quote ($tag), code => $code, tags => $by_tag{$tag}, ); output_template ("tags/$tag.html", $templates{tag}, \%data); } # Create the general tag indices. %data = ( tags => \%by_tag, ); output_template ('tags.html', $templates{tags}, \%data); output_template ('tags-severity.html', $templates{'tags-severity'}, \%data); # Generate the package lists. These are huge, so we break them into four # separate pages. # # FIXME: Does anyone actually use these pages? They're basically unreadable. my %list; $list{'0-9, A-F'} = []; $list{'G-L'} = []; $list{'M-R'} = []; $list{'S-Z'} = []; for my $package (sort keys %packages) { my $first = uc substr($package, 0, 1); if ($first le 'F') { push(@{ $list{'0-9, A-F'} }, $package) } elsif ($first le 'L') { push(@{ $list{'G-L'} }, $package) } elsif ($first le 'R') { push(@{ $list{'M-R'} }, $package) } else { push(@{ $list{'S-Z'} }, $package) } } %data = ( packages => \%packages, ); my $i = 1; for my $area (sort keys %list) { $data{area} = $area; $data{list} = $list{$area}; output_template ("packages_$i.html", $templates{packages}, \%data); $i++; } # Finally, we can start creating the index page. First, read in the old # statistics file so that we can calculate deltas for all of our statistics. my $old_statistics; if (-f $statistics_file) { ($old_statistics) = read_dpkg_control($statistics_file); } my %delta; my @attrs = qw(maintainers source-packages binary-packages udeb-packages errors warnings info experimental overridden); for my $attr (@attrs) { my $old = $old_statistics->{$attr} || 0; $statistics{$attr} ||= 0; $delta{$attr} = sprintf("%d (%+d)", $statistics{$attr}, $statistics{$attr} - $old); } # Update the statistics file. open (STATS, '>', $statistics_file) or die "cannot open $statistics_file for writing: $!\n"; print STATS "last-updated: $timestamp\n"; print STATS "mirror-timestamp: $mirror_timestamp\n"; for my $attr (@attrs) { print STATS "$attr: $statistics{$attr}\n"; } print STATS "lintian-version: $LINTIAN_VERSION\n"; close STATS or die "cannot write to $statistics_file: $!\n"; # Create the main page. %data = ( architecture => $LINTIAN_ARCH, delta => \%delta, dist => $LINTIAN_DIST, mirror => $mirror_timestamp, previous => $old_statistics->{'last-updated'}, area => $LINTIAN_AREA, ); output_template ('index.html', $templates{index}, \%data); exit 0; # ------------------------------ # Utility functions # Determine the file name for the maintainer page given a maintainer. It # should be .html where is their email address with all # characters other than a-z A-Z 0-9 - _ . @ = + replaced with _. Don't change # this without coordinating with QA. sub maintainer_url { my ($maintainer) = @_; my ($email) = ($maintainer =~ /<([^>]+)>/); my ($regular, $full); if ($email) { my $id = $email; $id =~ tr/a-zA-Z0-9_.@=+-/_/c; return "$id.html"; } else { return 'unsorted.html'; } } # Deduplicate maintainers. Maintains a cache of the maintainers we've seen # with a given e-mail address, issues a warning if two maintainers have the # same e-mail address, and returns the maintainer string that we should use # (which is whatever maintainer we saw first with that e-mail). { my (%urlmap, %warned); sub map_maintainer { my ($maintainer) = @_; my $url = maintainer_url ($maintainer); if ($urlmap{$url} && $urlmap{$url} ne $maintainer) { warn "$maintainer has the same page as $urlmap{$url}\n" unless ($warned{$maintainer} || lc ($maintainer) eq lc ($urlmap{$url}) || $maintainer =~ /\@lists\.(alioth\.)?debian\.org>/); $warned{$maintainer}++; $maintainer = $urlmap{$url}; } else { $urlmap{$url} = $maintainer; } return $maintainer; } } # Quote special characters for HTML output. sub html_quote { my ($text) = @_; $text ||= ''; $text =~ s/&/\&/g; $text =~ s//\>/g; return $text; } # Given a file name, a template, and a data hash, fill out the template with # that data hash and output the results to the file. sub output_template { my ($file, $template, $data) = @_; $data->{version} ||= $LINTIAN_VERSION; $data->{timestamp} ||= $timestamp; $data->{head} ||= sub { $templates{head}->fill_in (HASH => { page_title => $_[0], path_prefix => '../' x ($_[1]||0), %$data }) }; $data->{foot} ||= sub { $templates{foot}->fill_in (HASH => $data) }; open (OUTPUT, '>', "$HTML_TMP_DIR/$file") or die "creating $HTML_TMP_DIR/$file falied: $!\n"; $template->fill_in (OUTPUT => \*OUTPUT, HASH => $data) or die "filling out $file failed: $Text::Template::ERROR\n"; close OUTPUT; } # Local Variables: # indent-tabs-mode: nil # cperl-indent-level: 4 # End: # vim: syntax=perl sw=4 sts=4 ts=4 et shiftround