'
#
# %by_uploader holds the same thing except for packages for which the person
# is only an uploader.
#
# %by_tag is a hash of tag names to an anonymous array of tag information
# hashes just like the inside-most data structure above.
my (%by_maint, %by_uploader, %by_tag);
# We take a lintian log file on either standard input or as the first
# argument. This log file contains all the tags lintian found, plus N: tags
# with informational messages. Ignore all the N: tags and load everything
# else into the hashes we use for all web page generation.
#
# We keep track of a hash from maintainer page URLs to maintainer values so
# that we don't have two maintainers who map to the same page and overwrite
# each other's pages. If we find two maintainers who map to the same URL,
# just assume that the second maintainer is the same as the first (but warn
# about it).
my (%seen, %saw_maintainer);
while (<>) {
chomp;
next unless m/^([EWIXO]): (\S+)(?: (\S+))?: (\S+)(?:\s+(.*))?/;
my ($code, $package, $type, $tag, $extra) = ($1, $2, $3, $4, $5);
$type = 'binary' unless (defined $type);
next unless ($type eq 'source' || $type eq 'binary' || $type eq 'udeb');
# Update statistics.
my $key = {
E => 'errors',
W => 'warnings',
I => 'info',
X => 'experimental',
O => 'overridden'
}->{$code};
$statistics{$key}++;
unless ($seen{"$package $type"}) {
$statistics{"$type-packages"}++;
$seen{"$package $type"} = 1;
}
# Determine the source package for this package and warn if there appears
# to be no source package in the archive. Determine the maintainer and
# version. Work around a missing source package by pulling information
# from a binary package or udeb of the same name if there is any.
my ($source, $version, $source_version, $maintainer, $uploaders);
if ($type eq 'source') {
$source = $package;
if (exists $source_info{$source}) {
$version = $source_info{$source}->{version};
$maintainer = $source_info{$source}->{maintainer};
$uploaders = $source_info{$source}->{uploaders};
} else {
warn "source package $package not listed!\n";
}
} else {
$source = $bin_src_ref{$package};
if ($source and exists $source_info{$source}) {
$maintainer = $source_info{$source}->{maintainer};
$uploaders = $source_info{$source}->{uploaders};
} else {
warn "source for package $package not found!\n";
$source = $package;
if ($type eq 'binary') {
$maintainer = $binary_info{$package}->{maintainer};
} elsif ($type eq 'udeb') {
$maintainer = $udeb_info{$package}->{maintainer};
}
}
if ($type eq 'binary') {
$version = $binary_info{$package}->{version};
$source_version = $binary_info{$package}->{'source-version'};
} elsif ($type eq 'udeb') {
$version = $udeb_info{$package}->{version};
$source_version = $udeb_info{$package}->{'source-version'};
}
}
$maintainer ||= '(unknown)';
$version ||= 'unknown';
$source_version ||= $version;
# Check if we've seen the URL for this maintainer before and, if so, map
# them to the same person as the previous one.
$maintainer = map_maintainer ($maintainer);
$saw_maintainer{$maintainer} = 1;
# Update maintainer statistics.
$statistics{maintainers}++ unless defined $by_maint{$maintainer};
# Sanitize, just out of paranoia.
$source =~ s/[^a-zA-Z0-9.+-]/_/g;
$version =~ s/[^a-zA-Z0-9.+:~-]/_/g;
# Add the tag information to our hashes. Share the data between the
# hashes to save space (which means we can't later do destructive tricks
# with it).
my $info = {
code => html_quote ($code),
package => html_quote ($package),
version => html_quote ($version),
type => html_quote ($type),
tag => html_quote ($tag),
severity => html_quote ($tag_extra{$tag}{severity}),
certainty => html_quote ($tag_extra{$tag}{certainty}),
extra => html_quote ($extra),
xref => maintainer_url ($maintainer) . "#$source"
};
$by_maint{$maintainer}{$source}{$source_version} ||= [];
push(@{ $by_maint{$maintainer}{$source}{$source_version} }, $info);
$by_tag{$tag} ||= [];
push(@{ $by_tag{$tag} }, $info);
# If the package had uploaders listed, also add the information to
# %by_uploaders (still sharing the data between hashes).
if ($uploaders) {
my @uploaders = split (/\s*,\s*/, $uploaders);
for (@uploaders) {
my $uploader = map_maintainer ($_);
next if $uploader eq $maintainer;
$saw_maintainer{$uploader} = 1;
$by_uploader{$uploader}{$source}{$source_version} ||= [];
push(@{ $by_uploader{$uploader}{$source}{$source_version} }, $info);
}
}
}
# Build a hash of all maintainers, not just those with Lintian tags. We use
# this later to generate stub pages for maintainers whose packages are all
# Lintian-clean.
my %clean;
for my $source (keys %source_info) {
my $maintainer = $source_info{$source}->{maintainer};
my $id = maintainer_url ($maintainer);
$clean{$id} = $maintainer;
}
# Now, walk through the tags by source package (sorted by maintainer). Output
# a summary page of errors and warnings for each maintainer, output a full
# page that includes info, experimental, and overriden tags, and assemble the
# maintainer index and the QA package list as we go.
my (%qa, %maintainers, %packages);
my @maintainers;
{
my %seen;
@maintainers =
sort grep { !$seen{$_}++ } keys (%by_maint), keys (%by_uploader);
}
for my $maintainer (@maintainers) {
my $id = maintainer_url ($maintainer);
delete $clean{$id};
# For each of this maintainer's packages, add statistical information
# about warnings and errors to the QA list and build the packages hash
# used for the package index. We only do this for the maintainer
# packages, not the uploader packages, to avoid double-counting.
for my $source (keys %{ $by_maint{$maintainer} }) {
my ($errors, $warnings) = (0, 0);
for my $version (keys %{ $by_maint{$maintainer}{$source} }) {
my $tags = $by_maint{$maintainer}{$source}{$version};
for my $tag (@$tags) {
$errors++ if $tag->{code} eq 'E';
$warnings++ if $tag->{code} eq 'W';
$packages{$tag->{package}} = $tag->{xref};
}
}
$qa{$source} = [ $errors, $warnings ];
}
# Determine if the maintainer's page is clean. Check all packages for
# which they're either maintainer or uploader and set $error_clean if
# they have no errors or warnings.
my $error_clean = 1;
for my $source (keys %{ $by_maint{$maintainer} },
keys %{ $by_uploader{$maintainer} }) {
my $versions = $by_maint{$maintainer}{$source}
|| $by_uploader{$maintainer}{$source};
for my $version (keys %$versions) {
my $tags = $versions->{$version};
for my $tag (@$tags) {
$error_clean = 0 if ($tag->{code} eq 'E');
$error_clean = 0 if ($tag->{code} eq 'W');
}
}
}
# Determine the parts of the maintainer and the file name for the
# maintainer page.
my ($name, $email) = ($maintainer =~ /^(.*) <([^>]+)>/);
$name = 'Unknown Maintainer' unless $name;
$email = 'unknown' unless $email;
my $regular = "maintainer/$id";
my $full = "full/$id";
# Create the regular maintainer page (only errors and warnings) and the
# full maintainer page (all tags, including overrides and info tags).
print "Generating page for $id\n";
my %data = (
email => html_quote (uri_escape ($email)),
errors => 1,
id => $id,
maintainer => html_quote ($maintainer),
name => html_quote ($name),
packages => $by_maint{$maintainer},
uploads => $by_uploader{$maintainer},
);
my $template;
if ($error_clean) {
$template = $templates{clean};
} else {
$template = $templates{maintainer};
}
output_template ($regular, $template, \%data);
$template = $templates{maintainer};
$data{errors} = 0;
output_template ($full, $template, \%data);
# Add this maintainer to the hash of maintainer to URL mappings.
$maintainers{$maintainer} = $id;
}
# Write out the maintainer index.
my %data = (
maintainers => \%maintainers,
);
output_template ('maintainers.html', $templates{maintainers}, \%data);
# Write out the QA package list.
open (QA, '>', "$HTML_TMP_DIR/qa-list.txt")
or die "cannot create qa-list.txt: $!\n";
for my $source (sort keys %qa) {
print QA "$source $qa{$source}[0] $qa{$source}[1]\n";
}
close QA or die "cannot write to qa-list: $!\n";
# Now, generate stub pages for every maintainer who has only clean packages.
for my $id (keys %clean) {
my $maintainer = $clean{$id};
my ($name, $email) = ($maintainer =~ /^(.*) <([^>]+)>/);
$email = 'unknown' unless $email;
my %data = (
email => html_quote (uri_escape ($email)),
maintainer => html_quote ($maintainer),
name => html_quote ($name),
);
print "Generating clean page for $id\n";
output_template ("maintainer/$id", $templates{clean}, \%data);
output_template ("full/$id", $templates{clean}, \%data);
}
# Create the pages for each tag. Each page shows the extended description for
# the tag and all the packages for which that tag was issued.
for my $tag (sort keys %by_tag) {
my $info = Lintian::Tag::Info->new($tag);
my $description;
if ($info) {
$description = $info->description('html', ' ');
} else {
$description = " Can't find description of tag $tag.
";
}
my $code = 'O';
foreach (@{$by_tag{$tag}}) {
if ($_->{code} ne 'O') {
$code = $_->{code};
last;
}
}
my %data = (
description => $description,
tag => html_quote ($tag),
code => $code,
tags => $by_tag{$tag},
);
output_template ("tags/$tag.html", $templates{tag}, \%data);
}
# Create the general tag indices.
%data = (
tags => \%by_tag,
);
output_template ('tags.html', $templates{tags}, \%data);
output_template ('tags-severity.html', $templates{'tags-severity'}, \%data);
# Generate the package lists. These are huge, so we break them into four
# separate pages.
#
# FIXME: Does anyone actually use these pages? They're basically unreadable.
my %list;
$list{'0-9, A-F'} = [];
$list{'G-L'} = [];
$list{'M-R'} = [];
$list{'S-Z'} = [];
for my $package (sort keys %packages) {
my $first = uc substr($package, 0, 1);
if ($first le 'F') { push(@{ $list{'0-9, A-F'} }, $package) }
elsif ($first le 'L') { push(@{ $list{'G-L'} }, $package) }
elsif ($first le 'R') { push(@{ $list{'M-R'} }, $package) }
else { push(@{ $list{'S-Z'} }, $package) }
}
%data = (
packages => \%packages,
);
my $i = 1;
for my $area (sort keys %list) {
$data{area} = $area;
$data{list} = $list{$area};
output_template ("packages_$i.html", $templates{packages}, \%data);
$i++;
}
# Finally, we can start creating the index page. First, read in the old
# statistics file so that we can calculate deltas for all of our statistics.
my $old_statistics;
if (-f $statistics_file) {
($old_statistics) = read_dpkg_control($statistics_file);
}
my %delta;
my @attrs = qw(maintainers source-packages binary-packages udeb-packages
errors warnings info experimental overridden);
for my $attr (@attrs) {
my $old = $old_statistics->{$attr} || 0;
$statistics{$attr} ||= 0;
$delta{$attr} = sprintf("%d (%+d)", $statistics{$attr},
$statistics{$attr} - $old);
}
# Update the statistics file.
open (STATS, '>', $statistics_file)
or die "cannot open $statistics_file for writing: $!\n";
print STATS "last-updated: $timestamp\n";
print STATS "mirror-timestamp: $mirror_timestamp\n";
for my $attr (@attrs) {
print STATS "$attr: $statistics{$attr}\n";
}
print STATS "lintian-version: $LINTIAN_VERSION\n";
close STATS or die "cannot write to $statistics_file: $!\n";
# Create the main page.
%data = (
architecture => $LINTIAN_ARCH,
delta => \%delta,
dist => $LINTIAN_DIST,
mirror => $mirror_timestamp,
previous => $old_statistics->{'last-updated'},
area => $LINTIAN_AREA,
);
output_template ('index.html', $templates{index}, \%data);
exit 0;
# ------------------------------
# Utility functions
# Determine the file name for the maintainer page given a maintainer. It
# should be .html where is their email address with all
# characters other than a-z A-Z 0-9 - _ . @ = + replaced with _. Don't change
# this without coordinating with QA.
sub maintainer_url {
my ($maintainer) = @_;
my ($email) = ($maintainer =~ /<([^>]+)>/);
my ($regular, $full);
if ($email) {
my $id = $email;
$id =~ tr/a-zA-Z0-9_.@=+-/_/c;
return "$id.html";
} else {
return 'unsorted.html';
}
}
# Deduplicate maintainers. Maintains a cache of the maintainers we've seen
# with a given e-mail address, issues a warning if two maintainers have the
# same e-mail address, and returns the maintainer string that we should use
# (which is whatever maintainer we saw first with that e-mail).
{
my (%urlmap, %warned);
sub map_maintainer {
my ($maintainer) = @_;
my $url = maintainer_url ($maintainer);
if ($urlmap{$url} && $urlmap{$url} ne $maintainer) {
warn "$maintainer has the same page as $urlmap{$url}\n"
unless ($warned{$maintainer}
|| lc ($maintainer) eq lc ($urlmap{$url})
|| $maintainer =~ /\@lists\.(alioth\.)?debian\.org>/);
$warned{$maintainer}++;
$maintainer = $urlmap{$url};
} else {
$urlmap{$url} = $maintainer;
}
return $maintainer;
}
}
# Quote special characters for HTML output.
sub html_quote {
my ($text) = @_;
$text ||= '';
$text =~ s/&/\&/g;
$text =~ s/\</g;
$text =~ s/>/\>/g;
return $text;
}
# Given a file name, a template, and a data hash, fill out the template with
# that data hash and output the results to the file.
sub output_template {
my ($file, $template, $data) = @_;
$data->{version} ||= $LINTIAN_VERSION;
$data->{timestamp} ||= $timestamp;
$data->{head} ||= sub { $templates{head}->fill_in (HASH => { page_title => $_[0],
path_prefix => '../' x ($_[1]||0),
%$data }) };
$data->{foot} ||= sub { $templates{foot}->fill_in (HASH => $data) };
open (OUTPUT, '>', "$HTML_TMP_DIR/$file")
or die "creating $HTML_TMP_DIR/$file falied: $!\n";
$template->fill_in (OUTPUT => \*OUTPUT, HASH => $data)
or die "filling out $file failed: $Text::Template::ERROR\n";
close OUTPUT;
}
# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 ts=4 et shiftround