git.maemo.org Git - pkg-perl/blob - deb-src/libalgorithm-diff-perl/libalgorithm-diff-perl-1.19.02/cdiff.pl

   1 #!/usr/bin/perl -w
   2 #
   3 # `Diff' program in Perl
   4 # Copyright 1998 M-J. Dominus. (mjd-perl-diff@plover.com)
   5 #
   6 # This program is free software; you can redistribute it and/or modify it
   7 # under the same terms as Perl itself.
   8 #
   9 # Altered to output in `context diff' format (but without context)
  10 # September 1998 Christian Murphy (cpm@muc.de)
  11 #
  12 # Command-line arguments and context lines feature added
  13 # September 1998 Amir D. Karger (karger@bead.aecom.yu.edu)
  14 #
  15 # In this file, "item" usually means "line of text", and "item number" usually
  16 # means "line number". But theoretically the code could be used more generally
  17 use strict;
  18
  19 use Algorithm::Diff qw(diff);
  20 use File::stat;
  21 use vars qw ($opt_C $opt_c $opt_u $opt_U);
  22 use Getopt::Std;
  23
  24 my $usage = << "ENDUSAGE";
  25 Usage: $0 [{-c | -u}] [{-C | -U} lines] oldfile newfile
  26     -c will do a context diff with 3 lines of context
  27     -C will do a context diff with 'lines' lines of context
  28     -u will do a unified diff with 3 lines of context
  29     -U will do a unified diff with 'lines' lines of context
  30 ENDUSAGE
  31
  32 getopts('U:C:cu') or bag("$usage");
  33 bag("$usage") unless @ARGV == 2;
  34 my ($file1, $file2) = @ARGV;
  35 if (defined $opt_C || defined $opt_c) {
  36     $opt_c = ""; # -c on if -C given on command line
  37     $opt_u = undef;
  38 } elsif (defined $opt_U || defined $opt_u) {
  39     $opt_u = ""; # -u on if -U given on command line
  40     $opt_c = undef;
  41 } else {
  42     $opt_c = ""; # by default, do context diff, not old diff
  43 }
  44
  45 my ($char1, $char2); # string to print before file names
  46 my $Context_Lines; # lines of context to print
  47 if (defined $opt_c) {
  48     $Context_Lines = defined $opt_C ? $opt_C : 3;
  49     $char1 = '*' x 3; $char2 = '-' x 3;
  50 } elsif (defined $opt_u) {
  51     $Context_Lines = defined $opt_U ? $opt_U : 3;
  52     $char1 = '-' x 3; $char2 = '+' x 3;
  53 }
  54
  55 # After we've read up to a certain point in each file, the number of items
  56 # we've read from each file will differ by $FLD (could be 0)
  57 my $File_Length_Difference = 0;
  58
  59 open (F1, $file1) or bag("Couldn't open $file1: $!");
  60 open (F2, $file2) or bag("Couldn't open $file2: $!");
  61 my (@f1, @f2);
  62 chomp(@f1 = <F1>);
  63 close F1;
  64 chomp(@f2 = <F2>);
  65 close F2;
  66
  67 # diff yields lots of pieces, each of which is basically a Block object
  68 my $diffs = diff(\@f1, \@f2);
  69 exit 0 unless @$diffs;
  70
  71 my $st = stat($file1);
  72 print "$char1 $file1\t", scalar localtime($st->mtime), "\n";
  73 $st = stat($file2);
  74 print "$char2 $file2\t", scalar localtime($st->mtime), "\n";
  75
  76 my ($hunk,$oldhunk);
  77 # Loop over hunks. If a hunk overlaps with the last hunk, join them.
  78 # Otherwise, print out the old one.
  79 foreach my $piece (@$diffs) {
  80     $hunk = new Hunk ($piece, $Context_Lines);
  81     next unless $oldhunk;
  82
  83     if ($hunk->does_overlap($oldhunk)) {
  84         $hunk->prepend_hunk($oldhunk);
  85     } else {
  86         $oldhunk->output_diff(\@f1, \@f2);
  87     }
  88
  89 } continue {
  90     $oldhunk = $hunk;
  91 }
  92
  93 # print the last hunk
  94 $oldhunk->output_diff(\@f1, \@f2);
  95 exit 1;
  96 # END MAIN PROGRAM
  97
  98 sub bag {
  99   my $msg = shift;
 100   $msg .= "\n";
 101   warn $msg;
 102   exit 2;
 103 }
 104
 105 # Package Hunk. A Hunk is a group of Blocks which overlap because of the
 106 # context surrounding each block. (So if we're not using context, every
 107 # hunk will contain one block.)
 108 {
 109 package Hunk;
 110
 111 sub new {
 112 # Arg1 is output from &LCS::diff (which corresponds to one Block)
 113 # Arg2 is the number of items (lines, e.g.,) of context around each block
 114 #
 115 # This subroutine changes $File_Length_Difference
 116 #
 117 # Fields in a Hunk:
 118 # blocks      - a list of Block objects
 119 # start       - index in file 1 where first block of the hunk starts
 120 # end         - index in file 1 where last block of the hunk ends
 121 #
 122 # Variables:
 123 # before_diff - how much longer file 2 is than file 1 due to all hunks
 124 #               until but NOT including this one
 125 # after_diff  - difference due to all hunks including this one
 126     my ($class, $piece, $context_items) = @_;
 127
 128     my $block = new Block ($piece); # this modifies $FLD!
 129
 130     my $before_diff = $File_Length_Difference; # BEFORE this hunk
 131     my $after_diff = $before_diff + $block->{"length_diff"};
 132     $File_Length_Difference += $block->{"length_diff"};
 133
 134     # @remove_array and @insert_array hold the items to insert and remove
 135     # Save the start & beginning of each array. If the array doesn't exist
 136     # though (e.g., we're only adding items in this block), then figure
 137     # out the line number based on the line number of the other file and
 138     # the current difference in file lenghts
 139     my @remove_array = $block->remove;
 140     my @insert_array = $block->insert;
 141     my ($a1, $a2, $b1, $b2, $start1, $start2, $end1, $end2);
 142     $a1 = @remove_array ? $remove_array[0 ]->{"item_no"} : -1;
 143     $a2 = @remove_array ? $remove_array[-1]->{"item_no"} : -1;
 144     $b1 = @insert_array ? $insert_array[0 ]->{"item_no"} : -1;
 145     $b2 = @insert_array ? $insert_array[-1]->{"item_no"} : -1;
 146
 147     $start1 = $a1 == -1 ? $b1 - $before_diff : $a1;
 148     $end1   = $a2 == -1 ? $b2 - $after_diff  : $a2;
 149     $start2 = $b1 == -1 ? $a1 + $before_diff : $b1;
 150     $end2   = $b2 == -1 ? $a2 + $after_diff  : $b2;
 151
 152     # At first, a hunk will have just one Block in it
 153     my $hunk = {
 154             "start1" => $start1,
 155             "start2" => $start2,
 156             "end1" => $end1,
 157             "end2" => $end2,
 158             "blocks" => [$block],
 159               };
 160     bless $hunk, $class;
 161
 162     $hunk->flag_context($context_items);
 163
 164     return $hunk;
 165 }
 166
 167 # Change the "start" and "end" fields to note that context should be added
 168 # to this hunk
 169 sub flag_context {
 170     my ($hunk, $context_items) = @_;
 171     return unless $context_items; # no context
 172
 173     # add context before
 174     my $start1 = $hunk->{"start1"};
 175     my $num_added = $context_items > $start1 ? $start1 : $context_items;
 176     $hunk->{"start1"} -= $num_added;
 177     $hunk->{"start2"} -= $num_added;
 178
 179     # context after
 180     my $end1 = $hunk->{"end1"};
 181     $num_added = ($end1+$context_items > $#f1) ?
 182                   $#f1 - $end1 :
 183                   $context_items;
 184     $hunk->{"end1"} += $num_added;
 185     $hunk->{"end2"} += $num_added;
 186 }
 187
 188 # Is there an overlap between hunk arg0 and old hunk arg1?
 189 # Note: if end of old hunk is one less than beginning of second, they overlap
 190 sub does_overlap {
 191     my ($hunk, $oldhunk) = @_;
 192     return "" unless $oldhunk; # first time through, $oldhunk is empty
 193
 194     # Do I actually need to test both?
 195     return ($hunk->{"start1"} - $oldhunk->{"end1"} <= 1 ||
 196             $hunk->{"start2"} - $oldhunk->{"end2"} <= 1);
 197 }
 198
 199 # Prepend hunk arg1 to hunk arg0
 200 # Note that arg1 isn't updated! Only arg0 is.
 201 sub prepend_hunk {
 202     my ($hunk, $oldhunk) = @_;
 203
 204     $hunk->{"start1"} = $oldhunk->{"start1"};
 205     $hunk->{"start2"} = $oldhunk->{"start2"};
 206
 207     unshift (@{$hunk->{"blocks"}}, @{$oldhunk->{"blocks"}});
 208 }
 209
 210
 211 # DIFF OUTPUT ROUTINES. THESE ROUTINES CONTAIN DIFF FORMATTING INFO...
 212 sub output_diff {
 213     if    (defined $main::opt_u) {&output_unified_diff(@_)}
 214     elsif (defined $main::opt_c) {&output_context_diff(@_)}
 215     else {die "unknown diff"}
 216 }
 217
 218 sub output_unified_diff {
 219     my ($hunk, $fileref1, $fileref2) = @_;
 220     my @blocklist;
 221
 222     # Calculate item number range.
 223     my $range1 = $hunk->unified_range(1);
 224     my $range2 = $hunk->unified_range(2);
 225     print "@@ -$range1 +$range2 @@\n";
 226
 227     # Outlist starts containing the hunk of file 1.
 228     # Removing an item just means putting a '-' in front of it.
 229     # Inserting an item requires getting it from file2 and splicing it in.
 230     #    We splice in $num_added items. Remove blocks use $num_added because
 231     # splicing changed the length of outlist.
 232     #    We remove $num_removed items. Insert blocks use $num_removed because
 233     # their item numbers---corresponding to positions in file *2*--- don't take
 234     # removed items into account.
 235     my $low = $hunk->{"start1"};
 236     my $hi = $hunk->{"end1"};
 237     my ($num_added, $num_removed) = (0,0);
 238     my @outlist = @$fileref1[$low..$hi];
 239     map {s/^/ /} @outlist; # assume it's just context
 240
 241     foreach my $block (@{$hunk->{"blocks"}}) {
 242         foreach my $item ($block->remove) {
 243             my $op = $item->{"sign"}; # -
 244             my $offset = $item->{"item_no"} - $low + $num_added;
 245             $outlist[$offset] =~ s/^ /$op/;
 246             $num_removed++;
 247         }
 248         foreach my $item ($block->insert) {
 249             my $op = $item->{"sign"}; # +
 250             my $i = $item->{"item_no"};
 251             my $offset = $i - $hunk->{"start2"} + $num_removed;
 252             splice(@outlist,$offset,0,"$op$$fileref2[$i]");
 253             $num_added++;
 254         }
 255     }
 256
 257     map {s/$/\n/} @outlist; # add \n's
 258     print @outlist;
 259
 260 }
 261
 262 sub output_context_diff {
 263     my ($hunk, $fileref1, $fileref2) = @_;
 264     my @blocklist;
 265
 266     print "***************\n";
 267     # Calculate item number range.
 268     my $range1 = $hunk->context_range(1);
 269     my $range2 = $hunk->context_range(2);
 270
 271     # Print out file 1 part for each block in context diff format if there are
 272     # any blocks that remove items
 273     print "*** $range1 ****\n";
 274     my $low = $hunk->{"start1"};
 275     my $hi  = $hunk->{"end1"};
 276     if (@blocklist = grep {$_->remove} @{$hunk->{"blocks"}}) {
 277         my @outlist = @$fileref1[$low..$hi];
 278         map {s/^/  /} @outlist; # assume it's just context
 279         foreach my $block (@blocklist) {
 280             my $op = $block->op; # - or !
 281             foreach my $item ($block->remove) {
 282                 $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/;
 283             }
 284         }
 285         map {s/$/\n/} @outlist; # add \n's
 286         print @outlist;
 287     }
 288
 289     print "--- $range2 ----\n";
 290     $low = $hunk->{"start2"};
 291     $hi  = $hunk->{"end2"};
 292     if (@blocklist = grep {$_->insert} @{$hunk->{"blocks"}}) {
 293         my @outlist = @$fileref2[$low..$hi];
 294         map {s/^/  /} @outlist; # assume it's just context
 295         foreach my $block (@blocklist) {
 296             my $op = $block->op; # + or !
 297             foreach my $item ($block->insert) {
 298                 $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/;
 299             }
 300         }
 301         map {s/$/\n/} @outlist; # add \n's
 302         print @outlist;
 303     }
 304 }
 305
 306 sub context_range {
 307 # Generate a range of item numbers to print. Only print 1 number if the range
 308 # has only one item in it. Otherwise, it's 'start,end'
 309     my ($hunk, $flag) = @_;
 310     my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
 311     $start++; $end++;  # index from 1, not zero
 312     my $range = ($start < $end) ? "$start,$end" : $end;
 313     return $range;
 314 }
 315
 316 sub unified_range {
 317 # Generate a range of item numbers to print for unified diff
 318 # Print number where block starts, followed by number of lines in the block
 319 # (don't print number of lines if it's 1)
 320     my ($hunk, $flag) = @_;
 321     my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
 322     $start++; $end++;  # index from 1, not zero
 323     my $length = $end - $start + 1;
 324     my $first = $length < 2 ? $end : $start; # strange, but correct...
 325     my $range = $length== 1 ? $first : "$first,$length";
 326     return $range;
 327 }
 328 } # end Package Hunk
 329
 330 # Package Block. A block is an operation removing, adding, or changing
 331 # a group of items. Basically, this is just a list of changes, where each
 332 # change adds or deletes a single item.
 333 # (Change could be a separate class, but it didn't seem worth it)
 334 {
 335 package Block;
 336 sub new {
 337 # Input is a chunk from &Algorithm::LCS::diff
 338 # Fields in a block:
 339 # length_diff - how much longer file 2 is than file 1 due to this block
 340 # Each change has:
 341 # sign        - '+' for insert, '-' for remove
 342 # item_no     - number of the item in the file (e.g., line number)
 343 # We don't bother storing the text of the item
 344 #
 345     my ($class,$chunk) = @_;
 346     my @changes = ();
 347
 348 # This just turns each change into a hash.
 349     foreach my $item (@$chunk) {
 350         my ($sign, $item_no, $text) = @$item;
 351         my $hashref = {"sign" => $sign, "item_no" => $item_no};
 352         push @changes, $hashref;
 353     }
 354
 355     my $block = { "changes" => \@changes };
 356     bless $block, $class;
 357
 358     $block->{"length_diff"} = $block->insert - $block->remove;
 359     return $block;
 360 }
 361
 362
 363 # LOW LEVEL FUNCTIONS
 364 sub op {
 365 # what kind of block is this?
 366     my $block = shift;
 367     my $insert = $block->insert;
 368     my $remove = $block->remove;
 369
 370     $remove && $insert and return '!';
 371     $remove and return '-';
 372     $insert and return '+';
 373     warn "unknown block type";
 374     return '^'; # context block
 375 }
 376
 377 # Returns a list of the changes in this block that remove items
 378 # (or the number of removals if called in scalar context)
 379 sub remove { return grep {$_->{"sign"} eq '-'} @{shift->{"changes"}}; }
 380
 381 # Returns a list of the changes in this block that insert items
 382 sub insert { return grep {$_->{"sign"} eq '+'} @{shift->{"changes"}}; }
 383
 384 } # end of package Block
 385