3 # `Diff' program in Perl
4 # Copyright 1998 M-J. Dominus. (mjd-perl-diff@plover.com)
6 # This program is free software; you can redistribute it and/or modify it
7 # under the same terms as Perl itself.
9 # Altered to output in `context diff' format (but without context)
10 # September 1998 Christian Murphy (cpm@muc.de)
12 # Context lines feature added
13 # Unified, "Old" (Standard UNIX), Ed diff added September 1998
14 # Reverse_Ed (-f option) added March 1999
15 # Amir D. Karger (karger@bead.aecom.yu.edu)
17 # Modular functions integrated into program
18 # February 1999 M-J. Dominus (mjd-perl-diff@plover.com)
20 # In this file, "item" usually means "line of text", and "item number" usually
21 # means "line number". But theoretically the code could be used more generally
23 use Algorithm::Diff qw(diff);
25 # GLOBAL VARIABLES ####
26 # After we've read up to a certain point in each file, the number of items
27 # we've read from each file will differ by $FLD (could be 0)
28 my $File_Length_Difference = 0;
30 #ed diff outputs hunks *backwards*, so we need to save hunks when doing ed diff
32 ########################
34 my $usage = << "ENDUSAGE";
35 Usage: $0 [{-c | -C lines -e | -f | -u | -U lines}] oldfile newfile
36 -c do a context diff with 3 lines of context
37 -C do a context diff with 'lines' lines of context (implies -c)
38 -e create a script for the ed editor to change oldfile to newfile
39 -f like -e but in reverse order
40 -u do a unified diff with 3 lines of context
41 -U do a unified diff with 'lines' lines of context (implies -u)
42 -q report only whether or not the files differ
44 By default it will do an "old-style" diff, with output like UNIX diff
47 my $Context_Lines = 0; # lines of context to print. 0 for old-style diff
48 my $Diff_Type = "OLD"; # by default, do standard UNIX diff
49 my ($opt_c, $opt_u, $opt_e, $opt_f, $opt_q);
50 while ($ARGV[0] =~ /^-/) {
53 if ($opt =~ /^-C(.*)/) {
54 $Context_Lines = $1 || shift;
56 $Diff_Type = "CONTEXT";
57 } elsif ($opt =~ /^-c$/) {
60 $Diff_Type = "CONTEXT";
61 } elsif ($opt =~ /^-e$/) {
64 } elsif ($opt =~ /^-f$/) {
66 $Diff_Type = "REVERSE_ED";
67 } elsif ($opt =~ /^-U(.*)$/) {
68 $Context_Lines = $1 || shift;
70 $Diff_Type = "UNIFIED";
71 } elsif ($opt =~ /^-u$/) {
74 $Diff_Type = "UNIFIED";
75 } elsif ($opt =~ /^-q$/) {
82 bag("Illegal option -- $opt");
86 if ($opt_q and grep($_,($opt_c, $opt_f, $opt_u)) > 1) {
87 bag("Combining -q with other options is nonsensical");
90 if (grep($_,($opt_c, $opt_e, $opt_f, $opt_u)) > 1) {
91 bag("Only one of -c, -u, -f, -e are allowed");
94 bag($usage) unless @ARGV == 2;
97 my ($file1, $file2) = @ARGV;
99 my ($char1, $char2); # string to print before file names
100 if ($Diff_Type eq "CONTEXT") {
101 $char1 = '*' x 3; $char2 = '-' x 3;
102 } elsif ($Diff_Type eq "UNIFIED") {
103 $char1 = '-' x 3; $char2 = '+' x 3;
106 open (F1, $file1) or bag("Couldn't open $file1: $!");
107 open (F2, $file2) or bag("Couldn't open $file2: $!");
114 # diff yields lots of pieces, each of which is basically a Block object
115 my $diffs = diff(\@f1, \@f2);
116 exit 0 unless @$diffs;
118 if ($opt_q and @$diffs) {
119 print "Files $file1 and $file2 differ\n";
123 if ($Diff_Type =~ /UNIFIED|CONTEXT/) {
124 my @st = stat($file1);
126 print "$char1 $file1\t", scalar localtime($st[$MTIME]), "\n";
128 print "$char2 $file2\t", scalar localtime($st[$MTIME]), "\n";
132 # Loop over hunks. If a hunk overlaps with the last hunk, join them.
133 # Otherwise, print out the old one.
134 foreach my $piece (@$diffs) {
135 $hunk = new Hunk ($piece, $Context_Lines);
136 next unless $oldhunk; # first time through
138 # Don't need to check for overlap if blocks have no context lines
139 if ($Context_Lines && $hunk->does_overlap($oldhunk)) {
140 $hunk->prepend_hunk($oldhunk);
142 $oldhunk->output_diff(\@f1, \@f2, $Diff_Type);
149 # print the last hunk
150 $oldhunk->output_diff(\@f1, \@f2, $Diff_Type);
152 # Print hunks backwards if we're doing an ed diff
153 map {$_->output_ed_diff(\@f1, \@f2, $Diff_Type)} @Ed_Hunks if @Ed_Hunks;
166 # Package Hunk. A Hunk is a group of Blocks which overlap because of the
167 # context surrounding each block. (So if we're not using context, every
168 # hunk will contain one block.)
173 # Arg1 is output from &LCS::diff (which corresponds to one Block)
174 # Arg2 is the number of items (lines, e.g.,) of context around each block
176 # This subroutine changes $File_Length_Difference
179 # blocks - a list of Block objects
180 # start - index in file 1 where first block of the hunk starts
181 # end - index in file 1 where last block of the hunk ends
184 # before_diff - how much longer file 2 is than file 1 due to all hunks
185 # until but NOT including this one
186 # after_diff - difference due to all hunks including this one
187 my ($class, $piece, $context_items) = @_;
189 my $block = new Block ($piece); # this modifies $FLD!
191 my $before_diff = $File_Length_Difference; # BEFORE this hunk
192 my $after_diff = $before_diff + $block->{"length_diff"};
193 $File_Length_Difference += $block->{"length_diff"};
195 # @remove_array and @insert_array hold the items to insert and remove
196 # Save the start & beginning of each array. If the array doesn't exist
197 # though (e.g., we're only adding items in this block), then figure
198 # out the line number based on the line number of the other file and
199 # the current difference in file lenghts
200 my @remove_array = $block->remove;
201 my @insert_array = $block->insert;
202 my ($a1, $a2, $b1, $b2, $start1, $start2, $end1, $end2);
203 $a1 = @remove_array ? $remove_array[0 ]->{"item_no"} : -1;
204 $a2 = @remove_array ? $remove_array[-1]->{"item_no"} : -1;
205 $b1 = @insert_array ? $insert_array[0 ]->{"item_no"} : -1;
206 $b2 = @insert_array ? $insert_array[-1]->{"item_no"} : -1;
208 $start1 = $a1 == -1 ? $b1 - $before_diff : $a1;
209 $end1 = $a2 == -1 ? $b2 - $after_diff : $a2;
210 $start2 = $b1 == -1 ? $a1 + $before_diff : $b1;
211 $end2 = $b2 == -1 ? $a2 + $after_diff : $b2;
213 # At first, a hunk will have just one Block in it
219 "blocks" => [$block],
223 $hunk->flag_context($context_items);
228 # Change the "start" and "end" fields to note that context should be added
231 my ($hunk, $context_items) = @_;
232 return unless $context_items; # no context
235 my $start1 = $hunk->{"start1"};
236 my $num_added = $context_items > $start1 ? $start1 : $context_items;
237 $hunk->{"start1"} -= $num_added;
238 $hunk->{"start2"} -= $num_added;
241 my $end1 = $hunk->{"end1"};
242 $num_added = ($end1+$context_items > $#f1) ?
245 $hunk->{"end1"} += $num_added;
246 $hunk->{"end2"} += $num_added;
249 # Is there an overlap between hunk arg0 and old hunk arg1?
250 # Note: if end of old hunk is one less than beginning of second, they overlap
252 my ($hunk, $oldhunk) = @_;
253 return "" unless $oldhunk; # first time through, $oldhunk is empty
255 # Do I actually need to test both?
256 return ($hunk->{"start1"} - $oldhunk->{"end1"} <= 1 ||
257 $hunk->{"start2"} - $oldhunk->{"end2"} <= 1);
260 # Prepend hunk arg1 to hunk arg0
261 # Note that arg1 isn't updated! Only arg0 is.
263 my ($hunk, $oldhunk) = @_;
265 $hunk->{"start1"} = $oldhunk->{"start1"};
266 $hunk->{"start2"} = $oldhunk->{"start2"};
268 unshift (@{$hunk->{"blocks"}}, @{$oldhunk->{"blocks"}});
272 # DIFF OUTPUT ROUTINES. THESE ROUTINES CONTAIN DIFF FORMATTING INFO...
274 # First arg is the current hunk of course
275 # Next args are refs to the files
276 # last arg is type of diff
277 my $diff_type = $_[-1];
278 my %funchash = ("OLD" => \&output_old_diff,
279 "CONTEXT" => \&output_context_diff,
280 "ED" => \&store_ed_diff,
281 "REVERSE_ED" => \&output_ed_diff,
282 "UNIFIED" => \&output_unified_diff,
284 if (exists $funchash{$diff_type}) {
285 &{$funchash{$diff_type}}(@_); # pass in all args
286 } else {die "unknown diff type $diff_type"}
289 sub output_old_diff {
290 # Note that an old diff can't have any context. Therefore, we know that
291 # there's only one block in the hunk.
292 my ($hunk, $fileref1, $fileref2) = @_;
293 my %op_hash = ('+' => 'a', '-' => 'd', '!' => 'c');
295 my @blocklist = @{$hunk->{"blocks"}};
296 warn ("Expecting one block in an old diff hunk!") if scalar @blocklist != 1;
297 my $block = $blocklist[0];
298 my $op = $block->op; # +, -, or !
300 # Calculate item number range.
301 # old diff range is just like a context diff range, except the ranges
302 # are on one line with the action between them.
303 my $range1 = $hunk->context_range(1);
304 my $range2 = $hunk->context_range(2);
305 my $action = $op_hash{$op} || warn "unknown op $op";
306 print "$range1$action$range2\n";
308 # If removing anything, just print out all the remove lines in the hunk
309 # which is just all the remove lines in the block
310 if ($block->remove) {
311 my @outlist = @$fileref1[$hunk->{"start1"}..$hunk->{"end1"}];
312 map {$_ = "< $_\n"} @outlist; # all lines will be '< text\n'
316 print "---\n" if $op eq '!'; # only if inserting and removing
317 if ($block->insert) {
318 my @outlist = @$fileref2[$hunk->{"start2"}..$hunk->{"end2"}];
319 map {$_ = "> $_\n"} @outlist; # all lines will be '> text\n'
324 sub output_unified_diff {
325 my ($hunk, $fileref1, $fileref2) = @_;
328 # Calculate item number range.
329 my $range1 = $hunk->unified_range(1);
330 my $range2 = $hunk->unified_range(2);
331 print "@@ -$range1 +$range2 @@\n";
333 # Outlist starts containing the hunk of file 1.
334 # Removing an item just means putting a '-' in front of it.
335 # Inserting an item requires getting it from file2 and splicing it in.
336 # We splice in $num_added items. Remove blocks use $num_added because
337 # splicing changed the length of outlist.
338 # We remove $num_removed items. Insert blocks use $num_removed because
339 # their item numbers---corresponding to positions in file *2*--- don't take
340 # removed items into account.
341 my $low = $hunk->{"start1"};
342 my $hi = $hunk->{"end1"};
343 my ($num_added, $num_removed) = (0,0);
344 my @outlist = @$fileref1[$low..$hi];
345 map {s/^/ /} @outlist; # assume it's just context
347 foreach my $block (@{$hunk->{"blocks"}}) {
348 foreach my $item ($block->remove) {
349 my $op = $item->{"sign"}; # -
350 my $offset = $item->{"item_no"} - $low + $num_added;
351 $outlist[$offset] =~ s/^ /$op/;
354 foreach my $item ($block->insert) {
355 my $op = $item->{"sign"}; # +
356 my $i = $item->{"item_no"};
357 my $offset = $i - $hunk->{"start2"} + $num_removed;
358 splice(@outlist,$offset,0,"$op$$fileref2[$i]");
363 map {s/$/\n/} @outlist; # add \n's
368 sub output_context_diff {
369 my ($hunk, $fileref1, $fileref2) = @_;
372 print "***************\n";
373 # Calculate item number range.
374 my $range1 = $hunk->context_range(1);
375 my $range2 = $hunk->context_range(2);
377 # Print out file 1 part for each block in context diff format if there are
378 # any blocks that remove items
379 print "*** $range1 ****\n";
380 my $low = $hunk->{"start1"};
381 my $hi = $hunk->{"end1"};
382 if (@blocklist = grep {$_->remove} @{$hunk->{"blocks"}}) {
383 my @outlist = @$fileref1[$low..$hi];
384 map {s/^/ /} @outlist; # assume it's just context
385 foreach my $block (@blocklist) {
386 my $op = $block->op; # - or !
387 foreach my $item ($block->remove) {
388 $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/;
391 map {s/$/\n/} @outlist; # add \n's
395 print "--- $range2 ----\n";
396 $low = $hunk->{"start2"};
397 $hi = $hunk->{"end2"};
398 if (@blocklist = grep {$_->insert} @{$hunk->{"blocks"}}) {
399 my @outlist = @$fileref2[$low..$hi];
400 map {s/^/ /} @outlist; # assume it's just context
401 foreach my $block (@blocklist) {
402 my $op = $block->op; # + or !
403 foreach my $item ($block->insert) {
404 $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/;
407 map {s/$/\n/} @outlist; # add \n's
413 # ed diff prints out diffs *backwards*. So save them while we're generating
414 # them, then print them out at the end
416 unshift @Ed_Hunks, $hunk;
420 # This sub is used for ed ('diff -e') OR reverse_ed ('diff -f').
421 # last arg is type of diff
422 my $diff_type = $_[-1];
423 my ($hunk, $fileref1, $fileref2) = @_;
424 my %op_hash = ('+' => 'a', '-' => 'd', '!' => 'c');
426 # Can't be any context for this kind of diff, so each hunk has one block
427 my @blocklist = @{$hunk->{"blocks"}};
428 warn ("Expecting one block in an ed diff hunk!") if scalar @blocklist != 1;
429 my $block = $blocklist[0];
430 my $op = $block->op; # +, -, or !
432 # Calculate item number range.
433 # old diff range is just like a context diff range, except the ranges
434 # are on one line with the action between them.
435 my $range1 = $hunk->context_range(1);
436 $range1 =~ s/,/ / if $diff_type eq "REVERSE_ED";
437 my $action = $op_hash{$op} || warn "unknown op $op";
438 print ($diff_type eq "ED" ? "$range1$action\n" : "$action$range1\n");
440 if ($block->insert) {
441 my @outlist = @$fileref2[$hunk->{"start2"}..$hunk->{"end2"}];
442 map {s/$/\n/} @outlist; # add \n's
444 print ".\n"; # end of ed 'c' or 'a' command
449 # Generate a range of item numbers to print. Only print 1 number if the range
450 # has only one item in it. Otherwise, it's 'start,end'
451 # Flag is the number of the file (1 or 2)
452 my ($hunk, $flag) = @_;
453 my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
454 $start++; $end++; # index from 1, not zero
455 my $range = ($start < $end) ? "$start,$end" : $end;
460 # Generate a range of item numbers to print for unified diff
461 # Print number where block starts, followed by number of lines in the block
462 # (don't print number of lines if it's 1)
463 my ($hunk, $flag) = @_;
464 my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
465 $start++; $end++; # index from 1, not zero
466 my $length = $end - $start + 1;
467 my $first = $length < 2 ? $end : $start; # strange, but correct...
468 my $range = $length== 1 ? $first : "$first,$length";
474 # Package Block. A block is an operation removing, adding, or changing
475 # a group of items. Basically, this is just a list of changes, where each
476 # change adds or deletes a single item.
477 # (Change could be a separate class, but it didn't seem worth it)
481 # Input is a chunk from &Algorithm::LCS::diff
483 # length_diff - how much longer file 2 is than file 1 due to this block
485 # sign - '+' for insert, '-' for remove
486 # item_no - number of the item in the file (e.g., line number)
487 # We don't bother storing the text of the item
489 my ($class,$chunk) = @_;
492 # This just turns each change into a hash.
493 foreach my $item (@$chunk) {
494 my ($sign, $item_no, $text) = @$item;
495 my $hashref = {"sign" => $sign, "item_no" => $item_no};
496 push @changes, $hashref;
499 my $block = { "changes" => \@changes };
500 bless $block, $class;
502 $block->{"length_diff"} = $block->insert - $block->remove;
507 # LOW LEVEL FUNCTIONS
509 # what kind of block is this?
511 my $insert = $block->insert;
512 my $remove = $block->remove;
514 $remove && $insert and return '!';
515 $remove and return '-';
516 $insert and return '+';
517 warn "unknown block type";
518 return '^'; # context block
521 # Returns a list of the changes in this block that remove items
522 # (or the number of removals if called in scalar context)
523 sub remove { return grep {$_->{"sign"} eq '-'} @{shift->{"changes"}}; }
525 # Returns a list of the changes in this block that insert items
526 sub insert { return grep {$_->{"sign"} eq '+'} @{shift->{"changes"}}; }
528 } # end of package Block