git.maemo.org Git - pkg-perl/blob - deb-src/libalgorithm-diff-perl/libalgorithm-diff-perl-1.19.02/lib/Algorithm/DiffOld.pm

   1 # This is a version of Algorithm::Diff that uses only a comparison function,
   2 # like versions <= 0.59 used to.
   3 # $Revision: 1.3 $
   4
   5 package Algorithm::DiffOld;
   6 use strict;
   7 use vars qw($VERSION @EXPORT_OK @ISA @EXPORT);
   8 use integer;            # see below in _replaceNextLargerWith() for mod to make
   9                                         # if you don't use this
  10 require Exporter;
  11 @ISA = qw(Exporter);
  12 @EXPORT = qw();
  13 @EXPORT_OK = qw(LCS diff traverse_sequences);
  14 $VERSION = 1.10;        # manually tracking Algorithm::Diff
  15
  16 # McIlroy-Hunt diff algorithm
  17 # Adapted from the Smalltalk code of Mario I. Wolczko, <mario@wolczko.com>
  18 # by Ned Konz, perl@bike-nomad.com
  19
  20 =head1 NAME
  21
  22 Algorithm::DiffOld - Compute `intelligent' differences between two files / lists
  23 but use the old (<=0.59) interface.
  24
  25 =head1 NOTE
  26
  27 This has been provided as part of the Algorithm::Diff package by Ned Konz.
  28 This particular module is B<ONLY> for people who B<HAVE> to have the old
  29 interface, which uses a comparison function rather than a key generating
  30 function.
  31
  32 Because each of the lines in one array have to be compared with each
  33 of the lines in the other array, this does M*N comparisions. This can
  34 be very slow. I clocked it at taking 18 times as long as the stock
  35 version of Algorithm::Diff for a 4000-line file. It will get worse
  36 quadratically as array sizes increase.
  37
  38 =head1 SYNOPSIS
  39
  40   use Algorithm::DiffOld qw(diff LCS traverse_sequences);
  41
  42   @lcs    = LCS( \@seq1, \@seq2, $comparison_function );
  43
  44   $lcsref = LCS( \@seq1, \@seq2, $comparison_function );
  45
  46   @diffs = diff( \@seq1, \@seq2, $comparison_function );
  47
  48   traverse_sequences( \@seq1, \@seq2,
  49                      { MATCH => $callback,
  50                        DISCARD_A => $callback,
  51                        DISCARD_B => $callback,
  52                      },
  53                      $comparison_function );
  54
  55 =head1 COMPARISON FUNCTIONS
  56
  57 Each of the main routines should be passed a comparison function. If you
  58 aren't passing one in, B<use Algorithm::Diff instead>.
  59
  60 These functions should return a true value when two items should compare
  61 as equal.
  62
  63 For instance,
  64
  65   @lcs    = LCS( \@seq1, \@seq2, sub { my ($a, $b) = @_; $a eq $b } );
  66
  67 but if that is all you're doing with your comparison function, just use
  68 Algorithm::Diff and let it do this (this is its default).
  69
  70 Or:
  71
  72   sub someFunkyComparisonFunction
  73   {
  74         my ($a, $b) = @_;
  75         $a =~ m{$b};
  76   }
  77
  78   @diffs = diff( \@lines, \@patterns, \&someFunkyComparisonFunction );
  79
  80 which would allow you to diff an array @lines which consists of text
  81 lines with an array @patterns which consists of regular expressions.
  82
  83 This is actually the reason I wrote this version -- there is no way
  84 to do this with a key generation function as in the stock Algorithm::Diff.
  85
  86 =cut
  87
  88 # Find the place at which aValue would normally be inserted into the array. If
  89 # that place is already occupied by aValue, do nothing, and return undef. If
  90 # the place does not exist (i.e., it is off the end of the array), add it to
  91 # the end, otherwise replace the element at that point with aValue.
  92 # It is assumed that the array's values are numeric.
  93 # This is where the bulk (75%) of the time is spent in this module, so try to
  94 # make it fast!
  95
  96 sub _replaceNextLargerWith
  97 {
  98         my ( $array, $aValue, $high ) = @_;
  99         $high ||= $#$array;
 100
 101         # off the end?
 102         if ( $high == -1  || $aValue > $array->[ -1 ] )
 103         {
 104                 push( @$array, $aValue );
 105                 return $high + 1;
 106         }
 107
 108         # binary search for insertion point...
 109         my $low = 0;
 110         my $index;
 111         my $found;
 112         while ( $low <= $high )
 113         {
 114                 $index = ( $high + $low ) / 2;
 115 #               $index = int(( $high + $low ) / 2);             # without 'use integer'
 116                 $found = $array->[ $index ];
 117
 118                 if ( $aValue == $found )
 119                 {
 120                         return undef;
 121                 }
 122                 elsif ( $aValue > $found )
 123                 {
 124                         $low = $index + 1;
 125                 }
 126                 else
 127                 {
 128                         $high = $index - 1;
 129                 }
 130         }
 131
 132         # now insertion point is in $low.
 133         $array->[ $low ] = $aValue;             # overwrite next larger
 134         return $low;
 135 }
 136
 137 # This method computes the longest common subsequence in $a and $b.
 138
 139 # Result is array or ref, whose contents is such that
 140 #       $a->[ $i ] == $b->[ $result[ $i ] ]
 141 # foreach $i in ( 0 .. $#result ) if $result[ $i ] is defined.
 142
 143 # An additional argument may be passed; this is a CODE ref to a comparison
 144 # routine. By default, comparisons will use "eq" .
 145 # Note that this routine will be called as many as M*N times, so make it fast!
 146
 147 # Additional parameters, if any, will be passed to the key generation routine.
 148
 149 sub _longestCommonSubsequence
 150 {
 151         my $a = shift;  # array ref
 152         my $b = shift;  # array ref
 153         my $compare = shift || sub { my $a = shift; my $b = shift; $a eq $b };
 154
 155         my $aStart = 0;
 156         my $aFinish = $#$a;
 157         my $bStart = 0;
 158         my $bFinish = $#$b;
 159         my $matchVector = [];
 160
 161         # First we prune off any common elements at the beginning
 162         while ( $aStart <= $aFinish
 163                 and $bStart <= $bFinish
 164                 and &$compare( $a->[ $aStart ], $b->[ $bStart ], @_ ) )
 165         {
 166                 $matchVector->[ $aStart++ ] = $bStart++;
 167         }
 168
 169         # now the end
 170         while ( $aStart <= $aFinish
 171                 and $bStart <= $bFinish
 172                 and &$compare( $a->[ $aFinish ], $b->[ $bFinish ], @_ ) )
 173         {
 174                 $matchVector->[ $aFinish-- ] = $bFinish--;
 175         }
 176
 177         my $thresh = [];
 178         my $links = [];
 179
 180         my ( $i, $ai, $j, $k );
 181         for ( $i = $aStart; $i <= $aFinish; $i++ )
 182         {
 183                 $k = 0;
 184                 # look for each element of @b between $bStart and $bFinish
 185                 # that matches $a->[ $i ], in reverse order
 186                 for ($j = $bFinish; $j >= $bStart; $j--)
 187                 {
 188                         next if ! &$compare( $a->[$i], $b->[$j], @_ );
 189                         # optimization: most of the time this will be true
 190                         if ( $k
 191                                 and $thresh->[ $k ] > $j
 192                                 and $thresh->[ $k - 1 ] < $j )
 193                         {
 194                                 $thresh->[ $k ] = $j;
 195                         }
 196                         else
 197                         {
 198                                 $k = _replaceNextLargerWith( $thresh, $j, $k );
 199                         }
 200
 201                         # oddly, it's faster to always test this (CPU cache?).
 202                         if ( defined( $k ) )
 203                         {
 204                                 $links->[ $k ] =
 205                                         [ ( $k ? $links->[ $k - 1 ] : undef ), $i, $j ];
 206                         }
 207                 }
 208         }
 209
 210         if ( @$thresh )
 211         {
 212                 for ( my $link = $links->[ $#$thresh ]; $link; $link = $link->[ 0 ] )
 213                 {
 214                         $matchVector->[ $link->[ 1 ] ] = $link->[ 2 ];
 215                 }
 216         }
 217
 218         return wantarray ? @$matchVector : $matchVector;
 219 }
 220
 221 sub traverse_sequences
 222 {
 223         my $a = shift;  # array ref
 224         my $b = shift;  # array ref
 225         my $callbacks = shift || { };
 226         my $compare = shift;
 227         my $matchCallback = $callbacks->{'MATCH'} || sub { };
 228         my $discardACallback = $callbacks->{'DISCARD_A'} || sub { };
 229         my $finishedACallback = $callbacks->{'A_FINISHED'};
 230         my $discardBCallback = $callbacks->{'DISCARD_B'} || sub { };
 231         my $finishedBCallback = $callbacks->{'B_FINISHED'};
 232         my $matchVector = _longestCommonSubsequence( $a, $b, $compare, @_ );
 233         # Process all the lines in match vector
 234         my $lastA = $#$a;
 235         my $lastB = $#$b;
 236         my $bi = 0;
 237         my $ai;
 238         for ( $ai = 0; $ai <= $#$matchVector; $ai++ )
 239         {
 240                 my $bLine = $matchVector->[ $ai ];
 241                 if ( defined( $bLine ) )        # matched
 242                 {
 243                         &$discardBCallback( $ai, $bi++, @_ ) while $bi < $bLine;
 244                         &$matchCallback( $ai, $bi++, @_ );
 245                 }
 246                 else
 247                 {
 248                         &$discardACallback( $ai, $bi, @_ );
 249                 }
 250         }
 251         # the last entry (if any) processed was a match.
 252
 253         if ( defined( $finishedBCallback ) && $ai <= $lastA )
 254         {
 255                 &$finishedBCallback( $bi, @_ );
 256         }
 257         else
 258         {
 259                 &$discardACallback( $ai++, $bi, @_ ) while ( $ai <= $lastA );
 260         }
 261
 262         if ( defined( $finishedACallback ) && $bi <= $lastB )
 263         {
 264                 &$finishedACallback( $ai, @_ );
 265         }
 266         else
 267         {
 268                 &$discardBCallback( $ai, $bi++, @_ ) while ( $bi <= $lastB );
 269         }
 270         return 1;
 271 }
 272
 273 sub LCS
 274 {
 275         my $a = shift;  # array ref
 276         my $matchVector = _longestCommonSubsequence( $a, @_ );
 277         my @retval;
 278         my $i;
 279         for ( $i = 0; $i <= $#$matchVector; $i++ )
 280         {
 281                 if ( defined( $matchVector->[ $i ] ) )
 282                 {
 283                         push( @retval, $a->[ $i ] );
 284                 }
 285         }
 286         return wantarray ? @retval : \@retval;
 287 }
 288
 289 sub diff
 290 {
 291         my $a = shift;  # array ref
 292         my $b = shift;  # array ref
 293         my $retval = [];
 294         my $hunk = [];
 295         my $discard = sub { push( @$hunk, [ '-', $_[ 0 ], $a->[ $_[ 0 ] ] ] ) };
 296         my $add = sub { push( @$hunk, [ '+', $_[ 1 ], $b->[ $_[ 1 ] ] ] ) };
 297         my $match = sub { push( @$retval, $hunk ) if scalar(@$hunk); $hunk = [] };
 298         traverse_sequences( $a, $b,
 299                 { MATCH => $match, DISCARD_A => $discard, DISCARD_B => $add },
 300                 @_ );
 301         &$match();
 302         return wantarray ? @$retval : $retval;
 303 }
 304
 305 1;