git.maemo.org Git - pkg-perl/blob - deb-src/libarray-compare-perl/libarray-compare-perl-1.16/lib/Array/Compare.pm

   1 #
   2 # $Id: Compare.pm 30 2008-06-29 14:04:29Z dave $
   3 #
   4
   5 =head1 NAME
   6
   7 Array::Compare - Perl extension for comparing arrays.
   8
   9 =head1 SYNOPSIS
  10
  11   use Array::Compare;
  12
  13   my $comp1 = Array::Compare->new;
  14   $comp->Sep('|');
  15   $comp->Skip({3 => 1, 4 => 1});
  16   $comp->WhiteSpace(0);
  17   $comp->Case(1);
  18
  19   my $comp2 = Array::Compare->new(Sep => '|',
  20                                   WhiteSpace => 0,
  21                                   Case => 1,
  22                                   Skip => {3 => 1, 4 => 1});
  23
  24   my @arr1 = 0 .. 10;
  25   my @arr2 = 0 .. 10;
  26
  27   $comp1->compare(\@arr1, \@arr2);
  28   $comp2->compare(\@arr1, \@arr2);
  29
  30 =head1 DESCRIPTION
  31
  32 If you have two arrays and you want to know if they are the same or
  33 different, then Array::Compare will be useful to you.
  34
  35 All comparisons are carried out via a comparator object. In the
  36 simplest usage, you can create and use a comparator object like
  37 this:
  38
  39   my @arr1 = 0 .. 10;
  40   my @arr2 = 0 .. 10;
  41
  42   my $comp = Array::Compare->new;
  43
  44   if ($comp->compare(\@arr1, \@arr2)) {
  45     print "Arrays are the same\n";
  46   } else {
  47     print "Arrays are different\n";
  48   }
  49
  50 Notice that you pass references to the two arrays to the comparison
  51 method.
  52
  53 Internally the comparator compares the two arrays by using C<join>
  54 to turn both arrays into strings and comparing the strings using
  55 C<eq>. In the joined strings, the elements of the original arrays
  56 are separated with the C<^G> character. This can cause problems if
  57 your array data contains C<^G> characters as it is possible that
  58 two different arrays can be converted to the same string.
  59
  60 To avoid this, it is possible to override the default separator
  61 character, either by passing and alternative to the C<new> function
  62
  63   my $comp = Array::Compare->new(Sep => '|');
  64
  65 or by changing the seperator for an existing comparator object
  66
  67   $comp->Sep('|');
  68
  69 In general you should choose a separator character that won't appear
  70 in your data.
  71
  72 You can also control whether or not whitespace within the elements of
  73 the arrays should be considered significant when making the comparison.
  74 The default is that all whitespace is significant. The alternative is
  75 for all consecutive white space characters to be converted to a single
  76 space for the pruposes of the comparison. Again, this can be turned on
  77 when creating a comparator object:
  78
  79   my $comp = Array::Compare->new(WhiteSpace => 0);
  80
  81 or by altering an existing object:
  82
  83   $comp->WhiteSpace(0);
  84
  85 You can also control whether or not the case of the data is significant
  86 in the comparison. The default is that the case of data is taken into
  87 account. This can be changed in the standard ways when creating a new
  88 comparator object:
  89
  90   my $comp = Array::Compare->new(Case => 0);
  91
  92 or by altering an existing object:
  93
  94   $comp->Case(0);
  95
  96 In addition to the simple comparison described above (which returns true
  97 if the arrays are the same and false if they're different) there is also
  98 a full comparison which returns a list containing the indexes of elements
  99 which differ between the two arrays. If the arrays are the same it returns
 100 an empty list. In scalar context the full comparison returns the length of
 101 this list (i.e. the number of elements that differ). You can access the full
 102 comparision in two ways. Firstly, there is a C<DefFull> attribute. If this
 103 is C<true> then a full comparison if carried out whenever the C<compare>
 104 method is called.
 105
 106   my $comp = Array::Compare->new(DefFull => 1);
 107   $comp->compare(\@arr1, \@arr2); # Full comparison
 108
 109   $comp->DefFull(0);
 110   $comp->compare(\@arr1, \@arr2); # Simple comparison
 111
 112   $comp->DefFull(1);
 113   $comp->compare(\@arr1, \@arr2); # Full comparison again
 114
 115
 116 Secondly, you can access the full comparison method directly
 117
 118   $comp->full_compare(\@arr1, \@arr2);
 119
 120 For symmetry, there is also a direct method to use to call the simple
 121 comparison.
 122
 123   $comp->simple_compare(\@arr1, \@arr2);
 124
 125 The final complication is the ability to skip elements in the comparison.
 126 If you know that two arrays will always differ in a particular element
 127 but want to compare the arrays I<ignoring> this element, you can do it
 128 with Array::Compare without taking array slices. To do this, a
 129 comparator object has an optional attribute called C<Skip> which is a
 130 reference to a hash. The keys in this hash are the indexes of the array
 131 elements and the values should be any true value for elements that should
 132 be skipped.
 133
 134 For example, if you want to compare two arrays, ignoring the values in
 135 elements two and four, you can do something like this:
 136
 137   my %skip = (2 => 1, 4 => 1);
 138   my @a = (0, 1, 2, 3, 4, 5);
 139   my @b = (0, 1, X, 3, X, 5);
 140
 141   my $comp = Array::Compare->new(Skip => \%skip);
 142
 143   $comp->compare(\@a, \@b);
 144
 145 This should return I<true>, as we are explicitly ignoring the columns
 146 which differ.
 147
 148 Of course, having created a comparator object with no skip hash, it is
 149 possible to add one later:
 150
 151   $comp->Skip({1 => 1, 2 => 1});
 152
 153 or:
 154
 155   my %skip = (1 => 1, 2 => 2);
 156   $comp->Skip(\%skip);
 157
 158 To reset the comparator so that no longer skips elements, set the skip
 159 hash to an empty hash.
 160
 161   $comp->Skip({});
 162
 163 You can also check to see if one array is a permutation of another, i.e.
 164 they contain the same elements but in a different order.
 165
 166   if ($comp->perm(\@a, \@b) {
 167     print "Arrays are perms\n";
 168   else {
 169     print "Nope. Arrays are completely different\n";
 170   }
 171
 172 In this case the values of C<WhiteSpace> and C<Case> are still used,
 173 but C<Skip> is ignored for, hopefully, obvious reasons.
 174
 175 =head1 METHODS
 176
 177 =cut
 178
 179 package Array::Compare;
 180
 181 require 5.006_000;
 182 use strict;
 183 use warnings;
 184 our ($VERSION, $AUTOLOAD);
 185
 186 use Carp;
 187
 188 $VERSION = 1.15;
 189
 190 my %_defaults = (Sep => '^G',
 191                  WhiteSpace => 1,
 192                  Case => 1,
 193                  Skip => {},
 194                  DefFull => 0);
 195
 196 =head2 new [ %OPTIONS ]
 197
 198 Constructs a new comparison object.
 199
 200 Takes an optional hash containing various options that control how
 201 comparisons are carried out. Any omitted options take useful defaults.
 202
 203 =over 4
 204
 205 =item Sep
 206
 207 This is the value that is used to separate fields when the array is joined
 208 into a string. It should be a value which doesn't appear in your data.
 209 Default is '^G'.
 210
 211 =item WhiteSpace
 212
 213 Flag that indicates whether or not whitespace is significant in the
 214 comparison. If this value is true then all multiple whitespace characters
 215 are changed into a single space before the comparison takes place. Default
 216 is 1 (whitespace is significant).
 217
 218 =item Case
 219
 220 Flag that indicates whther or not the case of the data should be significant
 221 in the comparison. Default is 1 (case is significant).
 222
 223 =item Skip
 224
 225 a reference to a hash which contains the numbers of any columns that should
 226 be skipped in the comparison. Default is an empty hash (all columns are
 227 significant).
 228
 229 =item DefFull
 230
 231 Flag which indicates whether the default comparison is simple (just returns
 232 true if the arrays are the same or false if they're not) or full (returns an
 233 array containing the indexes of the columns that differ). Default is 0 (simple
 234 comparison).
 235
 236 =back
 237
 238 =cut
 239
 240 sub new {
 241   my $class = shift;
 242
 243   my $self = {%_defaults, @_};
 244
 245   bless $self, $class;
 246
 247   return $self;
 248 }
 249
 250 #
 251 # Utility function to check the arguments to any of the comparison
 252 # function. Ensures that there are two arguments and that they are
 253 # both arrays.
 254 #
 255 sub _check_args {
 256   my $self = shift;
 257   croak('Must compare two arrays.') unless @_ == 2;
 258   croak('Argument 1 is not an array') unless ref($_[0]) eq 'ARRAY';
 259   croak('Argument 2 is not an array') unless ref($_[1]) eq 'ARRAY';
 260
 261   return;
 262 }
 263
 264 =head2 compare_len \@ARR1, \@ARR2
 265
 266 Very simple comparison. Just checks the lengths of the arrays are
 267 the same.
 268
 269 =cut
 270
 271 sub compare_len {
 272   my $self = shift;
 273
 274   $self->_check_args(@_);
 275
 276   return @{$_[0]} == @{$_[1]};
 277 }
 278
 279 =head2 compare \@ARR1, \@ARR2
 280
 281 Compare the values in two arrays and return a data indicating whether
 282 the arrays are the same. The exact return values differ depending on
 283 the comparison method used. See the descriptions of L<simple_compare>
 284 and L<full_compare> for details.
 285
 286 Uses the value of DefFull to determine which comparison routine
 287 to use.
 288
 289 =cut
 290
 291 sub compare {
 292   my $self = shift;
 293
 294   if ($self->DefFull) {
 295     return $self->full_compare(@_);
 296   } else {
 297     return $self->simple_compare(@_);
 298   }
 299 }
 300
 301 =head2 simple_compare \@ARR1, \@ARR2
 302
 303 Compare the values in two arrays and return a flag indicating whether or
 304 not the arrays are the same.
 305
 306 Returns true if the arrays are the same or false if they differ.
 307
 308 Uses the values of 'Sep', 'WhiteSpace' and 'Skip' to influence
 309 the comparison.
 310
 311 =cut
 312
 313 sub simple_compare {
 314   my $self = shift;
 315
 316   $self->_check_args(@_);
 317
 318   my ($row1, $row2) = @_;
 319
 320   # No point in continuing if the number of elements is different.
 321   return unless $self->compare_len(@_);
 322
 323   # @check contains the indexes into the two arrays, i.e. the numbers
 324   # from 0 to one less than the number of elements.
 325   my @check = 0 .. $#$row1;
 326
 327   my ($pkg, $caller) = (caller(1))[0, 3];
 328   my $perm = $caller eq __PACKAGE__ . "::perm";
 329
 330   # Filter @check so it only contains indexes that should be compared.
 331   # N.B. Makes no sense to do this if we are called from 'perm'.
 332   unless ($perm) {
 333     @check = grep {!(exists $self->Skip->{$_}
 334                      && $self->Skip->{$_}) } @check
 335                        if keys %{$self->Skip};
 336   }
 337
 338   # Build two strings by taking array slices containing only the columns
 339   # that we shouldn't skip and joining those array slices using the Sep
 340   # character. Hopefully we can then just do a string comparison.
 341   # Note: this makes the function liable to errors if your arrays
 342   # contain the separator character.
 343   my $str1 = join($self->Sep, @{$row1}[@check]);
 344   my $str2 = join($self->Sep, @{$row2}[@check]);
 345
 346   # If whitespace isn't significant, collapse it
 347   unless ($self->WhiteSpace) {
 348     $str1 =~ s/\s+/ /g;
 349     $str2 =~ s/\s+/ /g;
 350   }
 351
 352   # If case isn't significant, change to lower case
 353   unless ($self->Case) {
 354     $str1 = lc $str1;
 355     $str2 = lc $str2;
 356   }
 357
 358   return $str1 eq $str2;
 359 }
 360
 361 =head2 full_compare \@ARR1, \@ARR2
 362
 363 Do a full comparison between two arrays.
 364
 365 Checks each individual column. In scalar context returns the number
 366 of columns that differ (zero if the arrays are the same). In list
 367 context returns an list containing the indexes of the columns that
 368 differ (an empty list if the arrays are the same).
 369
 370 Uses the values of 'Sep' and 'WhiteSpace' to influence the comparison.
 371
 372 B<Note:> If the two arrays are of different lengths then this method
 373 just returns the indexes of the elements that appear in one array but
 374 not the other (i.e. the indexes from the longer array that are beyond
 375 the end of the shorter array). This might be a little
 376 counter-intuitive.
 377
 378 =cut
 379
 380 sub full_compare {
 381   my $self = shift;
 382
 383   $self->_check_args(@_);
 384
 385   my ($row1, $row2) = @_;
 386
 387   # No point in continuing if the number of elements is different.
 388   # Because of the expected return value from this function we can't
 389   # just say 'the arrays are different'. We need to do some work to
 390   # calculate a meaningful return value.
 391   # If we've been called in array context we return a list containing
 392   # the number of the columns that appear in the longer list and aren't
 393   # in the shorter list. If we've been called in scalar context we
 394   # return the difference in the lengths of the two lists.
 395   unless ($self->compare_len(@_)) {
 396     if (wantarray) {
 397       my ($max, $min);
 398       if ($#{$row1} > $#{$row2}) {
 399         ($max, $min) = ($#{$row1}, $#{$row2} + 1);
 400       } else {
 401         ($max, $min) = ($#{$row2}, $#{$row1} + 1);
 402       }
 403       return ($min .. $max);
 404     } else {
 405       return abs(@{$row1} - @{$row2});
 406     }
 407   }
 408
 409   my ($arr1, $arr2) = @_;
 410
 411   my @diffs = ();
 412
 413   foreach (0 .. $#{$arr1}) {
 414     next if keys %{$self->Skip} && $self->Skip->{$_};
 415
 416     my ($val1, $val2) = ($arr1->[$_], $arr2->[$_]);
 417     unless ($self->WhiteSpace) {
 418       $val1 =~ s/\s+/ /g;
 419       $val2 =~ s/\s+/ /g;
 420     }
 421
 422     unless ($self->Case) {
 423       $val1 = lc $val1;
 424       $val2 = lc $val2;
 425     }
 426
 427     push @diffs, $_ unless $val1 eq $val2;
 428   }
 429
 430   return wantarray ? @diffs : scalar @diffs;
 431 }
 432
 433 =head2 perm \@ARR1, \@ARR2
 434
 435 Check to see if one array is a permutation of the other (i.e. contains
 436 the same set of elements, but in a different order).
 437
 438 We do this by sorting the arrays and passing references to the assorted
 439 versions to simple_compare. There are also some small changes to
 440 simple_compare as it should ignore the Skip hash if we are called from
 441 perm.
 442
 443 =cut
 444
 445 sub perm {
 446   my $self = shift;
 447
 448   return $self->simple_compare([sort @{$_[0]}], [sort @{$_[1]}]);
 449 }
 450
 451 #
 452 # Attempt to be clever with object attributes.
 453 # Each object attribute is always accessed using an access method.
 454 # None of these access methods exist in the object code.
 455 # If an unknown method is called then the AUTOLOAD method is called
 456 # in its place with the same parameters and the variable $AUTOLOAD
 457 # set to the name of the unknown method.
 458 #
 459 # In this function we work out which method has been called and
 460 # simulate it by returning the correct attribute value (and setting
 461 # it to a new value if the method was passed a new value to use).
 462 #
 463 # We're also a little cleverer than that as we create a new method on
 464 # the fly so that the next time we call the missing method it has
 465 # magically sprung into existance, thereby avoiding the overhead of
 466 # calling AUTOLOAD more than once for each method called.
 467 #
 468 sub AUTOLOAD {
 469   no strict 'refs';
 470   my ($self, $val) = @_;
 471   my ($name) = $AUTOLOAD =~ m/.*::(\w*)/;
 472
 473   *{$AUTOLOAD} = sub { return @_ > 1 ?
 474                          $_[0]->{$name} = $_[1] :
 475                            $_[0]->{$name}};
 476
 477   return defined $val ? $self->{$name} = $val : $self->{$name};
 478 }
 479
 480 #
 481 # One (small) downside of the AUTOLOAD trick, is that we need to
 482 # explicitly define a DESTROY method to prevent Perl from passing
 483 # those calls to AUTOLOAD. In this case we don't need to do anything.
 484 #
 485 sub DESTROY { }
 486
 487 1;
 488 __END__
 489
 490 =head1 AUTHOR
 491
 492 Dave Cross <dave@mag-sol.com>
 493
 494 =head1 SEE ALSO
 495
 496 perl(1).
 497
 498 =head1 COPYRIGHT AND LICENSE
 499
 500 Copyright (C) 2000-2005, Magnum Solutions Ltd.  All Rights Reserved.
 501
 502 This script is free software; you can redistribute it and/or modify it
 503 under the same terms as Perl itself.
 504
 505 =cut