1 package Text::Diff::Table;
5 Text::Diff::Table - Text::Diff plugin to generate "table" format output
11 diff \@a, $b { STYLE => "Table" };
15 This is a plugin output formatter for Text::Diff that generates "table" style
18 +--+----------------------------------+--+------------------------------+
19 | |../Test-Differences-0.2/MANIFEST | |../Test-Differences/MANIFEST |
20 | |Thu Dec 13 15:38:49 2001 | |Sat Dec 15 02:09:44 2001 |
21 +--+----------------------------------+--+------------------------------+
23 | 1|Differences.pm | 2|Differences.pm |
24 | 2|MANIFEST | 3|MANIFEST |
25 | | * 4|MANIFEST.SKIP *
26 | 3|Makefile.PL | 5|Makefile.PL |
27 | | * 6|t/00escape.t *
28 | 4|t/00flatten.t | 7|t/00flatten.t |
29 | 5|t/01text_vs_data.t | 8|t/01text_vs_data.t |
30 | 6|t/10test.t | 9|t/10test.t |
31 +--+----------------------------------+--+------------------------------+
33 This format also goes to some pains to highlight "invisible" characters on
34 differing elements by selectively escaping whitespace. Each element is split
35 in to three segments (leading whitespace, body, trailing whitespace). If
36 whitespace differs in a segement, that segment is whitespace escaped.
38 Here is an example of the selective whitespace.
40 +--+--------------------------+--------------------------+
41 | |demo_ws_A.txt |demo_ws_B.txt |
42 | |Fri Dec 21 08:36:32 2001 |Fri Dec 21 08:36:50 2001 |
43 +--+--------------------------+--------------------------+
44 | 1|identical |identical |
45 * 2| spaced in | also spaced in *
46 * 3|embedded space |embedded tab *
47 | 4|identical |identical |
48 * 5| spaced in |\ttabbed in *
49 * 6|trailing spaces\s\s\n |trailing tabs\t\t\n *
50 | 7|identical |identical |
51 * 8|lf line\n |crlf line\r\n *
52 * 9|embedded ws |embedded\tws *
53 +--+--------------------------+--------------------------+
55 Here's why the lines do or do not have whitespace escaped:
59 =item lines 1, 4, 7 don't differ, no need.
61 =item lines 2, 3 differ in non-whitespace, no need.
63 =item lines 5, 6, 8, 9 all have subtle ws changes.
67 Whether or not line 3 should have that tab character escaped is a judgement
68 call; so far I'm choosing not to.
72 @ISA = qw( Text::Diff::Base Exporter );
73 @EXPORT_OK = qw( expand_tabs );
82 $_ eq '"' || $_ eq '$' ? qq{'$_'}
83 : $_ eq "\\" ? qq{"\\\\"}
88 map( sprintf( "\\x%02x", $_ ), ( 0..31, 127..255 ) ),
89 # map( "\\c$_", "A".."Z"),
90 "\\t", "\\n", "\\r", "\\f", "\\b", "\\a", "\\e"
91 ## NOTE: "\\\\" is not here because some things are explicitly
92 ## escaped before escape() is called and we don't want to
93 ## double-escape "\". Also, in most texts, leaving "\" more
94 ## readable makes sense.
101 $s =~ s{(\t)(\t*)|([^\t]+)}{
103 my $spaces = " " x ( 8 - $count % 8 + 8 * length $2 );
117 sub trim_trailing_line_ends($) {
119 $s =~ s/[\r\n]+(?!\n)$//;
126 ## use utf8 if available. don't if not.
127 my $escaper = <<'EOCODE' ;
134 : sprintf( "\\x{%04x}", $_ ) ;
140 unless ( eval $escaper ) {
141 $escaper =~ s/ *use *utf8 *;\n// or die "Can't drop use utf8;";
142 eval $escaper or die $@;
149 return bless { @_ }, $proto
152 my $missing_elt = [ "", "" ];
156 my @seqs = ( shift, shift );
157 my $ops = shift; ## Leave sequences in @_[0,1]
162 my $opcode = $_->[Text::Diff::OPCODE()];
163 if ( $opcode eq " " ) {
164 push @A, $missing_elt while @A < @B;
165 push @B, $missing_elt while @B < @A;
167 push @A, [ $_->[0] + ( $options->{OFFSET_A} || 0), $seqs[0][$_->[0]] ]
168 if $opcode eq " " || $opcode eq "-";
169 push @B, [ $_->[1] + ( $options->{OFFSET_B} || 0), $seqs[1][$_->[1]] ]
170 if $opcode eq " " || $opcode eq "+";
173 push @A, $missing_elt while @A < @B;
174 push @B, $missing_elt while @B < @A;
177 my ( $A, $B ) = (shift @A, shift @B );
179 ## Do minimal cleaning on identical elts so these look "normal":
180 ## tabs are expanded, trailing newelts removed, etc. For differing
181 ## elts, make invisible characters visible if the invisible characters
183 my $elt_type = $B == $missing_elt ? "A" :
184 $A == $missing_elt ? "B" :
185 $A->[1] eq $B->[1] ? "="
187 if ( $elt_type ne "*" ) {
188 if ( $elt_type eq "=" || $A->[1] =~ /\S/ || $B->[1] =~ /\S/ ) {
189 $A->[1] = escape trim_trailing_line_ends expand_tabs $A->[1];
190 $B->[1] = escape trim_trailing_line_ends expand_tabs $B->[1];
193 $A->[1] = escape $A->[1];
194 $B->[1] = escape $B->[1];
198 ## not using \z here for backcompat reasons.
199 $A->[1] =~ /^(\s*?)([^ \t].*?)?(\s*)(?![\n\r])$/s;
200 my ( $l_ws_A, $body_A, $t_ws_A ) = ( $1, $2, $3 );
201 $body_A = "" unless defined $body_A;
202 $B->[1] =~ /^(\s*?)([^ \t].*?)?(\s*)(?![\n\r])$/s;
203 my ( $l_ws_B, $body_B, $t_ws_B ) = ( $1, $2, $3 );
204 $body_B = "" unless defined $body_B;
208 if ( $l_ws_A ne $l_ws_B ) {
209 ## Make leading tabs visible. Other non-' ' chars
210 ## will be dealt with in escape(), but this prevents
211 ## tab expansion from hiding tabs by making them
213 $added_escapes = 1 if $l_ws_A =~ s/\t/\\t/g;
214 $added_escapes = 1 if $l_ws_B =~ s/\t/\\t/g;
217 if ( $t_ws_A ne $t_ws_B ) {
218 ## Only trailing whitespace gets the \s treatment
219 ## to make it obvious what's going on.
220 $added_escapes = 1 if $t_ws_A =~ s/ /\\s/g;
221 $added_escapes = 1 if $t_ws_B =~ s/ /\\s/g;
222 $added_escapes = 1 if $t_ws_A =~ s/\t/\\t/g;
223 $added_escapes = 1 if $t_ws_B =~ s/\t/\\t/g;
226 $t_ws_A = $t_ws_B = "";
229 my $do_tab_escape = $added_escapes || do {
230 my $expanded_A = expand_tabs join( $body_A, $l_ws_A, $t_ws_A );
231 my $expanded_B = expand_tabs join( $body_B, $l_ws_B, $t_ws_B );
232 $expanded_A eq $expanded_B;
235 my $do_back_escape = $do_tab_escape || do {
236 my ( $unescaped_A, $escaped_A,
237 $unescaped_B, $escaped_B
240 join( "", /(\\.)/g ),
244 expand_tabs join( $body_A, $l_ws_A, $t_ws_A ),
245 expand_tabs join( $body_B, $l_ws_B, $t_ws_B );
246 $unescaped_A ne $unescaped_B && $escaped_A eq $escaped_B;
249 if ( $do_back_escape ) {
250 $body_A =~ s/\\/\\\\/g;
251 $body_B =~ s/\\/\\\\/g;
254 my $line_A = join $body_A, $l_ws_A, $t_ws_A;
255 my $line_B = join $body_B, $l_ws_B, $t_ws_B;
257 unless ( $do_tab_escape ) {
258 $line_A = expand_tabs $line_A;
259 $line_B = expand_tabs $line_B;
262 $A->[1] = escape $line_A;
263 $B->[1] = escape $line_B;
266 push @elts, [ @$A, @$B, $elt_type ];
270 push @{$self->{ELTS}}, @elts, ["bar"];
282 my @seqs = (shift,shift);
287 if ( defined $options->{FILENAME_A} || defined $options->{FILENAME_B} ) {
288 push @heading_lines, [
291 ( "", escape( defined $_ ? $_ : "<undef>" ) );
293 ( @{$options}{qw( FILENAME_A FILENAME_B)} )
299 if ( defined $options->{MTIME_A} || defined $options->{MTIME_B} ) {
300 push @heading_lines, [
304 ( defined $_ && length $_ )
310 @{$options}{qw( MTIME_A MTIME_B )}
316 if ( defined $options->{INDEX_LABEL} ) {
317 push @heading_lines, [ "", "", "", "", "=" ] unless @heading_lines;
318 $heading_lines[-1]->[0] = $heading_lines[-1]->[2] =
319 $options->{INDEX_LABEL};
322 ## Not ushifting on to @{$self->{ELTS}} in case it's really big. Want
323 ## to avoid the overhead.
325 my $four_column_mode = 0;
326 for my $cols ( @heading_lines, @{$self->{ELTS}} ) {
327 next if $cols->[-1] eq "bar";
328 if ( $cols->[0] ne $cols->[2] ) {
329 $four_column_mode = 1;
334 unless ( $four_column_mode ) {
335 for my $cols ( @heading_lines, @{$self->{ELTS}} ) {
336 next if $cols->[-1] eq "bar";
342 for my $cols ( @heading_lines, @{$self->{ELTS}} ) {
343 next if $cols->[-1] eq "bar";
344 for my $i (0..($#$cols-1)) {
345 $w[$i] = length $cols->[$i]
346 if defined $cols->[$i] && length $cols->[$i] > $w[$i];
350 my %fmts = $four_column_mode
352 "=" => "| %$w[0]s|%-$w[1]s | %$w[2]s|%-$w[3]s |\n",
353 "A" => "* %$w[0]s|%-$w[1]s * %$w[2]s|%-$w[3]s |\n",
354 "B" => "| %$w[0]s|%-$w[1]s * %$w[2]s|%-$w[3]s *\n",
355 "*" => "* %$w[0]s|%-$w[1]s * %$w[2]s|%-$w[3]s *\n",
358 "=" => "| %$w[0]s|%-$w[1]s |%-$w[2]s |\n",
359 "A" => "* %$w[0]s|%-$w[1]s |%-$w[2]s |\n",
360 "B" => "| %$w[0]s|%-$w[1]s |%-$w[2]s *\n",
361 "*" => "* %$w[0]s|%-$w[1]s |%-$w[2]s *\n",
364 $fmts{bar} = sprintf $fmts{"="}, "", "", "", "" ;
365 $fmts{bar} =~ s/\S/+/g;
366 $fmts{bar} =~ s/ /-/g;
369 sprintf( $fmts{$_->[-1]}, @$_ )
373 @heading_lines ? ["bar"] : (),
378 @{$self->{ELTS}} = [];
384 Table formatting requires buffering the entire diff in memory in order to
385 calculate column widths. This format should only be used for smaller
388 Assumes tab stops every 8 characters, as $DIETY intended.
390 Assumes all character codes >= 127 need to be escaped as hex codes, ie that the
391 user's terminal is ASCII, and not even "high bit ASCII", capable. This can be
392 made an option when the need arises.
394 Assumes that control codes (character codes 0..31) that don't have slash-letter
395 escapes ("\n", "\r", etc) in Perl are best presented as hex escapes ("\x01")
396 instead of octal ("\001") or control-code ("\cA") escapes.
400 Barrie Slaymaker <barries@slaysys.com>
404 Copyright 2001 Barrie Slaymaker, All Rights Reserved.
406 You may use this software under the terms of the GNU public license, any
407 version, or the Artistic license.