1 ##############################################################################
2 # $URL: http://perlcritic.tigris.org/svn/perlcritic/trunk/Perl-Critic/lib/Perl/Critic/Policy/RegularExpressions/ProhibitUnusedCapture.pm $
3 # $Date: 2008-07-03 10:19:10 -0500 (Thu, 03 Jul 2008) $
6 ##############################################################################
8 package Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture;
14 use List::MoreUtils qw(none);
15 use Scalar::Util qw(refaddr);
17 use English qw(-no_match_vars);
20 use Perl::Critic::Utils qw{ :booleans :severities split_nodes_on_comma };
21 use Perl::Critic::Utils::PPIRegexp qw{ parse_regexp get_match_string get_substitute_string get_modifiers };
22 use base 'Perl::Critic::Policy';
24 our $VERSION = '1.088';
26 #-----------------------------------------------------------------------------
28 Readonly::Scalar my $DESC => q{Only use a capturing group if you plan to use the captured value};
29 Readonly::Scalar my $EXPL => [252];
31 #-----------------------------------------------------------------------------
33 sub supported_parameters { return qw() }
34 sub default_severity { return $SEVERITY_MEDIUM }
35 sub default_themes { return qw( core pbp maintenance ) }
36 sub applies_to { return qw(PPI::Token::Regexp::Match
37 PPI::Token::Regexp::Substitute) }
39 #-----------------------------------------------------------------------------
41 Readonly::Scalar my $NUM_CAPTURES_FOR_GLOBAL => 100; # arbitrarily large number
44 my ( $self, $elem, undef ) = @_;
46 # optimization: don't bother parsing the regexp if there are no parens
47 return if $elem !~ m/[(]/xms;
49 my $re = parse_regexp($elem);
51 my $ncaptures = @{$re->captures};
52 return if 0 == $ncaptures;
54 my @captures; # List of expected captures
55 $#captures = $ncaptures - 1;
57 # Look for references to the capture in the regex itself
58 my $iter = $re->walker;
59 while (my $token = $iter->()) {
60 if ($token->isa('Regexp::Parser::ref')) {
61 my ($num) = $token->raw =~ m/ (\d+) /xms;
62 $captures[$num-1] = 1;
65 my $subst = get_substitute_string($elem);
68 # TODO: This is a quick hack. Really, we should parse the string. It could
69 # be false positive (s///e) or false negative (s/(.)/\$1/)
71 for my $num ($subst =~ m/\$(\d+)/xms) {
72 $captures[$num-1] = 1;
75 return if none {! defined $_} @captures;
77 my %modifiers = get_modifiers($elem);
79 $ncaptures = $NUM_CAPTURES_FOR_GLOBAL;
80 $#captures = $ncaptures - 1;
83 return if _enough_assignments($elem, \@captures);
84 return if _is_in_slurpy_array_context($elem);
85 return if _enough_magic($elem, \@captures);
87 return $self->violation( $DESC, $EXPL, $elem );
90 sub _enough_assignments { ##no critic(ExcessComplexity) # TODO
91 my ($elem, $captures) = @_;
93 # look backward for the assignment operator
94 my $psib = $elem->sprevious_sibling;
98 if ($psib->isa('PPI::Token::Operator')) {
99 last SIBLING if q{=} eq $psib;
100 return if q{!~} eq $psib;
102 $psib = $psib->sprevious_sibling;
105 $psib = $psib->sprevious_sibling;
106 return if !$psib; # syntax error: '=' at the beginning of a statement???
108 if ($psib->isa('PPI::Token::Symbol')) {
113 return 1 if _symbol_is_slurpy($psib);
115 } elsif ($psib->isa('PPI::Structure::Block')) {
118 return 1 if _block_is_slurpy($psib);
120 } elsif ($psib->isa('PPI::Structure::List')) {
123 # ($foo,$bar) = m/(foo)(bar)/
124 # (@foo) = m/(foo)(bar)/
125 # ($foo,@foo) = m/(foo)(bar)/
126 # ($foo,@$foo) = m/(foo)(bar)/
127 # ($foo,@{$foo}) = m/(foo)(bar)/
129 my @args = $psib->schildren;
130 return 1 if !@args; # empty list (perhaps the "goatse" operator) is slurpy
132 # Forward looking: PPI might change in v1.200 so schild(0) is a PPI::Statement::Expression
133 if ( 1 == @args && $args[0]->isa('PPI::Statement::Expression') ) {
134 @args = $args[0]->schildren;
137 my @parts = split_nodes_on_comma(@args);
139 for my $i (0 .. $#parts) {
140 if (1 == @{$parts[$i]}) {
141 my $var = $parts[$i]->[0];
142 if ($var->isa('PPI::Token::Symbol') || $var->isa('PPI::Token::Cast')) {
143 return 1 if _has_array_sigil($var);
146 $captures->[$i] = 1; # ith evariable captures
150 return none {! defined $_} @{$captures};
153 sub _symbol_is_slurpy {
156 return 1 if _has_array_sigil($symbol);
157 return 1 if _has_hash_sigil($symbol);
158 return 1 if _is_preceded_by_array_or_hash_cast($symbol);
162 sub _has_array_sigil {
163 my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast
165 return q{@} eq substr $elem->content, 0, 1;
168 sub _has_hash_sigil {
169 my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast
171 return q{%} eq substr $elem->content, 0, 1;
174 sub _block_is_slurpy {
177 return 1 if _is_preceded_by_array_or_hash_cast($block);
181 sub _is_preceded_by_array_or_hash_cast {
183 my $psib = $elem->sprevious_sibling;
185 while ($psib && $psib->isa('PPI::Token::Cast')) {
187 $psib = $psib->sprevious_sibling;
190 my $sigil = substr $cast->content, 0, 1;
191 return q{@} eq $sigil || q{%} eq $sigil;
194 sub _is_in_slurpy_array_context {
197 # return true is the result of the regexp is passed to a subroutine.
198 # doesn't check for array context due to assignment.
200 # look backward for explict regex operator
201 my $psib = $elem->sprevious_sibling;
202 if ($psib && $psib eq q{=~}) {
203 # Track back through value
204 $psib = _skip_lhs($psib);
208 my $parent = $elem->parent;
210 if ($parent->isa('PPI::Statement')) {
211 $parent = $parent->parent;
214 return 1 if $parent->isa('PPI::Structure::List');
215 return 1 if $parent->isa('PPI::Structure::Constructor');
216 if ($parent->isa('PPI::Structure::Block')) {
217 return 1 if refaddr($elem->statement) eq refaddr([$parent->schildren]->[-1]);
221 if ($psib->isa('PPI::Token::Operator')) {
222 # most operators kill slurpiness (except assignment, which is handled elsewhere)
223 return 1 if q{,} eq $psib;
232 # TODO: better implementation to handle casts, expressions, subcalls, etc.
233 $elem = $elem->sprevious_sibling;
239 my ($elem, $captures) = @_;
241 _check_for_magic($elem, $captures);
243 return none {! defined $_} @{$captures};
247 sub _check_for_magic {
248 my ($elem, $captures) = @_;
250 # Search for $1..$9 in :
251 # * the rest of this statement
252 # * subsequent sibling statements
253 # * if this is in a conditional boolean, the if/else bodies of the conditional
254 # * if this is in a while/for condition, the loop body
255 # But NO intervening regexps!
257 return if ! _check_rest_of_statement($elem, $captures);
259 my $parent = $elem->parent;
260 while ($parent && ! $parent->isa('PPI::Statement::Sub')) {
261 return if ! _check_rest_of_statement($parent, $captures);
262 $parent = $parent->parent;
268 # false if we hit another regexp
269 sub _check_rest_of_statement {
270 my ($elem, $captures) = @_;
272 my $nsib = $elem->snext_sibling;
274 return if $nsib->isa('PPI::Token::Regexp');
275 if ($nsib->isa('PPI::Node')) {
276 return if ! _check_node_children($nsib, $captures);
278 _mark_magic($nsib, $captures);
280 $nsib = $nsib->snext_sibling;
285 # false if we hit another regexp
286 sub _check_node_children {
287 my ($elem, $captures) = @_;
289 # caveat: this will descend into subroutine definitions...
291 for my $child ($elem->schildren) {
292 return if $child->isa('PPI::Token::Regexp');
293 if ($child->isa('PPI::Node')) {
294 return if ! _check_node_children($child, $captures);
296 _mark_magic($child, $captures);
303 my ($elem, $captures) = @_;
305 # Record if we see $1, $2, $3, ...
307 if ($elem->isa('PPI::Token::Magic') && $elem =~ m/\A \$ (\d+) /xms) {
309 if (0 < $num) { # don't mark $0
310 # Only mark the captures we really need -- don't mark superfluous magic vars
311 if ($num <= @{$captures}) {
312 $captures->[$num-1] = 1;
323 #-----------------------------------------------------------------------------
327 =for stopwords refactored
331 Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture - Only use a capturing group if you plan to use the captured value.
335 This Policy is part of the core L<Perl::Critic> distribution.
340 Perl regular expressions have multiple types of grouping syntax. The basic
341 parentheses (e.g. C<m/(foo)/>) captures into the magic variable C<$1>.
342 Non-capturing groups (e.g. C<m/(?:foo)/> are useful because they have better
343 runtime performance and do not copy strings to the magic global capture
346 It's also easier on the maintenance programmer if you consistently use
347 capturing vs. non-capturing groups, because that programmer can tell more
348 easily which regexps can be refactored without breaking surrounding code which
349 may use the captured values.
354 This Policy is not configurable except for the standard options.
359 =head2 Regexp::Parser
361 We use L<Regexp::Parser> to analyze the regular expression syntax. This is an
362 optional module for Perl::Critic, so it will not be automatically installed by
363 CPAN for you. If you wish to use this policy, you must install that module
366 =head2 C<qr//> interpolation
368 This policy can be confused by interpolation of C<qr//> elements, but those
369 are always false negatives. For example:
371 my $foo_re = qr/(foo)/;
372 my ($foo) = m/$foo_re (bar)/x;
374 A human can tell that this should be a violation because there are two
375 captures but only the first capture is used, not the second. The policy only
376 notices that there is one capture in the regexp and remains happy.
380 Initial development of this policy was supported by a grant from the Perl Foundation.
384 Chris Dolan <cdolan@cpan.org>
388 Copyright (c) 2007-2008 Chris Dolan. Many rights reserved.
390 This program is free software; you can redistribute it and/or modify
391 it under the same terms as Perl itself. The full text of this license
392 can be found in the LICENSE file included with this module
398 # cperl-indent-level: 4
400 # indent-tabs-mode: nil
401 # c-indentation-style: bsd
403 # ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround :