1 ##############################################################################
2 # $URL: http://perlcritic.tigris.org/svn/perlcritic/trunk/Perl-Critic/lib/Perl/Critic/Policy/RegularExpressions/ProhibitEnumeratedClasses.pm $
3 # $Date: 2008-07-03 10:19:10 -0500 (Thu, 03 Jul 2008) $
6 ##############################################################################
8 package Perl::Critic::Policy::RegularExpressions::ProhibitEnumeratedClasses;
15 use English qw(-no_match_vars);
16 use List::MoreUtils qw(all);
19 use Perl::Critic::Utils qw{ :booleans :severities hashify };
20 use Perl::Critic::Utils::PPIRegexp qw{ ppiify parse_regexp get_modifiers };
22 use base 'Perl::Critic::Policy';
24 our $VERSION = '1.088';
26 #-----------------------------------------------------------------------------
28 Readonly::Scalar my $DESC => q{Use named character classes};
29 Readonly::Scalar my $EXPL => [248];
31 Readonly::Array my @PATTERNS => ( # order matters: most to least specific
32 [q{ },'\\t','\\r','\\n'] => ['\\s', '\\S'], ##no critic (Interpolation)
33 ['A-Z','a-z','_'] => ['\\w', '\\W'],
34 ['A-Z','a-z'] => ['[[:alpha:]]','[[:^alpha:]]'],
35 ['A-Z'] => ['[[:upper:]]','[[:^upper:]]'],
36 ['a-z'] => ['[[:lower:]]','[[:^lower:]]'],
37 ['0-9'] => ['\\d','\\D'],
38 ['\w'] => [undef, '\\W'],
39 ['\s'] => [undef, '\\S'],
42 #-----------------------------------------------------------------------------
44 sub supported_parameters { return qw() }
45 sub default_severity { return $SEVERITY_LOWEST }
46 sub default_themes { return qw( core pbp cosmetic unicode ) }
47 sub applies_to { return qw(PPI::Token::Regexp::Match
48 PPI::Token::Regexp::Substitute
49 PPI::Token::QuoteLike::Regexp) }
51 #-----------------------------------------------------------------------------
54 my ( $self, $elem, undef ) = @_;
56 # optimization: don't bother parsing the regexp if there are no character classes
57 return if $elem !~ m/\[/xms;
59 my $re = ppiify(parse_regexp($elem));
62 # Must pass a sub to find() because our node classes don't start with PPI::
63 my $anyofs = $re->find(sub {$_[1]->isa('Perl::Critic::PPIRegexp::anyof')});
65 for my $anyof (@{$anyofs}) {
66 my $violation = $self->_get_character_class_violations($elem, $anyof);
67 return $violation if $violation;
72 sub _get_character_class_violations {
73 my ($self, $elem, $anyof) = @_;
76 for my $element ($anyof->children) {
77 if ($element->isa('Perl::Critic::PPIRegexp::exact')) {
78 my @tokens = split m/(\\.[^\\]*)/xms, $element->content;
79 for my $token (map { split m/\A (\\[nrf])/xms, _fixup($_); } @tokens) { ##no critic(Comma) ## TODO: FALSE POSITIVE
80 $elements{$token} = 1;
82 } elsif ($element->isa('Perl::Critic::PPIRegexp::anyof_char') ||
83 $element->isa('Perl::Critic::PPIRegexp::anyof_range') ||
84 $element->isa('Perl::Critic::PPIRegexp::anyof_class')) {
85 for my $token (split m/\A (\\[nrf])/xms, _fixup($element->content)) {
86 $elements{$token} = 1;
89 # no known way to get to this branch; just for forward compatibility
90 carp 'Unexpected type inside a character class: ' . (ref $element) . " '$element'";
93 for (my $i = 0; $i < @PATTERNS; $i += 2) { ##no critic (CStyleForLoop)
94 if (all { exists $elements{$_} } @{$PATTERNS[$i]}) {
95 my $neg = $anyof->re->neg;
96 my $improvement = $PATTERNS[$i + 1]->[$neg ? 1 : 0];
97 next if !defined $improvement;
99 if ($neg && ! defined $PATTERNS[$i + 1]->[0]) {
100 # the [^\w] => \W rule only applies if \w is the only token.
101 # that is it does not apply to [^\w\s]
102 next if 1 != scalar keys %elements;
105 my $orig = join q{}, '[', ($neg ? q{^} : ()), @{$PATTERNS[$i]}, ']';
106 return $self->violation( $DESC . " ($orig vs. $improvement)", $EXPL, $elem );
113 Readonly::Hash my %HEX => ( # Note: this is ASCII specific!
114 '0a' => '\\n', ##no critic (Interpolation)
115 '0c' => '\\f', ##no critic (Interpolation)
116 '0d' => '\\r', ##no critic (Interpolation)
123 $chars =~ s/\A \\x([\da-fA-F]{2})/\\x{$1}/gxms;
126 $chars =~ s/\A \\[ ]/ /gxms;
129 $chars =~ s/\A \\0([0-7]{2})/'\\x{'.(sprintf "%02x", oct $1).'}'/egxms;
132 $chars =~ s/\A (\\x [{] ([\da-fA-F]+) [}] ) /exists $HEX{$2} ? $HEX{$2} : $1/egxms;
141 #-----------------------------------------------------------------------------
147 Perl::Critic::Policy::RegularExpressions::ProhibitEnumeratedClasses - Use named character classes instead of explicit character lists.
151 This Policy is part of the core L<Perl::Critic> distribution.
156 This policy is not for everyone! If you are working in pure ASCII,
157 then disable it now or you may see some false violations.
159 On the other hand many of us are working in a multilingual world with
160 an extended character set, probably Unicode. In that world, patterns
161 like C<m/[A-Z]/> can be a source of bugs when you really meant
162 C<m/\p{IsUpper}/>. This policy catches a selection of possible
163 incorrect character class usage.
165 Specifically, the patterns are:
167 B<C<[\t\r\n\f\ ]>> vs. B<C<\s>>
169 B<C<[\t\r\n\ ]>> vs. B<C<\s>> (because many people forget C<\f>)
171 B<C<[A-Za-z_]>> vs. B<C<\w>>
173 B<C<[A-Za-z]>> vs. B<C<\p{IsAlphabetic}>>
175 B<C<[A-Z]>> vs. B<C<\p{IsUpper}>>
177 B<C<[a-z]>> vs. B<C<\p{IsLower}>>
179 B<C<[0-9]>> vs. B<C<\d>>
181 B<C<[^\w]>> vs. B<C<\W>>
183 B<C<[^\s]>> vs. B<C<\S>>
188 This Policy is not configurable except for the standard options.
193 Initial development of this policy was supported by a grant from the Perl Foundation.
197 Chris Dolan <cdolan@cpan.org>
201 Copyright (c) 2007-2008 Chris Dolan. Many rights reserved.
203 This program is free software; you can redistribute it and/or modify
204 it under the same terms as Perl itself. The full text of this license
205 can be found in the LICENSE file included with this module
211 # cperl-indent-level: 4
213 # indent-tabs-mode: nil
214 # c-indentation-style: bsd
216 # ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround :