1 ##############################################################################
2 # $URL: http://perlcritic.tigris.org/svn/perlcritic/trunk/Perl-Critic/lib/Perl/Critic/Utils/PPIRegexp.pm $
3 # $Date: 2008-07-03 10:19:10 -0500 (Thu, 03 Jul 2008) $
6 ##############################################################################
8 package Perl::Critic::Utils::PPIRegexp;
14 use English qw(-no_match_vars);
22 our $VERSION = '1.088';
24 #-----------------------------------------------------------------------------
39 #-----------------------------------------------------------------------------
44 eval { require Regexp::Parser; } or return;
46 my $re = get_match_string($elem);
47 return if !defined $re;
49 # Are there any external regexp modifiers? If so, embed the ones
50 # that matter before parsing.
51 my %modifiers = get_modifiers($elem);
52 my $mods = join q{}, map {$modifiers{$_} ? $_ : q{}} qw(i m x s);
57 my $parser = Regexp::Parser->new;
58 # If we can't parse the regexp, don't return a parse tree
60 local $SIG{__WARN__} = sub {}; # blissful silence...
61 return if ! $parser->regex($re);
67 #-----------------------------------------------------------------------------
69 sub get_match_string {
71 return if !$elem->{sections};
72 my $section = $elem->{sections}->[0];
74 return substr $elem->content, $section->{position}, $section->{size};
77 #-----------------------------------------------------------------------------
79 sub get_substitute_string {
81 return if !$elem->{sections};
82 my $section = $elem->{sections}->[1];
84 return substr $elem->content, $section->{position}, $section->{size};
87 #-----------------------------------------------------------------------------
91 return if !$elem->{modifiers};
92 return %{ $elem->{modifiers} };
95 #-----------------------------------------------------------------------------
99 return if !$elem->{sections};
101 if (!$elem->{sections}->[0]->{type}) {
102 # PPI v1.118 workaround: the delimiters were not recorded in some cases
103 # hack: pull them out ourselves
104 # limitation: this regexp fails on s{foo}<bar>
105 my $operator = defined $elem->{operator} ? $elem->{operator} : q{};
106 @delimiters = join q{}, $elem =~ m/\A $operator (.).*?(.) (?:[xmsocgie]*) \z/mx;
108 @delimiters = ($elem->{sections}->[0]->{type});
109 if ($elem->{sections}->[1]) {
110 push @delimiters, $elem->{sections}->[1]->{type} || $delimiters[0];
116 #-----------------------------------------------------------------------------
119 ## This nastiness is to auto-vivify PPI packages from Regexp::Parser classes
121 # Track which ones are already created
122 my %seen = ('Regexp::Parser::__object__' => 1);
124 sub _get_ppi_package {
125 my ($src_class, $re_node) = @_;
126 (my $dest_class = $src_class) =~ s/\A Regexp::Parser::/Perl::Critic::PPIRegexp::/mx;
127 if (!$seen{$src_class}) {
128 $seen{$src_class} = 1;
129 croak 'Regexp node which is not in the Regexp::Parser namespace'
130 if $dest_class eq $src_class;
131 my $src_isa_name = $src_class . '::ISA';
132 my $dest_isa_name = $dest_class . '::ISA';
134 for my $isa (eval "\@$src_isa_name") { ##no critic(Eval)
135 my $dest_isa = _get_ppi_package($isa, $re_node);
136 push @isa, $dest_isa;
138 eval "\@$dest_isa_name = qw(@isa)"; ##no critic(Eval)
139 croak $EVAL_ERROR if $EVAL_ERROR;
145 Readonly::Scalar my $NO_DEPTH_USED => -1;
151 # walk the Regexp::Parser tree, converting to PPI nodes as we go
153 my $ppire = PPI::Node->new;
154 my @stack = ($ppire);
155 my $iter = $re->walker;
156 my $last_depth = $NO_DEPTH_USED;
157 while (my ($node, $depth) = $iter->()) {
158 if ($last_depth > $depth) { # -> parent
159 # walker() creates pseudo-closing nodes for reasons I don't understand
160 while ($last_depth-- > $depth) {
164 my $src_class = ref $node;
165 my $ppipkg = _get_ppi_package($src_class, $node);
166 my $ppinode = $ppipkg->new($node);
167 if ($last_depth == $depth) { # -> sibling
168 $stack[-1] = $ppinode;
170 push @stack, $ppinode;
172 $stack[-2]->add_element($ppinode); ## no critic qw(MagicNumbers)
174 $last_depth = $depth;
180 package ## no critic (ProhibitMultiplePackages) # hide from PAUSE
181 Perl::Critic::PPIRegexp::__object__;
182 use base 'PPI::Node';
184 # Base wrapper class for PPI versions of Regexp::Parser classes
186 # This is a hack because we call everything PPI::Node instances instead of
187 # PPI::Token instances. One downside is that PPI::Dumper doesn't work on
191 my ($class, $re_node) = @_;
192 my $self = $class->SUPER::new();
193 $self->{_re} = $re_node;
198 return $self->{_re}->visual;
210 #-----------------------------------------------------------------------------
218 Perl::Critic::Utils::PPIRegexp - Utility functions for dealing with PPI regexp tokens.
222 use Perl::Critic::Utils::PPIRegexp qw(:all);
224 my $doc = PPI::Document->new(\'m/foo/');
225 my $elem = $doc->find('PPI::Token::Regexp::Match')->[0];
226 print get_match_string($elem); # yields 'foo'
230 As of PPI v1.1xx, the PPI regexp token classes
231 (L<PPI::Token::Regexp::Match>, L<PPI::Token::Regexp::Substitute> and
232 L<PPI::Token::QuoteLike::Regexp>) has a very weak interface, so it is
233 necessary to dig into internals to learn anything useful. This
234 package contains subroutines to encapsulate that excess intimacy. If
235 future versions of PPI gain better accessors, this package will start
238 =head1 IMPORTABLE SUBS
242 =item C<parse_regexp( $token )>
244 Parse the regexp token with L<Regexp::Parser>. If that module is not
245 available or if there is a parse error, returns undef. If a parse success,
246 returns a Regexp::Parser instance that can be used to walk the regexp object
249 CAVEAT: This method pays special attention to the C<x> modifier to the regexp.
250 If present, we wrap the regexp string in C<(?x:...)> to ensure a proper parse.
251 This does change the object model though.
253 Someday if PPI gets native Regexp support, this method may become deprecated.
255 =item C<ppiify( $regexp )>
257 Given a L<Regexp::Parser> instance (perhaps as returned from C<parse_regexp>)
258 convert it to a tree of L<PPI::Node> instances. This is useful because PPI
259 has a more familiar and powerful programming model than the Regexp::Parser
262 Someday if PPI gets native Regexp support, this method may become a no-op.
264 =item C<get_match_string( $token )>
266 Returns the match portion of the regexp or undef if the specified
267 token is not a regexp. Examples:
269 m/foo/; # yields 'foo'
270 s/foo/bar/; # yields 'foo'
271 / \A a \z /xms; # yields ' \\A a \\z '
272 qr{baz}; # yields 'baz'
274 =item C<get_substitute_string( $token )>
276 Returns the substitution portion of a search-and-replace regexp or
277 undef if the specified token is not a valid regexp. Examples:
279 m/foo/; # yields undef
280 s/foo/bar/; # yields 'bar'
282 =item C<get_modifiers( $token )>
284 Returns a hash containing booleans for the modifiers of the regexp, or
285 undef if the token is not a regexp.
287 /foo/xms; # yields (m => 1, s => 1, x => 1)
289 qr/foo/i; # yields (i => 1)
291 =item C<get_delimiters( $token )>
293 Returns one (or two for a substitution regexp) two-character strings
294 indicating the delimiters of the regexp, or an empty list if the token is not
295 a regular expression token. For example:
297 m/foo/; # yields ('//')
298 m#foo#; # yields ('##')
299 m<foo>; # yields ('<>')
300 s/foo/bar/; # yields ('//', '//')
301 s{foo}{bar}; # yields ('{}', '{}')
302 s{foo}/bar/; # yields ('{}', '//') valid, but yuck!
303 qr/foo/; # yields ('//')
309 Chris Dolan <cdolan@cpan.org>
313 Copyright (c) 2007-2008 Chris Dolan. Many rights reserved.
315 This program is free software; you can redistribute it and/or modify
316 it under the same terms as Perl itself. The full text of this license
317 can be found in the LICENSE file included with this module.
323 # cperl-indent-level: 4
325 # indent-tabs-mode: nil
326 # c-indentation-style: bsd
328 # ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround :