Subject: | Recognize <*> as being a glob operation, not a readline. |
It would be helpful in code analysis if PPI could distinguish between
uses of <> as a readline operator and uses of <> as a glob operator. The
perlop docs say that <> is a readline only if it encloses nothing, a file
handle, or a simple scalar variable; anything else is treated as a glob.
Noodling around with perl -e seems to show that (at least under 5.8.8
and 5.10.0) whitespace or subscripts inside the <> cause it to be parsed
as a glob. That is, '< STDIN >' is a glob, and so is '<$foo[0]>'.
The attached svn diff represents a possible implementation. There are
test failures in svn revision 5506, but applying this diff introduces no
new ones.
I confess to believing the code around lib/PPI/Token/Whitespace.pm line
315 (post-patch, detecting a glob after a '}') should probably be looked
at extra closely for the possibility of false matches. False failures to
match simply duplicate the current behavior, which results in the glob
being misclassified as '<' and '>' operators.
Thanks for your time and attention,
Tom Wyant
Subject: | ppi_glob.patch |
Index: t/data/05_lexer/10_readline.dump
===================================================================
--- t/data/05_lexer/10_readline.dump (revision 5506)
+++ t/data/05_lexer/10_readline.dump (working copy)
@@ -49,6 +49,22 @@
PPI::Token::Whitespace ' '
PPI::Token::Symbol '@v'
PPI::Token::Operator '='
- PPI::Token::QuoteLike::Readline '<$up../*.v>'
+ PPI::Token::QuoteLike::Glob '<$up../*.v>'
PPI::Token::Structure ';'
PPI::Token::Whitespace '\n'
+ PPI::Statement::Variable
+ PPI::Token::Word 'my'
+ PPI::Token::Whitespace ' '
+ PPI::Token::Symbol '@v'
+ PPI::Token::Whitespace ' '
+ PPI::Token::Operator '='
+ PPI::Token::Whitespace ' '
+ PPI::Token::Word 'grep'
+ PPI::Token::Whitespace ' '
+ PPI::Structure::Block { ... }
+ PPI::Statement
+ PPI::Token::Regexp::Match 'm/foo/'
+ PPI::Token::Whitespace ' '
+ PPI::Token::QuoteLike::Glob '<$up../*.v>'
+ PPI::Token::Structure ';'
+ PPI::Token::Whitespace '\n'
Index: t/data/05_lexer/10_readline.code
===================================================================
--- t/data/05_lexer/10_readline.code (revision 5506)
+++ t/data/05_lexer/10_readline.code (working copy)
@@ -4,3 +4,4 @@
print while <>;
grep { /foo/ } <FOO>;
my @v=<$up../*.v>;
+my @v = grep {m/foo/} <$up../*.v>;
Index: lib/PPI.pm
===================================================================
--- lib/PPI.pm (revision 5506)
+++ lib/PPI.pm (working copy)
@@ -448,6 +448,7 @@
PPI::Token::QuoteLike::Regexp
PPI::Token::QuoteLike::Words
PPI::Token::QuoteLike::Readline
+ PPI::Token::QuoteLike::Glob
PPI::Token::Regexp
PPI::Token::Regexp::Match
PPI::Token::Regexp::Substitute
Index: lib/PPI/Token/QuoteLike/Glob.pm
===================================================================
--- lib/PPI/Token/QuoteLike/Glob.pm (revision 0)
+++ lib/PPI/Token/QuoteLike/Glob.pm (revision 0)
@@ -0,0 +1,64 @@
+package PPI::Token::QuoteLike::Glob;
+
+=pod
+
+=head1 NAME
+
+PPI::Token::QuoteLike::Glob - The glob quote-like operator
+
+=head1 INHERITANCE
+
+ PPI::Token::QuoteLike::Glob
+ isa PPI::Token::QuoteLike
+ isa PPI::Token
+ isa PPI::Element
+
+=head1 DESCRIPTION
+
+The C<glob> quote-like operator is used to scan a directory for matching
+files, as follows.
+
+ @files = <*.c>;    # Glob with wildcards
+
+=head1 METHODS
+
+There are no methods available for C<PPI::Token::QuoteLike::Glob>
+beyond those provided by the parent L<PPI::Token::QuoteLike>, L<PPI::Token>
+and L<PPI::Element> classes.
+
+Got any ideas for methods? Submit a report to rt.cpan.org!
+
+=cut
+
+use strict;
+use base 'PPI::Token::_QuoteEngine::Full',
+ 'PPI::Token::QuoteLike';
+
+use vars qw{$VERSION};
+BEGIN {
+ $VERSION = '1.204_01';
+}
+
+1;
+
+=pod
+
+=head1 SUPPORT
+
+See the L<support section|PPI/SUPPORT> in the main module.
+
+=head1 AUTHOR
+
+Adam Kennedy E<lt>adamk@cpan.orgE<gt>
+
+=head1 COPYRIGHT
+
+Copyright 2001 - 2008 Adam Kennedy.
+
+This program is free software; you can redistribute
+it and/or modify it under the same terms as Perl itself.
+
+The full text of the license can be found in the
+LICENSE file included with this module.
+
+=cut
Index: lib/PPI/Token/Whitespace.pm
===================================================================
--- lib/PPI/Token/Whitespace.pm (revision 5506)
+++ lib/PPI/Token/Whitespace.pm (working copy)
@@ -266,32 +266,56 @@
return 'Operator';
}
+ # If the next character is a word character or a '$', it
+ # is probably a readline, otherwise it is probably a
+ # glob.
+
+ my $line = substr( $t->{line}, $t->{line_cursor} );
+ my $probable_class;
+ if ( $next_char eq '$' ) {
+ $probable_class = $line =~ m/<\$\w+>/ ?
+ 'QuoteLike::Readline' : 'QuoteLike::Glob';
+ } elsif ( $next_char eq '>' ) {
+ $probable_class = 'QuoteLike::Readline';
+ } elsif ( $next_char =~ m/(?!\d)\w/ ) {
+ $probable_class = 'QuoteLike::Readline';
+ } else {
+ $probable_class = 'QuoteLike::Glob';
+ }
+
# The most common group of readlines are used like
# while ( <...> )
# while <>;
my $prec = $prev->content;
if ( $prev->isa('PPI::Token::Structure') and $prec eq '(' ) {
- return 'QuoteLike::Readline';
+ return $probable_class;
}
if ( $prev->isa('PPI::Token::Word') and $prec eq 'while' ) {
return 'QuoteLike::Readline';
}
+ if ( $prev->isa('PPI::Token::Word') and (
+ $prec eq 'for' or $prec eq 'foreach' ) ) {
+ return 'QuoteLike::Glob';
+ }
if ( $prev->isa('PPI::Token::Operator') and $prec eq '=' ) {
- return 'QuoteLike::Readline';
+ return $probable_class;
}
if ( $prev->isa('PPI::Token::Operator') and $prec eq ',' ) {
- return 'QuoteLike::Readline';
+ return $probable_class;
}
if ( $prev->isa('PPI::Token::Structure') and $prec eq '}' ) {
# Could go either way... do a regex check
# $foo->{bar} < 2;
# grep { .. } <foo>;
- my $line = substr( $t->{line}, $t->{line_cursor} );
if ( $line =~ /^<(?!\d)\w+>/ ) {
# Almost definitely readline
return 'QuoteLike::Readline';
}
+ if ( $line =~ /^<\s*(?![-+\d.]).*?>/ ) {
+ # Almost definitely glob
+ return 'QuoteLike::Glob';
+ }
}
# Otherwise, we guess operator, which has been the default up
Index: lib/PPI/Token/QuoteLike.pm
===================================================================
--- lib/PPI/Token/QuoteLike.pm (revision 5506)
+++ lib/PPI/Token/QuoteLike.pm (working copy)
@@ -15,7 +15,7 @@
=head1 DESCRIPTION
The C<PPI::Token::QuoteLike> class is never instantiated, and simply
-provides a common abstract base class for the five quote-like operator
+provides a common abstract base class for the six quote-like operator
classes. In PPI, a "quote-like" is the set of quote-like things that
exclude the string quotes and regular expressions.
@@ -33,6 +33,8 @@
=item <FOO> - L<PPI::Token::QuoteLike::Readline>
+=item <*.c> - L<PPI::Token::QuoteLike::Glob>
+
=back
The names are hopefully obvious enough not to have to explain what
Index: lib/PPI/Tokenizer.pm
===================================================================
--- lib/PPI/Tokenizer.pm (revision 5506)
+++ lib/PPI/Tokenizer.pm (working copy)
@@ -717,6 +717,7 @@
'PPI::Token::QuoteLike::Backtick' => 'operator',
'PPI::Token::QuoteLike::Command' => 'operator',
'PPI::Token::QuoteLike::Readline' => 'operator',
+ 'PPI::Token::QuoteLike::Glob' => 'operator',
'PPI::Token::QuoteLike::Regexp' => 'operator',
'PPI::Token::QuoteLike::Words' => 'operator',
);
Index: lib/PPI/Token.pm
===================================================================
--- lib/PPI/Token.pm (revision 5506)
+++ lib/PPI/Token.pm (working copy)
@@ -60,6 +60,7 @@
use PPI::Token::QuoteLike::Regexp ();
use PPI::Token::QuoteLike::Words ();
use PPI::Token::QuoteLike::Readline ();
+use PPI::Token::QuoteLike::Glob ();
use PPI::Token::Regexp::Match ();
use PPI::Token::Regexp::Substitute ();
use PPI::Token::Regexp::Transliterate ();