Subject: | Access to parent subrule results |
In my dabblings with Regexp::Grammars I found myself wanting to use
backrefs (i.e. <\IDENT>) to access the result of a subrule that was
matched in the parent rule, rather than in the current rule.
In Parse::RecDescent, this is possible by using subrule arguments, but
it doesn't seem that something like this is supported in
Regexp::Grammars.
I've attached a patch (for 1.005) that adds this parent-access behavior
using a new backref modifier '^', so that <^\IDENT> will get the result
of <IDENT> from the parent rule, and <^^\IDENT> gets the result from the
grandparent rule (and so on).
This patch isn't exactly comprehensive: I haven't spent any time adding
debugging or error-handling support for the new modifier, so Perl simply
throws an exception if you use more '^'s than there are enclosing rules.
Perhaps this feature or something like it could be considered for a
future version of Regexp::Grammars.
Subject: | parent_backref.diff |
=== added file 'demo/demo_indented_blocks.pl'
--- demo/demo_indented_blocks.pl 1970-01-01 00:00:00 +0000
+++ demo/demo_indented_blocks.pl 2010-09-17 04:52:35 +0000
@@ -0,0 +1,72 @@
+use v5.10;
+use warnings;
+
+use Data::Dumper qw< Dumper >;
+use Regexp::Grammars;
+
+#
+# The following grammar matches indented lines.
+# Try it out with any indented text, e.g. the output of ifconfig.
+#
+
+my $grammar = qr{
+ # Match all groups
+ \A <[group]>+
+
+ # Match a variable amount of whitespace
+ # - stored privately, so that it won't be included in the match result
+ <token: _indent>
+ \s*
+
+ # Match a sequence beginning and ending with a non-whitespace character that is
+ # followed by some whitespace and a newline
+ # - don't include the trailing whitespace and newline in the match result
+ <token: data>
+ <MATCH=(?: \S (?: .* \S )? )> \s* \n
+
+ # Match a line containing data, optionally followed by a group with deeper indentation
+ # - this keeps any nested groups associated with their preceding line
+ <token: line>
+ <data>
+ (?:
+ (?= <^\_indent> \s ) <group>
+ )?
+
+ # Match a series of lines on the same indent level
+ # - any nested groups (with deeper indentation) will be captured by the <line> token
+ <token: group>
+ <_indent>
+ <[line]> ** <\_indent>
+
+}x;
+
+sub clean_match ($) { # Recursively delete the '' entries from a match result, modifying it in place
+ my $match = shift; # value to clean: a plain scalar, or a hash/array reference
+
+ return $match unless ref $match; # non-reference values are returned untouched
+
+ my @children; # nested values that still have to be cleaned
+
+ given (ref $match) { # NOTE(review): given/when is deprecated in recent perls - consider if/elsif
+ when ('HASH') {
+ delete $match->{''}; # drop the entry stored under the empty-string key
+ push @children, values %$match;
+ }
+ when ('ARRAY') {
+ push @children, @$match;
+ }
+ default { # any other reference type is reported and otherwise left alone
+ warn 'unhandled ref type: ' . ref $match;
+ }
+ }
+
+ clean_match($_) for @children; # recurse into every nested value
+
+ return $match; # the same value that was passed in
+}
+
+undef $/; # no input record separator: the next read returns the whole input at once
+my $text = <>; # read from the files named on the command line, or from STDIN
+$Data::Dumper::Indent = 1;
+$Data::Dumper::Sortkeys = 1; # dump hash keys in sorted order
+print Dumper clean_match \%/ if $text =~ $grammar; # on a successful match, dump the cleaned contents of %/
=== modified file 'lib/Regexp/Grammars.pm'
--- lib/Regexp/Grammars.pm 2010-09-16 12:34:29 +0000
+++ lib/Regexp/Grammars.pm 2010-09-17 04:51:05 +0000
@@ -1239,13 +1239,13 @@
\[ (?<alias>(?&IDENT)) \s* = \s* (?<varname>(?&HASH)) \s* (?<keypat>(?&BRACES))? \s* \]
)
| (?<backref>
- \s* (?<slash> \\ | /) (?<subrule>(?&QUALIDENT)) \s*
+ \s* (?<carat> \^*) (?<slash> \\ | /) (?<subrule>(?&QUALIDENT)) \s*
)
| (?<alias_backref>
- (?<alias>(?&IDENT)) \s* = \s* (?<slash> \\ | /) (?<subrule>(?&QUALIDENT)) \s*
+ (?<alias>(?&IDENT)) \s* = \s* (?<carat> \^*) (?<slash> \\ | /) (?<subrule>(?&QUALIDENT)) \s*
)
| (?<alias_backref_list>
- \[ (?<alias>(?&IDENT)) \s* = \s* (?<slash> \\ | /) (?<subrule>(?&QUALIDENT)) \s* \]
+ \[ (?<alias>(?&IDENT)) \s* = \s* (?<carat> \^*) (?<slash> \\ | /) (?<subrule>(?&QUALIDENT)) \s* \]
)
|
(?<minimize_directive>
@@ -1384,7 +1384,8 @@
);
}
elsif ($+{backref} || $+{alias_backref} || $+{alias_backref_list}) {
- my $backref = qq{\$Regexp::Grammars::RESULT_STACK[-1]{'$+{subrule}'}};
+ my $level = -1 * (length($+{carat}) + 1);
+ my $backref = qq{\$Regexp::Grammars::RESULT_STACK[$level]{'$+{subrule}'}};
my $quoter = $+{slash} eq '\\'
? "quotemeta($backref)"
: "Regexp::Grammars::_invert_delim($backref)"