Subject: | [^\]] is not treated as a charset |
The parser was accepting [^]] but not [^\]]. Needless to say, this was
very confusing. I've attached a test and patch. All the existing tests
pass with this patch.
Subject: | regexp-grammars.patch |
diff -ruN Regexp-Grammars-1.001005/lib/Regexp/Grammars.pm Regexp-Grammars-1.001005.new/lib/Regexp/Grammars.pm
--- Regexp-Grammars-1.001005/lib/Regexp/Grammars.pm 2009-08-02 08:12:46.000000000 -0500
+++ Regexp-Grammars-1.001005.new/lib/Regexp/Grammars.pm 2009-10-27 13:51:18.000000000 -0500
@@ -636,7 +636,7 @@
(?&PARENS)
(?(DEFINE)
(?<PARENS> \( (?: \\. | (?&PARENS) | (?&CHARSET) | [^][()\\]++)*+ \) )
- (?<CHARSET> \[ \^?+ \]?+ [^]]*+ \] )
+ (?<CHARSET> \[ \^?+ \]?+ (?: \\. | [^]\\] )*+ \] ) # a backslash-escaped char never closes the set
)
}xms;
@@ -1073,7 +1073,7 @@
(?<BRACES> \{ (?: \\. | (?&BRACES) | [^{}\\]++ )*+ \} )
(?<PARENCODE> \(\?\{ (?: \\. | (?&BRACES) | [^{}\\]++ )*+ \}\) )
(?<HASH> \% (?&IDENT) (?: :: (?&IDENT) )* )
- (?<CHARSET> \[ \^?+ \]?+ [^]]*+ \] )
+ (?<CHARSET> \[ \^?+ \]?+ (?: \\. | [^]\\] )*+ \] ) # a backslash-escaped char never closes the set
(?<IDENT> [^\W\d]\w*+ )
)
}{
diff -ruN Regexp-Grammars-1.001005/t/charset_bug.t Regexp-Grammars-1.001005.new/t/charset_bug.t
--- Regexp-Grammars-1.001005/t/charset_bug.t 1969-12-31 18:00:00.000000000 -0600
+++ Regexp-Grammars-1.001005.new/t/charset_bug.t 2009-10-27 13:50:00.000000000 -0500
@@ -0,0 +1,64 @@
+use 5.010;
+use warnings;
+
+use Test::More 'no_plan';
+
+use Regexp::Grammars;
+
+# Regression case: [^\]]+ (escaped ']' inside a negated charset) was not interpreted as a charset.
+my $bracket_bug = qr{
+ <Bracketed>
+
+ <token: Bracketed>
+ \[ <text=( [^\]]+ )> \]
+}xms;
+# Guard case: [^\\]+ (escaped backslash right before the closing ']') is expected to match as well.
+my $escaped_bs = qr{
+ <Bracketed>
+
+ <token: Bracketed>
+ \[ <text=( [^\\]+ )> \]
+}xms;
+# Baseline case: [^]]+ (unescaped ']' first in the set), the spelling the parser already accepted.
+my $old_bracket = qr{
+ <Bracketed>
+
+ <token: Bracketed>
+ \[ <text=( [^]]+ )> \]
+}xms;
+
+no Regexp::Grammars;
+
+# Each DATA record is "<expected text>:<string to match>". Every record is fed
+# to all three grammars; each must match and capture the same inner text.
+# The first element of a case is the charset spelling shown in the test names
+# ('\\\\' in a single-quoted string yields two literal backslashes).
+my @cases = (
+    [ '[^\]]+',   $bracket_bug ],
+    [ '[^\\\\]+', $escaped_bs ],
+    [ '[^]]+',    $old_bracket ],
+);
+
+while ( my $input = <DATA> ) {
+    chomp $input;
+    my ( $text, $to_match ) = split /:/, $input;
+
+    for my $case (@cases) {
+        my ( $charset, $grammar ) = @{$case};
+
+        if ( $to_match =~ $grammar ) {
+            # pass() records an explicit success; ok() given only a string
+            # would use that string as the (always true) test value.
+            pass("matched bracketed text with $charset");
+            is( $/{Bracketed}{text}, $text, "captured text with $charset" );
+        }
+        else {
+            fail("did not match bracketed text with $charset");
+        }
+    }
+}
+
+
+__DATA__
+some text:[some text]
+ and more text :[ and more text ]