On Fri Oct 22 10:07:21 2010, PEVANS wrote:
Show quoted text> It's currently not quite possible to use extract_tagged to extract a
> POD-like escape. It can parse fixed-length ones, such as
>
> eval: [ extract_tagged "B<<bold>> text", '[CBU]<<', '>>' ]
> [ 'B<<bold>>', ' text', '', 'B<<', 'bold', '>>' ]
>
> What it cannot do is parse arbitrary-length ones, because the close tags
> get confused:
>
> eval: [ extract_tagged 'C<<< $self->foo >> method', '[CBU]<+', '>+' ]
> [ 'C<<< $self->', 'foo >> method', '', 'C<<<', ' $self-', '>' ]
>
> It would be great if the close tag could instead be a CODEref, allowing
> the actual pattern to be deferred until the open tag has been matched.
> Perhaps something like:
>
> extract_tagged ..., '[CBU]<+', sub { '>' x (length $_[0])-1 };
Dear all,
I maked you a patch. I hope the test suffices. The closing tag may be a subref in order to defer
its evaluation until the opening tag has been found. The opening tag is passed in as the first
argument.
--- Text-Balanced-2.02-_khKH4/lib/Text/Balanced.pm 2009-07-29 16:44:15.000000000 +0100
+++ Text-Balanced-2.02-FaqLdE/lib/Text/Balanced.pm 2010-11-01 21:54:19.385469881 +0000
@@ -325,17 +325,19 @@
if (!defined $rdel)
{
- $rdelspec = substr($$textref, $-[0], $+[0] - $-[0]);
- unless ($rdelspec =~ s/\A([[(<{]+)($XMLNAME).*/ quotemeta "$1\/$2".
_revbracket($1) /oes)
+ my $tmp = substr($$textref, $-[0], $+[0] - $-[0]);
+ unless ($tmp =~ s/\A([[(<{]+)($XMLNAME).*/ quotemeta "$1\/$2". _revbracket($1)
/oes)
{
_failmsg "Unable to construct closing tag to match: $rdel",
pos $$textref;
goto failed;
}
+
+ $rdelspec = sub { $tmp };
}
- else
+ elsif (! ref $rdel)
{
- $rdelspec = eval "qq{$rdel}" || do {
+ my $tmp = eval "qq{$rdel}" || do {
my $del;
for (qw,~ ! ^ & * ) _ + - = } ] : " ; ' > . ? / | ',)
{ next if $rdel =~ /\Q$_/; $del = $_; last }
@@ -345,18 +347,26 @@
}
eval "qq$del$rdel$del";
};
+
+ $rdelspec = sub { $tmp };
}
+ else
+ {
+ $rdelspec = $rdel;
+ }
while (pos($$textref) < length($$textref))
{
next if $$textref =~ m/\G\\./gc;
+ my $rd = $rdelspec->($1);
+
if ($$textref =~ m/\G(\n[ \t]*\n)/gc )
{
$parapos = pos($$textref) - length($1)
unless defined $parapos;
}
- elsif ($$textref =~ m/\G($rdelspec)/gc )
+ elsif ($$textref =~ m/\G($rd)/gc )
{
$closetagpos = pos($$textref)-length($1);
goto matched;
Only in Text-Balanced-2.02-FaqLdE/: Makefile.old
diff -ru Text-Balanced-2.02-_khKH4/t/07_exttag.t Text-Balanced-2.02-FaqLdE/t/07_exttag.t
--- Text-Balanced-2.02-_khKH4/t/07_exttag.t 2009-07-29 16:44:15.000000000 +0100
+++ Text-Balanced-2.02-FaqLdE/t/07_exttag.t 2010-11-01 21:55:02.769471024 +0000
@@ -13,7 +13,7 @@
# Change 1..1 below to 1..last_test_to_print .
# (It may become useful if the test is moved to ./t subdirectory.)
-BEGIN { $| = 1; print "1..53\n"; }
+BEGIN { $| = 1; print "1..57\n"; }
END {print "not ok 1\n" unless $loaded;}
use Text::Balanced qw ( extract_tagged gen_extract_tagged );
$loaded = 1;
@@ -62,6 +62,10 @@
ignore\n this and then BEGINHERE at the ENDHERE;
ignore\n this and then BEGINTHIS at the ENDTHIS;
+# USING: extract_tagged($str,"BEGIN([A-Z]+)",sub{'END'.reverse shift},"(?s).*?(?=BEGIN)");
+ ignore\n this and then BEGINHERE at the ENDEREH;
+ ignore\n this and then BEGINTHIS at the ENDSIHT;
+
# USING: extract_tagged($str,"BEGIN([A-Z]+)",'END$1',"(?s).*?(?=BEGIN)");
ignore\n this and then BEGINHERE at the ENDHERE;
ignore\n this and then BEGINTHIS at the ENDTHIS;