diff --git a/lib/URI/Find.pm b/lib/URI/Find.pm
index 7b4ba62..36f66e7 100644
--- a/lib/URI/Find.pm
+++ b/lib/URI/Find.pm
@@ -115,9 +115,60 @@ sub find {
$self->{_uris_found} = 0;
- # Don't assume http.
+ my @parts = $self->_get_parts($r_text);
+
+ # Don't assume http (used in _uri_filter calling _is_uri)
my $old_strict = URI::URL::strict(1);
+ my @reduced = shift @parts;
+ for my $part (@parts) {
+ next if ! defined $part || (ref $part && (!defined $part->{str} || $part->{str} eq ''));
+
+ # Find out if part needs filtering, if not, make it a regular string
+ # ready for reducing, and later escape function
+ if (ref $part && $part->{state} eq 'filter') {
+ my $filtered = $self->_uri_filter($part->{str});
+ if (defined $filtered) {
+ $part->{str} = $filtered;
+ $part->{state} = 'filtered';
+ }
+ else {
+ $part = $part->{orig};
+ }
+ }
+
+ # Reduce part, ie. collapse strings that follow eachother so that escape
+ # function gets as whole strings as possible
+ if ( ref $part ) {
+ push @reduced, $part;
+ }
+ elsif ( ! ref $reduced[-1] ) {
+ $reduced[-1] .= $part;
+ }
+ else {
+ push @reduced, $part;
+ }
+ }
+
+ URI::URL::strict($old_strict);
+
+ my $replace = "";
+
+ foreach my $part (@reduced) {
+ next if ! defined $part;
+ $replace .= ref $part ? $part->{str} : $escape_func->($part);
+ }
+
+ ${$r_text} = $replace;
+
+ return $self->{_uris_found};
+
+}
+
+
+sub _get_parts {
+ my ($self, $r_text) = @_;
+
# Yes, evil. Basically, look for something vaguely resembling a URL,
# then hand it off to URI::URL for examination. If it passes, throw
# it to a callback and put the result in its place.
@@ -127,17 +178,21 @@ sub find {
my $uriRe = sprintf '(?:%s|%s)', $self->uri_re, $self->schemeless_uri_re;
+ my @parts = ();
+
$$r_text =~ s{ (.*?) (?:(<(?:URL:)?)(.+?)(>)|($uriRe)) | (.+?)$ }{
- my $replace = '';
+ my $pre = $2;
+ my $post = $4;
+
if( defined $6 ) {
- $replace = $escape_func->($6);
+ push @parts, $6;
}
else {
my $maybe_uri = '';
- $replace = $escape_func->($1);
+ push @parts, $1;
- if( defined $2 ) {
+ if( defined $pre ) {
$maybe_uri = $3;
my $is_uri = do { # Don't alter $1...
$maybe_uri =~ s/\s+//g;
@@ -145,9 +200,11 @@ sub find {
};
if( $is_uri ) {
- $replace .= $escape_func->($2);
- $replace .= $self->_uri_filter($maybe_uri);
- $replace .= $escape_func->($4);
+ push @parts,
+ $pre,
+ { str => $maybe_uri, orig => $3, set_at => __LINE__, state => 'filter' },
+ $post
+ ;
}
else {
# the whole text inside of the <...> was not a url, but
@@ -156,29 +213,21 @@ sub find {
$maybe_uri = $3;
$maybe_uri =~ /$uriRe/;
};
- if( $has_uri ) {
- my $pre = $2;
- my $post = $4;
- do { $self->find(\$maybe_uri, $escape_func) };
- $replace .= $escape_func->($pre);
- $replace .= $maybe_uri;
- $replace .= $escape_func->($post);
- }
- else {
- $replace .= $escape_func->($2.$3.$4);
- }
+ push @parts,
+ $pre,
+ ( $has_uri ? do { $self->_get_parts(\$maybe_uri) } : $3 ),
+ $post,
+ ;
}
}
else {
- $replace .= $self->_uri_filter($5);
+ push @parts, { str => $5, orig => $5, set_at => __LINE__, state => 'filter' };
}
}
-
- $replace;
+ "";
}gsex;
- URI::URL::strict($old_strict);
- return $self->{_uris_found};
+ return @parts;
}
@@ -201,7 +250,7 @@ sub _uri_filter {
}
else {
# False alarm
- $replacement = $orig_match;
+ return;
}
# Return recrufted replacement
@@ -526,7 +575,7 @@ Darren Chamberlain wrote urifind.
Copyright 2000, 2009-2010 by Michael G Schwern E<lt>schwern@pobox.comE<gt>.
-This program is free software; you can redistribute it and/or
+This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
See F<
http://www.perlfoundation.org/artistic_license_1_0>
diff --git a/t/filter.t b/t/filter.t
index c7636e4..8a3be72 100644
--- a/t/filter.t
+++ b/t/filter.t
@@ -21,6 +21,7 @@ my @tasks = (
["Foo&Bar\n
http://abc.com.\nFoo&Bar", "Foo&Bar\nxx&.\nFoo&Bar"],
["Foo&Bar\n
http://abc.com.
http://def.com.\nFoo&Bar",
"Foo&Bar\nxx&. xx&.\nFoo&Bar"],
+ ["noturi:& should also be escaped", "noturi:& should also be escaped"],
);
for my $task (@tasks) {
@@ -33,7 +34,6 @@ for my $task (@tasks) {
sub simple_escape {
my($toencode) = @_;
-
$toencode =~ s{&}{&}gso;
return $toencode;
}