? Makefile
? blib
? patch.txt
? pm_to_blib
Index: lib/URI/Find.pm
===================================================================
RCS file: /home/mikeschilli/cvs/URI-Find/lib/URI/Find.pm,v
retrieving revision 1.1.1.1
diff -a -u -r1.1.1.1 Find.pm
--- lib/URI/Find.pm 5 Sep 2008 07:55:51 -0000 1.1.1.1
+++ lib/URI/Find.pm 5 Sep 2008 07:57:37 -0000
@@ -96,11 +96,23 @@
$text is a string to search and possibly modify with your callback.
+Alternatively, C<find> can be called with a replacement function for
+the rest of the text:
+
+ use CGI qw(escapeHTML);
+ # ...
+ my $how_many_found = $finder->find(\$text, \&escapeHTML);
+
+will not only call the callback function for every URL found (and
+perform the replacement instructions therein), but also run the rest
+of the text through C<escapeHTML()>. This makes it easier to turn
+plain text which contains URLs into HTML (see example below).
+
=cut
sub find {
- @_ == 2 || __PACKAGE__->badinvo;
- my($self, $r_text) = @_;
+ @_ == 2 || @_ == 3 || __PACKAGE__->badinvo;
+ my($self, $r_text, $escape_func) = @_;
my $urlsfound = 0;
@@ -116,10 +128,16 @@
my $uriRe = sprintf '(?:%s|%s)', $self->uri_re, $self->schemeless_uri_re;
- $$r_text =~ s{(<$uriRe>|$uriRe)}{
- my($orig_match) = $1;
+ $$r_text =~ s{(.*?)(<$uriRe>|$uriRe)|(?:(.+)$)}{
+ my $orig_match = $2;
+ my $prematch = $1;
+ my $replacement = "";
+ if(defined $3) {
+ $orig_match = "";
+ $prematch = $3;
+ }
- # A heruristic. Often you'll see things like:
+ # A heuristic. Often you'll see things like:
# "I saw this site,
http://www.foo.com, and its really neat!"
# or "Foo Industries (at
http://www.foo.com)"
# We want to avoid picking up the trailing paren, period or comma.
@@ -127,17 +145,23 @@
# not it corrects a parse mistake.
$orig_match = $self->decruft($orig_match);
- if( my $uri = $self->_is_uri(\$orig_match) ) { # Its a URI.
+ if( length $orig_match and
+ my $uri = $self->_is_uri(\$orig_match) ) { # Its a URI.
$urlsfound++;
- # Don't forget to put any cruft we accidentally matched back.
- $self->recruft($self->{callback}->($uri, $orig_match));
+ $replacement = $self->{callback}->($uri, $orig_match);
}
else { # False alarm.
- # Again, don't forget the cruft.
- $self->recruft($orig_match);
+ $replacement = $orig_match;
+ }
+
+ if(defined $escape_func) {
+ $prematch = $escape_func->($prematch);
}
- }eg;
+
+ # return concatenation of escaped prematch and recrufted URI
+ $prematch . $self->recruft($replacement);
+ }gse;
URI::URL::strict($old_strict);
return $urlsfound;
@@ -371,13 +395,15 @@
use CGI qw(escapeHTML);
- $text = "<pre>\n" . escapeHTML($text) . "</pre>\n";
my $finder = URI::Find->new(
sub {
my($uri, $orig_uri) = @_;
return qq|<a href="$uri">$orig_uri</a>|;
- });
+ },
+ \&escapeHTML
+ );
$finder->find(\$text);
+ print "<pre>$text</pre>
=head1 CAVEATS, BUGS, ETC...
Index: t/Find.t
===================================================================
RCS file: /home/mikeschilli/cvs/URI-Find/t/Find.t,v
retrieving revision 1.1.1.1
diff -a -u -r1.1.1.1 Find.t
--- t/Find.t 5 Sep 2008 07:55:52 -0000 1.1.1.1
+++ t/Find.t 5 Sep 2008 07:57:37 -0000
@@ -244,3 +244,37 @@
$f->find(\$t);
ok $val == URI::URL::strict, "URI::URL::strict $val";
}
+
+# Test new filter function
+
+BEGIN { $Total_tests += 10 }
+my @tasks = (
+ ["Foo&Bar
http://abc.com.", "Foo&Bar xx&."],
+ ["
http://abc.com.
http://abc.com.", "xx&. xx&."],
+ ["
http://abc.com?foo=bar&baz=foo", "xx&"],
+ ["&
http://abc.com?foo=bar&baz=foo", "& xx&"],
+ ["
http://abc.com?foo=bar&baz=foo &", "xx& &"],
+ ["Foo&Bar
http://abc.com", "Foo&Bar xx&"],
+ ["
http://abc.com. Foo&Bar", "xx&. Foo&Bar"],
+ ["Foo&Bar
http://abc.com. Foo&Bar", "Foo&Bar xx&. Foo&Bar"],
+ ["Foo&Bar\n
http://abc.com.\nFoo&Bar", "Foo&Bar\nxx&.\nFoo&Bar"],
+ ["Foo&Bar\n
http://abc.com.
http://def.com.\nFoo&Bar",
+ "Foo&Bar\nxx&. xx&.\nFoo&Bar"],
+);
+
+for my $task (@tasks) {
+ my($str, $result) = @$task;
+ my $org = $str;
+ my $f = URI::Find->new(sub { return "xx&" });
+ $f->find(\$str, \&simple_escape);
+ ok($str eq $result, "[$org => $result] vs. [$str]");
+}
+
+sub simple_escape {
+ my($toencode) = @_;
+
+# print "Escaping $toencode\n";
+ $toencode =~ s{&}{&}gso;
+# print "Result: $toencode\n";
+ return $toencode;
+}