CC: | Jeff.Fearn [...] gmail.com |
Subject: | Patch for HTML::Tree to add new_from_url() (Using LWP) |
As discussed, I have written an enhancement to add a new_from_url()
static constructor that used LWP to fetch content from a url.
--
David Pottage (AKA: SPUDSOUP)
david@electric-spoon.com
Subject: | new_from_url.patch |
diff --git a/Makefile.PL b/Makefile.PL
index afe58c2..83c20e8 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -15,6 +15,7 @@ WriteMakefile
'Test::Exception' => 0,
'HTML::Tagset' => '3.02',
'HTML::Parser' => '3.46',
+ 'LWP::UserAgent' => 0,
'Module::Build' => 0
}
)
diff --git a/lib/HTML/Tree.pm b/lib/HTML/Tree.pm
index 31b5fde..d05f90f 100644
--- a/lib/HTML/Tree.pm
+++ b/lib/HTML/Tree.pm
@@ -55,6 +55,12 @@ sub new_from_content {
goto &HTML::TreeBuilder::new_from_content;
}
+sub new_from_url {
+ shift;
+ unshift @_, 'HTML::TreeBuilder';
+ goto &HTML::TreeBuilder::new_from_url;
+}
+
1;
__END__
diff --git a/lib/HTML/TreeBuilder.pm b/lib/HTML/TreeBuilder.pm
index 3e8b1ff..1000ce3 100644
--- a/lib/HTML/TreeBuilder.pm
+++ b/lib/HTML/TreeBuilder.pm
@@ -52,6 +52,7 @@ BEGIN {
#---------------------------------------------------------------------------
+use LWP::UserAgent;
use HTML::Entities ();
use HTML::Tagset 3.02 ();
@@ -109,6 +110,19 @@ sub new_from_content { # from any number of scalars
return $new;
}
+sub new_from_url { # should accept anything that LWP does.
+ my $class = shift;
+ Carp::croak("new_from_url takes only one argument")
+ unless @_ == 1;
+ Carp::croak("new_from_url is a class method only")
+ if ref $class;
+ my $new = $class->new();
+
+ my $fetch_result = LWP::UserAgent->new->get( $_[0] );
+ $new->parse( $fetch_result->content );
+ return $new;
+}
+
# TODO: document more fully?
sub parse_content { # from any number of scalars
my $tree = shift;
diff --git a/t/construct_tree.t b/t/construct_tree.t
index 6b1faed..6a23b6d 100644
--- a/t/construct_tree.t
+++ b/t/construct_tree.t
@@ -1,9 +1,9 @@
-#!/usr/bin/perl -T
+#!/usr/bin/perl
use warnings;
use strict;
-use Test::More tests => ( 3 + 7 * 8 );
+use Test::More tests => ( 3 + 7 * 10 );
#initial tests + number of tests in test_new_obj() * number of times called
@@ -69,6 +69,29 @@ is( $HTMLPart1 . $HTMLPart2, $HTML, "split \$HTML correctly" );
test_new_obj( $parse_content_obj, "new(); parse_content Scalar" );
}
+# URL tests
+{
+ SKIP: {
+ eval {
+ require URI::file;
+ require LWP::UserAgent;
+ };
+ skip "URI::file or LWP::UserAgent not installed", 2 if $@;
+
+ my $file_url = URI->new("file:".$TestInput);
+
+ {
+ my $file_obj = HTML::Tree->new_from_url($file_url->as_string);
+ test_new_obj( $file_obj, "new_from_url Scalar" );
+ }
+
+ {
+ my $file_obj = HTML::Tree->new_from_url($file_url);
+ test_new_obj( $file_obj, "new_from_url Object" );
+ }
+ }
+}
+
# Scalar REF Tests
{
my $content_obj = HTML::Tree->new_from_content($HTML);