Subject: | document and improve secret _element_class feature |
As discussed via email, this allows for somewhat easier subclassing of
TreeBuilder to emit non-HTML::Element objects.
Patch attached. A parallel patch has been sent to MIROD for
HTML::TreeBuilder::XPath which will rely on this patch's application.
--
rjbs
Subject: | treebuilder.diff |
diff -Nur old/HTML-Tree-3.23/Changes new/HTML-Tree-3.24/Changes
--- old/HTML-Tree-3.23/Changes 2006-11-12 12:11:05.000000000 -0500
+++ new/HTML-Tree-3.24/Changes 2007-04-10 18:23:13.000000000 -0400
@@ -1,5 +1,11 @@
Changelog for HTML-Tree
+3.24 Tue Apr 10 18:23:02 EDT 2007
+ [ENHANCEMENTS]
+ * The secret hack to allow elements to be created from classes other than
+ HTML::Element has been cleaned up and documented for the benefit of
+ TreeBuilder subclasses. q.v., HTML::TreeBuilder->element_class
+
3.23 Sun Nov 12 11:09:31 CST 2006
[THINGS THAT MAY BREAK YOUR CODE OR TESTS]
* Mark-Jason Dominus points out that the fix for as_html was not
diff -Nur old/HTML-Tree-3.23/lib/HTML/Element.pm new/HTML-Tree-3.24/lib/HTML/Element.pm
--- old/HTML-Tree-3.23/lib/HTML/Element.pm 2006-11-12 12:13:33.000000000 -0500
+++ new/HTML-Tree-3.24/lib/HTML/Element.pm 2007-04-10 18:21:24.000000000 -0400
@@ -6,12 +6,12 @@
=head1 VERSION
-Version 3.23
+Version 3.24
=cut
use vars qw( $VERSION );
-$VERSION = '3.23';
+$VERSION = '3.24';
=head1 SYNOPSIS
@@ -1358,7 +1358,7 @@
$e = $tag;
$tag = $e->tag;
} else { # just a tag name -- so make the element
- $e = ($self->{'_element_class'} || __PACKAGE__)->new($tag);
+ $e = $self->element_class->new($tag);
++($self->{'_element_count'}) if exists $self->{'_element_count'};
# undocumented. see TreeBuilder.
}
@@ -3482,8 +3482,7 @@
if(ref($c)) {
unshift @stack, $c; # visit it later.
} else {
- $c = ( $this->{'_element_class'} || __PACKAGE__
- )->new('~text', 'text' => $c, '_parent' => $this);
+ $c = $this->element_class->new('~text', 'text' => $c, '_parent' => $this);
}
}
}
@@ -3769,6 +3768,20 @@
return @errors;
}
+=head2 $h->element_class
+
+This method returns the class which will be used for new elements. It
+defaults to HTML::Element, but can be overridden by subclassing or esoteric
+means best left to those who will read the source and then not complain when
+those esoteric means change. (Just subclass.)
+
+=cut
+
+sub element_class {
+ $_[0]->{_element_class} || __PACKAGE__
+}
+
+
1;
=head1 BUGS
diff -Nur old/HTML-Tree-3.23/lib/HTML/TreeBuilder.pm new/HTML-Tree-3.24/lib/HTML/TreeBuilder.pm
--- old/HTML-Tree-3.23/lib/HTML/TreeBuilder.pm 2006-11-12 12:13:46.000000000 -0500
+++ new/HTML-Tree-3.24/lib/HTML/TreeBuilder.pm 2007-04-10 18:29:28.000000000 -0400
@@ -4,7 +4,7 @@
use integer; # vroom vroom!
use Carp ();
use vars qw(@ISA $VERSION $DEBUG);
-$VERSION = '3.23';
+$VERSION = '3.24';
#---------------------------------------------------------------------------
# Make a 'DEBUG' constant...
@@ -121,7 +121,9 @@
my $class = shift;
$class = ref($class) || $class;
- my $self = HTML::Element->new('html'); # Initialize HTML::Element part
+ # Initialize HTML::Element part
+ my $self = $class->element_class->new('html');
+
{
# A hack for certain strange versions of Parser:
my $other_self = HTML::Parser->new();
@@ -145,7 +147,6 @@
$self->{'_implicit'} = 1; # to delete, once we find a real open-"html" tag
- $self->{'_element_class'} = 'HTML::Element';
$self->{'_ignore_unknown'} = 1;
$self->{'_ignore_text'} = 0;
$self->{'_warn'} = 0;
@@ -267,8 +268,7 @@
# Looks bad, but is fine for round-tripping.
}
- my $e =
- ($self->{'_element_class'} || 'HTML::Element')->new($tag, %$attr);
+ my $e = $self->element_class->new($tag, %$attr);
# Make a new element object.
# (Only rarely do we end up just throwing it away later in this call.)
@@ -1151,9 +1151,7 @@
".\n";
}
- (my $e = (
- $self->{'_element_class'} || 'HTML::Element'
- )->new('~comment'))->{'text'} = $text;
+ (my $e = $self->element_class->new('~comment'))->{'text'} = $text;
$pos->push_content($e);
++($self->{'_element_count'});
@@ -1184,9 +1182,7 @@
join('/', reverse($pos->{'_tag'}, @lineage_tags)) || 'Root',
".\n";
}
- (my $e = (
- $self->{'_element_class'} || 'HTML::Element'
- )->new('~declaration'))->{'text'} = $text;
+ (my $e = $self->element_class->new('~declaration'))->{'text'} = $text;
$self->{_decl} = $e;
return $e;
@@ -1214,9 +1210,7 @@
join('/', reverse($pos->{'_tag'}, @lineage_tags)) || 'Root',
".\n";
}
- (my $e = (
- $self->{'_element_class'} || 'HTML::Element'
- )->new('~pi'))->{'text'} = $text;
+ (my $e = $self->element_class->new('~pi'))->{'text'} = $text;
$pos->push_content($e);
++($self->{'_element_count'});
@@ -1383,7 +1377,7 @@
sub elementify {
# Rebless this object down into the normal element class.
my $self = $_[0];
- my $to_class = ($self->{'_element_class'} || 'HTML::Element');
+ my $to_class = $self->element_class;
delete @{$self}{ grep {;
length $_ and substr($_,0,1) eq '_'
# The private attributes that we'll retain:
@@ -1394,6 +1388,11 @@
bless $self, $to_class; # Returns the same object we were fed
}
+sub element_class {
+ return 'HTML::Element' if not ref $_[0];
+ return $_[0]->{_element_class} || 'HTML::Element';
+}
+
#--------------------------------------------------------------------------
sub guts {