Skip Menu |

This queue is for tickets about the HTML-Query CPAN distribution.

Report information
The Basics
Id: 62100
Status: resolved
Worked: 2.5 hours (150 min)
Priority: 0/
Queue: HTML-Query

People
Owner: Nobody in particular
Requestors: tomas.zemres [...] gmail.com
Cc:
AdminCc:

Bug Information
Severity: (no value)
Broken in: 0.04
Fixed in: (no value)



Subject: Querying on returned "HTML::Query" object does not work correctly
For example: If I select <p> tags from whole html document, and save this result into variable "$subset" and then I call $subset->query('.aaa') this does not reutrn <p class="aaa"> that is contained in $subset. Testing code: #!/usr/bin/env perl use strict; use warnings; use HTML::Query; print "Version: $HTML::Query::VERSION\n"; my $doc = HTML::Query->new(text => '<p id="1" class="a">A</p>' .'<p id="2" class="b">B</p>'); my $result1 = $doc->query('p'); print "Found <p> in whole doc:\n", $result1->as_HTML, "\n\n"; my $result2 = $result1->query('.b'); print "Found class=b in result1:\n", $result2->as_HTML, "\n\n"; print "END\n"; __END__ Script output: Version: 0.04 Found <p> in whole doc: <p class="a" id="1">A <p class="b" id="2">B Found class=b in result1: END -------- But with older version (0.02) this was working: Version: 0.02 Found <p> in whole doc: <p class="a" id="1">A <p class="b" id="2">B Found class=b in result1: <p class="b" id="2">B END
Subject: html-query-bug-test.pl
#!/usr/bin/env perl use strict; use warnings; use HTML::Query; print "Version: $HTML::Query::VERSION\n"; my $doc = HTML::Query->new(text => '<p id="1" class="a">A</p>' .'<p id="2" class="b">B</p>'); print $doc->as_HTML; my $result1 = $doc->query('p'); print "Found <p> in whole doc:\n", $result1->as_HTML, "\n\n"; my $result2 = $result1->query('.b'); print "Found class=b in result1:\n", $result2->as_HTML, "\n\n"; print "END\n";
This is fixed in development. It will be in the next release soon. It was a rather large patch... it is attached if you want to try it out.
Subject: query.patch
Index: t/iterative.t =================================================================== --- t/iterative.t (revision 2833) +++ t/iterative.t (working copy) @@ -7,20 +7,40 @@ use strict; use warnings; -use HTML::Query; +use lib qw( ./lib ../lib ); +use HTML::Query 'Query'; +use HTML::TreeBuilder; use Badger::Test - tests => 4, + tests => 10, debug => 'HTML::Query', args => \@ARGV; -my $doc = HTML::Query->new(text => '<p id="1" class="a">A</p>'.'<p id="2" class="b">B</p>'); +my $tree = HTML::TreeBuilder->new; +$tree->parse( '<p id="1" class="a">A</p><p id="2" class="b">B</p>' ); +ok( $tree, 'parsed tree'); +# test method interface + +my $doc = HTML::Query->new(text => $tree->as_HTML ); + my $result1 = $doc->query('p'); is( $result1->size, 2, 'two p elements in query' ); -warn $result1->as_trimmed_text(); is( join(', ', $result1->as_trimmed_text()), 'A, B', 'proper elements returned' ); my $result2 = $result1->query('.b'); is( $result2->size, 1, 'one p element in query' ); is( join(', ', $result2->as_trimmed_text), 'B', 'proper element returned' ); + +# test class interface + +my $query = Query $tree; +ok( $query, 'created query' ); + +my $result3 = $query->query('p'); +is( $result3->size, 2, 'two p elements in query' ); +is( join(', ', $result3->as_trimmed_text()), 'A, B', 'proper elements returned' ); + +my $result4 = $result3->query('.b'); +is( $result4->size, 1, 'one p element in query' ); +is( join(', ', $result4->as_trimmed_text), 'B', 'proper element returned' ); Index: lib/HTML/Query.pm =================================================================== --- lib/HTML/Query.pm (revision 2833) +++ lib/HTML/Query.pm (working copy) @@ -8,7 +8,7 @@ base => 'Badger::Base', utils => 'blessed', import => 'class CLASS', - vars => '$error AUTOLOAD', + vars => 'AUTOLOAD', constants => 'ARRAY', constant => { ELEMENT => 'HTML::Element', @@ -51,9 +51,6 @@ }, }; -#our $error; # how can we store this in the class itself? this is stupid... - - sub _export_query_to_element { class(ELEMENT)->load->method( # this Just Works[tm] because first arg is HTML::Element object @@ -64,14 +61,14 @@ sub _report_error { my ($self, $message) = @_; - if (suppress_errors()) { + if ($self->suppress_errors()) { if (defined($message)) { - $error = $message; + $self->{error} = $message; } return undef; } else { - $self->error($message); + $self->error($message); # this will DIE } } @@ -90,6 +87,13 @@ $class = ref $class || $class; + my $self = { + error => undef, + suppress_errors => undef, + match_self => undef, + elements => \@elements + }; + # each element should be an HTML::Element object, although we might # want to subclass this module to recognise a different kind of object, # so we get the element class from the ELEMENT constant method which a @@ -127,7 +131,7 @@ next; } elsif ($element->isa($class)) { - push(@elements, @$element); + push(@elements, @{$element->get_elements}); next; } } @@ -135,7 +139,7 @@ return $class->error_msg( bad_element => $element ); } - my $self = bless \@elements, $class; + bless $self, $class; return defined $select ? $self->query($select) @@ -145,19 +149,18 @@ sub suppress_errors { my ($self, $setting) = @_; - our $suppress; - if (defined($setting)) { - $suppress = $setting; + $self->{suppress_errors} = $setting; } - return $suppress; + + return $self->{suppress_errors}; } sub get_error { my ($self) = @_; - return $error; + return $self->{error}; } sub query { @@ -166,7 +169,7 @@ my $ops = 0; my $pos = 0; - $error = undef; + $self->{error} = undef; return $self->error_msg('no_query') unless defined $query && length $query; @@ -174,8 +177,8 @@ # multiple specs can be comma separated, e.g. "table tr td, li a, div.foo" COMMA: while (1) { # each comma-separated traversal spec is applied downward from - # the source elements in the @$self query - my @elements = @$self; + # the source elements in the $self->{elements} query + my @elements = @{$self->get_elements}; my $comops = 0; # for each whitespace delimited descendant spec we grok the correct @@ -309,21 +312,25 @@ # we're just looking for any descendent if( !$relationship ) { - # look_down() will match self in addition to descendents, - # so we explicitly disallow matches on self as we iterate - # thru the list. The other cases below already exclude self. - # https://rt.cpan.org/Public/Bug/Display.html?id=58918 - my @accumulator; - foreach my $e (@elements) { - if ($e->root() == $e) { - push(@accumulator, $e->look_down(@args)); + if ($self->{match_self}) { + # if we are re-querying, be sure to match ourselves not just descendents + @elements = map { $_->look_down(@args) } @elements; + } else { + # look_down() will match self in addition to descendents, + # so we explicitly disallow matches on self as we iterate + # thru the list. The other cases below already exclude self. + # https://rt.cpan.org/Public/Bug/Display.html?id=58918 + my @accumulator; + foreach my $e (@elements) { + if ($e->root() == $e) { + push(@accumulator, $e->look_down(@args)); + } + else { + push(@accumulator, grep { $_ != $e } $e->look_down(@args)); + } } - else { - push(@accumulator, grep { $_ != $e } $e->look_down(@args)); - } + @elements = @accumulator; } - - @elements = @accumulator; } # immediate child selector elsif( $relationship eq '>' ) { @@ -413,15 +420,27 @@ # check that we performed at least one query operation unless ($ops) { - return $self->_report_error( $self->message( bad_query => $query ) ); + return $self->_report_error( $self->message( bad_query => $query ) ); } return wantarray ? @result - : $self->new(@result); + : $self->new_match_self(@result); } +# instantiate an instance with match_self turned on, for use with +# follow-up queries, so they match the top-most elements. +sub new_match_self { + my $self = shift; + + my $result = $self->new(@_); + + $result->{match_self} = 1; + return $result; +} + + sub list { return wantarray ? @{ $_[0] } # return list of items @@ -430,26 +449,33 @@ sub size { - return scalar @{ $_[0] }; + my $self = shift; + return scalar @{$self->get_elements}; } sub first { my $self = shift; - return @$self - ? $self->[0] + return @{$self->get_elements} + ? $self->get_elements->[0] : $self->error_msg('is_empty'); } sub last { my $self = shift; - return @$self - ? $self->[-1] + return @{$self->get_elements} + ? $self->get_elements->[-1] : $self->error_msg('is_empty'); } +# return reference to elements array +sub get_elements { + my $self = shift; + return $self->{elements}; +} + sub AUTOLOAD { my $self = shift; my ($method) = ($AUTOLOAD =~ /([^:]+)$/ ); @@ -459,7 +485,7 @@ # try to call against the HTML::Element objects in the query my @results = map { $_->$method(@_) } - @$self; + @{$self->get_elements}; return wantarray ? @results
Release went out last night, please report any additional problems you encounter. thanks, Kevin On Thu Oct 21 16:50:24 2010, VKHERA wrote: Show quoted text
> This is fixed in development. It will be in the next release soon. > It was a rather large > patch... it is attached if you want to try it out.