Subject: | Support for relationship selectors (>, +, *) |
HTML::Query doesn't currently have support for various CSS relationship
selector modifiers. These include ">" (immediate descendent or child),
"*" (non-immediate descendent or grandchild) and "+" (immediate sibling).
The attached patches add support for these along with tests.
Subject: | test1.html.patch |
--- html/test1.html 2009-06-15 02:47:29.000000000 -0400
+++ html/test1.html.new 2010-06-01 16:50:11.099441796 -0400
@@ -43,12 +43,12 @@
<table class="two">
<tr class="wibble">
<td>
- Wibble2
+ <span>Wibble2</span>
</td>
</tr>
<tr class="wobble">
<td>
- Wobble2
+ <span>Wobble2</span>
</td>
</tr>
</table>
Subject: | query.t.patch |
--- query.t 2009-06-15 02:50:56.000000000 -0400
+++ query.t.new 2010-06-01 16:48:44.066431209 -0400
@@ -14,7 +14,7 @@
use HTML::TreeBuilder;
use Badger::Filesystem '$Bin Dir';
use Badger::Test
- tests => 55,
+ tests => 67,
debug => 'HTML::Query',
args => \@ARGV;
@@ -141,6 +141,41 @@
is( $tds->size, 2, 'two elements in table tr.wibble td query' );
is( join(', ', $tds->as_trimmed_text), 'Wibble1, Wibble2', 'got wibbles' );
+#-----------------------------------------------------------------------
+# child elements
+#-----------------------------------------------------------------------
+
+my $spans = $query->query('body > span');
+ok( $spans, 'got child element query' );
+is( $spans->size, 1, 'just a single span body child' );
+is( join('', $spans->as_trimmed_text), 'This is a span with bar class' );
+
+#-----------------------------------------------------------------------
+# grandchild elements
+#-----------------------------------------------------------------------
+
+$spans = $query->query('body * span');
+ok( $spans, 'got child element query' );
+is( $spans->size, 2, 'two span body grandchildren' );
+is( join(', ', $spans->as_trimmed_text), 'Wibble2, Wobble2' );
+
+#-----------------------------------------------------------------------
+# sibling elements
+#-----------------------------------------------------------------------
+
+my $inputs = $query->query('input + input');
+ok( $inputs, 'got sibling element query' );
+is( $inputs->size, 2, 'two input sibling' );
+is( join(', ', $inputs->attr('class')), 'other, search' );
+
+#-----------------------------------------------------------------------
+# mixing relationships with everything else
+#-----------------------------------------------------------------------
+
+$spans = $query->query('table.two tr + tr * span');
+ok( $spans, 'got mixed relationship element query' );
+is( $spans->size, 1, 'one span reached' );
+is( join(', ', $spans->as_trimmed_text), 'Wobble2' );
#-----------------------------------------------------------------------
# list of specifications: table.foo, input.bar, etc
Subject: | Query.pm.patch |
--- lib/HTML/Query.pm 2009-06-15 02:50:29.000000000 -0400
+++ lib/HTML/Query.pm.new 2010-06-01 16:49:22.789398758 -0400
@@ -28,6 +28,7 @@
bad_spec => 'Invalid specification "%s" in query: %s',
is_empty => 'The query does not contain any elements',
};
+use Scalar::Util qw(refaddr);
our $SOURCES = {
@@ -149,10 +150,18 @@
SEQUENCE: while (1) {
my @args;
$pos = pos($query) || 0;
+ my $relationship = '';
# ignore any leading whitespace
$query =~ / \G \s+ /cgsx;
+ # get any relationship modifiers
+ if( $query =~ / \G (>|\*|\+)\s*/cgx ) {
+ # can't have a relationship modifier as the first part of the query
+ $relationship = $1;
+ return $self->error_msg( bad_spec => $relationship, $query ) if !$comops;
+ }
+
# optional leading word is a tag name
if ($query =~ / \G (\w+) /cgx) {
push( @args, _tag => $1 );
@@ -193,9 +202,55 @@
' into args [', join(', ', @args), ']'
) if DEBUG;
- # call look_down() against each element to get the new elements
- @elements = map { $_->look_down(@args) } @elements;
-
+ # we're just looking for any descendent
+ if(!$relationship ) {
+ @elements = map { $_->look_down(@args) } @elements;
+ }
+ # immediate child selector
+ elsif( $relationship eq '>' ) {
+ @elements = map {
+ $_->look_down(
+ @args,
+ sub {
+ my $tag = shift;
+ my $root = $_;
+ return $tag->depth == $root->depth + 1;
+ }
+ )
+ } @elements;
+ }
+ # immediate sibling selector
+ elsif( $relationship eq '+' ) {
+ @elements = map {
+ $_->parent->look_down(
+ @args,
+ sub {
+ my $tag = shift;
+ my $root = $_;
+ my @prev_sibling = $tag->left;
+ # find the nearest preceding non-text sibling
+ foreach my $sibling (reverse @prev_sibling) {
+ next unless ref $sibling;
+ return refaddr($sibling) == refaddr($root);
+ }
+ }
+ )
+ } @elements;
+ }
+ # grandchild selector
+ elsif( $relationship eq '*' ) {
+ @elements = map {
+ $_->look_down(
+ @args,
+ sub {
+ my $tag = shift;
+ my $root = $_;
+ return $tag->depth > $root->depth + 1;
+ }
+ )
+ } @elements;
+ }
+
# so we can check we've done something
$comops++;
}
@@ -682,6 +737,29 @@
td.value'
);
+=head3 Immediate Descendents (children)
+
+When you combine selectors with whitespace, elements are selected if
+they are descended from the parent in some way. But if you just want
+to select the children (and not the grandchildren, great-grandchildren,
+etc) then you can combine the selectors with the C<< > >> character.
+
+ @elems = $query->query('a > img');
+
+=head3 Non-Immediate Descendents
+
+If you just want any descendents that aren't children then you can combine
+selectors with the C<*> character.
+
+ @elems = $query->query('div * a');
+
+=head3 Immediate Siblings
+
+If you want to use a sibling relationship then you can join selectors
+with the C<+> character.
+
+ @elems = $query->query('img + span');
+
=head2 Combining Selectors
You can combine basic and hierarchical selectors into a single query