Subject: | Table parsing does not comply to S26 |
Damian,
The table parsing in 0.000011 does not comply with S26. For instance,
H1 | H2
============
row1 | a
row2 | b-c+d
gets parsed as a three-column table, although S26 specifies that "Columns are separated by two or more consecutive whitespace characters, or by a vertical line (|) or a border intersection (+), either of which must be separated from any content by at least one whitespace character.".
I have looked at the code, and the problem is in _column_template(), which does not require whitespace characters surrounding the column separator.
I have rewritten this routine to comply with S26, and the table above is now parsed correctly. However, this breaks one test (table_header.t). I have looked at the test and I believe it is incorrect: why should the cell be ' Superpower' and not 'Superpower'? So I have changed the test too...
The result is the attached patch, which should improve S26 compliance.
Cheers,
Lionel
Subject: | pp6.patch |
--- lib/Perl6/Perldoc/Parser.pm- 2013-02-25 12:37:08.000000000 +0100
+++ ./lib/Perl6/Perldoc/Parser.pm 2014-10-24 07:14:34.000000000 +0200
@@ -2132,9 +2132,7 @@
}
# Regexes to help with table parsing...
-my $HWS = qr{ [ \t] }xms;
-
-my $COL_SEP = qr{ $HWS* [|+]{1,2} | $HWS{2,} }xms;
+my $HWS = qr{ [ \t] }xms;
my $ROW_SEP_LINE = qr{ ^ [-=_ \t|+]* \n }xms;
my $NWS_ROW_SEP = qr{ [-=_+] }xms;
@@ -2158,43 +2156,25 @@
my $max_width = _max(map {length} @lines);
- # Detect rivers...
- my %rivers;
- my %is_visible;
+ # Compute the vector of column separator positions
+ my $zvec = pack("b*", 0 x $max_width);
+ my $rvec = pack("b*", 1 x $max_width);
for my $line (@lines) {
- # Hide single/double spaces and single/double horizontal lines...
- $line =~ s/[^\s|+][ ][^\s|+]/***/g;
- $line =~ s{((?:\A|[^=_-]) [=_-]{1,2} (?:[^=_-]|\Z))}
- {'*' x length $1}egxms;
-
-
+ # Skip row separators...
+ next if $line =~ /^[\s\-=_+|]*$/;
$line .= q{ } x ($max_width - length $line);
-
- # Check each position for a column boundary character...
- my @char = split(//, $line);
- for my $pos (0..$#char) {
- my $char = $char[$pos];
- if ($char =~ m{[-=_ ]}) {
- $rivers{$pos}++;
- }
- elsif ($char =~ m{[|+]}) {
- $rivers{$pos}++;
- $is_visible{$pos} = 1;
+ my $lvec = $zvec;
+ # Mark column separators
+ while ($line =~ /(\s+[\s+|]\s+)/g) {
+ my $pos = pos($line);
+ for my $p ($pos - length($1) .. $pos - 1) {
+ vec($lvec, $p, 1) = 1;
}
}
+ # The result vector must match the line vector
+ $rvec &= $lvec;
}
-
- # Remove partial rivers...
- delete @rivers{grep { ($rivers{$_}||0) < @lines } keys %rivers};
-
- # Fill river positions with '1' (or '2' if a visible boundary)...
- my $template = '0' x $max_width;
- for my $pos (keys %rivers) {
- substr($template,$pos,1,$is_visible{$pos} ? 2 : 1);
- }
-
- # Rivers with visible boundaries are only rivers in the visible bits...
- $template =~ s{ (1*)(2+)(1*) }{ 0 x length($1) . 1 x length($2) . 0 x length($3) }egxms;
+ my $template = substr(join("", unpack("b*", $rvec)), 0, $max_width);
# Add any missing external boundaries...
my $prefix = $template =~ /^0/ ? 'A0' : q{};
--- t/table_header.t- 2007-06-13 12:58:24.000000000 +0200
+++ t/table_header.t 2014-10-23 13:51:58.000000000 +0200
@@ -76,12 +76,12 @@
'typename' => 'pod',
'content' => [
bless( {
- 'typename' => 'code',
+ 'typename' => 'para',
'content' => [
- ' Superpower'
+ 'Superpower'
],
'style' => 'implicit'
- }, 'Perl6::Perldoc::Block::code' )
+ }, 'Perl6::Perldoc::Block::para' )
],
'style' => 'implicit'
}, 'Perl6::Perldoc::Block::pod' )