Subject: | Performance: where() called by fail() when backtracking |
The fail() method calls where() to extract the current line number and
the whole line where the error occurred. This information is needed for
showing an error message to the user, but most times the error raised by
fail() is captured and used for backtracking.
The fact that where() is called unnecessarily causes a performance hit.
There is a huge performance increase by postponing where() to when an
error message needs to be generated.
On a test program parsing a real program file of about 128K bytes, I got
the following elapsed times:
13.97 s in the original 0.07 version of the module
1.77 s with the attached modification
The patch also adds the file name to the error message, in case
from_file() was called. This seams to be the intent of the 'filename'
attribute, but it is not used. The xxerror.t tests this feature.
Please feel free to include the patch in a next version of the module,
or to do something completely different.
Best regards,
Paulo Custodio
Subject: | Parser-MGC-0.07_02.patch |
*** Parser-MGC-0.07_01\lib\Parser\MGC.pm 2011-03-19 14:20:16.000000000 +0000
--- Parser-MGC-0.07_02\lib\Parser\MGC.pm 2011-03-22 19:36:16.165379000 +0000
***************
*** 257,275 ****
my $pos = pos $self->{str};
my $str = $self->{str};
! my $sol = $pos;
! $sol-- if $sol > 0 and substr( $str, $sol, 1 ) =~ m/^[\r\n]$/;
! $sol-- while $sol > 0 and substr( $str, $sol-1, 1 ) !~ m/^[\r\n]$/;
!
! my $eol = $pos;
! $eol++ while $eol < length($str) and substr( $str, $eol, 1 ) !~ m/^[\r\n]$/;
!
! my $line = substr( $str, $sol, $eol - $sol );
!
! my $col = $pos - $sol;
! my $lineno = ( () = substr( $str, 0, $pos ) =~ m/\n/g ) + 1;
!
! return ( $lineno, $col, $line );
}
=head2 $parser->fail( $message )
--- 257,263 ----
my $pos = pos $self->{str};
my $str = $self->{str};
! return _where($str, $pos);
}
=head2 $parser->fail( $message )
***************
*** 285,291 ****
my $self = shift;
my ( $message ) = @_;
! die Parser::MGC::Failure->new( $message, $self->where );
}
=head2 $eos = $parser->at_eos
--- 273,280 ----
my $self = shift;
my ( $message ) = @_;
! die Parser::MGC::Failure->new( $message, $self->{str}, pos($self->{str}),
! $self->{filename} );
}
=head2 $eos = $parser->at_eos
***************
*** 880,885 ****
--- 869,893 ----
return $kw;
}
+ sub _where {
+ my($str, $pos) = @_;
+
+ my $sol = $pos;
+ $sol-- if $sol > 0 and substr( $str, $sol, 1 ) =~ m/^[\r\n]$/;
+ $sol-- while $sol > 0 and substr( $str, $sol-1, 1 ) !~ m/^[\r\n]$/;
+
+ my $eol = $pos;
+ $eol++ while $eol < length($str) and substr( $str, $eol, 1 ) !~ m/^[\r\n]$/;
+
+ my $line = substr( $str, $sol, $eol - $sol );
+
+ my $col = $pos - $sol;
+ my $lineno = ( () = substr( $str, 0, $pos ) =~ m/\n/g ) + 1;
+
+ return ( $lineno, $col, $line );
+ }
+
+
package # hide from indexer
Parser::MGC::Failure;
***************
*** 887,893 ****
{
my $class = shift;
my $self = bless {}, $class;
! @{$self}{qw( message linenum col text )} = @_;
return $self;
}
--- 895,901 ----
{
my $class = shift;
my $self = bless {}, $class;
! @{$self}{qw( message str pos filename )} = @_;
return $self;
}
***************
*** 895,910 ****
sub STRING
{
my $self = shift;
# Column number only counts characters. There may be tabs in there.
# Rather than trying to calculate the visual column number, just print the
# indentation as it stands.
! my $indent = substr( $self->{text}, 0, $self->{col} );
$indent =~ s/[^ \t]/ /g; # blank out all the non-whitespace
! return "$self->{message} on line $self->{linenum} at:\n" .
! "$self->{text}\n" .
"$indent^\n";
}
--- 903,927 ----
sub STRING
{
my $self = shift;
+
+ # compute where only when generating the error message, not when fail()
+ # is called, as it is an expensive operation and not needed if we are just
+ # backtracking
+
+ my($linenum, $col, $text) = Parser::MGC::_where(@{$self}{qw( str pos )});
# Column number only counts characters. There may be tabs in there.
# Rather than trying to calculate the visual column number, just print the
# indentation as it stands.
! my $indent = substr( $text, 0, $col );
$indent =~ s/[^ \t]/ /g; # blank out all the non-whitespace
! return "$self->{message} on ".
! (defined($self->{filename}) && ! ref($self->{filename})
! ? $self->{filename}." " : "").
! "line $linenum at:\n" .
! "$text\n" .
"$indent^\n";
}
Subject: | xxerror.t |
#!/usr/bin/perl -w
use strict;
use Test::More tests => 7;
package TestParser;
use base qw( Parser::MGC );
sub parse
{
my $self = shift;
return $self->token_int;
}
package main;
my $parser = TestParser->new;
isa_ok( $parser, "TestParser", '$parser' );
isa_ok( $parser, "Parser::MGC", '$parser' );
my $value = $parser->from_string( "\t123" );
is( $value, 123, '->from_string' );
eval { $parser->from_string( "\t123." ) };
is( $@, "Expected end of input on line 1 at:\n".
"\t123.\n".
"\t ^\n", 'error with no file' );
eval { $parser->from_file( \*DATA ) };
is( $@, "Expected end of input on line 1 at:\n".
" 123.\n".
" ^\n", 'error with glob file' );
my $tmpfile = "test_error.tmp";
ok( open(my $fh, ">", $tmpfile), "create $tmpfile" );
print $fh " 123.\n";
close $fh;
eval { $parser->from_file( $tmpfile ) };
is( $@, "Expected end of input on $tmpfile line 1 at:\n".
" 123.\n".
" ^\n", 'error with glob file' );
__DATA__
123.