Subject: | Slow operations for thousands of inserts on CSV file |
I think I've found a problem when doing thousands of inserts on a simple CSV file, because it slows down inserts at a horrific rate of 50-60 records/sec, while normal operation performs around 1500 records/sec.
OS, perl, DBI, DBD::File versions are not influent on this problem.
It appears from SQL::Statement v1.005 until current.
From what I've been able to track down, in open_tables(), the <CODE>$self->{all_cols}</CODE> array keeps growing undefinitely, slowing down more and more the insert operations (see attached patch for test case t/06allcols.t).
Patching Statement.pm as included solves the problem and keeps <CODE>$self->{all_cols}</CODE> array stable. The patch also seems to pass all tests.
Let me know...
--
Cosimo
diff -u -Naubr SQL-Statement-1.11_original/lib/SQL/Statement.pm SQL-Statement-1.11_patched/lib/SQL/Statement.pm
--- SQL-Statement-1.11_original/lib/SQL/Statement.pm Tue Mar 29 19:12:16 2005
+++ SQL-Statement-1.11_patched/lib/SQL/Statement.pm Fri Apr 15 13:21:35 2005
@@ -56,6 +56,7 @@
my $class = shift;
my $sql = shift;
my $flags = shift;
+
#
# IF USER DEFINED extend_csv IN SCRIPT
# USE THE ANYDATA DIALECT RATHER THAN THE CSV DIALECT
@@ -1443,11 +1444,24 @@
}
@c = ( @c, @newcols );
}
- my $all_cols = $self->{all_cols}
- || [ map {$_->{name} }@{$self->{columns}} ]
- || [];
- @$all_cols = (@$all_cols,@c);
+
+ # JZ
+ #my $all_cols = $self->{all_cols}
+ # || [ map {$_->{name} }@{$self->{columns}} ]
+ # || [];
+ #@$all_cols = (@$all_cols,@c);
+ #$self->{all_cols} = $all_cols;
+
+ # CS
+ my $all_cols = [];
+ if(!$self->{all_cols})
+ {
+ $all_cols = [ map {$_->{name}} @{$self->{columns}} ];
+ $all_cols ||= []; # ?
+ @$all_cols = (@$all_cols, @c);
$self->{all_cols} = $all_cols;
+ }
+
return SQL::Eval->new({'tables' => $t}), \@c;
}
@@ -1855,10 +1869,11 @@
my $self = shift;
my $col = shift;
if (!$self->{"columns"}) { return 0; }
- if (defined $col and $col =~ /^\d+$/) { # arg1 = a number
+ if (defined $col)
+ {
+ if ($col =~ /^\d+$/) { # arg1 = a number
return $self->{"columns"}->[$col];
}
- elsif (defined $col) { # arg1 = string
for my $c(@{$self->{"columns"}}) {
return $c if $c->name eq $col;
}
@@ -1869,7 +1884,6 @@
else { # no arg1,no array context
return scalar @{ $self->{"columns"} };
}
-
}
sub colname2colnum{
my $self = shift;
diff -u -Naubr SQL-Statement-1.11_original/t/06allcols.t SQL-Statement-1.11_patched/t/06allcols.t
--- SQL-Statement-1.11_original/t/06allcols.t Thu Jan 1 01:00:00 1970
+++ SQL-Statement-1.11_patched/t/06allcols.t Fri Apr 15 13:17:54 2005
@@ -0,0 +1,34 @@
+#!/usr/bin/perl -w
+$|=1;
+use strict;
+use Test::More;
+eval { require DBD::File; };
+if ($@) {
+ plan skip_all => "No DBD::File available";
+}
+else {
+ plan tests => 1;
+}
+use lib qw( ../lib );
+use SQL::Statement; printf "SQL::Statement v.%s\n", $SQL::Statement::VERSION;
+use DBI;
+use vars qw($dbh $sth $DEBUG);
+$dbh = DBI->connect('dbi:File(RaiseError=1):') or die "Can't create dbi:File connection";
+
+# Create a test table
+$dbh->do("CREATE TEMP TABLE allcols ( f1 char(10), f2 char(10) )");
+$sth = $dbh->prepare("INSERT INTO allcols (f1,f2) VALUES (?,?)") or die "Can't prepare insert sth";
+
+$sth->execute('abc', 'def');
+my $allcols_before = @{$sth->{f_stmt}->{all_cols}};
+diag('@all_cols before is '.$allcols_before);
+
+$sth->execute('abc', 'def') for 1 .. 100;
+my $allcols_after = @{$sth->{f_stmt}->{all_cols}};
+diag('@all_cols after is '.$allcols_after);
+
+ok( $allcols_before == $allcols_after, '->{all_cols} structure does not grow beyond control');
+
+$sth->finish();
+$dbh->disconnect();
+