Subject: | cpanidx-gendb is slow when building the SQLite db |
Hello,
Supplied is a patch against App-CPANIDX 0.08 that speeds up the sqlite
code by more than a factor of 10.
cpan@smoker-master:~$ time cpanidx-gendb
Fetching 'ftp://192.168.0.200/CPAN/authors/01mailrc.txt.gz' to
'/home/cpan/.cpanidx'
01mailrc.txt.gz
Downloaded 'ftp://192.168.0.200/CPAN/authors/01mailrc.txt.gz' to
'/home/cpan/.cpanidx/01mailrc.txt.gz'
Fetching 'ftp://192.168.0.200/CPAN/modules/02packages.details.txt.gz' to
'/home/cpan/.cpanidx'
02packages.details.txt.gz
Downloaded 'ftp://192.168.0.200/CPAN/modules/02packages.details.txt.gz'
to '/home/cpan/.cpanidx/02packages.details.txt.gz'
Populating auths ... DONE
Populating dists and mods ... DONE
---
-
timestamp: 1270176612
real 2m34.818s
user 0m8.570s
sys 0m13.810s
cpan@smoker-master:~$ time cpanidx-gendb-myver
Fetching 'ftp://192.168.0.200/CPAN/authors/01mailrc.txt.gz' to
'/home/cpan/.cpanidx'
01mailrc.txt.gz
Downloaded 'ftp://192.168.0.200/CPAN/authors/01mailrc.txt.gz' to
'/home/cpan/.cpanidx/01mailrc.txt.gz'
Fetching 'ftp://192.168.0.200/CPAN/modules/02packages.details.txt.gz' to
'/home/cpan/.cpanidx'
02packages.details.txt.gz
Downloaded 'ftp://192.168.0.200/CPAN/modules/02packages.details.txt.gz'
to '/home/cpan/.cpanidx/02packages.details.txt.gz'
Populating auths ... DONE
Populating dists and mods ... DONE
---
-
timestamp: 1270179165
real 0m8.849s
user 0m8.090s
sys 0m0.410s
--
~Apocalypse
Subject: | cpanidx.diff |
diff --git a/bin/cpanidx-gendb b/bin/cpanidx-gendb
index f8fe4ff..8d5b29a 100755
--- a/bin/cpanidx-gendb
+++ b/bin/cpanidx-gendb
@@ -41,6 +41,7 @@ my $idxdir = _cpanidx_dir();
mkpath( $idxdir ) unless -d $idxdir;
fetch_indexes($idxdir,$mirror,$mailrc_file,$packages_file);
my $dbh = DBI->connect($dsn,$user,$pass);
+$dbh->do(qq{PRAGMA synchronous = OFF}) or die $dbh->errstr;
print "Populating auths ... ";
populate_auths($dbh,$idxdir,$mailrc_file);
print "DONE\nPopulating dists and mods ... ";
@@ -89,16 +90,33 @@ sub populate_dists {
}
push @mods, [ $module, $d->dist, $d->version, $d->cpanid, $version ];
}
- create_table( $handle, 'dists' );
+
+ $handle->begin_work;
+
+ create_table( $handle, 'tmp_dists' );
foreach my $dist ( keys %dists ) {
- my $sth = $handle->prepare_cached(qq{INSERT INTO dists values (?,?,?,?)}) or die $handle->errstr;
+ my $sth = $handle->prepare_cached(qq{INSERT INTO tmp_dists values (?,?,?,?)}) or die $handle->errstr;
$sth->execute( @{ $dists{ $dist } } );
}
- create_table( $handle, 'mods' );
+ create_table( $handle, 'tmp_mods' );
foreach my $mod ( @mods ) {
- my $sth = $handle->prepare_cached(qq{INSERT INTO mods values (?,?,?,?,?)}) or die $handle->errstr;
+ my $sth = $handle->prepare_cached(qq{INSERT INTO tmp_mods values (?,?,?,?,?)}) or die $handle->errstr;
$sth->execute( @{ $mod } );
}
+
+ $handle->do(qq{DROP TABLE IF EXISTS dists}) or die $handle->errstr;
+ $handle->do(qq{ALTER TABLE tmp_dists RENAME TO dists}) or die $handle->errstr;
+ $handle->do(qq{DROP TABLE IF EXISTS mods}) or die $handle->errstr;
+ $handle->do(qq{ALTER TABLE tmp_mods RENAME TO mods}) or die $handle->errstr;
+
+ foreach my $table ( qw( dists mods ) ) {
+ foreach my $sql ( @{ App::CPANIDX::Tables->index( $table ) } ) {
+ $handle->do( $sql ) or die $handle->errstr;
+ }
+ }
+
+ $handle->commit;
+
return 1;
}
@@ -114,32 +132,45 @@ sub populate_auths {
my ($name, $email) = $long =~ /(.*) <(.+)>$/;
push @auths, [ $pauseid, $name, $email ];
}
- create_table( $handle, 'auths' );
+
+ $handle->begin_work;
+
+ create_table( $handle, 'tmp_auths' );
foreach my $auth ( @auths ) {
- my $sth = $handle->prepare_cached(qq{INSERT INTO auths values (?,?,?)}) or die $handle->errstr;
+ my $sth = $handle->prepare_cached(qq{INSERT INTO tmp_auths values (?,?,?)}) or die $handle->errstr;
$sth->execute( @{ $auth } ) or die $handle->errstr;
}
+
+ $handle->do(qq{DROP TABLE IF EXISTS auths}) or die $handle->errstr;
+ $handle->do(qq{ALTER TABLE tmp_auths RENAME TO auths}) or die $handle->errstr;
+
+ foreach my $sql ( @{ App::CPANIDX::Tables->index( 'auths' ) } ) {
+ $handle->do( $sql ) or die $handle->errstr;
+ }
+
+ $handle->commit;
+
return 1;
}
sub fetch_indexes {
-my ($location,$mirror,$mailrc,$packages) = @_;
-
-my $mailurl = URI->new($mirror);
-my $packurl = URI->new($mirror);
-
-$mailurl->path_segments( ( grep { $_ } $mailurl->path_segments ), 'authors', $mailrc );
-$packurl->path_segments( ( grep { $_ } $packurl->path_segments ), 'modules', $packages );
-
-foreach my $file ( $mailurl, $packurl ) {
- my $url = $file->as_string;
- print "Fetching '$url' to '$location'\n";
- my $ff = File::Fetch->new( uri => $url );
- print $ff->output_file, "\n";
- my $stat = $ff->fetch( to => $location );
- next unless $stat;
- print "Downloaded '$url' to '$stat'\n";
-}
+ my ($location,$mirror,$mailrc,$packages) = @_;
+
+ my $mailurl = URI->new($mirror);
+ my $packurl = URI->new($mirror);
+
+ $mailurl->path_segments( ( grep { $_ } $mailurl->path_segments ), 'authors', $mailrc );
+ $packurl->path_segments( ( grep { $_ } $packurl->path_segments ), 'modules', $packages );
+
+ foreach my $file ( $mailurl, $packurl ) {
+ my $url = $file->as_string;
+ print "Fetching '$url' to '$location'\n";
+ my $ff = File::Fetch->new( uri => $url );
+ print $ff->output_file, "\n";
+ my $stat = $ff->fetch( to => $location );
+ next unless $stat;
+ print "Downloaded '$url' to '$stat'\n";
+ }
}
sub poll_server {
diff --git a/lib/App/CPANIDX/Tables.pm b/lib/App/CPANIDX/Tables.pm
index 13659ae..5d68718 100644
--- a/lib/App/CPANIDX/Tables.pm
+++ b/lib/App/CPANIDX/Tables.pm
@@ -30,6 +30,28 @@ my $tables = {
],
};
+my $indexes = {
+ mods => [
+ 'CREATE INDEX mods_mod_name ON mods ( mod_name )',
+ 'CREATE INDEX mods_dist_name ON mods ( dist_name )',
+ 'CREATE INDEX mods_dist_vers ON mods ( dist_vers )',
+ 'CREATE INDEX mods_all ON mods ( mod_name, dist_name, dist_vers )',
+ ],
+ dists => [
+ 'CREATE INDEX dists_dist_name ON dists ( dist_name )',
+ 'CREATE INDEX dists_dist_vers ON dists ( dist_vers )',
+ 'CREATE INDEX dists_cpan_id ON dists ( cpan_id )',
+ 'CREATE INDEX dists_all ON dists ( dist_name, dist_vers )',
+ ],
+ auths => [
+ 'CREATE INDEX auths_cpan_id ON auths ( cpan_id )',
+ ],
+};
+
+# make the temp mappings
+foreach my $k ( qw( mods dists auths ) ) {
+ $tables->{ 'tmp_' . $k } = $tables->{ $k };
+}
+
sub table {
return unless @_;
my $table = shift;
@@ -42,6 +64,15 @@ sub table {
return $sql;
}
+sub index {
+ return unless @_;
+ my $table = shift;
+ $table = shift if $table->isa(__PACKAGE__);
+ return unless $table;
+ return unless exists $indexes->{ $table };
+ return [ @{ $indexes->{ $table } } ];
+}
+
sub tables {
return sort keys %{ $tables };
}
@@ -78,6 +109,12 @@ Takes one argument, the name of a table to lookup.
Returns a SQL statement that can be used to create the table.
+=item C<index>
+
+Takes one argument, the name of a table to lookup.
+
+Returns an arrayref of SQL statements to create the indexes for the table.
+
=back
=head1 AUTHOR