Skip Menu |

This queue is for tickets about the App-CPANIDX CPAN distribution.

Report information
The Basics
Id: 56214
Status: resolved
Priority: 0/
Queue: App-CPANIDX

People
Owner: Nobody in particular
Requestors: APOCAL [...] cpan.org
Cc:
AdminCc:

Bug Information
Severity: (no value)
Broken in: 0.08
Fixed in: (no value)



Subject: cpanidx-gendb is slow when building the SQLite db
Hello, Supplied is a patch against App-CPANIDX 0.08 that speeds up the sqlite code by more than a factor of 10. cpan@smoker-master:~$ time cpanidx-gendb Fetching 'ftp://192.168.0.200/CPAN/authors/01mailrc.txt.gz' to '/home/cpan/.cpanidx' 01mailrc.txt.gz Downloaded 'ftp://192.168.0.200/CPAN/authors/01mailrc.txt.gz' to '/home/cpan/.cpanidx/01mailrc.txt.gz' Fetching 'ftp://192.168.0.200/CPAN/modules/02packages.details.txt.gz' to '/home/cpan/.cpanidx' 02packages.details.txt.gz Downloaded 'ftp://192.168.0.200/CPAN/modules/02packages.details.txt.gz' to '/home/cpan/.cpanidx/02packages.details.txt.gz' Populating auths ... DONE Populating dists and mods ... DONE --- - timestamp: 1270176612 real 2m34.818s user 0m8.570s sys 0m13.810s cpan@smoker-master:~$ time cpanidx-gendb-myver Fetching 'ftp://192.168.0.200/CPAN/authors/01mailrc.txt.gz' to '/home/cpan/.cpanidx' 01mailrc.txt.gz Downloaded 'ftp://192.168.0.200/CPAN/authors/01mailrc.txt.gz' to '/home/cpan/.cpanidx/01mailrc.txt.gz' Fetching 'ftp://192.168.0.200/CPAN/modules/02packages.details.txt.gz' to '/home/cpan/.cpanidx' 02packages.details.txt.gz Downloaded 'ftp://192.168.0.200/CPAN/modules/02packages.details.txt.gz' to '/home/cpan/.cpanidx/02packages.details.txt.gz' Populating auths ... DONE Populating dists and mods ... DONE --- - timestamp: 1270179165 real 0m8.849s user 0m8.090s sys 0m0.410s -- ~Apocalypse
Subject: cpanidx.diff
diff --git a/bin/cpanidx-gendb b/bin/cpanidx-gendb index f8fe4ff..8d5b29a 100755 --- a/bin/cpanidx-gendb +++ b/bin/cpanidx-gendb @@ -41,6 +41,7 @@ my $idxdir = _cpanidx_dir(); mkpath( $idxdir ) unless -d $idxdir; fetch_indexes($idxdir,$mirror,$mailrc_file,$packages_file); my $dbh = DBI->connect($dsn,$user,$pass); +$dbh->do(qq{PRAGMA synchronous = OFF}) or die $dbh->errstr; print "Populating auths ... "; populate_auths($dbh,$idxdir,$mailrc_file); print "DONE\nPopulating dists and mods ... "; @@ -89,16 +90,33 @@ sub populate_dists { } push @mods, [ $module, $d->dist, $d->version, $d->cpanid, $version ]; } - create_table( $handle, 'dists' ); + + $handle->begin_work; + + create_table( $handle, 'tmp_dists' ); foreach my $dist ( keys %dists ) { - my $sth = $handle->prepare_cached(qq{INSERT INTO dists values (?,?,?,?)}) or die $handle->errstr; + my $sth = $handle->prepare_cached(qq{INSERT INTO tmp_dists values (?,?,?,?)}) or die $handle->errstr; $sth->execute( @{ $dists{ $dist } } ); } - create_table( $handle, 'mods' ); + create_table( $handle, 'tmp_mods' ); foreach my $mod ( @mods ) { - my $sth = $handle->prepare_cached(qq{INSERT INTO mods values (?,?,?,?,?)}) or die $handle->errstr; + my $sth = $handle->prepare_cached(qq{INSERT INTO tmp_mods values (?,?,?,?,?)}) or die $handle->errstr; $sth->execute( @{ $mod } ); } + + $handle->do(qq{DROP TABLE IF EXISTS dists}) or die $handle->errstr; + $handle->do(qq{ALTER TABLE tmp_dists RENAME TO dists}) or die $handle->errstr; + $handle->do(qq{DROP TABLE IF EXISTS mods}) or die $handle->errstr; + $handle->do(qq{ALTER TABLE tmp_mods RENAME TO mods}) or die $handle->errstr; + + foreach my $table ( qw( dists mods ) ) { + foreach my $sql ( @{ App::CPANIDX::Tables->index( $table ) } ) { + $handle->do( $sql ) or die $handle->errstr; + } + } + + $handle->commit; + return 1; } @@ -114,32 +132,45 @@ sub populate_auths { my ($name, $email) = $long =~ /(.*) <(.+)>$/; push @auths, [ $pauseid, $name, $email ]; } - create_table( $handle, 'auths' ); + + $handle->begin_work; + + create_table( $handle, 'tmp_auths' ); foreach my $auth ( @auths ) { - my $sth = $handle->prepare_cached(qq{INSERT INTO auths values (?,?,?)}) or die $handle->errstr; + my $sth = $handle->prepare_cached(qq{INSERT INTO tmp_auths values (?,?,?)}) or die $handle->errstr; $sth->execute( @{ $auth } ) or die $handle->errstr; } + + $handle->do(qq{DROP TABLE IF EXISTS auths}) or die $handle->errstr; + $handle->do(qq{ALTER TABLE tmp_auths RENAME TO auths}) or die $handle->errstr; + + foreach my $sql ( @{ App::CPANIDX::Tables->index( 'auths' ) } ) { + $handle->do( $sql ) or die $handle->errstr; + } + + $handle->commit; + return 1; } sub fetch_indexes { -my ($location,$mirror,$mailrc,$packages) = @_; - -my $mailurl = URI->new($mirror); -my $packurl = URI->new($mirror); - -$mailurl->path_segments( ( grep { $_ } $mailurl->path_segments ), 'authors', $mailrc ); -$packurl->path_segments( ( grep { $_ } $packurl->path_segments ), 'modules', $packages ); - -foreach my $file ( $mailurl, $packurl ) { - my $url = $file->as_string; - print "Fetching '$url' to '$location'\n"; - my $ff = File::Fetch->new( uri => $url ); - print $ff->output_file, "\n"; - my $stat = $ff->fetch( to => $location ); - next unless $stat; - print "Downloaded '$url' to '$stat'\n"; -} + my ($location,$mirror,$mailrc,$packages) = @_; + + my $mailurl = URI->new($mirror); + my $packurl = URI->new($mirror); + + $mailurl->path_segments( ( grep { $_ } $mailurl->path_segments ), 'authors', $mailrc ); + $packurl->path_segments( ( grep { $_ } $packurl->path_segments ), 'modules', $packages ); + + foreach my $file ( $mailurl, $packurl ) { + my $url = $file->as_string; + print "Fetching '$url' to '$location'\n"; + my $ff = File::Fetch->new( uri => $url ); + print $ff->output_file, "\n"; + my $stat = $ff->fetch( to => $location ); + next unless $stat; + print "Downloaded '$url' to '$stat'\n"; + } } sub poll_server { diff --git a/lib/App/CPANIDX/Tables.pm b/lib/App/CPANIDX/Tables.pm index 13659ae..5d68718 100644 --- a/lib/App/CPANIDX/Tables.pm +++ b/lib/App/CPANIDX/Tables.pm @@ -30,6 +30,28 @@ my $tables = { ], }; +my $indexes = { + mods => [ + 'CREATE INDEX mods_mod_name ON mods ( mod_name )', + 'CREATE INDEX mods_dist_name ON mods ( dist_name )', + 'CREATE INDEX mods_dist_vers ON mods ( dist_vers )', + 'CREATE INDEX mods_all ON mods ( mod_name, dist_name, dist_vers )', + ], + dists => [ + 'CREATE INDEX dists_dist_name ON dists ( dist_name )', + 'CREATE INDEX dists_dist_vers ON dists ( dist_vers )', + 'CREATE INDEX dists_cpan_id ON dists ( cpan_id )', + 'CREATE INDEX dists_all ON dists ( dist_name, dist_vers )', + ], + auths => [ + 'CREATE INDEX auths_cpan_id ON auths ( cpan_id )', + ], +}; + +# make the temp mappings +foreach my $k ( qw( mods dists auths ) ) { + $tables->{ 'tmp_' . $k } = $tables->{ $k }; +} + sub table { return unless @_; my $table = shift; @@ -42,6 +64,15 @@ sub table { return $sql; } +sub index { + return unless @_; + my $table = shift; + $table = shift if $table->isa(__PACKAGE__); + return unless $table; + return unless exists $indexes->{ $table }; + return [ @{ $indexes->{ $table } } ]; +} + sub tables { return sort keys %{ $tables }; } @@ -78,6 +109,12 @@ Takes one argument, the name of a table to lookup. Returns a SQL statement that can be used to create the table. +=item C<index> + +Takes one argument, the name of a table to lookup. + +Returns an arrayref of SQL statements to create the indexes for the table. + =back =head1 AUTHOR
Updated version on CPAN contains this patch, thanks BinGOs! -- ~Apocalypse