Subject: | PATCH: Improve performance by 25%, and docs as well. |
Using the attached benchmarking script, I compared the basic overhead of
using DBD::Multi for selects. The results were poor on my setup... about
212% slower, or a difference of 4,000 fewer SELECTs/second.
By using Devel::DProf, I was able to make a number of optimizations,
bringing the difference down to... 187% slower. The summary of the
changes include:
- using direct hash access instead of accessors
- caching sorts that never change.
These changes pass the whole regression suite. I also added a number
blocks of "private" comments about the internal functions and data
structures.
Further optimization could possibly be made if calls to FETCH could be
replaced by using an internal object accesses. I wasn't sure about
what's possible there, while meeting the spec for what it means to be a
DBD driver.
I'm attaching the "diff" of my changes, which may need the other patches
I've previously submitted to work.
I'm also attaching the complete file in case that's helpful.
Overall, I realize that DBD::Multi can still over significant benefit
when the performance of the database is truly the bottleneck, and not
how fast Perl's access to it is.
Still, this research indicates that use of DBD::Multi is best limited to
places that are truly identified as "hot spots". Otherwise, the use can
be counter-productive.
Mark
Subject: | dbd_multi_overhead.pl |
=head
The purpose of this script is to see how much overhead
is added by using DBD::Multi vs. a direct connection
=cut
use strict;
use Benchmark ':all';
use DBI;
use DBD::Multi;
print "testing with DBD::Multi $DBD::Multi::VERSION\n";
my $raw_dbh = DBI->connect("dbi:Pg:dbname=saveapet_dev");
my $multi_dbh = DBI->connect('dbi:Multi:', undef, undef, {
dsns => [ 10 => $raw_dbh ]
});
cmpthese(10_000, {
raw => sub { $raw_dbh->selectrow_array("SELECT CURRENT_DATE") },
multi => sub { $multi_dbh->selectrow_array("SELECT CURRENT_DATE") },
});
Subject: | Multi.pm |
Message body is not shown because it is too large.
Subject: | dbd-multi-optimization.patch |
--- old-saveapet-1/perllib/DBD/Multi.pm 2007-01-19 17:02:09.000000000 -0500
+++ new-saveapet-1/perllib/DBD/Multi.pm 2007-01-19 17:02:09.000000000 -0500
@@ -6,7 +6,7 @@
use vars qw[$VERSION $err $errstr $sqlstate $drh];
-$VERSION = '0.03';
+$VERSION = '0.10';
$err = 0; # DBI::err
$errstr = ""; # DBI::errstr
@@ -34,7 +34,10 @@
package DBD::Multi::dr;
use strict;
+no warnings;
$DBD::Multi::dr::imp_data_size = 0;
+use warnings;
+
use DBD::File;
use base qw[DBD::File::dr];
@@ -56,9 +59,9 @@
my $handler = DBD::Multi::Handler->new({
dsources => [ @dsns ],
});
- $handler->failed_max($attr->{failed_max})
+ $handler->{failed_max} = $attr->{failed_max}
if exists $attr->{failed_max};
- $handler->failed_expire($attr->{failed_expire})
+ $handler->{failed_expire} = $attr->{failed_expire}
if exists $attr->{failed_expire};
$dbh->STORE(_handler => $handler);
@@ -74,7 +77,10 @@
package DBD::Multi::db;
use strict;
+no warnings;
$DBD::Multi::db::imp_data_size = 0;
+use warnings;
+
use base qw[DBD::File::db];
sub prepare {
@@ -142,7 +148,10 @@
package DBD::Multi::st;
use strict;
+no warnings;
$DBD::Multi::st::imp_data_size = 0;
+use warnings;
+
use base qw[DBD::File::st];
use vars qw[@METHODS @FIELDS];
@@ -218,54 +227,74 @@
package DBD::Multi::Handler;
use strict;
-use base qw[Class::Accessor::Fast];
+sub new {
+ my ($class, $args) = @_;
+ my $self = $args;
-__PACKAGE__->mk_accessors(qw[
- dsources
- nextid
- all_dsources
- current_dsource
- used
- failed
- failed_last
- failed_max
- failed_expire
-]);
+ bless $self, $class;
-sub new {
- my ($class, $args) = @_;
- my $self = $class->SUPER::new($args);
- $self->nextid(0) unless defined $self->nextid;
- $self->all_dsources({});
- $self->used({});
- $self->failed({});
- $self->failed_last({});
- $self->failed_max(3) unless defined $self->failed_max;
- $self->failed_expire(60*5) unless defined $self->failed_expire;
+ $self->{nextid} = 0 unless defined $self->{nextid};
+ $self->{all_dsources} = {};
+ $self->{used} = {};
+ $self->{failed} = {};
+ $self->{failed_last} = {};
+ $self->{failed_max} = 3 unless defined $self->{failed_max};
+ $self->{failed_expire} = 60*5 unless defined $self->{failed_expire};
$self->_configure_dsources;
+
return $self;
}
sub all_sources {
my ($self) = @_;
- return values %{$self->all_dsources};
+ return values %{ $self->{all_dsources} };
}
+=begin private
+
+=head2 add_to_pri
+
+ $self->add_to_pri($priority, $data_source);
+
+Given a $priority, and a $data_source, updates internal data structures
+for later use.
+
+C<all_dsources> is a hashref with a structure that that looks like this:
+
+ '0' => $first_raw_data_source,
+ '1' => $second_raw_data_source,
+
+C<dsources> provides an alternate lookup by priority. It has a structure, like
+this:
+
+ # The top level keys are priorities
+ '10' => {
+ # Second level keys correspond to all_sources keys
+ # The values are always just "1".
+ '1' => 1,
+ '0' => 1
+ },
+ '20' => {
+ '2' => 1
+ }
+
+=end private
+
+=cut
+
sub add_to_pri {
my ($self, $pri, $dsource) = @_;
- my $dsource_id = $self->nextid;
- my $dsources = $self->dsources;
- my $all = $self->all_dsources;
+ my $dsource_id = $self->{nextid};
- $all->{$dsource_id} = $dsource;
- $dsources->{$pri}->{$dsource_id} = 1;
+ $self->{all_dsources}->{$dsource_id} = $dsource;
+ $self->{dsources}->{$pri}->{$dsource_id} = 1;
- $self->nextid($dsource_id + 1);
+ $self->{nextid} = $dsource_id + 1;
}
sub dbh {
my $self = shift;
- my $dbh = $self->_connect_dsource;
+ my $dbh = $self->connect_dsource;
return $dbh if $dbh;
$self->dbh_failed;
$self->dbh;
@@ -274,21 +303,34 @@
sub dbh_failed {
my ($self) = @_;
- my $current_dsource = $self->current_dsource;
- $self->failed->{$current_dsource}++;
- $self->failed_last->{$current_dsource} = time;
+ my $current_dsource = $self->{current_dsource};
+ $self->{failed}->{$current_dsource}++;
+ $self->{failed_last}->{$current_dsource} = time;
}
+
+=begin private
+
+=head2 _purge_old_failures
+
+ $self->_purge_old_failures
+
+Allow sources to be considered for use again if enough time as passed.
+
+=end private
+
+=cut
+
sub _purge_old_failures {
my ($self) = @_;
my $now = time;
- my @all = keys %{$self->all_dsources};
+ my @all = keys %{$self->{all_dsources} };
- foreach my $dsource ( @all ) {
- next unless $self->failed->{$dsource};
- if ( ($now - $self->failed_last->{$dsource}) > $self->failed_expire ) {
- delete $self->failed->{$dsource};
- delete $self->failed_last->{$dsource};
+ for my $dsource ( @all ) {
+ next unless $self->{failed}->{$dsource};
+ if ( ($now - $self->{failed_last}->{$dsource}) > $self->{failed_expire} ) {
+ delete $self->{failed}->{$dsource};
+ delete $self->{failed_last}->{$dsource};
}
}
}
@@ -296,62 +338,110 @@
sub _pick_dsource {
my ($self) = @_;
$self->_purge_old_failures;
- my $dsources = $self->dsources;
- my @pri = sort { $a <=> $b } keys %{$dsources};
- foreach my $pri ( @pri ) {
- my $dsource = $self->_pick_pri_dsource($dsources->{$pri});
+ my $dsources = $self->{dsources};
+
+ for my $pri ( @{ $self->{sorted_pris} } ) {
+ my $dsource = $self->_pick_pri_dsource($pri,$dsources->{$pri});
if ( defined $dsource ) {
- $self->current_dsource($dsource);
+ $self->{current_dsource} = $dsource;
return;
}
}
- $self->used({});
+ $self->{used} = {};
return $self->_pick_dsource
- if (grep {$self->failed->{$_} >= $self->failed_max} keys(%{$self->failed})) < keys(%{$self->all_dsources});
+ if (grep {$self->{failed}->{$_} >= $self->{failed_max}} keys(%{$self->{failed}})) < keys(%{$self->{all_dsources}});
die("All data sources failed!");
}
+=begin private
+
+=head2 _pick_pri_dsource
+
+ my $dsource = $self->_pick_pri_dsource($pri,$dsources->{$pri});
+
+Given 1 or more data sources with the same priority, return
+one that is available, or undef if they have all failed.
+
+Args:
+ - priority
+ - hashref that maps integer keys from the all_dsources hash to '1'.
+
+See L<add_to_pri()> for more data structure docs.
+
+=end private
+
+=cut
+
sub _pick_pri_dsource {
- my ($self, $dsources) = @_;
- my @dsources = sort { $a <=> $b } keys %{$dsources};
- my @used = grep { exists $self->used->{$_} } @dsources;
- my @failed = grep { exists($self->failed->{$_}) && $self->failed->{$_} >= $self->failed_max } @dsources;
+ my ($self, $pri, $dsources) = @_;
+
+ my $cached_dsources = $self->{dsources_sorted_by_pri}{$pri};
+
+ my @used = grep { exists $self->{used}->{$_} } @$cached_dsources;
+ my @failed = grep { exists($self->{failed}->{$_}) && $self->{failed}->{$_} >= $self->{failed_max} } @$cached_dsources;
# We've used them all and they all failed. Escallate.
- return if @used == @dsources && @failed == @dsources;
+ return if @used == @$cached_dsources && @failed == @$cached_dsources;
# We've used them all but some are good. Purge and reuse.
- delete @{$self->used}{@dsources} if @used == @dsources;
+ delete @{$self->{used} }{@$cached_dsources} if @used == @$cached_dsources;
- foreach my $dsource ( @dsources ) {
- next if $self->failed->{$dsource}
- && $self->failed->{$dsource} >= $self->failed_max;
- next if $self->used->{$dsource};
+ for my $dsource ( @$cached_dsources ) {
+ next if $self->{failed}->{$dsource}
+ && $self->{failed}->{$dsource} >= $self->{failed_max};
+ next if $self->{used}->{$dsource};
- $self->used->{$dsource} = 1;
+ $self->{used}->{$dsource} = 1;
return $dsource;
}
- return;
+ return undef;
}
+=begin private
+
+=head2 _configure_dsources
+
+ $self->_configure_dsources;
+
+Set up internal tracking of data sources. This is meant
+to be called just once as part of new().
+
+
+=end private
+
+=cut
+
sub _configure_dsources {
my ($self) = @_;
- my $dsources = $self->dsources;
- $self->dsources({});
+ # XXX Here, 'dsources' refers to the 'dsns' array that the user provides.
+ # However, here it is reset and used in a different way, documented in add_to_pri
+ my $dsources = $self->{dsources};
+ $self->{dsources} = {};
while ( my $pri = shift @{$dsources} ) {
my $dsource = shift @{$dsources} or last;
$self->add_to_pri($pri => $dsource);
}
+
+
+ # Cache these, since $dsources doesn't change over the object lifetime.
+ $self->{sorted_pris} = [ sort { $a <=> $b } keys %{$self->{dsources} } ];
+
+ for my $pri (@{ $self->{sorted_pris} }) {
+ $self->{dsources_sorted_by_pri}{$pri} = [ sort { $a <=> $b } keys %{ $self->{dsources}{$pri} } ];
+ }
+
+
+
}
-sub _connect_dsource {
+sub connect_dsource {
my ($self, $dsource) = @_;
unless ( $dsource ) {
$self->_pick_dsource;
- $dsource = $self->all_dsources->{$self->current_dsource};
+ $dsource = $self->{all_dsources}->{ $self->{current_dsource} };
}
# Support ready-made handles
@@ -364,15 +454,10 @@
return $dbh;
}
-sub connect_dsource {
- my ($self, $dsource) = @_;
- $self->_connect_dsource($dsource);
-}
-
sub multi_do_all {
my ($self, $code) = @_;
- my @all = values %{$self->all_dsources};
+ my @all = values %{ $self->{all_dsources} };
foreach my $source ( @all ) {
my $dbh = $self->connect_dsource($source);