Subject: | No reindexing after updating objects previously stored in BerkeleyDB. |
perl version: 5.8.8
os version: Linux fireball 2.6.18-6-686 #1 SMP Tue Jun 17 21:31:27 UTC
2008 i686 GNU/Linux
KiokuDB version: 0.27
KiokiDB::Backend::BDB version: 0.14
BerkeleyDB (perl package) version: 0.38
BerkeleyDB version: 4.7.25
Each time I update an object, its new secondary keys are inserted into the
secondary table, but the previous keys are not deleted.
It happens because on update BerkeleyDB calls the callback function
twice - first to determine which secondary keys the new data has, and
second to determine which secondary keys the data currently stored in the
db has (in order to remove the old-data keys).
In the current package implementation the callback function has 2 bugs:
1. It clears the temp keys on each call.
2. It ignores the data passed to the function and looks only at the id.
How to reproduce: execute attached test script.
A patch (suggestion) fixing this bug is attached. It also fixes the bugs
reported in https://rt.cpan.org/Ticket/Display.html?id=46377 and
https://rt.cpan.org/Ticket/Display.html?id=46370
Subject: | KiokuDB-Backend-DBD-GIN-patch2.pm |
#!/usr/bin/perl
package KiokuDB::Backend::BDB::GIN;
use Moose;

use BerkeleyDB;
use Data::Stream::Bulk::Callback;
use Data::Stream::Bulk::Nil;
use Data::Stream::Bulk::Util qw(cat);

use namespace::clean -except => 'meta';
extends qw(KiokuDB::Backend::BDB);
with qw(
KiokuDB::Backend::Role::Query::GIN
Search::GIN::Extract::Delegate
);
# Touch the lazy secondary_db attribute at construction time so the
# secondary index database is opened right away rather than on first use.
sub BUILD {
    my $self = shift;
    $self->secondary_db;
}
# Handle to the secondary (GIN index) database. Built lazily via
# _build_secondary_db; BUILD forces it open early.
has secondary_db => (
is => "ro",
lazy_build => 1,
);
# Lazy builder for secondary_db: open the "gin_index" file as the
# secondary database associated with the primary one.
sub _build_secondary_db {
    my $self = shift;
    return $self->_open_secondary(
        name => "secondary",
        file => "gin_index",
    );
}
# When true, only root entries are indexed; other entries have their
# backend data cleared by the before-insert modifier.
has root_only => (
isa => "Bool",
is => "ro",
default => 0,
);
# Make the inherited Search::GIN::Extract::Delegate "extract" attribute
# optional for this backend.
has '+extract' => (
required => 0,
);
# NOTE(review): Time::HiRes appears unused in this file - possibly a
# leftover from debugging/profiling; confirm before removing.
use Time::HiRes qw/time/;
# Open the secondary (GIN index) database and associate it with the
# primary database so that BDB maintains the index implicitly on every
# primary put/delete.
sub _open_secondary {
my ( $self, @args ) = @_;
# Secondary keys are non-unique: many ids may share one GIN key.
my $secondary = $self->manager->open_db( dup => 1, dupsort => 1, @args );
$self->manager->associate(
secondary => $secondary,
primary => $self->primary_db,
# BDB calls this callback with a primary record to learn which
# secondary keys it maps to. On update it is invoked for both the
# new and the previously stored record, so the keys MUST be derived
# from $data itself (not from the id or cached state) for stale
# keys to be removed correctly.
callback => sub {
my $id = shift;
my $data = shift;
# Record layout (mirrors the serialize wrapper): a length-prefixed
# serialized entry ('N/a*'), a one-byte key count ('C'), then the
# packed keys. NOTE(review): the 'C' count limits an entry to 255
# index keys - confirm that is acceptable.
my ($entry, $keys_count, $keys) = unpack 'N/a*Ca*', $data;
if ($keys_count){
# Keys are stored as NUL-terminated strings ('Z*').
my @keys = unpack 'Z*' x $keys_count, $keys;
return \@keys;
} else {
# No keys: this record contributes nothing to the index.
return [];
}
}
);
return $secondary;
}
# Before each insert, compute the GIN keys for every indexable entry and
# stash them in the entry's backend data, where the serialize wrapper
# packs them alongside the entry for BDB's secondary-index callback.
before insert => sub {
    my ( $self, @entries ) = @_;

    return unless $self->extract;

    for my $entry (@entries) {
        my $not_indexable = $entry->deleted
            || !$entry->has_object
            || ( $self->root_only && !$entry->root );

        if ($not_indexable) {
            # Entry should not be indexed: drop any stale key data.
            $entry->clear_backend_data;
            next;
        }

        my $backend_data = $entry->backend_data || $entry->backend_data({});
        $backend_data->{keys} =
            [ $self->extract_values( $entry->object, entry => $entry ) ];
    }
};
# There may be a better way to plug in a different Serializer, instead of
# using "around" modifiers.
#
# Wrap the serialized entry in the index-aware record layout: a
# length-prefixed payload ('N/a*'), a one-byte key count ('C'), then each
# GIN key as a NUL-terminated string ('Z*'). The secondary-index callback
# in _open_secondary unpacks this exact layout.
around serialize => sub {
    my ( $serializer, $self, $entry ) = @_;

    my $serialized = $serializer->( $self, $entry );

    # backend_data may have been cleared (deleted / non-root entries), so
    # guard the dereference instead of dying on undef. The original code
    # also read an unused {version} slot here, which crashed in that case.
    my $backend_data = $entry->backend_data || {};
    my @keys         = @{ $backend_data->{keys} || [] };

    # NOTE(review): the 'C' count caps an entry at 255 index keys; keys
    # beyond that would corrupt the record - confirm the limit is fine.
    my $keys_count = scalar @keys;
    my $mask       = 'N/a*C' . 'Z*' x $keys_count;
    return pack( $mask, $serialized, $keys_count, @keys );
};
# Peel off the index-aware wrapper (the length-prefixed payload) and hand
# only the raw serialized entry on to the underlying deserializer; the
# trailing key count and keys are not needed here.
around deserialize => sub {
    my ( $deserializer, $self, $record ) = @_;
    my ($payload) = unpack 'N/a*', $record;
    return $deserializer->( $self, $payload );
};
# Answer a GIN query by positioning one secondary cursor per requested key
# and intersecting them with BDB's db_join, streaming matching entries back
# as a Data::Stream::Bulk. With no keys at all, the whole secondary index
# is scanned instead.
sub search {
    my ( $self, $query, @args ) = @_;

    my %args = (
        distinct => $self->distinct,
        @args,
    );
    my $live_objects = $args{live_objects};

    my %spec = $query->extract_values($self);

    # One positioned cursor per key; an empty posting list for any key
    # means the intersection is necessarily empty.
    my @cursors;
    foreach my $key ( @{ $spec{values} } ) {
        my $cur  = $self->secondary_db->db_cursor;
        my $data = '';
        my $ret  = $cur->c_get( $key, $data, DB_SET );
        return Data::Stream::Bulk::Nil->new if $ret == DB_NOTFOUND;
        confess "Can't join tables: $ret" unless $ret == 0;
        push @cursors, $cur;
    }

    my $cursor = @cursors
        ? $self->primary_db->db_join( \@cursors )
        : $self->secondary_db->db_cursor;
    # Join cursors iterate with flag 0; a plain scan needs DB_NEXT.
    my $flags = @cursors ? 0 : DB_NEXT;

    return Data::Stream::Bulk::Callback->new(
        callback => sub {
            my ( $id, $data ) = ( '', '' );
            my $ret = $cursor->c_get( $id, $data, $flags );
            if ( $ret == 0 ) {
                # Prefer the live instance, when a live-object set was
                # provided, so object identity is preserved in a scope.
                my ($entry) = $live_objects
                    ? $live_objects->ids_to_entries($id)
                    : ();
                $entry = $self->deserialize($data) unless defined $entry;
                return [$entry];
            }
            elsif ( $ret == DB_NOTFOUND ) {
                $cursor->c_close;
                return;
            }
            else {
                # Original used "throw LGame::Err", a leftover from the
                # reporter's own codebase; confess matches the rest of
                # this module's error handling.
                confess "Error while fetching db: $ret";
            }
        },
    );
}
# Direct entry fetch through the GIN index role is not implemented.
sub fetch_entry { die "TODO" }
# GIN index rows are removed automatically by BDB's secondary-index
# association when the primary record is deleted, so calling this
# directly is an error. (Also fixes the ungrammatical message:
# "Deletion the" -> "Deletion from the".)
sub remove_ids {
    my ( $self, @ids ) = @_;
    die "Deletion from the GIN index is handled implicitly by BDB";
}
# Insertion into the GIN index happens automatically through the BDB
# secondary association, so calling this directly is an error.
sub insert_entry {
    my $self = shift;
    my ( $id, @keys ) = @_;
    die "Insertion to the GIN index is handled implicitly by BDB";
}
__PACKAGE__->meta->make_immutable;
__PACKAGE__
# documentation removed, this file is just a suggestion
Subject: | kioku-test-update.pl |
{
# Minimal Moose class used as the fixture for the reindexing test: a
# single read-write integer attribute that the GIN extractor indexes.
package App::Object;
use Moose;
# The value under test; changing it should cause the secondary index
# keys to be replaced on update().
has indexed_field => (
is => 'rw',
isa => 'Int'
);
__PACKAGE__->meta->make_immutable;
1;
}
package main;

# Reproduction script: store an object, change its indexed field, update,
# then verify the new index key exists and the old one was removed.

use File::Temp qw(tempdir);
use KiokuDB;
use Search::GIN::Extract::Callback;
use Search::GIN::Query::Manual;
use Test::More tests => 2;

# Use a throwaway directory instead of a hard-coded home path so the test
# is portable and leaves nothing behind.
my $db_dir = tempdir( CLEANUP => 1 );

my $dir = KiokuDB->connect(
    "bdb-gin:dir=$db_dir",
    create  => 1,
    extract => Search::GIN::Extract::Callback->new(
        extract => sub {
            my ( $object, $extractor, @args ) = @_;
            my $index = { class => ref $object };
            if ( $object->isa('App::Object') ) {
                $index->{indexed_field} = $object->indexed_field;
                # Several extra keys to exercise multi-key reindexing.
                $index->{"indexed_field$_"} = $object->indexed_field
                    for 1 .. 10;
            }
            return $index;
        }
    ),
);

my $id;
{
    # Create and store a fresh object.
    my $scope = $dir->new_scope;
    my $obj   = App::Object->new( indexed_field => 111 );
    $id = $dir->store($obj);
}

{
    # Change the indexed field; the update must trigger reindexing.
    my $scope = $dir->new_scope;
    my $obj   = $dir->lookup($id);
    $obj->indexed_field(123);
    $dir->update($obj);
}

{
    my $scope = $dir->new_scope;

    my $new_query = Search::GIN::Query::Manual->new(
        values => {
            class         => 'App::Object',
            indexed_field => 123,
        },
    );
    my @new_result = $dir->search($new_query)->all;
    is scalar @new_result, 1, 'New index key exists';

    my $old_query = Search::GIN::Query::Manual->new(
        values => {
            class         => 'App::Object',
            indexed_field => 111,
        },
    );
    my @old_result = $dir->search($old_query)->all;
    is scalar @old_result, 0, 'Old index key removed';
}