Skip Menu |

This queue is for tickets about the Plucene CPAN distribution.

Report information
The Basics
Id: 18975
Status: new
Priority: 0/
Queue: Plucene

People
Owner: Nobody in particular
Requestors: mcrawfor [...] cpan.org
Cc:
AdminCc:

Bug Information
Severity: Important
Broken in: 1.24
Fixed in: (no value)



Subject: Unicode does not match
Plucene does not appear to match Unicode text. The following (also attached) snippet produces the output below, which I feel exemplifies the problem:

    mcrawfor@inkey 13:31 $ perl plucene.pl
    0 at plucene.pl line 40.
    1 at plucene.pl line 49.

Test code:

    use Plucene;
    use Plucene::Analysis::SimpleAnalyzer;
    use Plucene::Index::Writer;
    use Plucene::Search::IndexSearcher;
    use Plucene::Document::Field;
    use Plucene::Document;
    use Plucene::Search::HitCollector;
    use Plucene::QueryParser;

    my $uni = "\x{3028}\x{3063}\x{3057}\x{3024}";
    my $norm = "foo";

    my $analyzer = Plucene::Analysis::SimpleAnalyzer->new();
    my $writer = Plucene::Index::Writer->new("uni.index", $analyzer, 1);

    my $doc = Plucene::Document->new;
    $doc->add(Plucene::Document::Field->Text(content => $uni));
    $writer->add_document($doc);

    $doc = Plucene::Document->new;
    $doc->add(Plucene::Document::Field->Text(content => $norm));
    $writer->add_document($doc);

    undef $writer;

    my $parser = Plucene::QueryParser->new({
        analyzer => Plucene::Analysis::SimpleAnalyzer->new(),
        default => "content"
    });

    my @docs;

    my $query = $parser->parse($uni);
    my $searcher = Plucene::Search::IndexSearcher->new("uni.index");
    my $hc = Plucene::Search::HitCollector->new(collect => sub {
        my ($self, $doc, $score) = @_;
        push @docs, $doc;
    });
    $searcher->search_hc($query => $hc);
    warn scalar @docs;

    $query = $parser->parse($norm);
    $hc = Plucene::Search::HitCollector->new(collect => sub {
        my ($self, $doc, $score) = @_;
        push @docs, $doc;
    });
    $searcher->search_hc($query => $hc);
    warn scalar @docs;
Subject: plucene.pl
# plucene.pl - reproduction script for "Unicode does not match".
#
# Builds a two-document Plucene index in ./uni.index: one document whose
# content is a string of non-ASCII code points, and one whose content is the
# ASCII word "foo". It then searches the index for each string in turn and
# warns the number of hits found for that query alone.
use strict;
use warnings;

use Plucene;
use Plucene::Analysis::SimpleAnalyzer;
use Plucene::Index::Writer;
use Plucene::Search::IndexSearcher;
use Plucene::Document::Field;
use Plucene::Document;
use Plucene::Search::HitCollector;
use Plucene::QueryParser;

my $INDEX_PATH = "uni.index";

# count_hits($searcher, $parser, $text)
#
# Parses $text with $parser, runs the resulting query against $searcher and
# returns the number of documents handed to the hit collector. A fresh hit
# list is used on every call, so the count is never inflated by an earlier
# query.
sub count_hits {
    my ($searcher, $parser, $text) = @_;

    my @hits;
    my $collector = Plucene::Search::HitCollector->new(
        collect => sub {
            my ($self, $doc, $score) = @_;
            push @hits, $doc;
        }
    );

    $searcher->search_hc($parser->parse($text) => $collector);

    return scalar @hits;
}

my $uni  = "\x{3028}\x{3063}\x{3057}\x{3024}";
my $norm = "foo";

# --- Index both strings, one document each ---------------------------------
my $analyzer = Plucene::Analysis::SimpleAnalyzer->new();
my $writer   = Plucene::Index::Writer->new($INDEX_PATH, $analyzer, 1);

for my $content ($uni, $norm) {
    my $doc = Plucene::Document->new;
    $doc->add(Plucene::Document::Field->Text(content => $content));
    $writer->add_document($doc);
}

# Drop the writer before searching.
# NOTE(review): presumably this is what flushes the index to disk - confirm
# against the Plucene::Index::Writer documentation.
undef $writer;

# --- Search for each string -------------------------------------------------
my $parser = Plucene::QueryParser->new({
    analyzer => Plucene::Analysis::SimpleAnalyzer->new(),
    default  => "content",
});
my $searcher = Plucene::Search::IndexSearcher->new($INDEX_PATH);

warn count_hits($searcher, $parser, $uni);
warn count_hits($searcher, $parser, $norm);

# Alternative reproduction via Plucene::Simple, kept disabled.
#use Plucene::Simple;
#
#my $plucy = Plucene::Simple->open('uni.plu');
#
##my $uni = "\x{0E01}\x{0E09}\x{0E0C}\x{0E50}";
#my $uni = "\x{3028}\x{3063}\x{3057}\x{3024}";
#my $norm = "foo";
#
#$plucy->add(
#    test => {
#        content => $uni,
#    },
#    test2 => {
#        content => $norm,
#    }
#);
#
#warn $plucy->search($uni);
#warn $plucy->search($norm);
#
#