Subject: | Unicode does not match |
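Plucene does not appear to match Unicode text: searching for a non-ASCII string finds nothing, even when a document containing exactly that string has been indexed.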
The following/attached snippet produces:
mcrawfor@inkey 13:31 $ perl plucene.pl
0 at plucene.pl line 40.
1 at plucene.pl line 49.
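I feel this exemplifies the problem: the Unicode query finds 0 documents even though one was indexed with exactly that text, while the plain ASCII query does find its document.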
Test code:
# Reproduction script: index one document holding a non-ASCII string and one
# holding plain ASCII text, then search for each string in turn and report how
# many documents each query matched.
use strict;
use warnings;

use Plucene;
use Plucene::Analysis::SimpleAnalyzer;
use Plucene::Index::Writer;
use Plucene::Search::IndexSearcher;
use Plucene::Document::Field;
use Plucene::Document;
use Plucene::Search::HitCollector;
use Plucene::QueryParser;

# count_hits($searcher, $parser, $text)
#
# Parses $text with $parser, runs the resulting query against $searcher and
# returns the number of documents collected. The hit list is private to each
# call, so a count never carries over from one query to the next.
sub count_hits {
    my ($searcher, $parser, $text) = @_;

    my @hits;
    my $collector = Plucene::Search::HitCollector->new(collect => sub {
        my ($self, $doc, $score) = @_;
        push @hits, $doc;
    });
    $searcher->search_hc($parser->parse($text) => $collector);

    return scalar @hits;
}

my $uni  = "\x{3028}\x{3063}\x{3057}\x{3024}";
my $norm = "foo";

# NOTE(review): the third constructor argument is presumably the "create"
# flag, so every run starts from an empty index - confirm against the
# Plucene::Index::Writer documentation.
my $analyzer = Plucene::Analysis::SimpleAnalyzer->new();
my $writer = Plucene::Index::Writer->new("uni.index", $analyzer, 1);

# One document per test string, both stored in the "content" field.
for my $text ($uni, $norm) {
    my $doc = Plucene::Document->new;
    $doc->add(Plucene::Document::Field->Text(content => $text));
    $writer->add_document($doc);
}

# Drop the writer before the index is opened for searching.
undef $writer;

my $parser = Plucene::QueryParser->new({
    analyzer => Plucene::Analysis::SimpleAnalyzer->new(),
    default  => "content",
});
my $searcher = Plucene::Search::IndexSearcher->new("uni.index");

# Expected: 1 for each query. The report is that the first prints 0.
warn count_hits($searcher, $parser, $uni);
warn count_hits($searcher, $parser, $norm);
Subject: | plucene.pl |
# Reproduction script: index one document holding a non-ASCII string and one
# holding plain ASCII text, then search for each string in turn and report how
# many documents each query matched.
use strict;
use warnings;

use Plucene;
use Plucene::Analysis::SimpleAnalyzer;
use Plucene::Index::Writer;
use Plucene::Search::IndexSearcher;
use Plucene::Document::Field;
use Plucene::Document;
use Plucene::Search::HitCollector;
use Plucene::QueryParser;

# count_hits($searcher, $parser, $text)
#
# Parses $text with $parser, runs the resulting query against $searcher and
# returns the number of documents collected. The hit list is private to each
# call, so a count never carries over from one query to the next.
sub count_hits {
    my ($searcher, $parser, $text) = @_;

    my @hits;
    my $collector = Plucene::Search::HitCollector->new(collect => sub {
        my ($self, $doc, $score) = @_;
        push @hits, $doc;
    });
    $searcher->search_hc($parser->parse($text) => $collector);

    return scalar @hits;
}

my $uni  = "\x{3028}\x{3063}\x{3057}\x{3024}";
my $norm = "foo";

# NOTE(review): the third constructor argument is presumably the "create"
# flag, so every run starts from an empty index - confirm against the
# Plucene::Index::Writer documentation.
my $analyzer = Plucene::Analysis::SimpleAnalyzer->new();
my $writer = Plucene::Index::Writer->new("uni.index", $analyzer, 1);

# One document per test string, both stored in the "content" field.
for my $text ($uni, $norm) {
    my $doc = Plucene::Document->new;
    $doc->add(Plucene::Document::Field->Text(content => $text));
    $writer->add_document($doc);
}

# Drop the writer before the index is opened for searching.
undef $writer;

my $parser = Plucene::QueryParser->new({
    analyzer => Plucene::Analysis::SimpleAnalyzer->new(),
    default  => "content",
});
my $searcher = Plucene::Search::IndexSearcher->new("uni.index");

# Expected: 1 for each query. The report is that the first prints 0.
warn count_hits($searcher, $parser, $uni);
warn count_hits($searcher, $parser, $norm);
#use Plucene::Simple;
#
#my $plucy = Plucene::Simple->open('uni.plu');
#
##my $uni = "\x{0E01}\x{0E09}\x{0E0C}\x{0E50}";
#my $uni = "\x{3028}\x{3063}\x{3057}\x{3024}";
#my $norm = "foo";
#
#$plucy->add(
# test => {
# content => $uni,
# },
# test2 => {
# content => $norm,
# }
#);
#
#warn $plucy->search($uni);
#warn $plucy->search($norm);
#
#