Subject: | a AI::Categorizer::Document->add_category($category) would be useful. |
hi,
given the following situation:
an m:n relation between documents and categories,
i.e. 3 tables:
document(d_id, content)
category(c_id, c_name)
document_category(d_id, c_id)
when iterating over a join of these 3 tables, the following code seems to work
well and looks nice (simplified):
for $row (@rows) {
$c = AI::Categorizer::Category->by_name(...) unless exists;
$d = AI::Categorizer::Document->new(...) unless exists;
$c->add_document($d);
$d->add_category($c);
}
well, but there is no AI::Categorizer::Document->add_category($category)
so I wrote one. (see patch below)
the tests are all ok (except tests that require modules which are not
installed on my system).
also, 2 tests I wrote (one using the conventional method and the other
one using the new method) yield the same result. (example.pl & example2.pl)
as far as I understand the source, it should work just fine.
but, please have a deeper look if you plan to integrate it.
kind regards,
andreas
patch: (also in patch.txt)
--- AI/Categorizer/Document.pm_original
2007-09-20 01:12:29.000000000 +0200
+++ AI/Categorizer/Document.pm
2007-09-20 01:18:04.000000000 +0200
@@ -147,6 +147,11 @@
sub name { $_[0]->{name} }
sub stopword_behavior { $_[0]->{stopword_behavior} }
+sub add_category {
+ my $self = shift;
+ $self->{categories}->insert( $_[0] );
+}
+
sub features {
my $self = shift;
if (@_) {
Subject: | example2.pl |
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
use AI::Categorizer;
use AI::Categorizer::Collection::Files;
use AI::Categorizer::Learner::NaiveBayes;
# Demo of the proposed Document->add_category method: the training documents
# are created WITHOUT a "categories" constructor argument and are linked to
# their category afterwards, from both sides (document -> category and
# category -> document).
#
# NOTE(review): none of the documents is given a name -- confirm that
# AI::Categorizer::Document and the sets holding the documents cope with
# unnamed documents.

# The two categories the learner has to tell apart.
my $cat_positive = AI::Categorizer::Category->by_name(name => "class");
my $cat_negative = AI::Categorizer::Category->by_name(name => "not_class");

# Training data, one row per document: [ content, category ].
my @training_spec = (
    [ "word1 word2 word3 word4 word9 word10", $cat_positive ],
    [ "word1 word2 word3 word5 word8 word10", $cat_positive ],
    [ "word2 word3 word4 word5 word6 word7",  $cat_negative ],
    [ "word1 word2 word4 word5 word6",        $cat_negative ],
);

# Build the (still uncategorized) training documents, in table order.
my @training_docs;
for my $row (@training_spec) {
    my $doc = AI::Categorizer::Document->new( content => $row->[0] );
    push @training_docs, $doc;
}

# The document whose category the trained learner should predict.
my $unlabeled_doc = AI::Categorizer::Document->new( content => "word3 word4 word10" );

# First pass: tell every document which category it belongs to (new method).
for my $idx (0 .. $#training_spec) {
    $training_docs[$idx]->add_category( $training_spec[$idx][1] );
}

# Second pass: tell every category which documents it contains.
for my $idx (0 .. $#training_spec) {
    $training_spec[$idx][1]->add_document( $training_docs[$idx] );
}

my $knowledge_set = AI::Categorizer::KnowledgeSet->new(
    verbose    => 1,
    categories => [ $cat_positive, $cat_negative ],
    documents  => [ @training_docs ],
);

my $learner = AI::Categorizer::Learner::NaiveBayes->new( verbose => 1 );
$learner->train( knowledge_set => $knowledge_set );

my $hypothesis = $learner->categorize( $unlabeled_doc );

# Debugging aids; enable when the internals need inspecting:
# print Dumper $learner;
# print Dumper $hypothesis;

# Collect the results, then emit the report in one go.
my $best_category = $hypothesis->best_category;
my @all_categories = $hypothesis->categories;
my @all_scores     = $hypothesis->scores( @all_categories );

my $report = "For test document:\n"
           . " Best category = " . $best_category . "\n"
           . " All categories = " . join(', ', @all_categories) . "\n"
           . " Assigned scores: " . join(', ', @all_scores) . "\n";
print $report;
Subject: | example.pl |
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
use AI::Categorizer;
use AI::Categorizer::Collection::Files;
use AI::Categorizer::Learner::NaiveBayes;
# Conventional variant of the demo: every training document receives its
# category through the "categories" constructor argument; afterwards each
# category is told about its documents via add_document.
#
# NOTE(review): none of the documents is given a name -- confirm that
# AI::Categorizer::Document and the sets holding the documents cope with
# unnamed documents.

# The two categories the learner has to tell apart.
my $cat_positive = AI::Categorizer::Category->by_name(name => "class");
my $cat_negative = AI::Categorizer::Category->by_name(name => "not_class");

# Training data, one row per document: [ content, category ].
my @training_spec = (
    [ "word1 word2 word3 word4 word9 word10", $cat_positive ],
    [ "word1 word2 word3 word5 word8 word10", $cat_positive ],
    [ "word2 word3 word4 word5 word6 word7",  $cat_negative ],
    [ "word1 word2 word4 word5 word6",        $cat_negative ],
);

# Build the training documents in table order, categories attached up front.
my @training_docs;
for my $row (@training_spec) {
    my ($text, $category) = @$row;
    my $doc = AI::Categorizer::Document->new(
        content    => $text,
        categories => [$category],
    );
    push @training_docs, $doc;
}

# The document whose category the trained learner should predict.
my $unlabeled_doc = AI::Categorizer::Document->new( content => "word3 word4 word10" );

# Register each document with its category.
for my $idx (0 .. $#training_spec) {
    $training_spec[$idx][1]->add_document( $training_docs[$idx] );
}

my $knowledge_set = AI::Categorizer::KnowledgeSet->new(
    verbose    => 1,
    categories => [ $cat_positive, $cat_negative ],
    documents  => [ @training_docs ],
);

my $learner = AI::Categorizer::Learner::NaiveBayes->new( verbose => 1 );
$learner->train( knowledge_set => $knowledge_set );

my $hypothesis = $learner->categorize( $unlabeled_doc );

# Debugging aids; enable when the internals need inspecting:
# print Dumper $learner;
# print Dumper $hypothesis;

# Collect the results, then emit the report in one go.
my $best_category = $hypothesis->best_category;
my @all_categories = $hypothesis->categories;
my @all_scores     = $hypothesis->scores( @all_categories );

my $report = "For test document:\n"
           . " Best category = " . $best_category . "\n"
           . " All categories = " . join(', ', @all_categories) . "\n"
           . " Assigned scores: " . join(', ', @all_scores) . "\n";
print $report;
Subject: | patch.txt |
--- /usr/local/share/perl/5.8.8/AI/Categorizer/Document.pm_original 2007-09-20 01:12:29.000000000 +0200
+++ /usr/local/share/perl/5.8.8/AI/Categorizer/Document.pm 2007-09-20 01:18:04.000000000 +0200
@@ -147,6 +147,11 @@
sub name { $_[0]->{name} }
sub stopword_behavior { $_[0]->{stopword_behavior} }
+sub add_category {
+ my $self = shift;
+ $self->{categories}->insert( $_[0] );
+}
+
sub features {
my $self = shift;
if (@_) {