Subject: | Two bugs in "_group_set_bysubject" |
Two bugs found in Mail::Thread's "_group_set_bysubject". I've attached a patch with my "fixes".
1: "_group_set_bysubject" would crash when all messages in a thread didn't have the same subject. This happened because when populating a hash with subjects, only the "topmost" subject for each container in the rootset was included. This lead to "_group_set_bysubject" trying to work on undefined objects because it fetches objects from the poorly populated subject hash. My fix is simply a recursive collection of *all* subjects into the hash instead of just the topmost ones. To do this, I created Mail::Thread::Container->_populate_hash_with_subjects() wich expects a hash reference as parameter.
2: "_group_set_bysubject" sometimes misplaces containers. This happens when it is "removing the "second" message from the root set". I really don't know why this happens, so I put an ugly workaround int the code. Actually fixing the bug would of course be better, but I don't know what it is.
3: Not a bug at all. Just a change that suits me. I've made Mail::Thread::Container->simple_subject() be a bit more brutal in order to be able to thread by subject for mailing lists and messges created by software where the "Re:" has been localized (for example, some swedish translations of Lookout Express uses "Sv:" instead of "Re:".
Notes:
I started programming perl just a few months ago. My fixes seems to work for me, but I might well have introduced memory leaks in my workaround for bug 2 above. I haven't really come to grips with when perls GC works and when it doesn't yet.
A, yes. Including info that should be included:
Distribution: Mail::Thread 2.41 installed from CPAN
Perl Version: 5.8.2
OS: FreeBSD 4.9 RELEASE, Generic i386
Regards
/Jonas Eckerman
--- Thread.pm Sun Jan 25 23:08:32 2004
+++ Thread.pm.jonas Sun Jan 25 23:08:21 2004
@@ -66,32 +66,45 @@
my $self = shift;
my $root = $self->_container_class->new( 'fakeroot' );
$root->set_children( $self->rootset );
-
my %subject;
- for (my $walk = $root->child; $walk; $walk = $walk->next) {
- my $sub = $walk->topmost->simple_subject or next;
- # Add this container to the hash if:
- # - There is no container in the hash with this subject, or
- # - This one is a dummy container and the old one is not: the dummy
- # one is more interesting as a root, so put it in the hash instead.
- # - The container in the table has a "Re:" version of this subject,
- # and this container has a non-"Re:" version of this subject.
- # The non-re version is the more interesting of the two.
-
- my $old = $subject{$sub};
- if (!$old ||
- (!$walk->message && !$old->message) ||
- ($old->message && $old->isreply &&
- $walk->message && !$walk->isreply)) {
- $subject{$sub} = $walk;
- }
- }
+
+ # Poupulate the hash with *all* subjects!
+ # /Jonas Eckerman, 2004-01-25
+ $root->_populate_hash_with_subjects(\%subject);
+
+ # This code took for granted that all posts in the same thread
+ # has the same subject. It didn't live in reality!
+ # /Jonas Eckerman, 2004-01-25
+ #for (my $walk = $root->child; $walk; $walk = $walk->next) {
+ # my $sub = $walk->topmost->simple_subject or next;
+ # # Add this container to the hash if:
+ # # - There is no container in the hash with this subject, or
+ # # - This one is a dummy container and the old one is not: the dummy
+ # # one is more interesting as a root, so put it in the hash instead.
+ # # - The container in the table has a "Re:" version of this subject,
+ # # and this container has a non-"Re:" version of this subject.
+ # # The non-re version is the more interesting of the two.
+ #
+ # my $old = $subject{$sub};
+ # if (!$old ||
+ # (!$walk->message && !$old->message) ||
+ # ($old->message && $old->isreply &&
+ # $walk->message && !$walk->isreply)) {
+ # $subject{$sub} = $walk;
+ # }
+ #}
+
return unless %subject;
# %subject is now populated with one entry for each subject which
# occurs in the root set. Now iterate over the root set, and
# gather together the difference.
+ # Work around _group_set_bysubject tendency to misplace containers...
+ # /Jonas Eckerman 2004-01-25
+ my @all = ();
+ $root->_collect_to_list(\@all);
+
my ($prev, $walk, $rest);
for ($walk = $root->child, $rest = eval{ $walk->next };
$walk;
@@ -99,12 +112,14 @@
my $subj = $walk->topmost->simple_subject or next;
my $old = $subject{$subj};
next if $old == $walk;
-
# Remove the "second" message from the root set
- if (!$prev) { $root->child( $walk->next ) }
- else { $prev->next( $walk->next ) }
+ if (!$prev) {
+ $root->child( $walk->next )
+ }
+ else {
+ $prev->next( $walk->next )
+ }
$walk->next(undef);
-
if (!$old->message && !$walk->message) {
# They're both dummies; merge them.
$old->add_child( $_ ) for $walk->children;
@@ -126,16 +141,51 @@
$new->add_child( $_ ) for $old->children;
$old->add_child( $walk );
$old->add_child( $new );
+ # Work around _group_set_bysubject tendency to misplace containers...
+ # /Jonas Eckerman 2004-01-25
+ push @all, $new;
}
# we've done a merge, so keep the same `prev' next time around.
$walk = $prev;
}
+ # Work around _group_set_bysubject tendency to misplace containers...
+ # /Jonas Eckerman 2004-01-25
+ my %left = ();
+ $root->_collect_to_hash(\%left);
+ foreach my $cont (@all) {
+ if ($cont->message && !$left{$cont}) {
+ $cont->child( undef );
+ $root->add_child( $cont );
+ }
+ }
+
# repopulate the rootset from our fake one
@{$self->{rootset}} = $root->children;
$root->remove_child($_) for $self->rootset;
}
+# Collect all containers with messages to a list
+sub _collect_to_list {
+ my $self = shift;
+ my @list = ();
+ foreach my $cont (@{$self->{rootset}}) {
+ $cont->_collect_to_list(\@list);
+ }
+ return @list;
+}
+
+# Collect all containers with messages to a list
+sub _collect_to_hash {
+ my $self = shift;
+ my %hash = ();
+ foreach my $cont (@{$self->{rootset}}) {
+ $cont->_collect_to_hash(\%hash);
+ }
+ return %hash;
+
+}
+
sub thread {
my $self = shift;
$self->_setup();
@@ -317,7 +367,6 @@
sub subject { $_[0]->header("subject") }
sub header { eval { my $s = $_[0]->message->head->get( $_[1] ) || ''; chomp $s; $s; } }
-
sub topmost {
my $self = shift;
@@ -335,11 +384,63 @@
$subject =~ m{^re:\s+}i;
}
+# Collect containers with messages to an array
+sub _collect_to_list {
+ my $self = shift;
+ my $list = shift;
+ while ($self) {
+ push @$list, $self if ($self->message);
+ _collect_to_list($self->child,$list);
+ $self = $self->next;
+ }
+}
+
+# Collect containers with messages to a hash
+sub _collect_to_hash {
+ my $self = shift;
+ my $hash = shift;
+ while ($self) {
+ ${$hash}{$self} = 1 if ($self->message);
+ _collect_to_hash($self->child,$hash);
+ $self = $self->next;
+ }
+}
+
+# Poupulate the hash with *all* subjects!
+# /Jonas Eckerman, 2004-01-25
+sub _populate_hash_with_subjects {
+ my $self = shift;
+ return if (!$self);
+ my $hash = shift;
+ my $subj;
+ while ($self) {
+ $subj = $self->simple_subject;
+ if ($subj) {
+ my $old = ${$hash}{$subj};
+ if (!$old ||
+ (!$self->message && !$old->message) ||
+ ($old->message && $old->isreply &&
+ $self->message && !$self->isreply)) {
+ ${$hash}{$subj} = $self;
+ }
+ }
+ _populate_hash_with_subjects($self->child,$hash);
+ $self = $self->next;
+ }
+}
+
+# re: changed because I want this to work with
+# stupidly localized software as well as mailinglists...
+# /Jonas Eckerman, 2004-01-25
sub simple_subject {
my $self = shift;
my $subject = $self->subject;
- $subject =~ s/^re:\s+//gi;
- $subject;
+ $subject =~ s/^\s+//;
+ 1 while $subject =~ s/^([a-z]{1,3}|\[[-a-z]+\])(\s*\[\d*\])?[:-;]\s*//gi;
+ $subject =~ s/\s+/ /g;
+ $subject =~ s/\s+$//;
+ #$subject =~ s/^re:\s+//gi;
+ return lc($subject);
}
sub add_child {