| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Search::Mousse::Writer; | 
| 2 | 2 |  |  | 2 |  | 3918 | use strict; | 
|  | 2 |  |  |  |  | 7 |  | 
|  | 2 |  |  |  |  | 137 |  | 
| 3 | 2 |  |  | 2 |  | 14 | use base qw(Class::Accessor::Chained::Fast); | 
|  | 2 |  |  |  |  | 7 |  | 
|  | 2 |  |  |  |  | 276 |  | 
| 4 |  |  |  |  |  |  | __PACKAGE__->mk_accessors( | 
| 5 |  |  |  |  |  |  | qw(directory name stemmer key_to_id id_to_key id_to_value word_to_id seen_key) | 
| 6 |  |  |  |  |  |  | ); | 
| 7 | 2 |  |  | 2 |  | 14 | use CDB_File; | 
|  | 2 |  |  |  |  | 4 |  | 
|  | 2 |  |  |  |  | 90 |  | 
| 8 | 2 |  |  | 2 |  | 12 | use CDB_File_Thawed; | 
|  | 2 |  |  |  |  | 68 |  | 
|  | 2 |  |  |  |  | 104 |  | 
| 9 | 2 |  |  | 2 |  | 43 | use File::Temp qw/ :POSIX /; | 
|  | 2 |  |  |  |  | 5 |  | 
|  | 2 |  |  |  |  | 401 |  | 
| 10 | 2 |  |  | 2 |  | 12 | use List::Uniq qw(uniq); | 
|  | 2 |  |  |  |  | 5 |  | 
|  | 2 |  |  |  |  | 108 |  | 
| 11 | 2 |  |  | 2 |  | 11 | use Path::Class; | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 1830 |  | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | my $ID = 1; | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | sub new { | 
| 16 | 2 |  |  | 2 | 1 | 5209 | my $class = shift; | 
| 17 | 2 |  |  |  |  | 8 | my $self  = {}; | 
| 18 | 2 |  |  |  |  | 10 | bless $self, $class; | 
| 19 |  |  |  |  |  |  |  | 
| 20 | 2 |  |  |  |  | 14 | my %args = @_; | 
| 21 | 2 |  |  |  |  | 15 | $self->directory($args{directory}); | 
| 22 | 2 |  |  |  |  | 57 | $self->name($args{name}); | 
| 23 |  |  |  |  |  |  | $self->stemmer( | 
| 24 |  |  |  |  |  |  | $args{stemmer} || | 
| 25 |  |  |  |  |  |  | sub { | 
| 26 | 110 |  |  | 110 |  | 649 | my $words = lc shift; | 
| 27 | 110 |  |  |  |  | 499 | return uniq(split / /, $words); | 
| 28 |  |  |  |  |  |  | } | 
| 29 | 2 |  | 100 |  |  | 44 | ); | 
| 30 |  |  |  |  |  |  |  | 
| 31 | 2 |  |  |  |  | 87 | $self->_init; | 
| 32 | 2 |  |  |  |  | 26 | return $self; | 
| 33 |  |  |  |  |  |  | } | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  | sub _init { | 
| 36 | 2 |  |  | 2 |  | 6 | my ($self) = @_; | 
| 37 | 2 |  |  |  |  | 9 | my $name = $self->name; | 
| 38 |  |  |  |  |  |  |  | 
| 39 | 2 |  |  |  |  | 29 | my $filename = file($self->directory, "${name}_key_to_id.cdb"); | 
| 40 | 2 |  |  |  |  | 751 | my $tempfile = tmpnam(); | 
| 41 | 2 | 50 |  |  |  | 955 | $self->key_to_id(CDB_File->new($filename, $tempfile)) or die $!; | 
| 42 |  |  |  |  |  |  |  | 
| 43 | 2 |  |  |  |  | 315 | $filename = file($self->directory, "${name}_id_to_key.cdb"); | 
| 44 | 2 |  |  |  |  | 218 | $tempfile = tmpnam(); | 
| 45 | 2 | 50 |  |  |  | 533 | $self->id_to_key(CDB_File->new($filename, $tempfile)) or die $!; | 
| 46 |  |  |  |  |  |  |  | 
| 47 | 2 |  |  |  |  | 278 | $filename = file($self->directory, "${name}_id_to_value.cdb"); | 
| 48 | 2 |  |  |  |  | 190 | $tempfile = tmpnam(); | 
| 49 | 2 | 50 |  |  |  | 462 | $self->id_to_value(CDB_File_Thawed->new($filename, $tempfile)) or die $!; | 
| 50 |  |  |  |  |  |  |  | 
| 51 | 2 |  |  |  |  | 29 | $self->word_to_id({}); | 
| 52 | 2 |  |  |  |  | 21 | $self->seen_key({}); | 
| 53 |  |  |  |  |  |  | } | 
| 54 |  |  |  |  |  |  |  | 
| 55 |  |  |  |  |  |  | sub add { | 
| 56 | 220 |  |  | 220 | 1 | 247858 | my ($self, $key, $value, $words) = @_; | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  | # key must be unique | 
| 59 | 220 | 50 |  |  |  | 838 | return if $self->seen_key->{$key}++; | 
| 60 |  |  |  |  |  |  |  | 
| 61 | 220 |  |  |  |  | 1732 | my $id = $ID++; | 
| 62 |  |  |  |  |  |  |  | 
| 63 | 220 |  |  |  |  | 551 | $self->key_to_id->insert($key,  $id); | 
| 64 | 220 |  |  |  |  | 1660 | $self->id_to_key->insert($id,   $key); | 
| 65 | 220 |  |  |  |  | 1523 | $self->id_to_value->insert($id, $value); | 
| 66 |  |  |  |  |  |  |  | 
| 67 | 220 |  |  |  |  | 670 | my @words = $self->stemmer->($words); | 
| 68 | 220 |  |  |  |  | 17452 | foreach my $word (@words) { | 
| 69 | 985 |  |  |  |  | 5559 | push @{ $self->word_to_id->{$word} }, $id; | 
|  | 985 |  |  |  |  | 2377 |  | 
| 70 |  |  |  |  |  |  | } | 
| 71 |  |  |  |  |  |  | } | 
| 72 |  |  |  |  |  |  |  | 
| 73 |  |  |  |  |  |  | sub write { | 
| 74 | 2 |  |  | 2 | 1 | 27 | my ($self) = @_; | 
| 75 | 2 |  |  |  |  | 9 | my $name = $self->name; | 
| 76 |  |  |  |  |  |  |  | 
| 77 | 2 |  |  |  |  | 18 | $self->key_to_id->finish; | 
| 78 | 2 |  |  |  |  | 1969025 | $self->id_to_key->finish; | 
| 79 | 2 |  |  |  |  | 2208335 | $self->id_to_value->finish; | 
| 80 |  |  |  |  |  |  |  | 
| 81 | 2 |  |  |  |  | 35 | my $filename = file($self->directory, "${name}_word_to_id.cdb"); | 
| 82 | 2 |  |  |  |  | 428 | my $tempfile = tmpnam(); | 
| 83 | 2 | 50 |  |  |  | 657 | my $cdb      = CDB_File_Thawed->new($filename, $tempfile) or die $!; | 
| 84 |  |  |  |  |  |  |  | 
| 85 | 2 |  |  |  |  | 6 | while (my ($key, $value) = each %{ $self->word_to_id }) { | 
|  | 426 |  |  |  |  | 4127 |  | 
| 86 | 424 |  |  |  |  | 4186 | $value = [ uniq @{$value} ]; | 
|  | 424 |  |  |  |  | 1517 |  | 
| 87 | 424 |  |  |  |  | 35602 | $cdb->insert($key, $value); | 
| 88 |  |  |  |  |  |  | } | 
| 89 | 2 |  |  |  |  | 26 | $cdb->finish; | 
| 90 |  |  |  |  |  |  | } | 
| 91 |  |  |  |  |  |  |  | 
| 92 |  |  |  |  |  |  | 1; | 
| 93 |  |  |  |  |  |  |  | 
| 94 |  |  |  |  |  |  | __END__ |