File Coverage

blib/lib/AxKit/XSP/Wiki/Indexer.pm
Criterion Covered Total %
statement 9 48 18.7
branch 0 6 0.0
condition 0 4 0.0
subroutine 3 7 42.8
pod 2 4 50.0
total 14 69 20.2


line stmt bran cond sub pod time code
1             # $Id: Indexer.pm,v 1.2 2003/02/02 21:20:46 matt Exp $
2              
3             package AxKit::XSP::Wiki::Indexer;
4 1     1   5 use strict;
  1         2  
  1         28  
5 1     1   2400 use XML::SAX::Base;
  1         28813  
  1         36  
6 1     1   13 use vars qw($VERSION @ISA);
  1         2  
  1         697  
7             $VERSION = '1.00';
8             @ISA = qw(XML::SAX::Base);
9              
10             sub new {
11 0     0 0   my $class = shift;
12 0           my (%opts) = @_;
13            
14 0   0       my $db = $opts{DB} || die "DB argument required";
15 0   0       my $page_id = $opts{PageId} || die "PageId argument required";
16            
17 0           my $self = bless { DB => $db, PageId => $page_id }, $class;
18            
19 0           $self->{InsertCTI} = $db->prepare("INSERT INTO ContentIndex (page_id, word_id, value) VALUES (?, ?, ?)");
20 0           $self->{InsertWord} = $db->prepare("INSERT INTO Word (word) VALUES (?)");
21 0           $self->{InsertWord}->{PrintError} = 0;
22 0           $self->{FindWord} = $db->prepare("SELECT id FROM Word WHERE word = ?");
23 0           $self->{DeleteCTI} = $db->prepare("DELETE FROM ContentIndex WHERE page_id = ?");
24            
25 0           $self->{Words} = {};
26 0           $self->{DocSize} = 0;
27            
28 0           return $self;
29             }
30              
31             sub end_document {
32 0     0 1   my ($self) = @_;
33            
34             # Delete current index for this page
35 0           $self->{DeleteCTI}->execute($self->{PageId});
36            
37 0           for my $word (keys %{$self->{Words}}) {
  0            
38 0 0         next unless $word;
39 0           my $word_id = $self->insert_word($word);
40 0 0         next unless $word_id;
41 0           warn("Indexing: $self->{PageId}, $word_id, $word\n");
42 0           $self->{InsertCTI}->execute(
43             $self->{PageId},
44             $word_id,
45             $self->{Words}{$word},
46             );
47             }
48 0           $self->{DB}->commit;
49             }
50              
51             sub insert_word {
52 0     0 0   my ($self, $word) = @_;
53            
54 0           my $word_id;
55 0           eval {
56 0           $self->{InsertWord}->execute($word);
57 0           $word_id = $self->{DB}->func('last_insert_rowid');
58             };
59 0 0         if ($@) {
60 0           $self->{FindWord}->execute($word);
61 0           my $row = $self->{FindWord}->fetch;
62 0           $word_id = $row->[0];
63             }
64            
65 0           return $word_id;
66             }
67              
68             # NB: This implementation assumes SAX parsers that don't break mid-word.
69             # (Could use filter if this is a problem)
70             sub characters {
71 0     0 1   my ($self, $node) = @_;
72            
73 0           while ($node->{Data} =~ /\G(\S*)\s*/gc) {
74 0           my $word = $1;
75 0           $word =~ s/\W*$//; # strip trailing non-word chars
76 0           $word =~ s/^\W*//; # strip leading non-word chars
77 0           $self->{Words}{lc($word)}++;
78             }
79             }
80              
81             1;