File Coverage

blib/lib/CPAN/Mini/Webserver/Index.pm

Criterion	Covered	Total	%
statement	7	9	77.7
branch			n/a
condition			n/a
subroutine	3	3	100.0
pod			n/a
total	10	12	83.3

line	stmt	sub	time	code
1	1	1	24353	use strict;
	1		2
	1		32
2	1	1	5	use warnings;
	1		2
	1		60
3
4				package CPAN::Mini::Webserver::Index;
5
6				# ABSTRACT: search term index for a CPAN::Mini web server
7
8				our $VERSION = '0.58'; # VERSION
9
10	1	1	1421	use Moose;
	0
	0
11				use List::MoreUtils qw(uniq);
12				use Search::QueryParser;
13				use String::CamelCase qw(wordsplit);
14				use Text::Unidecode;
15				use Search::Tokenizer;
16				use Pod::Simple::Text;
17				use Lingua::StopWords qw( getStopWords );
18
19				has 'index' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
20				has 'full_text' => ( is => 'ro' );
21				has 'index_subs' => ( is => 'ro' );
22
23				sub add {
24				my ( $self, $key, $words ) = @_;
25
26				my $index = $self->index;
27				push @{ $index->{$_} }, $key for @{$words};
28
29				return;
30				}
31
32				sub create_index {
33				my ( $self, $parse_cpan_authors, $parse_cpan_packages ) = @_;
34
35				$self->_index_items_with( "_author_words", $parse_cpan_authors->authors );
36				$self->_index_items_with( "_dist_words", $parse_cpan_packages->latest_distributions );
37				$self->_index_items_with( "_package_words", $parse_cpan_packages->packages );
38
39				$self->_index_sub_routines( $parse_cpan_packages->packages ) if $self->index_subs;
40
41				return;
42				}
43
44				sub _index_sub_routines {
45				my ( $self, @packages ) = @_;
46
47				$self->{subs}{ $_ } = 1 for map { $_->subs } @packages;
48
49				return;
50				}
51
52				sub _index_items_with {
53				my ( $self, $method, @items ) = @_;
54
55				for my $item ( @items ) {
56				my @words = $self->$method( $item );
57				@words = uniq map { lc } @words;
58				$self->add( $item, \@words );
59				}
60
61				return;
62				}
63
64				sub _author_words {
65				my ( $self, $author ) = @_;
66				my @words = ( $author->name, $author->pauseid );
67				return @words;
68				}
69
70				sub _dist_words {
71				my ( $self, $dist ) = @_;
72				my @words = split '-', unidecode $dist->dist;
73				@words = map { $_, wordsplit( $_ ) } @words;
74				return @words;
75				}
76
77				sub _package_words {
78				my ( $self, $package ) = @_;
79				my @words = split '::', unidecode $package->package;
80				@words = map { $_, wordsplit( $_ ) } @words;
81
82				push @words, $self->_full_text_words( $package ) if $self->full_text;
83
84				return @words;
85				}
86
87				sub _full_text_words {
88				my ( $self, $package ) = @_;
89				my @words = split '::', unidecode $package->package;
90				@words = map { $_, wordsplit( $_ ) } @words;
91
92				my $content = $package->file_content;
93				my $text;
94				my $parser = Pod::Simple::Text->new;
95				$parser->no_whining( 1 );
96				$parser->no_errata_section( 1 );
97				$parser->output_string( \$text );
98				$parser->parse_string_document( $content );
99
100				my $stopwords = { %{ getStopWords('en') }, NAME => 1, DESCRIPTION => 1, USAGE => 1, RETURNS => 1 };
101				my $iterator = Search::Tokenizer->new( regex => qr/\p{Word}+/, lower => 0, stopwords => $stopwords )->( $text );
102				while (my ($term, $len, $start, $end, $index) = $iterator->()) {
103				push @words, $term;
104				}
105
106				return @words;
107				}
108
109				sub search {
110				my ( $self, $q ) = @_;
111
112				my $qp = Search::QueryParser->new( rxField => qr/NOTAFIELD/, );
113				my $query = $qp->parse( $q, 1 );
114				return if !$query;
115
116				my $index = $self->index;
117				my @results;
118
119				for my $part ( @{ $query->{'+'} } ) {
120				my $value = $part->{value};
121				my @words = split /(?:\:\:\| \|-)/, unidecode lc $value;
122				for my $word ( @words ) {
123				my @word_results = @{ $index->{$word} \|\| [] };
124				if ( @results ) {
125				my %seen;
126				$seen{$_} = 1 for @word_results;
127				@results = grep { $seen{$_} } @results;
128				}
129				else {
130				@results = @word_results;
131				}
132				}
133				}
134
135				for my $part ( @{ $query->{'-'} } ) {
136				my $value = $part->{value};
137				my @word_results = $self->search_word( $value );
138				my %seen;
139				$seen{$_} = 1 for @word_results;
140				@results = grep { !$seen{$_} } @results;
141				}
142
143				return @results;
144				}
145
146				sub search_word {
147				my ( $self, $word ) = @_;
148
149				my $index = $self->index;
150				my @words = split /(?:\:\:\| \|-)/, unidecode lc $word;
151				@words = grep exists( $index->{$_} ), @words;
152
153				my @results = map @{ $index->{$_} }, @words;
154				return @results;
155				}
156
157				1;
158
159
160
161				=pod
162
163				=head1 NAME
164
165				CPAN::Mini::Webserver::Index - search term index for a CPAN::Mini web server
166
167				=head1 VERSION
168
169				version 0.58
170
171				=head1 DESCRIPTION
172
173				This module indexes words for the search feature in CPAN::Mini::Webserver.
174
175				=head1 AUTHORS
176
177				=over 4
178
179				=item *
180
181				Leon Brocard <acme@astray.com>
182
183				=item *
184
185				Christian Walde <walde.christian@googlemail.com>
186
187				=back
188
189				=head1 COPYRIGHT AND LICENSE
190
191				This software is copyright (c) 2012 by Christian Walde.
192
193				This is free software; you can redistribute it and/or modify it under
194				the same terms as the Perl 5 programming language system itself.
195
196				=cut
197
198
199				__END__
200
201