File Coverage

buildlib/KinoSearch/Test/TestUtils.pm

Criterion	Covered	Total	%
statement	126	126	100.0
branch	15	24	62.5
condition			n/a
subroutine	24	24	100.0
pod	0	13	0.0
total	165	187	88.2

line	stmt	bran	sub	pod	time	code
1	46		46		2850918	use strict;
	46				115
	46				1754
2	46		46		398	use warnings;
	46				165
	46				2229
3
4						package KinoSearch::Test::TestUtils;
5	46		46		397	use base qw( Exporter );
	46				101
	46				7355
6
7						our @EXPORT_OK = qw(
8						working_dir
9						create_working_dir
10						remove_working_dir
11						create_index
12						create_uscon_index
13						test_index_loc
14						persistent_test_index_loc
15						init_test_index_loc
16						get_uscon_docs
17						utf8_test_strings
18						test_analyzer
19						doc_ids_from_td_coll
20						modulo_set
21						);
22
23	46		46		26578	use KinoSearch;
	46				132
	46				2382
24	46		46		23923	use KinoSearch::Test;
	46				114
	46				1442
25
26	46		46		418	use lib 'sample';
	46				97
	46				473
27	46		46		25341	use KinoSearch::Test::USConSchema;
	46				131
	46				2023
28
29	46		46		289	use File::Spec::Functions qw( catdir catfile curdir );
	46				95
	46				3510
30	46		46		57702	use Encode qw( _utf8_off );
	46				646193
	46				4765
31	46		46		429	use File::Path qw( rmtree );
	46				87
	46				2822
32	46		46		335	use Carp;
	46				97
	46				90060
33
34						my $working_dir = catfile( curdir(), 'kinosearch_test' );
35
36						# Return a directory within the system's temp directory where we will put all
37						# testing scratch files.
38	3		3	0	84	sub working_dir {$working_dir}
39
40						sub create_working_dir {
41	1	50	1	0	132	mkdir( $working_dir, 0700 ) or die "Can't mkdir '$working_dir': $!";
42						}
43
44						# Verify that this user owns the working dir, then zap it. Returns true upon
45						# success.
46						sub remove_working_dir {
47	2	100	2	0	76	return unless -d $working_dir;
48	1				23028	rmtree $working_dir;
49	1				10	return 1;
50						}
51
52						# Return a location for a test index to be used by a single test file. If
53						# the test file crashes it cannot clean up after itself, so we put the cleanup
54						# routine in a single test file to be run at or near the end of the test
55						# suite.
56						sub test_index_loc {
57	9		9	0	97	return catdir( $working_dir, 'test_index' );
58						}
59
60						# Return a location for a test index intended to be shared by multiple test
61						# files. It will be cleaned as above.
62						sub persistent_test_index_loc {
63	3		3	0	195	return catdir( $working_dir, 'persistent_test_index' );
64						}
65
66						# Destroy anything left over in the test_index location, then create the
67						# directory. Finally, return the path.
68						sub init_test_index_loc {
69	9		9	0	85	my $dir = test_index_loc();
70	9				10376	rmtree $dir;
71	9	50			175	die "Can't clean up '$dir'" if -e $dir;
72	9	50			839	mkdir $dir or die "Can't mkdir '$dir': $!";
73	9				51	return $dir;
74						}
75
76						# Build a RAM index, using the supplied array of strings as source material.
77						# The index will have a single field: "content".
78						sub create_index {
79	27		27	0	240886	my $folder = KinoSearch::Store::RAMFolder->new;
80	27				1157	my $indexer = KinoSearch::Index::Indexer->new(
81						index => $folder,
82						schema => KinoSearch::Test::TestSchema->new,
83						);
84	27				53959	$indexer->add_doc( { content => $_ } ) for @_;
85	27				231606	$indexer->commit;
86	27				2818	return $folder;
87						}
88
89						# Slurp us constitition docs and build hashrefs.
90						sub get_uscon_docs {
91
92	2		2	0	26	my $uscon_dir = catdir( 'sample', 'us_constitution' );
93	2	50			1282	opendir( my $uscon_dh, $uscon_dir )
94						or die "couldn't opendir '$uscon_dir': $!";
95	2				340	my @filenames = grep {/\.txt$/} sort readdir $uscon_dh;
	112				262
96	2	50			63	closedir $uscon_dh or die "couldn't closedir '$uscon_dir': $!";
97
98	2				6	my %docs;
99
100	2				9	for my $filename (@filenames) {
101	104				671	my $filepath = catfile( $uscon_dir, $filename );
102	104	50			5216	open( my $fh, '<', $filepath )
103						or die "couldn't open file '$filepath': $!";
104	104				137	my $content = do { local $/; <$fh> };
	104				386
	104				53727
105	104	50			690	$content =~ /(.?)\n\n(.)/s
106						or die "Can't extract title/bodytext from '$filepath'";
107	104				324	my $title = $1;
108	104				292	my $bodytext = $2;
109	104				9502	$bodytext =~ s/\s+/ /sg;
110	104	50			636	my $category
		100
		100
111						= $filename =~ /art/ ? 'article'
112						: $filename =~ /amend/ ? 'amendment'
113						: $filename =~ /preamble/ ? 'preamble'
114						: confess "Can't derive category for $filename";
115
116	104				2698	$docs{$filename} = {
117						title => $title,
118						bodytext => $bodytext,
119						url => "/us_constitution/$filename",
120						category => $category,
121						};
122						}
123
124	2				34	return \%docs;
125						}
126
127						sub create_uscon_index {
128	1		1	0	5	my $folder = KinoSearch::Store::FSFolder->new(
129						path => persistent_test_index_loc() );
130	1				80	my $schema = KinoSearch::Test::USConSchema->new;
131	1				13	my $indexer = KinoSearch::Index::Indexer->new(
132						schema => $schema,
133						index => $folder,
134						truncate => 1,
135						create => 1,
136						);
137
138	1				104	$indexer->add_doc( { content => "zz$_" } ) for ( 0 .. 10000 );
139	1				250320	$indexer->commit;
140	1				4	undef $indexer;
141
142	1				117	$indexer = KinoSearch::Index::Indexer->new(
143						schema => $schema,
144						index => $folder,
145						);
146	1				7	my $source_docs = get_uscon_docs();
147						$indexer->add_doc( { content => $_->{bodytext} } )
148	1				687	for values %$source_docs;
149	1				28842	$indexer->commit;
150	1				4	undef $indexer;
151
152	1				233	$indexer = KinoSearch::Index::Indexer->new(
153						schema => $schema,
154						index => $folder,
155						);
156	1				11	my @chars = ( 'a' .. 'z' );
157	1				6	for ( 0 .. 1000 ) {
158	1001				1898	my $content = '';
159	1001				2043	for my $num_words ( 1 .. int( rand(20) ) ) {
160	9570				15882	for ( 1 .. ( int( rand(10) ) + 10 ) ) {
161	139035				227414	$content .= @chars[ rand(@chars) ];
162						}
163	9570				14177	$content .= ' ';
164						}
165	1001				73037	$indexer->add_doc( { content => $content } );
166						}
167	1				9	$indexer->optimize;
168	1				98	$indexer->commit;
169						}
170
171						# Return 3 strings useful for verifying UTF-8 integrity.
172						sub utf8_test_strings {
173	5		5	0	136802	my $smiley = "\x{263a}";
174	5				14	my $not_a_smiley = $smiley;
175	5				70	_utf8_off($not_a_smiley);
176	5				13	my $frowny = $not_a_smiley;
177	5				25	utf8::upgrade($frowny);
178	5				22	return ( $smiley, $not_a_smiley, $frowny );
179						}
180
181						# Verify an Analyzer's transform, transform_text, and split methods.
182						sub test_analyzer {
183	9		9	0	2733	my ( $analyzer, $source, $expected, $message ) = @_;
184
185	9				212	my $inversion = KinoSearch::Analysis::Inversion->new( text => $source );
186	9				235	$inversion = $analyzer->transform($inversion);
187	9				48	my @got;
188	9				168	while ( my $token = $inversion->next ) {
189	13				114	push @got, $token->get_text;
190						}
191	9				76	Test::More::is_deeply( \@got, $expected, "analyze: $message" );
192
193	9				11897	$inversion = $analyzer->transform_text($source);
194	9				127	@got = ();
195	9				90	while ( my $token = $inversion->next ) {
196	13				96	push @got, $token->get_text;
197						}
198	9				56	Test::More::is_deeply( \@got, $expected, "transform_text: $message" );
199
200	9				11672	@got = @{ $analyzer->split($source) };
	9				297
201	9				72	Test::More::is_deeply( \@got, $expected, "split: $message" );
202						}
203
204						# Extract all doc nums from a SortCollector. Return two sorted array refs:
205						# by_score and by_id.
206						sub doc_ids_from_td_coll {
207	1223		1223	0	498958	my $collector = shift;
208	1223				2080	my @by_score;
209	1223				29624	my $match_docs = $collector->pop_match_docs;
210	17616	50			54919	my @by_score_then_id = map { $_->get_doc_id }
	19040				127458
211						sort {
212	1223				5595	$b->get_score <=> $a->get_score
213						\|\| $a->get_doc_id <=> $b->get_doc_id
214						} @$match_docs;
215	1223				3643	my @by_id = sort { $a <=> $b } @by_score_then_id;
	37343				41009
216	1223				21753	return ( \@by_score_then_id, \@by_id );
217						}
218
219						# Use a modulus to generate a set of numbers.
220						sub modulo_set {
221	3229		3229	0	2401714	my ( $interval, $max ) = @_;
222	3229				4835	my @out;
223	3229				10929	for ( my $doc = $interval; $doc < $max; $doc += $interval ) {
224	45031				106110	push @out, $doc;
225						}
226	3229				11216	return \@out;
227						}
228
229						1;
230
231						__END__