File Coverage

blib/lib/Text/Annotate/DataSource.pm

Criterion	Covered	Total	%
statement	53	55	96.3
branch	4	6	66.6
condition	1	3	33.3
subroutine	13	14	92.8
pod	1	8	12.5
total	72	86	83.7

line	stmt	bran	cond	sub	pod	time	code
1							package Text::Annotate::DataSource;
2	1			1		7	use strict;
	1					7
	1					37
3	1			1		1653	use Text::Annotate::HTMLWriter;
	1					3
	1					142
4	1			1		568	use Text::Annotate::WordScan;
	1					2
	1					34
5	1			1		7	use Carp;
	1					2
	1					75
6							BEGIN {
7	1			1		1	our $VERSION;
8	1					593	$VERSION = 0.01_2;
9							};
10
11							=head1 NAME
12
13							Text::Annotate::DataSource - parent class for annotation data sources
14
15							=head1
16
17							An annotation datasource provides two things:
18							a) a way to look up potential key phrases. Key phrases that match our data
19							yield URIs or similar identifiers.
20							b) a way to "explain" those URIs, producing a brief HTML description of the
21							phrase in question.
22
23							=cut
24
25							sub new {
26	1			1	0	2	my $class = shift;
27	1		33			8	$class = ref $class \|\| $class;
28	1					5	my $self = bless {}, $class;
29	1					4	$self;
30							};
31
32
33							# "obj" is supposed to provide access to annotation objects.
34							# TODO: refactor into a superclass
35							# TODO: some classes will want to be instantiated at the beginning, shut down
36							# later ...
37							# TODO: "require" objects when needed
38							# TODO: make objects configurable, at with an easily inherited interface
39							sub obj {
40	5			5	0	14	my ($class, $name) = @_;
41	5					33	my $o = {
42							HTMLWriter => "Text::Annotate::HTMLWriter",
43							WordScan => "Text::Annotate::WordScan"
44							}->{$name};
45	5	50				18	($o) or confess "no '$o' object recognised";
46	5					33	$o;
47							};
48
49							=head1 METHODS
50
51							=head2 index_lookup
52
53							Typical call:
54
55							$datasource->index_lookup ("get", "your", "widgets", ... "getyourwidgetsfrom", "wigentiacorp")
56
57							index_lookup is called with a list of (already canonicalised) key phrases. Where those
58							phrases match one we know about, we return a two-element list.
59
60							Typical output:
61
62							(["wigentiacorp", "http://www.wigentia.com/"],
63							["herring", "http://www.example.com/fishguide/herring/"])
64
65							It is passed a list so we don't get overwhelmed by function-switching overhead, whilst
66							painstakingly trundling through piles of useless words in search of something interesting.
67
68							You may well want to override this.
69
70							=cut
71
72							# TODO: doc interface here
73							# TODO: wouldn't references be faster?
74							sub index_lookup {
75	1			1	1	8	my ($self, @keys) = @_;
76	1					3	my $idx = $self->{idx};
77	1					2	my @out;
78	1					3	foreach my $k (@keys) {
79	45	100				87	push @out, [$k, $idx->{$k}] if $idx->{$k};
80							}
81	1					8	@out;
82							}
83
84							sub search_html {
85	1			1	0	102	my ($self, $to_search) = @_;
86	1					11	$to_search =~ s/\<.+?\>//sg;
87	1					10	$self->search($to_search);
88							}
89
90							sub search {
91	1			1	0	2	my $self = shift;
92	1					2	my $to_search = shift;
93	1					2	my @out;
94							my %seen;
95							my $cbk = sub {
96	1			1		2	my $in = shift;
97	1					10	my ($results) = [$self->index_lookup(@$in)];
98	1					7	while (@$results) {
99	2					4	my $r = shift @$results;
100	2	50				6	if ($seen{$r->[0]}) {
101	0					0	next; # ooh
102							};
103	2					5	$seen{$r->[0]} = 1;
104	2					3	foreach my $link (@{$r->[1]}) {
	2					5
105	2					9	push @out, {title => $r->[0], link => $link, $self->explain_link($link)};
106							};
107							};
108	1					4	();
109	1					7	};
110	1					7	$self->scan_words($to_search, $cbk);
111
112	1					3	foreach my $e (@out) {
113	2					10	%$e = (%$e, $self->explain_link($e->{link}));
114							};
115
116	1					3	return $self->obj("HTMLWriter")->format_annotations(\@out);
117							};
118
119
120							sub scan_words {
121	1			1	0	2	my $self = shift;
122	1					3	$self->obj("WordScan")->scan_words(@_);
123							};
124
125							sub canonicalize_id {
126	2			2	0	4	my ($self, $word) = @_;
127	2					9	$self->obj("WordScan")->canonicalize_id($word);
128							};
129
130	0			0	0		sub explain_link { () };
131
132							1;