line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Hailo::Role::Tokenizer; |
2
|
|
|
|
|
|
|
our $AUTHORITY = 'cpan:AVAR'; |
3
|
|
|
|
|
|
|
$Hailo::Role::Tokenizer::VERSION = '0.75'; |
4
|
30
|
|
|
30
|
|
20877
|
use v5.10.0; |
|
30
|
|
|
|
|
120
|
|
5
|
30
|
|
|
30
|
|
221
|
use Moose::Role; |
|
30
|
|
|
|
|
69
|
|
|
30
|
|
|
|
|
263
|
|
6
|
30
|
|
|
30
|
|
109015
|
use MooseX::Types::Moose ':all'; |
|
30
|
|
|
|
|
68
|
|
|
30
|
|
|
|
|
288
|
|
7
|
30
|
|
|
30
|
|
245541
|
use namespace::clean -except => 'meta'; |
|
30
|
|
|
|
|
76
|
|
|
30
|
|
|
|
|
287
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# Attribute `spacing`: read-write hash reference of integers mapping a |
# spacing name to the integer code attached to each token. The default |
# hash is built fresh per object by the `default` sub: |
#   normal  => 0  (normal token) |
#   prefix  => 1  (no whitespace follows it) |
#   postfix => 2  (no whitespace precedes it) |
#   infix   => 3  (no whitespace follows or precedes it) |
has spacing => ( |
10
|
|
|
|
|
|
|
isa => HashRef[Int], |
11
|
|
|
|
|
|
|
is => 'rw', |
12
|
|
|
|
|
|
|
default => sub { { |
13
|
|
|
|
|
|
|
normal => 0, |
14
|
|
|
|
|
|
|
prefix => 1, |
15
|
|
|
|
|
|
|
postfix => 2, |
16
|
|
|
|
|
|
|
infix => 3, |
17
|
|
|
|
|
|
|
} }, |
18
|
|
|
|
|
|
|
); |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# BUILD (Moose post-construction hook): copies every key/value pair of the |
# `spacing` hash directly onto the object as $self->{"_spacing_<key>"}; with |
# the default hash that yields _spacing_normal, _spacing_prefix, |
# _spacing_postfix and _spacing_infix. Takes only $self and returns nothing. |
# NOTE(review): the copies are taken once, here; `spacing` is 'rw', so a |
# later change to it presumably does not refresh them - confirm with callers. |
sub BUILD { |
21
|
262
|
|
|
262
|
0
|
570
|
my ($self) = @_; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# This performance hack is here because calling |
24
|
|
|
|
|
|
|
# $self->spacing->{...} was a significant part of Tokenizer execution |
25
|
|
|
|
|
|
|
# time (~20s / ~1200s), since each lookup costs one method call and a |
26
|
|
|
|
|
|
|
# hash dereference. |
27
|
|
|
|
|
|
|
|
28
|
262
|
|
|
|
|
7401
|
my $spacing = $self->spacing; |
29
|
262
|
|
|
|
|
1115
|
while (my ($k, $v) = each %$spacing) { |
30
|
1048
|
|
|
|
|
3339
|
$self->{"_spacing_$k"} = $v; |
31
|
|
|
|
|
|
|
} |
32
|
|
|
|
|
|
|
|
33
|
262
|
|
|
|
|
6619
|
return; |
34
|
|
|
|
|
|
|
} |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
requires 'make_tokens'; |
37
|
|
|
|
|
|
|
requires 'make_output'; |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
1; |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=encoding utf8 |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=head1 NAME |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
Hailo::Role::Tokenizer - A role representing a L<Hailo|Hailo> tokenizer |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=head1 METHODS |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=head2 C<new> |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
This is the constructor. It takes no arguments. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=head2 C<make_tokens> |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
Takes a line of input and returns an array reference of tokens. A token is |
56
|
|
|
|
|
|
|
an array reference containing two elements: a I<spacing attribute> and the |
57
|
|
|
|
|
|
|
I<token text>. The spacing attribute is an integer which will be stored along |
58
|
|
|
|
|
|
|
with the token text in the database. The following values are currently being |
59
|
|
|
|
|
|
|
used: |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=over |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=item C<0> - normal token |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=item C<1> - prefix token (no whitespace follows it) |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=item C<2> - postfix token (no whitespace precedes it) |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=item C<3> - infix token (no whitespace follows or precedes it) |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=back |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=head2 C<make_output> |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
Takes an array reference of tokens and returns a line of output. A token is |
76
|
|
|
|
|
|
|
an array reference as described in L<C<make_tokens>|/make_tokens>. The tokens |
77
|
|
|
|
|
|
|
will be joined together into a sentence according to the whitespace |
78
|
|
|
|
|
|
|
attributes associated with the tokens, as well as any formatting provided by |
79
|
|
|
|
|
|
|
the tokenizer implementation. |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=head1 AUTHORS |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
Hinrik E<Ouml>rn SigurE<eth>sson, hinrik.sig@gmail.com |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason <avar@cpan.org> |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
Copyright 2010 Hinrik E<Ouml>rn SigurE<eth>sson and |
90
|
|
|
|
|
|
|
E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason <avar@cpan.org> |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
This program is free software, you can redistribute it and/or modify |
93
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=cut |