File Coverage

blib/lib/Text/Mining/Parser/Base.pm

Criterion	Covered	Total	%
statement	3	3	100.0
branch			n/a
condition			n/a
subroutine	1	1	100.0
pod			n/a
total	4	4	100.0

line	stmt	sub	time	code
1				package Text::Mining::Parser::Base;
2	1	1	5	use base qw(Text::Mining::Base);
	1		3
	1		163
3				use Class::Std;
4				use Class::Std::Utils;
5				use Module::Runtime qw(use_module);
6
7				use warnings;
8				use strict;
9				use Carp;
10
11				use version; our $VERSION = qv('0.0.8');
12
13				{
14				my %document_of : ATTR( get => 'document', set => 'document' ); # document handled by parse_document(); accessed via get_document()/set_document()
				# Token list storage, accessed via get_token_list()/set_token_list().
				# NOTE(review): nothing in this listing reads or writes it - confirm where it is populated.
15				my %token_list_of : ATTR( get => 'token_list', set => 'token_list' );
				# Algorithm object; stored by BUILD through set_algorithm() and read with get_algorithm().
16				my %algorithm_of : ATTR( get => 'algorithm', set => 'algorithm' );
				# Section index, defaulting to 1; accessed via get_section_ndx()/set_section_ndx().
17				my %section_ndx_of : ATTR( default => 1, get => 'section_ndx', set => 'section_ndx' );
				# Paragraph index, defaulting to 1; accessed via get_paragraph_ndx()/set_paragraph_ndx().
18				my %paragraph_ndx_of : ATTR( default => 1, get => 'paragraph_ndx', set => 'paragraph_ndx' );
				# Line index, defaulting to 1; accessed via get_line_ndx()/set_line_ndx().
19				my %line_ndx_of : ATTR( default => 1, get => 'line_ndx', set => 'line_ndx' );
20
21				sub version {
				    # Returns the name-and-version banner string for this parser class.
				    my $banner = "Text::Mining::Parser::Base Version $VERSION";
				    return $banner;
				}
22
23				sub BUILD {
				    # Construction hook: loads the algorithm class named by
				    # $arg_ref->{algorithm} (under Text::Mining::Algorithm::), instantiates it
				    # with the constructor arguments and stores it via set_algorithm().
				    #
				    # Params : $self    - object under construction
				    #          $ident   - object identifier (unused here)
				    #          $arg_ref - hashref of constructor arguments; 'algorithm' is required
				    # Returns: nothing
				    # Throws : croaks when 'algorithm' is missing or empty; use_module() dies
				    #          when the module cannot be loaded or fails the 0.0.1 version check
24				    my ($self, $ident, $arg_ref) = @_;
25
				    # Fail early with a caller-side message instead of an "uninitialized value"
				    # warning followed by an obscure module-name error from use_module().
				    my $algorithm_name = $arg_ref->{algorithm};
				    croak "Missing required 'algorithm' argument"
				        if !defined $algorithm_name || $algorithm_name eq q{};
26				    my $algorithm = use_module('Text::Mining::Algorithm::' . $algorithm_name, 0.0.1)->new( $arg_ref );
27				    $self->set_algorithm( $algorithm );
28
29				    return;
30				}
31
32				sub parse_document {
				    # Runs the configured algorithm over the document at four granularities:
				    # the whole text, then each section, each paragraph and each line.
				    #
				    # Params : $self    - parser object
				    #          $arg_ref - optional hashref; when 'document' is defined it
				    #                     replaces the stored document via set_document()
				    # Returns: nothing
				    #
				    # Declared without a prototype: prototypes are ignored by method calls,
				    # so the former "()" only misdescribed the interface.
33				    my ($self, $arg_ref) = @_;
34
35				    # Check for new document
36				    if (defined $arg_ref->{document}) { $self->set_document( $arg_ref->{document} ); }
38
39				    my $algorithm = $self->get_algorithm();
40
41				    # This design assumes text will fit in a scalar. Will probably need
42				    # a handle-based method using random access.
43
44				    # PD: Parse and process the entire text
45				    my $text = $self->_get_all_text();
46				    $algorithm->_by_text({ text => $text });
47
48				    # PD: Parse and process each document section
				    # (each _get_next_* call is made in scalar context; an undefined
				    # result ends the corresponding loop)
50				    while (defined( my $section = $self->_get_next_section() )) {
				        $algorithm->_by_section({ section => $section });
				    }
51
52				    # PD: Parse and process each document paragraph
54				    while (defined( my $paragraph = $self->_get_next_paragraph() )) {
				        $algorithm->_by_paragraph({ paragraph => $paragraph });
				    }
55
56				    # PD: Parse and process each document line
58				    while (defined( my $line = $self->_get_next_line() )) {
				        $algorithm->_by_line({ line => $line });
				    }
59
60				    # PD: Annotate the token list (currently disabled)
61				    #$self->_annotate();
62
63				    return;
64				}
65
66				sub parse {
				    # Harvests tokens with a fresh Text::Mining::Algorithm::AllTokens object.
				    #
				    # Params : $self    - parser object
				    #          $arg_ref - passed unchanged to harvest_tokens()
				    # Returns: the scalar result of harvest_tokens()
67				    my ($self, $arg_ref) = @_;
68
69				    use Text::Mining::Algorithm::AllTokens;
70
				    # NOTE(review): the file text is fetched but never handed to the
				    # algorithm - confirm whether harvest_tokens() should receive it.
71				    my $file_text = $self->_get_file_text( $self->get_file_name() );
72
73				    my $harvester = Text::Mining::Algorithm::AllTokens->new();
74
				    # scalar: harvest_tokens() is always evaluated in scalar context,
				    # whatever context parse() itself is called in.
75				    return scalar $harvester->harvest_tokens( $arg_ref );
76				}
77
78				sub stats {
				    # Produces simple size statistics for the text of the parser's file.
				    #
				    # Params : $self    - parser object (uses get_file_name() and _get_file_text())
				    #          $arg_ref - unused
				    # Returns: a list of three strings: line, sentence and token counts.
				    #          In scalar context only the last one (tokens) is returned,
				    #          because the values are joined by the comma operator.
79				    my ($self, $arg_ref) = @_;
80
81				    my $text = $self->_get_file_text( $self->get_file_name() );
82				    my @lines = split(/\n/, $text);
				    # Sentences are approximated by splitting on every period.
83				    my @sentences = split(/\./, $text);
				    # split ' ' skips leading whitespace; the previous /\s+/ pattern produced
				    # an empty first field for text starting with whitespace, which
				    # inflated the token count by one.
84				    my @tokens = split(' ', $text);
85
86				    return " Lines: " . scalar( @lines) . ";",
87				    " Sentences: " . scalar( @sentences) . ";",
88				    " Tokens: " . scalar( @tokens) . ";";
89				}
90
91				sub _get_all_text {
				    # Base-class placeholder used by parse_document() to obtain the whole text.
				    # Returns nothing (undef in scalar context), so parse_document() passes
				    # an undefined text to the algorithm.
				    # Presumably overridden by concrete parsers - verify against subclasses.
				    # No prototype: prototypes have no effect on method calls.
92				    my ($self, $arg_ref) = @_;
93				    return;
94				}
95
96				sub _get_next_section {
				    # Base-class placeholder used by parse_document() to fetch the next section.
				    # Returns nothing (undef in scalar context), which ends the section loop
				    # in parse_document() immediately.
				    # Presumably overridden by concrete parsers - verify against subclasses.
				    # No prototype: prototypes have no effect on method calls.
97				    my ($self, $arg_ref) = @_;
98				    return;
99				}
100
101				sub _get_next_paragraph {
				    # Base-class placeholder used by parse_document() to fetch the next paragraph.
				    # Returns nothing (undef in scalar context), which ends the paragraph loop
				    # in parse_document() immediately.
				    # Presumably overridden by concrete parsers - verify against subclasses.
				    # No prototype: prototypes have no effect on method calls.
102				    my ($self, $arg_ref) = @_;
103				    return;
104				}
105
106				sub _get_next_line {
				    # Base-class placeholder used by parse_document() to fetch the next line.
				    # Returns nothing (undef in scalar context), which ends the line loop
				    # in parse_document() immediately.
				    # Presumably overridden by concrete parsers - verify against subclasses.
				    # No prototype: prototypes have no effect on method calls.
107				    my ($self, $arg_ref) = @_;
108				    return;
109				}
110
111				sub _update_stats {
				    # Base-class placeholder that does nothing and returns nothing.
				    # NOTE(review): not called anywhere in this listing - confirm its callers.
				    # No prototype: prototypes have no effect on method calls.
112				    my ($self, $arg_ref) = @_;
113				    return;
114				}
115
116				sub _annotate {
				    # Records on the stored document which parser and which algorithm were
				    # used, by calling the document's annotate() method once for each.
				    #
				    # Params : $self    - parser object
				    #          $arg_ref - unused
				    # Returns: nothing
				    # No prototype: prototypes have no effect on method calls.
117				    my ($self, $arg_ref) = @_;
118				    my $algorithm = $self->get_algorithm();
				    my $document = $self->get_document();
119
120				    $document->annotate({ type => 'parser', value => $self->version() });
121				    $document->annotate({ type => 'algorithm', value => $algorithm->version() });
122				    return;
123				}
124
125				}
126
127				1; # Magic true value required at end of module
128				__END__