line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::Mining::Corpus::Document; |
2
|
3
|
|
|
3
|
|
83858
|
use base qw(Text::Mining::Base); |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
1464
|
|
3
|
|
|
|
|
|
|
use Class::Std; |
4
|
|
|
|
|
|
|
use Class::Std::Utils; |
5
|
|
|
|
|
|
|
use Text::Mining::Parser; |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use warnings; |
8
|
|
|
|
|
|
|
use strict; |
9
|
|
|
|
|
|
|
use Carp; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use version; our $VERSION = qv('0.0.8'); |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
{ |
14
|
|
|
|
|
|
|
my %document_id_of : ATTR( :set :get ); |
15
|
|
|
|
|
|
|
my %document_type_of : ATTR( :set :get :default<> ); |
16
|
|
|
|
|
|
|
my %corpus_id_of : ATTR( :init_arg :set :get :default<> ); |
17
|
|
|
|
|
|
|
my %corpus_name_of : ATTR( :set :get :default<> ); |
18
|
|
|
|
|
|
|
my %submitted_by_user_id_of : ATTR( :set :get :default<> ); |
19
|
|
|
|
|
|
|
my %document_title_of : ATTR( :set :get :default<> ); |
20
|
|
|
|
|
|
|
my %document_url_of : ATTR( :set :get :default<> ); |
21
|
|
|
|
|
|
|
my %document_path_of : ATTR( :set :get :default<> ); |
22
|
|
|
|
|
|
|
my %file_name_of : ATTR( :init_arg :set :get :default<> ); |
23
|
|
|
|
|
|
|
my %file_type_of : ATTR( :init_arg :set :get :default<> ); |
24
|
|
|
|
|
|
|
my %bytes_of : ATTR( :set :get :default<> ); |
25
|
|
|
|
|
|
|
my %enter_date_of : ATTR( :set :get :default<> ); |
26
|
|
|
|
|
|
|
my %exit_date_of : ATTR( :set :get :default<> ); |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
sub BUILD { |
29
|
|
|
|
|
|
|
my ($self, $ident, $arg_ref) = @_; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
return; |
32
|
|
|
|
|
|
|
} |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
sub START { |
35
|
|
|
|
|
|
|
my ($self, $ident, $arg_ref) = @_; |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
#print " FILE TYPE: ", $self->get_file_type(), "\n\n"; |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
if (defined $arg_ref->{document_id}) { $self->_get_document($arg_ref); } |
40
|
|
|
|
|
|
|
elsif (defined $arg_ref->{file_name}) { $self->insert( $arg_ref ); } |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
return; |
43
|
|
|
|
|
|
|
} |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
sub parse { |
46
|
|
|
|
|
|
|
my ( $self, $arg_ref ) = @_; |
47
|
|
|
|
|
|
|
$arg_ref->{file_type} = $self->get_document_type(); |
48
|
|
|
|
|
|
|
$arg_ref->{file_name} = $self->get_file_name(); |
49
|
|
|
|
|
|
|
$arg_ref->{document_id} = $self->get_document_id(); |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
my $parser = Text::Mining::Parser->new( $arg_ref ); |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
print STDERR $parser->version(), "\n"; |
54
|
|
|
|
|
|
|
print STDERR $parser->stats(), "\n"; |
55
|
|
|
|
|
|
|
print STDERR $parser->parse(), "\n"; |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
my $document_id = $self->get_document_id(); |
58
|
|
|
|
|
|
|
return $document_id; |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# sub get_id { my ($self) = @_; return $id_of{ident $self}; } |
62
|
|
|
|
|
|
|
# sub get_document_id { my ($self) = @_; return $id_of{ident $self}; } |
63
|
|
|
|
|
|
|
# sub get_submitted_document_id { my ($self) = @_; return $id_of{ident $self}; } |
64
|
|
|
|
|
|
|
# sub get_corpus_id { my ($self) = @_; return $corpus_id_of{ident $self}; } |
65
|
|
|
|
|
|
|
# sub get_submitted_by_user_id { my ($self) = @_; return $submitted_by_user_id_of{ident $self}; } |
66
|
|
|
|
|
|
|
# sub get_document_url { my ($self) = @_; return $document_url_of{ident $self}; } |
67
|
|
|
|
|
|
|
# sub get_document_path { my ($self) = @_; return $document_path_of{ident $self}; } |
68
|
|
|
|
|
|
|
# sub get_file_name { my ($self) = @_; return $file_name_of{ident $self}; } |
69
|
|
|
|
|
|
|
# sub get_enter_date { my ($self) = @_; return $enter_date_of{ident $self}; } |
70
|
|
|
|
|
|
|
# sub get_exit_date { my ($self) = @_; return $exit_date_of{ident $self}; } |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
sub _get_document { |
73
|
|
|
|
|
|
|
my ($self, $arg_ref) = @_; |
74
|
|
|
|
|
|
|
my $ident = ident $self; |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
my $sql = "select document_id, document_type_id, corpus_id, document_path, document_file_name, bytes, enter_date "; |
77
|
|
|
|
|
|
|
$sql .= "from documents "; |
78
|
|
|
|
|
|
|
$sql .= "where document_id = '$arg_ref->{document_id}'"; |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
($document_id_of{$ident}, |
81
|
|
|
|
|
|
|
$document_type_of{$ident}, |
82
|
|
|
|
|
|
|
$corpus_id_of{$ident}, |
83
|
|
|
|
|
|
|
$document_path_of{$ident}, |
84
|
|
|
|
|
|
|
$file_name_of{$ident}, |
85
|
|
|
|
|
|
|
$bytes_of{$ident}, |
86
|
|
|
|
|
|
|
$enter_date_of{$ident}) = $self->library()->sqlexec($sql, '@'); |
87
|
|
|
|
|
|
|
} |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
sub all { |
90
|
|
|
|
|
|
|
my ($self) = @_; |
91
|
|
|
|
|
|
|
my (@documents); |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
my $sql = "select document_id from documents order by document_id asc"; |
94
|
|
|
|
|
|
|
my $documents = $self->library()->sqlexec( $sql, '\@@' ); |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
foreach my $document (@$documents) { push @documents, Text::Librarian::Document->new({ document_id => $document->[0] }); } |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
return \@documents; |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
sub display_all { |
102
|
|
|
|
|
|
|
my ($self, $c, $root_url) = @_; |
103
|
|
|
|
|
|
|
my @switch = (1, 0); |
104
|
|
|
|
|
|
|
my @classes = ('rowB', 'rowA'); |
105
|
|
|
|
|
|
|
my $documents = Text::Librarian::Document->all(); |
106
|
|
|
|
|
|
|
my ($html, $switch, $class); |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
$html .= " \n";
109
|
|
|
|
|
|
|
$html .= " | \n";
110
|
|
|
|
|
|
|
$html .= " | | \n";
111
|
|
|
|
|
|
|
$html .= " | Name | \n";
112
|
|
|
|
|
|
|
$html .= " | Description | \n";
113
|
|
|
|
|
|
|
$html .= " | Path | \n";
114
|
|
|
|
|
|
|
$html .= " | \n";
115
|
|
|
|
|
|
|
foreach my $document (@$documents) { |
116
|
|
|
|
|
|
|
$switch = $switch[$switch]; |
117
|
|
|
|
|
|
|
$class = $classes[$switch]; |
118
|
|
|
|
|
|
|
$html .= " | \n";
119
|
|
|
|
|
|
|
$html .= " | [X] | \n";
120
|
|
|
|
|
|
|
$html .= " | " . $document->get_name() . " | \n";
121
|
|
|
|
|
|
|
$html .= " | " . $document->get_desc() . " | \n";
122
|
|
|
|
|
|
|
$html .= " | " . $document->get_path() . " | \n";
123
|
|
|
|
|
|
|
$html .= " | \n";
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
$html .= " | \n"; |
126
|
|
|
|
|
|
|
return $html; |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
sub update { |
130
|
|
|
|
|
|
|
my ( $self, $arg_ref ) = @_; |
131
|
|
|
|
|
|
|
my $ident = ident $self; |
132
|
|
|
|
|
|
|
my @updates = (); |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
if ( defined $arg_ref->{corpus_id} ) { $self->set_desc( $arg_ref->{corpus_id} ); push @updates, "corpus_id = '" . $self->_html_to_sql( $arg_ref->{corpus_id} ) . "'"; } |
135
|
|
|
|
|
|
|
if ( defined $arg_ref->{document_path} ) { $self->set_path( $arg_ref->{document_path} ); push @updates, "document_path = '" . $self->_html_to_sql( $arg_ref->{document_path} ) . "'"; } |
136
|
|
|
|
|
|
|
if ( defined $arg_ref->{file_name} ) { $self->set_file_name( $arg_ref->{file_name} ); push @updates, "file_name = '" . $self->_html_to_sql( $arg_ref->{file_name} ) . "'"; } |
137
|
|
|
|
|
|
|
if ( defined $arg_ref->{bytes} ) { $self->set_desc( $arg_ref->{bytes} ); push @updates, "bytes = '" . $self->_html_to_sql( $arg_ref->{bytes} ) . "'"; } |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
my $sql = "update documents set " . join( ', ', @updates ) . " where document_id = '$document_id_of{$ident}'"; |
140
|
|
|
|
|
|
|
$self->library()->sqlexec($sql); |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
sub insert { |
144
|
|
|
|
|
|
|
my ($self, $arg_ref) = @_; |
145
|
|
|
|
|
|
|
foreach ('corpus_id', 'document_title', 'document_path', 'file_name', 'bytes') { $arg_ref->{$_} = $self->_html_to_sql( $arg_ref->{$_} || '' ); } |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# Set doc_type_id : alpha - should be live or at least configured |
148
|
|
|
|
|
|
|
my %doc_types = ( txt => 1, xml => 2, pdf => 3 ); |
149
|
|
|
|
|
|
|
$arg_ref->{document_type_id} = $doc_types{ $arg_ref->{file_type} }; |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
my $sql = "insert into documents (document_type_id, corpus_id, document_title, document_path, document_file_name, bytes) "; |
152
|
|
|
|
|
|
|
$sql .= "values ('$arg_ref->{document_type_id}', '$arg_ref->{corpus_id}', '$arg_ref->{document_title}', '$arg_ref->{document_path}', '$arg_ref->{file_name}', '$arg_ref->{bytes}') "; |
153
|
|
|
|
|
|
|
#print "\n", $sql, "\n\n"; |
154
|
|
|
|
|
|
|
$self->library()->sqlexec($sql); |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
$sql = "select LAST_INSERT_ID()"; |
157
|
|
|
|
|
|
|
( $arg_ref->{document_id} ) = $self->library()->sqlexec($sql, '@'); |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
$self->_get_document( $arg_ref ); |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
sub delete { |
163
|
|
|
|
|
|
|
my ( $self ) = @_; |
164
|
|
|
|
|
|
|
my $ident = ident $self; |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
$self->library()->sqlexec("delete from documents where document_id = '" . $self->get_document_id() . "'"); |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
1; # Magic true value required at end of module |
171
|
|
|
|
|
|
|
__END__ |