line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MARC::Record::Stats;
|
2
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
246232
|
use warnings;
|
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
104
|
|
4
|
2
|
|
|
2
|
|
12
|
use strict;
|
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
70
|
|
5
|
2
|
|
|
2
|
|
1744
|
use version;
|
|
2
|
|
|
|
|
4998
|
|
|
2
|
|
|
|
|
13
|
|
6
|
|
|
|
|
|
|
|
7
|
2
|
|
|
2
|
|
1571
|
use MARC::Record::Stats::Report;
|
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
1632
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 NAME
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
MARC::Record::Stats - scans one or many MARC::Record and gives a statistics on the tags and subtags
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 VERSION
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
Version 0.0.4
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=cut
|
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
our $VERSION = qv('0.0.4');
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 SYNOPSIS
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
This module provides functionality for L script.
|
25
|
|
|
|
|
|
|
Description of the module interface follows.
|
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
use MARC::Record::Stats;
|
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
{
|
30
|
|
|
|
|
|
|
my $records = [];
|
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# code skipped ...
|
33
|
|
|
|
|
|
|
my $stats = MARC::Record::Stats->new;
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# $records is array of MARC::Record
|
36
|
|
|
|
|
|
|
for my $r ( @$records ) {
|
37
|
|
|
|
|
|
|
$stats->add_record_to_stats( $r );
|
38
|
|
|
|
|
|
|
}
|
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
$stats->report( *STDOUT, { dots => 1 } );
|
41
|
|
|
|
|
|
|
}
|
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
###
|
44
|
|
|
|
|
|
|
### Some useless features:
|
45
|
|
|
|
|
|
|
###
|
46
|
|
|
|
|
|
|
{
|
47
|
|
|
|
|
|
|
my $record;
|
48
|
|
|
|
|
|
|
my $records = [];
|
49
|
|
|
|
|
|
|
# code skipped ...
|
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# single record statistics
|
52
|
|
|
|
|
|
|
# $record is a MARC::Record
|
53
|
|
|
|
|
|
|
my $stats1 = Marc::Record::Stats->new( $record );
|
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# merge $stats1 and statistics for $records
|
56
|
|
|
|
|
|
|
# $records is a reference to an array of MARC::Record
|
57
|
|
|
|
|
|
|
my $stats2 = Marc::Record::Stats->new( $records, $stats1 );
|
58
|
|
|
|
|
|
|
# ...
|
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
$stats1->report( *STDOUT );
|
61
|
|
|
|
|
|
|
# $stats2->report( *STDOUT );
|
62
|
|
|
|
|
|
|
}
|
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
=head1 METHODS
|
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head2 new $records [, $stats]
|
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
Builds statistics on $records, appends $stats if given.
|
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=over 4
|
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=item $records
|
73
|
|
|
|
|
|
|
A MARC::Record or a reference to an array of MARC::Record
|
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=item $stats
|
76
|
|
|
|
|
|
|
Marc::Record::Stats object that contains accumulated statistics.
|
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=back
|
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=cut
|
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
sub new {
|
83
|
3
|
|
|
3
|
1
|
540
|
my ($class, $records, $stats) = @_;
|
84
|
|
|
|
|
|
|
|
85
|
3
|
|
|
|
|
13
|
my $self = {
|
86
|
|
|
|
|
|
|
stats => { nrecords => 0 },
|
87
|
|
|
|
|
|
|
};
|
88
|
|
|
|
|
|
|
|
89
|
3
|
|
|
|
|
9
|
bless $self, $class;
|
90
|
|
|
|
|
|
|
|
91
|
3
|
100
|
|
|
|
16
|
$self->_copy_stats($stats)
|
92
|
|
|
|
|
|
|
if $stats;
|
93
|
|
|
|
|
|
|
|
94
|
3
|
100
|
|
|
|
17
|
my $reclist = (ref $records eq 'ARRAY') ? $records : [ $records ];
|
95
|
3
|
|
|
|
|
8
|
foreach my $rec ( @$reclist ) {
|
96
|
4
|
|
|
|
|
13
|
$self->add_record_to_stats( $rec );
|
97
|
|
|
|
|
|
|
}
|
98
|
3
|
|
|
|
|
9
|
return $self;
|
99
|
|
|
|
|
|
|
}
|
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=head2 report $fh, $config
|
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
Prints out a report on the collected statistics to a filehandle $fh.
|
104
|
|
|
|
|
|
|
$config keeps configuretion for the reporter. See L
|
105
|
|
|
|
|
|
|
for details
|
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=cut
|
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
sub report {
|
110
|
0
|
|
|
0
|
1
|
0
|
my ($self, $fh, $config) = @_;
|
111
|
0
|
|
|
|
|
0
|
MARC::Record::Stats::Report->report($fh, $self, $config);
|
112
|
|
|
|
|
|
|
}
|
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head2 get_stats_hash
|
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
Returns a hashref that contains the statistics:
|
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
= {
|
120
|
|
|
|
|
|
|
nrecords => , # the number of records
|
121
|
|
|
|
|
|
|
tags => {
|
122
|
|
|
|
|
|
|
=> , # for every tag found in records
|
123
|
|
|
|
|
|
|
...
|
124
|
|
|
|
|
|
|
}
|
125
|
|
|
|
|
|
|
}
|
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
= \d{3} # a tag, three digits
|
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
= {
|
130
|
|
|
|
|
|
|
occurence => , # how many records contain this tag
|
131
|
|
|
|
|
|
|
subtags => ,
|
132
|
|
|
|
|
|
|
}
|
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
= {
|
135
|
|
|
|
|
|
|
=> {
|
136
|
|
|
|
|
|
|
occurence => , # how many records contain this subtag
|
137
|
|
|
|
|
|
|
repeatable => <0|1>, # whether or not is repeatable
|
138
|
|
|
|
|
|
|
}
|
139
|
|
|
|
|
|
|
}
|
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
= [a-z0-9] # alphanum, subtag
|
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=cut
|
144
|
|
|
|
|
|
|
|
145
|
10
|
|
|
10
|
1
|
57
|
sub get_stats_hash { return $_[0]->{stats} }
|
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=begin DEVELOPER
|
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
Deep copy of stats
|
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=end DEVELOPER
|
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=cut
|
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
sub _copy_stats {
|
157
|
1
|
|
|
1
|
|
38
|
my ($self, $stats) = @_;
|
158
|
1
|
|
|
|
|
5
|
my $stathash = $stats->get_stats_hash;
|
159
|
1
|
|
|
|
|
4
|
my $selfstat = $self->get_stats_hash;
|
160
|
|
|
|
|
|
|
|
161
|
1
|
|
|
|
|
2
|
$selfstat->{nrecords} = $stathash->{nrecords};
|
162
|
1
|
|
|
|
|
2
|
foreach my $tag ( keys %{ $stathash->{tags} } ) {
|
|
1
|
|
|
|
|
3
|
|
163
|
4
|
|
|
|
|
7
|
my $tagstat = $stathash->{tags}->{$tag};
|
164
|
4
|
|
|
|
|
6
|
$selfstat->{tags}->{$tag}->{occurence} = $tagstat->{occurence};
|
165
|
4
|
|
|
|
|
9
|
$selfstat->{tags}->{$tag}->{repeatable} = $tagstat->{repeatable};
|
166
|
4
|
|
|
|
|
5
|
$selfstat->{tags}->{$tag}->{subtags} = { };
|
167
|
4
|
|
|
|
|
5
|
foreach my $subtag ( keys %{ $tagstat->{subtags} } ) {
|
|
4
|
|
|
|
|
9
|
|
168
|
4
|
|
|
|
|
10
|
$selfstat->{tags}->{$tag}->{subtags}->{$subtag}->{occurence} = $tagstat->{subtags}->{$subtag}->{occurence};
|
169
|
4
|
|
|
|
|
11
|
$selfstat->{tags}->{$tag}->{subtags}->{$subtag}->{repeatable} = $tagstat->{subtags}->{$subtag}->{repeatable};
|
170
|
|
|
|
|
|
|
}
|
171
|
|
|
|
|
|
|
}
|
172
|
|
|
|
|
|
|
}
|
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=head2 add_record_to_stats $record
|
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
Add $record to statistics.
|
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=cut
|
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
sub add_record_to_stats {
|
181
|
4
|
|
|
4
|
1
|
6
|
my ($self, $record) = @_;
|
182
|
|
|
|
|
|
|
|
183
|
4
|
50
|
|
|
|
15
|
return unless $record;
|
184
|
|
|
|
|
|
|
|
185
|
4
|
|
|
|
|
11
|
my $stats = $self->get_stats_hash;
|
186
|
|
|
|
|
|
|
|
187
|
4
|
|
|
|
|
7
|
$stats->{nrecords}++;
|
188
|
|
|
|
|
|
|
|
189
|
4
|
|
|
|
|
12
|
my $record_stats = $self->get_record_stats($record);
|
190
|
|
|
|
|
|
|
|
191
|
4
|
|
|
|
|
15
|
foreach my $tag ( keys %$record_stats ) {
|
192
|
16
|
|
|
|
|
30
|
$stats->{tags}->{$tag}->{occurence}++;
|
193
|
16
|
|
100
|
|
|
94
|
$stats->{tags}->{$tag}->{subtags} ||= {};
|
194
|
|
|
|
|
|
|
|
195
|
16
|
100
|
|
|
|
45
|
$stats->{tags}->{$tag}->{repeatable} =
|
196
|
|
|
|
|
|
|
$record_stats->{$tag}->{occurence} > 1 ?
|
197
|
|
|
|
|
|
|
1 : 0;
|
198
|
|
|
|
|
|
|
|
199
|
16
|
|
|
|
|
23
|
my $subtag_stats = $stats->{tags}->{$tag}->{subtags};
|
200
|
|
|
|
|
|
|
|
201
|
16
|
|
|
|
|
16
|
foreach my $subtag ( keys %{ $record_stats->{$tag}->{subtags} } ) {
|
|
16
|
|
|
|
|
48
|
|
202
|
18
|
|
|
|
|
30
|
$subtag_stats->{$subtag}->{occurence}++;
|
203
|
|
|
|
|
|
|
|
204
|
18
|
100
|
|
|
|
104
|
$subtag_stats->{$subtag}->{repeatable} =
|
205
|
|
|
|
|
|
|
$record_stats->{$tag}->{subtags}->{$subtag} > 1 ?
|
206
|
|
|
|
|
|
|
1 : 0;
|
207
|
|
|
|
|
|
|
}
|
208
|
|
|
|
|
|
|
}
|
209
|
|
|
|
|
|
|
}
|
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=head2 get_record_stats $record
|
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
returns a reference to a hash: { => }
|
214
|
|
|
|
|
|
|
where is a reference to a hash with the keys
|
215
|
|
|
|
|
|
|
I - how many times the field with the tag
|
216
|
|
|
|
|
|
|
was found in the record, I - result of
|
217
|
|
|
|
|
|
|
subtag_stats.
|
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=cut
|
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
sub get_record_stats {
|
222
|
4
|
|
|
4
|
1
|
5
|
my ($self, $record) = @_;
|
223
|
4
|
|
|
|
|
9
|
my $stats;
|
224
|
|
|
|
|
|
|
|
225
|
4
|
|
|
|
|
18
|
foreach my $field ( $record->fields ) {
|
226
|
18
|
|
|
|
|
68
|
my $tag = $field->tag;
|
227
|
|
|
|
|
|
|
|
228
|
18
|
|
|
|
|
83
|
$stats->{$tag}->{occurence}++;
|
229
|
|
|
|
|
|
|
|
230
|
18
|
100
|
|
|
|
59
|
if( $tag > 9 ) {
|
231
|
14
|
|
50
|
|
|
29
|
$stats->{$tag}->{subtags} = $self->subtag_stats($field) || { };
|
232
|
|
|
|
|
|
|
}
|
233
|
|
|
|
|
|
|
}
|
234
|
4
|
|
|
|
|
9
|
return $stats;
|
235
|
|
|
|
|
|
|
}
|
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
=head2 subtag_stats $field
|
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
returns a reference to a hash { => }
|
240
|
|
|
|
|
|
|
where is the number of times the subfield with
|
241
|
|
|
|
|
|
|
the code was found in the fied $field.
|
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
$field is MARC::Field
|
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=cut
|
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
sub subtag_stats {
|
248
|
14
|
|
|
14
|
1
|
21
|
my ($self, $field) = @_;
|
249
|
14
|
|
|
|
|
16
|
my $substat = { };
|
250
|
|
|
|
|
|
|
|
251
|
14
|
|
|
|
|
36
|
foreach my $subtag ( $field->subfields ) {
|
252
|
22
|
|
|
|
|
233
|
$substat->{ $subtag->[0] }++;
|
253
|
|
|
|
|
|
|
}
|
254
|
|
|
|
|
|
|
|
255
|
14
|
|
|
|
|
65
|
return $substat;
|
256
|
|
|
|
|
|
|
}
|
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
1; # End of Marc::Record::Stats
|
259
|
|
|
|
|
|
|
__END__
|