line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package RDF::Generator::Void::Stats; |
2
|
5
|
|
|
5
|
|
1821
|
use Progress::Any; |
|
5
|
|
|
|
|
17002
|
|
|
5
|
|
|
|
|
26
|
|
3
|
|
|
|
|
|
|
|
4
|
5
|
|
|
5
|
|
182
|
use 5.006; |
|
5
|
|
|
|
|
11
|
|
5
|
5
|
|
|
5
|
|
17
|
use strict; |
|
5
|
|
|
|
|
7
|
|
|
5
|
|
|
|
|
76
|
|
6
|
5
|
|
|
5
|
|
17
|
use warnings; |
|
5
|
|
|
|
|
3
|
|
|
5
|
|
|
|
|
100
|
|
7
|
5
|
|
|
5
|
|
24
|
use Moose; |
|
5
|
|
|
|
|
5
|
|
|
5
|
|
|
|
|
30
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 NAME |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
RDF::Generator::Void::Stats - Generate statistics needed for good VoID descriptions |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 SYNOPSIS |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
Typically called for you by L<RDF::Generator::Void> as: |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
my $stats = RDF::Generator::Void::Stats->new(generator => $self); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head2 METHODS |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head3 C<< BUILD >> |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
Called by Moose to initialize an object. |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head3 C<generator> |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
Parameter to the constructor, to pass a L<RDF::Generator::Void> object. |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head3 C<vocabularies> |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
A hashref used to find common vocabularies in the data. |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=head3 C<entities> |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
The number of distinct entities, as defined in the specification. |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head3 C<properties> |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
The number of distinct properties, as defined in the specification. |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head3 C<subjects> |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
The number of distinct subjects, as defined in the specification. |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=head3 C<objects> |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
The number of distinct objects, as defined in the specification. |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head3 C<propertyPartitions> |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
A hashref containing the number of triples for each property. |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=head3 C<classPartitions> |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
A hashref containing the number of triples for each class. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=cut |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# The following attributes also act as read-write methods. |
62
|
|
|
|
|
|
|
has vocabularies => ( is => 'rw', isa => 'HashRef' ); |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
has ['entities', 'properties', 'subjects', 'objects'] => ( is => 'rw', isa => 'Int' ); |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
has propertyPartitions => (is => 'rw', isa => 'HashRef' ); |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
has classPartitions => (is => 'rw', isa => 'HashRef' ); |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# This is a read-only method, meaning that the constructor has it as a |
71
|
|
|
|
|
|
|
# parameter, but then it can only be read from. |
72
|
|
|
|
|
|
|
has generator => ( |
73
|
|
|
|
|
|
|
is => 'ro', |
74
|
|
|
|
|
|
|
isa => 'RDF::Generator::Void', |
75
|
|
|
|
|
|
|
required => 1, |
76
|
|
|
|
|
|
|
); |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# The BUILD method is kinda the constructor. It is called when the |
79
|
|
|
|
|
|
|
# user calls the constructor. In here, the statistics is generated. |
80
|
|
|
|
|
|
|
sub BUILD { |
81
|
6
|
|
|
6
|
1
|
8065
|
my ($self) = @_; |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# Initialize local hashes to count stuff. |
84
|
6
|
|
|
|
|
10
|
my (%vocab_counter, %entities, %properties, %subjects, %objects, %classes); |
85
|
6
|
|
|
|
|
22
|
my $progress = Progress::Any->get_indicator(task => "compute"); |
86
|
|
|
|
|
|
|
|
87
|
6
|
|
|
|
|
301
|
my $gen = $self->generator; |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
# Here, we take the data in the model we want to generate |
90
|
|
|
|
|
|
|
# statistics for and we iterate over it. Doing it this way, we |
91
|
|
|
|
|
|
|
# should be able to generate all statistics in a single pass of the |
92
|
|
|
|
|
|
|
# data. |
93
|
|
|
|
|
|
|
$gen->inmodel->get_statements->each(sub { |
94
|
4687
|
|
|
4687
|
|
459702
|
my $st = shift; |
95
|
4687
|
50
|
|
|
|
8011
|
next unless $st->rdf_compatible; # To allow for non-RDF data models (e.g. N3) |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# wrap in eval, as this can potentially throw an exception. |
98
|
4687
|
|
|
|
|
113598
|
eval { |
99
|
4687
|
|
|
|
|
6559
|
my ($vocab_uri) = $st->predicate->qname; |
100
|
|
|
|
|
|
|
# The hash has a unique key, so now we count the number of qnames for each qname in the data |
101
|
4687
|
|
|
|
|
701327
|
$vocab_counter{$vocab_uri}++; |
102
|
|
|
|
|
|
|
}; |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
|
106
|
4687
|
100
|
66
|
|
|
137397
|
if ($gen->has_urispace && $st->subject->is_resource) { |
107
|
|
|
|
|
|
|
# Compute entities. We assume that all entities are subjects |
108
|
|
|
|
|
|
|
# with a prefix matching the uriSpace. Again, we use the |
109
|
|
|
|
|
|
|
# property that keys are unique, but we just set it to some |
110
|
|
|
|
|
|
|
# true value since we don't need to count how frequently each |
111
|
|
|
|
|
|
|
# entity is present. |
112
|
4291
|
|
|
|
|
130641
|
(my $urispace = $gen->urispace) =~ s/\./\\./g; |
113
|
4291
|
50
|
|
|
|
7443
|
$entities{$st->subject->uri_value} = 1 if ($st->subject->uri_value =~ m/^$urispace/); |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
4687
|
|
|
|
|
59404
|
$subjects{$st->subject->sse} = 1; |
117
|
4687
|
|
|
|
|
39809
|
$properties{$st->predicate->uri_value}{'triples'}++; |
118
|
4687
|
|
|
|
|
26001
|
$objects{$st->object->sse} = 1; |
119
|
|
|
|
|
|
|
|
120
|
4687
|
50
|
33
|
|
|
178118
|
if ((!$gen->has_level) || ($gen->has_level && $gen->level >= 1)) { |
|
|
|
66
|
|
|
|
|
121
|
4687
|
100
|
66
|
|
|
7099
|
if (($st->predicate->uri_value eq 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type') |
122
|
|
|
|
|
|
|
&& $st->object->is_resource) { |
123
|
692
|
|
|
|
|
9272
|
$classes{$st->object->uri_value}++ |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
} |
126
|
|
|
|
|
|
|
|
127
|
4687
|
100
|
66
|
|
|
136715
|
if ((!$gen->has_level) || ($gen->has_level && $gen->level > 2)) { |
|
|
|
66
|
|
|
|
|
128
|
4675
|
|
|
|
|
7297
|
$properties{$st->predicate->uri_value}{'countsubjects'}{$st->subject->sse} = 1; |
129
|
4675
|
|
|
|
|
55733
|
$properties{$st->predicate->uri_value}{'countobjects'}{$st->object->sse} = 1; |
130
|
|
|
|
|
|
|
} |
131
|
4687
|
|
|
|
|
74482
|
$progress->update(message => "Examening triples"); |
132
|
6
|
|
|
|
|
154
|
}); |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# Finally, we update the attributes above, they are returned as a side-effect |
136
|
6
|
|
|
|
|
2519
|
$self->vocabularies(\%vocab_counter); |
137
|
6
|
|
|
|
|
162
|
$self->entities(scalar keys %entities); |
138
|
6
|
|
|
|
|
186
|
$self->properties(scalar keys %properties); |
139
|
6
|
|
|
|
|
159
|
$self->subjects(scalar keys %subjects); |
140
|
6
|
|
|
|
|
153
|
$self->objects(scalar keys %objects); |
141
|
6
|
50
|
33
|
|
|
185
|
if ((!$gen->has_level) || ($gen->has_level && $gen->level >= 1)) { |
|
|
|
66
|
|
|
|
|
142
|
6
|
|
|
|
|
163
|
$self->propertyPartitions(\%properties); |
143
|
6
|
|
|
|
|
172
|
$self->classPartitions(\%classes); |
144
|
|
|
|
|
|
|
} |
145
|
6
|
|
|
|
|
21
|
$progress->update(message => "Data transfer"); |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=head1 FURTHER DOCUMENTATION |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
Please see L<RDF::Generator::Void> for further documentation. |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=head1 AUTHORS AND COPYRIGHT |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Please see L<RDF::Generator::Void> for information about authors and copyright for this module. |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=cut |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
1; |