line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTML::DublinCore; |
2
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
66585
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
68
|
|
4
|
2
|
|
|
2
|
|
11
|
use warnings; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
61
|
|
5
|
|
|
|
|
|
|
|
6
|
2
|
|
|
2
|
|
11
|
use Carp qw( croak ); |
|
2
|
|
|
|
|
9
|
|
|
2
|
|
|
|
|
159
|
|
7
|
2
|
|
|
2
|
|
11
|
use base qw( DublinCore::Record HTML::Parser ); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
3669
|
|
8
|
|
|
|
|
|
|
|
9
|
2
|
|
|
2
|
|
45144
|
use DublinCore::Element; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
19
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
our $VERSION = .4; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 NAME |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
HTML::DublinCore - Extract Dublin Core metadata from HTML |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 SYNOPSIS |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
use HTML::DublinCore; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
## pass HTML to constructor |
22
|
|
|
|
|
|
|
my $dc = HTML::DublinCore->new( $html ); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
## get the title element and print it's content |
25
|
|
|
|
|
|
|
my $title = $dc->element( 'Title' ); |
26
|
|
|
|
|
|
|
print "title: ", $title->content(), "\n"; |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
## get the same title content in one step |
29
|
|
|
|
|
|
|
print "title: ", $dc->element( 'Title' )->content(), "\n"; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
## list context will retrieve all of a particular element |
32
|
|
|
|
|
|
|
foreach my $element ( $dc->element( 'Creator' ) ) { |
33
|
|
|
|
|
|
|
print "creator: ",$element->content(),"\n"; |
34
|
|
|
|
|
|
|
} |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
## qualified dublin core |
37
|
|
|
|
|
|
|
my $creation = $dc->element( 'Date.created' )->content(); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=head1 DESCRIPTION |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
HTML::DublinCore is a module for easily extracting Dublin Core metadata |
42
|
|
|
|
|
|
|
that is embedded in HTML documents. The Dublin Core is a small set of metadata |
43
|
|
|
|
|
|
|
elements for describing information resources. Dublin Core is typically |
44
|
|
|
|
|
|
|
stored in the EHEADE of and HTML document using the EMETAE tag. |
45
|
|
|
|
|
|
|
For more information on embedding DublinCore in HTML see RFC 2731 |
46
|
|
|
|
|
|
|
L. For a definition of the |
47
|
|
|
|
|
|
|
meaning of various Dublin Core elements please see |
48
|
|
|
|
|
|
|
L. |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
HTML::DublinCore actually extends Brian Cassidy's excellent DublinCore::Record |
51
|
|
|
|
|
|
|
framework by adding some asHTML() methods, and a new constructor. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=head1 METHODS |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=cut |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
## valid dublin core elements |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head2 new() |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
Constructor which you pass HTML content. |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
$dc = HTML::DublinCore->new( $html ); |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=cut |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
sub new { |
68
|
1
|
|
|
1
|
1
|
506
|
my ( $class, $html ) = @_; |
69
|
|
|
|
|
|
|
|
70
|
1
|
|
|
|
|
15
|
my $self = $class->SUPER::new; |
71
|
|
|
|
|
|
|
|
72
|
1
|
|
|
|
|
54
|
bless $self, $class; |
73
|
|
|
|
|
|
|
|
74
|
1
|
50
|
|
|
|
5
|
croak( "please supply string of HTML as argument to new()" ) if !$html; |
75
|
1
|
|
|
|
|
14
|
$self->{ "DC_errors" } = []; |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
## initialize our parser, and parse |
78
|
1
|
|
|
|
|
11
|
$self->init(); |
79
|
1
|
|
|
|
|
116
|
$self->parse( $html ); |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=head2 asHtml() |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
Serialize your Dublin Core metadata as HTML EMETAE tags. |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
print $dc->asHtml(); |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=cut |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
sub asHtml { |
92
|
1
|
|
|
1
|
1
|
5513
|
my $self = shift; |
93
|
1
|
|
|
|
|
3
|
my $html = ''; |
94
|
|
|
|
|
|
|
|
95
|
1
|
|
|
|
|
9
|
foreach my $element ( $self->elements ) { |
96
|
8
|
|
|
|
|
55
|
$html .= $element->asHtml() . "\n"; |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
|
99
|
1
|
|
|
|
|
8
|
return( $html ); |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=head1 TODO |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=over 4 |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=item * More comprehensive tests. |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=item * Handle HTML entities properly. |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=item * Collect error messages so they can be reported out of the object. |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=back |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=head1 SEE ALSO |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=over 4 |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=item * DublinCore::Record |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=item * Dublin Core L |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=item * RFC 2731 L |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=item * HTML::Parser |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=item * perl4lib L |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=back |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head1 AUTHORS |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=over 4 |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=item * Ed Summers Eehs@pobox.comE |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=item * Brian Cassidy Ebricas@cpan.orgE |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=back |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
Copyright 2004 by Ed Summers, Brian Cassidy |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
145
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=cut |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
## start tag hander. This automatically gets called in new() when we |
150
|
|
|
|
|
|
|
## parse HTML since HTML::DublinCore inherits from HTML::Parser. |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
sub start { |
153
|
1752
|
|
|
1752
|
1
|
16727
|
my ( $self, $tagname, $attr, $attrseq, $origtext ) = @_; |
154
|
1752
|
100
|
|
|
|
11554
|
return if ( $tagname ne 'meta' ); |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
## lowercase keys |
157
|
13
|
|
|
|
|
31
|
my %attributes = map { lc($_) => $attr->{$_} } keys( %$attr ); |
|
26
|
|
|
|
|
76
|
|
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
## parse name attribute (eg. DC.Identifier.ISBN ) |
160
|
13
|
100
|
|
|
|
43
|
return( undef ) if ! exists( $attributes{ name } ); |
161
|
|
|
|
|
|
|
my ( $namespace, $element, $qualifier ) = |
162
|
12
|
|
|
|
|
36
|
split /\./, lc( $attributes{ name } ); |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
## ignore non-DublinCore data |
165
|
12
|
100
|
|
|
|
47
|
return( undef ) if $namespace ne 'dc'; |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
## make sure element is dublin core |
168
|
8
|
50
|
|
|
|
13
|
if ( ! grep { $element } @DublinCore::Record::VALID_ELEMENTS ) { |
|
120
|
|
|
|
|
153
|
|
169
|
0
|
|
|
|
|
0
|
$self->_error( "invalid element: $element found" ); |
170
|
0
|
|
|
|
|
0
|
return( undef ); |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
## return if we don't have a content attribute |
174
|
8
|
50
|
|
|
|
27
|
if ( ! exists( $attributes{ content } ) ) { |
175
|
0
|
|
|
|
|
0
|
$self->_error( "element $element lacks content" ); |
176
|
0
|
|
|
|
|
0
|
return( undef ); |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
## create a new HTML::DublinCore::Element object |
180
|
8
|
|
|
|
|
29
|
my $dc = DublinCore::Element->new(); |
181
|
8
|
|
|
|
|
299
|
$dc->name( $element ); |
182
|
8
|
|
|
|
|
191
|
$dc->qualifier( $qualifier ); |
183
|
8
|
|
|
|
|
159
|
$dc->content( $attributes{ content } ); |
184
|
8
|
50
|
|
|
|
154
|
if ( exists( $attributes{ scheme } ) ) { |
185
|
0
|
|
|
|
|
0
|
$dc->scheme( $attributes{ scheme } ); |
186
|
|
|
|
|
|
|
} |
187
|
8
|
50
|
|
|
|
19
|
if ( exists( $attributes{ lang } ) ) { |
188
|
0
|
|
|
|
|
0
|
$dc->language( $attributes{ lang } ); |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
## stash it for later |
192
|
8
|
|
|
|
|
22
|
$self->add( $dc ); |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
sub _error { |
196
|
0
|
|
|
0
|
|
0
|
my ( $self, $msg ) = @_; |
197
|
0
|
|
|
|
|
0
|
push( @{ $self->{ DC_errors } }, $msg ); |
|
0
|
|
|
|
|
0
|
|
198
|
0
|
|
|
|
|
0
|
return( 1 ); |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
# add in a method to write DC elements as HTML meta tags. |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
package DublinCore::Element; |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
sub asHtml { |
206
|
8
|
|
|
8
|
0
|
11
|
my $self = shift; |
207
|
8
|
|
|
|
|
16
|
my $name = ucfirst( $self->name() ); |
208
|
8
|
100
|
|
|
|
81
|
if ( $self->qualifier() ) { $name .= '.' . $self->qualifier(); } |
|
2
|
|
|
|
|
17
|
|
209
|
8
|
|
|
|
|
75
|
my $content = $self->content(); |
210
|
8
|
|
|
|
|
65
|
my $scheme = $self->scheme(); |
211
|
8
|
|
|
|
|
55
|
my $lang = $self->language(); |
212
|
|
|
|
|
|
|
|
213
|
8
|
|
|
|
|
64
|
my $html = qq(
|
214
|
8
|
50
|
|
|
|
13
|
if ( $scheme ) { |
215
|
0
|
|
|
|
|
0
|
$html .= qq( scheme="$scheme"); |
216
|
|
|
|
|
|
|
} |
217
|
8
|
50
|
|
|
|
16
|
if ( $lang ) { |
218
|
0
|
|
|
|
|
0
|
$html .= qq( lang="$lang"); |
219
|
|
|
|
|
|
|
} |
220
|
8
|
|
|
|
|
10
|
$html .= '>'; |
221
|
8
|
|
|
|
|
37
|
return ( $html ); |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
1; |