| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Web::Microformats2::Parser; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 2 |  |  | 2 |  | 1144 | use Moo; | 
|  | 2 |  |  |  |  | 23250 |  | 
|  | 2 |  |  |  |  | 9 |  | 
| 4 | 2 |  |  | 2 |  | 4301 | use Types::Standard qw(InstanceOf); | 
|  | 2 |  |  |  |  | 150224 |  | 
|  | 2 |  |  |  |  | 18 |  | 
| 5 | 2 |  |  | 2 |  | 2625 | use HTML::TreeBuilder::XPath; | 
|  | 2 |  |  |  |  | 131481 |  | 
|  | 2 |  |  |  |  | 17 |  | 
| 6 | 2 |  |  | 2 |  | 84 | use HTML::Entities; | 
|  | 2 |  |  |  |  | 5 |  | 
|  | 2 |  |  |  |  | 120 |  | 
| 7 | 2 |  |  | 2 |  | 29 | use v5.10; | 
|  | 2 |  |  |  |  | 60 |  | 
| 8 | 2 |  |  | 2 |  | 14 | use Scalar::Util qw(blessed); | 
|  | 2 |  |  |  |  | 4 |  | 
|  | 2 |  |  |  |  | 131 |  | 
| 9 | 2 |  |  | 2 |  | 741 | use JSON; | 
|  | 2 |  |  |  |  | 8413 |  | 
|  | 2 |  |  |  |  | 19 |  | 
| 10 | 2 |  |  | 2 |  | 1426 | use DateTime::Format::ISO8601; | 
|  | 2 |  |  |  |  | 1174242 |  | 
|  | 2 |  |  |  |  | 95 |  | 
| 11 | 2 |  |  | 2 |  | 1438 | use URI; | 
|  | 2 |  |  |  |  | 9395 |  | 
|  | 2 |  |  |  |  | 65 |  | 
| 12 | 2 |  |  | 2 |  | 15 | use Carp; | 
|  | 2 |  |  |  |  | 5 |  | 
|  | 2 |  |  |  |  | 131 |  | 
| 13 |  |  |  |  |  |  |  | 
| 14 | 2 |  |  | 2 |  | 1026 | use Web::Microformats2::Item; | 
|  | 2 |  |  |  |  | 9 |  | 
|  | 2 |  |  |  |  | 103 |  | 
| 15 | 2 |  |  | 2 |  | 1009 | use Web::Microformats2::Document; | 
|  | 2 |  |  |  |  | 8 |  | 
|  | 2 |  |  |  |  | 77 |  | 
| 16 |  |  |  |  |  |  |  | 
| 17 | 2 |  |  | 2 |  | 1205 | use Readonly; | 
|  | 2 |  |  |  |  | 8044 |  | 
|  | 2 |  |  |  |  | 8174 |  | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | has 'url_context' => ( | 
| 20 |  |  |  |  |  |  | is => 'rw', | 
| 21 |  |  |  |  |  |  | isa => InstanceOf['URI'], | 
| 22 |  |  |  |  |  |  | coerce => sub { URI->new( $_[0] ) }, | 
| 23 |  |  |  |  |  |  | lazy => 1, | 
| 24 |  |  |  |  |  |  | clearer => '_clear_url_context', | 
| 25 |  |  |  |  |  |  | default => sub { URI->new( 'http://example.com/' ) }, | 
| 26 |  |  |  |  |  |  | ); | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | sub parse { | 
| 29 | 74 |  |  | 74 | 1 | 307531 | my $self = shift; | 
| 30 | 74 |  |  |  |  | 215 | my ( $html, %args ) = @_; | 
| 31 |  |  |  |  |  |  |  | 
| 32 | 74 |  |  |  |  | 225 | $self->_clear; | 
| 33 | 74 | 50 |  |  |  | 615 | if ( $args{ url_context } ) { | 
| 34 | 0 |  |  |  |  | 0 | $self->url_context( $args{url_context} ); | 
| 35 |  |  |  |  |  |  | } | 
| 36 |  |  |  |  |  |  |  | 
| 37 | 74 |  |  |  |  | 516 | my $tree = HTML::TreeBuilder::XPath->new; | 
| 38 | 74 |  |  |  |  | 18883 | $tree->ignore_unknown( 0 ); | 
| 39 | 74 |  |  |  |  | 921 | $tree->no_space_compacting( 1 ); | 
| 40 | 74 |  |  |  |  | 754 | $tree->ignore_ignorable_whitespace( 0 ); | 
| 41 | 74 |  |  |  |  | 712 | $tree->no_expand_entities( 1 ); | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | # Adding HTML5 elements because it's 2018. | 
| 44 | 74 |  |  |  |  | 1171 | foreach (qw(article aside details figcaption figure footer header main mark nav section summary time)) { | 
| 45 | 962 |  |  |  |  | 1676 | $HTML::TreeBuilder::isBodyElement{$_}=1; | 
| 46 |  |  |  |  |  |  | } | 
| 47 |  |  |  |  |  |  |  | 
| 48 | 74 |  |  |  |  | 963 | $tree->parse( $html ); | 
| 49 |  |  |  |  |  |  |  | 
| 50 | 74 | 100 |  |  |  | 193051 | if ( my $base_url = $tree->findvalue( './/base/@href' ) ) { | 
| 51 | 6 |  |  |  |  | 7247 | $self->url_context( $base_url ); | 
| 52 |  |  |  |  |  |  | } | 
| 53 |  |  |  |  |  |  |  | 
| 54 | 74 |  |  |  |  | 96305 | my $document = Web::Microformats2::Document->new; | 
| 55 | 74 |  |  |  |  | 4745 | $self->analyze_element( $document, $tree ); | 
| 56 | 74 |  |  |  |  | 1953 | return $document; | 
| 57 |  |  |  |  |  |  | } | 
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  | # analyze_element: Recursive method that scans an element for new microformat | 
| 60 |  |  |  |  |  |  | # definitions (h-*) or properties (u|dt|e|p-*) and then does the right thing. | 
| 61 |  |  |  |  |  |  | # It also builds up the MF2 document's rels and rel-urls as it goes. | 
| 62 |  |  |  |  |  |  | sub analyze_element { | 
| 63 | 1913 |  |  | 1913 | 0 | 2828 | my $self = shift; | 
| 64 | 1913 |  |  |  |  | 3342 | my ( $document, $element, $current_item ) = @_; | 
| 65 |  |  |  |  |  |  |  | 
| 66 | 1913 | 100 | 66 |  |  | 8833 | return unless blessed( $element) && $element->isa( 'HTML::Element' ); | 
| 67 |  |  |  |  |  |  |  | 
| 68 | 898 |  |  |  |  | 2424 | $self->_add_element_rels_to_mf2_document( $element, $document ); | 
| 69 |  |  |  |  |  |  |  | 
| 70 | 898 |  |  |  |  | 7113 | my $mf2_attrs = $self->_tease_out_mf2_attrs( $element ); | 
| 71 |  |  |  |  |  |  |  | 
| 72 | 898 |  |  |  |  | 1725 | my $h_attrs = delete $mf2_attrs->{h}; | 
| 73 | 898 |  |  |  |  | 1322 | my $new_item; | 
| 74 | 898 | 100 |  |  |  | 1795 | if ( $h_attrs->[0] ) { | 
| 75 | 136 |  |  |  |  | 3281 | $new_item = Web::Microformats2::Item->new( { | 
| 76 |  |  |  |  |  |  | types => $h_attrs, | 
| 77 |  |  |  |  |  |  | parent => $current_item, | 
| 78 |  |  |  |  |  |  | } ); | 
| 79 | 136 |  |  |  |  | 5120 | $document->add_item( $new_item ); | 
| 80 | 136 | 100 |  |  |  | 7751 | unless ( $current_item ) { | 
| 81 | 94 |  |  |  |  | 1626 | $document->add_top_level_item( $new_item ); | 
| 82 |  |  |  |  |  |  | } | 
| 83 |  |  |  |  |  |  | } | 
| 84 |  |  |  |  |  |  |  | 
| 85 | 898 |  |  |  |  | 7843 | while (my ($mf2_type, $properties_ref ) = each( %$mf2_attrs ) ) { | 
| 86 | 3592 | 100 |  |  |  | 8263 | next unless $current_item; | 
| 87 | 1924 | 100 |  |  |  | 2443 | next unless @{ $properties_ref }; | 
|  | 1924 |  |  |  |  | 5749 |  | 
| 88 | 303 | 100 |  |  |  | 895 | if ( $mf2_type eq 'p' ) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 89 |  |  |  |  |  |  | # p-property: | 
| 90 |  |  |  |  |  |  | # A catch-all generic property to store on the current | 
| 91 |  |  |  |  |  |  | # MF2 item being defined. | 
| 92 |  |  |  |  |  |  | # (If this same element begins an h-* microformat, we don't parse | 
| 93 |  |  |  |  |  |  | # this p-* any further; instead we'll store the new item under | 
| 94 |  |  |  |  |  |  | # this property name.) | 
| 95 | 190 | 100 |  |  |  | 409 | unless ( $new_item ) { | 
| 96 | 158 |  |  |  |  | 293 | for my $property ( @$properties_ref ) { | 
| 97 | 163 |  |  |  |  | 399 | my $value = $self->_parse_property_value( $element ); | 
| 98 | 163 | 100 |  |  |  | 406 | if ( defined $value ) { | 
| 99 | 161 |  |  |  |  | 667 | $current_item->add_property( | 
| 100 |  |  |  |  |  |  | "p-$property", | 
| 101 |  |  |  |  |  |  | $value, | 
| 102 |  |  |  |  |  |  | ); | 
| 103 |  |  |  |  |  |  | } | 
| 104 |  |  |  |  |  |  | } | 
| 105 |  |  |  |  |  |  | } | 
| 106 |  |  |  |  |  |  | } | 
| 107 |  |  |  |  |  |  | elsif ( $mf2_type eq 'u' ) { | 
| 108 |  |  |  |  |  |  | # u-property: | 
| 109 |  |  |  |  |  |  | # Look for a URL in child attributes, and store it as a property. | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | # (But not if a new h-format has been defined, in which case we'll | 
| 112 |  |  |  |  |  |  | # just use the u-property's name to store it. Why would you do that | 
| 113 |  |  |  |  |  |  | # instead of using a p-property? I don't know, but the tests demand | 
| 114 |  |  |  |  |  |  | # it.) | 
| 115 | 44 | 100 |  |  |  | 129 | unless ( $new_item ) { | 
| 116 | 42 |  |  |  |  | 103 | for my $property ( @$properties_ref ) { | 
| 117 | 43 |  |  |  |  | 105 | my $vcp_fragments_ref = | 
| 118 |  |  |  |  |  |  | $self->_seek_value_class_pattern( $element ); | 
| 119 | 43 | 100 |  |  |  | 120 | if ( my $url = $self->_tease_out_url( $element ) ) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 120 | 38 |  |  |  |  | 174 | $current_item->add_property( "u-$property", $url ); | 
| 121 |  |  |  |  |  |  | } | 
| 122 |  |  |  |  |  |  | elsif ( @$vcp_fragments_ref ) { | 
| 123 | 2 |  |  |  |  | 11 | $current_item->add_property( | 
| 124 |  |  |  |  |  |  | "u-$property", | 
| 125 |  |  |  |  |  |  | join q{}, @$vcp_fragments_ref, | 
| 126 |  |  |  |  |  |  | ) | 
| 127 |  |  |  |  |  |  | } | 
| 128 |  |  |  |  |  |  | elsif ( $url = $self->_tease_out_unlikely_url($element)) { | 
| 129 | 2 |  |  |  |  | 10 | $current_item->add_property( "u-$property", $url ); | 
| 130 |  |  |  |  |  |  | } | 
| 131 |  |  |  |  |  |  | else { | 
| 132 | 1 |  |  |  |  | 6 | $current_item->add_property( | 
| 133 |  |  |  |  |  |  | "u-$property", | 
| 134 |  |  |  |  |  |  | _trim( $element->as_text ), | 
| 135 |  |  |  |  |  |  | ); | 
| 136 |  |  |  |  |  |  | } | 
| 137 |  |  |  |  |  |  | } | 
| 138 |  |  |  |  |  |  | } | 
| 139 |  |  |  |  |  |  | } | 
| 140 |  |  |  |  |  |  | elsif ( $mf2_type eq 'e' ) { | 
| 141 |  |  |  |  |  |  | # e-property: | 
| 142 |  |  |  |  |  |  | # Create a struct with keys "html" and "value", and then | 
| 143 |  |  |  |  |  |  | # store this in a new property. | 
| 144 | 18 |  |  |  |  | 54 | for my $property ( @$properties_ref ) { | 
| 145 | 18 |  |  |  |  | 30 | my %e_data; | 
| 146 | 18 |  |  |  |  | 58 | for my $content_piece ( $element->content_list ) { | 
| 147 |  |  |  |  |  |  |  | 
| 148 |  |  |  |  |  |  | # Make sure all URLs found in certain HTML attrs are | 
| 149 |  |  |  |  |  |  | # absolute. | 
| 150 | 42 | 100 |  |  |  | 4705 | if ( ref $content_piece ) { | 
| 151 |  |  |  |  |  |  | # XXX This is probably a bit too loose about what tags | 
| 152 |  |  |  |  |  |  | #     these attrs can appear on. | 
| 153 | 14 |  |  |  |  | 61 | for my $href_element ( $content_piece, $content_piece->findnodes('.//*[@href|@src]') ) { | 
| 154 | 22 |  |  |  |  | 22975 | foreach ( qw( href src ) ) { | 
| 155 | 44 |  |  |  |  | 219 | my $url = $href_element->attr($_); | 
| 156 | 44 | 100 |  |  |  | 529 | if ( $url ) { | 
| 157 | 9 |  |  |  |  | 218 | my $abs_url = URI->new_abs( $url, $self->url_context)->as_string; | 
| 158 | 9 |  |  |  |  | 1913 | $href_element->attr( $_=> $abs_url ); | 
| 159 |  |  |  |  |  |  | } | 
| 160 |  |  |  |  |  |  | } | 
| 161 |  |  |  |  |  |  | } | 
| 162 | 14 |  |  |  |  | 81 | $e_data{html} .= $content_piece->as_HTML( '<>&', undef, {} ); | 
| 163 |  |  |  |  |  |  |  | 
| 164 |  |  |  |  |  |  | } | 
| 165 |  |  |  |  |  |  | else { | 
| 166 |  |  |  |  |  |  |  | 
| 167 | 28 |  |  |  |  | 98 | $e_data{html} .= $content_piece; | 
| 168 |  |  |  |  |  |  | } | 
| 169 |  |  |  |  |  |  | } | 
| 170 | 18 |  |  |  |  | 443 | $e_data{ value } = _trim (decode_entities( $element->as_text) ); | 
| 171 |  |  |  |  |  |  |  | 
| 172 |  |  |  |  |  |  | # The official tests specifically trim space-glyphs per se; | 
| 173 |  |  |  |  |  |  | # all other trailing whitespace stays. Shrug. | 
| 174 | 18 |  |  |  |  | 180 | $e_data{ html } =~ s/ +$//; | 
| 175 |  |  |  |  |  |  |  | 
| 176 | 18 |  |  |  |  | 101 | $current_item->add_property( "e-$property", \%e_data ); | 
| 177 |  |  |  |  |  |  | } | 
| 178 |  |  |  |  |  |  | } | 
| 179 |  |  |  |  |  |  | elsif ( $mf2_type eq 'dt' ) { | 
| 180 |  |  |  |  |  |  | # dt-property: | 
| 181 |  |  |  |  |  |  | # Read a child attribute as an ISO-8601 date-time string. | 
| 182 |  |  |  |  |  |  | # Store it as a property in the MF2 date-time representation format. | 
| 183 | 51 |  |  |  |  | 113 | for my $property ( @$properties_ref ) { | 
| 184 | 51 |  |  |  |  | 77 | my $dt_string; | 
| 185 | 51 |  |  |  |  | 123 | my $vcp_fragments_ref = | 
| 186 |  |  |  |  |  |  | $self->_seek_value_class_pattern( $element ); | 
| 187 | 51 | 100 |  |  |  | 166 | if ( @$vcp_fragments_ref ) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 188 | 22 |  |  |  |  | 82 | $dt_string = $self->_format_datetime(join (q{T}, @$vcp_fragments_ref), $current_item); | 
| 189 |  |  |  |  |  |  | } | 
| 190 |  |  |  |  |  |  | elsif ( my $alt = $element->findvalue( './@datetime|@title|@value' ) ) { | 
| 191 | 26 |  |  |  |  | 23486 | $dt_string = $alt; | 
| 192 |  |  |  |  |  |  | } | 
| 193 |  |  |  |  |  |  | elsif ( my $text = $element->as_trimmed_text ) { | 
| 194 | 3 |  |  |  |  | 2414 | $dt_string = $text; | 
| 195 |  |  |  |  |  |  | } | 
| 196 | 51 | 50 |  |  |  | 2350 | if ( defined $dt_string ) { | 
| 197 | 51 |  |  |  |  | 225 | $current_item->add_property( | 
| 198 |  |  |  |  |  |  | "dt-$property", | 
| 199 |  |  |  |  |  |  | $dt_string, | 
| 200 |  |  |  |  |  |  | ); | 
| 201 |  |  |  |  |  |  | } | 
| 202 |  |  |  |  |  |  | } | 
| 203 |  |  |  |  |  |  | } | 
| 204 |  |  |  |  |  |  | } | 
| 205 |  |  |  |  |  |  |  | 
| 206 | 898 | 100 |  |  |  | 1872 | if ( $new_item ) { | 
| 207 | 136 |  |  |  |  | 462 | for my $child_element ( $element->content_list ) { | 
| 208 | 587 |  |  |  |  | 3434 | $self->analyze_element( $document, $child_element, $new_item ); | 
| 209 |  |  |  |  |  |  | } | 
| 210 |  |  |  |  |  |  |  | 
| 211 |  |  |  |  |  |  | # Now that the new item's been recursively scanned, perform | 
| 212 |  |  |  |  |  |  | # some post-processing. | 
| 213 |  |  |  |  |  |  | # First, add any implied properties. | 
| 214 | 136 |  |  |  |  | 505 | for my $impliable_property (qw(name photo url)) { | 
| 215 | 408 | 100 |  |  |  | 15361 | unless ( $new_item->has_property( $impliable_property ) ) { | 
| 216 | 324 |  |  |  |  | 29901 | my $method = "_set_implied_$impliable_property"; | 
| 217 | 324 |  |  |  |  | 944 | $self->$method( $new_item, $element ); | 
| 218 |  |  |  |  |  |  | } | 
| 219 |  |  |  |  |  |  | } | 
| 220 |  |  |  |  |  |  |  | 
| 221 |  |  |  |  |  |  | # Put this onto the parent item's property-list, or its children-list, | 
| 222 |  |  |  |  |  |  | # depending on context. | 
| 223 | 136 |  |  |  |  | 2031 | my @item_properties; | 
| 224 | 136 |  |  |  |  | 243 | for my $prefix (qw( u p ) ) { | 
| 225 | 272 |  |  |  |  | 373 | push @item_properties, map { "$prefix-$_" } @{ $mf2_attrs->{$prefix} }; | 
|  | 37 |  |  |  |  | 141 |  | 
|  | 272 |  |  |  |  | 574 |  | 
| 226 |  |  |  |  |  |  | } | 
| 227 | 136 | 100 | 100 |  |  | 886 | if ( $current_item && @item_properties ) { | 
|  |  | 100 |  |  |  |  |  | 
| 228 | 33 |  |  |  |  | 79 | for my $item_property ( @item_properties ) { | 
| 229 |  |  |  |  |  |  | # We place a clone of the new item into the current item's | 
| 230 |  |  |  |  |  |  | # property list, rather than the item itself. This allows for | 
| 231 |  |  |  |  |  |  | # edge cases where the same item needs to go under multiple | 
| 232 |  |  |  |  |  |  | # properties, but carry different 'value' attributes. | 
| 233 | 37 |  |  |  |  | 407 | my $cloned_new_item = | 
| 234 |  |  |  |  |  |  | bless { %$new_item }, ref $new_item; | 
| 235 |  |  |  |  |  |  |  | 
| 236 | 37 |  |  |  |  | 212 | $current_item | 
| 237 |  |  |  |  |  |  | ->add_property( "$item_property", $cloned_new_item ); | 
| 238 |  |  |  |  |  |  |  | 
| 239 |  |  |  |  |  |  | # Now add a "value" attribute to this new item, if appropriate, | 
| 240 |  |  |  |  |  |  | # according to the MF2 spec. | 
| 241 | 37 |  |  |  |  | 67 | my $value_attribute; | 
| 242 | 37 | 100 |  |  |  | 161 | if ( $item_property =~ /^p-/ ) { | 
|  |  | 50 |  |  |  |  |  | 
| 243 | 35 | 100 |  |  |  | 114 | if ( my $name = $new_item->get_properties('name')->[0] ) { | 
| 244 | 28 |  |  |  |  | 61 | $value_attribute = $name; | 
| 245 |  |  |  |  |  |  | } | 
| 246 |  |  |  |  |  |  | else { | 
| 247 | 7 |  |  |  |  | 21 | $value_attribute = | 
| 248 |  |  |  |  |  |  | $self->_parse_property_value( $element ); | 
| 249 |  |  |  |  |  |  | } | 
| 250 |  |  |  |  |  |  | } | 
| 251 |  |  |  |  |  |  | elsif ( $item_property =~ /^u-/ ) { | 
| 252 | 2 |  |  |  |  | 7 | $value_attribute = $new_item->get_properties('url')->[0]; | 
| 253 |  |  |  |  |  |  | } | 
| 254 |  |  |  |  |  |  |  | 
| 255 | 37 | 50 |  |  |  | 852 | $cloned_new_item->value( $value_attribute ) if defined ($value_attribute); | 
| 256 |  |  |  |  |  |  | } | 
| 257 |  |  |  |  |  |  | } | 
| 258 |  |  |  |  |  |  | elsif ($current_item) { | 
| 259 | 9 |  |  |  |  | 186 | $current_item->add_child ( $new_item ); | 
| 260 |  |  |  |  |  |  | } | 
| 261 |  |  |  |  |  |  |  | 
| 262 |  |  |  |  |  |  | } | 
| 263 |  |  |  |  |  |  | else { | 
| 264 | 762 |  |  |  |  | 1877 | for my $child_element ( $element->content_list ) { | 
| 265 | 1252 |  |  |  |  | 6063 | $self->analyze_element( $document, $child_element, $current_item ); | 
| 266 |  |  |  |  |  |  | } | 
| 267 |  |  |  |  |  |  | } | 
| 268 |  |  |  |  |  |  | } | 
| 269 |  |  |  |  |  |  |  | 
| 270 |  |  |  |  |  |  | sub _tease_out_mf2_attrs { | 
| 271 | 1053 |  |  | 1053 |  | 1579 | my $self = shift; | 
| 272 | 1053 |  |  |  |  | 1798 | my ( $element ) = @_; | 
| 273 |  |  |  |  |  |  |  | 
| 274 | 1053 |  |  |  |  | 1528 | my %mf2_attrs; | 
| 275 | 1053 |  |  |  |  | 1914 | foreach ( qw( h e u dt p ) ) { | 
| 276 | 5265 |  |  |  |  | 10023 | $mf2_attrs{ $_ } = []; | 
| 277 |  |  |  |  |  |  | } | 
| 278 |  |  |  |  |  |  |  | 
| 279 | 1053 |  |  |  |  | 2366 | my $class_attr = $element->attr('class'); | 
| 280 | 1053 | 100 |  |  |  | 11744 | if ( $class_attr ) { | 
| 281 | 538 |  |  |  |  | 3547 | while ($class_attr =~ /\b(h|e|u|dt|p)-([a-z]+(\-[a-z]+)*)($|\s)/g ) { | 
| 282 | 549 |  |  |  |  | 1340 | my $mf2_type = $1; | 
| 283 | 549 |  |  |  |  | 962 | my $mf2_attr = $2; | 
| 284 |  |  |  |  |  |  |  | 
| 285 | 549 |  |  |  |  | 724 | push @{ $mf2_attrs{ $mf2_type } }, $mf2_attr; | 
|  | 549 |  |  |  |  | 2528 |  | 
| 286 |  |  |  |  |  |  | } | 
| 287 |  |  |  |  |  |  | } | 
| 288 |  |  |  |  |  |  |  | 
| 289 | 1053 |  |  |  |  | 2402 | return \%mf2_attrs; | 
| 290 |  |  |  |  |  |  | } | 
| 291 |  |  |  |  |  |  |  | 
| 292 |  |  |  |  |  |  | sub _tease_out_url { | 
| 293 | 43 |  |  | 43 |  | 81 | my $self = shift; | 
| 294 | 43 |  |  |  |  | 90 | my ( $element ) = @_; | 
| 295 |  |  |  |  |  |  |  | 
| 296 | 43 |  |  |  |  | 83 | my $xpath; | 
| 297 |  |  |  |  |  |  | my $url; | 
| 298 | 43 | 100 |  |  |  | 118 | if ( $element->tag =~ /^(a|area|link)$/ ) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 299 | 26 |  |  |  |  | 265 | $xpath = './@href'; | 
| 300 |  |  |  |  |  |  | } | 
| 301 |  |  |  |  |  |  | elsif ( $element->tag =~ /^(img|audio)$/ ) { | 
| 302 | 10 |  |  |  |  | 200 | $xpath = './@src'; | 
| 303 |  |  |  |  |  |  | } | 
| 304 |  |  |  |  |  |  | elsif ( $element->tag eq 'video' ) { | 
| 305 | 1 |  |  |  |  | 19 | $xpath = './@src|@poster'; | 
| 306 |  |  |  |  |  |  | } | 
| 307 |  |  |  |  |  |  | elsif ( $element->tag eq 'object' ) { | 
| 308 | 1 |  |  |  |  | 25 | $xpath = './@data'; | 
| 309 |  |  |  |  |  |  | } | 
| 310 |  |  |  |  |  |  |  | 
| 311 | 43 | 100 |  |  |  | 221 | if ( $xpath ) { | 
| 312 | 38 |  |  |  |  | 147 | $url = $element->findvalue( $xpath ); | 
| 313 |  |  |  |  |  |  | } | 
| 314 |  |  |  |  |  |  |  | 
| 315 | 43 | 100 |  |  |  | 16318 | if ( defined $url ) { | 
| 316 | 38 |  |  |  |  | 952 | $url = URI->new_abs( $url, $self->url_context )->as_string; | 
| 317 |  |  |  |  |  |  | } | 
| 318 |  |  |  |  |  |  |  | 
| 319 | 43 |  |  |  |  | 10288 | return $url; | 
| 320 |  |  |  |  |  |  | } | 
| 321 |  |  |  |  |  |  |  | 
| 322 |  |  |  |  |  |  | sub _tease_out_unlikely_url { | 
| 323 | 3 |  |  | 3 |  | 6 | my $self = shift; | 
| 324 | 3 |  |  |  |  | 8 | my ( $element ) = @_; | 
| 325 |  |  |  |  |  |  |  | 
| 326 | 3 |  |  |  |  | 6 | my $xpath; | 
| 327 |  |  |  |  |  |  | my $url; | 
| 328 | 3 | 100 |  |  |  | 7 | if ( $element->tag eq 'abbr' ) { | 
|  |  | 100 |  |  |  |  |  | 
| 329 | 1 |  |  |  |  | 8 | $xpath = './@title'; | 
| 330 |  |  |  |  |  |  | } | 
| 331 |  |  |  |  |  |  | elsif ( $element->tag =~ /^(data|input)$/ ) { | 
| 332 | 1 |  |  |  |  | 18 | $xpath = './@value'; | 
| 333 |  |  |  |  |  |  | } | 
| 334 |  |  |  |  |  |  |  | 
| 335 | 3 | 100 |  |  |  | 17 | if ( $xpath ) { | 
| 336 | 2 |  |  |  |  | 9 | $url = $element->findvalue( $xpath ); | 
| 337 |  |  |  |  |  |  | } | 
| 338 |  |  |  |  |  |  |  | 
| 339 | 3 |  |  |  |  | 1573 | return $url; | 
| 340 |  |  |  |  |  |  | } | 
| 341 |  |  |  |  |  |  |  | 
| 342 |  |  |  |  |  |  | sub _set_implied_name { | 
| 343 | 83 |  |  | 83 |  | 154 | my $self = shift; | 
| 344 | 83 |  |  |  |  | 172 | my ( $item, $element ) = @_; | 
| 345 |  |  |  |  |  |  |  | 
| 346 | 83 |  |  |  |  | 225 | my $types = $item->types; | 
| 347 |  |  |  |  |  |  |  | 
| 348 | 83 | 100 | 100 |  |  | 1390 | return if $item->has_properties || $item->has_children; | 
| 349 |  |  |  |  |  |  |  | 
| 350 | 59 |  |  |  |  | 6411 | my $xpath; | 
| 351 |  |  |  |  |  |  | my $name; | 
| 352 | 59 |  |  |  |  | 0 | my $kid; | 
| 353 | 59 |  |  |  |  | 107 | my $accept_if_empty = 1; # If true, then null-string names are okay. | 
| 354 | 59 | 100 | 100 |  |  | 195 | if ( $element->tag =~ /^(img|area)$/ ) { | 
|  |  | 100 | 100 |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 355 | 8 |  |  |  |  | 89 | $xpath = './@alt'; | 
| 356 |  |  |  |  |  |  | } | 
| 357 |  |  |  |  |  |  | elsif ( $element->tag eq 'abbr' ) { | 
| 358 | 1 |  |  |  |  | 16 | $xpath = './@title'; | 
| 359 |  |  |  |  |  |  | } | 
| 360 |  |  |  |  |  |  | elsif ( | 
| 361 |  |  |  |  |  |  | ( $kid = $self->_non_h_unique_child( $element, 'img' ) ) | 
| 362 |  |  |  |  |  |  | || ( $kid = $self->_non_h_unique_child( $element, 'area' ) ) | 
| 363 |  |  |  |  |  |  | ) { | 
| 364 | 7 |  |  |  |  | 14 | $xpath = './@alt'; | 
| 365 | 7 |  |  |  |  | 13 | $accept_if_empty = 0; | 
| 366 |  |  |  |  |  |  | } | 
| 367 |  |  |  |  |  |  | elsif ( $kid = $self->_non_h_unique_child( $element, 'abbr' ) ) { | 
| 368 | 1 |  |  |  |  | 3 | $xpath = './@title'; | 
| 369 | 1 |  |  |  |  | 3 | $accept_if_empty = 0; | 
| 370 |  |  |  |  |  |  | } | 
| 371 |  |  |  |  |  |  | elsif ( | 
| 372 |  |  |  |  |  |  | ( $kid = $self->_non_h_unique_grandchild( $element, 'img' ) ) | 
| 373 |  |  |  |  |  |  | || ( $kid = $self->_non_h_unique_grandchild( $element, 'area' ) ) | 
| 374 |  |  |  |  |  |  | ) { | 
| 375 | 3 |  |  |  |  | 7 | $xpath = './@alt'; | 
| 376 | 3 |  |  |  |  | 5 | $accept_if_empty = 0; | 
| 377 |  |  |  |  |  |  | } | 
| 378 |  |  |  |  |  |  | elsif ( $kid = $self->_non_h_unique_grandchild( $element, 'abbr' ) ) { | 
| 379 | 1 |  |  |  |  | 4 | $xpath = './@title'; | 
| 380 | 1 |  |  |  |  | 5 | $accept_if_empty = 0; | 
| 381 |  |  |  |  |  |  | } | 
| 382 |  |  |  |  |  |  |  | 
| 383 | 59 |  | 66 |  |  | 199 | my $foo = $kid || $element; | 
| 384 |  |  |  |  |  |  |  | 
| 385 | 59 | 100 |  |  |  | 124 | if ( $xpath ) { | 
| 386 | 21 |  | 66 |  |  | 67 | my $element_to_check = $kid || $element; | 
| 387 | 21 |  |  |  |  | 78 | my $value = $element_to_check->findvalue( $xpath ); | 
| 388 | 21 | 100 | 66 |  |  | 8499 | if ( ( $value ne q{} ) || $accept_if_empty ) { | 
| 389 | 19 |  |  |  |  | 42 | $name = $value; | 
| 390 |  |  |  |  |  |  | } | 
| 391 |  |  |  |  |  |  | } | 
| 392 |  |  |  |  |  |  |  | 
| 393 | 59 | 100 |  |  |  | 133 | unless ( defined $name ) { | 
| 394 | 40 |  |  |  |  | 117 | $name = _trim( $element->as_text ); | 
| 395 |  |  |  |  |  |  | } | 
| 396 |  |  |  |  |  |  |  | 
| 397 | 59 | 50 |  |  |  | 207 | if ( length $name > 0 ) { | 
| 398 | 59 |  |  |  |  | 187 | $item->add_property( 'p-name', $name ); | 
| 399 |  |  |  |  |  |  | } | 
| 400 |  |  |  |  |  |  |  | 
| 401 |  |  |  |  |  |  | } | 
| 402 |  |  |  |  |  |  |  | 
| 403 |  |  |  |  |  |  | sub _set_implied_photo { | 
| 404 | 126 |  |  | 126 |  | 216 | my $self = shift; | 
| 405 | 126 |  |  |  |  | 255 | my ( $item, $element ) = @_; | 
| 406 |  |  |  |  |  |  |  | 
| 407 | 126 |  |  |  |  | 339 | my $xpath; | 
| 408 |  |  |  |  |  |  | my $url; | 
| 409 | 126 |  |  |  |  | 0 | my $kid; | 
| 410 |  |  |  |  |  |  |  | 
| 411 | 126 | 100 |  |  |  | 401 | if ( $element->tag eq 'img' ) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 412 | 5 |  |  |  |  | 41 | $xpath = './@src'; | 
| 413 |  |  |  |  |  |  | } | 
| 414 |  |  |  |  |  |  | elsif ( $element->tag eq 'object' ) { | 
| 415 | 1 |  |  |  |  | 13 | $xpath = './@data'; | 
| 416 |  |  |  |  |  |  | } | 
| 417 |  |  |  |  |  |  | elsif ( $kid = $self->_non_h_unique_child( $element, 'img' ) ) { | 
| 418 | 4 |  |  |  |  | 10 | $xpath = './@src'; | 
| 419 | 4 |  |  |  |  | 6 | $element = $kid; | 
| 420 |  |  |  |  |  |  | } | 
| 421 |  |  |  |  |  |  | elsif ( $kid = $self->_non_h_unique_child( $element, 'object' ) ) { | 
| 422 | 2 |  |  |  |  | 4 | $xpath = './@data'; | 
| 423 | 2 |  |  |  |  | 4 | $element = $kid; | 
| 424 |  |  |  |  |  |  | } | 
| 425 |  |  |  |  |  |  | elsif ( $kid = $self->_non_h_unique_grandchild( $element, 'img' ) ) { | 
| 426 | 3 |  |  |  |  | 6 | $xpath = './@src'; | 
| 427 | 3 |  |  |  |  | 7 | $element = $kid; | 
| 428 |  |  |  |  |  |  | } | 
| 429 |  |  |  |  |  |  | elsif ( $kid = $self->_non_h_unique_grandchild( $element, 'object' ) ) { | 
| 430 | 1 |  |  |  |  | 2 | $xpath = './@data'; | 
| 431 | 1 |  |  |  |  | 3 | $element = $kid; | 
| 432 |  |  |  |  |  |  | } | 
| 433 |  |  |  |  |  |  |  | 
| 434 | 126 | 100 |  |  |  | 287 | if ( $xpath ) { | 
| 435 | 16 |  |  |  |  | 60 | $url = $element->findvalue( $xpath ); | 
| 436 |  |  |  |  |  |  | } | 
| 437 |  |  |  |  |  |  |  | 
| 438 | 126 | 100 |  |  |  | 6608 | if ( defined $url ) { | 
| 439 | 16 |  |  |  |  | 359 | $url = URI->new_abs( $url, $self->url_context )->as_string; | 
| 440 | 16 |  |  |  |  | 4826 | $item->add_property( 'u-photo', $url ); | 
| 441 |  |  |  |  |  |  | } | 
| 442 |  |  |  |  |  |  |  | 
| 443 |  |  |  |  |  |  | } | 
| 444 |  |  |  |  |  |  |  | 
| 445 |  |  |  |  |  |  | sub _set_implied_url { | 
| 446 | 115 |  |  | 115 |  | 195 | my $self = shift; | 
| 447 | 115 |  |  |  |  | 218 | my ( $item, $element ) = @_; | 
| 448 |  |  |  |  |  |  |  | 
| 449 | 115 |  |  |  |  | 287 | my $xpath; | 
| 450 |  |  |  |  |  |  | my $url; | 
| 451 |  |  |  |  |  |  |  | 
| 452 | 115 |  |  |  |  | 0 | my $kid; | 
| 453 | 115 | 100 | 100 |  |  | 312 | if ( $element->tag =~ /^(a|area)$/ ) { | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  |  | 100 |  |  |  |  | 
| 454 | 23 |  |  |  |  | 216 | $xpath = './@href'; | 
| 455 |  |  |  |  |  |  | } | 
| 456 |  |  |  |  |  |  | elsif ( | 
| 457 |  |  |  |  |  |  | ( $kid = $self->_non_h_unique_child( $element, 'a' ) ) | 
| 458 |  |  |  |  |  |  | || ( $kid = $self->_non_h_unique_child( $element, 'area' ) ) | 
| 459 |  |  |  |  |  |  | || ( $kid = $self->_non_h_unique_grandchild( $element, 'a' ) ) | 
| 460 |  |  |  |  |  |  | || ( $kid = $self->_non_h_unique_grandchild( $element, 'area' ) ) | 
| 461 |  |  |  |  |  |  | ) { | 
| 462 | 10 |  |  |  |  | 23 | $xpath = './@href'; | 
| 463 | 10 |  |  |  |  | 14 | $element = $kid; | 
| 464 |  |  |  |  |  |  | } | 
| 465 |  |  |  |  |  |  |  | 
| 466 | 115 | 100 |  |  |  | 254 | if ( $xpath ) { | 
| 467 | 33 |  |  |  |  | 124 | $url = $element->findvalue( $xpath ); | 
| 468 |  |  |  |  |  |  | } | 
| 469 |  |  |  |  |  |  |  | 
| 470 | 115 | 100 |  |  |  | 12152 | if ( defined $url ) { | 
| 471 | 33 |  |  |  |  | 749 | $url = URI->new_abs( $url, $self->url_context )->as_string; | 
| 472 | 33 |  |  |  |  | 8339 | $item->add_property( 'u-url', $url ); | 
| 473 |  |  |  |  |  |  | } | 
| 474 |  |  |  |  |  |  |  | 
| 475 |  |  |  |  |  |  | } | 
| 476 |  |  |  |  |  |  |  | 
| 477 |  |  |  |  |  |  | sub _non_h_unique_child { | 
| 478 | 642 |  |  | 642 |  | 3680 | my $self = shift; | 
| 479 | 642 |  |  |  |  | 1135 | my ( $element, $tag ) = @_; | 
| 480 |  |  |  |  |  |  |  | 
| 481 | 642 | 100 |  |  |  | 1304 | my @children = grep { (ref $_) && $_->tag eq $tag  } $element->content_list; | 
|  | 1864 |  |  |  |  | 10118 |  | 
| 482 |  |  |  |  |  |  |  | 
| 483 | 642 | 100 |  |  |  | 2469 | if ( @children == 1 ) { | 
| 484 | 38 |  |  |  |  | 108 | my $mf2_attrs = $self->_tease_out_mf2_attrs( $children[0] ); | 
| 485 | 38 | 100 |  |  |  | 105 | if (not ( $mf2_attrs->{h}->[0] ) ) { | 
| 486 | 32 |  |  |  |  | 164 | return $children[0]; | 
| 487 |  |  |  |  |  |  | } | 
| 488 |  |  |  |  |  |  | } | 
| 489 |  |  |  |  |  |  |  | 
| 490 | 610 |  |  |  |  | 2206 | return; | 
| 491 |  |  |  |  |  |  | } | 
| 492 |  |  |  |  |  |  |  | 
| 493 |  |  |  |  |  |  | sub _non_h_unique_grandchild { | 
| 494 | 512 |  |  | 512 |  | 783 | my $self = shift; | 
| 495 | 512 |  |  |  |  | 841 | my ( $element, $tag ) = @_; | 
| 496 |  |  |  |  |  |  |  | 
| 497 | 512 |  |  |  |  | 963 | my @children = grep { ref $_ } $element->content_list; | 
|  | 1675 |  |  |  |  | 4543 |  | 
| 498 |  |  |  |  |  |  |  | 
| 499 | 512 | 100 |  |  |  | 1216 | if ( @children == 1 ) { | 
| 500 | 117 |  |  |  |  | 245 | my $mf2_attrs = $self->_tease_out_mf2_attrs( $children[0] ); | 
| 501 | 117 | 100 |  |  |  | 307 | if (not ( $mf2_attrs->{h}->[0] ) ) { | 
| 502 | 89 |  |  |  |  | 187 | return $self->_non_h_unique_child( $children[0], $tag ); | 
| 503 |  |  |  |  |  |  | } | 
| 504 |  |  |  |  |  |  | } | 
| 505 |  |  |  |  |  |  |  | 
| 506 | 423 |  |  |  |  | 1326 | return; | 
| 507 |  |  |  |  |  |  | } | 
| 508 |  |  |  |  |  |  |  | 
| 509 |  |  |  |  |  |  | sub _clear { | 
| 510 | 74 |  |  | 74 |  | 118 | my $self = shift; | 
| 511 |  |  |  |  |  |  |  | 
| 512 | 74 |  |  |  |  | 2009 | $self->_clear_url_context; | 
| 513 |  |  |  |  |  |  | } | 
| 514 |  |  |  |  |  |  |  | 
| 515 |  |  |  |  |  |  | sub _seek_value_class_pattern { | 
| 516 | 348 |  |  | 348 |  | 551 | my $self = shift; | 
| 517 |  |  |  |  |  |  |  | 
| 518 | 348 |  |  |  |  | 608 | my ( $element, $vcp_fragments_ref ) = @_; | 
| 519 |  |  |  |  |  |  |  | 
| 520 | 348 |  | 100 |  |  | 1188 | $vcp_fragments_ref ||= []; | 
| 521 |  |  |  |  |  |  |  | 
| 522 | 348 |  |  |  |  | 867 | my $class = $element->attr( 'class' ); | 
| 523 | 348 | 100 | 100 |  |  | 4454 | if ( $class && $class =~ /\bvalue(-title)?\b/ ) { | 
| 524 | 57 | 100 | 100 |  |  | 188 | if ( $1 ) { | 
|  |  | 100 |  |  |  |  |  | 
| 525 | 6 |  |  |  |  | 18 | push @$vcp_fragments_ref, $element->attr( 'title' ); | 
| 526 |  |  |  |  |  |  | } | 
| 527 |  |  |  |  |  |  | elsif ( ( $element->tag =~ /^(del|ins|time)$/ ) && defined( $element->attr('datetime'))) { | 
| 528 | 20 |  |  |  |  | 397 | push @$vcp_fragments_ref, $element->attr('datetime'); | 
| 529 |  |  |  |  |  |  | } | 
| 530 |  |  |  |  |  |  | else { | 
| 531 | 31 |  |  |  |  | 376 | my $html; | 
| 532 | 31 |  |  |  |  | 74 | for my $content_piece ( $element->content_list ) { | 
| 533 | 31 | 50 |  |  |  | 182 | if ( ref $content_piece ) { | 
| 534 | 0 |  |  |  |  | 0 | $html .= $content_piece->as_HTML; | 
| 535 |  |  |  |  |  |  | } | 
| 536 |  |  |  |  |  |  | else { | 
| 537 | 31 |  |  |  |  | 85 | $html .= $content_piece; | 
| 538 |  |  |  |  |  |  | } | 
| 539 |  |  |  |  |  |  | } | 
| 540 | 31 |  |  |  |  | 76 | push @$vcp_fragments_ref, $html; | 
| 541 |  |  |  |  |  |  | } | 
| 542 |  |  |  |  |  |  | } | 
| 543 |  |  |  |  |  |  | else { | 
| 544 | 291 |  |  |  |  | 736 | for my $child_element ( grep { ref $_ } $element->content_list ) { | 
|  | 417 |  |  |  |  | 2063 |  | 
| 545 | 84 |  |  |  |  | 194 | $self->_seek_value_class_pattern( | 
| 546 |  |  |  |  |  |  | $child_element, $vcp_fragments_ref | 
| 547 |  |  |  |  |  |  | ); | 
| 548 |  |  |  |  |  |  | } | 
| 549 |  |  |  |  |  |  | } | 
| 550 |  |  |  |  |  |  |  | 
| 551 | 348 |  |  |  |  | 999 | return $vcp_fragments_ref; | 
| 552 |  |  |  |  |  |  | } | 
| 553 |  |  |  |  |  |  |  | 
| 554 |  |  |  |  |  |  | sub _trim { | 
| 555 | 231 |  |  | 231 |  | 123870 | my ($string) = @_; | 
| 556 | 231 |  |  |  |  | 825 | $string =~ s/^\s+//; | 
| 557 | 231 |  |  |  |  | 914 | $string =~ s/\s+$//; | 
| 558 | 231 |  |  |  |  | 633 | return $string; | 
| 559 |  |  |  |  |  |  | } | 
| 560 |  |  |  |  |  |  |  | 
| 561 |  |  |  |  |  |  | sub _format_datetime { | 
| 562 | 22 |  |  | 22 |  | 61 | my ($self, $dt_string, $current_item) = @_; | 
| 563 |  |  |  |  |  |  |  | 
| 564 | 22 |  |  |  |  | 30 | my $dt; | 
| 565 |  |  |  |  |  |  |  | 
| 566 |  |  |  |  |  |  | # Knock off leading/trailing whitespace. | 
| 567 | 22 |  |  |  |  | 44 | $dt_string = _trim($dt_string); | 
| 568 |  |  |  |  |  |  |  | 
| 569 | 22 |  |  |  |  | 48 | $dt_string =~ s/t/T/; | 
| 570 |  |  |  |  |  |  |  | 
| 571 |  |  |  |  |  |  | # Note presence of AM/PM, but toss it out of the string. | 
| 572 | 22 |  |  |  |  | 116 | $dt_string =~ s/((?:a|p)\.?m\.?)//i; | 
| 573 | 22 |  | 50 |  |  | 79 | my $am_or_pm = $1 || ''; | 
| 574 |  |  |  |  |  |  |  | 
| 575 |  |  |  |  |  |  | # Store the provided TZ offset. | 
| 576 | 22 |  |  |  |  | 85 | my ($provided_offset) = $dt_string =~ /([\-\+Z](?:\d\d:?\d\d)?)$/; | 
| 577 | 22 |  | 100 |  |  | 98 | $provided_offset ||= ''; | 
| 578 |  |  |  |  |  |  |  | 
| 579 |  |  |  |  |  |  | # Reformat HHMM offset as HH:MM. | 
| 580 | 22 |  |  |  |  | 101 | $dt_string =~ s/(-|\+)(\d\d)(\d\d)/$1$2:$3/; | 
| 581 |  |  |  |  |  |  |  | 
| 582 |  |  |  |  |  |  | # Store the provided seconds. | 
| 583 | 22 |  |  |  |  | 77 | my ($seconds) = $dt_string =~ /\d\d:\d\d:(\d\d)/; | 
| 584 | 22 | 100 |  |  |  | 54 | $seconds = '' unless defined $seconds; | 
| 585 |  |  |  |  |  |  |  | 
| 586 |  |  |  |  |  |  | # Insert :00 seconds on time when paired with a TZ offset. | 
| 587 | 22 |  |  |  |  | 74 | $dt_string =~ s/T(\d\d:\d\d)([\-\+Z])/T$1:00$2/; | 
| 588 | 22 |  |  |  |  | 54 | $dt_string =~ s/^(\d\d:\d\d)([\-\+Z])/$1:00$2/; | 
| 589 |  |  |  |  |  |  |  | 
| 590 |  |  |  |  |  |  | # Zero-pad hours when only a single-digit hour appears. | 
| 591 | 22 |  |  |  |  | 58 | $dt_string =~ s/T(\d)$/T0$1/; | 
| 592 | 22 |  |  |  |  | 62 | $dt_string =~ s/T(\d):/T0$1:/; | 
| 593 |  |  |  |  |  |  |  | 
| 594 |  |  |  |  |  |  | # Insert :00 minutes on time when only an hour is listed. | 
| 595 | 22 |  |  |  |  | 97 | $dt_string =~ s/T(\d\d)$/T$1:00/; | 
| 596 |  |  |  |  |  |  |  | 
| 597 |  |  |  |  |  |  | # Treat a space separator between date & time as a 'T'. | 
| 598 | 22 |  |  |  |  | 39 | $dt_string =~ s/ /T/; | 
| 599 |  |  |  |  |  |  |  | 
| 600 |  |  |  |  |  |  | # If this is a time with no date, try to apply a previously-seen | 
| 601 |  |  |  |  |  |  | # date to it. | 
| 602 | 22 |  |  |  |  | 39 | my $date_is_defined = 1; | 
| 603 | 22 | 100 |  |  |  | 53 | if ( $dt_string =~ /^\d\d:/ ) { | 
| 604 | 1 | 50 |  |  |  | 25 | if ( my $previous_dt = $current_item->last_seen_date ) { | 
| 605 | 1 |  |  |  |  | 24 | $dt_string = $previous_dt->ymd . "T$dt_string"; | 
| 606 |  |  |  |  |  |  | } | 
| 607 |  |  |  |  |  |  | else { | 
| 608 | 0 |  |  |  |  | 0 | $date_is_defined = 0; | 
| 609 | 0 |  |  |  |  | 0 | carp "Encountered a value-class datetime with only a time, " | 
| 610 |  |  |  |  |  |  | . "no date, and no date defined earlier. Results may " | 
| 611 |  |  |  |  |  |  | . "not be what you expect. (Data: $dt_string)"; | 
| 612 |  |  |  |  |  |  | } | 
| 613 |  |  |  |  |  |  | } | 
| 614 |  |  |  |  |  |  |  | 
| 615 | 22 |  |  |  |  | 54 | eval { | 
| 616 | 22 |  |  |  |  | 102 | $dt = DateTime::Format::ISO8601->new | 
| 617 |  |  |  |  |  |  | ->parse_datetime( $dt_string ); | 
| 618 |  |  |  |  |  |  | }; | 
| 619 |  |  |  |  |  |  |  | 
| 620 | 22 | 50 |  |  |  | 13885 | return if $@; | 
| 621 |  |  |  |  |  |  |  | 
| 622 | 22 | 50 |  |  |  | 58 | if ($date_is_defined) { | 
| 623 | 22 |  |  |  |  | 502 | $current_item->last_seen_date( $dt ); | 
| 624 |  |  |  |  |  |  | } | 
| 625 |  |  |  |  |  |  |  | 
| 626 | 22 | 100 |  |  |  | 817 | if ($am_or_pm =~ /^[pP]/) { | 
| 627 |  |  |  |  |  |  | # There was a 'pm' specified, so add 12 hours. | 
| 628 | 7 |  |  |  |  | 26 | $dt->add( hours => 12 ); | 
| 629 |  |  |  |  |  |  | } | 
| 630 |  |  |  |  |  |  |  | 
| 631 | 22 |  |  |  |  | 6982 | my $format; | 
| 632 | 22 | 100 | 66 |  |  | 161 | if ( ($dt_string =~ /-/) && ($dt_string =~ /[ T]/) ) { | 
|  |  | 50 |  |  |  |  |  | 
| 633 | 21 |  |  |  |  | 41 | my $offset; | 
| 634 | 21 | 100 |  |  |  | 54 | if ($provided_offset eq 'Z') { | 
|  |  | 100 |  |  |  |  |  | 
| 635 | 2 |  |  |  |  | 4 | $offset = 'Z'; | 
| 636 |  |  |  |  |  |  | } | 
| 637 |  |  |  |  |  |  | elsif ($provided_offset) { | 
| 638 | 7 |  |  |  |  | 14 | $offset = '%z'; | 
| 639 |  |  |  |  |  |  | } | 
| 640 |  |  |  |  |  |  | else { | 
| 641 | 12 |  |  |  |  | 20 | $offset = ''; | 
| 642 |  |  |  |  |  |  | } | 
| 643 | 21 | 100 |  |  |  | 61 | $seconds = ":$seconds" if length $seconds; | 
| 644 | 21 |  |  |  |  | 60 | $format = "%Y-%m-%d %H:%M$seconds$offset"; | 
| 645 |  |  |  |  |  |  | } | 
| 646 |  |  |  |  |  |  | elsif ( $dt_string =~ /-/ ) { | 
| 647 | 1 |  |  |  |  | 3 | $format = '%Y-%m-%d'; | 
| 648 |  |  |  |  |  |  | } | 
| 649 |  |  |  |  |  |  |  | 
| 650 | 22 |  |  |  |  | 76 | return $dt->strftime( $format ); | 
| 651 |  |  |  |  |  |  | } | 
| 652 |  |  |  |  |  |  |  | 
| 653 |  |  |  |  |  |  | sub _parse_property_value { | 
| 654 | 170 |  |  | 170 |  | 320 | my ( $self, $element ) = @_; | 
| 655 |  |  |  |  |  |  |  | 
| 656 | 170 |  |  |  |  | 254 | my $value; | 
| 657 |  |  |  |  |  |  |  | 
| 658 | 170 |  |  |  |  | 356 | my $vcp_fragments_ref = | 
| 659 |  |  |  |  |  |  | $self->_seek_value_class_pattern( $element ); | 
| 660 | 170 | 100 |  |  |  | 620 | if ( @$vcp_fragments_ref ) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 661 | 11 |  |  |  |  | 28 | $value = join q{}, @$vcp_fragments_ref; | 
| 662 |  |  |  |  |  |  | } | 
| 663 |  |  |  |  |  |  | elsif ( my $alt = $element->findvalue( './@title|@value|@alt' ) ) { | 
| 664 | 9 |  |  |  |  | 8438 | $value = $alt; | 
| 665 |  |  |  |  |  |  | } | 
| 666 |  |  |  |  |  |  | elsif ( my $text = _trim( decode_entities($element->as_text) ) ) { | 
| 667 | 148 |  |  |  |  | 284 | $value = $text; | 
| 668 |  |  |  |  |  |  | } | 
| 669 |  |  |  |  |  |  |  | 
| 670 | 170 |  |  |  |  | 491 | return $value; | 
| 671 |  |  |  |  |  |  | } | 
| 672 |  |  |  |  |  |  |  | 
| 673 |  |  |  |  |  |  | sub _add_element_rels_to_mf2_document { | 
| 674 | 898 |  |  | 898 |  | 1608 | my ( $self, $element, $document ) = @_; | 
| 675 |  |  |  |  |  |  |  | 
| 676 | 898 | 100 |  |  |  | 2239 | return unless $element->tag =~ /^(a|link)$/; | 
| 677 |  |  |  |  |  |  |  | 
| 678 | 99 |  |  |  |  | 942 | my $rel = $element->attr( 'rel' ); | 
| 679 | 99 | 100 |  |  |  | 1097 | return unless defined $rel; | 
| 680 |  |  |  |  |  |  |  | 
| 681 | 42 |  |  |  |  | 89 | my $href = $element->attr( 'href' ); | 
| 682 | 42 |  |  |  |  | 1155 | my $url = URI->new_abs( $href, $self->url_context)->as_string; | 
| 683 |  |  |  |  |  |  |  | 
| 684 | 42 |  |  |  |  | 5518 | my @rels = split /\s+/, $rel; | 
| 685 | 42 |  |  |  |  | 100 | for my $rel ( @rels ) { | 
| 686 | 48 |  |  |  |  | 151 | $document->add_rel( $rel, $url ); | 
| 687 |  |  |  |  |  |  | } | 
| 688 |  |  |  |  |  |  |  | 
| 689 | 42 |  |  |  |  | 81 | my $rel_url_value = {}; | 
| 690 | 42 |  |  |  |  | 72 | foreach (qw( hreflang media title type ) ) { | 
| 691 | 168 | 50 |  |  |  | 325 | next if defined $rel_url_value->{ $_ }; | 
| 692 | 168 |  |  |  |  | 376 | my $value = $element->attr( $_ ); | 
| 693 | 168 | 100 |  |  |  | 1835 | if ( defined $value ) { | 
| 694 | 6 |  |  |  |  | 18 | $rel_url_value->{ $_ } = $value; | 
| 695 |  |  |  |  |  |  | } | 
| 696 |  |  |  |  |  |  | } | 
| 697 | 42 |  |  |  |  | 118 | my $text = ($element->as_text); | 
| 698 | 42 | 50 |  |  |  | 1032 | if ( defined $text ) { | 
| 699 | 42 |  |  |  |  | 90 | $rel_url_value->{ text } = $text; | 
| 700 |  |  |  |  |  |  | } | 
| 701 |  |  |  |  |  |  |  | 
| 702 | 42 |  |  |  |  | 80 | $rel_url_value->{ rels } = \@rels; | 
| 703 |  |  |  |  |  |  |  | 
| 704 | 42 |  |  |  |  | 126 | $document->add_rel_url( $url, $rel_url_value ); | 
| 705 |  |  |  |  |  |  |  | 
| 706 |  |  |  |  |  |  | } | 
| 707 |  |  |  |  |  |  |  | 
| 708 |  |  |  |  |  |  | 1; | 
| 709 |  |  |  |  |  |  |  | 
| 710 |  |  |  |  |  |  | =pod | 
| 711 |  |  |  |  |  |  |  | 
| 712 |  |  |  |  |  |  | =head1 NAME | 
| 713 |  |  |  |  |  |  |  | 
| 714 |  |  |  |  |  |  | Web::Microformats2::Parser - Read Microformats2 information from HTML | 
| 715 |  |  |  |  |  |  |  | 
| 716 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 717 |  |  |  |  |  |  |  | 
| 718 |  |  |  |  |  |  | An object of this class represents a Microformats2 parser. | 
| 719 |  |  |  |  |  |  |  | 
| 720 |  |  |  |  |  |  | See L<Web::Microformats2> for further context and purpose. | 
| 721 |  |  |  |  |  |  |  | 
| 722 |  |  |  |  |  |  | =head1 METHODS | 
| 723 |  |  |  |  |  |  |  | 
| 724 |  |  |  |  |  |  | =head2 Class Methods | 
| 725 |  |  |  |  |  |  |  | 
| 726 |  |  |  |  |  |  | =head3 new | 
| 727 |  |  |  |  |  |  |  | 
| 728 |  |  |  |  |  |  | $parser = Web::Microformats2::Parser->new; | 
| 729 |  |  |  |  |  |  |  | 
| 730 |  |  |  |  |  |  | Returns a parser object. | 
| 731 |  |  |  |  |  |  |  | 
| 732 |  |  |  |  |  |  | =head2 Object Methods | 
| 733 |  |  |  |  |  |  |  | 
| 734 |  |  |  |  |  |  | =head3 parse | 
| 735 |  |  |  |  |  |  |  | 
| 736 |  |  |  |  |  |  | $doc = $parser->parse( $html, %args ); | 
| 737 |  |  |  |  |  |  |  | 
| 738 |  |  |  |  |  |  | Pass in a string containing HTML which itself contains Microformats2 | 
| 739 |  |  |  |  |  |  | metadata, and receive a L<Web::Microformats2::Document> object in return. | 
| 740 |  |  |  |  |  |  |  | 
| 741 |  |  |  |  |  |  | The optional args hash recognizes the following keys: | 
| 742 |  |  |  |  |  |  |  | 
| 743 |  |  |  |  |  |  | =over | 
| 744 |  |  |  |  |  |  |  | 
| 745 |  |  |  |  |  |  | =item url_context | 
| 746 |  |  |  |  |  |  |  | 
| 747 |  |  |  |  |  |  | A L<URI> object or URI-shaped string that will be used as a context for | 
| 748 |  |  |  |  |  |  | transforming all relative URL properties encountered within MF2 tags | 
| 749 |  |  |  |  |  |  | into absolute URLs. | 
| 750 |  |  |  |  |  |  |  | 
| 751 |  |  |  |  |  |  | The default value is C<http://example.com>, so you'll probably want to | 
| 752 |  |  |  |  |  |  | set this to something more interesting, such as the absolute URL of the | 
| 753 |  |  |  |  |  |  | HTML that we are parsing. | 
| 754 |  |  |  |  |  |  |  | 
| 755 |  |  |  |  |  |  | =back | 
| 756 |  |  |  |  |  |  |  | 
| 757 |  |  |  |  |  |  | =head1 AUTHOR | 
| 758 |  |  |  |  |  |  |  | 
| 759 |  |  |  |  |  |  | Jason McIntosh (jmac@jmac.org) | 
| 760 |  |  |  |  |  |  |  | 
| 761 |  |  |  |  |  |  | =head1 COPYRIGHT AND LICENSE | 
| 762 |  |  |  |  |  |  |  | 
| 763 |  |  |  |  |  |  | This software is Copyright (c) 2018 by Jason McIntosh. | 
| 764 |  |  |  |  |  |  |  | 
| 765 |  |  |  |  |  |  | This is free software, licensed under: | 
| 766 |  |  |  |  |  |  |  | 
| 767 |  |  |  |  |  |  | The MIT (X11) License |