| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Aard; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 1 |  |  | 1 |  | 15880 | use 5.014000; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 35 |  | 
| 4 | 1 |  |  | 1 |  | 3 | use strict; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 31 |  | 
| 5 | 1 |  |  | 1 |  | 4 | use warnings; | 
|  | 1 |  |  |  |  | 4 |  | 
|  | 1 |  |  |  |  | 40 |  | 
| 6 |  |  |  |  |  |  | our $VERSION = '0.001'; | 
| 7 |  |  |  |  |  |  |  | 
| 8 | 1 |  |  | 1 |  | 1735 | use IO::Uncompress::Inflate qw/inflate/; | 
|  | 1 |  |  |  |  | 42819 |  | 
|  | 1 |  |  |  |  | 58 |  | 
| 9 | 1 |  |  | 1 |  | 656 | use IO::Uncompress::Bunzip2 qw/bunzip2/; | 
|  | 1 |  |  |  |  | 5217 |  | 
|  | 1 |  |  |  |  | 56 |  | 
| 10 | 1 |  |  | 1 |  | 6 | use List::Util qw/sum/; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 101 |  | 
| 11 |  |  |  |  |  |  |  | 
| 12 | 1 |  |  | 1 |  | 457 | use JSON::MaybeXS qw/decode_json/; | 
|  | 1 |  |  |  |  | 6546 |  | 
|  | 1 |  |  |  |  | 55 |  | 
| 13 | 1 |  |  | 1 |  | 517 | use UUID::Tiny qw/uuid_to_string/; | 
|  | 1 |  |  |  |  | 18204 |  | 
|  | 1 |  |  |  |  | 106 |  | 
| 14 |  |  |  |  |  |  |  | 
| 15 | 1 |  |  |  |  | 964 | use constant HEADER_SPEC => [ | 
| 16 |  |  |  |  |  |  | [signature             => 'Z4' , 4 ], | 
| 17 |  |  |  |  |  |  | [sha1sum               => 'Z40', 40], | 
| 18 |  |  |  |  |  |  | [version               => 'S>' , 2 ], | 
| 19 |  |  |  |  |  |  | [uuid                  => 'Z16', 16], | 
| 20 |  |  |  |  |  |  | [volume                => 'S>' , 2 ], | 
| 21 |  |  |  |  |  |  | [total_volumes         => 'S>' , 2 ], | 
| 22 |  |  |  |  |  |  | [meta_length           => 'L>' , 4 ], | 
| 23 |  |  |  |  |  |  | [index_count           => 'L>' , 4 ], | 
| 24 |  |  |  |  |  |  | [article_offset        => 'L>' , 4 ], | 
| 25 |  |  |  |  |  |  | [index1_item_format    => 'Z4' , 4 ], | 
| 26 |  |  |  |  |  |  | [key_length_format     => 'Z2' , 2 ], | 
| 27 |  |  |  |  |  |  | [article_length_format => 'Z2' , 2 ], | 
| 28 | 1 |  |  | 1 |  | 7 | ]; | 
|  | 1 |  |  |  |  | 1 |  | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | my $header_length = sum map { $_->[2] } @{HEADER_SPEC()}; | 
| 31 |  |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  | sub decompress { | 
| 33 | 2 |  |  | 2 | 0 | 3 | my ($input) = @_; | 
| 34 | 2 |  |  |  |  | 3 | my $output = $input; | 
| 35 | 2 |  |  |  |  | 10 | inflate \$input => \$output; | 
| 36 | 2 | 50 |  |  |  | 2595 | bunzip2 \$input => \$output if $input =~ /^BZ/; | 
| 37 | 2 |  |  |  |  | 31 | $output | 
| 38 |  |  |  |  |  |  | } | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | sub read_at { | 
| 41 | 3 |  |  | 3 | 0 | 3 | my ($self, $offset, $length) = @_; | 
| 42 | 3 |  |  |  |  | 6 | my $fh = $self->{fh}; | 
| 43 | 3 |  |  |  |  | 2 | my $part; | 
| 44 | 3 |  |  |  |  | 10 | seek $fh, $offset, 0; | 
| 45 | 3 |  |  |  |  | 26 | read $fh, $part, $length; | 
| 46 | 3 |  |  |  |  | 8 | $part | 
| 47 |  |  |  |  |  |  | } | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | sub index1 { | 
| 50 | 2 |  |  | 2 | 0 | 2 | my ($self, $index) = @_; | 
| 51 | 2 | 100 |  |  |  | 22 | unless (exists $self->{index1}{$index}) { | 
| 52 | 1 |  |  |  |  | 6 | my $part = $self->read_at($self->{index1_offset} + $index * $self->{index_length}, $self->{index_length}); | 
| 53 | 1 |  |  |  |  | 6 | $self->{index1}{$index} = [unpack $self->{index_format}, $part] | 
| 54 |  |  |  |  |  |  | } | 
| 55 | 2 |  |  |  |  | 8 | $self->{index1}{$index} | 
| 56 |  |  |  |  |  |  | } | 
| 57 |  |  |  |  |  |  |  | 
| 58 | 0 |  |  | 0 | 1 | 0 | sub fh            { shift->{fh} } | 
| 59 | 0 |  |  | 0 | 0 | 0 | sub sha1sum       { shift->{sha1sum} } | 
| 60 | 1 |  |  | 1 | 1 | 9 | sub uuid          { shift->{uuid} } | 
| 61 | 1 |  |  | 1 | 1 | 7 | sub uuid_string   { uuid_to_string shift->uuid } | 
| 62 | 1 |  |  | 1 | 1 | 506 | sub volume        { shift->{volume} } | 
| 63 | 1 |  |  | 1 | 1 | 5 | sub total_volumes { shift->{total_volumes} } | 
| 64 | 1 |  |  | 1 | 1 | 5 | sub count         { shift->{index_count} } | 
| 65 |  |  |  |  |  |  |  | 
| 66 | 2 |  |  | 2 | 1 | 9 | sub meta                          { shift->{meta} } | 
| 67 | 0 |  |  | 0 | 1 | 0 | sub article_count                 { shift->meta->{article_count} } | 
| 68 | 0 |  |  | 0 | 1 | 0 | sub article_count_is_volume_total { shift->meta->{article_count_is_volume_total} } | 
| 69 | 1 |  |  | 1 | 1 | 3 | sub index_language                { shift->meta->{index_language} } | 
| 70 | 0 |  |  | 0 | 1 | 0 | sub article_language              { shift->meta->{article_language} } | 
| 71 | 1 |  |  | 1 | 1 | 5 | sub title                         { shift->meta->{title} } | 
| 72 | 0 |  |  | 0 | 1 | 0 | sub version                       { shift->meta->{version} } | 
| 73 | 0 |  |  | 0 | 1 | 0 | sub description                   { shift->meta->{description} } | 
| 74 | 0 |  |  | 0 | 1 | 0 | sub copyright                     { shift->meta->{copyright} } | 
| 75 | 0 |  |  | 0 | 1 | 0 | sub license                       { shift->meta->{license} } | 
| 76 | 0 |  |  | 0 | 1 | 0 | sub source                        { shift->meta->{source} } | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | sub key { | 
| 79 | 1 |  |  | 1 | 1 | 2 | my ($self, $index) = @_; | 
| 80 | 1 | 50 |  |  |  | 6 | unless (exists $self->{key}{$index}) { | 
| 81 | 1 |  |  |  |  | 5 | my $part = $self->read_at($self->{index2_offset} + $self->index1($index)->[0], 2); | 
| 82 | 1 |  |  |  |  | 3 | my $len = unpack 'S>', $part; | 
| 83 | 1 |  |  |  |  | 4 | read $self->{fh}, $self->{key}{$index}, $len; | 
| 84 |  |  |  |  |  |  | } | 
| 85 | 1 |  |  |  |  | 4 | $self->{key}{$index} | 
| 86 |  |  |  |  |  |  | } | 
| 87 |  |  |  |  |  |  |  | 
| 88 |  |  |  |  |  |  | sub article { | 
| 89 | 1 |  |  | 1 | 1 | 2 | my ($self, $index) = @_; | 
| 90 | 1 | 50 |  |  |  | 6 | unless (exists $self->{article}{$index}) { | 
| 91 | 1 |  |  |  |  | 4 | my $part = $self->read_at($self->{article_offset} + $self->index1($index)->[1], 4); | 
| 92 | 1 |  |  |  |  | 4 | my $len = unpack 'L>', $part; | 
| 93 | 1 |  |  |  |  | 3 | read $self->{fh}, $part, $len; | 
| 94 | 1 |  |  |  |  | 4 | $self->{article}{$index} = decompress $part | 
| 95 |  |  |  |  |  |  | } | 
| 96 | 1 |  |  |  |  | 9 | $self->{article}{$index} | 
| 97 |  |  |  |  |  |  | } | 
| 98 |  |  |  |  |  |  |  | 
| 99 |  |  |  |  |  |  | sub new { | 
| 100 | 1 |  |  | 1 | 1 | 10 | my ($self, $file) = @_; | 
| 101 | 1 | 50 |  |  |  | 47 | open my $fh, '<', $file or die $!; | 
| 102 | 1 |  |  |  |  | 4 | binmode $fh; | 
| 103 | 1 |  |  |  |  | 1 | my %header; | 
| 104 | 1 |  |  |  |  | 1 | for (@{HEADER_SPEC()}) { | 
|  | 1 |  |  |  |  | 3 |  | 
| 105 | 12 |  |  |  |  | 42 | read $fh, my $part, $_->[2]; | 
| 106 | 12 |  |  |  |  | 34 | $header{$_->[0]} = unpack $_->[1], $part; | 
| 107 |  |  |  |  |  |  | } | 
| 108 |  |  |  |  |  |  |  | 
| 109 | 1 | 50 |  |  |  | 5 | die 'Not a recognized aarddict dictionary file' if $header{signature} ne 'aard'; | 
| 110 | 1 | 50 |  |  |  | 4 | die 'Unknown file format version' if $header{version} != 1; | 
| 111 |  |  |  |  |  |  |  | 
| 112 | 1 |  |  |  |  | 3 | read $fh, my $meta, $header{meta_length}; | 
| 113 | 1 |  |  |  |  | 3 | $meta = decode_json decompress $meta; | 
| 114 |  |  |  |  |  |  |  | 
| 115 | 1 | 50 |  |  |  | 17 | my %obj = ( | 
|  |  | 50 |  |  |  |  |  | 
| 116 |  |  |  |  |  |  | %header, | 
| 117 |  |  |  |  |  |  | fh => $fh, | 
| 118 |  |  |  |  |  |  | meta => $meta, | 
| 119 |  |  |  |  |  |  | index_format => ($header{index1_item_format} eq '>LL' ? 'L>L>' : 'L>Q>'), | 
| 120 |  |  |  |  |  |  | index_length => ($header{index1_item_format} eq '>LL' ? 8 : 12), | 
| 121 |  |  |  |  |  |  | ); | 
| 122 | 1 |  |  |  |  | 3 | $obj{index1_offset} = $header_length + $obj{meta_length}; | 
| 123 | 1 |  |  |  |  | 4 | $obj{index2_offset} = $obj{index1_offset} + $obj{index_count} * $obj{index_length}; | 
| 124 | 1 |  |  |  |  | 5 | bless \%obj, $self | 
| 125 |  |  |  |  |  |  | } | 
| 126 |  |  |  |  |  |  |  | 
| 127 |  |  |  |  |  |  | 1; | 
| 128 |  |  |  |  |  |  | __END__ |