File Coverage

blib/lib/Image/ExifTool/Torrent.pm
Criterion Covered Total %
statement 107 132 81.0
branch 54 96 56.2
condition 8 23 34.7
subroutine 7 7 100.0
pod 0 4 0.0
total 176 262 67.1


line stmt bran cond sub pod time code
1             #------------------------------------------------------------------------------
2             # File: Torrent.pm
3             #
4             # Description: Read information from BitTorrent file
5             #
6             # Revisions: 2013/08/27 - P. Harvey Created
7             #
8             # References: 1) https://wiki.theory.org/BitTorrentSpecification
9             #------------------------------------------------------------------------------
10              
11             package Image::ExifTool::Torrent;
12              
13 1     1   4157 use strict;
  1         1  
  1         31  
14 1     1   3 use vars qw($VERSION);
  1         1  
  1         35  
15 1     1   3 use Image::ExifTool qw(:DataAccess :Utils);
  1         2  
  1         1659  
16              
17             $VERSION = '1.08';
18              
19             sub ReadBencode($$$);
20             sub ExtractTags($$$;$$@);
21              
22             # tags extracted from BitTorrent files
23             %Image::ExifTool::Torrent::Main = (
24             GROUPS => { 2 => 'Document' },
25             NOTES => q{
26             Below are tags commonly found in BitTorrent files. As well as these tags,
27             any other existing tags will be extracted. For convenience, list items are
28             expanded into individual tags with an index in the tag name, but only the
29             tags with index "1" are listed in the tables below. See
30             L for the BitTorrent
31             specification.
32             },
33             'announce' => { },
34             'announce-list' => { Name => 'AnnounceList1' },
35             'comment' => { },
36             'created by' => { Name => 'Creator' }, # software used to create the torrent
37             'creation date' => {
38             Name => 'CreateDate',
39             Groups => { 2 => 'Time' },
40             ValueConv => 'ConvertUnixTime($val,1)',
41             PrintConv => '$self->ConvertDateTime($val)',
42             },
43             'encoding' => { },
44             'info' => {
45             SubDirectory => { TagTable => 'Image::ExifTool::Torrent::Info' },
46             Notes => 'extracted as a structure with the Struct option',
47             },
48             'url-list' => { Name => 'URLList1' },
49             );
50              
51             %Image::ExifTool::Torrent::Info = (
52             GROUPS => { 2 => 'Document' },
53             'file-duration' => { Name => 'File1Duration' },
54             'file-media' => { Name => 'File1Media' },
55             'files' => { SubDirectory => { TagTable => 'Image::ExifTool::Torrent::Files' } },
56             'length' => { },
57             'md5sum' => { Name => 'MD5Sum' },
58             'name' => { },
59             'name.utf-8' => { Name => 'NameUTF-8' },
60             'piece length' => { Name => 'PieceLength' },
61             'pieces' => {
62             Name => 'Pieces',
63             Notes => 'concatenation of 20-byte SHA-1 digests for each piece',
64             },
65             'private' => { },
66             'profiles' => { SubDirectory => { TagTable => 'Image::ExifTool::Torrent::Profiles' } },
67             );
68              
69             %Image::ExifTool::Torrent::Profiles = (
70             GROUPS => { 2 => 'Document' },
71             'width' => { Name => 'Profile1Width' },
72             'height' => { Name => 'Profile1Height' },
73             'acodec' => { Name => 'Profile1AudioCodec' },
74             'vcodec' => { Name => 'Profile1VideoCodec' },
75             );
76              
77             %Image::ExifTool::Torrent::Files = (
78             GROUPS => { 2 => 'Document' },
79             'length' => { Name => 'File1Length', PrintConv => 'ConvertFileSize($val)' },
80             'md5sum' => { Name => 'File1MD5Sum' },
81             'path' => { Name => 'File1Path', JoinPath => 1 },
82             'path.utf-8' => { Name => 'File1PathUTF-8', JoinPath => 1 },
83             );
84              
85             #------------------------------------------------------------------------------
86             # Read 64kB more data into buffer
87             # Inputs: 0) RAF ref, 1) buffer ref
88             # Returns: number of bytes read
89             # Notes: Sets BencodeEOF element of RAF on end of file
90             sub ReadMore($$)
91             {
92 1     1 0 3 my ($raf, $dataPt) = @_;
93 1         1 my $buf2;
94 1         3 my $n = $raf->Read($buf2, 65536);
95 1 50       4 $$raf{BencodeEOF} = 1 if $n != 65536;
96 1 50       5 $$dataPt = substr($$dataPt, pos($$dataPt)) . $buf2 if $n;
97 1         2 return $n;
98             }
99              
100             #------------------------------------------------------------------------------
101             # Read bencoded value
102             # Inputs: 0) ExifTool ref, 1) input file, 2) buffer (pos must be set to current position)
103             # Returns: HASH ref, ARRAY ref, SCALAR ref, SCALAR, or undef on error or end of data
104             # Notes: Sets BencodeError element of RAF on any error
105             sub ReadBencode($$$)
106             {
107 70     70 0 74 my ($et, $raf, $dataPt) = @_;
108              
109             # read more if necessary (keep a minimum of 64 bytes in the buffer)
110 70         70 my $pos = pos($$dataPt);
111 70 50       75 return undef unless defined $pos;
112 70         69 my $remaining = length($$dataPt) - $pos;
113 70 100 100     106 ReadMore($raf, $dataPt) if $remaining < 64 and not $$raf{BencodeEOF};
114              
115             # read next token
116 70 50       101 $$dataPt =~ /(.)/sg or return undef;
117              
118 70         60 my $val;
119 70         73 my $tok = $1;
120 70 100 33     202 if ($tok eq 'i') { # integer
    100          
    100          
    100          
    50          
121 6 50       15 $$dataPt =~ /\G(-?\d+)e/g or return $val;
122 6         6 $val = $1;
123             } elsif ($tok eq 'd') { # dictionary
124 6         7 $val = { };
125 6         6 for (;;) {
126 26         33 my $k = ReadBencode($et, $raf, $dataPt);
127 26 100       31 last unless defined $k;
128             # the key must be a byte string
129 20 50       24 if (ref $k) {
130 0 0       0 ref $k ne 'SCALAR' and $$raf{BencodeError} = 'Bad dictionary key', last;
131 0         0 $k = $$k;
132             }
133 20         23 my $v = ReadBencode($et, $raf, $dataPt);
134 20 50       23 last unless defined $v;
135 20         32 $$val{$k} = $v;
136             }
137             } elsif ($tok eq 'l') { # list
138 8         9 $val = [ ];
139 8         9 for (;;) {
140 23         35 my $v = ReadBencode($et, $raf, $dataPt);
141 23 100       29 last unless defined $v;
142 15         18 push @$val, $v;
143             }
144             } elsif ($tok eq 'e') { # end of dictionary or list
145             # return undef (no error)
146             } elsif ($tok =~ /^\d$/ and $$dataPt =~ /\G(\d*):/g) { # byte string
147 36         51 my $len = $tok . $1;
148 36         44 my $more = $len - (length($$dataPt) - pos($$dataPt));
149 36         54 my $value;
150 36 50       37 if ($more <= 0) {
    0          
151 36         46 $value = substr($$dataPt,pos($$dataPt),$len);
152 36         56 pos($$dataPt) = pos($$dataPt) + $len;
153             } elsif ($more > 10000000) {
154             # just skip over really long values
155 0 0       0 $val = \ "(Binary data $len bytes)" if $raf->Seek($more, 1);
156             } else {
157             # need to read more from file
158 0         0 my $buff;
159 0         0 my $n = $raf->Read($buff, $more);
160 0 0       0 if ($n == $more) {
161 0         0 $value = substr($$dataPt,pos($$dataPt)) . $buff;
162 0         0 $$dataPt = '';
163 0         0 pos($$dataPt) = 0;
164             }
165             }
166 36 50       43 if (defined $value) {
    0          
167             # return as binary data unless it is a reasonable-length ASCII string
168 36 50       70 if (length($value) > 256) {
    100          
169 0         0 $val = \$value;
170             } elsif ($value =~ /[^\t\x20-\x7e]/) {
171 1 50       6 if (Image::ExifTool::IsUTF8(\$value) >= 0) {
172 0         0 $val = $et->Decode($value, 'UTF8');
173             } else {
174 1         2 $val = \$value;
175             }
176             } else {
177 35         39 $val = $value;
178             }
179             } elsif (not defined $val) {
180 0         0 $$raf{BencodeError} = 'Truncated byte string';
181             }
182             } else {
183 0         0 $$raf{BencodeError} = 'Bad format';
184             }
185 70         91 return $val;
186             }
187              
188             #------------------------------------------------------------------------------
189             # Extract tags from dictionary hash
190             # Inputs: 0) ExifTool ref, 1) dictionary hash reference, 2) tag table ref,
191             # 3) parent hash ID, 4) parent hash name, 5-N) list indices
192             # Returns: number of tags extracted
193             sub ExtractTags($$$;$$@)
194             {
195 6     6 0 13 my ($et, $hashPtr, $tagTablePtr, $baseID, $baseName, @index) = @_;
196 6         6 my $count = 0;
197 6         7 my $tag;
198 6         35 foreach $tag (sort keys %$hashPtr) {
199 20         29 my $val = $$hashPtr{$tag};
200 20         20 my ($i, $j, @more);
201 20         28 for (; defined $val; $val = shift @more) {
202 27 50       46 my $id = defined $baseID ? "$baseID/$tag" : $tag;
203 27 50       38 unless ($$tagTablePtr{$id}) {
204 0         0 my $name = ucfirst $tag;
205             # capitalize all words in tag name and remove illegal characters
206 0         0 $name =~ s/[^-_a-zA-Z0-9]+(.?)/\U$1/g;
207 0 0 0     0 $name = "Tag$name" if length($name) < 2 or $name !~ /^[A-Z]/;
208 0 0       0 $name = $baseName . $name if defined $baseName; # add base name if necessary
209 0         0 AddTagToTable($tagTablePtr, $id, { Name => $name });
210 0         0 $et->VPrint(0, " [adding $id '${name}']\n");
211             }
212 27 50       42 my $tagInfo = $et->GetTagInfo($tagTablePtr, $id) or next;
213 27 100       41 if (ref $val eq 'ARRAY') {
214 8 100       12 if ($$tagInfo{JoinPath}) {
215 4 50       7 $val = join '/', map { ref $_ ? '(Binary data)' : $_ } @$val;
  5         13  
216             } else {
217 4 50       7 next unless @$val; # ignore empty arrays
218 4         7 push @more, @$val;
219 4 100       9 next if ref $more[0] eq 'ARRAY'; # continue expanding nested lists
220 3         3 $val = shift @more;
221 3 50       6 $i or $i = 0, push(@index, $i);
222             }
223             }
224 26 100       37 $index[-1] = ++$i if defined $i;
225 26 100       35 if (@index) {
226 17         28 $id .= join '_', @index; # add instance number(s) to tag ID
227 17 50       28 unless ($$tagTablePtr{$id}) {
228 17         25 my $name = $$tagInfo{Name};
229             # embed indices at position of '1' in tag name
230 17         22 my $n = ($name =~ tr/1/#/);
231 17         27 for ($j=0; $j<$n; ++$j) {
232 13   50     20 my $idx = $index[$j] || '';
233 13         36 $name =~ s/#/$idx/;
234             }
235             # put remaining indices at end of tag name
236 17         23 for (; $j<@index; ++$j) {
237 4 50       12 $name .= '_' if $name =~ /\d$/;
238 4         7 $name .= $index[$j];
239             }
240 17         68 AddTagToTable($tagTablePtr, $id, { %$tagInfo, Name => $name });
241             }
242 17 50       31 $tagInfo = $et->GetTagInfo($tagTablePtr, $id) or next;
243             }
244 26 100       36 if (ref $val eq 'HASH') {
245 5 0 33     11 if ($et->Options('Struct') and $tagInfo and $$tagInfo{Name} eq 'Info') {
      33        
246 0         0 $et->FoundTag($tagInfo, $val);
247 0         0 ++$count;
248 0         0 next;
249             }
250             # extract tags from this dictionary
251 5         7 my ($table, $rootID, $rootName);
252 5 50       9 if ($$tagInfo{SubDirectory}) {
253 5         10 $table = GetTagTable($$tagInfo{SubDirectory}{TagTable});
254             } else {
255 0         0 $table = $tagTablePtr;
256             # use hash ID and Name as base for contained tags to avoid conflicts
257 0         0 $rootID = $id;
258 0         0 $rootName = $$tagInfo{Name};
259             }
260 5         14 $count += ExtractTags($et, $val, $table, $rootID, $rootName, @index);
261             } else {
262             # handle this simple tag value
263 21         48 $et->HandleTag($tagTablePtr, $id, $val);
264 21         37 ++$count;
265             }
266             }
267 20 100       31 pop @index if defined $i;
268             }
269 6         16 return $count;
270             }
271              
272             #------------------------------------------------------------------------------
273             # Process BitTorrent file
274             # Inputs: 0) ExifTool object reference, 1) dirInfo reference (with RAF set)
275             # Returns: 1 on success, 0 if this wasn't a valid BitTorrent file
276             sub ProcessTorrent($$)
277             {
278 1     1 0 2 my ($et, $dirInfo) = @_;
279 1         1 my $success = 0;
280 1         2 my $raf = $$dirInfo{RAF};
281 1         2 my $buff = '';
282 1         3 pos($buff) = 0;
283 1         4 my $dict = ReadBencode($et, $raf, \$buff);
284 1         9 my $err = $$raf{BencodeError};
285 1 50       4 $et->Warn("Bencode error: $err") if $err;
286 1 0 0     5 if (ref $dict eq 'HASH' and ($$dict{announce} or $$dict{'created by'} or $$dict{info})) {
      33        
287 1         5 $et->SetFileType();
288 1         3 my $tagTablePtr = GetTagTable('Image::ExifTool::Torrent::Main');
289 1 50       4 ExtractTags($et, $dict, $tagTablePtr) and $success = 1;
290             }
291 1         9 return $success;
292             }
293              
294             1; # end
295              
296             __END__