File Coverage

blib/lib/FlashVideo/Utils.pm
Criterion Covered Total %
statement 47 197 23.8
branch 6 76 7.8
condition 5 36 13.8
subroutine 12 30 40.0
pod 0 19 0.0
total 70 358 19.5


line stmt bran cond sub pod time code
1             # Part of get-flash-videos. See get_flash_videos for copyright.
2             package FlashVideo::Utils;
3              
4 5     5   52193 use strict;
  5         10  
  5         176  
5 5     5   25 no warnings 'uninitialized';
  5         7  
  5         165  
6 5     5   26 use base 'Exporter';
  5         8  
  5         496  
7 5     5   5096 use HTML::Entities;
  5         34412  
  5         527  
8 5     5   4503 use HTML::TokeParser;
  5         30042  
  5         160  
9 5     5   5364 use Encode;
  5         61704  
  5         513  
10              
11 5     5   47 use constant FP_KEY => "Genuine Adobe Flash Player 001";
  5         10  
  5         556  
12 5     5   28 use constant EXTENSIONS => qr/\.(?:flv|mp4|mov|wmv|avi|m4v)/i;
  5         9  
  5         247  
13 5     5   24 use constant MAX_REDIRECTS => 5;
  5         9  
  5         7252  
14              
15             our @EXPORT = qw(debug info error
16             extract_title extract_info title_to_filename get_video_filename url_exists
17             swfhash swfhash_data EXTENSIONS get_user_config_dir get_win_codepage
18             is_program_on_path get_terminal_width json_unescape
19             convert_sami_subtitles_to_srt from_xml);
20              
21             sub debug(@) {
22             # Remove some sensitive data
23 0     0 0 0 my $string = "@_\n";
24 0         0 $string =~ s/\Q$ENV{HOME}\E/~/g;
25 0 0       0 print STDERR $string if $App::get_flash_videos::opt{debug};
26             }
27              
28             sub info(@) {
29 0 0   0 0 0 print STDERR "@_\n" unless $App::get_flash_videos::opt{quiet};
30             }
31              
32             sub error(@) {
33 0     0 0 0 print STDERR "@_\n";
34             }
35              
36             sub extract_title {
37 0     0 0 0 my($browser) = @_;
38 0         0 return extract_info($browser)->{title};
39             }
40              
41             sub extract_info {
42 0     0 0 0 my($browser) = @_;
43 0         0 my($title, $meta_title);
44              
45 0         0 my $p = HTML::TokeParser->new(\$browser->content);
46 0         0 while(my $token = $p->get_tag("title", "meta")) {
47 0         0 my($tag, $attr) = @$token;
48              
49 0 0 0     0 if($tag eq 'meta' && $attr->{name} =~ /title/i) {
    0          
50 0         0 $meta_title = $attr->{content};
51             } elsif($tag eq 'title') {
52 0         0 $title = $p->get_trimmed_text;
53             }
54             }
55              
56             return {
57 0         0 title => $title,
58             meta_title => $meta_title,
59             };
60             }
61              
62             sub swfhash {
63 0     0 0 0 my($browser, $url) = @_;
64              
65 0         0 $browser->get($url);
66              
67 0         0 return swfhash_data($browser->content, $url);
68             }
69              
70             sub swfhash_data {
71 0     0 0 0 my ($data, $url) = @_;
72              
73             die "Must have Compress::Zlib and Digest::SHA for this RTMP download\n"
74 0 0       0 unless eval {
75 0         0 require Compress::Zlib;
76 0         0 require Digest::SHA;
77             };
78              
79 0         0 $data = "F" . substr($data, 1, 7)
80             . Compress::Zlib::uncompress(substr $data, 8);
81              
82             return
83 0         0 swfsize => length $data,
84             swfhash => Digest::SHA::hmac_sha256_hex($data, FP_KEY),
85             swfUrl => $url;
86             }
87              
88             sub url_exists {
89 0     0 0 0 my($browser, $url) = @_;
90              
91 0         0 $browser->head($url);
92 0         0 my $response = $browser->response;
93 0         0 debug "Exists on $url: " . $response->code;
94 0 0       0 return $url if $response->code == 200;
95              
96 0         0 my $redirects = 0;
97 0   0     0 while ( ($response->code =~ /^30\d/) and ($response->header('Location'))
      0        
98             and ($redirects < MAX_REDIRECTS) ) {
99 0         0 $url = URI->new_abs($response->header('Location'), $url);
100 0         0 $response = $browser->head($url);
101 0         0 debug "Redirected to $url (" . $response->code . ")";
102 0 0       0 if ($response->code == 200) {
103 0         0 return $url;
104             }
105 0         0 $redirects++;
106             }
107              
108 0         0 return '';
109             }
110              
111             sub title_to_filename {
112 17     17 0 9922 my($title, $type) = @_;
113              
114             # Extract the extension if we're passed a URL.
115 17 100 100     32 if($title =~ s/(@{[EXTENSIONS]})$//) {
  17 100       250  
116 6         17 $type = substr $1, 1;
117             } elsif ($type && $type !~ /^\w+$/) {
118 6         31 $type = substr((URI->new($type)->path =~ /(@{[EXTENSIONS]})$/)[0], 1);
  6         11394  
119             }
120              
121 17   100     106 $type ||= "flv";
122              
123             # We want \w below to match non-ASCII characters.
124 17         43 utf8::upgrade($title);
125              
126             # Some sites have double-encoded entities, so handle this
127 17 50       52 if ($title =~ /&(?:\w+|#(?:\d+|x[A-F0-9]+));/) {
128             # Double-encoded - decode again
129 0         0 $title = decode_entities($title);
130             }
131              
132 17         127 $title =~ s/\s+/_/g;
133 17         61 $title =~ s/[^\w\-,()&]/_/g;
134 17         100 $title =~ s/^_+|_+$//g; # underscores at the start and end look bad
135            
136             # If we have nothing then return a filestamped filename.
137 17 50       37 return get_video_filename($type) unless $title;
138              
139 17         73 return "$title.$type";
140             }
141              
142             sub get_video_filename {
143 0     0 0   my($type) = @_;
144 0   0       $type ||= "flv";
145 0           return "video" . get_timestamp_in_iso8601_format() . "." . $type;
146             }
147              
148             sub get_timestamp_in_iso8601_format {
149 5     5   4641 use Time::localtime;
  5         46819  
  5         1212  
150 0     0 0   my $time = localtime;
151 0           return sprintf("%04d%02d%02d%02d%02d%02d",
152             $time->year + 1900, $time->mon + 1,
153             $time->mday, $time->hour, $time->min, $time->sec);
154             }
155              
156             sub get_vlc_exe_from_registry {
157 0 0   0 0   if ($^O !~ /MSWin/i) {
158 0           die "Doesn't make sense to call this except on Windows";
159             }
160              
161 0           my $HAS_WIN32_REGISTRY = eval { require Win32::Registry };
  0            
162              
163 0 0         die "Win32::Registry required for JustWorks(tm) playing on Windows"
164             unless $HAS_WIN32_REGISTRY;
165              
166 0           require Win32::Registry;
167              
168             # This module, along with Win32::TieRegistry, is horrible and primarily
169             # works by exporting various symbols into the calling package.
170             # Win32::TieRegistry does not offer an easy way of getting the $Registry
171             # object if you require the module rather than use-ing it.
172 0           Win32::Registry->import();
173            
174             # Ignoring the fact that polluting your caller's namespace is bad
175             # practice, it's also evil because I now have to disable strict so that
176             # Perl won't complain that $HKEY_LOCAL_MACHINE which is exported into my
177             # package at runtime doesn't exist.
178 0           my $local_machine;
179              
180             {
181 5     5   51 no strict 'vars';
  5         13  
  5         15757  
  0            
182 0           $local_machine = $::HKEY_LOCAL_MACHINE;
183             }
184              
185 0           my $key = 'SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall';
186              
187 0           $local_machine->Open($key, my $reg);
188              
189             # Believe it or not, this is Perl, not C
190 0           my @applications;
191 0           $reg->GetKeys(\@applications);
192              
193 0           my $vlc_binary;
194              
195 0           foreach my $application (@applications) {
196 0 0         next unless $application =~ /VLC Media Player/i;
197              
198 0           $reg->Open($application, my $details);
199              
200 0           my %app_properties;
201 0           $details->GetValues(\%app_properties);
202              
203             # These values are arrayrefs with value name, type and data. data is
204             # what we care about.
205 0 0         if ($app_properties{DisplayIcon}->[-1] =~ /\.exe$/i) {
206             # Assume this is the VLC executable
207 0           $vlc_binary = $app_properties{DisplayIcon}->[-1];
208 0           last;
209             }
210             }
211            
212 0           return $vlc_binary;
213             }
214              
215             sub get_win_codepage {
216 0     0 0   require Win32::API;
217              
218             # Hack for older versions of Win32::API::Type (which Win32::API->import
219             # uses to parse prototypes) to avoid "unknown output parameter type"
220             # warning. Older versions of this module have an INIT block for reading
221             # type information from the DATA filehandle. This doesn't get called when
222             # we require the module rather than use-ing it. More recent versions of
223             # the module don't bother with an INIT block, and instead just have the
224             # initialisation code at package level.
225 0 0         if (! %Win32::API::Type::Known) {
226 0           %Win32::API::Type::Known = (int => 'i');
227             }
228              
229 0           Win32::API->Import("kernel32", "int GetACP()");
230 0           return "cp" . GetACP();
231             }
232              
233             # Returns a path to the user's configuration data and/or plugins directory.
234             sub get_user_config_dir {
235             # On Windows, use "Application Data" and "get_flash_videos". On other
236             # platforms, use the user's home directory (specified by the HOME
237             # environment variable) and ".get_flash_videos". Note that on Windows,
238             # the directory has no . prefix as historically, Windows and Windows
239             # applications tend to make dealing with such directories awkward.
240              
241             # Note that older versions of Windows don't set an APPDATA environment
242             # variable.
243              
244 0 0 0 0 0   return $^O =~ /MSWin/i ? ($ENV{APPDATA} || 'c:/windows/application data')
245             . "/get_flash_videos"
246             : "$ENV{HOME}/.get_flash_videos";
247             }
248              
249             # Is the specified program on the system PATH?
250             sub is_program_on_path {
251 0     0 0   my($program) = @_;
252 0           my $win = $^O =~ /MSWin/i;
253              
254 0 0         for my $dir(split($win ? ";" : ":", $ENV{PATH})) {
255 0 0         return 1 if -f "$dir/$program" . ($win ? ".exe" : "");
    0          
256             }
257 0           return 0;
258             }
259              
260             sub get_terminal_width {
261 0 0 0 0 0   if(eval { require Term::ReadKey } && (my($width) = Term::ReadKey::GetTerminalSize())) {
  0 0          
262 0 0         return $width - 1 if $^O =~ /MSWin|cygwin/i; # seems to be off by 1 on Windows
263 0           return $width;
264             } elsif($ENV{COLUMNS}) {
265 0           return $ENV{COLUMNS};
266             } else {
267 0           return 80;
268             }
269             }
270              
271             # Maybe should use a proper JSON parser, but want to avoid the dependency for now..
272             # (There is now one in FlashVideo::JSON, so consider that -- this is just here
273             # until we have a chance to fix things using it).
274             sub json_unescape {
275 0     0 0   my($s) = @_;
276              
277 0           $s =~ s/\\u([0-9a-f]{1,4})/chr hex $1/ge;
  0            
278 0           $s =~ s{(\\[\\/rnt"])}{"\"$1\""}gee;
  0            
279 0           return $s;
280             }
281              
282             sub convert_sami_subtitles_to_srt {
283 0     0 0   my ($sami_subtitles, $filename, $decrypt_callback) = @_;
284              
285 0 0         die "SAMI subtitles must be provided" unless $sami_subtitles;
286 0 0         die "Output SRT filename must be provided" unless $filename;
287              
288             # Use regexes to "parse" SAMI since HTML::TokeParser is too awkward. It
289             # makes it hard to preserve linebreaks and other formatting in subtitles.
290             # It's also quite slow.
291 0           $sami_subtitles =~ s/[\r\n]//g; # flatten
292              
293 0           my @lines = split /
294 0           shift @lines; # Skip headers
295              
296 0           my @subtitles;
297 0           my $count = 0;
298              
299 0           my $last_proper_sub_end_time = '';
300              
301 0           for (@lines) {
302 0           my ($begin, $sub);
303             # Remove span elements
304 0           s|<\/?span.*?>| |g;
305            
306             # replace "&" with "&"
307 0           s|&|&|g;
308              
309             # replace " " with " "
310 0           s{&(?:nbsp|#160);}{ }g;
311              
312             # Start="2284698">

I won't have to drink it
in this crappy warehouse.

313             #($begin, $sub) = ($1, $2) if m{.*Start="(.+?)".+(.+?)<\/p>.*?<\/Sync>}i;
314              
315 0 0         ($begin, $sub) = ($1, $2) if m{[^>]*Start="(.+?)"[^>]*>(.*?)<\/Sync>}i;
316              
317 0 0         if (/^\s*Encrypted="true"\s*/i) {
318 0 0 0       if ($decrypt_callback and ref($decrypt_callback) eq 'CODE') {
319 0           $sub = $decrypt_callback->($sub);
320             }
321             }
322              
323 0           $sub =~ s@&@&@g;
324 0           $sub =~ s@(?:]*>| | )@ @g;
325              
326             # Do some tidying up.
327             # Note only

tags are removed-- tags are left in place since VLC

328             # and others support this for formatting.
329 0           $sub =~ s{]*?>}{}g; # remove

and similar

330              
331             # VLC is very sensitive to tag case.
332 0           $sub =~ s{<(/)?([BI])>}{"<$1" . lc($2) . ">"}eg;
  0            
333            
334 0           decode_entities($sub); # in void context, this works in place
335              
336 0 0 0       if ($sub and ($begin or $begin == 0)) {
      0        
337             # Convert milliseconds into HH:MM:ss,mmm format
338 0           my $seconds = int( $begin / 1000.0 );
339 0           my $ms = $begin - ( $seconds * 1000.0 );
340 0           $begin = sprintf("%02d:%02d:%02d,%03d", (gmtime($seconds))[2,1,0], $ms );
341              
342             # Don't strip simple HTML like - VLC and other players
343             # support basic subtitle styling, see:
344             # http://git.videolan.org/?p=vlc.git;a=blob;f=modules/codec/subtitles/subsdec.c
345              
346             # Leading/trailing spaces
347 0           $sub =~ s/^\s*(.*?)\s*$/$1/;
348              
349             # strip multispaces
350 0           $sub =~ s/\s{2,}/ /g;
351              
352             # Replace
(and similar) with \n. VLC handles \n in SubRip files
353             # fine. For
it is case and slash sensitive.
354 0           $sub =~ s|
|\n|ig;
355              
356 0           $sub =~ s/^\s*|\s*$//mg;
357              
358 0 0 0       if ($count and !$subtitles[$count - 1]->{end}) {
359 0           $subtitles[$count - 1]->{end} = $begin;
360             }
361              
362             # SAMI subtitles are a bit crap. Only a start time is specified for
363             # each subtitle. No end time is specified, so the subtitle is displayed
364             # until the next subtitle is ready to be shown. This means that if
365             # subtitles aren't meant to be shown for part of the video, a dummy
366             # subtitle (usually just a space) has to be inserted.
367 0 0 0       if (!$sub or $sub =~ /^\s+$/) {
368 0 0         if ($count) {
369 0           $last_proper_sub_end_time = $subtitles[$count - 1]->{end};
370             }
371              
372             # Gap in subtitles.
373 0           next; # this is not a meaningful subtitle
374             }
375              
376 0           push @subtitles, {
377             start => $begin,
378             text => $sub,
379             };
380              
381 0           $count++;
382             }
383             }
384              
385             # Ensure the end time for the last subtitle is correct.
386 0           $subtitles[$count - 1]->{end} = $last_proper_sub_end_time;
387              
388             # Write subtitles
389 0 0         open my $subtitle_fh, '>', $filename
390             or die "Can't open subtitles file $filename: $!";
391              
392             # Set filehandle to UTF-8 to avoid "wide character in print" warnings.
393             # Note this does *not* double-encode data as UTF-8 (verify with hexdump).
394             # As per the documentation for binmode: ":utf8 just marks the data as
395             # UTF-8 without further checking". This will cause mojibake if
396             # ISO-8859-1/Latin1 and UTF-8 and are mixed in the same file though.
397 0           binmode $subtitle_fh, ':utf8';
398              
399 0           $count = 1;
400              
401 0           foreach my $subtitle (@subtitles) {
402 0           print $subtitle_fh "$count\n$subtitle->{start} --> $subtitle->{end}\n" .
403             "$subtitle->{text}\n\n";
404 0           $count++;
405             }
406              
407 0           close $subtitle_fh;
408              
409 0           return 1;
410             }
411              
412             sub from_xml {
413 0     0 0   my($xml, @args) = @_;
414              
415 0 0         if(!eval { require XML::Simple && XML::Simple::XMLin("") }) {
  0 0          
416 0           die "Must have XML::Simple to download " . caller =~ /::([^:])+$/ . " videos\n";
417             }
418              
419 0           $xml = eval {
420 0 0         XML::Simple::XMLin(ref $xml eq 'SCALAR' ? $xml
    0          
421             : ref $xml ? $xml->content
422             : $xml, @args);
423             };
424              
425 0 0         if($@) {
426 0           die "$@ (from ", join("::", caller), ")\n";
427             }
428              
429 0           return $xml;
430             }
431              
432             1;