| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | =head1 NAME | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | IMDB::BaseClass - a base class for IMDB::Film and IMDB::Persons. | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | use base qw(IMDB::BaseClass); | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | IMDB::BaseClass implements a base functionality for IMDB::Film | 
| 12 |  |  |  |  |  |  | and IMDB::Persons. | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | =cut | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | package IMDB::BaseClass; | 
| 17 |  |  |  |  |  |  |  | 
| 18 | 1 |  |  | 1 |  | 7 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 39 |  | 
| 19 | 1 |  |  | 1 |  | 7 | use warnings; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 30 |  | 
| 20 |  |  |  |  |  |  |  | 
| 21 | 1 |  |  | 1 |  | 980 | use HTML::TokeParser; | 
|  | 1 |  |  |  |  | 14129 |  | 
|  | 1 |  |  |  |  | 35 |  | 
| 22 | 1 |  |  | 1 |  | 998 | use LWP::Simple qw($ua get); | 
|  | 1 |  |  |  |  | 82981 |  | 
|  | 1 |  |  |  |  | 11 |  | 
| 23 | 1 |  |  | 1 |  | 1317 | use Cache::FileCache; | 
|  | 1 |  |  |  |  | 59459 |  | 
|  | 1 |  |  |  |  | 62 |  | 
| 24 | 1 |  |  | 1 |  | 1088 | use Text::Unidecode qw(unidecode); | 
|  | 1 |  |  |  |  | 3568 |  | 
|  | 1 |  |  |  |  | 81 |  | 
| 25 | 1 |  |  | 1 |  | 10 | use HTML::Entities; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 62 |  | 
| 26 | 1 |  |  | 1 |  | 7 | use Carp; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 53 |  | 
| 27 |  |  |  |  |  |  |  | 
| 28 | 1 |  |  | 1 |  | 1036 | use Data::Dumper; | 
|  | 1 |  |  |  |  | 7480 |  | 
|  | 1 |  |  |  |  | 100 |  | 
| 29 |  |  |  |  |  |  |  | 
| 30 | 1 |  |  | 1 |  | 14 | use constant MAIN_TAG	=> 'h4'; | 
|  | 1 |  |  |  |  | 4 |  | 
|  | 1 |  |  |  |  | 69 |  | 
| 31 | 1 |  |  | 1 |  | 6 | use constant ID_LENGTH	=> 6; | 
|  | 1 |  |  |  |  | 4 |  | 
|  | 1 |  |  |  |  | 44 |  | 
| 32 |  |  |  |  |  |  |  | 
| 33 | 1 |  |  | 1 |  | 6 | use vars qw($VERSION %FIELDS $AUTOLOAD %STATUS_DESCR); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 108 |  | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  | BEGIN { | 
| 36 | 1 |  |  | 1 |  | 3 | $VERSION = '0.53'; | 
| 37 |  |  |  |  |  |  |  | 
| 38 | 1 |  |  |  |  | 34 | %STATUS_DESCR = ( | 
| 39 |  |  |  |  |  |  | 0 => 'Empty', | 
| 40 |  |  |  |  |  |  | 1 => 'Filed', | 
| 41 |  |  |  |  |  |  | 2 => 'Fresh', | 
| 42 |  |  |  |  |  |  | 3 => 'Cached', | 
| 43 |  |  |  |  |  |  | ); | 
| 44 |  |  |  |  |  |  | } | 
| 45 |  |  |  |  |  |  |  | 
| 46 | 1 |  |  | 1 |  | 7 | use constant FORCED 		=> 1; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 49 |  | 
| 47 | 1 |  |  | 1 |  | 5 | use constant CLASS_NAME 	=> 'IMDB::BaseClass'; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 43 |  | 
| 48 |  |  |  |  |  |  |  | 
| 49 | 1 |  |  | 1 |  | 5 | use constant FROM_FILE		=> 1; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 48 |  | 
| 50 | 1 |  |  | 1 |  | 7 | use constant FROM_INTERNET	=> 2; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 52 |  | 
| 51 | 1 |  |  | 1 |  | 7 | use constant FROM_CACHE		=> 3; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 76 |  | 
| 52 |  |  |  |  |  |  |  | 
| 53 | 1 |  |  |  |  | 11 | use fields qw(	content | 
| 54 |  |  |  |  |  |  | parser | 
| 55 |  |  |  |  |  |  | matched | 
| 56 |  |  |  |  |  |  | proxy | 
| 57 |  |  |  |  |  |  | error | 
| 58 |  |  |  |  |  |  | cache | 
| 59 |  |  |  |  |  |  | host | 
| 60 |  |  |  |  |  |  | query | 
| 61 |  |  |  |  |  |  | search | 
| 62 |  |  |  |  |  |  | cacheObj | 
| 63 |  |  |  |  |  |  | cache_exp | 
| 64 |  |  |  |  |  |  | cache_root | 
| 65 |  |  |  |  |  |  | clear_cache | 
| 66 |  |  |  |  |  |  | debug | 
| 67 |  |  |  |  |  |  | status | 
| 68 |  |  |  |  |  |  | file | 
| 69 |  |  |  |  |  |  | timeout | 
| 70 |  |  |  |  |  |  | user_agent | 
| 71 |  |  |  |  |  |  | decode_html | 
| 72 |  |  |  |  |  |  | exact | 
| 73 |  |  |  |  |  |  | _code | 
| 74 | 1 |  |  | 1 |  | 1090 | ); | 
|  | 1 |  |  |  |  | 1729 |  | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | =head2 Constructor and initialization | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | =over 4 | 
| 79 |  |  |  |  |  |  |  | 
| 80 |  |  |  |  |  |  | =item new() | 
| 81 |  |  |  |  |  |  |  | 
| 82 |  |  |  |  |  |  | Object's constructor. You should pass as parameter movie title or IMDB code. | 
| 83 |  |  |  |  |  |  |  | 
| 84 |  |  |  |  |  |  | my $imdb = new IMDB::Film(crit => ); | 
| 85 |  |  |  |  |  |  |  | 
| 86 |  |  |  |  |  |  | or | 
| 87 |  |  |  |  |  |  |  | 
| 88 |  |  |  |  |  |  | my $imdb = new IMDB::Film(crit => ); | 
| 89 |  |  |  |  |  |  |  | 
| 90 |  |  |  |  |  |  | Also, you can specify following optional parameters: | 
| 91 |  |  |  |  |  |  |  | 
| 92 |  |  |  |  |  |  | - proxy - define proxy server name and port; | 
| 93 |  |  |  |  |  |  | - debug	- switch on debug mode (on by default); | 
| 94 |  |  |  |  |  |  | - cache - cache or not of content retrieved pages. | 
| 95 |  |  |  |  |  |  |  | 
| 96 |  |  |  |  |  |  | =cut | 
| 97 |  |  |  |  |  |  |  | 
| 98 |  |  |  |  |  |  | sub new { | 
| 99 | 0 |  |  | 0 | 1 |  | my $caller = shift; | 
| 100 | 0 |  | 0 |  |  |  | my $class = ref($caller) || $caller; | 
| 101 | 0 |  |  |  |  |  | my $self = fields::new($class); | 
| 102 | 0 |  |  |  |  |  | $self->_init(@_); | 
| 103 | 0 |  |  |  |  |  | return $self; | 
| 104 |  |  |  |  |  |  | } | 
| 105 |  |  |  |  |  |  |  | 
| 106 |  |  |  |  |  |  | =item _init() | 
| 107 |  |  |  |  |  |  |  | 
| 108 |  |  |  |  |  |  | Initialize object. It gets list of service class properties and assign value to them from input | 
| 109 |  |  |  |  |  |  | parameters or from the hash with default values. | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | =cut | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  | sub _init { | 
| 114 | 0 |  |  | 0 |  |  | my CLASS_NAME $self = shift; | 
| 115 | 0 |  |  |  |  |  | my %args = @_; | 
| 116 |  |  |  |  |  |  |  | 
| 117 | 1 |  |  | 1 |  | 257 | no warnings 'deprecated'; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 2964 |  | 
| 118 |  |  |  |  |  |  |  | 
| 119 | 0 |  |  |  |  |  | for my $prop ( keys %{ $self->fields } ) { | 
|  | 0 |  |  |  |  |  |  | 
| 120 | 0 | 0 |  |  |  |  | unless($prop =~ /^_/) { | 
| 121 | 0 | 0 |  |  |  |  | $self->{$prop} = defined $args{$prop} ? $args{$prop} : $self->_get_default_value($prop); | 
| 122 |  |  |  |  |  |  | } | 
| 123 |  |  |  |  |  |  | } | 
| 124 |  |  |  |  |  |  |  | 
| 125 | 0 | 0 |  |  |  |  | if($self->_cache()) { | 
| 126 | 0 |  |  |  |  |  | $self->_cacheObj( new Cache::FileCache( { 	default_expires_in 	=> $self->_cache_exp, | 
| 127 |  |  |  |  |  |  | cache_root 			=> $self->_cache_root } ) ); | 
| 128 |  |  |  |  |  |  |  | 
| 129 | 0 | 0 |  |  |  |  | $self->_cacheObj->clear() if $self->_clear_cache; | 
| 130 |  |  |  |  |  |  | } | 
| 131 |  |  |  |  |  |  |  | 
| 132 | 0 | 0 |  |  |  |  | if($self->_proxy) { $ua->proxy(['http', 'ftp'], $self->_proxy()) } | 
|  | 0 |  |  |  |  |  |  | 
| 133 | 0 |  |  |  |  |  | else { $ua->env_proxy() } | 
| 134 |  |  |  |  |  |  |  | 
| 135 | 0 |  |  |  |  |  | $ua->timeout($self->timeout); | 
| 136 | 0 |  |  |  |  |  | $ua->agent($self->user_agent); | 
| 137 |  |  |  |  |  |  |  | 
| 138 | 0 |  |  |  |  |  | $self->_content( $args{crit} ); | 
| 139 | 0 |  |  |  |  |  | $self->_parser(); | 
| 140 |  |  |  |  |  |  | } | 
| 141 |  |  |  |  |  |  |  | 
| 142 |  |  |  |  |  |  | =item user_agent() | 
| 143 |  |  |  |  |  |  |  | 
| 144 |  |  |  |  |  |  | Define an user agent for HTTP request. It's 'Mozilla/5.0' by default. | 
| 145 |  |  |  |  |  |  | For more information refer to LWP::UserAgent. | 
| 146 |  |  |  |  |  |  |  | 
| 147 |  |  |  |  |  |  | =cut | 
| 148 |  |  |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | sub user_agent { | 
| 150 | 0 |  |  | 0 | 1 |  | my CLASS_NAME $self = shift; | 
| 151 | 0 | 0 |  |  |  |  | if(@_) { $self->{user_agent} = shift } | 
|  | 0 |  |  |  |  |  |  | 
| 152 | 0 |  |  |  |  |  | return $self->{user_agent} | 
| 153 |  |  |  |  |  |  | } | 
| 154 |  |  |  |  |  |  |  | 
| 155 |  |  |  |  |  |  | =item timeout() | 
| 156 |  |  |  |  |  |  |  | 
| 157 |  |  |  |  |  |  | Define a timeout for HTTP request in seconds. By default it's 10 sec. | 
| 158 |  |  |  |  |  |  | For more information refer to LWP::UserAgent. | 
| 159 |  |  |  |  |  |  |  | 
| 160 |  |  |  |  |  |  | =cut | 
| 161 |  |  |  |  |  |  |  | 
| 162 |  |  |  |  |  |  | sub timeout { | 
| 163 | 0 |  |  | 0 | 1 |  | my CLASS_NAME $self = shift; | 
| 164 | 0 | 0 |  |  |  |  | if(@_) { $self->{timeout} = shift } | 
|  | 0 |  |  |  |  |  |  | 
| 165 | 0 |  |  |  |  |  | return $self->{timeout} | 
| 166 |  |  |  |  |  |  | } | 
| 167 |  |  |  |  |  |  |  | 
| 168 |  |  |  |  |  |  | =item code() | 
| 169 |  |  |  |  |  |  |  | 
| 170 |  |  |  |  |  |  | Get IMDB film code. | 
| 171 |  |  |  |  |  |  |  | 
| 172 |  |  |  |  |  |  | my $code = $film->code(); | 
| 173 |  |  |  |  |  |  |  | 
| 174 |  |  |  |  |  |  | =cut | 
| 175 |  |  |  |  |  |  |  | 
| 176 |  |  |  |  |  |  | sub code { | 
| 177 | 0 |  |  | 0 | 1 |  | my CLASS_NAME $self = shift; | 
| 178 | 0 | 0 |  |  |  |  | if(@_) { $self->{_code} = shift } | 
|  | 0 |  |  |  |  |  |  | 
| 179 | 0 |  |  |  |  |  | return $self->{_code}; | 
| 180 |  |  |  |  |  |  | } | 
| 181 |  |  |  |  |  |  |  | 
| 182 |  |  |  |  |  |  | =item id() | 
| 183 |  |  |  |  |  |  |  | 
| 184 |  |  |  |  |  |  | Get IMDB film id (actually, it's the same as code). | 
| 185 |  |  |  |  |  |  |  | 
| 186 |  |  |  |  |  |  | my $id = $film->id(); | 
| 187 |  |  |  |  |  |  |  | 
| 188 |  |  |  |  |  |  | =cut | 
| 189 |  |  |  |  |  |  |  | 
| 190 |  |  |  |  |  |  | sub id { | 
| 191 | 0 |  |  | 0 | 1 |  | my CLASS_NAME $self = shift; | 
| 192 | 0 | 0 |  |  |  |  | if(@_) { $self->{_code} = shift } | 
|  | 0 |  |  |  |  |  |  | 
| 193 | 0 |  |  |  |  |  | return $self->{_code}; | 
| 194 |  |  |  |  |  |  | } | 
| 195 |  |  |  |  |  |  |  | 
| 196 |  |  |  |  |  |  | =item _proxy() | 
| 197 |  |  |  |  |  |  |  | 
| 198 |  |  |  |  |  |  | Store address of proxy server. You can pass a proxy name as parameter into | 
| 199 |  |  |  |  |  |  | object constructor: | 
| 200 |  |  |  |  |  |  |  | 
| 201 |  |  |  |  |  |  | my $imdb = new IMDB::Film(code => 111111, proxy => 'my.proxy.host:8080'); | 
| 202 |  |  |  |  |  |  |  | 
| 203 |  |  |  |  |  |  | or you can define environment variable 'http_host'. For exanple, for Linux | 
| 204 |  |  |  |  |  |  | you shoud do a following: | 
| 205 |  |  |  |  |  |  |  | 
| 206 |  |  |  |  |  |  | export http_proxy=my.proxy.host:8080 | 
| 207 |  |  |  |  |  |  |  | 
| 208 |  |  |  |  |  |  | =cut | 
| 209 |  |  |  |  |  |  |  | 
| 210 |  |  |  |  |  |  | sub _proxy { | 
| 211 | 0 |  |  | 0 |  |  | my CLASS_NAME $self = shift; | 
| 212 | 0 | 0 |  |  |  |  | if(@_) { $self->{proxy} = shift } | 
|  | 0 |  |  |  |  |  |  | 
| 213 | 0 |  |  |  |  |  | return $self->{proxy}; | 
| 214 |  |  |  |  |  |  | } | 
| 215 |  |  |  |  |  |  |  | 
| 216 |  |  |  |  |  |  | sub _decode_html { | 
| 217 | 0 |  |  | 0 |  |  | my CLASS_NAME $self = shift; | 
| 218 | 0 | 0 |  |  |  |  | if(@_) { $self->{decode_html} = shift } | 
|  | 0 |  |  |  |  |  |  | 
| 219 | 0 |  |  |  |  |  | return $self->{decode_html}; | 
| 220 |  |  |  |  |  |  | } | 
| 221 |  |  |  |  |  |  |  | 
| 222 |  |  |  |  |  |  | =item _cache() | 
| 223 |  |  |  |  |  |  |  | 
| 224 |  |  |  |  |  |  | Store cache flag. Indicate use file cache to store content page or not: | 
| 225 |  |  |  |  |  |  |  | 
| 226 |  |  |  |  |  |  | my $imdb = new IMDB::Film(code => 111111, cache => 1); | 
| 227 |  |  |  |  |  |  |  | 
| 228 |  |  |  |  |  |  | =cut | 
| 229 |  |  |  |  |  |  |  | 
| 230 |  |  |  |  |  |  | sub _cache { | 
| 231 | 0 |  |  | 0 |  |  | my CLASS_NAME $self = shift; | 
| 232 | 0 | 0 |  |  |  |  | if(@_) { $self->{cache} = shift } | 
|  | 0 |  |  |  |  |  |  | 
| 233 | 0 |  |  |  |  |  | return $self->{cache} | 
| 234 |  |  |  |  |  |  | } | 
| 235 |  |  |  |  |  |  |  | 
| 236 |  |  |  |  |  |  | =item _clear_cache | 
| 237 |  |  |  |  |  |  |  | 
| 238 |  |  |  |  |  |  | Store flag clear_cache which is indicated clear exisisting cache or not (false by default): | 
| 239 |  |  |  |  |  |  |  | 
| 240 |  |  |  |  |  |  | my $imdb = new IMDB::Film(code => 111111, cache => 1, clear_cache => 1); | 
| 241 |  |  |  |  |  |  |  | 
| 242 |  |  |  |  |  |  | =cut | 
| 243 |  |  |  |  |  |  |  | 
| 244 |  |  |  |  |  |  | sub _clear_cache { | 
| 245 | 0 |  |  | 0 |  |  | my CLASS_NAME $self = shift; | 
| 246 | 0 | 0 |  |  |  |  | if($_) { $self->{clear_cache} = shift } | 
|  | 0 |  |  |  |  |  |  | 
| 247 | 0 |  |  |  |  |  | return $self->{clear_cache}; | 
| 248 |  |  |  |  |  |  | } | 
| 249 |  |  |  |  |  |  |  | 
| 250 |  |  |  |  |  |  | =item _cacheObj() | 
| 251 |  |  |  |  |  |  |  | 
| 252 |  |  |  |  |  |  | In case of using cache, we create new Cache::File object and store it in object's | 
| 253 |  |  |  |  |  |  | propery. For more details about Cache::File please see Cache::Cache documentation. | 
| 254 |  |  |  |  |  |  |  | 
| 255 |  |  |  |  |  |  | =cut | 
| 256 |  |  |  |  |  |  |  | 
| 257 |  |  |  |  |  |  | sub _cacheObj { | 
| 258 | 0 |  |  | 0 |  |  | my CLASS_NAME $self = shift; | 
| 259 | 0 | 0 |  |  |  |  | if(@_) { $self->{cacheObj} = shift } | 
|  | 0 |  |  |  |  |  |  | 
| 260 | 0 |  |  |  |  |  | return $self->{cacheObj} | 
| 261 |  |  |  |  |  |  | } | 
| 262 |  |  |  |  |  |  |  | 
| 263 |  |  |  |  |  |  | =item _cache_exp() | 
| 264 |  |  |  |  |  |  |  | 
| 265 |  |  |  |  |  |  | In case of using cache, we can define value time of cache expire. | 
| 266 |  |  |  |  |  |  |  | 
| 267 |  |  |  |  |  |  | my $imdb = new IMDB::Film(code => 111111, cache_exp => '1 h'); | 
| 268 |  |  |  |  |  |  |  | 
| 269 |  |  |  |  |  |  | For more details please see Cache::Cache documentation. | 
| 270 |  |  |  |  |  |  |  | 
| 271 |  |  |  |  |  |  | =cut | 
| 272 |  |  |  |  |  |  |  | 
| 273 |  |  |  |  |  |  | sub _cache_exp { | 
| 274 | 0 |  |  | 0 |  |  | my CLASS_NAME $self = shift; | 
| 275 | 0 | 0 |  |  |  |  | if(@_) { $self->{cache_exp} = shift } | 
|  | 0 |  |  |  |  |  |  | 
| 276 | 0 |  |  |  |  |  | return $self->{cache_exp} | 
| 277 |  |  |  |  |  |  | } | 
| 278 |  |  |  |  |  |  |  | 
| 279 |  |  |  |  |  |  | sub _cache_root { | 
| 280 | 0 |  |  | 0 |  |  | my CLASS_NAME $self = shift; | 
| 281 | 0 | 0 |  |  |  |  | $self->{cache_root} = shift if @_; | 
| 282 |  |  |  |  |  |  |  | 
| 283 | 0 |  |  |  |  |  | $self->_show_message("CACHE ROOT is " . $self->{cache_root}, 'DEBUG'); | 
| 284 |  |  |  |  |  |  |  | 
| 285 | 0 |  |  |  |  |  | return $self->{cache_root}; | 
| 286 |  |  |  |  |  |  | } | 
| 287 |  |  |  |  |  |  |  | 
| 288 |  |  |  |  |  |  | sub _show_message { | 
| 289 | 0 |  |  | 0 |  |  | my CLASS_NAME $self = shift; | 
| 290 | 0 |  | 0 |  |  |  | my $msg = shift || 'Unknown error'; | 
| 291 | 0 |  | 0 |  |  |  | my $type = shift || 'ERROR'; | 
| 292 |  |  |  |  |  |  |  | 
| 293 | 0 | 0 | 0 |  |  |  | return if $type =~ /^debug$/i && !$self->_debug(); | 
| 294 |  |  |  |  |  |  |  | 
| 295 | 0 | 0 |  |  |  |  | if($type =~ /(debug|info|warn)/i) { carp "[$type] $msg" } | 
|  | 0 |  |  |  |  |  |  | 
| 296 | 0 |  |  |  |  |  | else { croak "[$type] $msg" } | 
| 297 |  |  |  |  |  |  | } | 
| 298 |  |  |  |  |  |  |  | 
| 299 |  |  |  |  |  |  | =item _host() | 
| 300 |  |  |  |  |  |  |  | 
| 301 |  |  |  |  |  |  | Store IMDB host name. You can pass this value in object constructor: | 
| 302 |  |  |  |  |  |  |  | 
| 303 |  |  |  |  |  |  | my $imdb = new IMDB::Film(code => 111111, host => 'us.imdb.com'); | 
| 304 |  |  |  |  |  |  |  | 
| 305 |  |  |  |  |  |  | By default, it uses 'www.imdb.com'. | 
| 306 |  |  |  |  |  |  |  | 
| 307 |  |  |  |  |  |  | =cut | 
| 308 |  |  |  |  |  |  |  | 
| 309 |  |  |  |  |  |  | sub _host { | 
| 310 | 0 |  |  | 0 |  |  | my CLASS_NAME $self = shift; | 
| 311 | 0 | 0 |  |  |  |  | if(@_) { $self->{host} = shift } | 
|  | 0 |  |  |  |  |  |  | 
| 312 | 0 |  |  |  |  |  | return $self->{host} | 
| 313 |  |  |  |  |  |  | } | 
| 314 |  |  |  |  |  |  |  | 
| 315 |  |  |  |  |  |  | =item _query() | 
| 316 |  |  |  |  |  |  |  | 
| 317 |  |  |  |  |  |  | Store query string to retrieve film by its ID. You can define | 
| 318 |  |  |  |  |  |  | different value for that: | 
| 319 |  |  |  |  |  |  |  | 
| 320 |  |  |  |  |  |  | my $imdb = new IMDB::Film(code => 111111, query => 'some significant string'); | 
| 321 |  |  |  |  |  |  |  | 
| 322 |  |  |  |  |  |  | Default value is 'title/tt'. | 
| 323 |  |  |  |  |  |  |  | 
| 324 |  |  |  |  |  |  | B | 
| 325 |  |  |  |  |  |  | real case.> | 
| 326 |  |  |  |  |  |  |  | 
| 327 |  |  |  |  |  |  | =cut | 
| 328 |  |  |  |  |  |  |  | 
| 329 |  |  |  |  |  |  | sub _query { | 
| 330 | 0 |  |  | 0 |  |  | my CLASS_NAME $self = shift; | 
| 331 | 0 | 0 |  |  |  |  | if(@_) { $self->{query} = shift } | 
|  | 0 |  |  |  |  |  |  | 
| 332 | 0 |  |  |  |  |  | return $self->{query} | 
| 333 |  |  |  |  |  |  | } | 
| 334 |  |  |  |  |  |  |  | 
| 335 |  |  |  |  |  |  | =item _search() | 
| 336 |  |  |  |  |  |  |  | 
| 337 |  |  |  |  |  |  | Store search string to find film by its title. You can define | 
| 338 |  |  |  |  |  |  | different value for that: | 
| 339 |  |  |  |  |  |  |  | 
| 340 |  |  |  |  |  |  | my $imdb = new IMDB::Film(code => 111111, seach => 'some significant string'); | 
| 341 |  |  |  |  |  |  |  | 
| 342 |  |  |  |  |  |  | Default value is 'Find?select=Titles&for='. | 
| 343 |  |  |  |  |  |  |  | 
| 344 |  |  |  |  |  |  | =cut | 
| 345 |  |  |  |  |  |  |  | 
| 346 |  |  |  |  |  |  | sub _search { | 
| 347 | 0 |  |  | 0 |  |  | my CLASS_NAME $self = shift; | 
| 348 | 0 | 0 |  |  |  |  | if(@_) { $self->{search} = shift } | 
|  | 0 |  |  |  |  |  |  | 
| 349 | 0 |  |  |  |  |  | return $self->{search} | 
| 350 |  |  |  |  |  |  | } | 
| 351 |  |  |  |  |  |  |  | 
| 352 |  |  |  |  |  |  | sub _exact { | 
| 353 | 0 |  |  | 0 |  |  | my CLASS_NAME $self = shift; | 
| 354 | 0 | 0 |  |  |  |  | if(@_) { $self->{exact} = shift } | 
|  | 0 |  |  |  |  |  |  | 
| 355 | 0 |  |  |  |  |  | return $self->{exact}; | 
| 356 |  |  |  |  |  |  | } | 
| 357 |  |  |  |  |  |  |  | 
| 358 |  |  |  |  |  |  | =item _debug() | 
| 359 |  |  |  |  |  |  |  | 
| 360 |  |  |  |  |  |  | Indicate to use DEBUG mode to display some debug messages: | 
| 361 |  |  |  |  |  |  |  | 
| 362 |  |  |  |  |  |  | my $imdb = new IMDB::Film(code => 111111, debug => 1); | 
| 363 |  |  |  |  |  |  |  | 
| 364 |  |  |  |  |  |  | By default debug mode is switched off. | 
| 365 |  |  |  |  |  |  |  | 
| 366 |  |  |  |  |  |  | =cut | 
| 367 |  |  |  |  |  |  |  | 
| 368 |  |  |  |  |  |  | sub _debug { | 
| 369 | 0 |  |  | 0 |  |  | my CLASS_NAME $self = shift; | 
| 370 | 0 | 0 |  |  |  |  | if(@_) { $self->{debug} = shift } | 
|  | 0 |  |  |  |  |  |  | 
| 371 | 0 |  |  |  |  |  | return $self->{debug} | 
| 372 |  |  |  |  |  |  | } | 
| 373 |  |  |  |  |  |  |  | 
| 374 |  |  |  |  |  |  | =item _content() | 
| 375 |  |  |  |  |  |  |  | 
| 376 |  |  |  |  |  |  | Connect to the IMDB, retrieve page according to crit: by film | 
| 377 |  |  |  |  |  |  | IMDB ID or its title and store content of that page in the object | 
| 378 |  |  |  |  |  |  | property. | 
| 379 |  |  |  |  |  |  | In case using cache, first check if page was already stored in the | 
| 380 |  |  |  |  |  |  | cache then retrieve page from the cache else store content of the | 
| 381 |  |  |  |  |  |  | page in the cache. | 
| 382 |  |  |  |  |  |  |  | 
| 383 |  |  |  |  |  |  | =cut | 
| 384 |  |  |  |  |  |  |  | 
| 385 |  |  |  |  |  |  | sub _content { | 
| 386 | 0 |  |  | 0 |  |  | my CLASS_NAME $self = shift; | 
| 387 | 0 | 0 |  |  |  |  | if(@_) { | 
| 388 | 0 |  | 0 |  |  |  | my $crit = shift || ''; | 
| 389 | 0 |  |  |  |  |  | my $page; | 
| 390 |  |  |  |  |  |  |  | 
| 391 | 0 | 0 |  |  |  |  | $self->code($crit) if $crit =~ /^\d{6,8}$/; | 
| 392 | 0 | 0 |  |  |  |  | $page = $self->_cacheObj()->get($crit) if $self->_cache(); | 
| 393 |  |  |  |  |  |  |  | 
| 394 | 0 |  |  |  |  |  | $self->_show_message("CRIT: $crit", 'DEBUG'); | 
| 395 |  |  |  |  |  |  |  | 
| 396 | 0 | 0 |  |  |  |  | unless($page) { | 
| 397 | 0 | 0 |  |  |  |  | if( -f $crit ) { | 
| 398 | 0 |  |  |  |  |  | $self->_show_message("Parse IMDB HTML file ...", 'DEBUG'); | 
| 399 |  |  |  |  |  |  |  | 
| 400 | 0 |  |  |  |  |  | local $/; | 
| 401 | 0 |  |  |  |  |  | undef $/; | 
| 402 | 0 | 0 |  |  |  |  | open FILE, $crit or die "Cannot open off-line IMDB file: $!!"; | 
| 403 | 0 |  |  |  |  |  | $page = ; | 
| 404 | 0 |  |  |  |  |  | close FILE; | 
| 405 | 0 |  |  |  |  |  | $self->status(FROM_FILE); | 
| 406 |  |  |  |  |  |  | } else { | 
| 407 | 0 |  |  |  |  |  | $self->_show_message("Retrieving page from internet ...", 'DEBUG'); | 
| 408 |  |  |  |  |  |  |  | 
| 409 | 0 | 0 | 0 |  |  |  | my $url = 'http://'.$self->_host().'/'. | 
| 410 |  |  |  |  |  |  | ($crit =~ /^\d+$/ && length($crit) >= ID_LENGTH ? $self->_query() : $self->_search()) . $crit; | 
| 411 |  |  |  |  |  |  |  | 
| 412 | 0 |  |  |  |  |  | $page = $self->_get_page_from_internet($url); | 
| 413 | 0 |  |  |  |  |  | $self->status(FROM_INTERNET); | 
| 414 |  |  |  |  |  |  | } | 
| 415 |  |  |  |  |  |  |  | 
| 416 | 0 | 0 |  |  |  |  | $self->_cacheObj()->set($crit, $page, $self->_cache_exp()) if $self->_cache(); | 
| 417 |  |  |  |  |  |  | } else { | 
| 418 | 0 |  |  |  |  |  | $self->_show_message("Retrieving page from cache ...", 'DEBUG'); | 
| 419 | 0 |  |  |  |  |  | $self->status(FROM_CACHE); | 
| 420 |  |  |  |  |  |  | } | 
| 421 |  |  |  |  |  |  |  | 
| 422 | 0 |  |  |  |  |  | $self->{content} = \$page; | 
| 423 |  |  |  |  |  |  | } | 
| 424 |  |  |  |  |  |  |  | 
| 425 | 0 |  |  |  |  |  | $self->{content}; | 
| 426 |  |  |  |  |  |  | } | 
| 427 |  |  |  |  |  |  |  | 
| 428 |  |  |  |  |  |  | sub _get_page_from_internet { | 
| 429 | 0 |  |  | 0 |  |  | my CLASS_NAME $self = shift; | 
| 430 | 0 |  |  |  |  |  | my $url = shift; | 
| 431 |  |  |  |  |  |  |  | 
| 432 | 0 |  |  |  |  |  | $self->_show_message("URL is [$url]...", 'DEBUG'); | 
| 433 |  |  |  |  |  |  |  | 
| 434 | 0 |  |  |  |  |  | my $page = get($url); | 
| 435 |  |  |  |  |  |  |  | 
| 436 | 0 | 0 |  |  |  |  | unless($page) { | 
| 437 | 0 |  |  |  |  |  | $self->error("Cannot retieve an url: [$url]!"); | 
| 438 | 0 |  |  |  |  |  | $self->_show_message("Cannot retrieve url [$url]", 'CRITICAL'); | 
| 439 |  |  |  |  |  |  | } | 
| 440 |  |  |  |  |  |  |  | 
| 441 | 0 |  |  |  |  |  | return $page; | 
| 442 |  |  |  |  |  |  | } | 
| 443 |  |  |  |  |  |  |  | 
| 444 |  |  |  |  |  |  | =item _parser() | 
| 445 |  |  |  |  |  |  |  | 
| 446 |  |  |  |  |  |  | Setup HTML::TokeParser and store. To have possibility to inherite that class | 
| 447 |  |  |  |  |  |  | we should every time initialize parser using stored content of page. | 
| 448 |  |  |  |  |  |  | For more information please see HTML::TokeParser documentation. | 
| 449 |  |  |  |  |  |  |  | 
| 450 |  |  |  |  |  |  | =cut | 
| 451 |  |  |  |  |  |  |  | 
| 452 |  |  |  |  |  |  | sub _parser { | 
| 453 | 0 |  |  | 0 |  |  | my CLASS_NAME $self = shift; | 
| 454 | 0 |  | 0 |  |  |  | my $forced = shift || 0; | 
| 455 | 0 |  | 0 |  |  |  | my $page = shift || undef; | 
| 456 |  |  |  |  |  |  |  | 
| 457 | 0 | 0 |  |  |  |  | if($forced) { | 
| 458 | 0 | 0 |  |  |  |  | my $content = defined $page ? $page : $self->_content(); | 
| 459 |  |  |  |  |  |  |  | 
| 460 | 0 | 0 |  |  |  |  | my $parser = new HTML::TokeParser($content) or croak "[CRITICAL] Cannot create HTML parser: $!!"; | 
| 461 | 0 |  |  |  |  |  | $self->{parser} = $parser; | 
| 462 |  |  |  |  |  |  | } | 
| 463 |  |  |  |  |  |  |  | 
| 464 | 0 |  |  |  |  |  | return $self->{parser}; | 
| 465 |  |  |  |  |  |  | } | 
| 466 |  |  |  |  |  |  |  | 
| 467 |  |  |  |  |  |  | =item _get_simple_prop() | 
| 468 |  |  |  |  |  |  |  | 
| 469 |  |  |  |  |  |  | Retrieve a simple movie property which surrownded by . | 
| 470 |  |  |  |  |  |  |  | 
| 471 |  |  |  |  |  |  | =cut | 
| 472 |  |  |  |  |  |  |  | 
| 473 |  |  |  |  |  |  | sub _get_simple_prop { | 
| 474 | 0 |  |  | 0 |  |  | my CLASS_NAME $self = shift; | 
| 475 | 0 |  | 0 |  |  |  | my $target = shift || ''; | 
| 476 |  |  |  |  |  |  |  | 
| 477 | 0 |  |  |  |  |  | my $parser = $self->_parser(FORCED); | 
| 478 |  |  |  |  |  |  |  | 
| 479 | 0 |  |  |  |  |  | while(my $tag = $parser->get_tag(MAIN_TAG)) { | 
| 480 | 0 |  |  |  |  |  | my $text = $parser->get_text; | 
| 481 |  |  |  |  |  |  |  | 
| 482 | 0 |  |  |  |  |  | $self->_show_message("[$tag->[0]] $text --> $target", 'DEBUG'); | 
| 483 | 0 | 0 |  |  |  |  | last if $text =~ /$target/i; | 
| 484 |  |  |  |  |  |  | } | 
| 485 |  |  |  |  |  |  |  | 
| 486 | 0 |  |  |  |  |  | my $end_tag = '/a'; | 
| 487 | 0 | 0 |  |  |  |  | $end_tag = '/div' if $target eq 'trivia'; | 
| 488 | 0 | 0 |  |  |  |  | $end_tag = 'span' if $target eq 'Production Co'; | 
| 489 | 0 | 0 |  |  |  |  | $end_tag = '/div' if $target eq 'aspect ratio'; | 
| 490 |  |  |  |  |  |  |  | 
| 491 | 0 |  |  |  |  |  | my $res = $parser->get_trimmed_text($end_tag); | 
| 492 |  |  |  |  |  |  |  | 
| 493 | 0 |  |  |  |  |  | $res =~ s/\s+(see )?more$//i; | 
| 494 |  |  |  |  |  |  |  | 
| 495 | 0 |  |  |  |  |  | $self->_show_message("RES: $res", 'DEBUG'); | 
| 496 |  |  |  |  |  |  |  | 
| 497 | 0 |  |  |  |  |  | $res = $self->_decode_special_symbols($res); | 
| 498 |  |  |  |  |  |  |  | 
| 499 | 0 |  |  |  |  |  | return $res; | 
| 500 |  |  |  |  |  |  | } | 
| 501 |  |  |  |  |  |  |  | 
| 502 |  |  |  |  |  |  | sub _search_results { | 
| 503 | 0 |  |  | 0 |  |  | my CLASS_NAME $self = shift; | 
| 504 | 0 |  | 0 |  |  |  | my $pattern = shift || croak 'Please, specify search pattern!'; | 
| 505 | 0 |  | 0 |  |  |  | my $end_tag = shift || '/li'; | 
| 506 | 0 |  |  |  |  |  | my $year	= shift; | 
| 507 |  |  |  |  |  |  |  | 
| 508 | 0 |  |  |  |  |  | my(@matched, @guess_res, %matched_hash); | 
| 509 | 0 |  |  |  |  |  | my $parser = $self->_parser(); | 
| 510 |  |  |  |  |  |  |  | 
| 511 | 0 |  |  |  |  |  | my $count = 0; | 
| 512 | 0 |  |  |  |  |  | while( my $tag = $parser->get_tag('a') ) { | 
| 513 | 0 |  |  |  |  |  | my $href = $tag->[1]{href}; | 
| 514 | 0 |  |  |  |  |  | my $title = $parser->get_trimmed_text('a', $end_tag); | 
| 515 |  |  |  |  |  |  |  | 
| 516 | 0 |  |  |  |  |  | $self->_show_message("TITLE: " . $title, 'DEBUG'); | 
| 517 | 0 | 0 | 0 |  |  |  | next if $title =~ /\[IMG\]/i or !$href or $href =~ /pro.imdb.com/; | 
|  |  |  | 0 |  |  |  |  | 
| 518 |  |  |  |  |  |  |  | 
| 519 |  |  |  |  |  |  | # Remove garbage from the first title | 
| 520 | 0 |  |  |  |  |  | $title =~ s/(\n|\r)//g; | 
| 521 | 0 |  |  |  |  |  | $title =~ s/\s*\.media_strip_thumbs.*//m; | 
| 522 |  |  |  |  |  |  |  | 
| 523 | 0 | 0 |  |  |  |  | if(my($id) = $href =~ /$pattern/) { | 
| 524 | 0 |  |  |  |  |  | $matched_hash{$id} = {title => $title, 'pos' => $count++}; | 
| 525 | 0 | 0 | 0 |  |  |  | @guess_res = ($id, $title) if $year && $title =~ /$year/ && !@guess_res; | 
|  |  |  | 0 |  |  |  |  | 
| 526 |  |  |  |  |  |  | } | 
| 527 |  |  |  |  |  |  | } | 
| 528 |  |  |  |  |  |  |  | 
| 529 | 0 |  |  |  |  |  | @matched = map { {title => $matched_hash{$_}->{title}, id => $_} } | 
|  | 0 |  |  |  |  |  |  | 
| 530 | 0 |  |  |  |  |  | sort { $matched_hash{$a}->{'pos'} <=> $matched_hash{$b}->{'pos'} } keys %matched_hash; | 
| 531 |  |  |  |  |  |  |  | 
| 532 | 0 |  |  |  |  |  | $self->matched(\@matched); | 
| 533 |  |  |  |  |  |  |  | 
| 534 | 0 |  |  |  |  |  | $self->_show_message("matched: " . Dumper(\@matched), 'DEBUG'); | 
| 535 | 0 |  |  |  |  |  | $self->_show_message("guess: " . Dumper(\@guess_res), 'DEBUG'); | 
| 536 |  |  |  |  |  |  |  | 
| 537 | 0 |  |  |  |  |  | my($title, $id); | 
| 538 | 0 | 0 |  |  |  |  | if(@guess_res) { | 
| 539 | 0 |  |  |  |  |  | ($id, $title) = @guess_res; | 
| 540 |  |  |  |  |  |  | } else { | 
| 541 | 0 |  |  |  |  |  | $title = $matched[0]->{title}; | 
| 542 | 0 |  |  |  |  |  | $id = $matched[0]->{id}; | 
| 543 |  |  |  |  |  |  | } | 
| 544 |  |  |  |  |  |  |  | 
| 545 | 0 |  |  |  |  |  | $self->_content($id); | 
| 546 | 0 |  |  |  |  |  | $self->_parser(FORCED); | 
| 547 |  |  |  |  |  |  |  | 
| 548 | 0 |  |  |  |  |  | return $title; | 
| 549 |  |  |  |  |  |  | } | 
| 550 |  |  |  |  |  |  |  | 
| 551 |  |  |  |  |  |  | =item matched() | 
| 552 |  |  |  |  |  |  |  | 
| 553 |  |  |  |  |  |  | Retrieve list of matched films each element of which is hash reference - | 
| 554 |  |  |  |  |  |  | { id => , title => : | 
| 555 |  |  |  |  |  |  |  | 
| 556 |  |  |  |  |  |  | my @matched = @{ $film->matched() }; | 
| 557 |  |  |  |  |  |  |  | 
| 558 |  |  |  |  |  |  | Note: if movie was matched by title unambiguously it won't be present in this array! | 
| 559 |  |  |  |  |  |  |  | 
| 560 |  |  |  |  |  |  | =cut | 
| 561 |  |  |  |  |  |  |  | 
| 562 |  |  |  |  |  |  | sub matched { | 
| 563 | 0 |  |  | 0 | 1 |  | my CLASS_NAME $self = shift; | 
| 564 | 0 | 0 |  |  |  |  | if(@_) { $self->{matched} = shift } | 
|  | 0 |  |  |  |  |  |  | 
| 565 | 0 |  |  |  |  |  | return $self->{matched}; | 
| 566 |  |  |  |  |  |  | } | 
| 567 |  |  |  |  |  |  |  | 
| 568 |  |  |  |  |  |  | sub status { | 
| 569 | 0 |  |  | 0 | 0 |  | my CLASS_NAME $self = shift; | 
| 570 | 0 | 0 |  |  |  |  | if(@_) { $self->{status} = shift } | 
|  | 0 |  |  |  |  |  |  | 
| 571 | 0 |  |  |  |  |  | return $self->{status}; | 
| 572 |  |  |  |  |  |  | } | 
| 573 |  |  |  |  |  |  |  | 
| 574 |  |  |  |  |  |  | sub status_descr { | 
| 575 | 0 |  |  | 0 | 0 |  | my CLASS_NAME $self = shift; | 
| 576 | 0 |  | 0 |  |  |  | return $STATUS_DESCR{$self->{status}} || $self->{status}; | 
| 577 |  |  |  |  |  |  | } | 
| 578 |  |  |  |  |  |  |  | 
| 579 |  |  |  |  |  |  | sub retrieve_code { | 
| 580 | 0 |  |  | 0 | 0 |  | my CLASS_NAME $self = shift; | 
| 581 | 0 |  |  |  |  |  | my $parser = shift; | 
| 582 | 0 |  |  |  |  |  | my $pattern = shift; | 
| 583 | 0 |  |  |  |  |  | my($id, $tag); | 
| 584 |  |  |  |  |  |  |  | 
| 585 | 0 |  |  |  |  |  | while($tag = $parser->get_tag('link')) { | 
| 586 | 0 | 0 | 0 |  |  |  | if($tag->[1]{href} && $tag->[1]{href} =~ m!$pattern!) { | 
| 587 | 0 |  |  |  |  |  | $self->code($1); | 
| 588 | 0 |  |  |  |  |  | last; | 
| 589 |  |  |  |  |  |  | } | 
| 590 |  |  |  |  |  |  | } | 
| 591 |  |  |  |  |  |  | } | 
| 592 |  |  |  |  |  |  |  | 
| 593 |  |  |  |  |  |  | =item error() | 
| 594 |  |  |  |  |  |  |  | 
| 595 |  |  |  |  |  |  | Return string which contains error messages separated by \n: | 
| 596 |  |  |  |  |  |  |  | 
| 597 |  |  |  |  |  |  | my $errors = $film->error(); | 
| 598 |  |  |  |  |  |  |  | 
| 599 |  |  |  |  |  |  | =cut | 
| 600 |  |  |  |  |  |  |  | 
| 601 |  |  |  |  |  |  | sub error { | 
| 602 | 0 |  |  | 0 | 1 |  | my CLASS_NAME $self = shift; | 
| 603 | 0 | 0 |  |  |  |  | if(@_) { push @{ $self->{error} }, shift() } | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 604 | 0 | 0 |  |  |  |  | return join("\n", @{ $self->{error} }) if $self->{error}; | 
|  | 0 |  |  |  |  |  |  | 
| 605 |  |  |  |  |  |  | } | 
| 606 |  |  |  |  |  |  |  | 
| 607 |  |  |  |  |  |  | sub _decode_special_symbols { | 
| 608 | 0 |  |  | 0 |  |  | my($self, $text) = @_; | 
| 609 | 0 | 0 |  |  |  |  | if($self->_decode_html) { | 
| 610 | 0 |  |  |  |  |  | $text = unidecode(decode_entities($text)); | 
| 611 |  |  |  |  |  |  | } | 
| 612 | 0 |  |  |  |  |  | return $text; | 
| 613 |  |  |  |  |  |  | } | 
| 614 |  |  |  |  |  |  |  | 
| 615 |  |  |  |  |  |  | sub AUTOLOAD { | 
| 616 | 0 |  |  | 0 |  |  | my $self = shift; | 
| 617 | 0 |  |  |  |  |  | my($class, $method) = $AUTOLOAD =~ /(.*)::(.*)/; | 
| 618 | 0 |  |  |  |  |  | my($pack, $file, $line) = caller; | 
| 619 |  |  |  |  |  |  |  | 
| 620 | 0 |  |  |  |  |  | carp "Method [$method] not found in the class [$class]!\n Called from $pack	at line $line"; | 
| 621 |  |  |  |  |  |  | } | 
| 622 |  |  |  |  |  |  |  | 
| 623 |  |  |  |  |  |  | sub DESTROY { | 
| 624 | 0 |  |  | 0 |  |  | my $self = shift; | 
| 625 |  |  |  |  |  |  | } | 
| 626 |  |  |  |  |  |  |  | 
| 627 |  |  |  |  |  |  | 1; | 
| 628 |  |  |  |  |  |  |  | 
| 629 |  |  |  |  |  |  | __END__ |