line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Net::ChooseFName; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
58285
|
use 5.005; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
45
|
|
4
|
1
|
|
|
1
|
|
6
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
60
|
|
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
984
|
use URI::URL 'url'; |
|
1
|
|
|
|
|
2002999
|
|
|
1
|
|
|
|
|
73
|
|
7
|
1
|
|
|
1
|
|
11
|
use File::Path 'mkpath'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
54
|
|
8
|
1
|
|
|
1
|
|
1393
|
use LWP::MediaTypes qw(guess_media_type media_suffix add_type); |
|
1
|
|
|
|
|
21603
|
|
|
1
|
|
|
|
|
196
|
|
9
|
1
|
|
|
1
|
|
13
|
use vars qw($VERSION); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
5379
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
$VERSION = '0.01'; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 NAME |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
Net::ChooseFName - Perl extension for choosing a name of a local mirror |
16
|
|
|
|
|
|
|
of a net (e.g., FTP or HTTP) resource. |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use Net::ChooseFName; |
21
|
|
|
|
|
|
|
$namer = Net::ChooseFName->new(max_length => 64); # Copies to CD ok |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
$name = $namer->find_name_by_response($LWP_response); |
24
|
|
|
|
|
|
|
$name = $namer->find_name_by_response($LWP_response, $as_if_content_type); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
$name = $namer->find_name_by_url($url, $suggested_name, |
27
|
|
|
|
|
|
|
$content_type, $content_encoding); |
28
|
|
|
|
|
|
|
$name = $namer->find_name_by_url($url, $suggested_name, $content_type); |
29
|
|
|
|
|
|
|
$name = $namer->find_name_by_url($url, $suggested_name); |
30
|
|
|
|
|
|
|
$name = $namer->find_name_by_url($url); |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
$namer_returns_undef = Net::ChooseFName->failer(); # Funny constructor |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=head1 DESCRIPTION |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
This module helps to pick up a local file name for a remote resource |
38
|
|
|
|
|
|
|
(e.g., one downloaded from Internet). It turns out that this is a |
39
|
|
|
|
|
|
|
tricky business; keep in mind that most servers are misconfigured, |
40
|
|
|
|
|
|
|
most URLs are malformed, and most filesystems are limited |
41
|
|
|
|
|
|
|
w.r.t. possible filenames. As a result most downloaders fail to work |
42
|
|
|
|
|
|
|
in some situations since they choose names which are not supported on |
43
|
|
|
|
|
|
|
particular filesystems, or not useful for C-related work. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
Because of the many possible twists and ramifications, the design of |
46
|
|
|
|
|
|
|
this module is to be as much configurable as possible. One of ways of |
47
|
|
|
|
|
|
|
configurations is a rich system of options which influence |
48
|
|
|
|
|
|
|
different steps of the process. To cover cases when options are not |
49
|
|
|
|
|
|
|
flexible enough, the process is broken into many steps; each step is |
50
|
|
|
|
|
|
|
easily overridable by subclassing C. |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
The defaults are chosen to be as safe as possible while not getting |
53
|
|
|
|
|
|
|
very much into the ways. For example, since C<%> is a special |
54
|
|
|
|
|
|
|
character on DOSish shells, to simplify working from command line on |
55
|
|
|
|
|
|
|
such systems, we avoid this letter in generated file names. |
56
|
|
|
|
|
|
|
Similarly, since MacOS has problems with filenames with 8-bit |
57
|
|
|
|
|
|
|
characters, we avoid them too; since may Unix programs have problem |
58
|
|
|
|
|
|
|
with spaces in file names, we massage them into underscores; the |
59
|
|
|
|
|
|
|
length of the longest file path component is restricted to 255 chars. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
Note that in many situations it is advisable to make these |
62
|
|
|
|
|
|
|
restrictions yet stronger. For example, for copying to CD one should |
63
|
|
|
|
|
|
|
restrict names yet more (C 64>); for copying to MSDOS |
64
|
|
|
|
|
|
|
file systems enable option C<'8+3' =E 1>. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
[In the description of methods the $self argument is omitted.] |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=head2 Principal methods |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=over |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=item new(OPT1 => $val1, ...) |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
Constructor method. Creates an object with given options. Default |
75
|
|
|
|
|
|
|
values for the unspecified options are (comments list in which methods |
76
|
|
|
|
|
|
|
this option is used): |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
protect => # protect_characters() |
79
|
|
|
|
|
|
|
# $1 should contain the match |
80
|
|
|
|
|
|
|
qr/([?*|\"<>\\:?#\x00-\x1F\x7F-\xFF\[\])/, |
81
|
|
|
|
|
|
|
protect_pref => '@', # protect_characters(), protect_directory() |
82
|
|
|
|
|
|
|
root => '.', # find_directory() |
83
|
|
|
|
|
|
|
dir_mode => 0775, # directory_found() |
84
|
|
|
|
|
|
|
mkpath => 1, # directory_found() |
85
|
|
|
|
|
|
|
max_suff_len => 4, # split_suffix() 'jpeg' |
86
|
|
|
|
|
|
|
keepsuff_same_mediatype => 1, # choose_suffix() |
87
|
|
|
|
|
|
|
type_suff => # choose_suffix() |
88
|
|
|
|
|
|
|
{'text/ftp-dir-listing' => '.dirl'} |
89
|
|
|
|
|
|
|
keep_suff => { text/plain => 1, |
90
|
|
|
|
|
|
|
application/octet-stream => 1 }, |
91
|
|
|
|
|
|
|
short_suffices => # eight_plus_three() |
92
|
|
|
|
|
|
|
{jpeg => 'jpg', html => 'htm', |
93
|
|
|
|
|
|
|
'tar.bz2' => 'tbz', 'tar.gz' => 'tgz'}, |
94
|
|
|
|
|
|
|
suggest_disposition => 1, # find_name_by_response() |
95
|
|
|
|
|
|
|
suggested_only_basename => 1, # find_name_by_response(), raw_name() |
96
|
|
|
|
|
|
|
fix_url_backslashes => 1, # protect_characters() |
97
|
|
|
|
|
|
|
max_length => 255, # fix_dups(), fix_component() |
98
|
|
|
|
|
|
|
cache_name => 1, # name_found() |
99
|
|
|
|
|
|
|
queryless_types => # url_takes_query() |
100
|
|
|
|
|
|
|
{ map(($_ => 1), # http://filext.com/detaillist.php?extdetail=DJV 2005/01 |
101
|
|
|
|
|
|
|
qw(image/djvu image/x-djvu image/dejavu image/x-dejavu |
102
|
|
|
|
|
|
|
image/djvw image/x.djvu image/vnd.djvu ))}, |
103
|
|
|
|
|
|
|
queryless_ext => { 'djvu' => 1, 'djv' => 1 }, # url_takes_query() |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
The option C is special so that the user-specified value is |
106
|
|
|
|
|
|
|
I to this hash, and not I it. Similarly, the value |
107
|
|
|
|
|
|
|
of option C is used to populate the value for C |
108
|
|
|
|
|
|
|
of this hash. |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
Other, options have C as the default value. Their effects are |
111
|
|
|
|
|
|
|
documented in the documentation of the methods they affect. With the |
112
|
|
|
|
|
|
|
exception of C, these options are booleans. |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
html_suff # new() |
115
|
|
|
|
|
|
|
known_names # known_names() name_found(); hash ref or undef |
116
|
|
|
|
|
|
|
only_known # known_names() |
117
|
|
|
|
|
|
|
hierarchical # raw_name(), find_directory() |
118
|
|
|
|
|
|
|
use_query # raw_name() |
119
|
|
|
|
|
|
|
8+3 # fix_basename(), fix_component() |
120
|
|
|
|
|
|
|
keep_space # fix_component() |
121
|
|
|
|
|
|
|
keep_dots # fix_component() |
122
|
|
|
|
|
|
|
tolower # fix_component() |
123
|
|
|
|
|
|
|
dir_query # find_directory() |
124
|
|
|
|
|
|
|
site_dir # find_directory() |
125
|
|
|
|
|
|
|
ignore_existing_files # fix_dups |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
keep_nosuff, type_suff_no_enc, type_suff_fallback, |
128
|
|
|
|
|
|
|
type_suff_fallback_no_enc # choose_suffix() |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
Summary of the most useful in applications options (with defaults if |
131
|
|
|
|
|
|
|
applicable): |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
html_suff # Suffix for HTML (dot will be prepended) |
134
|
|
|
|
|
|
|
root => '.', # Where to put files? |
135
|
|
|
|
|
|
|
mkpath => 1, # Create directories with chosen names? |
136
|
|
|
|
|
|
|
max_length => 255, # Maximal length of a path component |
137
|
|
|
|
|
|
|
ignore_existing_files # Should the filename be "new"? |
138
|
|
|
|
|
|
|
cache_name => 1, # Return the same filename on the same URL, |
139
|
|
|
|
|
|
|
# even if file jumped to existence? |
140
|
|
|
|
|
|
|
hierarchical # Only the last component of URL path matters? |
141
|
|
|
|
|
|
|
suggested_only_basename => 1, # Should suggested name be relative the path? |
142
|
|
|
|
|
|
|
use_query # Do not ignore the query part of URL? |
143
|
|
|
|
|
|
|
# Value is used as (literal) prefix of query |
144
|
|
|
|
|
|
|
dir_query # Make the non-query part of URL a directory? |
145
|
|
|
|
|
|
|
site_dir # Put the hostname part of URL into directory? |
146
|
|
|
|
|
|
|
keepsuff_same_mediatype # Preserve the file extensions matching type? |
147
|
|
|
|
|
|
|
8+3 # Is the filesystem DOSish? |
148
|
|
|
|
|
|
|
keep_space # Map spaces in URL to spaces in filenames? |
149
|
|
|
|
|
|
|
tolower # Translate filenames to lowercase? |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
type_suff, type_suff_no_enc, type_suff_fallback, type_suff_fallback_no_enc, |
152
|
|
|
|
|
|
|
keep_suff, keep_nosuff # Hashes indexed by lowercased types; |
153
|
|
|
|
|
|
|
# Allow tuning choosing the suffix |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=cut |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
my $pr = '([?*|\"<>\\\\:?#\x00-\x1F\x7F-\xFF\\[\\]])'; |
158
|
|
|
|
|
|
|
my $defaults = { |
159
|
|
|
|
|
|
|
protect => eval("qr/$pr/") || $pr, |
160
|
|
|
|
|
|
|
protect_pref => '@', |
161
|
|
|
|
|
|
|
root => '.', |
162
|
|
|
|
|
|
|
dir_mode => 0775, |
163
|
|
|
|
|
|
|
mkpath => 1, |
164
|
|
|
|
|
|
|
keep_suff => {map(($_ => 1), |
165
|
|
|
|
|
|
|
qw( text/plain |
166
|
|
|
|
|
|
|
application/octet-stream application/download ))}, |
167
|
|
|
|
|
|
|
max_suff_len => 4, # 'jpeg' |
168
|
|
|
|
|
|
|
keepsuff_same_mediatype => 1, |
169
|
|
|
|
|
|
|
short_suffices => {qw( jpeg jpg tar.bz2 tbz html htm |
170
|
|
|
|
|
|
|
tar.gz tgz )}, # used if '8+3' is true |
171
|
|
|
|
|
|
|
suggest_disposition => 1, |
172
|
|
|
|
|
|
|
suggested_only_basename => 1, |
173
|
|
|
|
|
|
|
fix_url_backslashes => 1, |
174
|
|
|
|
|
|
|
max_length => 255, |
175
|
|
|
|
|
|
|
cache_name => 1, # name_found() |
176
|
|
|
|
|
|
|
queryless_types => # url_takes_query() |
177
|
|
|
|
|
|
|
{ map(($_ => 1), # http://filext.com/detaillist.php?extdetail=DJV 2005/01 |
178
|
|
|
|
|
|
|
qw(image/djvu image/x-djvu image/dejavu image/x-dejavu |
179
|
|
|
|
|
|
|
image/vnd.djvw image/djvw image/x.djvu))}, |
180
|
|
|
|
|
|
|
queryless_ext => { 'djvu' => 1, 'djv' => 1 }, # url_takes_query() |
181
|
|
|
|
|
|
|
}; |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
my %default_suff = ( |
184
|
|
|
|
|
|
|
'text/ftp-dir-listing' => '.dirl', |
185
|
|
|
|
|
|
|
); |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
sub new { |
188
|
21
|
|
|
21
|
1
|
18904
|
my $class = shift; |
189
|
21
|
|
|
|
|
386
|
my $self = bless {%$defaults, @_}, $class; |
190
|
21
|
|
50
|
|
|
164
|
$self->{type_suff} ||= {}; |
191
|
21
|
50
|
|
|
|
164
|
$self->{type_suff}{'text/html'} = ".$self->{html_suff}" |
192
|
|
|
|
|
|
|
if defined $self->{html_suff}; |
193
|
21
|
|
|
|
|
50
|
$self->{type_suff} = {%default_suff, %{$self->{type_suff}}}; |
|
21
|
|
|
|
|
82
|
|
194
|
21
|
|
|
|
|
75
|
$self; |
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=item find_name_by_url($url, $suggested_name, $type, $enc) |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
This method returns a suitable filename for the resource given its URL. |
200
|
|
|
|
|
|
|
Optional arguments are a suggested name (possibly, it will be modified |
201
|
|
|
|
|
|
|
according to options of the object), the content-type, and the |
202
|
|
|
|
|
|
|
content-encoding of the resource. If multiple content-encodings are |
203
|
|
|
|
|
|
|
required, specify them as an array reference. |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
A chain of helper methods (L<"Transformation chain">) is called to |
206
|
|
|
|
|
|
|
apply certain transformations to the name. C is returned if |
207
|
|
|
|
|
|
|
any of the helper methods (except known_names() and protect_query()) |
208
|
|
|
|
|
|
|
return undefined values; the caller is free to interpret this as "load |
209
|
|
|
|
|
|
|
to memory", if appropriate. These helper methods are listed in the |
210
|
|
|
|
|
|
|
following section. |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
=cut |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
sub find_name_by_url { |
215
|
39
|
|
|
39
|
1
|
11087
|
my ($self, $url, $suggested, $type, $enc) = @_; |
216
|
|
|
|
|
|
|
|
217
|
39
|
50
|
|
|
|
109
|
defined($url = $self->url_2resource($url, $type, $enc)) or return; |
218
|
|
|
|
|
|
|
|
219
|
39
|
|
|
|
|
116
|
my $f = $self->known_names($url, $suggested, $type, $enc); |
220
|
39
|
100
|
|
|
|
132
|
return $f if defined $f; |
221
|
|
|
|
|
|
|
|
222
|
31
|
50
|
|
|
|
92
|
($f, my $q) = $self->raw_name($url, $suggested, $type, $enc) |
223
|
|
|
|
|
|
|
or return; |
224
|
|
|
|
|
|
|
|
225
|
31
|
|
|
|
|
1121
|
$f = $self->protect_characters($f, $q, $url, $suggested, $type, $enc); |
226
|
31
|
50
|
|
|
|
88
|
return unless defined $f; |
227
|
|
|
|
|
|
|
|
228
|
31
|
|
|
|
|
82
|
$q = $self->protect_query($f, $q, $url, $suggested, $type, $enc); |
229
|
|
|
|
|
|
|
|
230
|
31
|
50
|
|
|
|
98
|
(my $dirname, $f, $q) |
231
|
|
|
|
|
|
|
= $self->find_directory($f, $q, $url, $suggested, $type, $enc) |
232
|
|
|
|
|
|
|
or return; |
233
|
|
|
|
|
|
|
|
234
|
31
|
|
|
|
|
111
|
$dirname = $self->protect_directory($dirname, $f, $q, $url, $suggested, $type, $enc); |
235
|
31
|
50
|
|
|
|
110
|
return unless defined $dirname; |
236
|
|
|
|
|
|
|
|
237
|
31
|
|
|
|
|
77
|
$dirname = $self->directory_found($dirname, $f, $q, $url, $suggested, $type, $enc); |
238
|
31
|
50
|
|
|
|
81
|
return unless defined $dirname; |
239
|
|
|
|
|
|
|
|
240
|
31
|
50
|
|
|
|
226
|
($f, my $suff) = |
241
|
|
|
|
|
|
|
$self->split_suffix($f, $dirname, $q, $url, $suggested, $type, $enc) |
242
|
|
|
|
|
|
|
or return; |
243
|
31
|
50
|
|
|
|
115
|
($f, $suff) = $self->choose_suffix($f, $suff, $dirname, $q, $url, |
244
|
|
|
|
|
|
|
$suggested, $type, $enc) |
245
|
|
|
|
|
|
|
or return; |
246
|
31
|
50
|
|
|
|
107
|
($f, $suff) = |
247
|
|
|
|
|
|
|
$self->fix_basename($f, $dirname, $suff, $url, $suggested, $type, $enc) |
248
|
|
|
|
|
|
|
or return; |
249
|
31
|
|
|
|
|
98
|
$f = $self->fix_dups($f, $dirname, $suff, $url, $suggested, $type, $enc); |
250
|
31
|
50
|
|
|
|
74
|
return unless defined $f; |
251
|
31
|
|
|
|
|
105
|
return $self->name_found($url, $f, $dirname, $suff, $suggested, $type, $enc); |
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=item find_name_by_response($response [, $content_type]) |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
This method returns name given an LWP response object (and, |
257
|
|
|
|
|
|
|
optionally, an overriding C). If option |
258
|
|
|
|
|
|
|
C is TRUE, uses the header C |
259
|
|
|
|
|
|
|
from the response as the suggested name, then passes the fields from |
260
|
|
|
|
|
|
|
the response object to the method find_name_by_url(). |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
=cut |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
sub find_name_by_response { |
265
|
0
|
|
|
0
|
1
|
0
|
my ($self, $res, $ct) = (shift, shift, shift); |
266
|
0
|
0
|
|
|
|
0
|
$ct = $res->content_type unless defined $ct; |
267
|
|
|
|
|
|
|
# "Content-Disposition" header is defined by RFC1806; supported by Netscape |
268
|
0
|
|
0
|
|
|
0
|
my $cd = $self->{suggest_disposition} && $res->header("Content-Disposition"); |
269
|
0
|
|
|
|
|
0
|
my $suggested; |
270
|
0
|
0
|
0
|
|
|
0
|
if ($cd && $cd =~ /\bfilename\s*=\s*(\S+)/) { |
271
|
0
|
|
|
|
|
0
|
$suggested = $1; |
272
|
0
|
|
|
|
|
0
|
$suggested =~ s/;$//; |
273
|
0
|
|
|
|
|
0
|
$suggested =~ s/^([\"\'])(.*)\1$/$2/; |
274
|
0
|
0
|
|
|
|
0
|
$suggested =~ s,.*[\\/],, if $self->{suggested_only_basename}; |
275
|
|
|
|
|
|
|
} |
276
|
0
|
|
|
|
|
0
|
$self->find_name_by_url($res->request->url, $suggested, |
277
|
|
|
|
|
|
|
$ct, $res->content_encoding); |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=back |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
=head2 Transformation chain |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=over |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
=item url_2resource($url [, $type, $encoding]) |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
This method returns $url modified by removing the parts related to |
289
|
|
|
|
|
|
|
access to I of the resource. In particular, the I part is |
290
|
|
|
|
|
|
|
removed, as well as the I part if url_is_queryless() returns TRUE. |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=cut |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
sub url_2resource { |
295
|
39
|
|
|
39
|
1
|
64
|
my ($self, $url, $type, $enc) = @_; |
296
|
|
|
|
|
|
|
|
297
|
39
|
50
|
|
|
|
194
|
$url = url($url) unless ref($url); |
298
|
39
|
|
|
|
|
19240
|
my $cpy; |
299
|
39
|
100
|
|
|
|
368
|
if (defined $url->frag) { |
300
|
10
|
|
|
|
|
344
|
$cpy = $url = $url->clone; |
301
|
10
|
|
|
|
|
159
|
$url->frag(undef); |
302
|
|
|
|
|
|
|
} |
303
|
39
|
50
|
33
|
|
|
1052
|
if (defined $url->equery and $self->url_takes_query($url, $type, $enc)) { |
304
|
0
|
0
|
|
|
|
0
|
$url = $url->clone unless $cpy; |
305
|
0
|
|
|
|
|
0
|
$url->query(undef); |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
$url |
308
|
39
|
|
|
|
|
155
|
} |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
=item known_names($url, $suggested, $type, $enc) |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
The method find_name_by_url() will return the return value of this |
313
|
|
|
|
|
|
|
method (unless L) immediately. Unless overriden, this method |
314
|
|
|
|
|
|
|
returns the value of the hash option C indexed by the |
315
|
|
|
|
|
|
|
$url. (By default this hash is empty.) |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
If the option C is true, it is a fatal error if $url is |
318
|
|
|
|
|
|
|
not a key of this hash. |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=cut |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
sub known_names { |
323
|
39
|
|
|
39
|
1
|
54
|
my ($self, $url) = @_; |
324
|
39
|
|
|
|
|
221
|
my $f = $self->{known_names}{$url}; |
325
|
39
|
100
|
|
|
|
508
|
return $f if defined $f; |
326
|
|
|
|
|
|
|
|
327
|
0
|
|
|
|
|
0
|
die "URL with unknown name `$url'" |
328
|
31
|
50
|
33
|
|
|
95
|
if $self->{only_known} and keys %{$self->{known_names}}; |
329
|
31
|
|
|
|
|
53
|
return; |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
=item raw_name($url, $suggested, $type, $enc) |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
Returns the 0th approximation to the filename of the resource; the |
335
|
|
|
|
|
|
|
return value has two parts: the principal part, and the query string |
336
|
|
|
|
|
|
|
(C if should not be used). |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
If $suggested is undefined, returns the path part of the $url, and the |
339
|
|
|
|
|
|
|
query part, if present and if option C is TRUE). Otherwise |
340
|
|
|
|
|
|
|
either returns $suggested, or (if options C |
341
|
|
|
|
|
|
|
and C are both true), returns the I part of the |
342
|
|
|
|
|
|
|
$url with the last component changed to $suggested; the query part is |
343
|
|
|
|
|
|
|
ignored in this case. In the latter case, if option C is TRUE, only the last path component of $suggested is used. |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
=cut |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
sub raw_name { |
348
|
31
|
|
|
31
|
1
|
46
|
my ($self, $url, $suggested) = @_; |
349
|
31
|
100
|
|
|
|
69
|
if (defined $suggested) { |
350
|
4
|
100
|
100
|
|
|
29
|
if ($self->{suggested_only_basename} and $self->{hierarchical}) { |
351
|
1
|
|
|
|
|
7
|
my @p = $url->path_segments; |
352
|
1
|
50
|
|
|
|
47
|
$suggested =~ s,.*/,, if $self->{suggested_basename}; |
353
|
1
|
|
|
|
|
9
|
return join '/', @p[0..$#p-1], $suggested; |
354
|
|
|
|
|
|
|
} |
355
|
3
|
|
|
|
|
14
|
return $suggested; |
356
|
|
|
|
|
|
|
} else { |
357
|
27
|
100
|
|
|
|
82
|
my $q = $self->{use_query} ? $url->equery : undef; |
358
|
27
|
|
|
|
|
518
|
return ($url->path, $q); |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
} |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
=item protect_characters($f, $query, $url, $suggested, $type, $enc) |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
Returns the filename $f with necessary character-by-character |
365
|
|
|
|
|
|
|
translations performed. Unless overriden, it translates backslashes |
366
|
|
|
|
|
|
|
to slashes if the option C is TRUE, replaces |
367
|
|
|
|
|
|
|
characters matched by regular expression in the option C by |
368
|
|
|
|
|
|
|
their hexadecimal representation (with the leader being the value of |
369
|
|
|
|
|
|
|
the option C), and replaces percent signs by the value |
370
|
|
|
|
|
|
|
of the option C. |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=cut |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
sub protect_characters { |
375
|
31
|
|
|
31
|
1
|
47
|
my ($self, $f) = @_; |
376
|
31
|
50
|
|
|
|
137
|
$f =~ s,\\,/,g if $self->{fix_url_backslashes}; |
377
|
|
|
|
|
|
|
# Protect against funny characters, some filesystems can bark on them |
378
|
31
|
|
|
|
|
1091
|
$f =~ s($self->{protect}) |
379
|
0
|
|
|
|
|
0
|
( sprintf '%s%02X', $self->{protect_pref}, ord $1 )ge; |
380
|
31
|
50
|
|
|
|
110
|
$f =~ s(%)($self->{protect_pref})g if $self->{protect_pref} ne '%'; |
381
|
31
|
|
|
|
|
67
|
$f |
382
|
|
|
|
|
|
|
} |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=item protect_query($f, $query, $url, $suggested, $type, $enc) |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
Returns $query with necessary character-by-character translations |
387
|
|
|
|
|
|
|
performed. Unless overriden, it translates slashes, backslashes, and |
388
|
|
|
|
|
|
|
characters matched byregular expression in the option C by |
389
|
|
|
|
|
|
|
their hexadecimal representation (with the leader being the value of |
390
|
|
|
|
|
|
|
the option C), and replaces percent signs by the value |
391
|
|
|
|
|
|
|
of the option C. |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=cut |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
sub protect_query { |
396
|
31
|
|
|
31
|
1
|
76
|
my ($self, $f, $q) = @_; |
397
|
31
|
100
|
|
|
|
92
|
return unless defined $q; |
398
|
|
|
|
|
|
|
# Protect against funny characters, some filesystems can bark on them |
399
|
7
|
|
|
|
|
45
|
$q =~ s($self->{protect}) |
400
|
49
|
|
|
|
|
232
|
( sprintf '%s%02X', $self->{protect_pref}, ord $1 )ge; |
401
|
7
|
50
|
|
|
|
388
|
$q =~ s(%)($self->{protect_pref})g if $self->{protect_pref} ne '%'; |
402
|
7
|
|
|
|
|
31
|
$q =~ s(([/\\])) |
403
|
0
|
|
|
|
|
0
|
( sprintf '%s%02X', $self->{protect_pref}, ord $1 )ge; |
404
|
7
|
|
|
|
|
22
|
$q |
405
|
|
|
|
|
|
|
} |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=item find_directory($f, $query, $url, $suggested, $type, $enc) |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
Returns a triple of the appropriate directory name, the relative |
410
|
|
|
|
|
|
|
filename, and a string to append to the filename, based on |
411
|
|
|
|
|
|
|
processed-so-far filename $f and the $query string. |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
Unless overriden, does the following: unless the option |
414
|
|
|
|
|
|
|
C is TRUE, all but the last path components of $f are |
415
|
|
|
|
|
|
|
ignored. If the option C is TRUE, the host part of the URL |
416
|
|
|
|
|
|
|
(as well as the port part - if non-standard) are prepended to the |
417
|
|
|
|
|
|
|
filename. The leading backslash is always stripped, and the option |
418
|
|
|
|
|
|
|
C is used as the lead components of the directory name. If |
419
|
|
|
|
|
|
|
$query is defined, and the option C is true, $f is used as |
420
|
|
|
|
|
|
|
the last component of the directory, and $query as file name (with |
421
|
|
|
|
|
|
|
option C prepended). |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
(Dirname is assumed to be C>-terminated.) |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=cut |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
sub find_directory { |
428
|
31
|
|
|
31
|
1
|
58
|
my ($self, $f, $q, $url) = @_; |
429
|
|
|
|
|
|
|
# trim path until only the basename is left |
430
|
31
|
|
|
|
|
138
|
$f =~ s|(.*/)||; |
431
|
31
|
100
|
66
|
|
|
201
|
my $dirname = ($self->{hierarchical} and $1) ? $1 : ''; |
432
|
31
|
|
|
|
|
102
|
$dirname =~ s#^/##; |
433
|
|
|
|
|
|
|
|
434
|
31
|
100
|
|
|
|
70
|
if (defined $q) { |
435
|
7
|
|
|
|
|
21
|
$q = "$self->{use_query}$q"; |
436
|
7
|
100
|
|
|
|
22
|
if ($self->{dir_query}) { |
437
|
6
|
|
|
|
|
14
|
$dirname = "$dirname$f/"; # XXXX If it already exists as a file? |
438
|
6
|
|
|
|
|
10
|
$f = $q; |
439
|
6
|
|
|
|
|
12
|
$q = ''; |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
} else { |
442
|
24
|
|
|
|
|
38
|
$q = ''; |
443
|
|
|
|
|
|
|
} |
444
|
|
|
|
|
|
|
|
445
|
31
|
100
|
|
|
|
72
|
if ($self->{site_dir}) { |
446
|
9
|
|
|
|
|
16
|
eval { |
447
|
9
|
|
|
|
|
53
|
my $site = lc $url->host; |
448
|
9
|
|
|
|
|
290
|
my $port = $url->port; |
449
|
9
|
|
|
|
|
424
|
my $def = $url->default_port; |
450
|
9
|
50
|
|
|
|
94
|
$port = '' if $port == $def; |
451
|
9
|
50
|
|
|
|
20
|
$site .= "=port$port" if length $port; |
452
|
9
|
|
|
|
|
35
|
$dirname = "$self->{root}/$site/$dirname"; |
453
|
|
|
|
|
|
|
}; |
454
|
|
|
|
|
|
|
} else { |
455
|
22
|
|
|
|
|
62
|
$dirname = "$self->{root}/$dirname"; |
456
|
|
|
|
|
|
|
} |
457
|
31
|
|
|
|
|
157
|
($dirname, $f, $q) |
458
|
|
|
|
|
|
|
} |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
=item protect_directory($dirname, $f, $append, $url, $suggested, $type, $enc) |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
Returns the provisional directory part of the filename. Unless |
463
|
|
|
|
|
|
|
overriden, replaces empty components by the string C preceeded |
464
|
|
|
|
|
|
|
by the value of C option; then applies the method |
465
|
|
|
|
|
|
|
fix_component() to each component of the directory. |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=cut |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
sub protect_directory { |
470
|
31
|
|
|
31
|
1
|
50
|
my ($self, $dirname) = @_; |
471
|
31
|
|
|
|
|
116
|
$dirname =~ s,/(?=/),/$self->{protect_pref}empty,g; # empty components |
472
|
31
|
|
|
|
|
163
|
return join '/', map($self->fix_component($_,1), split m|/|, $dirname), ''; |
473
|
|
|
|
|
|
|
} |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
=item directory_found($dirname, $f, $append, $url, $suggested, $type, $enc) |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
A callback to process the calculated directory name. Unless |
478
|
|
|
|
|
|
|
overriden, it creates the directory (with permissions per option |
479
|
|
|
|
|
|
|
C) if the option C is TRUE. |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
Actually, the directory name is the return value, so this is the last |
482
|
|
|
|
|
|
|
chance to change the directory name... |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
=cut |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
sub directory_found { |
487
|
31
|
|
|
31
|
1
|
101
|
my ($self, $dirname) = @_; |
488
|
31
|
100
|
66
|
|
|
6371
|
mkpath $dirname, $self->{verbose}, $self->{dir_mode} |
|
|
|
100
|
|
|
|
|
489
|
|
|
|
|
|
|
if $self->{mkpath} and length $dirname and not -d $dirname; |
490
|
31
|
|
|
|
|
82
|
$dirname; |
491
|
|
|
|
|
|
|
} |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
# Copied from LWP::Mediatypes v1.32 |
494
|
|
|
|
|
|
|
my %suffixEncoding = ( |
495
|
|
|
|
|
|
|
'Z' => 'compress', |
496
|
|
|
|
|
|
|
'gz' => 'gzip', |
497
|
|
|
|
|
|
|
'hqx' => 'x-hqx', |
498
|
|
|
|
|
|
|
'uu' => 'x-uuencode', |
499
|
|
|
|
|
|
|
'z' => 'x-pack', |
500
|
|
|
|
|
|
|
'bz2' => 'x-bzip2', |
501
|
|
|
|
|
|
|
); |
502
|
|
|
|
|
|
|
my %suffixDecoding = reverse %suffixEncoding; |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
=item split_suffix($f, $dirname, $append, $url, $suggested, $type, $enc) |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
Breaks the last component $f of the filename into a pair of basename |
507
|
|
|
|
|
|
|
and suffix, which are returned. $dirname consists of other components |
508
|
|
|
|
|
|
|
of the filename, $append is the string to append to the basename in |
509
|
|
|
|
|
|
|
the future. |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
Suffix may be empty, and is supposed to contain the leading dot (if |
512
|
|
|
|
|
|
|
applicable); it may contain more than one dot. Unless overriden, the |
513
|
|
|
|
|
|
|
suffix consists of all trailing non-empty started-by-dot groups with |
514
|
|
|
|
|
|
|
length no more than given by the option C (not including |
515
|
|
|
|
|
|
|
the leading dot). |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=cut |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
sub split_suffix { |
520
|
49
|
|
|
49
|
1
|
107
|
my ($self, $f, $dirname, $append, $url, $suggested, $type, $enc) = @_; |
521
|
|
|
|
|
|
|
|
522
|
49
|
|
|
|
|
50
|
my $suff; |
523
|
|
|
|
|
|
|
|
524
|
49
|
|
|
|
|
76
|
my $max = $self->{max_suff_len}; |
525
|
49
|
|
|
|
|
1385
|
(my $base = $f) =~ s<((?:\.[^/]{1,$max})*)$><>; |
526
|
49
|
|
|
|
|
288
|
return ($base, "$1"); |
527
|
|
|
|
|
|
|
} |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
=item choose_suffix($f, $suff, $dirname, $append, $url, $suggested, $type, $enc) |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
Returns a pair of basename and appropriate suffix for a file. $f is |
533
|
|
|
|
|
|
|
the basename of the file, $suff is its suffix, $dirname consists of |
534
|
|
|
|
|
|
|
other components of file names, $append is the string to append to the |
535
|
|
|
|
|
|
|
basename. |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
Different strategies applicable to this problem are: |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
=over |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
=item * |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
keep the file extension; |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
=item * |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
replace by the "best" extension for this $type (and $enc); |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
=item * |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
replace by the user-specified type-specific extension. |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
=back |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
Any of these has two variants: whether we want the encodings reflected |
556
|
|
|
|
|
|
|
in the suffix, or not. Unless overriden, chosing strategy/variant |
557
|
|
|
|
|
|
|
consists of several rounds. |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
In the first round, choose user-specified suffix if $type is defined, |
560
|
|
|
|
|
|
|
and is (lowercased) in the option-hashes C and |
561
|
|
|
|
|
|
|
C (choosing the variant based on which hash |
562
|
|
|
|
|
|
|
matched). Keep the current suffix if $type is not defined, or option |
563
|
|
|
|
|
|
|
C is TRUE and the current suffix of the file |
564
|
|
|
|
|
|
|
matches $type and $enc (per database of known types and encodings). |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
The second round runs if none of these was applicable. Choose |
567
|
|
|
|
|
|
|
user-specified suffix if $type is (lowercased) in the hashes |
568
|
|
|
|
|
|
|
C or C (choosing |
569
|
|
|
|
|
|
|
variant as above); keep the current suffix if the type (lowercased) is |
570
|
|
|
|
|
|
|
in the hashes C or C (depending on whether |
571
|
|
|
|
|
|
|
$suff is empty or not). |
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
If none of these was applicable, the last round chooses the |
574
|
|
|
|
|
|
|
appropriate suffix by the database of known types and encodings; if |
575
|
|
|
|
|
|
|
not found, the existing suffix is preserved. |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
=cut |
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
sub choose_suffix { |
580
|
31
|
|
|
31
|
1
|
72
|
my ($self, $f, $suff, $dirname, $append, $url, $suggested, $type, $enc) = @_; |
581
|
|
|
|
|
|
|
|
582
|
31
|
|
|
|
|
47
|
my ($guess_suffix, $check_enc); |
583
|
31
|
50
|
|
|
|
94
|
$enc = [] unless defined $enc; |
584
|
31
|
50
|
|
|
|
95
|
$enc = [$enc] unless ref $enc; |
585
|
31
|
100
|
|
|
|
160
|
if (not defined $type) { # Do nothing |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
586
|
|
|
|
|
|
|
} elsif (exists $self->{type_suff}{lc $type}) { |
587
|
0
|
|
|
|
|
0
|
$suff = $self->{type_suff}{lc $type}; |
588
|
0
|
|
|
|
|
0
|
$check_enc = $enc; |
589
|
|
|
|
|
|
|
} elsif (exists $self->{type_suff_no_enc}{lc $type}) { |
590
|
0
|
|
|
|
|
0
|
$suff = $self->{type_suff}{lc $type}; |
591
|
|
|
|
|
|
|
} elsif ($self->{keepsuff_same_mediatype}) { |
592
|
18
|
|
|
|
|
74
|
my($t, @enc) = guess_media_type($f); |
593
|
18
|
50
|
33
|
|
|
1528
|
$guess_suffix = 1 |
|
|
|
33
|
|
|
|
|
594
|
|
|
|
|
|
|
unless defined $t and lc $t eq lc $type and lc "@enc" eq lc "@$enc"; |
595
|
|
|
|
|
|
|
} else { |
596
|
0
|
|
|
|
|
0
|
$guess_suffix = 1; |
597
|
|
|
|
|
|
|
} |
598
|
|
|
|
|
|
|
|
599
|
31
|
100
|
|
|
|
171
|
if (not $guess_suffix) { # No substitution |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
600
|
|
|
|
|
|
|
} elsif (exists $self->{type_suff_fallback}{lc $type}) { |
601
|
0
|
|
|
|
|
0
|
$suff = $self->{type_suff_fallback}{lc $type}; |
602
|
0
|
|
|
|
|
0
|
$check_enc = $enc; |
603
|
|
|
|
|
|
|
} elsif (exists $self->{type_suff_fallback_no_enc}{lc $type}) { |
604
|
0
|
|
|
|
|
0
|
$suff = $self->{type_suff_fallback}{lc $type}; |
605
|
|
|
|
|
|
|
} elsif ((length $suff) |
606
|
|
|
|
|
|
|
? $self->{keep_suff}{lc $type} |
607
|
|
|
|
|
|
|
: $self->{keep_nosuff}{lc $type}) { # No substitution |
608
|
|
|
|
|
|
|
} else { |
609
|
18
|
|
|
|
|
57
|
my $s = media_suffix($type); |
610
|
18
|
50
|
33
|
|
|
227
|
if (defined $s and length $s) { # Known media type... |
611
|
18
|
|
|
|
|
27
|
$suff = ".$s"; |
612
|
18
|
|
|
|
|
29
|
$check_enc = $enc; |
613
|
|
|
|
|
|
|
} |
614
|
|
|
|
|
|
|
} |
615
|
|
|
|
|
|
|
|
616
|
31
|
100
|
|
|
|
68
|
if ($check_enc) { |
617
|
18
|
|
|
|
|
38
|
for my $e (@$enc) { |
618
|
0
|
0
|
|
|
|
0
|
$suff .= $suffixDecoding{$e} if exists $suffixDecoding{$e}; |
619
|
|
|
|
|
|
|
} |
620
|
|
|
|
|
|
|
} |
621
|
|
|
|
|
|
|
|
622
|
31
|
|
|
|
|
171
|
return ("$f$append", $suff); |
623
|
|
|
|
|
|
|
} |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
=item fix_basename($f, $dirname, $suff, $url, $suggested, $type, $enc) |
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
Returns a pair of basename and suffix for a file. $f is the last |
628
|
|
|
|
|
|
|
component of the name of the file, $dirname consists of other |
629
|
|
|
|
|
|
|
components. Unless overriden, this method replaces an empty basename |
630
|
|
|
|
|
|
|
by C<"index"> and applies fix_component() method to the basename; |
631
|
|
|
|
|
|
|
finally, if C<'8+3'> otion is set, it converts the filename and suffix |
632
|
|
|
|
|
|
|
to a name suitable 8+3 filesystems. |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
=cut |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
sub fix_basename { |
637
|
31
|
|
|
31
|
1
|
66
|
my ($self, $f, $dirname, $suffix) = @_; |
638
|
|
|
|
|
|
|
|
639
|
31
|
50
|
|
|
|
99
|
$f = "index" unless length $f; |
640
|
31
|
|
|
|
|
67
|
$f = $self->fix_component($f,0); # Length ignores extension... |
641
|
31
|
100
|
|
|
|
151
|
($f, $suffix) = $self->eight_plus_three($f, $suffix) if $self->{'8+3'}; |
642
|
31
|
|
|
|
|
130
|
return ($f, $suffix); |
643
|
|
|
|
|
|
|
} |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
=item fix_dups($f, $dirname, $suff, $url, $suggested, $type, $enc) |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
Given a basename, extension, and the directory part of the filename, |
648
|
|
|
|
|
|
|
modifies the basename (if needed) to avoid duplicates; should return |
649
|
|
|
|
|
|
|
the complete file name (combining the dirname, basename, and suffix). |
650
|
|
|
|
|
|
|
Unless overriden, appends a number to the basename (shortening |
651
|
|
|
|
|
|
|
basename if needed) so that the result is unique. |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
This is a prime candidate for overriding (e.g., to ask user for |
654
|
|
|
|
|
|
|
confirmation of overwrite). |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
=cut |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
sub fix_dups { |
659
|
31
|
|
|
31
|
1
|
65
|
my ($self, $f, $dirname, $suff) = @_; |
660
|
|
|
|
|
|
|
|
661
|
31
|
100
|
|
|
|
75
|
return "$dirname$f$suff" if $self->{ignore_existing_files}; |
662
|
30
|
|
|
|
|
48
|
my $max_length = $self->{max_length}; |
663
|
30
|
|
|
|
|
48
|
my $extra = ""; # something to make the name unique |
664
|
30
|
100
|
|
|
|
71
|
$max_length = 8 + length $suff if $self->{'8+3'}; |
665
|
30
|
|
|
|
|
187
|
while (1) { |
666
|
|
|
|
|
|
|
# Construct a new file name; give up shortening if suffix is too long... |
667
|
32
|
100
|
66
|
|
|
194
|
if ( $max_length and length "$f$extra$suff" > $max_length |
|
|
|
66
|
|
|
|
|
668
|
|
|
|
|
|
|
and length "$extra$suff" < $max_length ) { |
669
|
4
|
|
|
|
|
14
|
$f = substr $f, 0, $max_length - length "$extra$suff"; |
670
|
|
|
|
|
|
|
} |
671
|
32
|
|
|
|
|
64
|
my $file = $dirname . $f . $extra . $suff; |
672
|
|
|
|
|
|
|
# Check if it is unique |
673
|
32
|
100
|
|
|
|
887
|
return $file unless -e $file; |
674
|
|
|
|
|
|
|
|
675
|
2
|
50
|
|
|
|
13
|
$extra = "000" unless $extra; # Try appending a number |
676
|
2
|
|
|
|
|
5
|
$extra++; |
677
|
|
|
|
|
|
|
} |
678
|
|
|
|
|
|
|
} |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
=item name_found($url, $f, $dirname, $suff, $suggested, $type, $enc) |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
The callback method to register the found name. Unless overridden, |
683
|
|
|
|
|
|
|
behaves like following: if option C is TRUE, stores the |
684
|
|
|
|
|
|
|
found name in the C hash. Otherwise just returns the found name. |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
=cut |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
sub name_found { |
689
|
31
|
|
|
31
|
1
|
75
|
my ($self, $url, $f, $dirname, $suff, $suggested, $type, $enc) = @_; |
690
|
|
|
|
|
|
|
|
691
|
31
|
100
|
|
|
|
108
|
return $f unless $self->{cache_name}; |
692
|
28
|
|
|
|
|
175
|
return $self->{known_names}{$url} = $f; |
693
|
|
|
|
|
|
|
} |
694
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
=back |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
=head2 Helper methods |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
=over |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
=item fix_component($component, $isdir) |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
Returns a suitably modified value of a path component of a filename. |
704
|
|
|
|
|
|
|
The non-overriden method massages unescapes embedded SPACE characters; |
705
|
|
|
|
|
|
|
it removes starting/trailing, and converts the rest to C<_> unless the |
706
|
|
|
|
|
|
|
option C is TRUE; removes trailing dots unless the option |
707
|
|
|
|
|
|
|
C is TRUE; translates to lowercase if the option C |
708
|
|
|
|
|
|
|
is TRUE, truncates to C if this option is set, and applies |
709
|
|
|
|
|
|
|
the eight_plus_three() method if the option C<'8+3'> is set. |
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
=cut |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
sub fix_component { |
714
|
160
|
|
|
160
|
1
|
388
|
my ($self, $f, $isdir) = @_; |
715
|
|
|
|
|
|
|
|
716
|
160
|
|
|
|
|
259
|
$f =~ s/%20/ /g; # URL-encoded space is %20 |
717
|
160
|
|
|
|
|
423
|
$f =~ s/\E$self->{protect_pref}20/ /g; # URL-encoded space is %20 |
718
|
160
|
100
|
|
|
|
367
|
unless ($self->{keep_space}) { # Translate spaces in URL to underscores (_) |
719
|
151
|
|
|
|
|
434
|
$f =~ s/^ *//; # Remove initial spaces from base |
720
|
151
|
|
|
|
|
1066
|
$f =~ s/ *$//; # Remove trailing spaces from base |
721
|
|
|
|
|
|
|
|
722
|
151
|
|
|
|
|
259
|
$f =~ tr/ /_/; |
723
|
|
|
|
|
|
|
} |
724
|
160
|
50
|
|
|
|
456
|
$f =~ s/\.+$// unless $self->{keep_dots}; |
725
|
160
|
100
|
|
|
|
498
|
$f = lc $f if $self->{tolower}; # Output lower-case |
726
|
|
|
|
|
|
|
|
727
|
160
|
100
|
66
|
|
|
745
|
substr($f, $self->{max_length}) = '' |
728
|
|
|
|
|
|
|
if $self->{max_length} and length $f > $self->{max_length}; |
729
|
160
|
100
|
|
|
|
345
|
return join '', $self->eight_plus_three($f) if $self->{'8+3'}; |
730
|
142
|
|
|
|
|
605
|
$f; |
731
|
|
|
|
|
|
|
} |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
=item eight_plus_three($fname, $suffix) |
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
Returns the value of filename modified for filesystems with 8+3 |
736
|
|
|
|
|
|
|
restriction on the filename (such as DOS). If $suffix is not given, |
737
|
|
|
|
|
|
|
calculates it from $fname; otherwise $suffix should include the |
738
|
|
|
|
|
|
|
leading dot, and $fname should have $suffix already removed. (Some |
739
|
|
|
|
|
|
|
parts of info may be moved between suffix and filename if judged |
740
|
|
|
|
|
|
|
appropriate.) |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
=cut |
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
sub eight_plus_three { |
745
|
20
|
|
|
20
|
1
|
29
|
my ($self, $f, $suff) = @_; |
746
|
|
|
|
|
|
|
|
747
|
20
|
100
|
|
|
|
56
|
($f, $suff) = $self->split_suffix($f, undef, '') unless defined $suff; |
748
|
|
|
|
|
|
|
# Try to move some info to a suffix even if it becomes too long |
749
|
20
|
100
|
100
|
|
|
110
|
$suff = $2 if not length $suff and $f =~ s|(.{8,})\.(.*)$|$1|s ; |
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
# Balance multiple suffices between the parts |
752
|
20
|
|
100
|
|
|
300
|
$f .= $1 while length($f) <= 6 and $suff =~ s/^(\..*?)(?=\...)//s; |
753
|
|
|
|
|
|
|
|
754
|
20
|
100
|
100
|
|
|
124
|
if (not length $suff and length($f) > 8) { # Move part of fname to suff |
755
|
6
|
|
|
|
|
10
|
my $l = length($f) - 8; |
756
|
6
|
50
|
|
|
|
12
|
$l = 3 if $l > 3; |
757
|
6
|
|
|
|
|
12
|
$suff = substr $f, -$l, $l; |
758
|
6
|
|
|
|
|
11
|
substr($f, -$l, $l) = ''; |
759
|
|
|
|
|
|
|
} |
760
|
20
|
|
|
|
|
40
|
$f =~ s/\./_/g; |
761
|
20
|
|
|
|
|
69
|
$suff =~ s/^\.//; # Temporary strip the leading dot |
762
|
20
|
|
|
|
|
35
|
my $s = $self->{short_suffices}{$suff}; |
763
|
20
|
100
|
|
|
|
57
|
($s = $suff) =~ s/\./_/g unless defined $s; |
764
|
|
|
|
|
|
|
|
765
|
20
|
100
|
|
|
|
45
|
substr($f, 8) = '' if length($f) > 8; |
766
|
20
|
100
|
|
|
|
40
|
substr($s, 2, length($s)-3) = '' if length($s) > 3; |
767
|
20
|
100
|
|
|
|
47
|
$s = ".$s" if length $s; |
768
|
20
|
|
|
|
|
106
|
($f, $s); |
769
|
|
|
|
|
|
|
} |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
=item url_takes_query($url [, $type, $encoding]) |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
This method returns TRUE if the I part of the URL is selecting |
774
|
|
|
|
|
|
|
a part of the resource (i.e., if it is behaves as a I part, |
775
|
|
|
|
|
|
|
and it is the client which should process this part). Such URLs are |
776
|
|
|
|
|
|
|
detected by $type (should be in hash option C), or by |
777
|
|
|
|
|
|
|
extension of the last path component (should be in hash option |
778
|
|
|
|
|
|
|
C). |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
=back |
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
=cut |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
sub url_takes_query { |
785
|
39
|
|
|
39
|
1
|
1073
|
my ($self, $url, $type) = @_; |
786
|
39
|
50
|
66
|
|
|
175
|
return 1 if $type and $self->{queryless_types}{$type}; |
787
|
39
|
|
|
|
|
196
|
my @p = $url->path_segments; |
788
|
39
|
|
66
|
|
|
3394
|
my ($ext) = (@p and $p[-1] =~ /.*\.(.*)$/); |
789
|
39
|
100
|
|
|
|
314
|
$ext and $self->{queryless_ext}{$ext}; |
790
|
|
|
|
|
|
|
} |
791
|
|
|
|
|
|
|
|
792
|
|
|
|
|
|
|
=head1 Net::ChooseFName::Failer class |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
A class which behaves as Net::ChooseFName, but always returns |
795
|
|
|
|
|
|
|
C. For convenience, the constructor is duplicated as a class |
796
|
|
|
|
|
|
|
method failer() in the class Net::ChooseFName. |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
=cut |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
# These always return undef; the caller is free to interpret this "to memory" |
801
|
0
|
|
|
0
|
|
0
|
sub Net::ChooseFName::Failer::find_name_by_response {} |
802
|
0
|
|
|
0
|
|
0
|
sub Net::ChooseFName::Failer::find_name_by_url {} |
803
|
0
|
|
|
0
|
|
0
|
sub Net::ChooseFName::Failer::new {bless [], shift} |
804
|
0
|
|
|
0
|
0
|
0
|
sub Net::ChooseFName::failer {bless [], 'Net::ChooseFName::Failer'} |
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
sub __Fix_broken_MediaTypes { |
807
|
1
|
|
|
1
|
|
5
|
my @s = media_suffix('application/postscript'); |
808
|
|
|
|
|
|
|
# warn "Fixing `@s'..."; |
809
|
|
|
|
|
|
|
# if ($s[0] eq 'ai' or 1) { # [0] addresses in hash order; meaningless |
810
|
|
|
|
|
|
|
# warn "Fixing..."; |
811
|
1
|
|
|
|
|
2227
|
@s = ('ps', grep $_ ne 'ps', @s); |
812
|
1
|
|
|
|
|
8
|
add_type('application/postscript', @s); |
813
|
|
|
|
|
|
|
# } |
814
|
|
|
|
|
|
|
} |
815
|
|
|
|
|
|
|
__Fix_broken_MediaTypes(); |
816
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
1; |
818
|
|
|
|
|
|
|
__END__ |