File Coverage

lib/File/Information/Base.pm
Criterion Covered Total %
statement 121 495 24.4
branch 46 306 15.0
condition 79 311 25.4
subroutine 15 41 36.5
pod 18 20 90.0
total 279 1173 23.7


line stmt bran cond sub pod time code
1             # Copyright (c) 2024-2025 Philipp Schafft <lion@cpan.org>
2              
3             # licensed under Artistic License 2.0 (see LICENSE file)
4              
5             # ABSTRACT: generic module for extracting information from filesystems
6              
7              
8             package File::Information::Base;
9              
10 3     3   4467 use v5.16;
  3         7  
11 3     3   9 use strict;
  3         3  
  3         42  
12 3     3   8 use warnings;
  3         3  
  3         92  
13              
14 3     3   10 use Carp;
  3         3  
  3         219  
15 3     3   16 use Scalar::Util qw(blessed);
  3         3  
  3         128  
16              
17 3     3   1630 use Data::Identifier v0.08;
  3         436493  
  3         23  
18 3     3   221 use File::Information;
  3         6  
  3         148  
19              
20 3     3   18 use parent qw(Data::Identifier::Interface::Known Data::Identifier::Interface::Simple);
  3         5  
  3         29  
21              
22             use constant { # Taken from Data::Identifier
23 3         33667 RE_UUID => qr/^[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}$/,
24             RE_OID => qr/^[0-2](?:\.(?:0|[1-9][0-9]*))+$/,
25             RE_URI => qr/^[a-zA-Z][a-zA-Z0-9\+\.\-]+/,
26              
27             WK_WM_RANDOM_ACCESS => '4dc9fd07-7ef3-4215-8874-31d78ed55c22',
28             WK_WM_READ_ONLY => '3877b2ef-6c77-423f-b15f-76508fbd48ed',
29             WK_WM_NONE => '7b177183-083c-4387-abd3-8793eb647373',
30             WK_FINAL => 'f418cdb9-64a7-4f15-9a18-63f7755c5b47',
31             WK_ALSO_HAS_ROLE => 'd2750351-aed7-4ade-aa80-c32436cc6030',
32             WK_SPECIFIC_PROTO_FILE_STATE => '63da70a8-78a4-51b0-8b87-86872b474a5d',
33             WK_FINAL_FILE_ENCODING => '448c50a8-c847-4bc7-856e-0db5fea8f23b',
34             WK_FINAL_FILE_SIZE => '1cd4a6c6-0d7c-48d1-81e7-4e8d41fdb45d',
35             WK_FINAL_FILE_HASH => '79385945-0963-44aa-880a-bca4a42e9002',
36             WK_FETCH_FILE_URI => '96674c6c-cf5e-40cd-af1e-63b86e741f4f',
37             WK_PROTO_FILE => '52a516d0-25d8-47c7-a6ba-80983e576c54',
38             WK_TAGPOOL_FILE => '4e4bbe45-f783-442d-8804-ac729f5cdec5',
39             WK_ALSO_HAS_STATE => '4c426c3c-900e-4350-8443-e2149869fbc9',
40             WK_HAS_FINAL_STATE => '54d30193-2000-4d8a-8c28-3fa5af4cad6b',
41             WK_SEE_ALSO => 'a75f9010-9db3-4d78-bd78-0dd528d6b55d',
42             WK_ALSO_SHARES_IDENTIFIER => 'ddd60c5c-2934-404f-8f2d-fcb4da88b633',
43             WK_TAGNAME => 'bfae7574-3dae-425d-89b1-9c087c140c23',
44              
45             WK_NEEDSTAGGING => '92292a4e-b060-417e-a90c-a270331259e9',
46              
47             WK_TAGPOOL_TITLE => '361fda18-50ce-4421-b378-881179b0318a',
48             WK_TAGPOOL_DESCRIPTION => 'ca33b058-b4ce-4059-9f0b-61ca0fd39c35',
49             WK_TAGPOOL_COMMENT => '06706809-207b-4287-9775-6efa07f807dd',
50             WK_TAGPOOL_TAGGED_AS => '703cbb5d-eb4a-4718-9e60-adbef6f71869',
51              
52             WK_FILESYSTEM => '63c1da19-0dd6-4181-b3fa-742b9ceb2903',
53             WK_TAGPOOL_POOL => '1f30649d-eb55-48cb-93d7-6d6fcba23909',
54 3     3   4102 };
  3         6  
55              
56             our $VERSION = v0.16;
57              
58             our %_digest_name_converter = ( # stolen from Data::URIID::Result
59             fc('md5') => 'md-5-128',
60             fc('sha1') => 'sha-1-160',
61             fc('sha-1') => 'sha-1-160',
62             fc('ripemd-160') => 'ripemd-1-160',
63             (map {
64             fc('sha'.$_) => 'sha-2-'.$_,
65             fc('sha-'.$_) => 'sha-2-'.$_,
66             fc('sha3-'.$_) => 'sha-3-'.$_,
67             } qw(224 256 384 512)),
68             );
69              
70             our %_digest_info_extra = (
71             'md-5-128' => {rfc9530 => 'md5', openpgp => 1},
72             'ripemd-1-160' => { openpgp => 3},
73             'sha-1-160' => {rfc9530 => 'sha', openpgp => 2, sni => 185},
74             'sha-2-224' => { openpgp => 11},
75             'sha-2-256' => {rfc9530 => 'sha-256', openpgp => 8},
76             'sha-2-384' => { openpgp => 9},
77             'sha-2-512' => {rfc9530 => 'sha-512', openpgp => 10},
78             'sha-3-256' => { openpgp => 12},
79             'sha-3-512' => { openpgp => 14, sni => 186},
80             );
81              
82             my %_important_digests = map {$_ => 1} qw(sha-1-160 sha-3-512);
83              
84             our %_mediatypes = ( # Copied from tags-universal
85             'application/gzip' => 'a8bb3d20-e983-5060-8c63-95b35e9ca56a',
86             'application/http' => '282ff2fd-0e1b-5b34-bda7-9c44b6ef7dc6',
87             'application/json' => 'c9e61b78-a0bd-5939-9aaa-8f0d08e5a4dc',
88             'application/ld+json' => '999e546d-8dfe-5961-aa5f-bf5cbd0a7037',
89             'application/octet-stream' => '4076d9f9-ca42-5976-b41b-e54aa912ccf3',
90             'application/ogg' => 'f4a4beee-e0f4-567a-ada4-a15d387a953c',
91             'application/pdf' => '03e6c035-e046-5b7e-a016-55b51c4836ea',
92             'application/postscript' => '85224b06-7548-5319-b635-4b37dc78880d',
93             'application/vnd.sirtx.vmv0' => 'f718f85b-6b41-53c0-9c66-8796df90c725',
94             'application/vnd.debian.binary-package' => '026b4c07-00ab-581d-a493-73e0b9b1cff9',
95             'application/vnd.oasis.opendocument.base' => '319de973-68e2-5a01-af87-6fe4a5b800c6',
96             'application/vnd.oasis.opendocument.chart' => '271d085d-1a51-5795-86f5-e6849166cbf6',
97             'application/vnd.oasis.opendocument.chart-template' => 'e8d5322b-0d40-5e3d-a754-4dd0ee6a4bb9',
98             'application/vnd.oasis.opendocument.formula' => 'e771c71d-f4b8-56a7-b299-1ede808b91d0',
99             'application/vnd.oasis.opendocument.formula-template' => '4b9eb9eb-786d-5831-89e1-edcba46a2bb6',
100             'application/vnd.oasis.opendocument.graphics' => '322c5088-84c9-59aa-a828-ffe183557457',
101             'application/vnd.oasis.opendocument.graphics-template' => '76d3335e-a49e-54ec-bec5-8e3bb46d8412',
102             'application/vnd.oasis.opendocument.image' => '869257aa-b61f-5210-af8a-d9a33c356629',
103             'application/vnd.oasis.opendocument.image-template' => '60d259d0-4d58-59c8-81f7-9725f960d415',
104             'application/vnd.oasis.opendocument.presentation' => '7a4abd3a-89ec-53e9-b29d-64c6e2dcdaf4',
105             'application/vnd.oasis.opendocument.presentation-template' => 'b16ebfdd-1b4f-5713-829b-5b35e7a06839',
106             'application/vnd.oasis.opendocument.spreadsheet' => '975706e1-44c3-55d1-b03a-978954a46f3e',
107             'application/vnd.oasis.opendocument.spreadsheet-template' => '52f3046b-e8e4-5c74-8860-b683f1554ad2',
108             'application/vnd.oasis.opendocument.text' => 'b03df4f0-3f52-5ce0-b3e0-42dd911d244a',
109             'application/vnd.oasis.opendocument.text-master' => '21415b27-ce2a-5b5d-bb98-569ce922c97c',
110             'application/vnd.oasis.opendocument.text-master-template' => '889508ab-6a78-5337-b13a-756a8232baae',
111             'application/vnd.oasis.opendocument.text-template' => '8f0bfe22-f343-5cbb-98c7-d826d0f31e63',
112             'application/vnd.oasis.opendocument.text-web' => '83baa5da-8956-51ff-8ec1-41aee5d5b1eb',
113             'application/xhtml+xml' => 'e553c22e-542b-50d8-9abb-aa36625be67e',
114             'application/xml' => '371b035f-45b7-5ba2-9d3e-811bf3b937bc',
115             'audio/flac' => 'a7ea86ac-4938-5adc-8544-b4908e21c7e4',
116             'audio/matroska' => 'e5eae178-ccf2-5659-b23a-3d0d936be8a2',
117             'audio/ogg' => 'ef171c40-2b55-572a-b66f-3d4ecb8182a5',
118             'image/gif' => 'b5ec5cdd-2811-5e51-8b0e-b07d0bd2b570',
119             'image/jpeg' => 'c1e9e865-4653-5037-97f3-06c0c1c061a5',
120             'image/png' => '7c859f1d-693b-5070-a928-dfd051a4f93d',
121             'image/svg+xml' => '3970f481-591e-530a-b962-a2e87b2efde2',
122             'image/webp' => 'd71ad7ca-abd5-59e5-a360-086aa1f39ad0',
123             'message/http' => '3f59f23e-d5ca-5f6d-a70e-05aa4d952f36',
124             'text/html' => 'ecd556c0-7ecb-5b88-ab0a-ec4e09d61782',
125             'text/plain' => '552ec0dc-8678-5657-9422-8a71ea8e5cd0',
126             'video/matroska' => '6155907c-d116-5d88-8d60-850753015fca',
127             'video/matroska-3d' => '46ce8e26-b8e3-5cf6-a534-9d1d6dbcae72',
128             'video/ogg' => 'f14a9d8d-daf4-52aa-9ff8-e0815a3e5b65',
129             'video/webm' => '0ee63dad-e52f-5c62-9c32-e6b872b828c7',
130             );
131              
132             my %_inverse_mediatypes = map {$_mediatypes{$_} => $_} keys %_mediatypes;
133              
134             my %_ise_re = (
135             uuid => RE_UUID,
136             oid => RE_OID,
137             uri => RE_URI,
138             );
139              
140             my %_known_digest_algos = map {$_ => undef} (
141             values(%_digest_name_converter),
142             qw(md-4-128 ripemd-1-160 tiger-1-192 tiger-2-192),
143             );
144              
145              
146             my %_ise_keys = map {$_ => 1} qw(ise uuid oid uri);
147             my %_data_identifier_keys = map {$_ => 1} keys %_ise_keys;
148              
149             my %_tagpool_relations = (
150             tagpool_file_title => WK_TAGPOOL_TITLE,
151             tagpool_file_comment => WK_TAGPOOL_COMMENT,
152             tagpool_file_description => WK_TAGPOOL_DESCRIPTION,
153             );
154              
155             my @_stable_properties = qw(boring comment contentise description displayname fetchurl finalmode hidden inodeise ise mediatype oid pages readonly system size thumbnail title uri uuid writemode);
156              
157             my %_properties = (
158             uuid => {loader => \&_load_aggregate, sources => [qw(::Inode tagpool_file_uuid tagpool_directory_setting_tag uuid(xattr_utag_ise) uuid(store_inodeise) uuid(db_inode_tag) :self dev_disk_by_uuid tagpool_pool_uuid)], rawtype => 'uuid'},
159             oid => {loader => \&_load_aggregate, sources => [qw(::Inode oid(xattr_utag_ise) oid(store_inodeise) oid(db_inode_tag))], rawtype => 'oid'},
160             uri => {loader => \&_load_aggregate, sources => [qw(::Inode uri(xattr_utag_ise) uri(store_inodeise) uri(db_inode_tag))], rawtype => 'uri'},
161             ise => {loader => \&_load_aggregate, sources => [qw(:self uuid oid uri ::Inode xattr_utag_ise ::Base data_uriid_ise store_inodeise)], rawtype => 'ise'},
162             inodeise => {loader => \&_load_aggregate, sources => [qw(::Inode tagpool_file_uuid tagpool_directory_setting_tag xattr_utag_ise store_inodeise db_inode_tag)], rawtype => 'ise'},
163             contentise => {loader => \&_load_aggregate, sources => [qw(::Inode content_sha_1_160_sha_3_512_uuid content_sha_3_512_uuid store_contentise)], rawtype => 'ise'},
164              
165             size => {loader => \&_load_aggregate, sources => [qw(::Inode tagpool_file_size xattr_utag_final_file_size xattr_utag_final_file_hash_size st_size ::Chunk chunk_outer_size ::Base store_size data_tagdb_size)]},
166             title => {loader => \&_load_aggregate, sources => [qw(::Inode tagpool_file_title tagpool_directory_title xattr_dublincore_title dotcomments_caption ::Deep pdf_info_title odf_info_title audio_scan_title)]},
167             comment => {loader => \&_load_aggregate, sources => [qw(::Inode tagpool_file_comment tagpool_directory_comment xattr_xdg_comment dotcomments_note)]},
168             description => {loader => \&_load_aggregate, sources => [qw(::Inode tagpool_file_description tagpool_directory_description xattr_dublincore_description)]},
169             displayname => {loader => \&_load_aggregate, sources => [qw(:self title link_basename_clean dev_disk_by_label dev_mapper_name dev_name data_uriid_attr_displayname)]},
170             mediatype => {loader => \&_load_aggregate, sources => [qw(::Inode tagpool_file_mediatype xattr_utag_final_file_encoding magic_mediatype ::Base data_uriid_attr_media_subtype store_mediasubtype data_tagdb_encoding)], rawtype => 'mediatype'},
171             writemode => {loader => \&_load_aggregate, sources => [qw(::Inode tagpool_file_write_mode xattr_utag_write_mode ::Base store_writemode)], rawtype => 'ise'},
172             pages => {loader => \&_load_aggregate, sources => [qw(::Deep pdf_pages odf_stats_meta_page_count)]},
173              
174             thumbnail => {loader => \&_load_aggregate, sources => [qw(::Link link_thumbnail ::Inode tagpool_file_thumbnail)], rawtype => 'filename'},
175             finalmode => {loader => \&_load_aggregate, sources => [qw(::Inode tagpool_file_finalmode xattr_utag_final_mode ::Base store_finalmode)], rawtype => 'ise'},
176             hidden => {loader => \&_load_aggregate, sources => [qw(::Link link_dotfile ::Inode ntfs_file_attribute_hidden)], rawtype => 'bool', towards => 1},
177             system => {loader => \&_load_aggregate, sources => [qw(::Inode ntfs_file_attribute_system)], rawtype => 'bool', towards => 1},
178             readonly => {loader => \&_load_readonly, rawtype => 'bool'},
179              
180             fetchurl => {loader => \&_load_aggregate, sources => [qw(:self tagpool_file_original_url xattr_xdg_origin_url zonetransfer_hosturl)]},
181              
182             boring => {loader => \&_load_aggregate, sources => [qw(size_boring ::Link link_basename_boring)], rawtype => 'bool', towards => 1},
183              
184             # TODO: displaycolour icontext charset (hash) (mediatype / encoding)
185              
186             size_boring => {loader => \&_load_size_boring, rawtype => 'bool'},
187              
188             data_tagdb_size => {loader => \&_load_data_tagdb},
189             data_tagdb_displayname => {loader => \&_load_data_tagdb},
190             data_tagdb_fetch_uri => {loader => \&_load_data_tagdb, rawtype => 'uri'},
191             data_tagdb_encoding => {loader => \&_load_data_tagdb, rawtype => 'Data::TagDB::Tag'},
192             data_tagdb_charset => {loader => \&_load_data_tagdb, rawtype => 'Data::TagDB::Tag'},
193              
194             store_size => {loader => \&_load_fstore},
195             store_inode => {loader => \&_load_fstore},
196             store_mediasubtype => {loader => \&_load_fstore, rawtype => 'mediatype'},
197             store_contentise => {loader => \&_load_fstore, rawtype => 'ise'},
198             store_inodeise => {loader => \&_load_fstore, rawtype => 'ise'},
199             store_final => {loader => \&_load_fstore, rawtype => 'bool'},
200             store_finalmode => {loader => \&_load_fstore, rawtype => 'ise'},
201             store_writemode => {loader => \&_load_fstore, rawtype => 'ise'},
202             );
203              
204             my @_digest_preload_properties = qw(xattr_utag_final_file_hash xattr_utag_final_file_size tagpool_file_size);
205              
206             sub _new {
207 4     4   17 my ($pkg, %opts) = @_;
208 4         10 my $self = bless \%opts, $pkg;
209              
210 4 50       32 croak 'No instance is given' unless defined $self->{instance};
211              
212 4   50     12 $self->{properties} //= {}; # empty list.
213              
214 4         11 return $self;
215             }
216              
217              
218             sub get {
219 64     64 1 1292 my ($self, $key, %opts) = @_;
220 64   100     288 my $info = $self->{properties}{$key} // $_properties{$key};
221 64         118 my $rawtype = $info->{rawtype};
222 64   100     148 my $pv = $self->{properties_values} //= {};
223 64         182 my $v;
224             my $res;
225 64         0 my $as;
226 64         0 my $lifecycle;
227              
228 64 50       133 unless (defined $info) {
229 0 0       0 return $opts{default} if exists $opts{default};
230 0         0 croak 'Unknown key '.$key;
231             }
232              
233 64   50     239 $as = $opts{as} //= $info->{default_as} // 'raw';
      66        
234 64   100     150 $lifecycle = $opts{lifecycle} //= 'current';
235              
236 64   100     146 $pv = $pv->{$lifecycle} //= {};
237              
238             # load the value if needed.
239 64 100 100     278 if ((!defined($pv->{$key}) || (ref($pv->{$key}) eq 'HASH' && !scalar(%{$pv->{$key}}))) && defined(my $loader = $info->{loader})) {
      100        
240 34 50       78 $loader = $self->can($loader) unless ref $loader;
241 34         151 $self->$loader($key, %opts, list => undef);
242             }
243              
244 64   100     264 $v = $pv->{$key} //= {};
245              
246 64 100       193 if ($opts{list}) {
    50          
247 4         8 my $default = delete $opts{default};
248              
249 4         7 delete $opts{list};
250              
251 4 50       11 if (ref($v) eq 'ARRAY') {
252 0         0 my @list;
253             my $error;
254              
255 0         0 foreach my $entry (@{$v}) {
  0         0  
256 0         0 local $pv->{$key} = $entry;
257 0         0 my $val = $self->get($key, %opts, default => undef);
258              
259 0 0       0 if (defined $val) {
260 0         0 push(@list, $val);
261             } else {
262 0         0 $error = 1;
263 0         0 last;
264             }
265             }
266              
267 0 0       0 if ($error) {
268 0 0       0 return @{$default} if defined $default;
  0         0  
269 0         0 $v = {}; # handle like an empty entry.
270             } else {
271 0         0 return @list;
272             }
273             } else {
274 4         41 my $val = $self->get($key, %opts, default => undef);
275              
276             #warn sprintf('key=%s, v=%s, val=%s, default=%s', $key, $v, $val // '<undef>', $default // '<undef>');
277 4 50       10 if (defined $val) {
278 0         0 return ($val);
279             } else {
280 4 50       10 return @{$default} if defined $default;
  4         16  
281             #confess 'Doof';
282 0         0 $v = {}; # handle like an empty entry.
283             }
284             }
285             } elsif (ref($v) eq 'ARRAY') {
286 0         0 $v = {}; # handle like an empty entry.
287             }
288              
289             # Update rawtype as needed:
290 60   66     173 $rawtype = $v->{rawtype} // $rawtype;
291              
292             # Try: Check if we have what we want.
293 60         94 $res = $v->{$as};
294              
295             # Try: Check if we can convert (rawtype, raw) to what we want.
296 60 50 100     249 if (!defined($res) && defined($rawtype) && defined(my $raw = $v->{raw})) {
      66        
297 0 0 0     0 if ($rawtype eq $as) {
    0 0        
    0 0        
    0 0        
    0 0        
    0 0        
    0 0        
    0 0        
    0 0        
      0        
298 0         0 $res = $raw;
299             } elsif ($rawtype eq 'unixts' && $as eq 'DateTime') {
300 0         0 require DateTime;
301 0         0 $res = DateTime->from_epoch(epoch => $raw, time_zone => 'UTC');
302             } elsif ($rawtype eq 'unixts' && $as eq 'Data::Identifier') {
303 0         0 require Data::Identifier::Generate;
304 0         0 $res = Data::Identifier::Generate->date($raw);
305             } elsif ($rawtype eq 'ise' && defined(my $re = $_ise_re{$as})) {
306 0 0       0 $res = $raw if $raw =~ $re;
307             } elsif ($_data_identifier_keys{$rawtype} && $as eq 'Data::Identifier') {
308 0         0 $res = Data::Identifier->new($rawtype => $raw);
309             } elsif ($_ise_keys{$rawtype} && $as eq 'Data::URIID::Result') {
310 0         0 $res = $self->instance->extractor->lookup(ise => $raw);
311             } elsif ($rawtype eq 'filename' && $as eq 'File::Information::Link') {
312 0         0 $res = $self->instance->for_link($raw);
313             } elsif ($rawtype eq 'filename' && ($as eq 'IO::Handle' || $as eq 'IO::File')) {
314 0         0 require IO::File;
315 0         0 $res = IO::File->new($raw, 'r');
316             } elsif (exists($_ise_re{$rawtype}) && $as eq 'Data::TagDB::Tag' && $self->instance->db) {
317 0         0 $res = eval {$self->instance->db->tag_by_id($rawtype => $raw)};
  0         0  
318             }
319              
320 0 0 0     0 $v->{$as} = $res if defined($res) && $as !~ /^IO::/; # Cache unless it is a file handle.
321             }
322              
323 60 50 66     284 if (!defined($res) && blessed($v->{raw})) {
324 0         0 $res = eval {$v->{raw}->Data::Identifier::as($as, db => $self->db, extractor => $self->extractor)};
  0         0  
325             }
326              
327             # Try: Check if we have a Data::Identifier and want a sid.
328 60 50 66     218 if (!defined($res) && $as eq 'sid' && defined(my $identifier = $v->{'Data::Identifier'})) {
      33        
329 0 0       0 $v->{sid} = $res if defined($res = eval {$identifier->sid});
  0         0  
330             }
331              
332             # Try: Check if we can manage to get hold of a ISE in some way.
333 60 100 66     379 if (!defined($res) && !defined($v->{ise}) && defined($rawtype) && $rawtype eq 'ise') {
      100        
      100        
334 16         34 $v->{ise} = $v->{raw};
335             }
336 60 50 66     199 if (!defined($res) && !defined($v->{ise})) {
337 48         126 foreach my $key (keys %_ise_re) {
338 144 50       355 last if defined($v->{ise} = $v->{$key});
339 144 50 100     460 last if defined($rawtype) && $rawtype eq $key && defined($v->{ise} = $v->{raw});
      66        
340             }
341             }
342 60 50 66     346 if (!defined($res) && !defined($v->{ise}) && ($rawtype // '') eq 'mediatype') {
      100        
      66        
343 0   0     0 $v->{ise} = $_mediatypes{$v->{raw} // ''};
344             }
345 60 50 66     219 if (!defined($res) && !defined($v->{ise})) {
346 48         77 foreach my $source_type (qw(Data::Identifier Data::URIID::Result Data::URIID::Colour Data::TagDB::Tag)) {
347 192 50       434 if (defined(my $obj = $v->{$source_type})) {
348 0 0       0 last if defined($v->{ise} = eval {$obj->ise});
  0         0  
349             }
350 192 50 66     533 if (defined($rawtype) && $rawtype eq $source_type && defined(my $obj = $v->{raw})) {
      33        
351 0 0       0 last if defined($v->{ise} = eval {$obj->ise});
  0         0  
352             }
353             }
354             }
355              
356 60 50       148 delete $v->{ise} unless defined $v->{ise}; # TODO: Figure out where it is set to undef
357              
358             # Try: Check if we have an ISE and can convert that to what we want.
359 60 50 66     196 if (!defined($res) && defined(my $ise = $v->{ise})) {
360 0 0 0     0 if ($as eq 'Data::Identifier') {
    0          
    0          
    0          
    0          
    0          
361 0         0 $res = Data::Identifier->new(ise => $ise);
362             } elsif ($as eq 'Data::URIID::Result') {
363 0         0 $res = $self->instance->extractor->lookup(ise => $ise);
364             } elsif ($as eq 'Data::TagDB::Tag' && $self->instance->db) {
365 0         0 foreach my $type (keys %_ise_re) {
366 0 0       0 if ($ise =~ $_ise_re{$type}) {
367 0 0       0 $res = eval {$self->instance->db->tag_by_id($type => $ise)} and last;
  0         0  
368             }
369             }
370             } elsif (defined(my $re = $_ise_re{$as})) {
371 0 0       0 $res = $ise if $ise =~ $re;
372             } elsif ($as eq 'sid') {
373 0         0 my $identifier = $v->{'Data::Identifier'} = Data::Identifier->new(ise => $ise);
374 0         0 $res = eval {$identifier->sid};
  0         0  
375             } elsif ($as eq 'mediatype') {
376 0         0 $res = $_inverse_mediatypes{$ise};
377             }
378              
379 0 0 0     0 $v->{$as} = $res if defined($res) && $as !~ /^IO::/; # Cache unless it is a file handle.
380             }
381              
382 60 100       159 return $res if defined $res;
383              
384 48 50       334 return $opts{default} if exists $opts{default};
385              
386 0         0 confess 'Cannot get value for key '.$key;
387             }
388              
389              
390             sub property_info {
391 0     0 1 0 my ($self, @algos) = @_;
392 0         0 my @ret;
393              
394 0 0       0 unless ($self->{property_info}) {
395 0         0 my %properties = map {$_ => {
396             name => $_,
397 0         0 }} keys %{$self->{properties}}, keys %_properties;
  0         0  
398              
399 0         0 $properties{$_}{stable} = 1 foreach @_stable_properties;
400              
401 0         0 $self->{property_info} = \%properties;
402             }
403              
404 0 0       0 @algos = keys %{$self->{property_info}} unless scalar @algos;
  0         0  
405              
406 0 0 0     0 croak 'Request for more than one property in scalar context' if !wantarray && scalar(@algos) != 1;
407              
408             @ret = map{
409 0 0       0 $self->{property_info}{$_} ||
  0         0  
410             croak 'Unknown property: '.$_
411             } @algos;
412              
413 0 0       0 if (wantarray) {
414 0         0 return @ret;
415             } else {
416 0         0 return $ret[0];
417             }
418             }
419              
420              
421             sub digest {
422 0     0 1 0 my ($self, $key, %opts) = @_;
423 0   0     0 my $as = $opts{as} // 'hex';
424 0   0     0 my $lifecycle = $opts{lifecycle} //= 'current';
425 0         0 my $value;
426              
427             # convert L<Digest> name into utag name if needed:
428 0   0     0 $key = $_digest_name_converter{fc($key)} // $key;
429              
430             # Check utag name:
431 0 0       0 if ($key !~ /^[a-z]+-[0-9]+-[1-9][0-9]*$/) {
432 0         0 croak sprintf('Unknown digest format "%s"', $key);
433             }
434              
435 0         0 foreach my $property (@_digest_preload_properties) {
436 0         0 eval {$self->get($property, lifecycle => $lifecycle)};
  0         0  
437             }
438              
439 0         0 $value = $self->{digest}{$lifecycle}{$key};
440              
441 0 0 0     0 if (!defined($value) && $lifecycle eq 'current' && $self->isa('File::Information::Inode') && !$opts{no_defaults}) {
      0        
      0        
442 0         0 my $size = $self->get('size', default => undef);
443 0         0 my $limit = $self->instance->{digest_sizelimit};
444              
445 0 0 0     0 if (defined($size) && ($limit == -1 || $size <= $limit)) {
      0        
446 0         0 my $digest;
447              
448 0         0 eval {
449 0 0       0 if ($key eq 'md-5-128') {
    0          
    0          
    0          
    0          
450 0         0 require Digest;
451 0         0 $digest = Digest->new('MD5');
452             } elsif ($key eq 'sha-1-160') {
453 0         0 require Digest;
454 0         0 $digest = Digest->new('SHA-1');
455             } elsif ($key eq 'ripemd-1-160') {
456 0         0 require Digest;
457 0         0 $digest = Digest->new('RIPEMD-160');
458             } elsif ($key =~ /^sha-2-(224|256|384|512)$/) {
459 0         0 require Digest;
460 0         0 $digest = Digest->new('SHA-'.$1);
461             } elsif ($key =~ /^sha-3-(224|256|384|512)$/) {
462 0         0 require Digest::SHA3;
463 0         0 $digest = Digest::SHA3->new($1);
464             }
465             };
466              
467 0 0       0 if (defined $digest) {
468 0         0 eval {
469 0         0 my $fh = $self->_get_fh;
470              
471 0 0       0 if (defined $fh) {
472 0         0 $digest->addfile($fh);
473 0         0 $self->{digest}{$lifecycle}{$key} = $value = $digest->hexdigest;
474             }
475             };
476             }
477             }
478             }
479              
480 0 0       0 unless (defined $value) {
481 0 0       0 return $opts{default} if exists $opts{default};
482 0         0 croak 'No such value for digest '.$key;
483             }
484              
485 0 0 0     0 if ($as eq 'hex') {
    0          
    0          
    0          
    0          
    0          
486 0         0 return $value;
487             } elsif ($as eq 'binary') {
488 0         0 return pack('H*', $value);
489             } elsif ($as eq 'base64' || $as eq 'b64') {
490 0         0 require MIME::Base64;
491 0         0 return MIME::Base64::encode(pack('H*', $value), '') =~ s/=+$//r;
492             } elsif ($as eq 'base64_padded') {
493 0         0 require MIME::Base64;
494 0         0 return MIME::Base64::encode(pack('H*', $value), '');
495             } elsif ($as eq 'utag') {
496 0 0       0 if (defined(my $size = $self->get('size', lifecycle => $lifecycle, default => undef))) {
497 0 0       0 if ($size > 0) {
498 0         0 return sprintf('v0 %s bytes 0-%u/%u %s', $key, $size - 1, $size, $value);
499             } else {
500 0         0 return sprintf('v0 %s bytes 0-/%u %s', $key, $size, $value);
501             }
502             }
503              
504 0         0 return sprintf('v0 %s bytes 0-/* %s', $key, $value);
505             } elsif ($as eq 'Digest') {
506 0         0 require Data::URIID::Digest;
507 0         0 return Data::URIID::Digest->_new($value); # Not public API but developed by same developers as this module.
508             # DO NOT USE THIS IN YOUR CODE!
509             }
510              
511 0         0 croak sprintf('Cannot convert from type "%s" to "%s" for digest "%s"', 'hex', $as, $key);
512             }
513              
514              
515              
516             #@returns File::Information::VerifyBase
517             sub verify {
518 0     0 1 0 my ($self, %opts) = @_;
519 0         0 require File::Information::VerifyResult;
520 0         0 return File::Information::VerifyResult->_new(base => $self, instance => $self->instance, %opts{'lifecycle_from', 'lifecycle_to', 'base_from', 'base_to'});
521             }
522              
523              
524             sub link_stream {
525 0     0 1 0 my ($self, $cb, %opts) = @_;
526 0         0 my File::Information $instance = $self->instance;
527 0         0 my $extractor = eval {$self->extractor};
  0         0  
528 0 0 0     0 my @subobjects = map {lc s/^File::Information:://r} ref($opts{subobjects}) eq 'ARRAY' ? @{$opts{subobjects}} : ($opts{subobjects} // ref($self));
  0         0  
  0         0  
529 0 0       0 my $inode = $self->isa('File::Information::Inode') ? $self : eval {$self->inode};
  0         0  
530 0         0 my @lifecycle;
531             my %contentise;
532 0         0 my @cleanup = (
533             WK_NEEDSTAGGING,
534             WK_FINAL,
535             WK_WM_RANDOM_ACCESS, WK_WM_READ_ONLY, WK_WM_NONE,
536             );
537              
538 0 0       0 if (ref($opts{lifecycle}) eq 'ARRAY') {
    0          
539 0         0 @lifecycle = @{$opts{lifecycle}};
  0         0  
540             } elsif (defined $opts{lifecycle}) {
541 0         0 @lifecycle = ($opts{lifecycle});
542             } else {
543 0         0 @lifecycle = $instance->lifecycles;
544             }
545              
546 0         0 foreach my $subobject (@subobjects) {
547 0 0       0 my $base = $self->isa('File::Information::'.ucfirst($subobject)) ? $self : undef;
548              
549 0         0 foreach my $lifecycle (@lifecycle) {
550 0 0       0 if ($subobject eq 'inode') {
    0          
    0          
551 0   0     0 $base //= $self->inode;
552              
553 0 0       0 if (defined(my $contentise = $base->get('contentise', default => undef, as => 'Data::Identifier', lifecycle => $lifecycle))) {
554 0         0 $contentise{$lifecycle} = $contentise;
555              
556 0         0 $cb->('tag-ise', [$contentise]);
557 0         0 $cb->('tag-relation', {tag => $contentise, relation => _to_di(WK_ALSO_HAS_ROLE), related => _to_di(WK_SPECIFIC_PROTO_FILE_STATE)});
558              
559 0 0       0 if (defined(my $mediatype = $base->get('mediatype', default => undef, as => 'Data::Identifier', lifecycle => $lifecycle))) {
560 0         0 $cb->('tag-relation', {tag => $contentise, relation => _to_di(WK_FINAL_FILE_ENCODING), related => $mediatype});
561 0         0 push(@cleanup, $mediatype);
562             }
563              
564 0 0       0 if (defined(my $size = $base->get('size', default => undef, lifecycle => $lifecycle))) {
565 0         0 $cb->('tag-metadata', {tag => $contentise, relation => _to_di(WK_FINAL_FILE_SIZE), data => $size});
566             }
567              
568 0         0 foreach my $algo (map {$_->{name}} $instance->digest_info) {
  0         0  
569 0   0     0 my $hash = $base->digest($algo, as => 'utag', default => undef, lifecycle => $lifecycle, no_defaults => !$_important_digests{$algo}) // next;
570 0         0 $cb->('tag-metadata', {tag => $contentise, relation => _to_di(WK_FINAL_FILE_HASH), data => $hash});
571             }
572              
573 0 0       0 if (defined(my $size = $base->get('fetchurl', default => undef, lifecycle => $lifecycle))) {
574 0         0 $cb->('tag-metadata', {tag => $contentise, relation => _to_di(WK_FETCH_FILE_URI), data => $size});
575             }
576              
577             }
578              
579 0 0       0 if (defined(my $tagpoolise = $base->get('tagpool_file_uuid', as => 'Data::Identifier', default => undef, lifecycle => $lifecycle))) {
580 0         0 my %tags = map {$_->ise => $_} $base->get('tagpool_file_tags', as => 'Data::Identifier', list => 1, default => [], lifecycle => $lifecycle);
  0         0  
581 0         0 my %states;
582              
583 0         0 $cb->('tag-ise', [$tagpoolise]);
584              
585 0         0 $cb->('tag-relation', {tag => $tagpoolise, relation => _to_di(WK_ALSO_HAS_ROLE), related => _to_di(WK_PROTO_FILE)});
586 0         0 $cb->('tag-relation', {tag => $tagpoolise, relation => _to_di(WK_ALSO_HAS_ROLE), related => _to_di(WK_TAGPOOL_FILE)});
587              
588 0         0 foreach my $l ('final', @lifecycle) {
589 0   0     0 my $contentise = $contentise{$l} // next;
590 0 0       0 my $relation = $l eq 'final' ? WK_HAS_FINAL_STATE : WK_ALSO_HAS_STATE;
591              
592             # skip duplicates.
593 0 0       0 next if $states{$contentise->ise};
594 0         0 $states{$contentise->ise} = 1;
595              
596 0         0 $cb->('tag-relation', {tag => $tagpoolise, relation => _to_di($relation), related => $contentise});
597             }
598              
599 0         0 foreach my $key (keys %_tagpool_relations) {
600 0 0       0 if (defined(my $value = $base->get($key, default => undef, lifecycle => $lifecycle))) {
601 0         0 $cb->('tag-metadata', {tag => $tagpoolise, relation => _to_di($_tagpool_relations{$key}), data => $value});
602             }
603             }
604              
605 0 0       0 if (defined(my $description_url = $base->get('tagpool_file_original_description_url', default => undef, lifecycle => $lifecycle))) {
606 0 0       0 if (defined(my $result = eval { $extractor->lookup(qrcode => $description_url)->as('Data::Identifier') })) {
  0         0  
607 0         0 $cb->('tag-relation', {tag => $tagpoolise, relation => _to_di(WK_SEE_ALSO), related => $result});
608             }
609             }
610              
611             # Clean up tags:
612 0         0 foreach my $tag (@cleanup) {
613 0 0       0 if (ref $tag) {
614 0         0 delete $tags{$tag->ise};
615             } else {
616 0         0 delete $tags{$tag};
617             }
618             }
619              
620 0         0 foreach my $related (values %tags) {
621 0         0 $cb->('tag-relation', {tag => $tagpoolise, relation => _to_di(WK_TAGPOOL_TAGGED_AS), related => $related});
622             }
623             }
624             } elsif ($subobject eq 'filesystem') {
625 0   0     0 $base //= $self->filesystem;
626              
627 0 0       0 if (defined(my $ise = $base->get('ise', default => undef, as => 'Data::Identifier', lifecycle => $lifecycle))) {
628 0         0 $cb->('tag-ise', [$ise]);
629 0         0 $cb->('tag-relation', {tag => $ise, relation => _to_di(WK_ALSO_HAS_ROLE), related => _to_di(WK_FILESYSTEM)});
630              
631 0 0       0 if (defined(my $label = $base->get('dev_disk_by_label', default => undef, lifecycle => $lifecycle))) {
632 0         0 $cb->('tag-metadata', {tag => $ise, relation => _to_di(WK_ALSO_SHARES_IDENTIFIER), type => _to_di(WK_TAGNAME), data => $label});
633             }
634             }
635             } elsif ($subobject eq 'tagpool') {
636 0 0       0 foreach my $tagpool ($base ? ($base) : $self->tagpool) {
637 0 0       0 if (defined(my $ise = $tagpool->get('ise', default => undef, as => 'Data::Identifier', lifecycle => $lifecycle))) {
638 0         0 $cb->('tag-ise', [$ise]);
639 0         0 $cb->('tag-relation', {tag => $ise, relation => _to_di(WK_ALSO_HAS_ROLE), related => _to_di(WK_TAGPOOL_POOL)});
640             }
641             }
642             } else {
643 0 0       0 if (defined(my $ise = $base->get('ise', default => undef, as => 'Data::Identifier', lifecycle => $lifecycle))) {
644 0         0 $cb->('tag-ise', [$ise]);
645             }
646             }
647             }
648             }
649             }
650              
651              
652 0     0 1 0 sub uuid { return $_[0]->get('uuid', @_[1..$#_]); }
653 0     0 1 0 sub oid { return $_[0]->get('oid', @_[1..$#_]); }
654 0     0 1 0 sub uri { return $_[0]->get('uri', @_[1..$#_]); }
655 0     0 1 0 sub ise { return $_[0]->get('ise', @_[1..$#_]); }
656 0     0 1 0 sub displayname { return $_[0]->get('displayname', @_[1..$#_]); }
657 0     0 1 0 sub displaycolour { return $_[0]->get('displaycolour', @_[1..$#_]); }
658 0     0 1 0 sub icontext { return $_[0]->get('icontext', @_[1..$#_]); }
659 0     0 1 0 sub description { return $_[0]->get('description', @_[1..$#_]); }
660              
661              
662              
663             #@returns File::Information
664             sub instance {
665 10     10 1 18 my ($self) = @_;
666 10         31 return $self->{instance};
667             }
668              
669              
670             sub store {
671 0     0 1 0 my ($self, %opts) = @_;
672 0         0 my $as = delete $opts{as};
673 0         0 my %stores; # simple deduplication.
674              
675 0 0 0     0 croak 'Invalid as parameter' unless ($as // '') eq 'File::FStore';
676 0 0       0 croak 'Stray options passed' if scalar keys %opts;
677              
678 0         0 foreach my $store ($self->get('store_file', list => 1, default => [])) {
679 0         0 $stores{$store} = $store;
680             }
681              
682 0         0 return values %stores;
683             }
684              
685              
686             #@returns Data::URIID
687             sub extractor {
688 0     0 1 0 my ($self, @args) = @_;
689 0   0     0 return $self->{extractor} //= $self->instance->extractor(@args);
690             }
691              
692             #@returns Data::TagDB
693             sub db {
694 0     0 1 0 my ($self, @args) = @_;
695 0   0     0 return $self->{db} //= $self->instance->db(@args);
696             }
697              
698             sub digest_info {
699 0     0 0 0 my ($self, @args) = @_;
700 0         0 return $self->instance->digest_info(@args);
701             }
702              
703             #@returns File::Information::Editor
704             sub editor {
705 0     0 0 0 my ($self) = @_;
706 0         0 require File::Information::Editor;
707 0         0 return File::Information::Editor->_new(parent => $self);
708             }
709              
710             # ----------------
711             sub _to_di {
712 0     0   0 my ($uuid) = @_;
713 0         0 state $cache = {};
714              
715 0   0     0 return $cache->{$uuid} //= Data::Identifier->new(uuid => $uuid);
716             }
717              
718             sub _set_digest_utag {
719 0     0   0 my ($self, $lifecycle, $v, $given_size) = @_;
720 0         0 my %digest;
721              
722 0         0 eval {
723 0         0 while ($v =~ s/^(v0m?) ([a-z]+)-([0-9]+)-([0-9]+) bytes 0-([0-9]+)\/([0-9]+) ([0-9a-f]+)( |$)//) {
724 0         0 my ($type, $name, $version, $bits, $end, $size, $hash, $mark) = ($1, $2, $3, $4, $5, $6, $7, $8);
725 0 0       0 next if $end != ($size - 1);
726 0 0       0 die if (length($hash) * 4) != $bits;
727 0 0 0     0 die if $type eq 'v0' && length($mark);
728              
729 0   0     0 $given_size //= $size;
730 0 0       0 die unless $given_size == $size;
731 0         0 $digest{join('-', $name, $version, $bits)} = $hash;
732             }
733             };
734              
735 0   0     0 $self->{digest} //= {};
736              
737             {
738 0   0     0 my $digests = $self->{digest}{$lifecycle} //= {};
  0         0  
739 0         0 foreach my $algo (keys %digest) {
740 0   0     0 $digests->{$algo} //= $digest{$algo};
741             }
742             }
743              
744 0         0 return $given_size;
745             }
746              
747             sub _load_aggregate {
748 8     8   27 my ($self, $key, %opts) = @_;
749 8   50     32 my $pv = ($self->{properties_values} //= {})->{$opts{lifecycle}} //= {};
      50        
750 8         13 my $info = $_properties{$key};
751 8         11 my $current = $self;
752              
753 8 50 33     34 return unless defined($info) && defined($info->{sources});
754              
755 8         13 foreach my $source (@{$info->{sources}}) {
  8         21  
756 40 50       177 if ($source eq ':self') {
    100          
    50          
    50          
757 0         0 $current = $self;
758             } elsif ($source =~ /^::/) {
759 16         30 $current = undef;
760              
761 16 100       80 if ($self->isa('File::Information'.$source)) {
    50          
    0          
    0          
762 12         16 $current = $self;
763             } elsif ($source eq '::Inode') {
764 4         9 $current = eval{$self->inode};
  4         13  
765             } elsif ($source eq '::Filesystem') {
766 0         0 $current = eval{$self->filesystem};
  0         0  
767             } elsif ($source eq '::Deep') {
768 0         0 $current = eval{$self->deep(no_defaults => 1, default => undef)};
  0         0  
769             }
770             } elsif (!defined $current) {
771 0         0 next;
772             } elsif ($source =~ /^([a-z]+)\((.+)\)$/) {
773 0         0 my $func = $1;
774 0   0     0 my $re = $_ise_re{$func} // croak 'BUG';
775 0         0 my $value = $current->get($2, %opts, default => undef, as => 'raw');
776              
777 0 0 0     0 if (defined($value) && eval {$value->isa('Data::TagDB::Tag')}) {
  0         0  
778 0 0       0 if (defined(my $c = $value->can($func))) {
779 0         0 $value = $value->$c(no_defaults => 1, default => undef);
780             }
781             }
782              
783 0 0 0     0 if (defined($value) && !ref($value) && $value =~ $re) {
      0        
784 0 0 0     0 if (defined($pv->{$key}) && defined($info->{towards})) {
785 0 0 0     0 next if ($value xor $info->{towards});
786             }
787              
788 0         0 $pv->{$key} = {raw => $value};
789             }
790             } else {
791 24         34 my $value_ref;
792              
793             #warn sprintf('%s <- %s %s %s', $key, $current, $source, $opts{lifecycle});
794 24 50       88 next unless defined $current->get($source, %opts, default => undef, as => 'raw');
795              
796 0         0 $value_ref = eval {$current->{properties_values}{$opts{lifecycle}}{$source}};
  0         0  
797              
798 0 0 0     0 if (defined($value_ref) && defined($pv->{$key}) && defined($info->{towards})) {
      0        
799 0 0 0     0 next if ($value_ref->{raw} xor $info->{towards});
800             }
801              
802 0         0 $pv->{$key} = $value_ref;
803             }
804              
805 16 0 33     71 last if !defined($info->{towards}) && defined($pv->{$key}) && scalar(keys %{$pv->{$key}});
  0   50     0  
806             }
807             }
808              
809             sub _load_readonly {
810 4     4   18 my ($self, $key, %opts) = @_;
811 4   50     33 my $pv = ($self->{properties_values} //= {})->{$opts{lifecycle}} //= {};
      50        
812 4         8 my $info = $_properties{$key};
813 4 100       7 my $inode = eval {$self->isa('File::Information::Inode') ? $self : $self->inode};
  4         37  
814 4         9 my $v = $opts{lifecycle} eq 'final';
815              
816 4 50       10 return unless defined($info);
817              
818 4 50       10 if (defined $inode) {
819 4   33     35 $v ||= $inode->get('stat_readonly', %opts, default => undef, as => 'raw');
820 4   33     21 $v ||= $inode->get('ntfs_file_attribute_readonly', %opts, default => undef, as => 'raw');
821             }
822              
823 4   33     25 $v ||= $self->get('writemode', %opts, default => '', as => 'uuid') eq WK_WM_NONE;
824 4   33     22 $v ||= $self->get('finalmode', %opts, default => '', as => 'uuid') eq WK_FINAL;
825              
826             # Mount options:
827 4 50       9 unless ($v) {
828 4         9 foreach my $option (qw(linux_mount_options linux_superblock_options)) {
829 8   50     24 my $value = $self->get($option, %opts, default => undef, as => 'raw') // next;
830 0   0     0 $v ||= $value =~ /^ro(?:,.+)?$/ || $value =~ /,ro$/;
      0        
831             }
832             }
833              
834 4         21 $pv->{$key} = {raw => $v};
835             }
836              
837             sub _load_size_boring {
838 0     0   0 my ($self, $key, %opts) = @_;
839 0   0     0 my $pv = ($self->{properties_values} //= {})->{$opts{lifecycle}} //= {};
      0        
840 0         0 my $size;
841              
842 0         0 $size = $self->get('size', %opts, default => undef, as => 'raw');
843 0 0       0 return unless defined $size;
844 0         0 $pv->{size_boring} = {raw => $size < $self->instance->{boring_sizelimit}};
845             }
846              
847             sub _load_data_tagdb {
848 0     0   0 my ($self) = @_;
849 0   0     0 my $pv_current = ($self->{properties_values} //= {})->{current} //= {};
      0        
850 0   0     0 my $pv_final = ($self->{properties_values} //= {})->{final} //= {};
      0        
851              
852 0 0       0 return if $self->{_loaded_data_tagdb};
853 0         0 $self->{_loaded_data_tagdb} = 1;
854              
855 0 0       0 if (defined(my $db = eval {$self->db})) {
  0         0  
856 0         0 my $wk = $db->wk;
857 0         0 my %tags;
858 0         0 my %final_metadata = (
859             data_tagdb_size => $wk->final_file_size,
860             data_tagdb_fetch_uri => $db->tag_by_id(uuid => '96674c6c-cf5e-40cd-af1e-63b86e741f4f'),
861             );
862 0         0 my %final_relation = (
863             data_tagdb_encoding => $wk->final_file_encoding,
864             data_tagdb_charset => $db->tag_by_id(uuid => '99437f71-f1b5-4a50-8ecf-882b61b86b1e'),
865             );
866              
867 0         0 foreach my $key (qw(data_uriid_result contentise inodeise ise)) {
868 0   0     0 my $tag = $self->get($key, as => 'Data::TagDB::Tag', default => undef) // next;
869 0         0 $tags{$tag->dbid} = $tag;
870             }
871              
872 0         0 foreach my $tag (values %tags) {
873 0         0 my $given_size;
874              
875 0 0       0 if (defined(my $displayname = $tag->displayname(default => undef, no_defaults => 1))) {
876 0   0     0 my $l = $pv_current->{data_tagdb_displayname} //= [];
877 0         0 push(@{$l}, {raw => $displayname});
  0         0  
878             }
879              
880 0         0 foreach my $key (keys %final_metadata) {
881 0   0     0 my $relation = $final_metadata{$key} // next;
882             $db->metadata(tag => $tag, relation => $relation)->foreach(sub {
883 0     0   0 my ($entry) = @_;
884 0   0     0 my $l = $pv_final->{$key} //= [];
885 0         0 push(@{$l}, {raw => $entry->data_raw});
  0         0  
886 0         0 });
887             }
888              
889 0         0 foreach my $key (keys %final_relation) {
890 0   0     0 my $relation = $final_relation{$key} // next;
891             $db->relation(tag => $tag, relation => $relation)->foreach(sub {
892 0     0   0 my ($entry) = @_;
893 0   0     0 my $l = $pv_final->{$key} //= [];
894 0         0 push(@{$l}, {raw => $entry->related, rawtype => 'Data::TagDB::Tag'});
  0         0  
895 0         0 });
896             }
897              
898 0 0       0 if (defined(my $relation = $wk->final_file_hash)) {
899             $db->metadata(tag => $tag, relation => $relation)->foreach(sub {
900 0     0   0 my ($entry) = @_;
901 0         0 $given_size = $self->_set_digest_utag(final => $entry->data_raw, $given_size);
902 0         0 });
903             }
904              
905 0 0 0     0 push(@{$pv_final->{data_tagdb_size} //= []}, {raw => $given_size}) if defined $given_size;
  0         0  
906              
907             {
908 0         0 my @size_values = map {int($_->{raw})} @{$pv_final->{data_tagdb_size}};
  0         0  
  0         0  
  0         0  
909 0 0       0 if (scalar @size_values) {
910 0   0     0 $given_size //= $size_values[0];
911 0         0 foreach my $size (@size_values) {
912 0 0       0 if ($given_size != $size) {
913 0         0 $given_size = undef;
914 0         0 last;
915             }
916             }
917 0 0       0 $pv_final->{data_tagdb_size} = defined($given_size) ? {raw => $given_size} : undef;
918             }
919             }
920              
921 0         0 foreach my $key (keys(%final_metadata), keys(%final_relation)) {
922 0 0 0     0 if (ref($pv_final->{$key}) eq 'ARRAY' && scalar(@{$pv_final->{$key}}) == 1) {
  0         0  
923 0         0 $pv_final->{$key} = $pv_final->{$key}->[0];
924             }
925             }
926             }
927             }
928             }
929              
930             sub _load_fstore {
931 8     8   28 my ($self, $key, %opts) = @_;
932              
933 8 100       24 return if $self->{_loaded_fstore_base};
934 4         11 $self->{_loaded_fstore_base} = 1;
935              
936             {
937 4   50     5 my $pv_current = ($self->{properties_values} //= {})->{current} //= {};
  4   50     28  
938 4   50     22 my $pv_final = ($self->{properties_values} //= {})->{final} //= {};
      50        
939 4   50     20 my $digest_final = $self->{digest}{final} //= {};
940              
941 4         24 foreach my $candidate ($self->get('store_file', list => 1, default => [])) {
942 0         0 my $properties = $candidate->get('properties');
943 0         0 my $digests = $candidate->get('digests');
944              
945 0 0 0     0 push(@{$pv_final->{store_size} //=[]}, {raw => $properties->{size}}) if defined $properties->{size};
  0         0  
946 0 0 0     0 push(@{$pv_final->{store_inode} //=[]}, {raw => $properties->{inode}}) if defined $properties->{inode};
  0         0  
947 0 0 0     0 push(@{$pv_final->{store_mediasubtype} //=[]}, {raw => $properties->{mediasubtype}}) if defined $properties->{mediasubtype};
  0         0  
948 0 0 0     0 push(@{$pv_final->{store_contentise} //=[]}, {raw => $properties->{contentise}}) if defined $properties->{contentise};
  0         0  
949 0 0 0     0 push(@{$pv_final->{store_inodeise} //=[]}, {raw => $properties->{inodeise}}) if defined $properties->{inodeise};
  0         0  
950              
951 0   0     0 push(@{$pv_final->{store_final} //=[]}, {raw => 1});
  0         0  
952              
953 0         0 foreach my $algo (keys %{$digests}) {
  0         0  
954 0         0 $digest_final->{$algo} = $digests->{$algo};
955             }
956             }
957              
958 4         11 foreach my $key (qw(store_size store_inode store_mediasubtype store_contentise store_inodeise store_final store_finalmode store_writemode)) {
959 32 50       80 if (defined(my $v = $pv_final->{$key})) {
960 0 0       0 if (scalar(@{$v}) > 1) {
  0         0  
961 0         0 my %values = map {$_->{raw} => $_->{raw}} @{$v};
  0         0  
  0         0  
962 0         0 $v = [map {{raw => $_}} values %values];
  0         0  
963             }
964              
965 0 0       0 if (scalar(@{$v}) == 1) {
  0         0  
966 0         0 $v = $v->[0];
967 0         0 $pv_final->{$key} = $v;
968             }
969             }
970             }
971              
972 4 0 33     10 if (defined($pv_final->{store_final}) && $pv_final->{store_final}{raw}) {
973 0         0 $pv_final->{store_finalmode} = {raw => WK_FINAL};
974 0         0 $pv_final->{store_writemode} = {raw => WK_WM_NONE};
975             }
976              
977 4         8 foreach my $key (qw(store_final store_finalmode store_writemode)) {
978 12         45 $pv_current->{$key} = $pv_final->{$key};
979             }
980             }
981             }
982              
983             # --- Overrides for Data::Identifier::Interface::Known ---
984              
985              
986             sub _known_provider {
987 0     0     my ($pkg, $class, %opts) = @_;
988              
989 0 0         croak 'Unsupported options passed' if scalar(keys %opts);
990              
991 0 0         if ($class eq 'properties_name') {
992 0           return ([map {$_->{name}} $pkg->property_info], not_identifiers => 1);
  0            
993             }
994              
995 0 0         if (ref $pkg) {
996 0           return $pkg->instance->_known_provider($class, %opts);
997             } else {
998 0           return File::Information->_known_provider($class, %opts);
999             }
1000             }
1001              
1002              
1003             # --- Overrides for Data::Identifier::Interface::Simple ---
1004              
1005             sub as {
1006 0     0 1   my ($self, $as, %opts) = @_;
1007              
1008 0 0 0       if ($as eq 'Data::Identifier' && !scalar keys %opts) {
1009 0           return Data::Identifier->new(ise => $self->ise);
1010             } else {
1011 0           return Data::Identifier->new(ise => $self->ise)->as($as, %opts);
1012             }
1013             }
1014              
1015             1;
1016              
1017             __END__
1018              
1019             =pod
1020              
1021             =encoding UTF-8
1022              
1023             =head1 NAME
1024              
1025             File::Information::Base - generic module for extracting information from filesystems
1026              
1027             =head1 VERSION
1028              
1029             version v0.16
1030              
1031             =head1 SYNOPSIS
1032              
1033             use File::Information;
1034              
1035             B<Note:> This package inherits from L<Data::Identifier::Interface::Known>, and L<Data::Identifier::Interface::Simple> (experimental since v0.16).
1036              
1037             This is the base package for L<File::Information::Link>, L<File::Information::Inode>, and L<File::Information::Filesystem>.
1038             Common methods are documented in this file. Details (such as supported keys) are documented in the respective modules.
1039              
1040             =head1 METHODS
1041              
1042             =head2 get
1043              
1044             my $value = $obj->get($key [, %opts]);
1045             # or:
1046             my @value = $obj->get($key [, %opts], list => 1);
1047              
1048             Get a value for a given key. The keys supported by this function depend on the module.
1049             Below you find a list with keys for aggregated values. Aggregated values are virtual and
1050             may be from different sources.
1051             If a key is not supported and no C<default> option is given, the method will die.
1052              
1053             The following optional options are supported:
1054              
1055             =over
1056              
1057             =item C<as>
1058              
1059             The type to get the value in. This is the name of a perl package or special value (in all lower case).
1060             The packages supported depend on the type of data to be returned.
1061             Currently the following special values are supported: C<sid>, C<uuid>, C<oid>, C<uri>, C<ise> (one of UUID, OID, or URI), C<mediatype>, C<raw> (a raw value).
1062             The following packages are supported (they need to be installed):
1063             L<Data::Identifier>,
1064             L<DateTime>,
1065             L<Data::URIID::Result>,
1066             L<Data::TagDB::Tag>,
1067             L<IO::Handle>,
1068             L<File::Information::Link>.
1069              
1070             =item C<default>
1071              
1072             The value to be returned when no actual value could be read. This can also be C<undef> which switches
1073             from C<die>-ing when no value is available to returning C<undef>.
1074              
1075             =item C<lifecycle>
1076              
1077             The lifecycle to get the value for. The default is C<current>.
1078             See also L<File::Information/lifecycles>.
1079              
1080             =item C<list>
1081              
1082             B<Note:>
1083             This is B<experimental>. Exact semantics of this mode may change or it
1084             may be removed completly in later versions.
1085              
1086             Enables list support. In list mode this method returns an array of values.
1087             This can be used to access keys that hold multiple values.
1088              
1089             To access keys that hold multiple values his mode must be used.
1090             Currently the list mode can also be used on keys that only hole one value.
1091             Then a list of on element is returned.
1092              
1093             When this mode is used, C<default> must be an array reference.
1094             Most commonly it will be C<[]>.
1095              
1096             =back
1097              
1098             The following keys for B<aggregated values> are supported:
1099              
1100             =over
1101              
1102             =item C<boring>
1103              
1104             (since v0.08)
1105              
1106             Whether the file is boring or not.
1107             A boring file is a file a user normally don't want to interact with.
1108             Such files include very small files, generated files (such as thumbnails, or object files)
1109             as well as lock files, temporary files and similar.
1110              
1111             The boring attribute can be used as a hint for displaying, as well as indexing data.
1112             The attribute may be used with caution in context of backups as even boring files might be important after restore.
1113             This attribute does not reflect importantce but how likely the user wants to directly interact with the file.
1114              
1115             =item C<comment>
1116              
1117             A comment on the document (if any).
1118              
1119             =item C<contentise>
1120              
1121             An ISE of the document that refers to it's content (so all bit-perfect copies share this ISE).
1122              
1123             =item C<description>
1124              
1125             A description of the document (if any).
1126              
1127             =item C<displayname>
1128              
1129             A string that is suitable for display and likely meaningful to users.
1130              
1131             =item C<fetchurl>
1132              
1133             An URL the file can be fetched from. This is very often a HTTPS URL. But it may be any valid URL.
1134              
1135             =item C<finalmode>
1136              
1137             The final mode of the document. Normally this is unset,
1138             auto-final (meaning the document should become final once successfully verifies it's final state),
1139             or final (it reached it's final state).
1140              
1141             =item C<hidden>
1142              
1143             Hidden files are generally not shown to the user (unless specifically requested).
1144             This is most common for system files or files that hold state information
1145             (like temporary files).
1146              
1147             The exact meaning depend on the operating system, the filesystem, and the softwar interacting
1148             with them. On most UNIX like systems files with a filename starting with a dot are considered
1149             hidden (sometimes also called dot-files). On FAT lineage filesystems there is a special hidden attribute.
1150              
1151             =item C<inodeise>
1152              
1153             An ISE for the inode. All hardlinks share this ISE but (bit-perfect) copies do not.
1154              
1155             =item C<ise>
1156              
1157             The ISE of the document. That is it's UUID, OID, or URI.
1158              
1159             =item C<mediatype>
1160              
1161             The media type of the document.
1162              
1163             See also L</MEDIA SUBTYPE DETECTION>.
1164              
1165             =item C<oid>
1166              
1167             The OID of the document.
1168              
1169             =item C<pages>
1170              
1171             (since v0.06)
1172              
1173             The number of pages in the document.
1174              
1175             =item C<readonly>
1176              
1177             If the file is ready only. This is different from immutable files in that they still can be deleted (or other file attributes be changed).
1178             B<Note:> In the C<final> lifecycle all files are read only.
1179              
1180             =item C<system>
1181              
1182             If the file is a system file. The exact meaning system files depend on the operating system.
1183             Generally speaking a user should not write to or delete system files (directly).
1184              
1185             Configuration files are generally not considered system files (as they may be edited by the user).
1186             Filesystems may contain special files that are considered system files. For example some filesystems
1187             might expose a list of bad blocks as a special file.
1188              
1189             =item C<size>
1190              
1191             The file size (in bytes).
1192              
1193             =item C<thumbnail>
1194              
1195             A file that can be used as a thumbnail for the document.
1196              
1197             =item C<title>
1198              
1199             Title of the document (if any).
1200              
1201             =item C<uri>
1202              
1203             The URI of the document.
1204             B<Note:> This is not the URL one can use to fetch the document. This URI is the identifier of the document.
1205              
1206             =item C<uuid>
1207              
1208             The UUID of the document.
1209              
1210             =item C<writemode>
1211              
1212             The write mode for the document. Normally one of random access, append only, or none.
1213              
1214             =back
1215              
1216             =head2 property_info
1217              
1218             my $info = $obj->property_info($property);
1219             # or:
1220             my @info = $obj->property_info;
1221             # or:
1222             my @info = $obj->property_info($property [, ...] );
1223              
1224             Returns information on one or more properties. If no property is given returns infos for all known ones.
1225              
1226             B<Note:> This object may not have values for all the listed properties.
1227             Likewise it is not guaranteed that two objects have the same list of properties.
1228              
1229             The return value is a hashref or an array of hashrefs which contain the following keys:
1230              
1231             =over
1232              
1233             =item C<name>
1234              
1235             The name of the property in universal tag format (the format used in this module).
1236              
1237             =item C<stable>
1238              
1239             Whether of not the property is considered stable API.
1240              
1241             =back
1242              
1243             =head2 digest
1244              
1245             my $digest = $obj->digest($algorithm [, %opts ]);
1246              
1247             Returns a digest (hash). The supported algorithms and lifecycle values depend on object.
1248             If there is a any kind of problem this function dies.
1249              
1250             Algorithm names are given in the universal tag form but aliases for names as by L<Digest> are supported.
1251              
1252             Common values include: C<md-5-128>, C<sha-1-160>, C<sha-2-256>, and C<sha-3-512>.
1253              
1254             The following optional options are supported:
1255              
1256             =over
1257              
1258             =item C<as>
1259              
1260             The type to get the value in. This is the name of a perl package or one of:
1261             C<hex> (the default), C<binary>, C<base64> (or C<b64>), C<base64_padded>, or C<utag>.
1262             To get an object that is compatible with the L<Digest> API use C<Digest>. Do not try to use specific types such as C<Digest::MD5>
1263              
1264             =item C<default>
1265              
1266             The value to be returned when no actual value could be read. This can also be C<undef> which switches
1267             from C<die>-ing when no value is available to returning C<undef>.
1268              
1269             =item C<lifecycle>
1270              
1271             The lifecycle to get the value for. The default is C<current>
1272             See also L<File::Information/lifecycles>.
1273              
1274             =item C<no_defaults>
1275              
1276             Skips calculation of digest in case the digest is not known but could be calculated.
1277              
1278             =back
1279              
1280             =head2 verify
1281              
1282             my File::Information::VerifyBase $result = $obj->verify;
1283             # same as:
1284             my File::Information::VerifyBase $result = $obj->verify(lifecycle_from => 'current', lifecycle_to => 'final');
1285              
1286             my $passed = $result->has_passed;
1287              
1288             Performs a verify of the object using the given lifecycles. The verify operation checks if the given object
1289             in the state of C<lifecycle_from> (by default C<'current'>) is the same as in the state of C<lifecycle_to>
1290             (by default C<'final'>).
1291              
1292             The resulting object contains information on the result of the verify. The most common operation is to call
1293             L<File::Information::VerifyBase/has_passed> on the result.
1294              
1295             B<Note:>
1296             This operation may read the file and calculate digests. This may take significant time.
1297              
1298             See also
1299             L</digest>,
1300             L<File::Information/lifecycles>.
1301              
1302             The following options (all optional) are supported:
1303              
1304             =over
1305              
1306             =item C<lifecycle_from>
1307              
1308             The lifecycle to verify.
1309              
1310             =item C<lifecycle_to>
1311              
1312             The lifecycle to verify against/to compare to.
1313              
1314             =item C<base_from>
1315              
1316             The base object to compare (defaults to C<$obj>).
1317              
1318             B<Note:>
1319             This is an experimental option.
1320              
1321             =item C<base_to>
1322              
1323             The base object to compare (defaults to C<$obj>) against.
1324              
1325             B<Note:>
1326             This is an experimental option.
1327              
1328             =back
1329              
1330             =head2 link_stream
1331              
1332             $base->link_stream($cb, %opts);
1333              
1334             (since v0.09)
1335              
1336             Transforms the object into a stream of singe links.
1337              
1338             For each link the callback C<$cb> is called.
1339             The type of the link is passed as first parameter, the data of the link as second parameter.
1340              
1341             B<Note:>
1342             This is a B<highly experimental> method. It may be removed or changed at any version.
1343              
1344             The following options are supported:
1345              
1346             =over
1347              
1348             =item C<lifecycle>
1349              
1350             The lifecycle this method should use.
1351             In contrast to other methods this can take a single value or a list (as arrayref).
1352              
1353             =item C<subobjects>
1354              
1355             The subobjects to export data from.
1356             This is a single value or an arrayref with the package names of the subobjects to take into account.
1357             Defaults to only the current object.
1358              
1359             =back
1360              
1361             =head2 uuid, oid, uri, ise, displayname, displaycolour, icontext, description
1362              
1363             my $uuid = $obj->uuid;
1364             my $oid = $obj->oid;
1365             my $uri = $obj->uri;
1366             my $ise = $obj->ise;
1367             my $displayname = $obj->displayname;
1368             my $displaycolour = $obj->displaycolour;
1369             my $icontext = $obj->icontext;
1370             my $description = $obj->description;
1371              
1372             These functions are for compatibility with L<Data::TagDB::Tag> and L<Data::Identifier>.
1373              
1374             They perform the same as calling L</get> with their name as key. For example:
1375              
1376             my $displayname = $obj->displayname;
1377             # same as:
1378             my $displayname = $obj->get('displayname');
1379              
1380             There availability depends on the type of object.
1381              
1382             =head2 instance
1383              
1384             my File::Information $instance = $obj->instance;
1385              
1386             Returns the instance that was used to create this object.
1387              
1388             =head2 store
1389              
1390             my @store = $base->store(as => 'File::FStore');
1391              
1392             (since v0.09)
1393              
1394             Returns the list of file stores C<$base> is in if any (see L<File::FStore>).
1395              
1396             B<Note:>
1397             There is no order to the returned values. The order may change between any two calls.
1398              
1399             B<Note:>
1400             Currently the C<as> option must be set to C<File::FStore>. No other values nor options are supported.
1401              
1402             =head2 extractor, db
1403              
1404             my Data::URIID $extractor = $obj->extractor;
1405             my Data::TagDB $db = $obj->db;
1406             my ... = $obj->digest_info;
1407              
1408             These methods provide access to the same data as the methods of L<File::Information>.
1409             Arguments will be passed to said functions. However the object my cache the result.
1410             Therefore it is only allowed to pass arguments that are compatible with caching (if any exist).
1411              
1412             See L<File::Information/extractor>, and L<File::Information/db> for details.
1413              
1414             =head2 known
1415              
1416             my @list = $obj->known($class [, %opts ] );
1417              
1418             This module implements L<Data::Identifier::Interface::Known/known>. See there for details.
1419              
1420             B<Note:>
1421             This interface does not guarantee any specific order.
1422              
1423             The following classes are supported. In addition the classes from L<File::Information/known> are supported.
1424              
1425             =over
1426              
1427             =item C<properties_name>
1428              
1429             Returns the names known by L</property_info>.
1430              
1431             =back
1432              
1433             =head1 MEDIA SUBTYPE DETECTION
1434              
1435             Via the key C<mediatype> modules implementing this interface such as L<File::Information::Inode> will provide media subtype detection.
1436             This feature is only available for formats that allow for unambiguous detection.
1437              
1438             The following minimal requirements are required for formats to be added. Note that this is not a complete list.
1439             Adhering to it is not sufficient for a format to be included.
1440              
1441             =over
1442              
1443             =item *
1444              
1445             The format must provide a unambiguous magic within the first 64 bytes of the file.
1446             To be unambiguous a magic must be at least 32 bit long (better 64 bit long) and must be distinct
1447             and not confusable with other formats.
1448              
1449             =item *
1450              
1451             The media subtype must be registered with IANA.
1452             Both the current list and registration format can be found at
1453             L<https://www.iana.org/assignments/media-types/>.
1454              
1455             =item *
1456              
1457             The format must not be subject to disputes, or limiting legal factors such as patents.
1458              
1459             =item *
1460              
1461             The specification for the format must be publically available.
1462              
1463             =back
1464              
1465             =head1 AUTHOR
1466              
1467             Philipp Schafft <lion@cpan.org>
1468              
1469             =head1 COPYRIGHT AND LICENSE
1470              
1471             This software is Copyright (c) 2024-2025 by Philipp Schafft <lion@cpan.org>.
1472              
1473             This is free software, licensed under:
1474              
1475             The Artistic License 2.0 (GPL Compatible)
1476              
1477             =cut