File Coverage

lib/Data/URIID.pm
Criterion Covered Total %
statement 71 191 37.1
branch 22 98 22.4
condition 16 67 23.8
subroutine 18 25 72.0
pod 10 11 90.9
total 137 392 34.9


line stmt bran cond sub pod time code
1             # Copyright (c) 2023-2025 Philipp Schafft
2              
3             # licensed under Artistic License 2.0 (see LICENSE file)
4              
5             # ABSTRACT: Extractor for identifiers from URIs
6              
7             package Data::URIID;
8              
9 4     4   1421131 use v5.10;
  4         18  
10 4     4   33 use strict;
  4         8  
  4         193  
11 4     4   21 use warnings;
  4         12  
  4         237  
12              
13 4     4   22 use Carp;
  4         8  
  4         441  
14 4     4   2874 use URI;
  4         29451  
  4         167  
15 4     4   29 use Scalar::Util qw(blessed);
  4         6  
  4         313  
16              
17 4     4   3048 use Data::Identifier;
  4         716773  
  4         36  
18              
19 4     4   3958 use Data::URIID::Result;
  4         45  
  4         402  
20 4     4   51 use Data::URIID::Service;
  4         9  
  4         141  
21              
22 4     4   57 use parent 'Data::Identifier::Interface::Known';
  4         9  
  4         112  
23              
24             our $VERSION = v0.20;
25              
26             my %names = (
27             service => {
28             'wikidata' => '198bc92a-be09-42d2-bf96-20a177294b79',
29             'fellig' => '43e7f8fe-2b90-4a5d-88e2-b1d46856d942',
30             'youtube' => 'de49b663-ff54-428b-ac56-d1950fb3cec7',
31             'youtube-nocookie' => 'c7acc624-de92-4480-8a21-31186e8bef54',
32             'dropbox' => 'f8022569-fdc0-4922-8a95-3de51be087aa',
33             '0wx' => 'b279726c-a349-4d87-b87c-929319a20b3e',
34             'e621' => '9bde88c4-1784-4756-b009-6111b4a69f96',
35             'dnb' => '1c5eb5fb-3f2a-4a5a-9b28-9fba163873a0',
36             'britishmuseum' => 'ac0cad64-4bf2-4924-a855-bc4147f6cdb3',
37             'musicbrainz' => 'fcb39c86-34f6-481c-9bb7-63c4a7c2256b',
38             'wikimedia-commons' => 'a283b6cb-c8c5-4b5d-8a58-e0327e087e50',
39             'wikipedia' => '1262f7fe-2d98-42aa-9ed5-5cc5182fc4f4',
40             'noembed.com' => '66c2ac78-936b-4241-b041-567080db3f6a',
41             'osm' => 'fdb14a39-f175-4aba-bcec-53c4683b72bd',
42             'overpass' => '5350885e-92f5-4aee-b72e-dd9d95c6700a',
43             'xkcd' => '6d90e7e2-c193-4e96-8d0a-c9a3d42beecf',
44             'Data::URIID' => '65a5000f-c37f-4fa1-9ad0-c9682fcd8756',
45             'Data::Identifier' => '1d1e84e9-dac5-43d6-9ff9-a72fb7de38d5',
46             'viaf' => 'b542f123-b304-4f60-a2a9-15a0cc62e25d',
47             'europeana' => '2ddf371f-20b5-4fdb-99d5-934b212ed596',
48             'open-library' => '173f7237-9ca0-490d-8a98-6a04c386769a',
49             'ngv' => '01aa1e39-6d90-41c6-a010-f3850844f2e1',
50             'geonames' => '2860d918-ac49-42a1-818d-68abd84972b3',
51             'find-a-grave' => '30deaf5b-470b-46da-8af1-6e5174d0eaf4',
52             'nla' => '0715561c-0189-4c1f-99bf-21cc6746f5ee', # National Library of Australia
53             'agsa' => 'a61dda0f-b914-496a-b473-2a333b9f0f9f', # Art Gallery of South Australia
54             'amc' => 'fec16f49-a9fe-4d89-bad2-7dbb44860e83', # Australian Music Centre
55             'a-p-and-p' => '91a4981f-c1c7-4136-9e2f-39f2cd2eda7f', # Australian Prints + Printmaking
56             'tww' => 'aafdcd22-828b-413e-be0c-ed9a92d941db', # The Watercolour World
57             'factgrid' => '9a6b8382-c004-458a-bf2a-68f03d863282', # FactGrid
58             'grove-art-online' => 'be8b12e5-b32d-4b89-9301-84827a79589e', # Grove Art Online
59             'wikitree' => '70b9de08-2b73-4c0d-91d2-e89561cf94d2', # WikiTree
60             'doi' => '60387716-fa98-4c92-ae2b-7f4496d6f9be', # doi.org
61             'iconclass' => '75cbefbb-e622-4b72-9829-348f3986d709', # iconclass.org
62             'iana' => 'f11657cc-95da-4eae-95fc-62d16fecf473', # iana.org
63             'uriid' => '772aa1ed-9a3a-4806-94a1-42cbc0e9f962', # uriid.org
64             'oidref' => 'b5a63482-f92c-4ed5-8ec3-49caa0bafa66', # oidref.com
65             'furaffinity' => '978a4622-2a87-4c67-b9bf-c1b1e5d69b05',
66             'imgur' => 'e6c5f855-221a-4c48-9f31-cf0a852140da', # imgur.com
67             'notalwaysright' => '50882a9d-b405-4d5d-8068-0d38fb0b2f90', # notalwaysright.com
68             'fefe' => '80195e74-7afb-435b-8c76-3da8b343f235', # fefe.de
69             'schemaorg' => 'b1db320a-ab00-4650-ab54-f162b52bca08', # schema.org
70             'purlorg' => '53f8ea47-1de3-4562-9459-5f093782ae10', # purl.org
71             'ruthede' => '6c7dba44-dd07-4928-874e-f076e98cc96b', # ruthe.de
72             'danbooru2chanjp' => 'dac7a0ba-9090-4db7-bb94-373fabf98103', # danbooru.2chan.jp
73             'sirtxkeepcoolorg' => '0d7be696-accc-4d52-9cea-9c2362a57d62', # sirtx.keep-cool.org
74             },
75             type => {
76             'uuid' => '8be115d2-dc2f-4a98-91e1-a6e3075cbc31',
77             'oid' => 'd08dc905-bbf6-4183-b219-67723c3c8374',
78             'uri' => 'a8d1637d-af19-49e9-9ef8-6bc1fbcf6439',
79             'tagname' => 'bfae7574-3dae-425d-89b1-9c087c140c23',
80             'wikidata-identifier' => 'ce7aae1e-a210-4214-926a-0ebca56d77e3',
81             'musicbrainz-identifier' => '95bd826b-bd3e-4b40-b16a-aa20c9f673e4', # P434, P435, P436, P966, P982, P1004, P1330, P1407, P4404, P5813, P6423, and P8052
82             'british-museum-term' => '310776dc-1433-4623-9ffa-42d038d400a4', # P1711 (special)!
83             'gnd-identifier' => '893a7d5c-124c-4ad6-9a56-0ea8be50b536', # P227
84             'fellig-box-number' => 'c036d4d9-d983-4322-917c-acbf6133df64',
85             'fellig-identifier' => '90ecb0c5-f99a-4702-8575-430247de8f48',
86             'youtube-video-identifier' => '0d88a8f0-0fce-41ae-beef-88d74d83eb32', # P1651
87             'e621tagtype' => 'da72fa90-5990-46b4-b4ca-05eaf68170a5',
88             'e621tag' => '6fe0dbf0-624b-48b3-b558-0394c14bad6a',
89             'wikimedia-commons-identifier' => 'a6b1a981-48a0-445e-adc7-11df14e91769',
90             'e621-post-identifier' => '4a7fc2e2-854b-42ec-b24f-c7fece371865',
91             'e621-pool-identifier' => 'a0a4fae2-be6f-4a51-8326-6110ba845a16',
92             'osm-node' => '6c09afad-0109-4a05-a430-f3bdade19c24',
93             'osm-way' => '01da1735-25b3-4560-9c8c-186e42dd8904',
94             'osm-relation' => 'bdd9b297-e0a8-427e-8487-83f600226f5b',
95             'xkcd-num' => '943315e7-9efd-41df-b3f5-4a42b93df46d',
96             'factgrid-identifier' => 'd576b9d1-47d4-43ae-b7ec-bbea1fe009ba', # P8168 and P10787
97             'viaf-identifier' => '685c7871-2965-4f0a-ac63-d6bacd1e575e', # P214
98             'open-library-identifier' => '435f6b8c-cae4-4dcf-816a-1225fc35108f', # P3847
99             'unesco-thesaurus-identifier' => '3ff707af-1f72-4e1f-a81b-7871fb6079e1', # P3916
100             'isni' => 'a6de24d2-95a2-4577-870c-31ad10339f22', # P213
101             'aev-identifier' => 'e9c13254-831f-474c-8881-31012ca45a72', # P7033
102             'europeana-entity-identifier' => 'a1cffa6b-6b78-4b11-9a6c-3673ec25c489', # P7704
103             'ngv-artist-identifier' => '8fb7807b-c15a-4ae1-8f15-4b3d8e4f5cef', # P2041
104             'ngv-artwork-identifier' => '4d25c32b-a169-40f5-be88-3d609b7d05ff', # P4684
105             'geonames-identifier' => '02e34fcc-cf5e-445a-ba54-bf6df8ae036a', # P1566
106             'find-a-grave-identifier' => '39ea7c88-3fc2-4a01-89f9-547f451764f7', # P535
107             'libraries-australia-identifier'=> '22a80a6d-0c69-41f5-b5be-6c889f8e601b', # P409
108             'agsa-creator-identifier' => 'fb3bac19-7d4e-4995-9ef0-08dbcea7f340', # P6804
109             'amc-artist-identifier' => '0b907ca8-a84f-4780-b708-910a858228a8', # P9575
110             'a-p-and-p-artist-identifier' => '5bafcbd4-5fcf-4823-848f-7eab8175a80c', # P10086
111             'nla-trove-people-identifier' => '0edc2854-37bf-4562-a05b-ac4113ead938', # P1315
112             'tww-artist-identifier' => 'b49d88ba-1b61-4f13-b5c9-73a09ffb2b3f', # P6735
113             'grove-art-online-identifier' => '80c548f6-4d23-43c1-ab50-b4546319c752', # P8406
114             'wikitree-person-identifier' => 'a6f7d17a-ced2-4cf7-8ce7-fcb4a98f7aa0', # P2949
115             'doi' => '931f155e-5a24-499b-9fbb-ed4efefe27fe', # P356
116             'iconclass-identifier' => '241348a8-c5d0-4473-9ec1-de7c2ba00fbb', # P1256
117             'media-subtype-identifier' => 'c1166bf7-c4ab-40ad-9a92-a55103bec509', # P1163, commonly also called media-type or mime-type.
118             'gtin' => '82d529be-0f00-4b4f-a43f-4a22de5f5312',
119             'small-identifier' => 'f87a38cb-fd13-4e15-866c-e49901adbec5',
120             'language-tag-identifier' => 'd0a4c6e2-ce2f-4d4c-b079-60065ac681f1',
121             'chat-0-word-identifier' => '2c7e15ed-aa2f-4e2f-9a1d-64df0c85875a',
122             'sirtx-numerical-identifier' => '039e0bb7-5dd3-40ee-a98c-596ff6cce405',
123             'furaffinity-post-identifier' => 'b8dd10ec-d46b-4316-b3f3-2bc28cff9d35',
124             'imgur-post-identifier' => 'f2425e42-0083-4205-aa0b-2005f1fa62a3',
125             'notalwaysright-post-identifier' => '700a0082-0201-46f1-b0a1-37e2caf76cc2',
126             'fefe-blog-post-identifier' => '9ad1edae-f08e-4605-b9f9-a1d3894c290a',
127             'ruthede-comic-post-identifier' => '2db1003d-dbf4-47bf-bfe4-7874c5bf0263',
128             'danbooru2chanjp-post-identifier' => '6e3590b6-2a0c-4850-a71f-8ba196a52280',
129             'danbooru2chanjp-tag' => 'c5632c60-5da2-41af-8b60-75810b622756',
130             },
131             action => {
132             #What about: search/lookup? list? content?
133              
134             # Human readable:
135             'documentation' => 'b75354b2-a43b-44d9-99d5-9c0ec4fa5287',
136             'manage' => '01fc3e42-7b5c-403e-94fb-a4fa7990c0ed',
137             'render' => 'b608ad23-e61a-4ab3-a1ca-f3f4e269b03b', # for display (of a work)
138             'embed' => '0fecb446-89a9-4b0c-a7db-e83b5acec419',
139             'info' => '478bc202-51ac-4c5e-9f9a-38e233a42dfb', # like metadata just human readable
140             'edit' => 'e775b770-90eb-4b2f-9b78-26021688722d',
141             # Machine readable:
142             'fetch' => '4ab02627-c452-4f4e-a9c0-4bde8f1e6b0e',
143             'file-fetch' => 'a3b66e23-15f2-4bc6-b22e-8f072ba839e7',
144             'stream-fetch' => '4060a966-9fae-4d43-9006-2288b58afabb',
145             'metadata' => '6f1c921b-e0bb-4449-911f-a00719e91a1e',
146             },
147             );
148              
149             my %service_lists = (
150             ALL => [keys %{$names{service}}],
151             LOCAL => [qw(Data::URIID Data::Identifier)],
152             REMOTE => [qw(wikidata factgrid wikimedia-commons fellig noembed.com osm overpass xkcd doi iconclass e621 furaffinity imgur)],
153              
154             wmf => [qw(wikidata wikimedia-commons wikipedia)],
155             osm => [qw(osm overpass)],
156              
157             friendly => [qw(fellig uriid)],
158             uafriendly => [qw(@friendly)],
159             );
160              
161             # Inverse of %names:
162             my %ises;
163              
164             foreach my $class (keys %names) {
165             my $n = $names{$class};
166              
167             $ises{$class} = {
168             map {$n->{$_} => $_} keys %{$n}
169             };
170              
171             foreach my $key (keys %{$n}) {
172             Data::Identifier->new(uuid => $n->{$key}, displayname => $key)->register;
173             }
174             }
175              
176             {
177             my $found;
178             do {
179             $found = undef;
180              
181             foreach my $content (values %service_lists) {
182             my @newlist;
183              
184             foreach my $entry (@{$content}) {
185             if ($entry =~ /^\@(.+)$/) {
186             push(@newlist, @{$service_lists{$1}});
187             $found = 1;
188             } else {
189             push(@newlist, $entry);
190             }
191             }
192              
193             @{$content} = @newlist;
194             }
195             } while ($found);
196             }
197              
198              
199              
200             sub new {
201 3     3 1 2589 my ($pkg, %opts) = @_;
202 3         12 my $self = bless \%opts, $pkg;
203              
204 3 50       34 if (defined(my $services_online = delete $self->{services_online})) {
205 0         0 foreach my $service_name ($self->match_services($services_online)) {
206 0         0 $self->service($service_name)->online(1);
207             }
208             }
209              
210 3         13 return $self;
211             }
212              
213              
214             #@returns Data::URIID::Result
215             sub lookup {
216 6     6 1 4434 my ($self, $type, $uri) = @_;
217 6         14 my %secondary;
218              
219             # Note: We use 'auto' as default and try to figure out of it's an ISE or an URI.
220              
221             # Shuffle arguments if the two argument form is used.
222 6 50       31 if (scalar(@_) == 2) {
223 6         18 ($type, $uri) = ('auto' => $type);
224             }
225              
226 6 50       78 croak 'Passed undef as URI' unless defined $uri;
227              
228 6 50 33     26 if (blessed($uri) && !$uri->isa('URI')) {
229 0 0       0 if (defined(my $displaycolour = eval {$uri->displaycolour})) {
  0         0  
230 0 0       0 if (eval {$displaycolour->isa('Data::URIID::Colour')}) {
  0         0  
231 0   0     0 $secondary{attributes} //= {};
232 0   0     0 $secondary{attributes}{displaycolour} //= {'*' => $displaycolour};
233             }
234             }
235              
236 0 0       0 if (defined(my $displayname = eval {$uri->displayname(default => undef, no_defaults => 1)})) {
  0         0  
237 0   0     0 $secondary{attributes} //= {};
238 0   0     0 $secondary{attributes}{displayname} //= {'*' => $displayname};
239             }
240              
241 0 0 0     0 if ($uri->isa('Data::URIID::Result')) {
    0          
    0          
    0          
    0          
    0          
    0          
242 0         0 $uri = $uri->url;
243             } elsif ($uri->isa('Data::URIID::Barcode')) {
244             # We guess here.
245 0         0 $uri = $uri->data;
246 0         0 $type = 'qrcode';
247             } elsif ($uri->isa('Data::Identifier')) {
248 0         0 $type = $uri->type->uuid;
249 0         0 $uri = $uri->id;
250             } elsif ($uri->isa('Data::TagDB::Tag')) {
251 0         0 $uri = Data::Identifier->new(from => $uri);
252 0         0 $type = $uri->type->uuid;
253 0         0 $uri = $uri->id;
254             } elsif (index(blessed($uri), __PACKAGE__) == 0 && $uri->can('ise')) {
255 0         0 ($type, $uri) = (ise => $uri->ise);
256             } elsif ($uri->isa('Mojo::URL')) {
257 0         0 $uri = URI->new("$uri"); # convert to URI as per documentation of Mojo::URL
258             } elsif ($uri->isa('File::FStore::File')) {
259 0         0 my $data = $uri->get;
260 0         0 $type = 'ise';
261 0         0 $uri = $uri->get(properties => 'contentise');
262              
263 0   0     0 $secondary{attributes} //= {};
264 0 0 0     0 $secondary{attributes}{final_file_size} //= {'*' => $data->{properties}{size}} if defined $data->{properties}{size};
265 0         0 $secondary{attributes}{roles} = [[Data::Identifier->new(sid => 17)]];
266 0         0 $secondary{digest} = $data->{digests};
267             } else {
268 0         0 croak 'Invalid type of object passed';
269             }
270             }
271              
272 6 50       21 unless (blessed $uri) {
273 6 50       21 if (ref $type) {
274 0         0 my URI $u;
275              
276 0 0 0     0 if ($type->isa('URI') || $type->isa('Mojo::URL')) {
277 0         0 $type = $self->lookup($type);
278             }
279              
280 0         0 $type = $type->ise;
281 0         0 $u = URI->new('https://uriid.org/');
282 0         0 $u->path_segments('', $type, $uri);
283 0         0 $uri = $u;
284             } else {
285              
286 6 50       22 if ($type eq 'qrcode') {
287             # Bit more relaxed URLs...
288 0         0 $uri =~ s#^www\.#https://www.#; # Try to add missing protocol.
289 0         0 $type = 'auto';
290             }
291              
292 6 50 33     27 if ($type eq 'auto' || $type eq 'ise') {
    0          
293 6 100       47 if ($uri =~ /^[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}$/) {
    100          
294 1         5 $uri = 'urn:uuid:'.$uri;
295             } elsif ($uri =~ /^[1-3](?:\.(?:0|[1-9][0-9]*))+$/) {
296 1         4 $uri = 'urn:oid:'.$uri;
297             }
298             } elsif ($type =~ /^[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}$/) {
299 0         0 my $u = URI->new('https://uriid.org/');
300 0         0 $u->path_segments('', $type, $uri);
301 0         0 $uri = $u;
302             }
303              
304 6         41 $uri = URI->new($uri);
305             }
306             }
307              
308 6 50       16212 croak 'Passed a non-URI object' unless $uri->isa('URI');
309              
310 6 50 33     40 croak 'URI is not absolute' unless defined($uri->scheme) && length($uri->scheme);
311              
312 6         804 return Data::URIID::Result->new(uri => $uri, extractor => $self, secondary => \%secondary);
313             }
314              
315              
316             sub online {
317 6     6 1 18 my ($self, $new_value) = @_;
318              
319 6 50       23 if (scalar(@_) == 2) {
320 0         0 $self->{online} = !!$new_value;
321             }
322              
323 6         35 return $self->{online};
324             }
325              
326              
327             sub default_online {
328 13     13 1 30 my ($self, $new_value) = @_;
329              
330 13 50       34 if (scalar(@_) == 2) {
331 0         0 $self->{default_online} = !!$new_value;
332             }
333              
334 13         99 return $self->{default_online};
335             }
336              
337              
338             sub language_tags {
339 0     0 1 0 my ($self, @new_value) = @_;
340              
341 0 0       0 if (scalar(@new_value)) {
342 0         0 $self->{language_tags} = \@new_value;
343             }
344              
345 0         0 require I18N::LangTags;
346 0         0 require I18N::LangTags::Detect;
347 0   0     0 $self->{language_tags} //= [I18N::LangTags::implicate_supers(I18N::LangTags::Detect::detect())];
348              
349 0         0 return @{$self->{language_tags}};
  0         0  
350             }
351              
352             # Private method:
353             sub _get_language_tags {
354 0     0   0 my ($self, %opts) = @_;
355              
356 0 0       0 if (defined(my $language_tags = $opts{language_tags})) {
357 0 0       0 return @{$language_tags} if ref($language_tags) eq 'ARRAY';
  0         0  
358 0         0 require I18N::LangTags;
359 0         0 return I18N::LangTags::implicate_supers(I18N::LangTags::extract_language_tags($language_tags));
360             }
361              
362 0         0 return $self->language_tags;
363             }
364              
365             # Private method:
366             sub _ua {
367 0     0   0 my ($self) = @_;
368 0   0     0 return $self->{ua} //= do {
369 0         0 require LWP::UserAgent;
370              
371 0         0 my $ua = LWP::UserAgent->new(agent => $self->{agent});
372 0         0 my $x = 1001; # we use 1001 and --$x here instead of 1000 and $x-- as that confuses parsers.
373              
374 0         0 $ua->default_header('Accept-Language' => join(', ', map {sprintf('%s; q=%.3f', $_, --$x/1000)} $self->language_tags));
  0         0  
375              
376 0         0 $ua;
377             };
378             }
379              
380              
381              
382             sub known {
383 0     0 1 0 my ($pkg, $class, %opts) = @_;
384 0 0 0     0 $opts{extractor} //= $pkg if ref $pkg;
385 0         0 return $pkg->SUPER::known($class, %opts);
386             }
387              
388             sub _known_provider {
389 0     0   0 my ($pkg, $class, %opts) = @_;
390 0 0       0 croak 'Unsupported options passed' if scalar(keys %opts);
391 0 0       0 return ([map {values %{$_}} values %names], rawtype => 'uuid') if $class eq ':all';
  0         0  
  0         0  
392 0 0       0 return ([values %{$names{$class}}], rawtype => 'uuid') if defined $names{$class};
  0         0  
393 0         0 croak 'Unsupported class';
394             }
395              
396              
397             sub name_to_ise {
398 211     211 1 491 my ($self, $class, $name) = @_;
399 211 100       499 return $name if $self->is_ise($name); # return name if name is already an ISE
400 205 50       497 if (blessed($name)) {
401 0 0 0     0 if (index(blessed($name), __PACKAGE__) == 0 && $name->can('ise')) {
    0          
402 0         0 return $name->ise;
403             } elsif ($name->isa('Data::Identifier')) {
404 0         0 return $name->ise;
405             }
406             }
407 205   50     1122 return $names{$class // ''}{$name // ''} // croak sprintf('Invalid class or name: %s: %s', $class // '<undef>', $name // '<undef>');
      50        
      0        
      0        
      33        
408             }
409              
410              
411             sub ise_to_name {
412 145     145 1 312 my ($self, $class, $ise) = @_;
413 145 50       346 $ise = $ise->Data::Identifier::as('ise') if ref $ise;
414 145   50     10383 return $ises{$class // ''}{$ise // ''} // croak 'Invalid class or ISE';
      50        
      66        
415             }
416              
417              
418             sub is_ise {
419 211     211 1 341 my ($self, $str) = @_;
420              
421 211   66     1546 return $str =~ /^[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}$/ ||
422             $str =~ /^[1-3](?:\.(?:0|[1-9][0-9]*))+$/ ||
423             $str =~ /^[a-zA-Z][a-zA-Z0-9\+\.\-]+:[^:]/;
424             }
425              
426              
427             sub service {
428 30     30 1 79 my ($self, $service) = @_;
429 30   100     125 my $cache = $self->{service_cache} //= {};
430              
431 30 50       81 $service = $service->ise if ref $service;
432 30         67 $service = $self->name_to_ise(service => $service);
433              
434 30 50       112 croak 'Not a known service: '.$service unless defined $ises{service}{$service};
435              
436 30   66     163 return $cache->{$service} //= Data::URIID::Service->new(
437             extractor => $self,
438             ise => $service,
439             online => $self->default_online,
440             );
441             }
442              
443             sub match_services {
444 0     0 0   my ($self, @list) = @_;
445 0           my %selected;
446              
447 0 0         @list = map {split /\s*[,:]\s*|\s+/} map {ref($_) eq 'ARRAY' ? @{$_} : $_} @list;
  0            
  0            
  0            
448              
449 0           foreach my $entry (@list) {
450 0           my ($neg, $prefix, $name) = $entry =~ /^(\!?)(\@?)(.+)$/;
451 0           my @sublist;
452              
453 0 0         if ($prefix eq '@') {
454 0   0       @sublist = @{$service_lists{$name} // croak 'Invalid service list: '.$name};
  0            
455             } else {
456 0 0         $name = $name->ise if ref $name;
457 0           $name = $self->name_to_ise(service => $name);
458 0           $name = $self->ise_to_name(service => $name);
459 0           @sublist = ($name);
460             }
461              
462 0 0         if ($neg eq '') {
463 0           $selected{$_} = 1 foreach @sublist;
464             } else {
465 0           $selected{$_} = 0 foreach @sublist;
466             }
467             }
468              
469 0           return grep {$selected{$_}} keys %selected;
  0            
470             }
471              
472             sub _register_service {
473 0     0     my ($pkg, $id, %opts) = @_;
474 0           state $dyn = 0;
475 0           my $name = 'x_dyn_'.$dyn++;
476 0           my $uuid;
477              
478 0 0         $id = Data::Identifier->new(from => $id, (defined $opts{displayname} ? (displayname => $opts{displayname}) : ()));
479 0           $uuid = $id->uuid;
480              
481 0           $id->register;
482              
483 0 0         if (defined(my $displayname = $id->displayname(default => undef, no_defaults => 1))) {
484 0 0         if ($displayname =~ /^[a-z][0-9a-z-]*[0-9a-z]$/) {
485 0           $name .= '_'.$displayname;
486             }
487             }
488              
489 0           foreach my $class (keys %ises) {
490 0 0         if (exists $ises{$class}{$uuid}) {
491 0           croak 'Service already in existance. (Bug in user of Data::URIID?)';
492             }
493             }
494              
495 0           $names{service}{$name} = $uuid;
496 0           $ises{service}{$uuid} = $name;
497              
498 0           Data::URIID::Result->_register_service($name, %opts);
499              
500 0           return $name => $id;
501             }
502              
503              
504             1;
505              
506             __END__
507              
508             =pod
509              
510             =encoding UTF-8
511              
512             =head1 NAME
513              
514             Data::URIID - Extractor for identifiers from URIs
515              
516             =head1 VERSION
517              
518             version v0.20
519              
520             =head1 SYNOPSIS
521              
522             use Data::URIID;
523              
524             my $extractor = Data::URIID->new;
525              
526             my $result = $extractor->lookup( $uri );
527              
528             my $id = $result->id( $type );
529             my $displayname = $result->attribute('displayname');
530              
531             This module provides a way to extract knowledge (mainly identifier) from a given URL, a QR Code,
532             or similar objects.
533              
534             The main usages for this module are:
535              
536             =over
537              
538             =item *
539              
540             Provide information to display the object in question to the user (such as name, location, icons, thumbnails, and more)
541              
542             =item *
543              
544             Provide required identifiers and URLs to link the object with many services.
545              
546             =back
547              
548             In order to do so, an extractor (instance of this package) is created.
549             On that extractor L</lookup> is called for every input to process resulting in a L<Data::URIID::Result> object holding the acquired knowledge.
550              
551             The module supports both online and offline lookups. See L</online>.
552              
553             This package inherits from L<Data::Identifier::Interface::Known>.
554              
555             =head1 METHODS
556              
557             =head2 new
558              
559             my $extractor = Data::URIID->new;
560             # or:
561             my $extractor = Data::URIID->new( option => value, ... );
562              
563             Returns a new object that can be used for lookups.
564             The following options are defined:
565              
566             =over
567              
568             =item C<agent>
569              
570             User agent string to use if no C<ua> is given.
571              
572             This should be set to something of valid user agent string syntax that reflects your
573             application and contains contact details.
574              
575             =item C<default_online>
576              
577             Boolean indicating whether online operation is allowed by default.
578              
579             Default false.
580             See also L<"default_online">.
581              
582             =item C<language_tags>
583              
584             An arrayref with all acceptable language tags (most acceptable first).
585              
586             Default C<[I18N::LangTags::implicate_supers(I18N::LangTags::Detect::detect())]>
587             See also L<"language_tags">, L<I18N::LangTags>, and L<I18N::LangTags::Detect>
588             Note: If you perform online lookups and passed a user agent via C<ua> it must also reflect this setting.
589              
590             =item C<online>
591              
592             Boolean indicating whether online operations are permitted.
593              
594             Default false.
595             See also L<"online">.
596              
597             =item C<services_online>
598              
599             List of services to set the online flag for.
600             Similar to C<default_online>, this allows for enable online mode for services.
601             The difference is that this option allows selective control.
602              
603             The argument takes a list (arrayref or C<,> or C<:> delimited string) of elements which each name a service
604             or a group (prefixed with C<@>). Each entry might be prefixed with C<!> to negative (exclude) that element.
605              
606             Currently defined:
607             C<@ALL> (all services),
608             C<@LOCAL> (all services that provide offline mode; note that offline mode is not affected by this setting),
609             C<@REMOTE> (all services that have an online mode),
610             C<@friendly> (all services that have a friendly policy),
611             C<@uafriendly> (all services that have a friendly policy but require the agent string to be set to something useful that includes contact details),
612             C<@wmf> (Wikimedia Foundation, Inc. related services),
613             C<@osm> (OpenStreetMap related services).
614              
615             Defaults to an empty list.
616              
617             B<Example:>
618              
619             [qw(@friendly @osm !@wmf)] # All friendly services plus OpenStreetMap services, but not Wikimedia Foundation
620              
621             '@uafriendly:!wikimedia-commons' # All services friendly (if 'agent' is set) but not Wikimedia commons.
622              
623             B<Note:>
624             If a service is listed as negative in this setting this will not disable that service
625             but just not enable it. It might be enabled by other options.
626             This behaviour may change in future versions.
627              
628             B<Note:>
629             You should not give this option together with C<default_online>.
630             Future versions might change behaviour if both are given.
631              
632             =item C<ua>
633              
634             Useragent to use (L<LWP::UserAgent>).
635              
636             =back
637              
638             =head2 lookup
639              
640             my $result = $extractor->lookup( $uri );
641             # or:
642             my $result = $extractor->lookup( $type, $uri );
643              
644             Tries to look up the URI and returns the result.
645             Takes an L<URI> object (preferred) or a plain string as argument.
646             Alternatively can internally also convert from
647             L<Mojo::URL>,
648             L<File::FStore::File>,
649             L<Data::Identifier>,
650             L<Data::TagDB::Tag>,
651             L<Data::URIID::Service>,
652             L<Data::URIID::Result>,
653             and L<Data::URIID::Colour>.
654              
655             C<$type> is one of C<uri>, C<ise>, or C<qrcode>, an UUID, or an object of any type supported for C<$uri>.
656             Defaults to C<uri>.
657             When C<ise> an UUID or OID can be provided instead of an URI.
658             When C<qrcode> the text content from an QR code can be provided.
659              
660             This method will return a L<Data::URIID::Result> if successful or C<die> otherwise.
661              
662             =head2 online
663              
664             my $online = $extractor->online( [ $new_value ] );
665              
666             Gets or sets the online status of extractor. If this value is false no online operations are permitted.
667             In addition to this value being true the online value for the services that should perform lookups
668             need to be true.
669              
670             See also L<"default_online">.
671              
672             =head2 default_online
673              
674             my $online = $extractor->default_online( [ $new_value ] );
675              
676             Gets or sets the default online value for L<Data::URIID::Service> objects returned by L<"service">.
677             This value is only used if the service has not yet been accessed.
678             Therefore it is often unsafe to alter this value. The corresponding L<"new"> option should be used.
679              
680             See also L<"online">.
681              
682             =head2 language_tags
683              
684             my @language_tags = $extractor->language_tags( [ @new_value ] );
685              
686             Gets or sets the list of acceptable language tags.
687              
688             See also L<"new">.
689              
690             =head1 UTILITY METHODS
691              
692             =head2 known
693              
694             my @list = $extractor->known( $class [, %opts ] );
695             # or:
696             my @list = Data::URIID->known( $class [, %opts ] );
697              
698             Returns a list of known items of a class.
699             Not all items may have the same level of support by this module.
700             Class is one of C<service>, C<type>, C<action>, or C<:all>.
701             If the class is given as C<:all> this module will return the lists for all classes
702             but may also return additional entries known to it.
703              
704             This implements L<Data::Identifier::Interface::Known/known>. See there for details.
705             If called on an instance of this module C<extractor> is automatically filled in the options.
706              
707             =head2 name_to_ise
708              
709             my $ise = $extractor->name_to_ise( $class => $name );
710              
711             Tries to lookup an ISE for a given well known name.
712             Class is one of C<service>, C<type>, or C<action>.
713              
714             This method will return an ISE if successful or C<die> otherwise.
715             This is the reverse of L<"ise_to_name">.
716              
717             =head2 ise_to_name
718              
719             my $name = $extractor->ise_to_name( $class => $ise );
720              
721             Tries to lookup a name for a given well known ISE.
722             Class is one of C<service>, C<type>, or C<action>.
723              
724             This method will return a name if successful or C<die> otherwise.
725             This is the reverse of L<"name_to_ise">.
726              
727             If C<$ise> is a blessed object it is tried to be converted to a plain ISE via L<Data::Identifier/as> (since v0.17).
728              
729             =head2 is_ise
730              
731             my $bool = $extractor->is_ise( $str );
732              
733             Returns whether or not a string is a valid ISE.
734              
735             =head2 service
736              
737             my $service = $extractor->service( $service );
738              
739             This method will return a L<Data::URIID::Service> for the given name or ISE if successful or C<die> otherwise.
740              
741             =head1 KNOWN/SUPPORTED SERVICES
742              
743             For a list of known/supported services see L<Data::URIID::Service/"KNOWN/SUPPORTED SERVICES">.
744              
745             =head1 AUTHOR
746              
747             Philipp Schafft <lion@cpan.org>
748              
749             =head1 COPYRIGHT AND LICENSE
750              
751             This software is Copyright (c) 2023-2025 by Philipp Schafft <lion@cpan.org>.
752              
753             This is free software, licensed under:
754              
755             The Artistic License 2.0 (GPL Compatible)
756              
757             =cut