File Coverage

lib/Data/URIID/Service.pm
Criterion Covered Total %
statement 162 594 27.2
branch 22 198 11.1
condition 40 214 18.6
subroutine 22 56 39.2
pod 5 6 83.3
total 251 1068 23.5


line stmt bran cond sub pod time code
1             # Copyright (c) 2023-2025 Philipp Schafft
2              
3             # licensed under Artistic License 2.0 (see LICENSE file)
4              
5             # ABSTRACT: Extractor for identifiers from URIs
6              
7             package Data::URIID::Service;
8              
9 4     4   62 use v5.10;
  4         14  
10 4     4   22 use strict;
  4         8  
  4         126  
11 4     4   20 use warnings;
  4         7  
  4         232  
12              
13 4     4   23 use Carp;
  4         9  
  4         317  
14 4     4   3235 use JSON;
  4         57545  
  4         51  
15 4     4   784 use URI;
  4         12  
  4         144  
16 4     4   22 use URI::Escape;
  4         10  
  4         333  
17 4     4   28 use Encode;
  4         8  
  4         478  
18 4     4   47 use Scalar::Util qw(weaken);
  4         7  
  4         220  
19 4     4   44 use List::Util qw(all);
  4         6  
  4         291  
20 4     4   26 use Data::Identifier::Generate v0.25;
  4         96  
  4         109  
21              
22 4     4   21 use Data::URIID::Result;
  4         7  
  4         104  
23 4     4   2141 use Data::URIID::Colour;
  4         12  
  4         226  
24              
25             our $VERSION = v0.20;
26              
27 4     4   30 use parent 'Data::URIID::Base';
  4         7  
  4         25  
28              
29             my @musicbrainz_wikidata_relations = qw(P434 P435 P436 P966 P982 P1004 P1330 P1407 P4404 P5813 P6423 P8052); # Wikidata property ids; every one is mapped to 'musicbrainz-identifier' in $config_wikidata's idmap and the same list is exposed there as uuid_relations
30              
31             my $config_wikidata = {
               # Static settings for the Wikidata Wikibase instance:
               #   type           - identifier type name used for Wikidata entities
               #   idmap          - Wikidata property id => identifier type name
               #   endpoint       - service URLs (entitydata contains a %s placeholder,
               #                    presumably filled with the entity id -- confirm in the caller)
               #   prefix         - prepended to an entity id to build an entity URI (see the attribute handlers below)
               #   uuid_relations - the MusicBrainz property list declared above
               #   special_ids    - properties whose values need converting in both directions
               #   attributes     - per-property callbacks turning a property value into attributes
32             type => 'wikidata-identifier',
33             idmap => {
34             P213 => 'isni',
35             P214 => 'viaf-identifier',
36             P227 => 'gnd-identifier',
37             P356 => 'doi',
38             P402 => 'osm-relation',
39             P409 => 'libraries-australia-identifier',
40             P535 => 'find-a-grave-identifier',
41             P648 => 'open-library-identifier',
42             P1256 => 'iconclass-identifier',
43             P1315 => 'nla-trove-people-identifier',
44             P1566 => 'geonames-identifier',
45             P1651 => 'youtube-video-identifier',
46             P2041 => 'ngv-artist-identifier',
47             P2949 => 'wikitree-person-identifier',
48             P3916 => 'unesco-thesaurus-identifier',
49             P4684 => 'ngv-artwork-identifier',
50             P6735 => 'tww-artist-identifier',
51             P6804 => 'agsa-creator-identifier',
52             P7033 => 'aev-identifier',
53             P7704 => 'europeana-entity-identifier',
54             P8168 => 'factgrid-identifier',
55             P8406 => 'grove-art-online-identifier',
56             P9575 => 'amc-artist-identifier',
57             P10086 => 'a-p-and-p-artist-identifier',
58             P10689 => 'osm-way',
59             P10787 => 'factgrid-identifier',
60             P11693 => 'osm-node',
               # All MusicBrainz-related properties share one identifier type.
61             (map {$_ => 'musicbrainz-identifier'} @musicbrainz_wikidata_relations),
62             },
63             endpoint => {
64             sparql => 'https://query.wikidata.org/sparql',
65             entitydata => 'https://www.wikidata.org/wiki/Special:EntityData/%s.json?flavor=dump',
66             },
67             prefix => 'http://www.wikidata.org/entity/',
68             uuid_relations => \@musicbrainz_wikidata_relations,
69             special_ids => [
70             {
71             property => 'P1711',
72             type => 'british-museum-term',
               # to_service returns the digits following 'BIOG' (undef when the value does not match);
               # from_service puts the 'BIOG' prefix back.
               # NOTE(review): the pattern needs at least two digits, so a single-digit id never matches -- confirm this is intended.
73             to_service => sub {($_[0] =~ /^BIOG([1-9][0-9]+)$/)[0]},
74             from_service => sub {sprintf('BIOG%u', $_[0])},
75             },
76             ],
               # Each attributes entry names a property and a callback (from_service or list_value).
               # The callbacks below unpack their arguments as ($value, $config) and return either
               # an (attribute name => value) list or the empty list.
               # Inside each map, "my $c = $_" gives every closure its own [property, attribute] pair.
77             attributes => [
               # Plain values stored as-is under the '*' key.
78             (map {my $c = $_; {
79             property => $c->[0],
80             from_service => sub { return ($c->[1] => {'*' => $_[0]})},
81             }} (
82             [P487 => 'icon_text'], # 'Unicode character'
83             [P1163 => 'media_subtype'], # 'MIME type'
84             )),
               # Date values: delegated to the shared datetime helper with the attribute name prepended.
85             (map {my $c = $_; {
86             property => $c->[0],
87             from_service => sub {_online_lookup__wikibase__from_service__datetime($c->[1] => @_)},
88             }} (
89             [P569 => 'date_of_birth'],
90             [P570 => 'date_of_death'],
91             )),
               # URL values: wrapped in a URI object when defined.
92             (map {my $c = $_; {
93             property => $c->[0],
94             from_service => sub {
95             my ($value, $config) = @_;
96             return ($c->[1] => {'*' => URI->new($value)}) if defined $value;
97             return ();
98             },
99             }} (
100             [P856 => 'website'], # 'official website'
101             )),
               # Entity references: $value->{id} is appended to the configured prefix to form the entity URI.
102             (map {my $c = $_; {
103             property => $c->[0],
104             from_service => sub {
105             my ($value, $config) = @_;
106             return ($c->[1] => {'*' => URI->new($config->{prefix} . $value->{id})}) if defined $value->{id};
107             return ();
108             },
109             }} (
110             [P21 => 'sex_or_gender'], # 'sex or gender'
111             [P376 => 'space_object'], # 'located on astronomical body'
112             )),
               # Entity references collected as a list: the callback is registered as list_value
               # and returns the URI wrapped in a nested array reference.
113             (map {my $c = $_; {
114             property => $c->[0],
115             list_value => sub {
116             my ($value, $config) = @_;
117             return ($c->[1] => [[URI->new($config->{prefix} . $value->{id})]]) if defined $value->{id};
118             return ();
119             },
120             }} (
121             [P31 => 'roles'], # 'instance of'
122             )),
123             { # 'sRGB colour hex triplet'
124             property => 'P465',
125             from_service => sub {
126             my ($value) = @_;
               # Accept exactly six hex digits in either case. The previous class [0-9a-f-AF] only
               # allowed the upper-case letters A and F and also accepted '-'. \A and \z reject a trailing newline.
127             return (displaycolour => {'*' => Data::URIID::Colour->new(rgb => sprintf('#%s', uc($value)))}) if defined($value) && $value =~ /\A[0-9a-fA-F]{6}\z/;
128             return ();
129             },
130             },
131             { # 'coordinate location'
132             property => 'P625',
133             from_service => \&_online_lookup__wikibase__from_service__coordinate,
134             },
135             ],
136             };
137              
138             my $config_factgrid = {
               # Static settings for the FactGrid Wikibase instance, using the same keys as the
               # Wikidata configuration: type, idmap (property id => identifier type name),
               # endpoint URLs, the entity URI prefix and the per-property attribute callbacks.
139             type => 'factgrid-identifier',
140             idmap => {
141             P76 => 'gnd-identifier',
142             P378 => 'viaf-identifier',
143             P980 => 'iconclass-identifier',
144             },
145             endpoint => {
146             sparql => 'https://database.factgrid.de/sparql',
147             entitydata => 'https://database.factgrid.de/wiki/Special:EntityData/%s.json?flavor=dump',
148             },
149             prefix => 'https://database.factgrid.de/entity/',
               # The callbacks below unpack their arguments as ($value, $config) and return either
               # an (attribute name => value) list or the empty list.
               # Inside each map, "my $c = $_" gives every closure its own [property, attribute] pair.
150             attributes => [
               # Date values: delegated to the shared datetime helper with the attribute name prepended.
151             (map {my $c = $_; {
152             property => $c->[0],
153             from_service => sub {_online_lookup__wikibase__from_service__datetime($c->[1] => @_)},
154             }} (
155             [P38 => 'date_of_death'],
156             [P77 => 'date_of_birth'],
157             )),
               # URL values: wrapped in a URI object when defined.
158             (map {my $c = $_; {
159             property => $c->[0],
160             from_service => sub {
161             my ($value, $config) = @_;
162             return ($c->[1] => {'*' => URI->new($value)}) if defined $value;
163             return ();
164             },
165             }} (
166             [P156 => 'website'], # 'Online presence'
167             )),
               # Entity references: $value->{id} is appended to the configured prefix to form the entity URI.
               # NOTE(review): P154 and P625 both feed 'sex_or_gender'; how two values for the same
               # attribute are combined is decided by the consumer, which is not visible here.
168             (map {my $c = $_; {
169             property => $c->[0],
170             from_service => sub {
171             my ($value, $config) = @_;
172             return ($c->[1] => {'*' => URI->new($config->{prefix} . $value->{id})}) if defined $value->{id};
173             return ();
174             },
175             }} (
176             [P154 => 'sex_or_gender'], # 'Gender'
177             [P625 => 'sex_or_gender'], # 'Predominant gender usage'
178             )),
179             { # 'Coordinate location'
180             property => 'P48',
181             from_service => \&_online_lookup__wikibase__from_service__coordinate,
182             },
183             { # 'Hex color'
184             property => 'P696',
185             from_service => sub {
186             my ($value) = @_;
               # Accept exactly six hex digits in either case. The previous class [0-9a-f-AF] only
               # allowed the upper-case letters A and F and also accepted '-'. \A and \z reject a trailing newline.
187             return (displaycolour => {'*' => Data::URIID::Colour->new(rgb => sprintf('#%s', uc($value)))}) if defined($value) && $value =~ /\A[0-9a-fA-F]{6}\z/;
188             return ();
189             },
190             },
191             ],
192             };
193              
194             my @fellig_types = qw(fellig-identifier fellig-box-number uuid oid uri wikidata-identifier e621-post-identifier e621-pool-identifier wikimedia-commons-identifier british-museum-term musicbrainz-identifier gnd-identifier e621tagtype); # identifier type names; presumably the types handled for the 'fellig' service -- the consumer is not visible in this part of the file, confirm there
195              
196             my %attrmap_osm = (
               # Source key => attribute name. Presumably the keys are OpenStreetMap tag keys --
               # the code reading this map is not visible here, confirm against it.
197             name => 'displayname',
198             description => 'description',
199             );
200              
201             my %attrmap_open_graph = (
               # Source key => attribute name. Presumably the keys are Open Graph property names
               # without their prefix -- the code reading this map is not visible here, confirm against it.
202             title => 'displayname',
203             description => 'description',
204             image => 'thumbnail',
205             );
206              
207             my %own_metadata = (
               # Built-in metadata shipped with the module.
               # Layout: category ('service') => service name => attribute name => {'*' => value}.
               # Services listed with an empty hash are known by name but carry no built-in attributes.
               # NOTE(review): '*' looks like a "no specific language" key -- confirm against the attribute reader.
208             service => {
209             'wikidata' => {
210             'displayname' => {'*' => 'Wikidata'},
211             },
212             'fellig' => {
213             'displayname' => {'*' => 'Fellig.org'},
214             },
215             'youtube' => {},
216             'youtube-nocookie' => {},
217             'dropbox' => {},
218             '0wx' => {},
219             'e621' => {},
220             'dnb' => {
221             'displayname' => {'*' => 'Deutsche Nationalbibliothek'},
222             },
223             'britishmuseum' => {
224             'displayname' => {'*' => 'British Museum'},
225             },
226             'musicbrainz' => {
227             'displayname' => {'*' => 'MusicBrainz'},
228             },
229             'wikimedia-commons' => {
230             'displayname' => {'*' => 'Wikimedia Commons'},
231             },
232             'wikipedia' => {
233             'displayname' => {'*' => 'Wikipedia'},
234             },
235             'noembed.com' => {},
236             'osm' => {
237             'displayname' => {'*' => 'OpenStreetMap'},
238             },
239             'overpass' => {},
240             'xkcd' => {},
241             'Data::URIID' => {},
242             'viaf' => {
243             'displayname' => {'*' => 'Virtual International Authority File'},
244             },
245             'europeana' => {},
246             'open-library' => {
247             'displayname' => {'*' => 'Open Library'},
248             },
249             'ngv' => {
250             'displayname' => {'*' => 'National Gallery of Victoria'},
251             },
252             'geonames' => {},
253             'find-a-grave' => {
254             'displayname' => {'*' => 'Find a Grave'},
255             },
256             'nla' => {
257             'displayname' => {'*' => 'National Library of Australia'},
258             },
259             'agsa' => {
260             'displayname' => {'*' => 'Art Gallery of South Australia'},
261             },
262             'amc' => {
263             'displayname' => {'*' => 'Australian Music Centre'},
264             },
265             'a-p-and-p' => {
266             'displayname' => {'*' => 'Australian Prints + Printmaking'},
267             },
268             'tww' => {
269             'displayname' => {'*' => 'The Watercolour World'},
270             },
271             'factgrid' => {
272             'displayname' => {'*' => 'FactGrid'},
273             },
274             'grove-art-online' => {
275             'displayname' => {'*' => 'Grove Art Online'},
276             },
277             'wikitree' => {
278             'displayname' => {'*' => 'WikiTree'},
279             },
280             'doi' => {
281             'displayname' => {'*' => 'doi.org'},
282             },
283             },
284             );
285              
286             sub _own_well_known {
287 11     11   17 state $res;
288              
289 11 100       73 return $res if defined $res;
290              
291             my %own_well_known = (
292             'wikidata-identifier' => {
293             Q2 => {
294             ids => {
295             'tagname' => 'Earth',
296             'aev-identifier' => 'scot/1917',
297             'factgrid-identifier' => 'Q176134',
298             'gnd-identifier' => '1135962553',
299             'viaf-identifier' => '6270149919445006650001',
300             'open-library-identifier' => 'earth_(planet)',
301             'unesco-thesaurus-identifier' => 'concept4083',
302             'geonames-identifier' => '6295630',
303             },
304             attributes => {
305             'displayname' => {'*' => 'Earth'},
306             'description' => {'*' => 'third planet from the sun in the solar system'},
307             },
308             },
309             Q405 => {attributes => {displayname => {'*' => 'Moon'}}},
310             Q6581072 => {attributes => {displayname => {'*' => 'female'}}},
311             Q6581097 => {attributes => {displayname => {'*' => 'male'}}}
312             },
313             'factgrid-identifier' => {
314             Q17 => {attributes => {displayname => {'*' => 'Female gender'}}},
315             Q18 => {attributes => {displayname => {'*' => 'Male gender'}}},
316             },
317             'media-subtype-identifier' => {
318 184         53004 (map {$_ => {
319             attributes => {
320             displayname => {'*' => $_},
321             },
322             ids => {
323             'uuid' => Data::URIID::Result->_media_subtype_to_uuid($_),
324             'media-subtype-identifier' => $_,
325             'tagname' => $_,
326             },
327             }}
328             # List copied from tags-universal:
329             qw(
330             application/gzip
331             application/http
332             application/json
333             application/ld+json
334             application/octet-stream
335             application/ogg
336             application/pdf
337             application/vnd.debian.binary-package
338             application/vnd.sirtx.vmv0
339             application/vnd.oasis.opendocument.base
340             application/vnd.oasis.opendocument.chart
341             application/vnd.oasis.opendocument.chart-template
342             application/vnd.oasis.opendocument.formula
343             application/vnd.oasis.opendocument.formula-template
344             application/vnd.oasis.opendocument.graphics
345             application/vnd.oasis.opendocument.graphics-template
346             application/vnd.oasis.opendocument.image
347             application/vnd.oasis.opendocument.image-template
348             application/vnd.oasis.opendocument.presentation
349             application/vnd.oasis.opendocument.presentation-template
350             application/vnd.oasis.opendocument.spreadsheet
351             application/vnd.oasis.opendocument.spreadsheet-template
352             application/vnd.oasis.opendocument.text
353             application/vnd.oasis.opendocument.text-master
354             application/vnd.oasis.opendocument.text-master-template
355             application/vnd.oasis.opendocument.text-template
356             application/vnd.oasis.opendocument.text-web
357             application/xhtml+xml
358             application/xml
359             audio/flac
360             audio/matroska
361             audio/ogg
362             image/gif
363             image/jpeg
364             image/png
365             image/svg+xml
366             image/webp
367             image/bmp
368             image/vnd.wap.wbmp
369             message/http
370             text/html
371             text/plain
372             video/matroska
373             video/matroska-3d
374             video/ogg
375             video/webm
376             )),
377             },
378             'language-tag-identifier' => {
379             en => {attributes => {displayname => {'*' => 'English'}}},
380             de => {attributes => {displayname => {'*' => 'German'}}},
381             nl => {attributes => {displayname => {'*' => 'Dutch'}}},
382             es => {attributes => {displayname => {'*' => 'Spanish'}}},
383             zh => {attributes => {displayname => {'*' => 'Chinese'}}},
384             af => {attributes => {displayname => {'*' => 'Afrikaans'}}},
385             ar => {attributes => {displayname => {'*' => 'Arabic'}}},
386             sw => {attributes => {displayname => {'*' => 'Swahili'}}},
387             },
388             'small-identifier' => {
389             map {$_->{sid} => {
390             ids => {
391             uuid => $_->{uuid},
392             },
393             attributes => {
394             displayname => {'*' => $_->{name}},
395             },
396 548         5397 }} (
397             {uuid => 'ddd60c5c-2934-404f-8f2d-fcb4da88b633', sid => 1, name => 'also-shares-identifier'},
398             {uuid => '8be115d2-dc2f-4a98-91e1-a6e3075cbc31', sid => 2, name => 'uuid'},
399             {uuid => 'bfae7574-3dae-425d-89b1-9c087c140c23', sid => 3, name => 'tagname'},
400             {uuid => '7f265548-81dc-4280-9550-1bd0aa4bf748', sid => 4, name => 'has-type'},
401             {uuid => 'a8d1637d-af19-49e9-9ef8-6bc1fbcf6439', sid => 5, name => 'uri'},
402             {uuid => 'd08dc905-bbf6-4183-b219-67723c3c8374', sid => 6, name => 'oid'},
403             # Unassigned: 7
404             {uuid => 'd0a4c6e2-ce2f-4d4c-b079-60065ac681f1', sid => 8, name => 'language-tag-identifier'},
405             {uuid => 'ce7aae1e-a210-4214-926a-0ebca56d77e3', sid => 9, name => 'wikidata-identifier'},
406             {uuid => '923b43ae-a50e-4db3-8655-ed931d0dd6d4', sid => 10, name => 'specialises'},
407             {uuid => 'eacbf914-52cf-4192-a42c-8ecd27c85ee1', sid => 11, name => 'unicode-string'},
408             {uuid => '928d02b0-7143-4ec9-b5ac-9554f02d3fb1', sid => 12, name => 'integer'},
409             {uuid => 'dea3782c-6bcb-4ce9-8a39-f8dab399d75d', sid => 13, name => 'unsigned-integer'},
410             # Unassigned: 14 - 15
411             {uuid => '6ba648c2-3657-47c2-8541-9b73c3a9b2b4', sid => 16, name => 'default-context'},
412             {uuid => '52a516d0-25d8-47c7-a6ba-80983e576c54', sid => 17, name => 'proto-file'},
413             {uuid => '1cd4a6c6-0d7c-48d1-81e7-4e8d41fdb45d', sid => 18, name => 'final-file-size'},
414             {uuid => '6085f87e-4797-4bb2-b23d-85ff7edc1da0', sid => 19, name => 'text-fragment'},
415             {uuid => '4c9656eb-c130-42b7-9348-a1fee3f42050', sid => 20, name => 'also-list-contains-also'},
416             {uuid => '298ef373-9731-491d-824d-b2836250e865', sid => 21, name => 'proto-message'},
417             {uuid => '7be4d8c7-6a75-44cc-94f7-c87433307b26', sid => 22, name => 'proto-entity'},
418             {uuid => '65bb36f2-b558-48af-8512-bca9150cca85', sid => 23, name => 'proxy-type'},
419             {uuid => 'a1c478b5-0a85-4b5b-96da-d250db14a67c', sid => 24, name => 'flagged-as'},
420             {uuid => '59cfe520-ba32-48cc-b654-74f7a05779db', sid => 25, name => 'marked-as'},
421             {uuid => '2bffc55d-7380-454e-bd53-c5acd525d692', sid => 26, name => 'roaraudio-error-number'},
422             {uuid => 'f87a38cb-fd13-4e15-866c-e49901adbec5', sid => 27, name => 'small-identifier'},
423             {uuid => 'd2750351-aed7-4ade-aa80-c32436cc6030', sid => 28, name => 'also-has-role'},
424             # Unassigned: 29 - 31
425             {uuid => '448c50a8-c847-4bc7-856e-0db5fea8f23b', sid => 32, name => 'final-file-encoding'},
426             {uuid => '79385945-0963-44aa-880a-bca4a42e9002', sid => 33, name => 'final-file-hash'},
427             {uuid => '3fde5688-6e34-45e9-8f33-68f079b152c8', sid => 34, name => 'SEEK_SET'},
428             {uuid => 'bc598c52-642e-465b-b079-e9253cd6f190', sid => 35, name => 'SEEK_CUR'},
429             {uuid => '06aff30f-70e8-48b4-8b20-9194d22fc460', sid => 36, name => 'SEEK_END'},
430             {uuid => '59a5691a-6a19-4051-bc26-8db82c019df3', sid => 37, name => 'inode'},
431             {uuid => 'ae8ec1de-38ec-4c58-bbd7-7ff43e1100fc', sid => 38, name => 'in-reply-to'},
432             {uuid => '8a31868b-0a26-42e0-ac54-819a9ed9dcab', sid => 39, name => 'in-response-to'},
433             {uuid => 'ffa893a2-9a0e-4013-96b4-307e2bca15b9', sid => 40, name => 'has-message-body'},
434             {uuid => 'b72508ba-7fb9-42ae-b4cf-b850b53a16c2', sid => 41, name => 'account'},
435             # Unassigned: 42
436             {uuid => '4e855294-4b4f-443e-b67b-8cb9d733a889', sid => 43, name => 'backwards'},
437             {uuid => '6ad2c921-7a3e-4859-ae02-98e42522e2f8', sid => 44, name => 'forwards'},
438             {uuid => '6d34d4a1-8fbc-4e22-b3e0-d50f43d97cb1', sid => 45, name => 'false'},
439             {uuid => 'eb50b3dc-28be-4cfc-a9ea-bd7cee73aed5', sid => 46, name => 'true'},
440             # Unassigned: 47
441             {uuid => 'dd8e13d3-4b0f-5698-9afa-acf037584b20', sid => 48, name => 'zero'},
442             {uuid => 'bd27669b-201e-51ed-9eb8-774ba7fef7ad', sid => 49, name => 'one'},
443             {uuid => '73415b5a-31fb-5b5a-bb82-8ea5eb3b12f7', sid => 50, name => 'two'},
444             # Unassigned: 51
445             {uuid => 'e425be57-58cb-43fb-ba85-c1a55a6a2ebd', sid => 52, name => 'ancestor-of'},
446             {uuid => 'cdee05f4-91ec-4809-a157-8c58dcb23715', sid => 53, name => 'descendant-of'},
447             {uuid => '26bda7b1-4069-4003-925c-2dbf47833a01', sid => 54, name => 'sibling-of'},
448             {uuid => 'a75f9010-9db3-4d78-bd78-0dd528d6b55d', sid => 55, name => 'see-also'},
449             {uuid => 'd1963bfc-0f79-4b1a-a95a-b05c07a63c2a', sid => 56, name => 'also-at'},
450             {uuid => 'c6e83600-fd96-4b71-b216-21f0c4d73ca6', sid => 57, name => 'also-shares-colour'},
451             {uuid => 'a942ba41-20e6-475e-a2c1-ce891f4ac920', sid => 58, name => 'also-identifies-as'},
452             {uuid => 'ac14b422-e7eb-4e5b-bccd-ad5a65aeab96', sid => 59, name => 'also-is-identified-as'},
453             {uuid => '3c2c155f-a4a0-49f3-bdaf-7f61d25c6b8c', sid => 60, name => 'Earth'},
454             {uuid => 'fade296d-c34f-4ded-abd5-d9adaf37c284', sid => 61, name => 'black'},
455             {uuid => '1a2c23fa-2321-47ce-bf4f-5f08934502de', sid => 62, name => 'white'},
456             {uuid => 'f9bb5cd8-d8e6-4f29-805f-cc6f2b74802d', sid => 63, name => 'grey'},
457             {uuid => 'dd708015-0fdd-4543-9751-7da42d19bc6a', sid => 64, name => 'Sun'},
458             {uuid => '23026974-b92f-4820-80f6-c12f4dd22fca', sid => 65, name => 'Luna'},
459             # Unassigned: 66 - 69
460             {uuid => 'c50134ca-0a32-5c5c-833c-2686043c0b3f', sid => 70, name => 'English'},
461             {uuid => '6895ad9b-2ba6-5933-8455-968aa781a88b', sid => 71, name => 'German'},
462             {uuid => 'da816af7-e49b-5406-b712-8dc96d968541', sid => 72, name => 'Dutch'},
463             {uuid => '52b75ef6-f7fd-5786-8512-0e6cb8374675', sid => 73, name => 'Spanish'},
464             {uuid => 'a27015a5-e6f1-5d38-b00e-a65f7ddd39a3', sid => 74, name => 'Chinese'},
465             {uuid => 'd642eff3-bee6-5d09-aea9-7c47b181dd83', sid => 75, name => 'male'},
466             {uuid => 'db9b0db1-a451-59e8-aa3b-9994e683ded3', sid => 76, name => 'female'},
467             {uuid => 'f6249973-59a9-47e2-8314-f7cf9a5f77bf', sid => 77, name => 'person'},
468             {uuid => '5501e545-f39a-4d62-9f65-792af6b0ccba', sid => 78, name => 'body'},
469             {uuid => 'a331f2c5-20e5-4aa2-b277-8e63fd03438d', sid => 79, name => 'character'},
470             {uuid => '838eede5-3f93-46a9-8e10-75165d10caa1', sid => 80, name => 'cat'},
471             {uuid => '252314f9-1467-48bf-80fd-f8b74036189f', sid => 81, name => 'dog'},
472             {uuid => '571fe2aa-95f6-4b16-a8d2-1ff4f78bdad1', sid => 82, name => 'lion'},
473             {uuid => '36297a27-0673-44ad-b2d8-0e4e97a9022d', sid => 83, name => 'tiger'},
474             {uuid => '5d006ca0-c27b-4529-b051-ac39c784d5ee', sid => 84, name => 'fox'},
475             {uuid => '914b3a09-4e01-4afc-a065-513c199b6c24', sid => 85, name => 'squirrel'},
476             {uuid => '95f1b56e-c576-4f32-ac9b-bfdd397c36a6', sid => 86, name => 'wolf'},
477             {uuid => 'dcf8f4f0-c15e-44bd-ad76-0d483079db16', sid => 87, name => 'human'},
478             # Unassigned: 88
479             {uuid => 'f901e5e0-e217-41c8-b752-f7287af6e6c3', sid => 89, name => 'mammal'},
480             {uuid => '7ed4160e-06d6-44a2-afe8-457e2228304d', sid => 90, name => 'vertebrate'},
481             {uuid => '0510390c-9604-4362-b603-ea09e48de7b7', sid => 91, name => 'animal'},
482             {uuid => 'bccdaf71-0c82-422e-af44-bb8396bf90ed', sid => 92, name => 'plant'},
483             {uuid => 'a0b8122e-d11b-4b78-a266-0bb90d1c1cbe', sid => 93, name => 'fungus'},
484             {uuid => '3e92ac2d-f8fe-48bf-acd7-8505d23d07ab', sid => 94, name => 'organism'},
485             {uuid => '115c1bcf-02cd-4a57-bd02-1d9f1ea8dd01', sid => 95, name => 'any-taxon'},
486             {uuid => 'd2526d8b-25fa-4584-806b-67277c01c0db', sid => 96, name => 'inode-number'},
487             {uuid => 'cd5bfb11-620b-4cce-92bd-85b7d010f070', sid => 97, name => 'also-on-filesystem'},
488             {uuid => '63c1da19-0dd6-4181-b3fa-742b9ceb2903', sid => 98, name => 'filesystem'},
489             {uuid => '5ecb4562-dad7-431d-94a6-d301dcea8d37', sid => 99, name => 'parent'},
490             {uuid => '1a9215b2-ad06-4f4f-a1e7-4cbb908f7c7c', sid => 100, name => 'child'},
491             {uuid => 'a7cfbcb0-45e2-46b9-8f60-646ab2c18b0b', sid => 101, name => 'displaycolour'},
492             # Unassigned: 102
493             {uuid => 'd926eb95-6984-415f-8892-233c13491931', sid => 103, name => 'tag-links'},
494             {uuid => '2c07ddc1-bdb8-435a-9614-4e6782a5101f', sid => 104, name => 'tag-linked-by'},
495             {uuid => '4efce01d-411e-5e9c-9ed9-640ecde31d1d', sid => 105, name => 'parallel'},
496             {uuid => '9aad6c99-67cd-45fd-a8a6-760d863ce9b5', sid => 106, name => 'also-where'},
497             {uuid => '8efbc13b-47e5-4d92-a960-bd9a2efa9ccb', sid => 107, name => 'generated-by'},
498             # Unassigned: 108
499             {uuid => '83e3acbb-eb8d-4dfb-8f2f-ae81cc436d4b', sid => 109, name => 'batch'},
500             {uuid => 'b17f36c6-c397-4e84-bd32-1eccb3f00671', sid => 110, name => 'set'},
501             {uuid => 'aa9d311a-89b7-44cc-a356-c3fc93dfa951', sid => 111, name => 'category'},
502             {uuid => '2c7e15ed-aa2f-4e2f-9a1d-64df0c85875a', sid => 112, name => 'chat-0-word-identifier'},
503             {uuid => '039e0bb7-5dd3-40ee-a98c-596ff6cce405', sid => 113, name => 'sirtx-numerical-identifier'},
504             # Unassigned: 114 - 118
505             {uuid => 'c9ec3bea-558e-4992-9b76-91f128b6cf29', sid => 119, name => 'red'},
506             {uuid => 'c0e957d0-b5cf-4e53-8e8a-ff0f5f2f3f03', sid => 120, name => 'green'},
507             {uuid => '3dcef9a3-2ecc-482d-a98b-afffbc2f64b9', sid => 121, name => 'blue'},
508             {uuid => 'abcbf48d-c302-4be1-8c5c-a8de4471bcbb', sid => 122, name => 'cyan'},
509             {uuid => 'a30d070d-9909-40d4-a33a-474c89e5cd45', sid => 123, name => 'magenta'},
510             {uuid => '2892c143-2ae7-48f1-95f4-279e059e7fc3', sid => 124, name => 'yellow'},
511             {uuid => '5c41829f-5062-4868-9c31-2ec98414c53d', sid => 125, name => 'orange'},
512             {uuid => 'c90acb33-b8ea-4f55-bd86-beb7fa5cf80a', sid => 126, name => 'savannah'},
513             # Unassigned: 127 - 131
514             {uuid => 'caf11e36-d401-4521-8f10-f6b36125415c', sid => 132, name => 'icon'},
515             {uuid => 'e7330249-53b8-4dab-aa43-b5bfa331a8e5', sid => 133, name => 'thumbnail'},
516             {uuid => '2ec4a6b0-e6bf-40cd-96a2-490cbc8d6c4b', sid => 134, name => 'empty-set'},
517             # Unassigned: 135 - 143
518             {uuid => 'be6d8e00-a6c1-5c44-8ffc-f7393e14aa23', sid => 144, name => 'three'},
519             {uuid => '79422b2c-b6f6-547f-949f-0cba44fa69b7', sid => 145, name => 'four'},
520             # Unassigned: 146 - 158
521             {uuid => '7cb67873-33bc-4a93-b53f-072ce96c6f1a', sid => 159, name => 'hrair'},
522             {uuid => '82d529be-0f00-4b4f-a43f-4a22de5f5312', sid => 160, name => 'gtin'},
523             {uuid => 'e8c156be-4fe7-4b13-b4fa-e207213caef8', sid => 161, name => 'subject-type'},
524             {uuid => '931f155e-5a24-499b-9fbb-ed4efefe27fe', sid => 162, name => 'doi'},
525             # Unassigned: 163 - 175
526             {uuid => 'c44ee482-0fb7-421b-9aad-a6c8f099a4b6', sid => 176, name => 'Universe'},
527             {uuid => '0ac40a25-d20f-42ed-ae1c-64e62a56d673', sid => 177, name => 'Observable universe'},
528             # Unassigned: 178 - 188
529             {uuid => '8a1cb2d6-df2f-46db-89c3-a75168adebf6', sid => 189, name => 'generator'},
530             {uuid => '3c9f40b4-2b98-44ce-b4dc-97649eb528ae', sid => 190, name => 'using-namespace'},
531             {uuid => 'bc2d2e7c-8aa4-420e-ac07-59c422034de9', sid => 191, name => 'for-type'},
532             {uuid => '5cbdbe1c-e8b6-4cac-b274-b066a7f86b28', sid => 192, name => 'left'},
533             {uuid => '3b1858a9-996b-4831-b600-eb55ab7bb0d1', sid => 193, name => 'right'},
534             {uuid => 'f158e457-9a75-42ac-b864-914b34e813c7', sid => 194, name => 'up'},
535             {uuid => '4c834505-8e77-4da6-b725-e11b6572d979', sid => 195, name => 'down'},
536             # Unassigned: 196 - 207
537             {uuid => 'fd324dee-4bc7-4716-bf0c-6d50a69961b7', sid => 208, name => 'north'},
538             {uuid => '8685e1d8-f313-403a-9f4d-48fce22f9312', sid => 209, name => 'east'},
539             {uuid => 'c65c5baf-630e-4a28-ace5-1082b032dd07', sid => 210, name => 'south'},
540             {uuid => '7ed25dc4-5afc-4b39-8446-4df7748040a4', sid => 211, name => 'west'},
541             {uuid => '7ce365d8-71d2-4bd6-95c9-888a8f1d834c', sid => 212, name => 'northeast'},
542             {uuid => '39be7db6-1dc7-41c3-acd2-de19ad17a97f', sid => 213, name => 'northwest'},
543             {uuid => '33233365-20ec-4073-9962-0cb4b1b1e48d', sid => 214, name => 'southeast'},
544             {uuid => 'b47ecfde-02b1-4790-85dd-c2e848c89d2e', sid => 215, name => 'southwest'},
545             # Unassigned: 216 - 223
546             {uuid => '4076d9f9-ca42-5976-b41b-e54aa912ccf3', sid => 224, name => 'application/octet-stream'},
547             {uuid => '552ec0dc-8678-5657-9422-8a71ea8e5cd0', sid => 225, name => 'text/plain'},
548             {uuid => 'ecd556c0-7ecb-5b88-ab0a-ec4e09d61782', sid => 226, name => 'text/html'},
549             {uuid => '7c859f1d-693b-5070-a928-dfd051a4f93d', sid => 227, name => 'image/png'},
550             {uuid => '3970f481-591e-530a-b962-a2e87b2efde2', sid => 228, name => 'image/svg+xml'},
551             {uuid => '03e6c035-e046-5b7e-a016-55b51c4836ea', sid => 229, name => 'application/pdf'},
552             ),
553             },
554             'sirtx-numerical-identifier' => {
555             map {$_->{sni} => {
556             ids => {
557             uuid => $_->{uuid},
558             },
559             attributes => {
560             displayname => {'*' => $_->{name}},
561             },
562 84         483 }} (
563             {uuid => '039e0bb7-5dd3-40ee-a98c-596ff6cce405', sni => 10, name => 'sirtx-numerical-identifier'},
564             {uuid => 'f87a38cb-fd13-4e15-866c-e49901adbec5', sni => 115, name => 'small-identifier'},
565             {uuid => '2bffc55d-7380-454e-bd53-c5acd525d692', sni => 116, name => 'roaraudio-error-number'},
566             {uuid => '66beb503-9159-41cb-9e7f-2c3eb6b4b5ff', sni => 117, name => 'roaraudio-error-symbol'},
567             {uuid => '2c7e15ed-aa2f-4e2f-9a1d-64df0c85875a', sni => 118, name => 'chat-0-word-identifier'},
568             {uuid => '8be115d2-dc2f-4a98-91e1-a6e3075cbc31', sni => 119, name => 'uuid'},
569             {uuid => 'd08dc905-bbf6-4183-b219-67723c3c8374', sni => 120, name => 'oid'},
570             {uuid => 'a8d1637d-af19-49e9-9ef8-6bc1fbcf6439', sni => 121, name => 'uri'},
571             {uuid => 'f4b073ff-0b53-4034-b4e4-4affe5caf72c', sni => 122, name => 'ascii-code-point'},
572             {uuid => 'ce7aae1e-a210-4214-926a-0ebca56d77e3', sni => 123, name => 'wikidata-identifier'},
573             {uuid => 'd73b6550-5309-46ad-acc9-865c9261065b', sni => 127, name => 'sirtx-function-number'},
574             {uuid => 'd690772e-de18-4714-aa4e-73fd35e8efc9', sni => 128, name => 'sirtx-function-name'},
575             {uuid => '5e80c7b7-215e-4154-b310-a5387045c336', sni => 129, name => 'sirtx-logical'},
576             {uuid => 'e54d427b-e18f-5d43-ac38-da26173633a0', sni => 185, name => 'sha-1-160'},
577             {uuid => 'ed34c363-6fd3-5b68-9520-986e773e27c0', sni => 186, name => 'sha-3-512'},
578             {uuid => '6d34d4a1-8fbc-4e22-b3e0-d50f43d97cb1', sni => 189, name => 'false'},
579             {uuid => 'eb50b3dc-28be-4cfc-a9ea-bd7cee73aed5', sni => 190, name => 'true'},
580             {uuid => '4076d9f9-ca42-5976-b41b-e54aa912ccf3', sni => 197, name => 'application/octet-stream'},
581             {uuid => 'f718f85b-6b41-53c0-9c66-8796df90c725', sni => 198, name => 'application/vnd.sirtx.vmv0'},
582             {uuid => 'ba4e7f37-467c-5a36-910c-b32974642fa8', sni => 199, name => 'image/vnd.wap.wbmp'},
583             {uuid => '7e7750e4-ab50-50ba-8c9c-b3158e1f47e0', sni => 209, name => 'image/bmp'},
584             ),
585             },
586             'uuid' => {
587 4         143 map {$_->{uuid} => {
588             attributes => {
589             displayname => {'*' => $_->{name}},
590             },
591 4         84 }} (
592             {uuid => '878aac4c-581b-4257-998c-19a9c4003d22', name => 'colour'},
593             ),
594             },
595             );
596              
597 4         353 foreach my $id (keys %{$own_well_known{'wikidata-identifier'}}) {
  4         33  
598 16         67 my $uuid = Data::Identifier->new(wd => $id)->uuid;
599 16         3585 $own_well_known{uuid}{$uuid} = $own_well_known{'wikidata-identifier'}{$id};
600             }
601              
602 4         175 my @colours = (
603             # Abstract colours:
604             [black => 'fade296d-c34f-4ded-abd5-d9adaf37c284' => '#000000'],
605             [white => '1a2c23fa-2321-47ce-bf4f-5f08934502de' => '#ffffff'],
606             [red => 'c9ec3bea-558e-4992-9b76-91f128b6cf29' => '#ff0000'],
607             [green => 'c0e957d0-b5cf-4e53-8e8a-ff0f5f2f3f03' => '#008000'],
608             [blue => '3dcef9a3-2ecc-482d-a98b-afffbc2f64b9' => '#0000ff'],
609             [cyan => 'abcbf48d-c302-4be1-8c5c-a8de4471bcbb' => '#00ffff'],
610             [magenta => 'a30d070d-9909-40d4-a33a-474c89e5cd45' => '#ff00ff'],
611             [yellow => '2892c143-2ae7-48f1-95f4-279e059e7fc3' => '#ffff00'],
612             [grey => 'f9bb5cd8-d8e6-4f29-805f-cc6f2b74802d' => '#808080'],
613             [orange => '5c41829f-5062-4868-9c31-2ec98414c53d' => '#ff8000'],
614             [savannah => 'c90acb33-b8ea-4f55-bd86-beb7fa5cf80a' => '#decc9c'],
615              
616             # VGA colours:
617             [black => '32f5e924-0ddb-4427-ad81-2d099b590c68' => '#000000'],
618             [maroon => '2aeedebd-2814-41b3-9cfd-f992e9a60827' => '#800000'],
619             [green => 'd045d86c-3437-4b42-aa77-2d7ac6ff1656' => '#008000'],
620             [olive => 'a64b447b-3eb3-4a71-92fe-f4399e845892' => '#808000'],
621             [navy => 'f8ace5ee-45a9-4e46-8324-095b6ab452b5' => '#000080'],
622             [purple => '7cd1228f-b55b-4b86-a057-f620e7934f7f' => '#800080'],
623             [teal => 'c7d4cc0e-dd3b-465c-b1ed-6fea3d424b9f' => '#008080'],
624             [gray => 'aa82b49e-12c2-41a4-9fd8-800957be9161' => '#808080'],
625             [silver => 'cdb01cbf-0eca-4aad-b732-caf55abc7566' => '#C0C0C0'],
626             [red => '6d62509a-aac5-412b-953b-e002867090ef' => '#FF0000'],
627             [lime => '18b0ad77-95a1-4ddc-8a3e-52fb1fca2ead' => '#00FF00'],
628             [yellow => 'b85fca40-ab8e-4ab3-b582-43cb0979b994' => '#FFFF00'],
629             [blue => '7f2e2d6a-ec70-417b-8418-a5d67c05b7e0' => '#0000FF'],
630             [fuchsia => '465087e0-a8d0-4a42-8f05-a1aea0d53385' => '#FF00FF'],
631             [aqua => '4feff8a2-dbe4-447b-b052-db333b9ebee3' => '#00FFFF'],
632             [white => 'a671d5f4-5a1d-498d-b3ec-52b92f15218e' => '#FFFFFF'],
633             );
634 4         69 my $colour_roles = {'*' => [[URI->new('urn:uuid:878aac4c-581b-4257-998c-19a9c4003d22')]]};
635 4         11506 my @displaycolours;
636              
637 4         17 foreach my $list (\@colours, \@displaycolours) {
638 8         18 foreach my $colour (@{$list}) {
  8         22  
639 216         331 my ($name, $uuid, $displaycolour) = @{$colour};
  216         589  
640 216   100     1443 my $e = ($own_well_known{uuid}{$uuid} //= {})->{attributes} //= {};
      100        
641 216         730 my $colour_object = Data::URIID::Colour->new(rgb => $displaycolour, register => 1);
642              
643 216 100       605 if (defined $name) {
644 108   50     670 $e->{displayname} //= {};
645 108   33     463 $e->{displayname}{'*'} //= $name;
646             }
647              
648 216   100     909 $e->{displaycolour} //= {};
649 216   66     748 $e->{displaycolour}{'*'} //= $colour_object;
650 216         366 $e->{roles} = $colour_roles;
651              
652 216 100       723 if ($list != \@displaycolours) {
653 108         366 push(@displaycolours, [undef, $colour_object->ise, $displaycolour]);
654             }
655             }
656             }
657              
658             # Add an entry for each colour used.
659 4         30 foreach my $type (keys %own_well_known) {
660 28         38 foreach my $entry (values %{$own_well_known{$type}}) {
  28         149  
661 1072   100     2586 my $dpca = $entry->{attributes}{displaycolour} // next;
662 180   50     333 my $displaycolour = $dpca->{'*'} // next;
663 180   50     390 my $e = ($own_well_known{uuid}{$displaycolour->ise} //= {})->{attributes} //= {};
      50        
664 180   50     374 $e->{displaycolour} //= {};
665 180   33     409 $e->{displaycolour}{'*'} //= $displaycolour;
666             }
667             }
668              
669 4         16 foreach my $language (keys %{$own_well_known{'language-tag-identifier'}}) {
  4         26  
670 32         125 my $uuid = Data::Identifier::Generate->generic(namespace => '47dd950c-9089-4956-87c1-54c122533219', style => 'id-based', request => $language)->uuid;
671 32         6683 $own_well_known{uuid}{$uuid} = $own_well_known{'language-tag-identifier'}{$language};
672             }
673             # Mix and match entries by identifiers to speed up lookups.
674             # This step must always be the last one.
675 4         53 foreach my $id_type_outer (keys %own_well_known) {
676 28         35 foreach my $src_id (keys %{$own_well_known{$id_type_outer}}) {
  28         381  
677 1295         2030 my $src = $own_well_known{$id_type_outer}{$src_id};
678 1295   100     2966 my $s_ids = $src->{ids} //= {};
679 1295   50     2386 my $s_attrs = $src->{attributes} //= {};
680              
681 1295         2145 $s_ids->{$id_type_outer} = $src_id;
682              
683 1295         1539 foreach my $id_type_inner (keys %{$s_ids}) {
  1295         2706  
684 2679   100     8108 my $dst = ($own_well_known{$id_type_inner} //= {})->{$s_ids->{$id_type_inner}} //= {};
      100        
685 2679 100       5610 if ($src != $dst) {
686 1037   100     2903 my $d_ids = $dst->{ids} //= {};
687 1037   100     2894 my $d_attrs = $dst->{attributes} //= {};
688              
689 1037   66     1423 $s_ids->{$_} //= $d_ids->{$_} foreach keys %{$d_ids};
  1037         2392  
690 1037   66     1302 $s_attrs->{$_} //= $d_attrs->{$_} foreach keys %{$d_attrs};
  1037         2483  
691 1037         3186 $own_well_known{$id_type_inner}{$s_ids->{$id_type_inner}} = $src;
692             }
693             }
694             }
695             }
696              
697 4         196 return $res = \%own_well_known;
698             }
699              
700              
701             # Private method:
702             sub new {
703 13     13 0 67 my ($pkg, %opts) = @_;
704 13         47 weaken($opts{extractor});
705 13         85 return bless \%opts, $pkg;
706             }
707              
708             # Private helper:
709             sub _is_online {
710 0     0   0 my ($self) = @_;
711 0   0     0 return $self->online && $self->extractor->online;
712             }
713              
714             # Private method:
715             sub _online_lookup {
716 0     0   0 my ($self, $result, %opts) = @_;
717 0         0 my $func;
718              
719 0 0       0 return undef unless $self->_is_online;
720 0         0 $func = $self->can(sprintf('_online_lookup__%s', $self->name =~ tr/\.:\-/_/r));
721 0 0       0 return undef unless $func;
722              
723 0         0 return $self->$func($result, %opts);
724             }
725              
726             # Private method:
727             sub _offline_lookup {
728 26     26   76 my ($self, $result, %opts) = @_;
729 26         46 my $func;
730              
731 26         69 $func = $self->can(sprintf('_offline_lookup__%s', $self->name =~ tr/\.:\-/_/r));
732 26 100       154 return undef unless $func;
733              
734 12         48 return $self->$func($result, %opts);
735             }
736              
737              
738             sub name {
739 26     26 1 82 my ($self) = @_;
740 26   66     175 return $self->{name} //= $self->extractor->ise_to_name(service => $self->ise);
741             }
742              
743              
744             sub online {
745 0     0 1 0 my ($self, $new_value) = @_;
746              
747 0 0       0 if (scalar(@_) == 2) {
748 0         0 $self->{online} = !!$new_value;
749             }
750              
751 0         0 return $self->{online};
752             }
753              
754              
755             sub setting {
756 0     0 1 0 my ($self, $setting, $new_value) = @_;
757              
758 0   0     0 $self->{setting} //= {};
759              
760 0 0       0 if (scalar(@_) == 3) {
761 0         0 $self->{setting}{$setting} = $new_value;
762             }
763              
764 0         0 return $self->{setting}{$setting};
765             }
766              
767              
768              
769             sub register_service {
770 0     0 1 0 my ($pkg, @args) = @_;
771 0         0 my ($name, $id) = Data::URIID->_register_service(@args);
772              
773 0   0     0 $own_metadata{services}{$name} //= {};
774              
775 0 0       0 if (defined(my $displayname = $id->displayname(default => undef, no_defaults => 1))) {
776 0         0 $own_metadata{services}{$name}{displayname} = {'*' => $displayname};
777             }
778             }
779              
780              
781             # Private helper:
782             sub _extra_lookup_services {
783             return {
784 4         94 'wikidata' => [values(%{$config_wikidata->{idmap}}), qw(wikidata-identifier british-museum-term uuid)],
785             'wikimedia-commons' => [qw(wikimedia-commons-identifier)],
786             'fellig' => \@fellig_types,
787             'noembed.com' => [qw(youtube-video-identifier)],
788             'osm' => [qw(osm-node osm-way osm-relation)],
789             'overpass' => [qw(wikidata-identifier)],
790             'Data::URIID' => [
791             qw(uuid oid uri), # ISE,
792 4         19 keys %{_own_well_known()},
793             ],
794             'Data::Identifier' => [
795             qw(uuid oid uri), # ISE,
796             qw(e621-post-identifier e621-pool-identifier e621tagtype e621tag), # e621
797             qw(danbooru2chanjp-post-identifier danbooru2chanjp-tag), # danbooru2chanjp
798 4         41 keys %{_own_well_known()},
799             ],
800 4     4   12 'factgrid' => [values(%{$config_factgrid->{idmap}}), qw(factgrid-identifier)],
  4         209  
801             'doi' => [qw(doi)],
802             'iconclass' => ['iconclass-identifier'],
803             'xkcd' => ['xkcd-num'],
804             'e621' => ['e621-post-identifier', 'e621-pool-identifier'],
805             'furaffinity' => ['furaffinity-post-identifier'],
806             'imgur' => ['imgur-post-identifier'],
807             'notalwaysright' => ['notalwaysright-post-identifier'],
808             'ruthede' => ['ruthede-comic-post-identifier'],
809             'danbooru2chanjp' => ['danbooru2chanjp-post-identifier'],
810             }
811             }
812              
813             sub _extra_lookup_services_digests {
814             return {
815 4     4   24 'e621' => ['md-5-128'],
816             };
817             }
818              
819             # Private helper:
820             sub _get_html {
821 0     0   0 my ($self, $url, %opts) = @_;
822              
823 0 0       0 if ($self->setting('network_deny')) {
824 0         0 return undef;
825             }
826              
827 0 0       0 if (eval {require HTML::TreeBuilder::XPath; 1;}) {
  0         0  
  0         0  
828 0         0 my Data::URIID $extractor = $self->extractor;
829              
830 0 0       0 if (defined(my $query = $opts{query})) {
831 0 0       0 $url = ref($url) ? $url->clone : URI->new($url);
832 0         0 $url->query_form($url->query_form, %{$query});
  0         0  
833             }
834              
835             # We cannot use decoded_content()'s charset decoding here as it's buggy for JSON (and others?) response (at least in v6.18).
836 0         0 return eval {
837 0         0 my $msg = $extractor->_ua->get($url, 'Accept' => 'text/html');
838 0 0       0 return undef unless $msg->is_success;
839 0         0 my $val = $msg->decoded_content(ref => 1, charset => 'none');
840 0         0 my $r = HTML::TreeBuilder::XPath->new;
841 0         0 $r->parse(decode($msg->content_charset, $$val));
842 0         0 $r->eof;
843 0         0 $r;
844             };
845             } else {
846 0         0 return undef;
847             }
848             }
849              
850             # Private helper:
851             sub _get_json {
852 0     0   0 my ($self, $url, %opts) = @_;
853 0         0 my Data::URIID $extractor = $self->extractor;
854              
855 0 0       0 if ( $self->setting('network_deny') ) {
856 0         0 return undef;
857             }
858              
859 0 0       0 if (defined(my $local_override = $opts{local_override})) {
860 0 0       0 if (defined(my $local_override_dir = $self->setting('local_override_dir'))) {
861 0         0 my ($path, @args) = @{$local_override};
  0         0  
862              
863 0 0   0   0 if (all { defined } @args) {
  0         0  
864 0         0 my $data;
865              
866 0         0 $path =~ s/%s/uri_escape_utf8(shift(@args))/ge;
  0         0  
867 0         0 $path = $local_override_dir.'/'.$path;
868              
869 0         0 $data = $self->_get_json_file($path);
870 0 0       0 return $data if defined $data;
871             }
872             }
873             }
874              
875 0 0       0 if (defined(my $query = $opts{query})) {
876 0 0       0 $url = ref($url) ? $url->clone : URI->new($url);
877 0         0 $url->query_form($url->query_form, %{$query});
  0         0  
878             }
879              
880             # We cannot use decoded_content()'s charset decoding here as it's buggy for JSON response (at least in v6.18).
881 0         0 return eval {
882 0         0 my $msg = $extractor->_ua->get($url, 'Accept' => 'application/json');
883 0 0       0 return undef unless $msg->is_success;
884 0         0 my $val = $msg->decoded_content(ref => 1, charset => 'none');
885 0         0 from_json(decode($msg->content_charset, $$val));
886             };
887             }
888              
889             # Private helper:
890             sub _get_json_file {
891 0     0   0 my ($self, $filename) = @_;
892 0 0 0     0 my $fh = eval { open(my $fh, '<', $filename) or die $!; $fh; } // eval { open(my $fh, '<:gzip', $filename.'.gz') or die $!; $fh; };
  0 0       0  
  0         0  
  0         0  
  0         0  
893              
894 0 0       0 return undef unless $fh;
895              
896 0         0 return eval {
897 0         0 local $/ = undef;
898 0         0 from_json(scalar <$fh>);
899             };
900             }
901              
902             # Private helper:
903             sub _load_open_graph {
904 0     0   0 my ($self, $res, $html, $keys, $filters) = @_;
905 0   0     0 my $attr = $res->{attributes} //= {};
906 0         0 my %raw = map {$_->attr('property') => $_->attr('content')} $html->findnodes('/html/head/meta[@property]');
  0         0  
907              
908 0   0     0 $filters //= {};
909              
910 0         0 foreach my $key (@{$keys}) {
  0         0  
911 0   0     0 my $attrname = $attrmap_open_graph{$key} // croak 'BUG: Unknown key name: '.$key;
912 0         0 my $filter = $filters->{$key};
913              
914 0 0       0 if (defined(my $value = $raw{'og:'.$key})) {
915 0 0       0 if (length($value)) {
916 0 0       0 if (defined $filter) {
917 0 0       0 next unless $value =~ $filter;
918             }
919              
920 0   0     0 $attr->{$attrname} //= {};
921 0   0     0 $attr->{$attrname}{'*'} //= $value;
922             }
923             }
924             }
925             }
926              
927             # Private helper:
928             sub _get_uriid_decompiled_types_json {
929 0     0   0 my ($self) = @_;
930 0         0 state $json = {types => {
931             'oid' => {alias_for => 'd08dc905-bbf6-4183-b219-67723c3c8374'},
932             'uri' => {alias_for => 'a8d1637d-af19-49e9-9ef8-6bc1fbcf6439'},
933             'uuid' => {alias_for => '8be115d2-dc2f-4a98-91e1-a6e3075cbc31'},
934             'wikidata-identifier' => {alias_for => 'ce7aae1e-a210-4214-926a-0ebca56d77e3'},
935             'gtin' => {alias_for => '82d529be-0f00-4b4f-a43f-4a22de5f5312'},
936             'sid' => {alias_for => 'f87a38cb-fd13-4e15-866c-e49901adbec5'},
937             }};
938 0         0 return state $decompiled = do {{
939             forward => $json,
940 0         0 backward => {map {$json->{types}{$_}{alias_for} => $_} grep {defined $json->{types}{$_}{alias_for}} keys %{$json->{types}}},
  0         0  
  0         0  
  0         0  
941             }};
942             }
943              
944             # Private lookup drivers:
945             sub _offline_lookup__Data__URIID {
946 6     6   16 my ($self, $result) = @_;
947 6         22 my Data::URIID $extractor = $self->extractor;
948 6   50     26 my $ise_order = $result->{primary}{ise_order} // [qw(uuid oid uri)];
949 6         21 state $own_well_known = _own_well_known();
950 6         22 my %attr;
951             my %ids;
952 6         0 my @found;
953              
954             outer:
955 6         11 foreach my $ise_type (@{$ise_order}) {
  6         16  
956 18   50     53 my $ise = eval {$result->id($ise_type)} // next;
  18         103  
957 18         47 foreach my $type (qw(service type action)) {
958 54   50     130 my $name = eval { $extractor->ise_to_name($type => $ise) } // next;
  54         203  
959 0   0     0 my $displayname = $attr{displayname} //= {};
960 0   0     0 $displayname->{'*'} //= $name;
961 0         0 push(@found, {attributes => $own_metadata{$type}{$name}});
962 0         0 last outer;
963             }
964             }
965              
966 6         22 foreach my $id_type (keys %{$own_well_known}) {
  6         60  
967 84         156 my $id = eval {$result->id($id_type, _no_convert => 1)};
  84         280  
968 84 100       379 if (defined $id) {
969 8 50       46 if (defined(my $entry = $own_well_known->{$id_type}{$id})) {
970 0         0 push(@found, $entry);
971             }
972             }
973             }
974              
975 6         30 foreach my $found (@found) {
976 0         0 my $attributes = $found->{attributes};
977 0         0 my $ids = $found->{ids};
978              
979 0   0     0 foreach my $attr (keys %{$attributes//{}}) {
  0         0  
980 0   0     0 $attr{$attr} //= {};
981 0         0 foreach my $key (keys %{$attributes->{$attr}}) {
  0         0  
982 0         0 $attr{$attr}{$key} = $attributes->{$attr}{$key};
983             }
984             }
985              
986 0   0     0 foreach my $id_type (keys %{$ids//{}}) {
  0         0  
987 0   0     0 $ids{$id_type} //= $ids->{$id_type};
988             }
989             }
990              
991             {
992 6         12 my %res;
  6         8  
993 6 50       25 $res{id} = \%ids if scalar keys %ids;
994 6 50       19 $res{attributes} = \%attr if scalar keys %attr;
995 6         68 return \%res;
996             }
997             }
998              
999             sub _offline_lookup__Data__Identifier {
1000 6     6   16 my ($self, $result) = @_;
1001 6   50     28 my $ise_order = $result->{primary}{ise_order} // [qw(uuid oid uri)];
1002 6         15 my %attr;
1003             my %ids;
1004              
1005 6         11 eval { $result->id('uri') }; # prefil cache. See RT#157959
  6         44  
1006              
1007 6         18 foreach my $id (
1008             map {
1009 24         8936 eval {$result->id($_, as => 'Data::Identifier', _no_convert => 1)}
  24         90  
1010             } (
1011             $result->{primary}{type},
1012 6         19 @{$ise_order},
1013             )) {
1014              
1015 20 50       2710 next unless defined $id;
1016              
1017 20 50       73 if (defined(my $displayname = $id->displayname(default => undef, no_defaults => 1))) {
1018 0   0     0 $attr{displayname} //= {'*' => $displayname};
1019             }
1020              
1021 20         259 foreach my $type (qw(uuid oid uri sid)) {
1022 80         264 my $func = $id->can($type);
1023              
1024 80 100       222 if (defined(my $value = $id->$func(default => undef))) {
1025 60 50 66     19679 $ids{$type eq 'sid' ? 'small-identifier' : $type} //= $value;
1026             }
1027             }
1028             }
1029              
1030             {
1031 6         115 my %res;
  6         16  
1032 6 50       33 $res{id} = \%ids if scalar keys %ids;
1033 6 50       39 $res{attributes} = \%attr if scalar keys %attr;
1034 6         54 return \%res;
1035             }
1036             }
1037              
1038             sub _online_lookup__wikidata {
1039 0     0     my ($self, $result) = @_;
1040 0           return _online_lookup__wikibase($self, $result, $config_wikidata);
1041             }
1042              
1043             sub _online_lookup__factgrid {
1044 0     0     my ($self, $result) = @_;
1045 0           return _online_lookup__wikibase($self, $result, $config_factgrid);
1046             }
1047              
1048             sub _online_lookup__wikibase {
1049 0     0     my ($self, $result, $config) = @_;
1050 0           my $id = eval {$result->id($config->{type})};
  0            
1051              
1052 0 0         unless (defined $id) {
1053 0           $id = $self->_online_lookup__wikibase__stage_0($result, $config);
1054             }
1055              
1056 0 0         if (defined $id) {
1057 0           return $self->_online_lookup__wikibase__stage_1($result, $id, $config);
1058             }
1059              
1060 0           return undef;
1061             }
1062              
1063             sub _online_lookup__wikibase__stage_0 {
1064 0     0     my ($self, $result, $config) = @_;
1065 0           my @ids;
1066              
1067 0           foreach my $property (keys %{$config->{idmap}}) {
  0            
1068 0           my $id = eval {$result->id($config->{idmap}{$property})};
  0            
1069 0 0         if (defined $id) {
1070 0 0         if ($id !~ /['"]/) {
1071 0           push(@ids, sprintf('?item wdt:%s "%s"', $property, $id));
1072             }
1073             }
1074             }
1075              
1076 0           foreach my $special (@{$config->{special_ids}}) {
  0            
1077 0           my $id = eval {$result->id($special->{type})};
  0            
1078 0 0         if (defined $id) {
1079 0           push(@ids, sprintf('?item wdt:%s "%s"', $special->{property}, $special->{to_service}->($id)));
1080             }
1081             }
1082              
1083             # UUID is special:
1084             {
1085 0           my $id = eval {$result->id('uuid')};
  0            
  0            
1086 0 0         if (defined $id) {
1087 0           foreach my $property (@{$config->{uuid_relations}}) {
  0            
1088 0           push(@ids, sprintf('?item wdt:%s "%s"', $property, $id));
1089             }
1090             }
1091             }
1092              
1093 0 0         return undef unless scalar @ids;
1094              
1095             {
1096 0           my $q = sprintf('SELECT * WHERE { { %s } } LIMIT 1', join('} UNION {', @ids));
  0            
1097 0           my $res = $self->_get_json($config->{endpoint}{sparql}, query => {format => 'json', query => $q});
1098 0           my $item = eval {$res->{results}{bindings}[0]{item}};
  0            
1099 0 0         return undef unless $item;
1100 0 0 0       return undef unless ($item->{type} // '') eq 'uri';
1101 0 0 0       if (($item->{value} // '') =~ m#^\Q$config->{prefix}\E([QP][1-9][0-9]*)$#) {
1102 0           return $1;
1103             }
1104             }
1105              
1106 0           return undef;
1107             }
1108              
1109             sub _online_lookup__wikibase__stage_1 {
1110 0     0     my ($self, $result, $id, $config) = @_;
1111 0           my %ids = ($config->{type} => $id);
1112 0           my %attr;
1113 0           my %res = (id => \%ids, attributes => \%attr);
1114 0           my $data = $self->_get_json(sprintf($config->{endpoint}{entitydata}, $id), local_override => ['%s.json', $id]);
1115              
1116 0           $data = $data->{entities}{$id};
1117              
1118 0           $attr{displayname} = {map {$_ => $data->{labels}{$_}{value}} keys %{$data->{labels}}};
  0            
  0            
1119 0           $attr{description} = {map {$_ => $data->{descriptions}{$_}{value}} keys %{$data->{descriptions}}};
  0            
  0            
1120              
1121 0           $res{wikidata_sitelinks} = $data->{sitelinks};
1122 0           foreach my $property (keys %{$config->{idmap}}) {
  0            
1123 0   0       foreach my $entry (@{$data->{claims}{$property} // []}) {
  0            
1124 0           $ids{$config->{idmap}{$property}} = $entry->{mainsnak}{datavalue}{value};
1125             }
1126             }
1127              
1128 0           foreach my $special (@{$config->{special_ids}}) {
  0            
1129 0   0       foreach my $entry (@{$data->{claims}{$special->{property}} // []}) {
  0            
1130 0   0       $ids{$special->{type}} //= $special->{from_service}->($entry->{mainsnak}{datavalue}{value});
1131             }
1132             }
1133              
1134 0           foreach my $attribute (@{$config->{attributes}}) {
  0            
1135 0   0       foreach my $entry (@{$data->{claims}{$attribute->{property}} // []}) {
  0            
1136 0 0         if (defined $attribute->{from_service}) {
    0          
1137 0           my %res = $attribute->{from_service}->($entry->{mainsnak}{datavalue}{value}, $config);
1138 0   0       $attr{$_} //= $res{$_} foreach keys %res;
1139             } elsif (defined $attribute->{list_value}) {
1140 0           my %res = $attribute->{list_value}->($entry->{mainsnak}{datavalue}{value}, $config);
1141 0           foreach my $key (keys %res) {
1142 0   0       $attr{$key} //= [];
1143 0           push(@{$attr{$key}}, @{$res{$key}});
  0            
  0            
1144             }
1145             }
1146             }
1147             }
1148              
1149 0           return \%res;
1150             }
1151              
1152             sub _online_lookup__wikibase__from_service__datetime {
1153 0     0     my ($key, $value) = @_;
1154 0           my $precision = $value->{precision};
1155              
1156             #use Data::Dumper;
1157             #die Dumper $value;
1158              
1159 0 0         if ($precision >= 9) {
1160 0           require DateTime::Format::ISO8601;
1161              
1162 0           my $dt = DateTime::Format::ISO8601->parse_datetime($value->{time} =~ s/^\+//r =~ s/-00-00T/-01-01T/r =~ s/-00T/-01T/r);
1163 0           my $val;
1164              
1165 0 0         if ($precision == 9) {
    0          
1166 0           $val = $dt->year;
1167             } elsif ($precision == 10) {
1168 0           $val = sprintf('%.4u-%.2u', $dt->year, $dt->month);
1169             } else {
1170 0           $val = $dt->ymd;
1171             }
1172              
1173 0           return ($key => $val);
1174             }
1175 0           return ();
1176             }
1177              
1178             sub _online_lookup__wikibase__from_service__coordinate {
1179 0     0     my ($value) = @_;
1180 0           my %attr;
1181              
1182 0           foreach my $subkey (qw(altitude latitude longitude)) {
1183 0 0         $attr{$subkey} = {'*' => $value->{$subkey} + 0} if defined $value->{$subkey};
1184             }
1185 0 0         $attr{space_object} = {'*' => URI->new($value->{globe})} if defined $value->{globe};
1186              
1187 0           return %attr;
1188             }
1189              
1190             sub _online_lookup__wikimedia_commons {
1191 0     0     my ($self, $result) = @_;
1192 0           my $res = {
1193             'attributes' => {},
1194             };
1195 0           my $json = $self->_get_json(
1196             'https://commons.wikimedia.org/w/api.php',
1197             query => {
1198             action => 'query',
1199             titles => $result->id,
1200             prop => 'imageinfo',
1201             iiprop => 'url|mime|size|sha1|canonicaltitle',
1202             iiurlwidth => 240, # get thumbnail
1203             format => 'json'
1204             });
1205              
1206 0           foreach my $page_id ( keys(%{ $json->{query}->{pages} }) ) { # only one item
  0            
1207 0           my $page = $json->{query}->{pages}->{$page_id};
1208 0           my $imageinfo = $page->{imageinfo}->[0];
1209              
1210 0           $res->{attributes}->{displayname} = { '*' => $imageinfo->{canonicaltitle} };
1211 0           $res->{attributes}->{thumbnail} = { '*' => URI->new($imageinfo->{thumburl}) };
1212 0           $res->{attributes}->{final_file_size} = { '*' => int($imageinfo->{size}) };
1213 0           $res->{attributes}->{media_subtype} = { '*' => $imageinfo->{mime} };
1214 0           $res->{digest} = { 'sha-1-160' => $imageinfo->{sha1} };
1215             }
1216              
1217 0           return $res;
1218             }
1219              
# Online lookup against the fellig.org overview API.
#
# Walks @fellig_types in order. For the first type for which $result carries
# an identifier and _get_json() yields a document with at least one entry in
# "main_result", the first referenced tag is converted into a lookup result.
#
# Parameters:
#   $self   - the service object (extractor() and _get_json() are used).
#   $result - the result object whose identifiers are looked up.
#
# Returns a hashref with the keys "id", "attributes" and "digest",
# or undef if no identifier type produced a main result.
sub _online_lookup__fellig {
    my ($self, $result) = @_;
    my Data::URIID $extractor = $self->extractor;

    foreach my $type (@fellig_types) {
        # id() is wrapped in eval: a failure for this type only means "try the next type".
        my $id = eval {$result->id($type, _no_convert => 1)} // next;
        my $json = $self->_get_json(
            sprintf('https://api.fellig.org/v0/overview/%s/%s', $type, uri_escape($id)),
            local_override => ['overview/%s/%s.json', $type, $id],
        ) // next;

        foreach my $idx (@{$json->{main_result}}) {
            my $tag = $json->{tags}[$idx];
            my %ids;
            my %attr;
            my %res = (id => \%ids, attributes => \%attr, digest => $tag->{unvaried}{'final-file-hash'});

            # "unvaried" is processed first: for identifiers the first value seen wins (//=),
            # for attributes a value from "varied" replaces the one from "unvaried".
            foreach my $class (qw(unvaried varied)) {
                # This is a trusted service, so we only check for the id types to be valid ISE
                # but accept them all.
                foreach my $relation ('ids', 'tag-linked-by') {
                    foreach my $idtype (keys %{$tag->{$class}{$relation}//{}}) {
                        if ($extractor->is_ise($idtype)) {
                            $ids{$idtype} //= $tag->{$class}{$relation}{$idtype}[0];
                        }
                    }
                }

                $attr{displayname}     = {'*' => $tag->{$class}{displayname}} if defined $tag->{$class}{displayname};
                $attr{icon_text}       = {'*' => $tag->{$class}{icontext}} if defined $tag->{$class}{icontext};
                $attr{displaycolour}   = {'*' => Data::URIID::Colour->new(rgb => $tag->{$class}{displaycolour})} if defined $tag->{$class}{displaycolour};
                $attr{final_file_size} = {'*' => $tag->{$class}{'final-file-size'}} if defined $tag->{$class}{'final-file-size'};
                $attr{icon}            = {'*' => URI->new($tag->{$class}{icon})} if defined $tag->{$class}{icon};

                # The encoding is given as a UUID; map it to a media subtype via the well known table.
                if (defined $tag->{$class}{'final-file-encoding'}) {
                    if (defined(my $wk = _own_well_known()->{uuid}{$tag->{$class}{'final-file-encoding'}})) {
                        if (defined(my $media_subtype = $wk->{ids}{'media-subtype-identifier'})) {
                            # Stored in the same {'*' => value} shape as every other attribute.
                            $attr{media_subtype} = {'*' => $media_subtype};
                        }
                    }
                }
            }

            # Only the first entry of "main_result" is used.
            return \%res;
        }
    }

    return undef;
}
1266              
# Online lookup via noembed.com (currently only for the "youtube" service).
#
# Asks $result for a "render" URL on the service, falling back to an "embed"
# URL, and passes it to the noembed.com API.
#
# Parameters:
#   $self   - the service object (_get_json() is used).
#   $result - the result object providing url().
#
# Returns a hashref with the key "attributes" (displayname and thumbnail,
# each only if present in the response), or undef if no URL could be built
# or _get_json() yielded undef.
sub _online_lookup__noembed_com {
    my ($self, $result) = @_;

    foreach my $service (qw(youtube)) {
        # url() is wrapped in eval so a failure for one action falls through to the next.
        # Both calls pass the service by name so the fallback asks for the same service.
        my $url = eval {$result->url(service => $service, action => 'render')}
               // eval {$result->url(service => $service, action => 'embed')}
               // next;
        my $json = $self->_get_json('https://noembed.com/embed', query => {url => $url}) // next;
        my %attr;
        my %res = (attributes => \%attr);

        $attr{displayname} = {'*' => $json->{title}} if defined $json->{title};
        $attr{thumbnail}   = {'*' => URI->new($json->{thumbnail_url})} if defined $json->{thumbnail_url};

        return \%res;
    }

    return undef;
}
1284              
# Converts one decoded OpenStreetMap element into a lookup result.
#
# Parameters:
#   $self    - the service object (not used by this helper).
#   $element - hashref of the element; "lat", "lon" and "tags" are read.
#
# Returns a hashref with the keys "id" and "attributes".
sub _online_lookup__osm__handle {
    my ($self, $element) = @_;
    my $osm_tags = $element->{tags} // {};
    my (%found_ids, %attributes);

    # If it's on OSM it's on earth.
    $attributes{space_object} = {'*' => URI->new('http://www.wikidata.org/entity/Q2')};

    foreach my $coordinate ([latitude => 'lat'], [longitude => 'lon']) {
        my ($attribute, $field) = @{$coordinate};
        next unless defined $element->{$field};
        $attributes{$attribute} = {'*' => $element->{$field} + 0};
    }

    # "ele:wgs84" is handled last and therefore replaces a value taken from "ele".
    foreach my $field ('ele', 'ele:wgs84') {
        next unless defined $osm_tags->{$field};
        $attributes{altitude} = {'*' => $osm_tags->{$field} + 0};
    }

    if (defined $osm_tags->{wikidata}) {
        $found_ids{'wikidata-identifier'} = $osm_tags->{wikidata};
    }

    foreach my $osm_key (keys %attrmap_osm) {
        my %values;

        $values{'*'} = $osm_tags->{$osm_key} if defined $osm_tags->{$osm_key};

        # Collect variants of the form "<key>:<two or three lowercase letters>".
        foreach my $candidate (keys %{$osm_tags}) {
            next unless $candidate =~ /^\Q$osm_key\E:([a-z]{2,3})$/;
            $values{$1} = $osm_tags->{$candidate};
        }

        next unless scalar keys %values;
        $attributes{$attrmap_osm{$osm_key}} = \%values;
    }

    return {id => \%found_ids, attributes => \%attributes};
}
1316              
# Online lookup against the OpenStreetMap API (v0.6).
#
# Tries the element types node, way and relation in that order and uses the
# first one for which $result has an identifier and _get_json() yields a document.
#
# Returns whatever _online_lookup__osm__handle() returns for the first
# element of that document, or undef if there is no such element or no type matched.
sub _online_lookup__osm {
    my ($self, $result) = @_;

    foreach my $type (qw(node way relation)) {
        # id() is wrapped in eval; a failure just moves on to the next type.
        my $id = eval {$result->id('osm-'.$type)};
        next unless defined $id;

        my $api_url = sprintf('https://api.openstreetmap.org/api/0.6/%s/%s.json', $type, $id);
        my $json    = $self->_get_json($api_url, local_override => ['%s/%s.json', $type, $id]);
        next unless defined $json;

        my $element = eval {$json->{elements}[0]};
        return undef unless defined $element;

        return $self->_online_lookup__osm__handle($element);
    }

    return undef;
}
1329              
# Online lookup against the Overpass API: finds an OpenStreetMap element
# tagged with the Wikidata identifier of $result.
#
# Returns the hashref produced by _online_lookup__osm__handle() for the first
# element of the response, extended by an "osm-<type>" identifier when the
# element type is node, way or relation and its id matches RE_UINT.
# Returns undef if _get_json() yields undef or the response has no element.
sub _online_lookup__overpass {
    my ($self, $result) = @_;
    my $wikidata_id = $result->id('wikidata-identifier');
    my $query = sprintf("[out:json][timeout:25];\n(node[\"wikidata\"=\"%s\"];\nway[\"wikidata\"=\"%s\"];\nrelation[\"wikidata\"=\"%s\"];\n);\nout;",
        ($wikidata_id) x 3,
    );

    my $json = $self->_get_json('https://overpass-api.de/api/interpreter', query => {data => $query});
    return undef unless defined $json;

    my $element = eval {$json->{elements}[0]};
    return undef unless defined $element;

    my $res      = $self->_online_lookup__osm__handle($element);
    my $osm_type = $element->{type} // '';
    my $osm_id   = $element->{id}   // '';

    # Only record the identifier for the known element types and a well formed id.
    my $known_type = grep { $osm_type eq $_ } qw(node way relation);
    if ($known_type && $osm_id =~ Data::URIID::Result->RE_UINT) {
        $res->{id}->{'osm-'.$osm_type} = $osm_id;
    }

    return $res;
}
1351              
# Online lookup for xkcd comics.
#
# Options:
#   metadata_url - URL to fetch instead of the one $result->url() provides.
#
# Returns a hashref with the keys "id" and "attributes",
# or undef if _get_json() yields undef.
sub _online_lookup__xkcd {
    my ($self, $result, %opts) = @_;
    # The comic number is only used for the local override; it may be undef.
    my $comic_id = eval {$result->id('xkcd-num')};
    my $json = $self->_get_json(
        $opts{metadata_url} // $result->url(service => 'xkcd', action => 'metadata'),
        local_override => ['%s.json', $comic_id],
    );
    return undef unless defined $json;

    my %res = (id => {}, attributes => {});

    my $num = $json->{num};
    if (defined($num) && $num =~ Data::URIID::Result->RE_UINT) {
        $res{id}{'xkcd-num'} = int($num);
    }

    my $title = $json->{title};
    if (defined($title) && length($title)) {
        $res{attributes}{displayname} = {'*' => $title};
    }

    return \%res;
}
1365              
# Online lookup for DOIs: fetches the metadata document for $result and
# extracts its title.
#
# Returns a hashref with the key "attributes" (holding a displayname only
# when the title is a non-empty string), or undef if _get_json() yields undef.
sub _online_lookup__doi {
    my ($self, $result, %opts) = @_;
    my $json = $self->_get_json($result->url(service => 'doi', action => 'metadata'));
    return undef unless defined $json;

    my %res = (attributes => {});

    my $title = $json->{title};
    if (defined($title) && length($title)) {
        $res{attributes}{displayname} = {'*' => $title};
    }

    return \%res;
}
1376              
# Online lookup for Iconclass identifiers.
#
# Reads the first entry of "graph" in the metadata document and collects one
# display name per language from "prefLabel" and then "dc:subject"; the first
# value seen for a language is kept. The default ('*') display name is the
# English one, falling back to the German one.
#
# Returns a hashref with the key "attributes", or undef if _get_json() yields
# undef or the document has no first graph entry.
sub _online_lookup__iconclass {
    my ($self, $result, %opts) = @_;
    my $iconclass_id = $result->id('iconclass-identifier');
    my $json = $self->_get_json(
        $result->url(service => 'iconclass', action => 'metadata'),
        local_override => ['%s.jsonld', $iconclass_id],
    );
    return undef unless defined $json;

    my $item = $json->{graph}[0];
    return undef unless defined $item;

    my %name_for;

    foreach my $source (qw(prefLabel dc:subject)) {
        foreach my $label (@{$item->{$source}}) {
            $name_for{$label->{lang}} //= $label->{value};
        }
    }

    $name_for{'*'} = $name_for{en} // $name_for{de};

    return {attributes => {displayname => \%name_for}};
}
1395              
# Online lookup for e621 posts.
#
# The metadata document must contain exactly one entry in "posts"; otherwise
# undef is returned.
#
# Returns a hashref with the keys "id", "attributes" and "digest",
# or undef if _get_json() yields undef.
sub _online_lookup__e621 {
    my ($self, $result, %opts) = @_;
    my $json = $self->_get_json($result->url(service => 'e621', action => 'metadata'));
    return undef unless defined $json;

    my $posts = $json->{posts};
    return undef unless scalar(@{$posts}) == 1;

    my $post    = $posts->[0];
    my $preview = $post->{preview};
    my $file    = $post->{file};
    my (%ids, %attr, %digest);

    $ids{'e621-post-identifier'} = int($post->{id});

    if (defined $file->{ext}) {
        $attr{ext} = {'*' => $file->{ext}};
    }
    if (defined $file->{size}) {
        $attr{final_file_size} = {'*' => $file->{size}};
    }
    if (defined $preview->{url}) {
        $attr{thumbnail} = {'*' => URI->new($preview->{url})};
    }
    if (defined $file->{md5}) {
        $digest{'md-5-128'} = $file->{md5};
    }

    # "tags" maps group names to lists of tag names; all groups are flattened into one list.
    if (defined(my $tagroot = $post->{tags})) {
        my @tagged_as;

        foreach my $tagname (map {@{$_}} values %{$tagroot}) {
            push(@tagged_as, [Data::Identifier->new('6fe0dbf0-624b-48b3-b558-0394c14bad6a' => $tagname)]);
        }

        $attr{tagged_as} = \@tagged_as;
    }

    return {id => \%ids, attributes => \%attr, digest => \%digest};
}
1423              
# Online lookup for danbooru2chanjp: reads the JSON metadata embedded in the
# info page of $result.
#
# Returns a hashref with the keys "attributes" and "digest" and, when the
# metadata names both an image and a directory, "url_overrides" for the
# "fetch" and "file-fetch" actions. Returns undef if _get_html() yields undef
# or the page has no metadata <script> element.
sub _online_lookup__danbooru2chanjp {
    my ($self, $result, %opts) = @_;
    my $url = $result->url(service => 'danbooru2chanjp', action => 'info');
    my $html = $self->_get_html($url);
    return undef unless defined $html;

    my ($script) = $html->findnodes('//script[@id="metadata" and @type="application/json"]');
    return undef unless defined $script;

    my $json = from_json(($script->content_list)[0]);
    my (%attr, %digest);
    my %res = (attributes => \%attr, digest => \%digest);

    # Each value is validated before it is accepted.
    if (defined($json->{hash}) && $json->{hash} =~ /^[0-9a-f]{32}$/) {
        $digest{'md-5-128'} = $json->{hash};
    }
    if (defined($json->{filesize}) && int($json->{filesize})) {
        $attr{final_file_size} = {'*' => int($json->{filesize})};
    }
    if (defined($json->{ext}) && $json->{ext} =~ /^\.?([0-9a-z]{1,5})$/) {
        $attr{ext} = {'*' => $1};
    }

    # Tags are given as one whitespace separated string.
    if (defined(my $tags = $json->{tags})) {
        $attr{tagged_as} = [
            map  { [Data::Identifier->new('c5632c60-5da2-41af-8b60-75810b622756' => $_)] }
            grep { length($_) }
            split(/\s+/, $tags)
        ];
    }

    my ($image, $directory) = @{$json}{qw(image directory)};
    if (defined($image) && defined($directory)) {
        # Derive the file URL from the info URL: same host, no query, path below /images/.
        my $file_fetch = $url->clone;

        $file_fetch->query(undef);
        $file_fetch->path_segments('', 'images', $directory, $image);

        $res{url_overrides} = {
            'fetch'      => $file_fetch,
            'file-fetch' => $file_fetch,
        };
    }

    return \%res;
}
1461              
# Online lookup for furaffinity submissions.
#
# Loads the Open Graph properties title, description and image from the info
# page (the image only if it matches the t.furaffinity.net pattern) and looks
# for the "Download" link of the submission.
#
# Returns a hashref with the key "attributes" and, if a download link was
# found, "url_overrides" for the "fetch" and "file-fetch" actions.
# Returns undef if _get_html() yields undef.
sub _online_lookup__furaffinity {
    my ($self, $result, %opts) = @_;
    my $html = $self->_get_html($result->url(service => 'furaffinity', action => 'info')) // return undef;
    my %attr;
    my %res = (attributes => \%attr);

    # No separate table of <meta> properties is built here: nothing in this
    # lookup reads one, _load_open_graph() is handed the document itself.
    $self->_load_open_graph(\%res, $html, [qw(title description image)], {image => qr#^https://t\.furaffinity\.net/#});

    # If several links match, the last one wins.
    foreach my $download ($html->findnodes('/html/body//div[@id="submission_page"]//a[text()="Download" and @href]')) {
        my $url = URI->new($download->attr('href'), 'https');

        # Force https, also for links given without a scheme.
        $url->scheme('https');
        $url = $url->as_string;

        $res{url_overrides} = {
            'fetch'      => $url,
            'file-fetch' => $url,
        };
    }

    return \%res;
}
1485              
# Online lookup for imgur.
#
# Loads the Open Graph properties title and image from the info page and
# takes the "twitter:player:stream" meta value, if non-empty, as the URL for
# the "stream-fetch" action.
#
# Returns a hashref with the keys "attributes" and "url_overrides",
# or undef if _get_html() yields undef.
sub _online_lookup__imgur {
    my ($self, $result, %opts) = @_;
    my $html = $self->_get_html($result->url(service => 'imgur', action => 'info'));
    return undef unless defined $html;

    my %attr;
    my %res = (attributes => \%attr, url_overrides => {});

    # <meta name="..." content="..."> elements from the document head.
    my %meta = map {$_->attr('name') => $_->attr('content')} $html->findnodes('/html/head/meta[@name]');

    $self->_load_open_graph(\%res, $html, [qw(title image)]);

    my $stream = $meta{'twitter:player:stream'};
    if (defined($stream) && length($stream)) {
        $res{url_overrides}{'stream-fetch'} = $stream;
    }

    return \%res;
}
1503              
# Online lookup for notalwaysright.
#
# Loads the Open Graph title from the info page and, if the page carries a
# non-empty "og:url", uses it for the "info" and "render" actions.
#
# Returns a hashref with the keys "attributes" and "url_overrides",
# or undef if _get_html() yields undef.
sub _online_lookup__notalwaysright {
    my ($self, $result, %opts) = @_;
    my $html = $self->_get_html($result->url(service => 'notalwaysright', action => 'info'));
    return undef unless defined $html;

    my %attr;
    my %res = (attributes => \%attr, url_overrides => {});

    # <meta property="..." content="..."> elements from the document head.
    my %meta = map {$_->attr('property') => $_->attr('content')} $html->findnodes('/html/head/meta[@property]');

    $self->_load_open_graph(\%res, $html, [qw(title)]);

    my $og_url = $meta{'og:url'};
    if (defined($og_url) && length($og_url)) {
        $res{url_overrides}{$_} = $og_url foreach qw(info render);
    }

    return \%res;
}
1524              
# Online lookup for ruthe.de cartoons.
#
# Loads the Open Graph image from the info page. If that image is a cartoon
# strip (https://ruthe.de/cartoons/strip_<number>.jpg) it is the full size
# file: it becomes the "file-fetch" URL and the thumbnail is replaced by the
# same file name prefixed with "tn_".
#
# Returns a hashref with the key "attributes" and, for cartoon strips,
# "url_overrides". Returns undef if _get_html() yields undef.
sub _online_lookup__ruthede {
    my ($self, $result, %opts) = @_;
    my $html = $self->_get_html($result->url(service => 'ruthede', action => 'info')) // return undef;
    my %attr;
    my %res = (attributes => \%attr);

    $self->_load_open_graph(\%res, $html, [qw(image)]);

    if (defined($attr{thumbnail}) && defined(my $url = $attr{thumbnail}{'*'})) {
        # Match any strip number, not just one specific cartoon.
        if ($url =~ m#^(https://ruthe\.de/cartoons/)(strip_[0-9]+\.jpg)$#) {
            $attr{thumbnail} = {'*' => $1.'tn_'.$2};
            $res{url_overrides} = {
                'file-fetch' => $url,
            };
        }
    }

    return \%res;
}
1544              
1545             # --- Overrides for Data::URIID::Base ---
1546              
# Returns the display name of this service.
#
# Uses the default ('*') display name recorded for the service in
# %own_metadata; if there is none, the service name is returned instead.
sub displayname {
    my ($self, %opts) = @_;
    my $name = $self->name;

    my $metadata = $own_metadata{services}{$name};
    return $name unless defined $metadata;

    return $metadata->{displayname}{'*'} // $name;
}
1555              
1556             1;
1557              
1558             __END__
1559              
1560             =pod
1561              
1562             =encoding UTF-8
1563              
1564             =head1 NAME
1565              
1566             Data::URIID::Service - Extractor for identifiers from URIs
1567              
1568             =head1 VERSION
1569              
1570             version v0.20
1571              
1572             =head1 SYNOPSIS
1573              
1574             use Data::URIID;
1575              
1576             my $extractor = Data::URIID->new;
1577             my $result = $extractor->lookup( $URI );
1578             my $service = $result->attribute('service');
1579              
1580             my $name = $service->name;
1581             my $ise = $service->ise;
1582              
1583             This module represents a single service.
1584              
1585             This package inherits from L<Data::URIID::Base>.
1586              
1587             =head1 METHODS
1588              
1589             =head2 name
1590              
1591             my $name = $service->name;
1592              
1593             Returns the name of this service.
1594              
1595             =head2 online
1596              
1597             my $online = $service->online( [ $new_value ] );
1598              
1599             Gets or sets the online status of the service.
1600             If this value is false no online operations are permitted.
1601             In addition to this value being true, the online value for the extractor needs to be true.
1602              
1603             See also L<"extractor">, L<Data::URIID/"online">.
1604              
1605             =head2 setting
1606              
1607             my $value = $service->setting( $setting[, $new_value ] );
1608              
1609             Gets or sets the setting C<$setting> of the service.
1610              
1611             The available settings depend on the service. This method may or may not die
1612             when an invalid setting or an invalid value is provided.
1613              
1614             Setting an invalid value may result in failures when this service is being used.
1615              
1616             =head3 Universally available settings
1617              
1618             =over
1619              
1620             =item C<network_deny>: Denies network access (i.e. online lookups) for this service.
1621              
1622             =back
1623              
1624             =head2 register_service
1625              
1626             use Data::URIID::Service;
1627              
1628             Data::URIID::Service->register_service($id, %opts);
1629              
1630             (since v0.17, experimental)
1631              
1632             Registers a new service.
1633              
1634             B<Note:>
1635             This is a B<highly experimental> method. It may be changed, replaced, or removed in future versions.
1636              
1637             The service is identified by its identifier C<$id> (likely a L<Data::Identifier> of type C<uuid>) which must be globally unique.
1638             The method will C<die> if it finds any problem with the identifier.
1639              
1640             B<Note:>
1641             The identifier given here identifies a specific service, not a type of service. So if there is more than one instance of a given service,
1642             each must have its own unique identifier.
1643              
1644             The following, all optional, options are supported:
1645              
1646             =over
1647              
1648             =item C<displayname>
1649              
1650             (experimental) Used as fallback displayname if C<$id> does not provide one in some cases.
1651              
1652             =item C<id_templates>
1653              
1654             An arrayref containing templates used to generate URIs from identifiers.
1655              
1656             =item C<digest_templates>
1657              
1658             An arrayref containing templates used to generate URIs from digests.
1659              
1660             =item C<id_patterns>
1661              
1662             An arrayref containing patterns used to extract identifiers from URIs.
1663              
1664             =back
1665              
1666             =head3 Templates
1667              
1668             Each template contains a hashref with the following keys:
1669              
1670             =over
1671              
1672             =item C<id_type>
1673              
1674             (only for id templates) A filter for identifier types. This works on the names of the identifier types (e.g. C<uuid>).
1675              
1676             =item C<digest>
1677              
1678             (only for digest templates) A filter for digest algorithm types. This works on the algorithm names in universal tag format (e.g. C<sha-1-160>).
1679              
1680             =item C<template>
1681              
1682             The actual template. This might be a single string in an undefined format or an instance of L<URI::Template> or L<URI::Template::Restrict>.
1683              
1684             =item C<filter>
1685              
1686             A filter that is applied to the value (the identifier or the digest value).
1687              
1688             =item C<action>
1689              
1690             A filter for which actions this template applies. This works on the names of the actions (e.g. C<info>).
1691              
1692             =item C<options>
1693              
1694             Additional template options. No options are currently defined by this documentation.
1695              
1696             =back
1697              
1698             =head4 Filter
1699              
1700             A filter in the sense of templates is a regex (quoted using C<qr//>), or an arrayref with exact values to match or a single string that is split into exact values using space and comma as separator.
1701              
1702             =head4 Template variables
1703              
1704             The following variables are supported by templates (more may be supported):
1705              
1706             =over
1707              
1708             =item C<type>
1709              
1710             The type (identifier type name e.g. C<uuid> or digest algorithm name e.g. C<sha-3-512>).
1711              
1712             =item C<value>
1713              
1714             The identifier or digest value.
1715              
1716             =item C<id>
1717              
1718             The identifier.
1719              
1720             =item C<digest>
1721              
1722             The digest.
1723              
1724             =back
1725              
1726             =head3 Patterns
1727              
1728             Each pattern is a hashref with the following keys:
1729              
1730             =over
1731              
1732             =item C<scheme>
1733              
1734             (required) The URI scheme to match. E.g. C<https>.
1735              
1736             B<Note:>
1737             C<http> might be considered an invalid value.
1738              
1739             =item C<host>
1740              
1741             (optional) The host to match as a string or regex (C<qr//>).
1742              
1743             =item C<path>
1744              
1745             (optional) The path to match as a string or regex (C<qr//>).
1746              
1747             =item C<type>
1748              
1749             (optional) The type of the identifier (e.g. C<uuid>).
1750              
1751             =item C<action>
1752              
1753             (optional) The action this matches (e.g. C<info>).
1754              
1755             =item C<match>
1756              
1757             A method (coderef) that is called to match the URI.
1758              
1759             The method is called with the L<Data::URIID::Result> as first argument, the URI (as L<URI>) as second argument, and this hashref (the pattern) as third.
1760             It must return a hash (not a hashref) containing at least an C<id> element.
1761             If an empty hash is returned, a no-match condition is assumed.
1762              
1763             It must return an element C<id> that is a L<Data::Identifier> of the found match.
1764             It may also return an element C<action> that provides the name of the action.
1765              
1766             =back
1767              
1768             =head1 KNOWN/SUPPORTED SERVICES
1769              
1770             The following is a non-complete list of services for which lookups (online or offline) are supported.
1771             For a complete list of known services see L<Data::URIID/"known">.
1772              
1773             =head2 C<wikidata> and C<wikipedia>
1774              
1775             Wikidata is a large collection of machine readable data from all categories. It can act as a central connecting point
1776             for several types of identifiers and services. It also provides Wikipedia pages for the given subject.
1777              
1778             The C<wikipedia> service is only used for online lookups if a Wikipedia page is used as an input. It does not provide
1779             lookup from identifiers to Wikipedia links.
1780              
1781             In many cases you want to enable online lookups for both C<wikidata>, and C<wikipedia>. This is specifically true if you
1782             want to work with very different services at once.
1783              
1784             You commonly don't need to enable online lookups if all the services you're interested in use the same type of identifiers.
1785              
1786             =head2 C<osm> and C<overpass>
1787              
1788             The C<osm> service is mainly used to lookup from OpenStreetMap identifiers to other identifiers as well as attributes.
1789             While the C<overpass> service is mostly used to look up from other identifiers to OpenStreetMap identifiers.
1790              
1791             If you work with places you most likely want to enable online lookups on those services.
1792              
1793             =head2 C<factgrid>
1794              
1795             The C<factgrid> service provides information mostly on historical topics. It contains a large amount of data for historical figures.
1796              
1797             =head2 C<Data::URIID>
1798              
1799             This service is used to perform internal offline lookups on identifiers known to the module.
1800             It mainly provides display names for ISEs used by this module.
1801              
1802             =head2 C<Data::Identifier>
1803              
1804             This service uses L<Data::Identifier> as a data source.
1805             It can provide display names and similar for a number of common identifiers.
1806              
1807             See also L<Data::Identifier::Wellknown>.
1808              
1809             =head1 AUTHOR
1810              
1811             Philipp Schafft <lion@cpan.org>
1812              
1813             =head1 COPYRIGHT AND LICENSE
1814              
1815             This software is Copyright (c) 2023-2025 by Philipp Schafft <lion@cpan.org>.
1816              
1817             This is free software, licensed under:
1818              
1819             The Artistic License 2.0 (GPL Compatible)
1820              
1821             =cut