blib/lib/Bible/OBML/Gateway.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 158 | 219 | 72.1 |
branch | 15 | 42 | 35.7 |
condition | 5 | 26 | 19.2 |
subroutine | 28 | 49 | 57.1 |
pod | 5 | 5 | 100.0 |
total | 211 | 341 | 61.8 |
line | stmt | bran | cond | sub | pod | time | code |
---|---|---|---|---|---|---|---|
1 | package Bible::OBML::Gateway; | ||||||
2 | # ABSTRACT: Bible Gateway content conversion to Open Bible Markup Language | ||||||
3 | |||||||
4 | 1 | 1 | 235270 | use 5.020; | |||
1 | 9 | ||||||
5 | |||||||
6 | 1 | 1 | 476 | use exact; | |||
1 | 37515 | ||||||
1 | 6 | ||||||
7 | 1 | 1 | 3173 | use exact::class; | |||
1 | 12310 | ||||||
1 | 4 | ||||||
8 | 1 | 1 | 905 | use Bible::OBML; | |||
1 | 646590 | ||||||
1 | 25 | ||||||
9 | 1 | 1 | 592 | use Bible::Reference; | |||
1 | 3 | ||||||
1 | 6 | ||||||
10 | 1 | 1 | 246 | use Mojo::ByteStream; | |||
1 | 2 | ||||||
1 | 40 | ||||||
11 | 1 | 1 | 6 | use Mojo::DOM; | |||
1 | 3 | ||||||
1 | 22 | ||||||
12 | 1 | 1 | 785 | use Mojo::UserAgent; | |||
1 | 247096 | ||||||
1 | 10 | ||||||
13 | 1 | 1 | 52 | use Mojo::URL; | |||
1 | 2 | ||||||
1 | 4 | ||||||
14 | 1 | 1 | 29 | use Mojo::Util 'html_unescape'; | |||
1 | 2 | ||||||
1 | 4941 | ||||||
15 | |||||||
16 | our $VERSION = '2.06'; # VERSION | ||||||
17 | |||||||
18 | has translation => 'NIV'; | ||||||
19 | has url => Mojo::URL->new('https://www.biblegateway.com/passage/'); | ||||||
20 | has ua => sub { | ||||||
21 | my $ua = Mojo::UserAgent->new( max_redirects => 3 ); | ||||||
22 | $ua->transactor->name( __PACKAGE__ . '/' . ( __PACKAGE__->VERSION // '2.0' ) ); | ||||||
23 | return $ua; | ||||||
24 | }; | ||||||
25 | has reference => Bible::Reference->new( | ||||||
26 | bible => 'Protestant', | ||||||
27 | sorting => 1, | ||||||
28 | ); | ||||||
29 | |||||||
30 | 1 | 1 | 1 | 1185 | sub translations ($self) { | ||
1 | 3 | ||||||
1 | 2 | ||||||
31 | 1 | 2 | my $translations; | ||||
32 | |||||||
33 | $self->ua->get( $self->url )->result->dom->find('select.search-dropdown option')->each( sub { | ||||||
34 | 2 | 100 | 2 | 2912 | my $class = $_->attr('class') || ''; | ||
35 | |||||||
36 | 2 | 100 | 44 | if ( $class eq 'lang' ) { | |||
50 | |||||||
37 | 1 | 5 | my @language = $_->text =~ /\-{3}(.+)\s\(([^\)]+)\)\-{3}/; | ||||
38 | 1 | 15 | push( @$translations, { | ||||
39 | language => $language[0], | ||||||
40 | acronym => $language[1], | ||||||
41 | } ); | ||||||
42 | } | ||||||
43 | elsif ( not $class ) { | ||||||
44 | 1 | 8 | my @translation = $_->text =~ /\s*(.+)\s\(([^\)]+)\)/; | ||||
45 | 1 | 11 | push( @{ $translations->[-1]{translations} }, { | ||||
1 | 9 | ||||||
46 | translation => $translation[0], | ||||||
47 | acronym => $translation[1], | ||||||
48 | } ); | ||||||
49 | } | ||||||
50 | 1 | 4 | } ); | ||||
51 | |||||||
52 | 1 | 20 | return $translations; | ||||
53 | } | ||||||
54 | |||||||
55 | 1 | 1 | 1 | 1698 | sub structure ( $self, $translation = $self->translation ) { | ||
1 | 2 | ||||||
1 | 6 | ||||||
1 | 12 | ||||||
56 | return $self->ua->get( | ||||||
57 | $self->url->clone->path( $self->url->path . 'bcv/' )->query( { version => $translation } ) | ||||||
58 | 1 | 3 | )->result->json->{data}[0]; | ||||
59 | } | ||||||
60 | |||||||
61 | 21 | 21 | 159 | sub _retag ( $tag, $retag ) { | |||
21 | 37 | ||||||
21 | 31 | ||||||
21 | 28 | ||||||
62 | 21 | 54 | $tag->tag($retag); | ||||
63 | 21 | 279 | delete $tag->attr->{$_} for ( keys %{ $tag->attr } ); | ||||
21 | 38 | ||||||
64 | } | ||||||
65 | |||||||
66 | 1 | 1 | 1 | 7 | sub fetch ( $self, $reference, $translation = $self->translation ) { | ||
1 | 2 | ||||||
1 | 4 | ||||||
1 | 2 | ||||||
1 | 1 | ||||||
67 | 1 | 5 | my $runs = $self->reference->require_verse_match(0)->acronyms(0)->clear->in($reference)->as_runs; | ||||
68 | 1 | 50 | 33 | 3860 | $reference = $runs->[0] unless ( @$runs != 1 or $runs->[0] !~ /\w\s*\d/ ); | ||
69 | |||||||
70 | 1 | 7 | my $result = $self->ua->get( | ||||
71 | $self->url->query( { | ||||||
72 | version => $translation, | ||||||
73 | search => $reference, | ||||||
74 | } ) | ||||||
75 | )->result; | ||||||
76 | |||||||
77 | 1 | 50 | 0 | 136 | croak( $translation . ' "' . ( $reference // '(undef)' ) . '" did not match a chapter or run of verses' ) | ||
78 | if ( $result->dom->at('div.content-section') ); | ||||||
79 | |||||||
80 | 1 | 9 | return Mojo::ByteStream->new( $result->body )->decode->to_string; | ||||
81 | } | ||||||
82 | |||||||
83 | 1 | 1 | 1 | 167 | sub parse ( $self, $html ) { | ||
1 | 3 | ||||||
1 | 2 | ||||||
1 | 2 | ||||||
84 | 1 | 50 | 6 | return unless ($html); | |||
85 | |||||||
86 | 1 | 10 | my $dom = Mojo::DOM->new($html); | ||||
87 | |||||||
88 | 1 | 10543 | my $ref_display = $dom->at('div.bcv div.dropdown-display-text'); | ||||
89 | 1 | 50 | 33 | 993 | croak('source appears to be invalid; check your inputs') unless ( $ref_display and $ref_display->text ); | ||
90 | 1 | 57 | my $reference = $ref_display->text; | ||||
91 | |||||||
92 | 1 | 50 | 27 | croak('EXB (Extended Bible) translation not supported') | |||
93 | if ( $dom->at('div.translation div.dropdown-display-text')->text eq 'Expanded Bible' ); | ||||||
94 | |||||||
95 | 1 | 745 | my $block = $dom->at('div.passage-text div.passage-content div:first-child'); | ||||
96 | 1 | 8 | 1388 | $block->find('*[data-link]')->each( sub { delete $_->attr->{'data-link'} } ); | |||
8 | 3029 | ||||||
97 | |||||||
98 | 1 | 23 | $html = $block->to_string; | ||||
99 | |||||||
100 | 1 | 3508 | $html =~ s`(\d+).(\d+)`$1/$2`g; | ||||
101 | 1 | 32 | $html =~ s`(?:<){2,}(.*?)(?:\x{2019}>|(?:>){2,})`\x{201c}$1\x{201d}`g; | ||||
102 | 1 | 33 | $html =~ s`(?:<)(.*?)(?:>|\x{2019})`\x{2018}$1\x{2019}`g; | ||||
103 | 1 | 28 | $html =~ s`\\\w+``g; | ||||
104 | 1 | 37 | $html =~ s/(?:\.\s*){2,}\./\x{2026}/; | ||||
105 | |||||||
106 | 1 | 6 | $block = Mojo::DOM->new($html)->at('div'); | ||||
107 | |||||||
108 | 1 | 50 | 8902 | $_->parent->strip if ( $_ = $block->find('div.poetry > h2')->first ); | |||
109 | |||||||
110 | 1 | 142 | 1869 | $block->descendant_nodes->grep( sub { $_->type eq 'comment' } )->each('remove'); | |||
142 | 5717 | ||||||
111 | 1 | 286 | $block->find( | ||||
112 | '.il-text, hidden, hr, .translation-note, span.inline-note, a.full-chap-link, b.inline-h3, top1' | ||||||
113 | )->each('remove'); | ||||||
114 | 1 | 11267 | $block->find('.std-text, hgroup, b, em, versenum, char')->each('strip'); | ||||
115 | $block | ||||||
116 | ->find('i, .italic, .trans-change, .idiom, .catch-word, selah, span.selah') | ||||||
117 | 1 | 0 | 7124 | ->each( sub { _retag( $_, 'i' ) } ); | |||
0 | 0 | ||||||
118 | 1 | 0 | 10282 | $block->find('.woj, u.jesus-speech')->each( sub { _retag( $_, 'woj' ) } ); | |||
0 | 0 | ||||||
119 | 1 | 0 | 3476 | $block->find('.divine-name, .small-caps')->each( sub { _retag( $_, 'small_caps' ) } ); | |||
0 | 0 | ||||||
120 | |||||||
121 | 10 | 10 | 2181 | $block->find('sup')->grep( sub { length $_->text == 1 } )->each( sub { | |||
122 | 0 | 0 | 0 | $_->content( '-' . $_->content ); | |||
123 | 0 | 0 | $_->strip; | ||||
124 | 1 | 4054 | } ); | ||||
125 | |||||||
126 | 1 | 80 | $self->reference->require_verse_match(1)->acronyms(1); | ||||
127 | |||||||
128 | 1 | 40 | my $footnotes = $block->at('div.footnotes'); | ||||
129 | 1 | 50 | 1655 | if ($footnotes) { | |||
130 | $footnotes->find('a.bibleref')->each( sub { | ||||||
131 | 0 | 0 | 0 | 0 | ( my $ref = $_->attr('data-bibleref') // '' ) =~ s/\.(\d+)\.(\d+)/ $1:$2/g; | ||
132 | 0 | 0 | $_->replace($ref); | ||||
133 | 0 | 0 | } ); | ||||
134 | 0 | 0 | $footnotes->remove; | ||||
135 | $footnotes = { | ||||||
136 | map { | ||||||
137 | 0 | 0 | '#' . $_->attr('id') => $self->reference->clear->in( | ||||
0 | 0 | ||||||
138 | $_->at('span')->all_text | ||||||
139 | )->as_text | ||||||
140 | } $footnotes->find('ol li')->each | ||||||
141 | }; | ||||||
142 | } | ||||||
143 | |||||||
144 | 1 | 6 | my $crossrefs = $block->at('div.crossrefs'); | ||||
145 | 1 | 50 | 1011 | if ($crossrefs) { | |||
146 | $crossrefs->find('a.bibleref')->each( sub { | ||||||
147 | 0 | 0 | 0 | 0 | ( my $ref = $_->attr('data-bibleref') // '' ) =~ s/\.(\d+)\.(\d+)/ $1:$2/g; | ||
148 | 0 | 0 | $_->replace($ref); | ||||
149 | 1 | 11 | } ); | ||||
150 | 1 | 1363 | $crossrefs->remove; | ||||
151 | $crossrefs = { | ||||||
152 | map { | ||||||
153 | 1 | 172 | '#' . $_->attr('id') => $self->reference->clear->in( | ||||
8 | 36796 | ||||||
154 | $_->at('a:last-child')->attr('data-bibleref') | ||||||
155 | )->refs | ||||||
156 | } $crossrefs->find('ol li')->each | ||||||
157 | }; | ||||||
158 | } | ||||||
159 | |||||||
160 | $block | ||||||
161 | ->find('span.text > a.bibleref') | ||||||
162 | ->map('parent') | ||||||
163 | 0 | 0 | 0 | ->grep( sub { $_->content =~ /^\[ | |||
164 | ->each( sub { | ||||||
165 | $_->find('a')->each( sub { | ||||||
166 | 0 | 0 | 0 | ( my $ref = $_->attr('data-bibleref') // '' ) =~ s/\.(\d+)\.(\d+)/ $1:$2/g; | |||
167 | 0 | 0 | $_->replace($ref); | ||||
168 | 0 | 0 | 0 | } ); | |||
169 | |||||||
170 | 0 | 0 | my $content = $_->content; | ||||
171 | 0 | 0 | $content =~ s|\s+\[([^\]]+)\]| | ||||
172 | 0 | 0 | ' |
||||
173 | |ge; | ||||||
174 | |||||||
175 | 0 | 0 | $_->content($content); | ||||
176 | 1 | 16972 | } ); | ||||
177 | |||||||
178 | $block | ||||||
179 | ->find('i > a.bibleref, crossref > a.bibleref') | ||||||
180 | ->map('parent') | ||||||
181 | 0 | 0 | 0 | ->grep( sub { $_->children->size == 1 } ) | |||
182 | ->each( sub { | ||||||
183 | 0 | 0 | 0 | my $a = $_->at('a:last-child'); | |||
184 | 0 | 0 | 0 | ( my $ref = $_->attr('data-bibleref') // '' ) =~ s/\.(\d+)\.(\d+)/ $1:$2/g; | |||
185 | |||||||
186 | 0 | 0 | $_->tag('sup'); | ||||
187 | 0 | 0 | $_->attr({ | ||||
188 | 'class' => 'crossreference', | ||||||
189 | 'data-cr' => $a->attr('data-bibleref'), | ||||||
190 | }); | ||||||
191 | |||||||
192 | 0 | 0 | $crossrefs = { | ||||
193 | $a->attr('data-bibleref') => $self->reference->clear->in($ref)->refs | ||||||
194 | }; | ||||||
195 | 1 | 1416 | } ); | ||||
196 | |||||||
197 | 1 | 2021 | $block->find('a.bibleref')->each('strip'); | ||||
198 | |||||||
199 | $block->find('sup.crossreference, sup.footnote')->each( sub { | ||||||
200 | 8 | 50 | 8 | 3861 | if ( $_->attr('class') eq 'footnote' ) { | ||
50 | |||||||
201 | $_->replace( | ||||||
202 | ( $footnotes->{ $_->attr('data-fn') } ) | ||||||
203 | 0 | 0 | 0 | ? ' |
|||
204 | : '' | ||||||
205 | ); | ||||||
206 | } | ||||||
207 | elsif ( $_->attr('class') eq 'crossreference' ) { | ||||||
208 | $_->replace( | ||||||
209 | ( $crossrefs->{ $_->attr('data-cr') } ) | ||||||
210 | 8 | 50 | 230 | ? ' |
|||
211 | : '' | ||||||
212 | ); | ||||||
213 | } | ||||||
214 | 1 | 1111 | } ); | ||||
215 | |||||||
216 | 1 | 8 | 287 | $block->find('footnote, crossref')->each( sub { _retag( $_, $_->tag ) } ); | |||
8 | 1441 | ||||||
217 | |||||||
218 | 1 | 23 | _retag( $block, 'obml' ); | ||||
219 | 1 | 34 | $block->child_nodes->first->prepend( $block->new_tag( 'reference', $reference ) ); | ||||
220 | |||||||
221 | 1 | 584 | $block->find('h3.chapter')->each('remove'); | ||||
222 | 1 | 0 | 820 | $block->find('h2 + h3')->each( sub { $_->tag('h4') } ); | |||
0 | 0 | ||||||
223 | 1 | 2 | 965 | $block->find('h2, h3')->each( sub { _retag( $_, 'header' ) } ); | |||
2 | 1234 | ||||||
224 | 1 | 0 | 34 | $block->find('h4')->each( sub { _retag( $_, 'sub_header' ) } ); | |||
0 | 0 | ||||||
225 | |||||||
226 | 1 | 2 | 713 | $block->find('.versenum')->grep( sub { $_->text =~ /^\s*\(/ } )->each('remove'); | |||
2 | 1037 | ||||||
227 | 1 | 0 | 46 | $block->find('.chapternum + .versenum')->each( sub { $_->previous->remove } ); | |||
0 | 0 | ||||||
228 | 1 | 0 | 1084 | $block->find('.chapternum + i > .versenum')->each( sub { $_->parent->previous->remove } ); | |||
0 | 0 | ||||||
229 | |||||||
230 | $block->find('.chapternum')->each( sub { | ||||||
231 | 1 | 1 | 944 | _retag( $_, 'verse_number' ); | |||
232 | 1 | 43 | $_->content(1); | ||||
233 | 1 | 1137 | } ); | ||||
234 | $block->find('.versenum')->each( sub { | ||||||
235 | 2 | 2 | 1079 | _retag( $_, 'verse_number' ); | |||
236 | |||||||
237 | 2 | 53 | my $verse_number = $_->content; | ||||
238 | 2 | 152 | $verse_number =~ s/^.*://g; | ||||
239 | 2 | 10 | ($verse_number) = $verse_number =~ /(\d+)/; | ||||
240 | |||||||
241 | 2 | 6 | $_->content($verse_number); | ||||
242 | 1 | 148 | } ); | ||||
243 | |||||||
244 | 1 | 5 | 139 | $block->find('span.text')->each( sub { _retag( $_, 'text' ) } ); | |||
5 | 1220 | ||||||
245 | |||||||
246 | $block->find('table')->each( sub { | ||||||
247 | $_->find('tr')->each( sub { | ||||||
248 | 0 | 0 | $_->find('th')->each('remove'); | ||||
249 | 0 | 0 | 0 | unless ( $_->child_nodes->size ) { | |||
250 | 0 | 0 | $_->strip; | ||||
251 | } | ||||||
252 | else { | ||||||
253 | 0 | 0 | 0 | $_->replace( join( '', | |||
0 | |||||||
0 | |||||||
254 | ' |
||||||
255 | $_->find('td text')->map('content')->join(', '), | ||||||
256 | ( | ||||||
257 | ( $_->find('td text')->map('text')->last =~ /\W$/ ) ? '' : | ||||||
258 | ( $_->following_nodes->size ) ? '; ' : '.' | ||||||
259 | ), | ||||||
260 | ( ( $_->following_nodes->size ) ? ' ' : '' ), | ||||||
261 | ) ); | ||||||
262 | } | ||||||
263 | 0 | 0 | 0 | } ); | |||
264 | |||||||
265 | 0 | 0 | $_->tag('div'); | ||||
266 | 0 | 0 | $_->content( ' ' . $_->content . ' ' ); |
||||
267 | 1 | 36 | } ); | ||||
268 | |||||||
269 | $block->find('ul, ol')->each( sub { | ||||||
270 | $_->find('li')->each( sub { | ||||||
271 | 0 | 0 | $_->tag('text'); | ||||
272 | 0 | 0 | $_->find('text > text')->each('strip'); | ||||
273 | 0 | 0 | 0 | 0 | $_->append_content(' ') if ( $_->next and $_->next->tag eq 'li' ); |
||
274 | 0 | 0 | 0 | } ); | |||
275 | |||||||
276 | 0 | 0 | $_->tag('div'); | ||||
277 | 0 | 0 | $_->attr( class => 'left-1' ); | ||||
278 | 0 | 0 | $_->content( ' ' . $_->content . ' ' ); |
||||
279 | 1 | 712 | } ); | ||||
280 | |||||||
281 | 9 | 26 | $block->find( join( ', ', map { 'div.left-' . $_ } 1 .. 9 ) )->each( sub { | ||||
282 | 0 | 0 | 0 | my ($left) = $_->attr('class') =~ /\bleft\-(\d+)/; | |||
283 | 0 | 0 | $_->find('text')->each( sub { $_->attr( indent => $left ) } ); | ||||
0 | 0 | ||||||
284 | 0 | 0 | $_->strip; | ||||
285 | 1 | 1097 | } ); | ||||
286 | |||||||
287 | 1 | 0 | 4311 | $block->find('div.poetry')->each( sub { $_->attr( class => 'indent-1' ) } ); | |||
0 | 0 | ||||||
288 | 9 | 25 | $block->find( join( ', ', map { '.indent-' . $_ } 1 .. 9 ) )->each( sub { | ||||
289 | 0 | 0 | 0 | my ($indent) = $_->attr('class') =~ /\bindent\-(\d+)/; | |||
290 | $_->find('text')->each( sub { | ||||||
291 | 0 | 0 | 0 | $_->attr( indent => $indent + ( $_->attr('indent') || 0 ) ); | |||
292 | 0 | 0 | } ); | ||||
293 | 0 | 0 | $_->strip; | ||||
294 | 1 | 817 | } ); | ||||
295 | |||||||
296 | 1 | 4536 | $block->find( join( ', ', map { '.indent-' . $_ . '-breaks' } 1 .. 5 ) )->each('remove'); | ||||
5 | 19 | ||||||
297 | |||||||
298 | $block->find('text[indent]')->each( sub { | ||||||
299 | 0 | 0 | 0 | my $level = $_->attr('indent'); | |||
300 | 0 | 0 | _retag( $_, 'indent' ); | ||||
301 | 0 | 0 | $_->attr( level => $level ); | ||||
302 | 1 | 2678 | } ); | ||||
303 | 1 | 879 | $block->find('text')->each('strip'); | ||||
304 | |||||||
305 | $block->find('indent + indent')->each( sub { | ||||||
306 | 0 | 0 | 0 | 0 | if ( $_->previous->attr('level') eq $_->attr('level') ) { | ||
307 | 0 | 0 | $_->previous->append_content( ' ' . $_->content ); | ||||
308 | 0 | 0 | $_->remove; | ||||
309 | } | ||||||
310 | 1 | 1328 | } ); | ||||
311 | |||||||
312 | 1 | 2 | 669 | $block->find('p')->each( sub { _retag( $_, 'p' ) } ); | |||
2 | 685 | ||||||
313 | |||||||
314 | 1 | 50 | 33 | 28 | $block->at('p')->prepend_content(' |
||
315 | if ( $block->at('p') and not $block->at('p')->at('verse_number') ); | ||||||
316 | |||||||
317 | 1 | 638 | $block->find('div, span, u, sup, bk, verse, start-chapter')->each('strip'); | ||||
318 | |||||||
319 | 1 | 2480 | $html = html_unescape( $block->to_string ); | ||||
320 | 1 | 784 | $html =~ s/ [ ]+/ /g; |
||||
321 | |||||||
322 | 1 | 113 | return $html; | ||||
323 | } | ||||||
324 | |||||||
325 | 1 | 1 | 1 | 5267 | sub get ( $self, $reference, $translation = $self->translation ) { | ||
1 | 2 | ||||||
1 | 3 | ||||||
1 | 4 | ||||||
1 | 11 | ||||||
326 | 1 | 6 | return Bible::OBML->new->html( $self->parse( $self->fetch( $reference, $translation ) ) ); | ||||
327 | } | ||||||
328 | |||||||
329 | 1; | ||||||
330 | |||||||
331 | __END__ |