File Coverage

blib/lib/Religion/Bible/Regex/Builder.pm

Criterion	Covered	Total	%
statement	21	206	10.1
branch	0	44	0.0
condition	0	18	0.0
subroutine	7	20	35.0
pod	5	5	100.0
total	33	293	11.2

line	stmt	bran	cond	sub	pod	time	code
1							package Religion::Bible::Regex::Builder;
2
3	1			1		69261	use warnings;
	1					2
	1					31
4	1			1		6	use strict;
	1					2
	1					35
5	1			1		15	use Carp;
	1					7
	1					87
6
7	1			1		848	use version; our $VERSION = '0.99';
	1					2924
	1					6
8	1			1		1146	use Data::Dumper;
	1					8578
	1					83
9
10							# Input files are assumed to be in the UTF-8 strict character encoding.
11	1			1		1072	use utf8;
	1					12
	1					9
12							binmode(STDOUT, ":utf8");
13
14	1			1		69	use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
	1					3
	1					5630
15
16							require Exporter;
17							our @ISA = qw(Exporter);
18							our %EXPORT_TAGS = ( 'all' => [ qw( ) ] );
19							our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
20							our @EXPORT = qw(
21
22							);
23
24							sub new {
25	0			0	1		my $class = shift;
26	0						my $config = shift;
27	0						my $self = {};
28	0						bless $self, $class;
29
30							# Get the Configurations for building these regular expressions
31	0						my %configs;
32	0						$self->_process_config($config->get, \%configs);
33
34							######################################################################################
35							# Définitions par défaut des expressions régulières avec références bibliques
36							#
37							# La fonction '_set_regex' a trois paramètres.
38							# 1. Un nom unique pour cette expression régulière
39							# 2. Une experssion régulière
40							# 3. Si la paramètre deux est 'undef', une experssion régulière comme defaut
41							######################################################################################
42
43	0						my $spaces = qr/([\s ]*)/;
44
45							####################################################################################
46							# Définitions des chiffres
47							####################################################################################
48							# chapitre : c'est un chiffre inférieur à 150 qui indique un chapitre
49							# le chapitre avec le grand chiffre dans la Bible est Psaume 150
50							# regex for roman numbers less than 150
51							# \b(?:(?:CL\|(?:C(XL\|X?X?X?)(IX\|IV\|V?I?I?I?)))\|(?:(XC\|XL\|L?X?X?X?)(IX\|IV\|V?I?I?I?)))\b
52	0						my $chapitre = qr/(?:\b150\b)\|(?:\b1[01234]\d\b)\|\b\d{1,2}\b/;
53	0						$self->_set_regex( 'chapitre',
54							$configs{'chapitre'},
55							$chapitre
56							);
57
58							# verset_number : c'est un chiffre inférieur à 176 qui indique un verset
59							# le plus grand verset dans la Bible est Psaume 119:176
60							# regex for roman numbers less than 176
61							# \b(?:(?:CLXX(IV\|II\|III\|V?I?)\|(?:C(XL\|X?X?X?)(IV\|V?I?I?I?)))\|(?:CLX?(IX\|IV\|V?I?I?I?)\|(?:C(XL\|X?X?X?)(IX\|IV\|V?I?I?I?)))\|(?:(XC\|XL\|L?X?X?X?)(IX\|IV\|V?I?I?I?)))\b
62	0						my $verse_number = qr/(?:17[0123456]\|1[0123456]\d\|\d{1,2})/;
63	0						$self->_set_regex( 'verse_number',
64							$configs{'verse_number'},
65							$verse_number
66							);
67
68							# verset_letter : c'est un lettre miniscule a la fin d'un verset
69	0						my $verse_letter = qr/[a-z]/;
70	0						$self->_set_regex( 'verse_letter',
71							$configs{'verse_letter'},
72							$verse_letter
73							);
74
75							# verset : c'est un chiffre et lettre qui indique un verset ou une partie de celle-ci
76	0						my $verset = qr/\b(?:$self->{verse_number})(?:$self->{verse_letter})?\b/;
77	0						$self->_set_regex( 'verset',
78							$configs{'verset'},
79							$verset
80							);
81
82							####################################################################################
83							# Définitions de la ponctuation
84							####################################################################################
85							# cv_separateur : vous pouvez trouver ce entre un chapitre et un verset
86	0						my $cv_separateur = qr/(?::\|\.)/;
87	0						$self->_set_regex( 'cv_separateur',
88							$configs{'cv_separateur'},
89							$cv_separateur
90							);
91
92							# separateur : cette sépare deux références bibliques
93	0						my $separateur = qr/\bet\b/;
94	0						$self->_set_regex( 'separateur',
95							$configs{'separateur'},
96							$separateur
97							);
98
99							# cl_separateur : cette sépare deux références bibliques et que le deuxième référence est un référence d'un chaptire
100	0						my $cl_separateur = qr/;/;
101	0						$self->_set_regex( 'cl_separateur',
102							$configs{'cl_separateur'},
103							$cl_separateur
104							);
105
106							# vl_separateur : cette sépare deux références bibliques et que le deuxième référence est un référence d'un verset
107	0						my $vl_separateur = qr/,/;
108	0						$self->_set_regex( 'vl_separateur',
109							$configs{'vl_separateur'},
110							$vl_separateur
111							);
112
113	0						my $intervale = qr/(?:-\|–\|−)/;
114							# tiret : ce correspond à tous les types de tiret
115	0						$self->_set_regex( 'intervale',
116							$configs{'intervale'},
117							$intervale
118							);
119
120							# reference_separateurs : ce correspond à tous les types de separateur entre références biblque
121	0						my $cl_ou_vl_separateur = qr/(?:$self->{cl_separateur}\|$self->{vl_separateur}\|$self->{separateur})/;
122	0						$self->_set_regex( 'cl_ou_vl_separateurs',
123							$configs{'cl_ou_vl_separateurs'},
124							$cl_ou_vl_separateur
125							);
126
127							####################################################################################
128							# Définitions de les expressions avec intervales
129							####################################################################################
130
131	0						my $intervale_chapitre = qr/
132							# Intervale Verset, Ex '-4', '-45'
133							$spaces # Spaces
134							$self->{'intervale'}
135							$spaces # Spaces
136							$self->{'chapitre'}
137							/x;
138
139							# intervale_chapitre : deux chapitre avec un tiret entre
140							# Par exemple: '-2', '–9', ou ' - 4'
141	0						$self->_set_regex( 'intervale_chapitre',
142							$configs{'intervale_chapitre'},
143							$intervale_chapitre
144							);
145
146	0						my $intervale_verset = qr/
147							# Intervale Verset, Ex '-4', '-45'
148							$spaces # Spaces
149							$self->{'intervale'}
150							$spaces # Spaces
151							$self->{'verset'}
152							/x;
153
154							# intervale_verset : deux chapitre avec un tiret entre
155							# Par exemple: '-2', '–9', ou ' - 4'
156	0						$self->_set_regex( 'intervale_verset',
157							$configs{'intervale_verset'},
158							$intervale_verset
159							);
160
161	0						my $cv_separateur_verset = qr/
162							# CV Separator Verset
163							$spaces# Spaces
164							$self->{'cv_separateur'} # CV Separator
165							$spaces# Spaces
166							$self->{'verset'}
167							/x;
168
169							# cv_separateur_verset : deux chapitre avec un tiret entre
170							# Par exemple: ':2', '.9', ou ' : 4'
171	0						$self->_set_regex( 'cv_separateur_verset',
172							$configs{'cv_separateur_verset'},
173							$cv_separateur_verset
174							);
175
176							####################################################################################
177							# Définitions de les references numiques
178							####################################################################################
179
180							####################################################################################
181							# Les mots donne contexte aux référence biblique
182							# Par Exemple:
183							# chapitre_mots: 'voir la chapitre'
184							# texte: voir la chapitre 9
185							#
186							# Avec cette texte 'voir la chapitre' comme chapitre_mots le 9 peu être indentifié
187							# comme un chapitre
188							#####################################################################################
189
190							# reference_contexte_mots_avant : les mots qui indique que le prochain référence
191							# est un chapitre référence
192	0						my $reference_mots = qr/(?:dans\|voir aussi)/;
193	0						$self->_set_regex( 'reference_mots',
194							$configs{'reference_mots'},
195							$reference_mots
196							);
197
198							# chapitre_contexte_mots_avant : les mots qui indique que le prochain référence est un chapitre référence
199	0						my $chapitre_mots = qr/(?:dans le chapitre)/;
200	0						$self->_set_regex( 'chapitre_mots',
201							$configs{'chapitre_mots'},
202							$chapitre_mots
203							);
204
205							# verset_contexte_mots_avant : les mots qui indique que le prochain référence est un verset référence
206	0						my $verset_mots = qr/(?:vv?\.)/;
207	0						$self->_set_regex( 'verset_mots',
208							$configs{'verset_mots'},
209							$verset_mots
210							);
211
212							# voir_contexte_mots_avant : les mots qui indique que le prochain référence est un verset référence
213	0						my $voir_mots = qr/(?:voir)/;
214	0						$self->_set_regex( 'voir_mots',
215							$configs{'voir_mots'},
216							$voir_mots
217							);
218
219							####################################################################################
220							# Définitions de les expressions avec livres
221							####################################################################################
222
223							# livres_numerique : Ceci est une liste de tous les livres qui commencent par un chiffre
224	0						my $livres_numerique = qr/
225							Samuel\|S\|Rois\|R\|Chroniques\|Ch\|Corinthiens\|Co\|Thessaloniciens\|Th\|Timothée\|Ti\|Pierre\|P\|Jean\|Jn\|Esras\|Es\|Maccabees\|Ma\|Psalm\|Ps
226							/x;
227
228	0						$self->_set_regex( 'livres_numerique',
229							$configs{'livres_numerique'},
230							$livres_numerique
231							);
232
233	0						my $livres_numerique_protect = "";
234	0	0					if ($self->{'livres_numerique'} ne '(?-xism:)') {
235	0						$livres_numerique_protect = qr/(?!(?:[\s ]*(?:$self->{livres_numerique})))/;
236							}
237	0						$self->_set_regex( 'livres_numerique_protect',
238							$configs{'livres_numerique_protect'},
239							$livres_numerique_protect
240							);
241
242
243	0						my $livres = qr/
244							Genèse\|Genese\|Exode\|Lévitique\|Levitique\|Nombres\|Deutéronome\|Deuteronome\|Josué\|Josue\|Juges\|Ruth\|1[\s ]Samuel\|2[\s ]Samuel\|1[\s ]Rois\|2[\s ]Rois\|1[\s ]Chroniques\|2[\s ]Chroniques\|Esdras\|Néhémie\|Nehemie\|Esther\|Job\|Psaume\|Psaumes\|Proverbes\|Ecclésiaste\|Ecclesiaste\|Cantique[\s ]des[\s ]Cantiqu\|Ésaïe\|Esaie\|Jérémie\|Jeremie\|Lamentations\|Ézéchiel\|Ezechiel\|Daniel\|Osée\|Osee\|Joël\|Joel\|Amos\|Abdias\|Jonas\|Michée\|Michee\|Nahum\|Habacuc\|Sophonie\|Aggée\|Aggee\|Zacharie\|Malachie\|Matthieu\|Marc\|Luc\|Jean\|Actes\|Romains\|1[\s ]Corinthiens\|2[\s ]Corinthiens\|Galates\|Éphésiens\|Ephesiens\|Philippiens\|Colossiens\|1[\s ]Thessaloniciens\|2[\s ]Thessaloniciens\|1[\s ]Timothée\|1[\s ]Timothee\|2[\s ]Timothée\|2[\s ]Timothee\|Tite\|Philémon\|Philemon\|Hébreux\|Hebreux\|Jacques\|1[\s ]Pierre\|2[\s ]Pierre\|1[\s ]Jean\|2[\s ]Jean\|3[\s ]*Jean\|Jude\|Apocalypse
245							/x;
246
247							# livres : le nom complet de tous les livres, avec et sans accents
248	0						$self->_set_regex( 'livres',
249							$configs{'livres'},
250							$livres
251							);
252
253	0						my $abbreviations = qr/
254							Ge\|Ex\|Lé\|No\|De\|Dt\|Jos\|Jug\|Jg\|Ru\|1[\s ]S\|2[\s ]S\|1[\s ]R\|2[\s ]R\|1[\s ]Ch\|2[\s ]Ch\|Esd\|Né\|Est\|Job\|Ps\|Ps\|Pr\|Ec\|Ca\|Esa\|Esa\|És\|Jér\|Jé\|La\|Ez\|Éz\|Da\|Os\|Joe\|Joë\|Am\|Ab\|Jon\|Mic\|Mi\|Na\|Ha\|Sop\|So\|Ag\|Za\|Mal\|Ma\|Mt\|Mc\|Mr\|Lu\|Jn\|Ac\|Ro\|1[\s ]Co\|2[\s ]Co\|Ga\|Ep\|Ép\|Ph\|Col\|1[\s ]Th\|2[\s ]Th\|1[\s ]Ti\|2[\s ]Ti\|Ti\|Tit\|Phm\|Hé\|Ja\|1[\s ]Pi\|2[\s ]Pi\|1[\s ]Jn\|2[\s ]Jn\|3[\s ]Jn\|Jude\|Jud\|Ap\|1[\s ]Es\|2[\s ]Es\|Tob\|Jdt\|Est\|Sag\|Sir\|Bar\|Aza\|Sus\|Bel\|Man\|1[\s ]Ma\|2[\s ]Ma\|3[\s ]Ma\|4[\s ]Ma\|2[\s ]Ps
255							/x;
256
257							# abbreviations : le nom complet de tous les abbreviations, avec et sans accents
258	0						$self->_set_regex( 'abbreviations',
259							$configs{'abbreviations'},
260							$abbreviations
261							);
262
263							# livres_et_abbreviations : la liste de tous les livres et les abréviations
264	0						my $livres_et_abbreviations = qr/(?:$self->{'livres'}\|$self->{'abbreviations'})/;
265	0						$self->_set_regex( 'livres_et_abbreviations',
266							$configs{'livres_et_abbreviations'},
267							$livres_et_abbreviations
268							);
269
270							# contexte_mots : Tous les mots qui viennent avant une référence biblique. Des mots différents peut
271							# fournir des contextes différents. Par exemple, 'voir le chapitre' fournit une
272							# contexte et le chapitre 'Matthew' fournit une référence explicite contexte
273	0						my $contexte_mots = qr/
274							(?: # Contexte Mots
275							$self->{'livres_et_abbreviations'} # Livres et abbreviations
276							\|
277							$self->{'chapitre_mots'} # Chapitre mots
278							\|
279							$self->{'verset_mots'} # Verset mots
280							\|
281							$self->{'reference_mots'} # Voir mots
282							)
283							/x;
284
285	0						$self->_set_regex( 'contexte_mots',
286							$configs{'contexte_mots'},
287							$contexte_mots
288							);
289
290							#livre2abre : une table de changement pour livre à l'abréviation
291	0						$self->_set_hash( 'book2key',
292							$configs{'book2key'},
293							{}
294							);
295
296							#abre2livres : une table de changement pour abréviation à livre
297	0						$self->_set_hash( 'abbr2key',
298							$configs{'abbr2key'},
299							{}
300							);
301
302							#livre2abre : une table de changement pour livre à l'abréviation
303	0						$self->_set_hash( 'key2book',
304							$configs{'key2book'},
305							{}
306							);
307
308							#abre2livres : une table de changement pour abréviation à livre
309	0						$self->_set_hash( 'key2abbr',
310							$configs{'key2abbr'},
311							{}
312							);
313
314
315							# livres_avec_un_chapitre : la liste de tous les livres avec un seul chapitre
316	0						my $livres_avec_un_chapitre = qr/(?:Ab\|Abdias\|2Jn\|2Jean\|Phm\|Philemon\|Philémon\|Jud\|Jude\|3Jn\|3Jean)/;
317	0						$self->_set_regex( 'livres_avec_un_chapitre',
318							$configs{'livres_avec_un_chapitre'},
319							$livres_avec_un_chapitre
320							);
321
322							#######################################################################################################
323							# full_reference_protection : Il s'agit d'une expression régulière complexe. Ne pas changer,
324							# sauf si vous savez ce que vous faites.
325
326	0						my $cv_list = qr/
327							$self->{'chapitre'} # LC, '22'
328							$self->{'livres_numerique_protect'}
329							(?: # Choose between CV and Interval
330							(?:
331							(?:# LCC: Ge 22-24
332							$self->{'intervale_chapitre'}
333							(?:# LCCV: Ge 22-23:46
334							$self->{'cv_separateur_verset'}
335							(?: # LCCVV:Ge 22-23:46-49
336							$self->{'intervale_verset'}
337							)?
338							)?
339							)
340							\|
341							(?:# LCV:Ge 1:1
342							$self->{'cv_separateur_verset'}
343							(?: # LCVV:Ge 22-23:46-49
344							$self->{'intervale_verset'}
345							(?:# LCVCV:Ge 22:23-46:49
346							$self->{'cv_separateur_verset'}
347							)?
348							)?
349							)
350							)
351							)?
352							/x;
353
354							# cv_list : Combines LC, LCC, LCCV, LCCVV and LCV, LCVV, LCVCV
355	0						$self->_set_regex( 'cv_list',
356							$configs{'cv_list'},
357							$cv_list
358							);
359
360
361							# reference_biblique_list : Cette expression régulière correspond à une liste de références bibliques
362							# ex. '1 Ti 1.19 ; Ge 1:1, 2:16-18' or '1 Ti 1.19 ; 2Ti 2:16-18'
363	0						my $reference_biblique = qr/
364							(?:
365							$self->{'contexte_mots'}
366							$spaces # Spaces
367							(?: # Chapitre Verset liste
368							$self->{'cv_list'}
369							)
370							(?: # Reference List
371							$spaces # Spaces
372							$self->{'cl_ou_vl_separateurs'}
373							$spaces # Spaces
374							$self->{'livres_numerique_protect'}
375							(?: # Chapitre Verset liste
376							$self->{'cv_list'}
377							)
378							)*
379							)
380							/x;
381
382	0						$self->_set_regex( 'reference_biblique',
383							$configs{'reference_biblique'},
384							$reference_biblique
385							);
386
387							# explicit_reference_biblique : Cette expression régulière correspond à une liste de références bibliques explicit
388							# Il faut avoir le livre et chapitre au moins
389							# ex. '1 Ti 1.19 ; Ge 1:1, 2:16-18' or '1 Ti 1.19 ; 2Ti 2:16-18'
390	0						my $explicit_reference_biblique = qr/
391							(?:
392							$self->{'livres_et_abbreviations'}
393							$spaces # Spaces
394							(?: # Chapitre Verset liste
395							$self->{'cv_list'}
396							)
397							(?: # Reference List
398							$spaces # Spaces
399							$self->{'cl_ou_vl_separateurs'}
400							$spaces # Spaces
401							$self->{'livres_numerique_protect'}
402							(?: # Chapitre Verset liste
403							$self->{'cv_list'}
404							)
405							)*
406							)
407							/x;
408
409	0						$self->_set_regex( 'explicit_reference_biblique',
410							$configs{'explicit_reference_biblique'},
411							$explicit_reference_biblique
412							);
413
414							# reference_biblique_list : Cette expression régulière correspond à une liste de références bibliques
415							# ex. '1 Ti 1.19 ; Ge 1:1, 2:16-18' or '1 Ti 1.19 ; 2Ti 2:16-18'
416	0						my $reference_biblique_list = qr/
417							(?:
418							$self->{'contexte_mots'}
419							$spaces # Spaces
420							(?: # Chapitre Verset liste
421							$self->{'cv_list'}
422							)
423							(?: # Reference List
424							$spaces # Spaces
425							$self->{'cl_ou_vl_separateurs'}
426							$spaces # Spaces
427							(?:$self->{'contexte_mots'})?
428							$spaces # Spaces
429							(?: # Chapitre Verset liste
430							$self->{'cv_list'}
431							)
432							)*
433							)
434							/x;
435
436	0						$self->_set_regex( 'reference_biblique_list',
437							$configs{'reference_biblique_list'},
438							$reference_biblique_list
439							);
440
441	0						return $self;
442							}
443
444							sub abbreviation {
445	0			0	1		my $self = shift;
446	0		0				my $key = shift \|\| '';
447
448							# return unless defined($key);
449
450	0						chomp($key);
451
452	0	0					return $self->{key2abbr}{$key} if ($key =~ /^\d+$/);
453							# try a lookup just in case $key eq 'Pr' or 'Genèse'
454	0						my $foundkey = $self->key($key);
455
456							# if we found a key then use it as the index
457	0	0					return unless (_non_empty($foundkey));
458	0						return $self->{key2abbr}{$foundkey};
459							}
460
461							sub book {
462	0			0	1		my $self = shift;
463	0						my $key = shift;
464
465	0	0					return unless defined($key);
466
467	0						chomp($key);
468
469	0	0					return $self->{key2book}{$key} if ($key =~ /^\d$/);
470
471							# try a lookup just in case $key eq 'Pr' or 'Genèse'
472	0						my $foundkey = $self->key($key);
473
474							# if we found a key then use it as the index
475	0	0					if (defined($foundkey)) {
476	0						return $self->{key2book}{$foundkey};
477							}
478	0						return $self->{key2book}{$key};
479							}
480
481							sub key {
482	0			0	1		my $self = shift;
483	0		0				my $book_or_abbr = shift \|\| '';
484	0						chomp($book_or_abbr);
485
486	0		0				return $self->{book2key}{$book_or_abbr} \|\| $self->{abbr2key}{$book_or_abbr};
487							}
488
489							sub bookname_type {
490	0			0	1		my $self = shift;
491	0		0				my $book = shift \|\| '';
492	0	0					return('NONE') unless _non_empty($book);
493	0	0					return('CANONICAL_NAME') if ($book =~ m/$self->{livres}/);
494	0	0					return('ABBREVIATION') if ($book =~ m/$self->{abbreviations}/);
495	0						return('UNKNOWN');
496							}
497
498
499							################################################################################
500							# Helper functions for internal use
501							################################################################################
502							sub _set_regex {
503	0			0			my ($self, $key, $regex, $default_regex) = @_;
504							# return if (m/^$/ =~ $regex);
505	0	0	0				if (defined($regex)) {
		0
506	0						my $result = eval { qr/$regex/ }; # Evaluate that line
	0
507	0	0					if ($@) { # Check for compile or run-time errors.
508	0						croak "Invalid regex:\n $regex";
509							} else {
510	0						$self->{$key} = $result;
511							}
512							} elsif (defined($regex) && $regex eq ''){
513	0						return;
514							} else {
515	0						$self->{$key} = $default_regex;
516							}
517							}
518
519							sub _set_hash {
520	0			0			my ($self, $key, $hash, $default_hash) = @_;
521	0	0					if (defined($hash)) {
522	0						$self->{$key} = $hash;
523							} else {
524	0						$self->{$key} = $default_hash;
525							}
526							}
527
528							sub _non_empty {
529	0			0			my $value = shift;
530	0		0				return (defined($value) && $value ne '');
531							}
532
533							################################################################################
534							# les fonctions qui se préoccupe de la configuration
535							################################################################################
536							sub _process_config {
537	0			0			my $self = shift;
538	0						my $config = shift;
539	0						my $retval = shift;
540
541							# If this is the book configurations then build the associated data structures
542							# If this configuration value is a file name, then use the contents of that
543							# file to build a regular expression
544							# If the configuration value is a HASH, then recursively call _process_config
545							# If this configuration value is a string, then copy it to the data structure
546							# that is being returned
547	0						while ( my ($key, $value) = each(%{$config}) ) {
	0
548	0	0					$value = '' unless defined($value);
549	0	0	0				if ($key =~ m/books/) {
		0
		0
550	0						$self->_init_book_and_abbreviation_data_structures($value, $retval);
551							} elsif ($value =~ m/^(?:fichier\|file):/) {
552	0						$retval->{$key} = $self->_build_regexes_from_file($value);
553							} elsif (defined(ref $value) && ref $value eq "HASH") {
554	0						$self->_process_config($value, $retval);
555							} else {
556	0						$retval->{$key} = $value;
557							}
558							}
559	0						return $retval;
560							}
561
562							sub _build_regexes_from_file {
563	0			0			my $self = shift;
564	0						my $value = shift;
565	0						my @list;
566
567							# Enleve le phrase 'fichier:' ou 'file:'
568	0						$value =~ s/^(?:fichier\|file)://g;
569
570	0	0					open(*LIST, "<:encoding(UTF-8)", $value) or croak "Couldn't open \'$value\' for reading: $!\n";
571	0						while() {
572	0						chomp; # no newline
573	0						s/[^\\]#.*//; # no comments si il y a un '\' devant le '#' il n'est pas un commentarie
574	0						s/^\s+//; # no leading white
575	0						s/\s+$//; # no trailing white
576	0	0					next unless length; # anything left?
577	0						push @list, $_;
578							}
579	0						close (LIST);
580	0						return "(?:" . _join_regex(\@list) . ")";
581							}
582
583							sub _join_regex {
584	0			0			my $array_ref = shift;
585	0	0					if (defined($array_ref)) {
586	0						return join("\|", @{$array_ref});
	0
587							} else {
588	0						return;
589							}
590							}
591
592							# Encode and decode helper
593							sub _encode {
594	0			0			my $class = shift;
595	0						my $s = shift;
596	0						chomp($s);
597	0						$s =~ s/([èéÉïëà])/'\x{' . sprintf("%2.2x",ord($1)) . '}'/eg;
	0
598	0						return $s;
599							}
600
601
602							################################################################################
603							# _init_book_and_abbreviation_data_structures
604							#
605							# Creates the following mappings:
606							# An array of all match book names (book names to search for in a document)
607							# An array of all match abbreviation (abbreviations to search for in a document)
608							# An array of all book names that begin with a number
609							# A hash mapping from match book name to the primary key
610							# A hash mapping from match abbreviation to the primary key
611							#
612							# The primary key is the number which starts the entry in the abbr config file
613							# For example with this configuration the primary key is '1'
614							# 1:
615							# Match:
616							# Book: ['Genèse', 'Genese']
617							# Abbreviation: ['Ge']
618							# Normalized:
619							# Book: Genèse
620							# Abbreviation: Ge
621							#
622							################################################################################
623							sub _init_book_and_abbreviation_data_structures {
624	0			0			my $self = shift;
625	0						my $config = shift;
626	0						my $retval = shift;
627
628	0						my $regex;
629	0						my (@livres, @livres_numerique, @abbreviations); # Array for all match books and another for match books starting with a number
630	0						my (%book2key, %abbr2key, %key2abbr, %key2book, %ln); # Mappings between match books and abbreviations and the primary key
631
632							# Loop through each number and gather the books
633	0						while ( my ($key, $value) = each %{$config} ) {
	0
634							# Loop through
635	0						foreach my $livre (@{$value->{Match}{Book}}) {
	0
636	0						push @livres, $livre;
637	0						$book2key{$livre} = $key;
638	0	0					if ($livre =~ m/^\d+/) {
639	0						$livre =~ s/\d+[\s ]*([A-Za-z]+)/$1/xg;
640	0						$ln{$livre} = 1;
641
642							}
643							}
644							# Loop through
645	0						foreach my $abbreviation (@{$value->{Match}{Abbreviation}}) {
	0
646	0						push @abbreviations, $abbreviation;
647	0						$abbr2key{$abbreviation} = $key;
648	0	0					if ($abbreviation =~ m/^\d+/) {
649	0						$abbreviation =~ s/\d+[\s ]*([A-Za-z]+)/$1/xg;
650	0						$ln{$abbreviation} = 1;
651							}
652							}
653	0						$key2abbr{$key} = $value->{Normalized}{Abbreviation};
654	0						$key2book{$key} = $value->{Normalized}{Book};
655							}
656
657	0						foreach my $y (sort(keys %ln)) {
658	0						push @livres_numerique, $y;
659							}
660
661	0						$retval->{'livres'} = _join_regex(\@livres);
662	0						$retval->{'abbreviations'} = _join_regex(\@abbreviations);
663	0						$retval->{'livres_numerique'} = _join_regex(\@livres_numerique);
664
665	0						$retval->{'livres_array'} = \@livres;
666	0						$retval->{'abbreviations_array'} = \@abbreviations;
667	0						$retval->{'livres_numerique_array'} = \@livres_numerique;
668
669	0						$retval->{'book2key'} = \%book2key;
670	0						$retval->{'abbr2key'} = \%abbr2key;
671	0						$retval->{'key2book'} = \%key2book;
672	0						$retval->{'key2abbr'} = \%key2abbr;
673	0						$retval->{'configs'} = $config;
674							}
675
676
677							1; # Magic true value required at end of module
678							__END__