File Coverage

blib/lib/Parse/PhoneNumber/ID.pm
Criterion Covered Total %
statement 283 285 99.3
branch 59 78 75.6
condition 36 54 66.6
subroutine 14 14 100.0
pod 1 1 100.0
total 393 432 90.9


line stmt bran cond sub pod time code
1             package Parse::PhoneNumber::ID;
2              
3             our $DATE = '2017-07-10'; # DATE
4             our $VERSION = '0.16'; # VERSION
5              
6 1     1   87112 use 5.010001;
  1         6  
7 1     1   8 use strict;
  1         3  
  1         31  
8 1     1   7 use warnings;
  1         3  
  1         45  
9 1     1   5042 use Log::ger;
  1         131  
  1         8  
10              
11 1     1   2323 use Function::Fallback::CoreOrPP qw(clone);
  1         869  
  1         89  
12 1     1   712 use Perinci::Sub::Util qw(gen_modified_sub);
  1         2928  
  1         7908  
13              
14             require Exporter;
15             our @ISA = qw(Exporter);
16             our @EXPORT_OK = qw(extract_id_phones parse_id_phone
17             list_id_operators list_id_area_codes);
18              
19             # from: http://id.wikipedia.org/wiki/Daftar_kode_telepon_di_Indonesia
20             # last updated: 2011-03-08
21             my %area_codes = ( # area code (with leading 0) => {province=>..., cities=>...}; if a key is repeated, only its LAST entry survives the list assignment
22             '0627' => {province=>'aceh', cities=>'Kota Subulussalam'}, # NOTE(review): '0627' is repeated under sumut below, so this entry is overwritten
23             '0629' => {province=>'aceh', cities=>'Kutacane (Kabupaten Aceh Tenggara)'},
24             '0641' => {province=>'aceh', cities=>'Kota Langsa'},
25             '0642' => {province=>'aceh', cities=>'Blang Kejeren (Kabupaten Gayo Lues)'},
26             '0643' => {province=>'aceh', cities=>'Takengon (Kabupaten Aceh Tengah)'},
27             '0644' => {province=>'aceh', cities=>'Bireuen (Kabupaten Bireuen)'},
28             '0645' => {province=>'aceh', cities=>'Kota Lhokseumawe'},
29             '0646' => {province=>'aceh', cities=>'Idi (Kabupaten Aceh Timur)'},
30             '0650' => {province=>'aceh', cities=>'Sinabang (Kabupaten Simeulue)'},
31             '0651' => {province=>'aceh', cities=>'Kota Banda Aceh - Jantho (Kabupaten Aceh Besar) - Lamno (Kabupaten Aceh Jaya)'},
32             '0652' => {province=>'aceh', cities=>'Kota Sabang'},
33             '0653' => {province=>'aceh', cities=>'Sigli (Kabupaten Pidie)'},
34             '0654' => {province=>'aceh', cities=>'Calang (Kabupaten Aceh Jaya)'},
35             '0655' => {province=>'aceh', cities=>'Meulaboh (Kabupaten Aceh Barat)'},
36             '0656' => {province=>'aceh', cities=>'Tapaktuan (Kabupaten Aceh Selatan)'},
37             '0657' => {province=>'aceh', cities=>'Bakongan (Kabupaten Aceh Selatan)'},
38             '0658' => {province=>'aceh', cities=>'Singkil (Kabupaten Aceh Singkil)'},
39             '0659' => {province=>'aceh', cities=>'Blangpidie (Kabupaten Aceh Barat Daya)'},
40              
41             '061' => {province=>'sumut', cities=>'Kota Medan - Kota Binjai - Stabat (Kabupaten Langkat)'},
42             '0620' => {province=>'sumut', cities=>'Pangkalan Brandan (Kabupaten Langkat)'},
43             '0621' => {province=>'sumut', cities=>'Kota Tebing Tinggi'},
44             '0622' => {province=>'sumut', cities=>'Kota Pematangsiantar'},
45             '0623' => {province=>'sumut', cities=>'Kisaran (Kabupaten Asahan) - Kota Tanjung Balai'},
46             '0624' => {province=>'sumut', cities=>'Rantau Prapat (Kabupaten Labuhanbatu)'},
47             '0625' => {province=>'sumut', cities=>'Parapat (Kabupaten Simalungun)'},
48             '0626' => {province=>'sumut', cities=>'Pangururan (Kabupaten Samosir)'},
49             '0627' => {province=>'sumut', cities=>'Sidikalang (Kabupaten Dairi) - Salak (Kabupaten Pakpak Bharat)'}, # same key as the aceh entry above; this one is kept
50             '0628' => {province=>'sumut', cities=>'Kabanjahe (Kabupaten Karo)'},
51             '0630' => {province=>'sumut', cities=>'Teluk Dalam (Kabupaten Nias Selatan)'},
52             '0631' => {province=>'sumut', cities=>'Kota Sibolga'},
53             '0632' => {province=>'sumut', cities=>'Balige (Kabupaten Toba Samosir)'}, # keyed '0632' so it no longer collides with '0636' (Panyabungan) below and get dropped -- TODO confirm against the numbering plan
54             '0633' => {province=>'sumut', cities=>'Tarutung (Kabupaten Tapanuli Utara)'},
55             '0634' => {province=>'sumut', cities=>'Kota Padang Sidempuan'},
56             '0635' => {province=>'sumut', cities=>'Gunung Tua (Kabupaten Padang Lawas Utara)'},
57             '0636' => {province=>'sumut', cities=>'Panyabungan (Kabupaten Mandailing Natal)'},
58             '0638' => {province=>'sumut', cities=>'Barus (Kabupaten Tapanuli Tengah)'},
59             '0639' => {province=>'sumut', cities=>'Kota Gunung Sitoli'},
60              
61             '0751' => {province=>'sumbar', cities=>'Kota Padang - Kota Pariaman'},
62             '0752' => {province=>'sumbar', cities=>'Kota Bukittinggi - Kota Padang Panjang - Kota Payakumbuh - Batusangkar (Kabupaten Tanah Datar)'},
63             '0753' => {province=>'sumbar', cities=>'Lubuk Sikaping (Kabupaten Pasaman)'},
64             '0754' => {province=>'sumbar', cities=>'Kabupaten Sijunjung'},
65             '0755' => {province=>'sumbar', cities=>'Kota Solok - Kabupaten Solok Selatan - Alahan Panjang (Kabupaten Solok)'},
66             '0756' => {province=>'sumbar', cities=>'Painan (Kabupaten Pesisir Selatan)'},
67             '0757' => {province=>'sumbar', cities=>'Balai Selasa (Kabupaten Agam)'},
68             '0759' => {province=>'sumbar', cities=>'Tuapejat (Kabupaten Kepulauan Mentawai)'},
69              
70             '0760' => {province=>'riau', cities=>'Teluk Kuantan (Kabupaten Kuantan Singingi)'},
71             '0761' => {province=>'riau', cities=>'Kota Pekanbaru - Pangkalan Kerinci (Kabupaten Pelalawan)'},
72             '0762' => {province=>'riau', cities=>'Bangkinang (Kabupaten Kampar)'},
73             '0763' => {province=>'riau', cities=>'Selatpanjang (Kabupaten Bengkalis)'},
74             '0764' => {province=>'riau', cities=>'Siak Sri Indrapura (Kabupaten Siak)'},
75             '0765' => {province=>'riau', cities=>'Kota Dumai - Duri (Kabupaten Bengkalis)'},
76             '0766' => {province=>'riau', cities=>'Bengkalis (Kabupaten Bengkalis)'},
77             '0767' => {province=>'riau', cities=>'Bagan Siapi-api (Kabupaten Rokan Hilir)'},
78             '0768' => {province=>'riau', cities=>'Tembilahan (Kabupaten Indragiri Hilir)'},
79             '0769' => {province=>'riau', cities=>'Rengat - Air Molek (Kabupaten Indragiri Hulu)'},
80              
81             '0771' => {province=>'kepriau', cities=>'Kota Tanjung Pinang'},
82             '0772' => {province=>'kepriau', cities=>'Tarempa (Kabupaten Kepulauan Anambas)'},
83             '0773' => {province=>'kepriau', cities=>'Ranai (Kabupaten Natuna)'},
84             '0776' => {province=>'kepriau', cities=>'Dabosingkep (Kabupaten Lingga)'},
85             '0777' => {province=>'kepriau', cities=>'Tanjung Balai Karimun (Kabupaten Karimun)'},
86             '0778' => {province=>'kepriau', cities=>'Kota Batam'},
87             '0779' => {province=>'kepriau', cities=>'Tanjungbatu (Kabupaten Karimun)'},
88              
89             '0740' => {province=>'jambi', cities=>'Mendahara - Muara Sabak (Kabupaten Tanjung Jabung Timur)'},
90             '0741' => {province=>'jambi', cities=>'Kota Jambi'},
91             '0742' => {province=>'jambi', cities=>'Kualatungkal (Kabupaten Tanjung Jabung Barat)'},
92             '0743' => {province=>'jambi', cities=>'Muara Bulian (Kabupaten Batanghari)'},
93             '0744' => {province=>'jambi', cities=>'Muara Tebo (Kabupaten Tebo)'},
94             '0745' => {province=>'jambi', cities=>'Sarolangun (Kabupaten Sarolangun)'},
95             '0746' => {province=>'jambi', cities=>'Bangko (Kabupaten Merangin)'},
96             '0747' => {province=>'jambi', cities=>'Muarabungo (Kabupaten Bungo)'},
97             '0748' => {province=>'jambi', cities=>'Kota Sungai Penuh'},
98              
99             '0711' => {province=>'sumsel', cities=>'Kota Palembang - Pangkalan Balai - Betung (Kabupaten Banyuasin) - Indralaya (Kabupaten Ogan Ilir)'},
100             '0712' => {province=>'sumsel', cities=>'Kayu Agung (Kabupaten Ogan Komering Ilir)'},
101             '0713' => {province=>'sumsel', cities=>'Kota Prabumulih'},
102             '0714' => {province=>'sumsel', cities=>'Sekayu (Kabupaten Musi Banyuasin)'},
103             '0730' => {province=>'sumsel', cities=>'Kota Pagar Alam'},
104             '0731' => {province=>'sumsel', cities=>'Lahat (Kabupaten Lahat)'},
105             '0733' => {province=>'sumsel', cities=>'Kota Lubuklinggau - Pendopo (Kabupaten Lahat)'},
106             '0734' => {province=>'sumsel', cities=>'Muara Enim (Kabupaten Muara Enim)'},
107             '0735' => {province=>'sumsel', cities=>'Baturaja (Kabupaten Ogan Komering Ulu)'},
108              
109             '0715' => {province=>'kbb', cities=>'Belinyu (Kabupaten Bangka)'},
110             '0716' => {province=>'kbb', cities=>'Muntok (Kabupaten Bangka Barat)'},
111             '0717' => {province=>'kbb', cities=>'Kota Pangkal Pinang - Sungailiat (Kabupaten Bangka)'},
112             '0718' => {province=>'kbb', cities=>'Koba (Kabupaten Bangka Tengah) - Toboali (Kabupaten Bangka Selatan)'},
113             '0719' => {province=>'kbb', cities=>'Manggar (Kabupaten Belitung Timur) - Tanjung Pandan (Kabupaten Belitung)'},
114              
115             '0732' => {province=>'bengkulu', cities=>'Curup (Kabupaten Rejang Lebong)'},
116             '0736' => {province=>'bengkulu', cities=>'Kota Bengkulu - Lais (Kabupaten Bengkulu Utara)'},
117             '0737' => {province=>'bengkulu', cities=>'Arga Makmur (Kabupaten Bengkulu Utara) - Mukomuko (Kabupaten Mukomuko)'},
118             '0738' => {province=>'bengkulu', cities=>'Muara Aman (Kabupaten Lebong)'},
119             '0739' => {province=>'bengkulu', cities=>'Bintuhan (Kabupaten Kaur) - Kota Manna (Kabupaten Bengkulu Selatan)'},
120              
121             '0721' => {province=>'lampung', cities=>'Kota Bandar Lampung'},
122             '0722' => {province=>'lampung', cities=>'Kota Agung (Kabupaten Tanggamus)'},
123             '0723' => {province=>'lampung', cities=>'Blambangan Umpu (Kabupaten Way Kanan)'},
124             '0724' => {province=>'lampung', cities=>'Kotabumi (Kabupaten Lampung Utara)'},
125             '0725' => {province=>'lampung', cities=>'Kota Metro'},
126             '0726' => {province=>'lampung', cities=>'Menggala (Kabupaten Tulang Bawang)'},
127             '0727' => {province=>'lampung', cities=>'Kalianda (Kabupaten Lampung Selatan)'},
128             '0728' => {province=>'lampung', cities=>'Kota Liwa (Kabupaten Lampung Barat)'},
129             '0729' => {province=>'lampung', cities=>'Pringsewu (Kabupaten Pringsewu)'},
130              
131             '021' => {province=>'dki/banten/jabar', cities=>'Kepulauan Seribu - Jakarta Barat - Jakarta Pusat - Jakarta Selatan - Jakarta Timur - Jakarta Utara/Tigaraksa (Kabupaten Tangerang) - Kota Tangerang - Kota Tangerang Selatan/Kota Bekasi - Cikarang (Kabupaten Bekasi) - Kota Depok - Cibinong (Kabupaten Bogor)'},
132              
133             '0252' => {province=>'banten', cities=>'Rangkasbitung (Kabupaten Lebak)'},
134             '0253' => {province=>'banten', cities=>'Pandeglang - Labuan (Kabupaten Pandeglang)'},
135             '0254' => {province=>'banten', cities=>'Kota Serang - Kabupaten Serang - Merak (Kota Cilegon)'},
136             '0257' => {province=>'banten', cities=>'Pasauran (Kabupaten Serang)'},
137              
138             '022' => {province=>'jabar', cities=>'Kota Bandung - Kota Cimahi - Soreang (Kabupaten Bandung) - Lembang - Ngamprah (Kabupaten Bandung Barat)'},
139             '0231' => {province=>'jabar', cities=>'Kota Cirebon - Sumber - Losari (Kabupaten Cirebon)'},
140             '0232' => {province=>'jabar', cities=>'Kabupaten Kuningan'},
141             '0233' => {province=>'jabar', cities=>'Kadipaten (Kabupaten Majalengka)'},
142             '0234' => {province=>'jabar', cities=>'Jatibarang (Kabupaten Indramayu)'},
143             '0251' => {province=>'jabar', cities=>'Kota Bogor'},
144             '0260' => {province=>'jabar', cities=>'Pamanukan (Kabupaten Subang)'},
145             '0261' => {province=>'jabar', cities=>'Kabupaten Sumedang'},
146             '0262' => {province=>'jabar', cities=>'Kabupaten Garut'},
147             '0263' => {province=>'jabar', cities=>'Kabupaten Cianjur'},
148             '0264' => {province=>'jabar', cities=>'Kabupaten Purwakarta - Cikampek)'}, # NOTE(review): unbalanced ')' in the cities text; left as-is because the intended wording is not known
149             '0265' => {province=>'jabar', cities=>'Kota Tasikmalaya - Kadipaten - Singaparna (Kabupaten Tasikmalaya) - Kota Banjar - Ciamis - Pangandaran (Kabupaten Ciamis)'},
150             '0266' => {province=>'jabar', cities=>'Kota Sukabumi - Palabuhanratu (Kabupaten Sukabumi)'},
151             '0267' => {province=>'jabar', cities=>'Kabupaten Karawang'},
152              
153             '024' => {province=>'jateng', cities=>'Semarang, Ungaran'},
154             '0271' => {province=>'jateng', cities=>'Surakarta (Solo), Kartasura, Sukoharjo, Karanganyar, Sragen'},
155             '0272' => {province=>'jateng', cities=>'Klaten'},
156             '0273' => {province=>'jateng', cities=>'Wonogiri'},
157             '0275' => {province=>'jateng', cities=>'Purworejo,Kutoarjo'},
158             '0276' => {province=>'jateng', cities=>'Boyolali'},
159             '0280' => {province=>'jateng', cities=>'Majenang, Sidareja (Kabupaten Cilacap bagian barat)'},
160             '0281' => {province=>'jateng', cities=>'Purwokerto, Banyumas, Purbalingga'},
161             '0282' => {province=>'jateng', cities=>'Cilacap (bagian timur)'},
162             '0283' => {province=>'jateng', cities=>'Tegal, Slawi, Brebes'},
163             '0284' => {province=>'jateng', cities=>'Pemalang'},
164             '0285' => {province=>'jateng', cities=>'Pekalongan, Batang (bagian barat)'},
165             '0286' => {province=>'jateng', cities=>'Banjarnegara, Wonosobo'},
166             '0287' => {province=>'jateng', cities=>'Kebumen, Gombong'},
167             '0289' => {province=>'jateng', cities=>'Bumiayu (Kabupaten Brebes bagian selatan)'},
168             '0291' => {province=>'jateng', cities=>'Demak, Jepara, Kudus'},
169             '0292' => {province=>'jateng', cities=>'Purwodadi'},
170             '0293' => {province=>'jateng', cities=>'Magelang, Mungkid, Temanggung'},
171             '0294' => {province=>'jateng', cities=>'Kendal, Kaliwungu, Weleri, Batang (bagian timur)'},
172             '0295' => {province=>'jateng', cities=>'Pati, Rembang, Lasem'},
173             '0296' => {province=>'jateng', cities=>'Blora, Cepu'},
174             '0297' => {province=>'jateng', cities=>'Karimun Jawa'},
175             '0298' => {province=>'jateng', cities=>'Salatiga, Ambarawa (Kabupaten Semarang bagian tengah dan selatan)'},
176             '0356' => {province=>'jateng', cities=>'Rembang bagian Timur (wilayah yang berbatasan dengan Tuban)'}, # NOTE(review): '0356' is repeated under jatim (Tuban) below, so this entry is overwritten
177              
178             '0274' => {province=>'diy', cities=>'Yogyakarta, Sleman, Wates, Bantul, Wonosari'},
179              
180             '031' => {province=>'jatim', cities=>'Surabaya, Gresik, Sidoarjo, Bangkalan'},
181             '0321' => {province=>'jatim', cities=>'Mojokerto, Jombang'},
182             '0322' => {province=>'jatim', cities=>'Lamongan, Babat'},
183             '0323' => {province=>'jatim', cities=>'Sampang'},
184             '0324' => {province=>'jatim', cities=>'Pamekasan'},
185             '0325' => {province=>'jatim', cities=>'Sangkapura (Bawean)'},
186             '0327' => {province=>'jatim', cities=>'Kepulauan Kangean, Kepulauan Masalembu'},
187             '0328' => {province=>'jatim', cities=>'Sumenep'},
188             '0331' => {province=>'jatim', cities=>'Jember'},
189             '0332' => {province=>'jatim', cities=>'Bondowoso, Sukosari, Prajekan'},
190             '0333' => {province=>'jatim', cities=>'Banyuwangi, Muncar'},
191             '0334' => {province=>'jatim', cities=>'Lumajang'},
192             '0335' => {province=>'jatim', cities=>'Probolinggo, Kraksaan'},
193             '0336' => {province=>'jatim', cities=>'Ambulu, Puger (Kabupaten Jember bagian selatan)'},
194             '0338' => {province=>'jatim', cities=>'Situbondo, Besuki'},
195             '0341' => {province=>'jatim', cities=>'Malang, Kepanjen, Batu'},
196             '0342' => {province=>'jatim', cities=>'Blitar, Wlingi'},
197             '0343' => {province=>'jatim', cities=>'Pasuruan, Pandaan, Gempol'},
198             '0351' => {province=>'jatim', cities=>'Madiun, Caruban, Magetan, Ngawi'},
199             '0352' => {province=>'jatim', cities=>'Ponorogo'},
200             '0353' => {province=>'jatim', cities=>'Bojonegoro'},
201             '0354' => {province=>'jatim', cities=>'Kediri, Pare'},
202             '0355' => {province=>'jatim', cities=>'Tulungagung, Trenggalek'},
203             '0356' => {province=>'jatim', cities=>'Tuban'}, # same key as the jateng entry above; this one is kept
204             '0357' => {province=>'jatim', cities=>'Pacitan'},
205             '0358' => {province=>'jatim', cities=>'Nganjuk, Kertosono'},
206              
207             '0361' => {province=>'bali', cities=>'Denpasar, Gianyar, Kuta, Tabanan, Tampaksiring, Ubud'},
208             '0362' => {province=>'bali', cities=>'Singaraja'},
209             '0363' => {province=>'bali', cities=>'Amlapura'},
210             '0365' => {province=>'bali', cities=>'Negara, Gilimanuk'},
211             '0366' => {province=>'bali', cities=>'Klungkung, Kintamani'},
212             '0368' => {province=>'bali', cities=>'Baturiti'},
213              
214             '0364' => {province=>'ntb', cities=>'Kota Mataram'},
215             '0370' => {province=>'ntb', cities=>'Mataram, Praya'},
216             '0371' => {province=>'ntb', cities=>'Sumbawa'},
217             '0372' => {province=>'ntb', cities=>'Alas, Taliwang'},
218             '0373' => {province=>'ntb', cities=>'Dompu'},
219             '0374' => {province=>'ntb', cities=>'Bima'},
220             '0376' => {province=>'ntb', cities=>'Selong'},
221              
222             '0380' => {province=>'ntt', cities=>'Kupang, Baa (Roti)'},
223             '0381' => {province=>'ntt', cities=>'Ende'},
224             '0382' => {province=>'ntt', cities=>'Maumere'},
225             '0383' => {province=>'ntt', cities=>'Larantuka'},
226             '0384' => {province=>'ntt', cities=>'Bajawa'},
227             '0385' => {province=>'ntt', cities=>'Labuhanbajo, Ruteng'},
228             '0386' => {province=>'ntt', cities=>'Kalabahi'},
229             '0387' => {province=>'ntt', cities=>'Waingapu, Waikabubak'},
230             '0388' => {province=>'ntt', cities=>'Kefamenanu, Soe'},
231             '0389' => {province=>'ntt', cities=>'Atambua'},
232              
233             '0561' => {province=>'kalbar', cities=>'Pontianak, Mempawah'},
234             '0562' => {province=>'kalbar', cities=>'Sambas, Singkawang, Bengkayang'},
235             '0563' => {province=>'kalbar', cities=>'Ngabang'},
236             '0564' => {province=>'kalbar', cities=>'Sanggau'},
237             '0565' => {province=>'kalbar', cities=>'Sintang'},
238             '0567' => {province=>'kalbar', cities=>'Putussibau'},
239             '0568' => {province=>'kalbar', cities=>'Nanga Pinoh'},
240             '0534' => {province=>'kalbar', cities=>'Ketapang'}, # NOTE(review): '0534' is repeated under kalteng (Kendawangan) below, so this entry is overwritten
241              
242             '0513' => {province=>'kalteng', cities=>'Kuala Kapuas, Pulang Pisau'},
243             '0519' => {province=>'kalteng', cities=>'Muara Teweh'},
244             '0522' => {province=>'kalteng', cities=>'Ampah (Dusun Tengah, Barito Timur)'},
245             '0525' => {province=>'kalteng', cities=>'Buntok'},
246             '0526' => {province=>'kalteng', cities=>'Tamiang Layang'}, # NOTE(review): '0526' is repeated under kalsel (Tanjung) below, so this entry is overwritten
247             '0528' => {province=>'kalteng', cities=>'Purukcahu'},
248             '0531' => {province=>'kalteng', cities=>'Sampit'},
249             '0532' => {province=>'kalteng', cities=>'Pangkalan Bun, Kumai'},
250             '0534' => {province=>'kalteng', cities=>'Kendawangan'}, # same key as the kalbar entry above; this one is kept
251             '0536' => {province=>'kalteng', cities=>'Palangkaraya, Kasongan'},
252             '0537' => {province=>'kalteng', cities=>'Kuala Kurun'},
253             '0538' => {province=>'kalteng', cities=>'Kuala Pembuang'},
254             '0539' => {province=>'kalteng', cities=>'Kuala Kuayan (Mentaya Hulu, Kotawaringin Timur)'},
255              
256             '0511' => {province=>'kalsel', cities=>'Banjarmasin, Banjarbaru, Martapura, Marabahan'},
257             '0512' => {province=>'kalsel', cities=>'Pelaihari'},
258             '0517' => {province=>'kalsel', cities=>'Kandangan, Barabai, Rantau, Negara'},
259             '0518' => {province=>'kalsel', cities=>'Kotabaru, Batulicin'},
260             '0526' => {province=>'kalsel', cities=>'Tanjung'}, # same key as the kalteng entry above; this one is kept
261             '0527' => {province=>'kalsel', cities=>'Amuntai'},
262              
263             '0541' => {province=>'kaltim', cities=>'Samarinda, Tenggarong'},
264             '0542' => {province=>'kaltim', cities=>'Balikpapan'},
265             '0543' => {province=>'kaltim', cities=>'Tanah Grogot'},
266             '0545' => {province=>'kaltim', cities=>'Melak'},
267             '0548' => {province=>'kaltim', cities=>'Bontang'},
268             '0549' => {province=>'kaltim', cities=>'Sangatta'},
269             '0551' => {province=>'kaltim', cities=>'Tarakan'},
270             '0552' => {province=>'kaltim', cities=>'Tanjungselor'},
271             '0553' => {province=>'kaltim', cities=>'Malinau'},
272             '0554' => {province=>'kaltim', cities=>'Tanjung Redeb'},
273             '0556' => {province=>'kaltim', cities=>'Nunukan'},
274              
275             '0430' => {province=>'sulut', cities=>'Amurang'},
276             '0431' => {province=>'sulut', cities=>'Manado, Tomohon, Tondano'},
277             '0432' => {province=>'sulut', cities=>'Tahuna'},
278             '0434' => {province=>'sulut', cities=>'Kotamobagu'},
279             '0438' => {province=>'sulut', cities=>'Bitung'},
280              
281             '0435' => {province=>'gorontalo', cities=>'Gorontalo, Limboto'},
282             '0443' => {province=>'gorontalo', cities=>'Marisa'},
283              
284             '0450' => {province=>'sulteng', cities=>'Parigi'},
285             '0451' => {province=>'sulteng', cities=>'Palu'},
286             '0452' => {province=>'sulteng', cities=>'Poso'},
287             '0453' => {province=>'sulteng', cities=>'Tolitoli'},
288             '0457' => {province=>'sulteng', cities=>'Donggala'},
289             '0458' => {province=>'sulteng', cities=>'Tentena'},
290             '0461' => {province=>'sulteng', cities=>'Luwuk'},
291             '0462' => {province=>'sulteng', cities=>'Banggai'},
292             '0463' => {province=>'sulteng', cities=>'Bunta'},
293             '0464' => {province=>'sulteng', cities=>'Ampana'},
294             '0465' => {province=>'sulteng', cities=>'Kolonedale'},
295             '0455' => {province=>'sulteng', cities=>'kotaraya,moutong'},
296              
297             '0422' => {province=>'sulbar', cities=>'Majene'}, # NOTE(review): '0422' is repeated under sulsel ('Manene') below, so this entry is overwritten
298             '0426' => {province=>'sulbar', cities=>'Mamuju'},
299             '0428' => {province=>'sulbar', cities=>'Polewali'}, # NOTE(review): '0428' is repeated under sulsel (Wonomulyo) below, so this entry is overwritten
300              
301             '0410' => {province=>'sulsel', cities=>'Pangkep'},
302             '0411' => {province=>'sulsel', cities=>'Makassar, Maros, Sungguminasa'},
303             '0413' => {province=>'sulsel', cities=>'Bulukumba'},
304             '0414' => {province=>'sulsel', cities=>'Bantaeng (Selayar)'},
305             '0417' => {province=>'sulsel', cities=>'Malino'},
306             '0418' => {province=>'sulsel', cities=>'Takalar'},
307             '0419' => {province=>'sulsel', cities=>'Janeponto'},
308             '0420' => {province=>'sulsel', cities=>'Enrekang'},
309             '0421' => {province=>'sulsel', cities=>'Parepare, Pinrang'},
310             '0422' => {province=>'sulsel', cities=>'Manene'}, # same key as the sulbar entry above; this one is kept
311             '0423' => {province=>'sulsel', cities=>'Makale, Rantepao'},
312             '0427' => {province=>'sulsel', cities=>'Barru'},
313             '0428' => {province=>'sulsel', cities=>'Wonomulyo'}, # same key as the sulbar entry above; this one is kept
314             '0471' => {province=>'sulsel', cities=>'Palopo'},
315             '0472' => {province=>'sulsel', cities=>'Pitumpanua'},
316             '0473' => {province=>'sulsel', cities=>'Masamba'},
317             '0474' => {province=>'sulsel', cities=>'Malili'},
318             '0475' => {province=>'sulsel', cities=>'Soroako'},
319             '0481' => {province=>'sulsel', cities=>'Watampone'},
320             '0482' => {province=>'sulsel', cities=>'Sinjai'},
321             '0484' => {province=>'sulsel', cities=>'Watansoppeng'},
322             '0485' => {province=>'sulsel', cities=>'Sengkang'},
323              
324             '0401' => {province=>'sultra', cities=>'Kendari'},
325             '0402' => {province=>'sultra', cities=>'Baubau'},
326             '0403' => {province=>'sultra', cities=>'Raha'},
327             '0404' => {province=>'sultra', cities=>'Wanci'},
328             '0405' => {province=>'sultra', cities=>'Kolaka'},
329             '0408' => {province=>'sultra', cities=>'Unaaha'},
330              
331             '0910' => {province=>'maluku', cities=>'Bandanaira'},
332             '0911' => {province=>'maluku', cities=>'Ambon'},
333             '0913' => {province=>'maluku', cities=>'Namlea'},
334             '0914' => {province=>'maluku', cities=>'Masohi'},
335             '0915' => {province=>'maluku', cities=>'Bula'},
336             '0916' => {province=>'maluku', cities=>'Tual'},
337             '0917' => {province=>'maluku', cities=>'Dobo'},
338             '0918' => {province=>'maluku', cities=>'Saumlaku'},
339             '0921' => {province=>'maluku', cities=>'Soasiu'},
340             '0922' => {province=>'maluku', cities=>'Jailolo'},
341             '0923' => {province=>'maluku', cities=>'Morotai'},
342             '0924' => {province=>'maluku', cities=>'Tobelo'},
343             '0927' => {province=>'maluku', cities=>'Labuha'},
344             '0929' => {province=>'maluku', cities=>'Sanana'},
345             '0931' => {province=>'maluku', cities=>'Saparua'},
346             '0901' => {province=>'maluku', cities=>'Timika, Tembagapura'},
347              
348             '0902' => {province=>'papua', cities=>'Agats (Asmat)'},
349             '0951' => {province=>'papua', cities=>'Sorong'},
350             '0952' => {province=>'papua', cities=>'Teminabuan'},
351             '0955' => {province=>'papua', cities=>'Bintuni'},
352             '0956' => {province=>'papua', cities=>'Fakfak'},
353             '0957' => {province=>'papua', cities=>'Kaimana'},
354             '0966' => {province=>'papua', cities=>'Sarmi'},
355             '0967' => {province=>'papua', cities=>'Jayapura, Abepura'},
356             '0969' => {province=>'papua', cities=>'Wamena'},
357             '0971' => {province=>'papua', cities=>'Merauke'},
358             '0975' => {province=>'papua', cities=>'Tanahmerah'},
359             '0980' => {province=>'papua', cities=>'Ransiki'},
360             '0981' => {province=>'papua', cities=>'Biak'},
361             '0983' => {province=>'papua', cities=>'Serui'},
362             '0984' => {province=>'papua', cities=>'Nabire'},
363             '0985' => {province=>'papua', cities=>'Nabire'},
364             '0986' => {province=>'papua', cities=>'Manokwari'},
365             );
366              
367             my %cell_prefixes = ( # 4-digit mobile prefix (with leading 0) => {operator, optional product, is_gsm or is_cdma flag}
368             '0811' => {operator=>'telkomsel', product=>'halo', is_gsm=>1},
369             '0812' => {operator=>'telkomsel', product=>'halo/simpati', is_gsm=>1},
370             '0813' => {operator=>'telkomsel', product=>'simpati', is_gsm=>1},
371             '0814' => {operator=>'indosat', product=>'matrix', is_gsm=>1},
372             '0815' => {operator=>'indosat', product=>'matrix/mentari', is_gsm=>1},
373             '0816' => {operator=>'indosat', product=>'matrix/mentari', is_gsm=>1},
374             '0817' => {operator=>'xl', is_gsm=>1}, # entries without a product key simply omit it
375             '0818' => {operator=>'xl', is_gsm=>1},
376             '0819' => {operator=>'xl', is_gsm=>1},
377             '0821' => {operator=>'telkomsel', product=>'simpati', is_gsm=>1},
378             '0822' => {operator=>'telkomsel', product=>'simpati', is_gsm=>1},
379             '0823' => {operator=>'telkomsel', product=>'as', is_gsm=>1},
380             '0828' => {operator=>'sampoerna', product=>'ceria', is_gsm=>1},
381             #'08315' => {operator=>'nts', is_gsm=>1}, # disabled 5-digit prefix; every active key in this table is 4 digits
382             '0831' => {operator=>'axis', is_gsm=>1},
383             '0832' => {operator=>'axis', is_gsm=>1},
384             '0838' => {operator=>'axis', is_gsm=>1},
385             '0852' => {operator=>'telkomsel', product=>'as', is_gsm=>1},
386             '0853' => {operator=>'telkomsel', product=>'as', is_gsm=>1}, # fress
387             '0855' => {operator=>'indosat', product=>'matrix bright', is_gsm=>1},
388             '0856' => {operator=>'indosat', product=>'im3', is_gsm=>1},
389             '0857' => {operator=>'indosat', product=>'im3', is_gsm=>1},
390             '0858' => {operator=>'indosat', product=>'mentari', is_gsm=>1},
391             '0859' => {operator=>'xl', is_gsm=>1},
392             #'08681' => {operator=>'psn', product=>'byru', is_gsm=>0}, # satellite; disabled 5-digit form of the '0868' entry below
393             '0868' => {operator=>'psn', product=>'byru', is_gsm=>0}, # satellite; the only entry with is_gsm explicitly set to 0
394             '0877' => {operator=>'xl', product=>'axiata', is_gsm=>1},
395             '0878' => {operator=>'xl', product=>'axiata', is_gsm=>1},
396             '0879' => {operator=>'xl', product=>'axiata', is_gsm=>1},
397             '0881' => {operator=>'smartfren', is_cdma=>1}, # smartfren entries carry is_cdma and leave is_gsm unset
398             '0882' => {operator=>'smartfren', is_cdma=>1},
399             '0883' => {operator=>'smartfren', is_cdma=>1},
400             '0884' => {operator=>'smartfren', is_cdma=>1},
401             '0885' => {operator=>'smartfren', is_cdma=>1},
402             '0886' => {operator=>'smartfren', is_cdma=>1},
403             '0887' => {operator=>'smartfren', is_cdma=>1},
404             '0888' => {operator=>'smartfren', is_cdma=>1},
405             '0889' => {operator=>'smartfren', is_cdma=>1},
406             '0896' => {operator=>'three', is_gsm=>1},
407             '0897' => {operator=>'three', is_gsm=>1},
408             '0898' => {operator=>'three', is_gsm=>1},
409             '0899' => {operator=>'three', is_gsm=>1},
410             );
411              
412             my %fwa_prefixes = ( # 2- or 3-digit numeric prefix => {operator, optional product}; presumably matched against the leading digits of the local number -- TODO confirm in the parser
413             30 => {operator=>'indosat', product=>'starone'},
414             32 => {operator=>'telkom', product=>'flexi'},
415             #39 is fixed telcom
416             40 => {operator=>'telkom', product=>'flexi'},
417             50 => {operator=>'telkom', product=>'flexi'},
418             60 => {operator=>'indosat', product=>'starone'},
419             62 => {operator=>'indosat', product=>'starone'},
420             68 => {operator=>'telkom', product=>'flexi'},
421             70 => {operator=>'telkom', product=>'flexi'},
422             710 => {operator=>'telkom', product=>'flexi'}, # the 71x range is split at three digits: 710-716 carry an operator, 717-719 are empty
423             711 => {operator=>'telkom', product=>'flexi'},
424             712 => {operator=>'telkom', product=>'flexi'},
425             713 => {operator=>'telkom', product=>'flexi'},
426             714 => {operator=>'telkom', product=>'flexi'},
427             715 => {operator=>'telkom', product=>'flexi'},
428             716 => {operator=>'telkom', product=>'flexi'},
429             717 => {}, # land; empty hashref means no operator/product is recorded for this prefix
430             718 => {}, # land
431             719 => {}, # land
432             72 => {}, # land
433             73 => {}, # land
434             74 => {}, # land
435             75 => {}, # land
436             76 => {}, # land
437             77 => {}, # land
438             78 => {}, # land
439             79 => {}, # land
440             80 => {operator=>'esia'}, # esia entries carry no product key
441             81 => {operator=>'esia'}, # jkt
442             82 => {operator=>'esia'}, # assumed 8x
443             83 => {operator=>'esia'},
444             84 => {operator=>'esia'}, # assumed 8x
445             85 => {operator=>'esia'}, # jkt
446             86 => {operator=>'esia'}, # assumed 8x
447             87 => {operator=>'esia'}, # jkt
448             88 => {operator=>'esia'}, # assumed 8x
449             89 => {operator=>'esia'},
450             90 => {operator=>'esia'}, # assumed 9x
451             91 => {operator=>'esia'},
452             92 => {operator=>'esia'},
453             93 => {operator=>'esia'},
454             94 => {operator=>'esia'}, # assumed 9x
455             95 => {operator=>'esia'}, # assumed 9x
456             96 => {operator=>'esia'}, # assumed 9x
457             97 => {operator=>'esia'}, # assumed 9x
458             98 => {operator=>'esia'},
459             99 => {operator=>'esia'},
460             );
461              
our %SPEC;

# Package-level metadata.
$SPEC{':package'} = {
    summary => 'Parse Indonesian phone numbers',
    v       => 1.1,
};

# Argument specification used by the metadata of extract_id_phones below.
my $extract_args = {
    text => {
        schema  => 'str*',
        summary => 'Text containing phone numbers to extract from',
        req     => 1,
        pos     => 0,
    },
    max_numbers => {schema => 'int'},
    default_area_code => {
        schema  => ['str', {match => qr/^0\d{2,3}$/}],
        summary => 'When encountering a number without area code, use this',
        description => <<'_',

If you want to extract numbers that doesn't contain area code (e.g. 7123 4567),
you'll need to provide this.

_
    },
    level => {
        schema  => ['int', {default => 5, between => [1, 9]}],
        summary => 'How hard should the function extract numbers (1-9)',
        description => <<'_',

The higher the level, the harder this function will try finding phone numbers,
but the higher the risk of false positives will be. E.g. in text
'123456789012345' with level=5 it will not find a phone number, but with level=9
it might assume, e.g. 1234567890 to be a phone number. Normally leaving level at
default level is fine.

_
    },
};

# Metadata for extract_id_phones(); the function returns its result directly
# (result_naked), not wrapped in an envelope.
$SPEC{extract_id_phones} = {
    v            => 1.1,
    summary      => 'Extract phone number(s) from text',
    args         => $extract_args,
    result_naked => 1,
    description  => <<'_',

Extracts phone number(s) from text. Return an array of one or more parsed phone
number structure (a hash). Understands the list of known area codes and cellular
operators, as well as other information. Understands various syntax e.g.
+62.22.1234567, (022) 123-4567, 022-123-4567 ext 102, and even things like
7123456/57 (2 adjacent numbers).

Extraction algorithm is particularly targetted at classified ads text in
Indonesian language, but should be quite suitable for any other normal text.

Non-Indonesian phone numbers (e.g. +65 12 3456 7890) will still be extracted,
but without any other detailed information other than country code.

_
};
# Extract phone numbers from free text.
#
# Named arguments:
#   text              - the text to scan
#   level             - 1-9, default 5; enables progressively looser passes
#                       (>=1 indicator + known area code, >=2 indicator + any
#                       prefix/local, >=3 bare country-code or known-prefix
#                       numbers, >=5 bare prefix/local numbers, >=6 also
#                       preprocess the text: join spaced-out digits and map
#                       look-alike letters to digits)
#   default_area_code - area code assumed for numbers found without one; the
#                       "local number only" passes are skipped when undefined
#   max_numbers       - if defined and > 0, keep at most this many numbers
#
# Returns an array reference of hashes, one per number found (possibly empty).
# Each hash carries 'raw', 'standard', the internal '_level'/'_order'/'_pat'
# keys and whatever _add_info() adds.
#
# Each pass records its matches and then removes the matched text, so that
# later (looser) passes cannot match the same digits again.
sub extract_id_phones {
    my %args = @_;
    my $text  = $args{text};
    my $level = $args{level} // 5;
    my $defac = $args{default_area_code};

    log_trace("text = %s", $text);

    my %nums; # normalized num => {_level=>..., _order=>..., raw=>..., ...}

    # The patterns are built once, on the first call.
    # note: capital prefix means it has capturing group
    state $_Cc_prefix_local;
    state $_Kprefix_local;
    state $_Cc_karea_local_ext;
    state $_Karea_local_ext;
    state $_Prefix_local;
    state $_Klocal;
    state $_Local;
    state $_Indicator;
    state $_sep;
    state $_start_w;
    state $_start_d;
    state $_end_d;
    state $_Adjacent;
    if (!$_Prefix_local) {
        # known prefixes
        $_start_w = '(?:\A|\b)';
        $_start_d = '(?:\A|(?<=\D))';
        $_end_d   = '(?:\z|(?=\D))';
        my $_kprefix =
            '(?:'.join("|",sort(keys %area_codes, keys %cell_prefixes)).')';
        my $_karea = '(?:'.join("|",sort keys %area_codes).')';
        # known area codes without the leading zero (used after a country code)
        my @_kareanz = map { my $code = $_; $code =~ s/^0//; $code }
            keys %area_codes;
        my $_kareanz = '(?:'.join("|",sort @_kareanz).')';
        # XXX currently ignores 08681
        my $_prefix   = '(?:0[1-9](?:[0-9]){1,2})';
        my $_prefixnz = '(?:[1-9](?:[0-9]){1,2})';
        $_sep = '(?:\s+|\.|-)';
        my $_cc = '(?:\+[1-9][0-9]{1,2})';

        $_Local = '(\d{5,8}|(?:\d'.$_sep.'?){4,7}\d)';

        # heuristic: a local number starting with a prefix listed in
        # %fwa_prefixes is 7-8 digits long in total; anything else must be 5-7
        # digits, none of them 0 or 1. Unlike $_Local, no in-between
        # separators are accepted here (stricter on purpose).
        my @_klocal;
        for my $known (keys %fwa_prefixes) {
            my $len = length($known);
            push @_klocal, sprintf("%s\\d{%d,%d}", $known, 7-$len, 8-$len);
        }
        $_Klocal = '(' . join("|", @_klocal, '[2-9]{5,7}'). ')';

        my $_Ext =
            qr!((?:extension|ekstensi|ext?|ekst?)(?:\s|:|\.)*(?:\d{1,5}))!ix;

        $_Kprefix_local = # (021) 123-4567, 021-123-4567
            qr!(\(\s*$_kprefix\s*\)|$_kprefix) $_sep* $_Local!sx;
        $_Prefix_local = # same as above, but w/o checking known prefixes
            qr!(\(\s*$_prefix\s*\)|$_prefix) $_sep* $_Local!sx;
        $_Karea_local_ext = # (021) 123-4567 ext 102, mobile assumed has no ext
            qr!(\(\s*$_karea\s*\)|$_karea) $_sep*
               $_Local $_sep*
               $_Ext!sx;
        $_Cc_prefix_local = # (+62) 22 123-4567, 62 812 123-4567
            qr!(\(\s*$_cc\s*\)|$_cc) $_sep*
               (\(\s*$_prefixnz\s*\)|$_prefixnz) $_sep*
               $_Local!sx;
        $_Cc_karea_local_ext = # (+62) 22 123-4567 ext 1000
            qr!(\(\s*$_cc\s*\)|$_cc) $_sep*
               (\(\s*$_kareanz\s*\)|$_kareanz) $_sep*
               $_Local $_sep*
               $_Ext!sx;
        $_Indicator = qr!(
                             menghubungi|hubungi|hub|
                             contact|kontak|mengontak|mengkontak|
                             nomor|nomer|no|num|
                             to|ke|
                             tele?pon|tilpun|tilp|te?lp|tel|tl?|
                             phone|ph|
                             handphone|h\.?p|ponsel|cellular|cell|
                             faximile|facsimile|faksimile|fax|facs|faks|f
                         )(?:\s*|\.|:)*!ix;
        $_Adjacent = qr!(\s*/\s*\d\d?)!;
    }

    # Decides whether a captured indicator word announces a fax number. One
    # shared, case-insensitive pattern is used by every indicator pass so they
    # cannot drift apart; it covers all fax spellings accepted by $_Indicator
    # (fax..., faks..., facs..., and a lone "f").
    my $fax_re = qr/fax|faks|facs|\bf\b/i;

    # preprocess text: 0 1 2 3 4 5 -> 012345
    if ($level >= 6) {
        state $_remove_spaces = sub {
            my ($digits) = @_;
            $digits =~ s/\s//sg;
            $digits;
        };
        my $oldtext = $text;
        $text =~ s/((?:\d\s){4,}\d)/$_remove_spaces->($1)/seg;
        log_trace("Preprocess text: remove spaces: %s", $text)
            if $oldtext ne $text;
    }

    # preprocess text: O (letter O) as 0 and l/I/| as 1
    if ($level >= 6) {
        state $diglets = {o=>0, O=>0, l=>1, '|'=>1, I=>1, S=>5};
        state $lets = join("", keys %$diglets);
        state $_replace_lets = sub {
            my ($run) = @_;
            $run =~ s!(.)!defined($diglets->{$1}) ? $diglets->{$1} : $1!eg;
            $run;
        };
        my $oldtext = $text;
        $text =~ s/((?:[0-9$lets](?:\s+|-|\.)?){5,})/$_replace_lets->($1)/eg;
        log_trace("Preprocess text: letters->digits: %s", $text)
            if $oldtext ne $text;
    }

    # TODO: preprocess text: words as numbers (nol satu delapan ...)

    my $i;   # per-pass match counter, becomes _order (restarts at every pass)
    my @r;   # raw matches of the current pass, removed from $text afterwards

    # first, try to find numbers tacked after some indicator, e.g. Hub: blah,
    # T.blah, etc.
    if ($level >= 1) {
        $i = 0; @r = ();
        while ($text =~ m!($_start_w $_Indicator $_sep*
                           $_Cc_karea_local_ext $_end_d)!xg) {
            push @r, $1;
            my $ind = $2;
            my $num = _normalize($3, $4, $5, $6);
            $nums{$num} //= {_level=>2, _order=>++$i, raw=>$1,
                             _pat=>"ind+cc+karea+local+ext"};
            $nums{$num}{is_fax} = 1 if $ind =~ $fax_re;
        }
        _remove_text(\$text, \@r);

        $i = 0; @r = ();
        while ($text =~ m!($_start_w $_Indicator $_sep*
                           $_Cc_prefix_local $_end_d)!xg) {
            push @r, $1;
            my $ind = $2;
            my $num = _normalize($3, $4, $5);
            $nums{$num} //= {_level=>2, _order=>++$i, raw=>$1,
                             _pat=>"ind+cc+prefix+local"};
            $nums{$num}{is_fax} = 1 if $ind =~ $fax_re;
        }
        _remove_text(\$text, \@r);

        $i = 0; @r = ();
        while ($text =~ m!($_start_w $_Indicator $_Karea_local_ext
                           $_end_d)!xg) {
            push @r, $1;
            my $ind = $2;
            my $num = _normalize(undef, $3, $4, $5);
            $nums{$num} //= {_level=>1, _order=>++$i, raw=>$1,
                             _pat=>"ind+karea+local+ext"};
            $nums{$num}{is_fax} = 1 if $ind =~ $fax_re;
        }
        _remove_text(\$text, \@r);

        $i = 0; @r = ();
        while ($text =~ m!($_start_w $_Indicator $_Kprefix_local
                           $_Adjacent? $_end_d)!xg) {
            push @r, $1;
            my $ind = $2;
            my $num = _normalize(undef, $3, $4);
            my $adj = $5;
            $nums{$num} //= {_level=>1, _order=>++$i, raw=>$1,
                             _pat=>"ind+kprefix+local"};
            # this test used to be case-sensitive, unlike all its siblings
            $nums{$num}{is_fax} = 1 if $ind =~ $fax_re;
            _add_adjacent(\%nums, $num, $adj);
        }
        _remove_text(\$text, \@r);
    }
    if ($level >= 2) {
        $i = 0; @r = ();
        while (defined($defac) &&
                   $text =~ m!($_start_w $_Indicator $_sep* $_Klocal
                               $_Adjacent? $_end_d)!xg) {
            push @r, $1;
            my $ind = $2;
            my $num = _normalize(undef, $defac, $3);
            my $adj = $4;
            $nums{$num} //= {_level=>2, _order=>++$i, raw=>$1,
                             _pat=>"ind+klocal"};
            $nums{$num}{is_fax} = 1 if $ind =~ $fax_re;
            _add_adjacent(\%nums, $num, $adj);
        }
        _remove_text(\$text, \@r);
    }
    if ($level >= 2) {
        $i = 0; @r = ();
        while ($text =~ m!($_start_w $_Indicator $_sep* $_Prefix_local
                           $_Adjacent? $_end_d)!xg) {
            push @r, $1;
            my $ind = $2;
            my $num = _normalize(undef, $3, $4);
            my $adj = $5;
            $nums{$num} //= {_level=>2, _order=>++$i, raw=>$1,
                             _pat=>"ind+prefix+local"};
            $nums{$num}{is_fax} = 1 if $ind =~ $fax_re;
            _add_adjacent(\%nums, $num, $adj);
        }
        _remove_text(\$text, \@r);

        $i = 0; @r = ();
        while (defined($defac) &&
                   $text =~ m!($_start_w $_Indicator $_sep* $_Local
                               $_Adjacent? $_end_d)!xg) {
            push @r, $1;
            my $ind = $2;
            my $num = _normalize(undef, $defac, $3);
            my $adj = $4;
            $nums{$num} //= {_level=>2, _order=>++$i, raw=>$1,
                             _pat=>"ind+local"};
            $nums{$num}{is_fax} = 1 if $ind =~ $fax_re;
            _add_adjacent(\%nums, $num, $adj);
        }
        _remove_text(\$text, \@r);
    }

    # try to find any cc+area+local numbers
    if ($level >= 3) {
        $i = 0; @r = ();
        while ($text =~ m!($_start_d $_Cc_karea_local_ext $_end_d)!xg) {
            push @r, $1;
            $nums{_normalize($2, $3, $4, $5)} //=
                {_level=>3, _order=>++$i, raw=>$1, _pat=>"cc+karea+local+ext"};
        }
        _remove_text(\$text, \@r);

        $i = 0; @r = ();
        while ($text =~ m!($_start_d $_Cc_prefix_local $_end_d)!xg) {
            push @r, $1;
            $nums{_normalize($2, $3, $4)} //=
                {_level=>3, _order=>++$i, raw=>$1, _pat=>"cc+prefix+local"};
        }
        _remove_text(\$text, \@r);
    }

    # try to find numbers with known area code/cell number prefixes
    if ($level >= 3) {
        $i = 0; @r = ();
        while ($text =~ m!($_start_d $_Kprefix_local $_Adjacent? $_end_d)!xg) {
            push @r, $1;
            my $num = _normalize(undef, $2, $3);
            my $adj = $4;
            $nums{$num} //=
                {_level=>3, _order=>++$i, raw=>$1, _pat=>"kprefix+local"};
            _add_adjacent(\%nums, $num, $adj);
        }
        _remove_text(\$text, \@r);
    }

    if ($level >= 5) {
        $i = 0; @r = ();
        while (defined($defac) &&
                   $text =~ m!($_start_w $_Klocal
                               $_Adjacent? $_end_d)!xg) {
            push @r, $1;
            my $num = _normalize(undef, $defac, $2);
            my $adj = $3;
            # NOTE(review): recorded as _level 2 although this pass only runs
            # at level >= 5, so under max_numbers these rank ahead of the
            # _level 3 matches above -- confirm this is intended.
            $nums{$num} //= {_level=>2, _order=>++$i, raw=>$1,
                             _pat=>"klocal"};
            _add_adjacent(\%nums, $num, $adj);
        }
        _remove_text(\$text, \@r);
    }

    # try to find any area+local numbers
    if ($level >= 5) {
        $i = 0; @r = ();
        while ($text =~ m!($_start_d $_Prefix_local $_Adjacent? $_end_d)!xg) {
            push @r, $1;
            my $num = _normalize(undef, $2, $3);
            my $adj = $4;
            $nums{$num} //=
                {_level=>5, _order=>++$i, raw=>$1, _pat=>"prefix+local"};
            _add_adjacent(\%nums, $num, $adj);
        }
        _remove_text(\$text, \@r);
    }

    # try to find any local numbers (6-8 digit, because 5 digits are easily
    # confused with indonesian postal code, even though they might still be used
    # in smaller cities)
    if ($level >= 5 && defined($defac)) {
        $i = 0; @r = ();
        while ($text =~ m!($_start_d $_Local $_Adjacent? $_end_d)!xg) {
            push @r, $1;
            my $num = _normalize(undef, $defac, $2);
            my $adj = $3;
            $nums{$num} //=
                {_level=>5, _order=>++$i, raw=>$1, _pat=>"local (defac)"};
            _add_adjacent(\%nums, $num, $adj);
        }
        _remove_text(\$text, \@r);
    }

    for (keys %nums) { $nums{$_}{standard} = $_ }
    log_trace("\\%%nums = %s", \%nums);

    # if we are told to extract only N max_numbers, use the lower level ones and
    # the ones at the end (they are more likely to be numbers, in the case of
    # classified ads)
    my @nums = map { $nums{$_} } sort {
        $nums{$a}{_level} <=> $nums{$b}{_level} ||
            $nums{$b}{_order} <=> $nums{$a}{_order} ||
                $nums{$b}{standard} cmp $nums{$a}{standard}
    } keys %nums;
    if (defined($args{max_numbers}) && $args{max_numbers} > 0 &&
            @nums > $args{max_numbers}
        ) {
        splice @nums, $args{max_numbers};
    }

    # sort again according to order (ascending), this is what most people expect
    @nums = sort {$a->{_order} <=> $b->{_order}} @nums;

    # fill in the derived fields; the internal _level/_order/_pat keys are
    # currently left in place
    for my $num (@nums) {
        #for (keys %$num) { delete $num->{$_} if /^_/ }
        _add_info($num);
    }

    log_trace("\\\@nums = %s", \@nums);

    \@nums;
}
858              
# Generate parse_id_phone() from extract_id_phones(): same arguments minus
# max_numbers, returning only the first extracted number.
gen_modified_sub(
    output_name => 'parse_id_phone',
    base_name   => 'extract_id_phones',
    summary     => 'Alias for extract_id_phones(..., max_numbers=>1)->[0]',
    remove_args => ['max_numbers'],
    output_code => sub {
        my %opts  = @_;
        my $found = extract_id_phones(%opts, max_numbers => 1);
        return $found->[0];
    },
);
870              
# Build the canonical "+CC.AREA.LOCAL[.extEXT]" string from raw captures.
# Non-digits are stripped from every part, the country code defaults to 62 and
# one leading zero is dropped from the area code/prefix.
sub _normalize {
    my ($cc, $area, $local, $ext) = @_;

    $cc = "62" unless defined $cc;

    # foreach aliases its loop variable, so this edits the lexicals in place
    foreach my $part ($cc, $area, $local, $ext) {
        next unless defined $part;
        $part =~ s/\D+//g;
    }
    $area =~ s/^0//;

    my $standard = "+$cc.$area.$local";
    $standard .= ".ext$ext" if defined($ext) && length($ext);
    return $standard;
}
878              
# Delete, in place, the first literal occurrence of each string in @$strs from
# the text referenced by $textref, and trace the result if anything changed.
sub _remove_text {
    my ($textref, $strs) = @_;

    my $before = ${$textref};
    foreach my $matched (@{$strs}) {
        ${$textref} =~ s/\Q$matched\E//;
    }

    my $changed = ${$textref} ne $before;
    log_trace("removed match, text = %s", ${$textref}) if $changed;
}
888              
# Handle the "7123456/57" notation: $adj is the "/57" part captured after
# $num. If its digits differ by exactly one from the same number of trailing
# characters of $num, register a second number (a clone of $num's record with
# those trailing characters replaced) ordered right after $num.
sub _add_adjacent {
    my ($nums, $num, $adj) = @_;
    return if !$adj;

    (my $suffix = $adj) =~ s/\D//g;
    my $width = length($suffix);

    my $current = substr($num, -$width);
    return if abs($current - $suffix) != 1;

    my $sibling = $num;
    substr($sibling, -$width, $width, $suffix);

    $nums->{$sibling} = clone($nums->{$num});
    $nums->{$sibling}{_order} += 0.5;
}
900              
# Fill in the derived fields of one extracted-number record, in place.
#
# $num is a hash reference whose 'standard' key holds "+CC.PREFIX.LOCAL" with
# an optional ".extNNN" suffix; dies if it has any other shape. Always sets
# country_code, area_code (prefix with the leading zero restored) and
# local_number, plus ext when present. For country code 62 it also sets
# country, pretty, is_cell (with operator details for known cellular
# prefixes) and is_land (with province/cities for known area codes).
sub _add_info {
    my ($num) = @_;
    my ($cc, $prefix, $local, $ext) =
        $num->{standard} =~ /^\+(\d+)\.(\d+)\.(\d+)(?:\.ext(\d+))?$/
            or die "BUG: invalid standard format: $num->{standard}";
    $prefix = "0$prefix";
    $num->{country_code} = $cc;
    $num->{area_code}    = $prefix;
    $num->{local_number} = $local;
    $num->{ext}          = $ext if defined($ext);

    # XXX country calling code -> name for other countries
    return unless $cc eq '62';
    $num->{country} = 'Indonesia';

    # 8+ digit local numbers are grouped 4+rest, shorter ones 3+rest. Built
    # with substr so that it never depends on leftover regex captures.
    my $head_len = length($local) >= 8 ? 4 : 3;
    $num->{pretty} = join("-", $prefix,
                          substr($local, 0, $head_len),
                          substr($local, $head_len));

    if (my $cell = $cell_prefixes{$prefix}) {
        $num->{is_cell}  = 1;
        $num->{is_gsm}   = $cell->{is_gsm}  ? 1:0;
        $num->{is_cdma}  = $cell->{is_cdma} ? 1:0;
        $num->{operator} = $cell->{operator};
        $num->{product}  = $cell->{product};
    } else {
        $num->{is_cell} = 0;
    }

    if (my $area = $area_codes{$prefix}) {
        $num->{is_land}  = 1;
        $num->{province} = $area->{province};
        $num->{cities}   = $area->{cities};
        state $_fwa_prefixes;
        if (!$_fwa_prefixes) {
            # longest prefixes first, so the match stays correct even if one
            # listed prefix is ever the beginning of another
            $_fwa_prefixes = '(?:'.join(
                "|",
                sort { length($b) <=> length($a) || $a cmp $b }
                    keys %fwa_prefixes).')';
        }
        if ($local =~ /^($_fwa_prefixes)/) {
            my $fwa = $fwa_prefixes{$1};
            # Entries without an operator are plain land-line ranges; they
            # are listed only so that the number is recognised and must not
            # be reported as CDMA.
            if (defined $fwa->{operator}) {
                $num->{is_cdma}  = 1;
                $num->{operator} = $fwa->{operator};
                $num->{product}  = $fwa->{product};
            }
        }
    } else {
        $num->{is_land} = 0;
    }
}
952              
953             #$SPEC{list_id_operators} = {
954             # v => 1.1,
955             # summary => 'Return list of known phone operators',
956             # result_naked => 1,
957             #};
958             #sub list_id_operators {
959             #
960             #}
961              
962             #$SPEC{list_id_area_codes} = {
963             # v => 1.1,
964             # summary => 'Return list of known area codes in Indonesia, '.
965             # 'along with area names',
966             # result_naked => 1,
967             #};
968             #sub list_id_area_codes {
969             #}
970              
971             1;
972             # ABSTRACT: Parse Indonesian phone numbers
973              
974             __END__
975              
976             =pod
977              
978             =encoding UTF-8
979              
980             =head1 NAME
981              
982             Parse::PhoneNumber::ID - Parse Indonesian phone numbers
983              
984             =head1 VERSION
985              
986             This document describes version 0.16 of Parse::PhoneNumber::ID (from Perl distribution Parse-PhoneNumber-ID), released on 2017-07-10.
987              
988             =head1 SYNOPSIS
989              
990             use Parse::PhoneNumber::ID qw(parse_id_phone extract_id_phones);
991             use Data::Dump;
992              
993             dd parse_id_phone(text => 'Jual dalmatian 2bl lucu2x. Hub: 7123 4567',
994             default_area_code=>'022');
995              
996             Will print something like:
997              
998             { raw => 'Hub: 7123 4567',
999             pretty => '022-7123-4567',
1000             standard => '+62.22.71234567',
1001             is_cell => 1,
1002             is_gsm => 0,
1003             is_cdma => 1,
1004             operator => 'telkom',
1005             product => 'flexi',
1006             area_code => '022',
1007             province => 'jabar',
1008             cities => 'Bandung, Cimahi, ...',
1009             local_number => '71234567',
1010             country => 'Indonesia',
1011             country_code => '62',
1012             ext => undef, }
1013              
1014             To extract more than one number from a text:
1015              
1016             my $phones = extract_id_phones(text => 'some text containing phone number(s):'.
1017             '0812 2345 6789, +62-22-91234567');
1018             say "There are ", scalar(@$phones), " phone number(s) found in text";
1019             for (@$phones) { say $_->{pretty} }
1020              
1021             =head1 FUNCTIONS
1022              
1023              
1024             =head2 extract_id_phones
1025              
1026             Usage:
1027              
1028             extract_id_phones(%args) -> any
1029              
1030             Extract phone number(s) from text.
1031              
1032             Extracts phone number(s) from text. Returns an array of zero or more parsed phone
1033             number structures (hashes). Understands the list of known area codes and cellular
1034             operators, as well as other information. Understands various syntax e.g.
1035             +62.22.1234567, (022) 123-4567, 022-123-4567 ext 102, and even things like
1036             7123456/57 (2 adjacent numbers).
1037              
1038             Extraction algorithm is particularly targeted at classified ads text in
1039             Indonesian language, but should be quite suitable for any other normal text.
1040              
1041             Non-Indonesian phone numbers (e.g. +65 12 3456 7890) will still be extracted,
1042             but without any other detailed information other than country code.
1043              
1044             This function is not exported by default, but exportable.
1045              
1046             Arguments ('*' denotes required arguments):
1047              
1048             =over 4
1049              
1050             =item * B<default_area_code> => I<str>
1051              
1052             When encountering a number without area code, use this.
1053              
1054             If you want to extract numbers that don't contain an area code (e.g. 7123 4567),
1055             you'll need to provide this.
1056              
1057             =item * B<level> => I<int> (default: 5)
1058              
1059             How hard should the function extract numbers (1-9).
1060              
1061             The higher the level, the harder this function will try finding phone numbers,
1062             but the higher the risk of false positives will be. E.g. in text
1063             '123456789012345' with level=5 it will not find a phone number, but with level=9
1064             it might assume, e.g. 1234567890 to be a phone number. Normally leaving level at
1065             default level is fine.
1066              
1067             =item * B<max_numbers> => I<int>
1068              
1069             =item * B<text>* => I<str>
1070              
1071             Text containing phone numbers to extract from.
1072              
1073             =back
1074              
1075             Return value: (any)
1076              
1077              
1078             =head2 parse_id_phone
1079              
1080             Usage:
1081              
1082             parse_id_phone(%args) -> any
1083              
1084             Alias for extract_id_phones(..., max_numbers=>1)->[0].
1085              
1086             Extracts phone number(s) from text. Return an array of one or more parsed phone
1087             number structure (a hash). Understands the list of known area codes and cellular
1088             operators, as well as other information. Understands various syntax e.g.
1089             +62.22.1234567, (022) 123-4567, 022-123-4567 ext 102, and even things like
1090             7123456/57 (2 adjacent numbers).
1091              
1092             Extraction algorithm is particularly targeted at classified ads text in
1093             Indonesian language, but should be quite suitable for any other normal text.
1094              
1095             Non-Indonesian phone numbers (e.g. +65 12 3456 7890) will still be extracted,
1096             but without any other detailed information other than country code.
1097              
1098             This function is not exported by default, but exportable.
1099              
1100             Arguments ('*' denotes required arguments):
1101              
1102             =over 4
1103              
1104             =item * B<default_area_code> => I<str>
1105              
1106             When encountering a number without area code, use this.
1107              
1108             If you want to extract numbers that don't contain an area code (e.g. 7123 4567),
1109             you'll need to provide this.
1110              
1111             =item * B<level> => I<int> (default: 5)
1112              
1113             How hard should the function extract numbers (1-9).
1114              
1115             The higher the level, the harder this function will try finding phone numbers,
1116             but the higher the risk of false positives will be. E.g. in text
1117             '123456789012345' with level=5 it will not find a phone number, but with level=9
1118             it might assume, e.g. 1234567890 to be a phone number. Normally leaving level at
1119             default level is fine.
1120              
1121             =item * B<text>* => I<str>
1122              
1123             Text containing phone numbers to extract from.
1124              
1125             =back
1126              
1127             Return value: (any)
1128              
1129             =head1 HOMEPAGE
1130              
1131             Please visit the project's homepage at L<https://metacpan.org/release/Parse-PhoneNumber-ID>.
1132              
1133             =head1 SOURCE
1134              
1135             Source repository is at L<https://github.com/perlancar/perl-Parse-PhoneNumber-ID>.
1136              
1137             =head1 BUGS
1138              
1139             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=Parse-PhoneNumber-ID>
1140              
1141             When submitting a bug or request, please include a test-file or a
1142             patch to an existing test-file that illustrates the bug or desired
1143             feature.
1144              
1145             =head1 SEE ALSO
1146              
1147             L<Parse::PhoneNumber>
1148              
1149             =head1 AUTHOR
1150              
1151             perlancar <perlancar@cpan.org>
1152              
1153             =head1 COPYRIGHT AND LICENSE
1154              
1155             This software is copyright (c) 2017, 2015, 2014, 2013, 2012, 2011 by perlancar@cpan.org.
1156              
1157             This is free software; you can redistribute it and/or modify it under
1158             the same terms as the Perl 5 programming language system itself.
1159              
1160             =cut