| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package URI::ParseSearchString; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
require Exporter; |
|
4
|
|
|
|
|
|
|
@ISA = (Exporter); |
|
5
|
|
|
|
|
|
|
@EXPORT = ( qw (parse_search_string findEngine se_host se_name se_term) ); |
|
6
|
|
|
|
|
|
|
|
|
7
|
3
|
|
|
3
|
|
30280
|
use warnings; |
|
|
3
|
|
|
|
|
8
|
|
|
|
3
|
|
|
|
|
116
|
|
|
8
|
3
|
|
|
3
|
|
19
|
use strict; |
|
|
3
|
|
|
|
|
4
|
|
|
|
3
|
|
|
|
|
100
|
|
|
9
|
3
|
|
|
3
|
|
2832
|
use URI; |
|
|
3
|
|
|
|
|
18044
|
|
|
|
3
|
|
|
|
|
109
|
|
|
10
|
3
|
|
|
3
|
|
34686
|
use Data::Dumper; |
|
|
3
|
|
|
|
|
23182
|
|
|
|
3
|
|
|
|
|
12954
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=encoding utf8 |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
URI::ParseSearchString - parse search engine referrer URLs and extract keywords used |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 VERSION |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
Version 3.51 (Diablo 3 edition) |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=cut |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
our $VERSION = '3.51'; |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
use URI::ParseSearchString ; |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
my $uparse = new URI::ParseSearchString(); |
|
31
|
|
|
|
|
|
|
my $ref = 'http://www.google.com/search?hl=en&q=a+simple+test&btnG=Google+Search'; |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
my $query_terms = $uparse->se_term( $ref ); |
|
34
|
|
|
|
|
|
|
my $canonical = $uparse->se_name( $ref ); |
|
35
|
|
|
|
|
|
|
my $hostname = $uparse->se_host( $ref ); |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head1 FUNCTIONS |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=head2 new |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
Creates a new instance object of the module. |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
my $uparse = new URI::ParseSearchString() ; |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=cut |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
# Lookup table of known search engines, keyed by host name.
# Each value is a hash reference with two fields:
#   name - the display name reported for the engine
#   q    - the name of the URL query parameter that carries the search terms
# The constructor stores a reference to this table (not a copy) in every
# instance under the 'engines' key.
# NOTE(review): several display names are misspelled ('Antiqua', 'New Zeland',
# 'Philipines', 'El Savador', 'Djubouti'). They are left untouched because they
# are runtime strings that callers may compare against -- confirm before fixing.
my $RH_LOOKUPS = {

    'answers.yahoo.com' => { name => 'Yahoo Answers', q => 'p' },

    # Portuguese (.pt) engines
    'sapo.pt'          => { name => 'Pesquisa SAPO', q => 'q' },
    'iol.pt'           => { name => 'Pesquisa Iol',  q => 'q' },
    'pesquisa.clix.pt' => { name => 'Pesquisa Clix', q => 'question' },
    'aeiou.pt'         => { name => 'Aeiou',         q => 'q' },
    'cuil.pt'          => { name => 'Cuil PT',       q => 'q' },

    # sapo.pt sub-sites
    'fotos.sapo.pt'   => { name => 'SAPO fotos',      q => 'word' },
    'videos.sapo.pt'  => { name => 'SAPO videos',     q => 'word' },
    'sabores.sapo.pt' => { name => 'SAPO sabores',    q => 'cxSearch' },
    'jn.sapo.pt'      => { name => 'Jornal Noticias', q => 'Pesquisa' },
    'dn.sapo.pt'      => { name => 'Diario Noticias', q => 'Pesquisa' },

    # other .pt sites
    'rtp.pt'            => { name => 'Rtp',              q => 'search' },
    'record.pt'         => { name => 'Jornal Record',    q => 'q' },
    'correiodamanha.pt' => { name => 'Correio da Manha', q => 'pesquisa' },
    'correiomanha.pt'   => { name => 'Correio Manha',    q => 'pesquisa' },
    'publico.clix.pt'   => { name => 'Publico',          q => 'q' },
    'xl.pt'             => { name => 'XL',               q => 'pesquisa' },

    # general engines
    'abacho.com'             => { name => 'Abacho',            q => 'q' },
    'alice.it'               => { name => 'Alice.it',          q => 'qs' },
    'altavista.com'          => { name => 'Altavista',         q => 'q' },
    'aolsearch.aol.com'      => { name => 'AOL Search',        q => 'query' },
    'as.starware.com'        => { name => 'Starware',          q => 'qry' },
    'blogs.icerocket.com'    => { name => 'IceRocket',         q => 'q' },
    'blogsearch.google.com'  => { name => 'Google Blogsearch', q => 'q' },
    'busca.orange.es'        => { name => 'Orange ES',         q => 'buscar' },
    'buscador.lycos.es'      => { name => 'Lycos ES',          q => 'query' },
    'buscador.terra.es'      => { name => 'Terra ES',          q => 'query' },
    'buscar.ozu.es'          => { name => 'Ozu ES',            q => 'q' },
    'categorico.it'          => { name => 'Categorico IT',     q => 'q' },
    'cuil.com'               => { name => 'Cuil',              q => 'q' },
    'clusty.com'             => { name => 'Clusty',            q => 'query' },
    'excite.com'             => { name => 'Excite',            q => 'q' },
    'excite.it'              => { name => 'Excite IT',         q => 'q' },
    'fastweb.it'             => { name => 'Fastweb IT',        q => 'q' },
    'fastbrowsersearch.com'  => { name => 'Fastbrowsersearch', q => 'q' },
    'godado.com'             => { name => 'Godado',            q => 'key' },
    'godado.it'              => { name => 'Godado (IT)',       q => 'key' },
    'gps.virgin.net'         => { name => 'Virgin Search',     q => 'q' },
    'ilmotore.com'           => { name => 'ilMotore',          q => 'query' },
    'ithaki.net'             => { name => 'Ithaki',            q => 'query' },
    'kataweb.it'             => { name => 'Kataweb IT',        q => 'q' },
    'libero.it'              => { name => 'Libero IT',         q => 'query' },
    'lycos.it'               => { name => 'Lycos IT',          q => 'query' },
    'search.aol.co.uk'       => { name => 'AOL UK',            q => 'query' },
    'search.arabia.msn.com'  => { name => 'MSN Arabia',        q => 'q' },
    'search.bbc.co.uk'       => { name => 'BBC Search',        q => 'q' },
    'search.conduit.com'     => { name => 'Conduit',           q => 'q' },
    'search.icq.com'         => { name => 'ICQ dot com',       q => 'q' },
    'search.live.com'        => { name => 'Live.com',          q => 'q' },
    'search.lycos.co.uk'     => { name => 'Lycos UK',          q => 'query' },
    'search.lycos.com'       => { name => 'Lycos',             q => 'query' },
    'search.msn.co.uk'       => { name => 'MSN UK',            q => 'q' },
    'search.msn.com'         => { name => 'MSN',               q => 'q' },
    'search.myway.com'       => { name => 'MyWay',             q => 'searchfor' },
    'search.mywebsearch.com' => { name => 'My Web Search',     q => 'searchfor' },
    'search.ntlworld.com'    => { name => 'NTLWorld',          q => 'q' },
    'search.orange.co.uk'    => { name => 'Orange Search',     q => 'q' },
    'search.prodigy.msn.com' => { name => 'MSN Prodigy',       q => 'q' },
    'search.sweetim.com'     => { name => 'Sweetim',           q => 'q' },
    'search.virginmedia.com' => { name => 'VirginMedia',       q => 'q' },
    'search.yahoo.co.jp'     => { name => 'Yahoo Japan',       q => 'p' },
    'search.yahoo.com'       => { name => 'Yahoo!',            q => 'p' },
    'search.yahoo.jp'        => { name => 'Yahoo! Japan',      q => 'p' },
    'simpatico.ws'           => { name => 'Simpatico IT',      q => 'query' },
    'soso.com'               => { name => 'Soso',              q => 'w' },
    'suche.fireball.de'      => { name => 'Fireball DE',       q => 'query' },
    'suche.web.de'           => { name => 'Suche DE',          q => 'su' },
    'suche.t-online.de'      => { name => 'T-Online',          q => 'q' },
    'thespider.it'           => { name => 'TheSpider IT',      q => 'q' },
    'uk.altavista.com'       => { name => 'Altavista UK',      q => 'q' },
    'uk.ask.com'             => { name => 'Ask UK',            q => 'q' },
    'uk.search.yahoo.com'    => { name => 'Yahoo! UK',         q => 'p' },
    'alltheweb.com'          => { name => 'AllTheWeb',         q => 'q' },
    'ask.com'                => { name => 'Ask dot com',       q => 'q' },
    'blueyonder.co.uk'       => { name => 'Blueyonder',        q => 'q' },
    'feedster.com'           => { name => 'Feedster',          q => 'q' },

    # Google domains (all use the 'q' parameter)
    'google.ad'     => { name => 'Google Andorra',                q => 'q' },
    'google.ae'     => { name => 'Google United Arab Emirates',   q => 'q' },
    'google.af'     => { name => 'Google Afghanistan',            q => 'q' },
    'google.ag'     => { name => 'Google Antiqua and Barbuda',    q => 'q' },
    'google.am'     => { name => 'Google Armenia',                q => 'q' },
    'google.as'     => { name => 'Google American Samoa',         q => 'q' },
    'google.at'     => { name => 'Google Austria',                q => 'q' },
    'google.az'     => { name => 'Google Azerbaijan',             q => 'q' },
    'google.ba'     => { name => 'Google Bosnia and Herzegovina', q => 'q' },
    'google.be'     => { name => 'Google Belgium',                q => 'q' },
    'google.bg'     => { name => 'Google Bulgaria',               q => 'q' },
    'google.bi'     => { name => 'Google Burundi',                q => 'q' },
    'google.biz'    => { name => 'Google dot biz',                q => 'q' },
    'google.bo'     => { name => 'Google Bolivia',                q => 'q' },
    'google.bs'     => { name => 'Google Bahamas',                q => 'q' },
    'google.bz'     => { name => 'Google Belize',                 q => 'q' },
    'google.ca'     => { name => 'Google Canada',                 q => 'q' },
    'google.cc'     => { name => 'Google Cocos Islands',          q => 'q' },
    'google.cd'     => { name => 'Google Dem Rep of Congo',       q => 'q' },
    'google.cg'     => { name => 'Google Rep of Congo',           q => 'q' },
    'google.ch'     => { name => 'Google Switzerland',            q => 'q' },
    'google.ci'     => { name => 'Google Cote dIvoire',           q => 'q' },
    'google.cl'     => { name => 'Google Chile',                  q => 'q' },
    'google.cn'     => { name => 'Google China',                  q => 'q' },
    'google.co.at'  => { name => 'Google Austria',                q => 'q' },
    'google.co.bi'  => { name => 'Google Burundi',                q => 'q' },
    'google.co.bw'  => { name => 'Google Botswana',               q => 'q' },
    'google.co.ci'  => { name => 'Google Ivory Coast',            q => 'q' },
    'google.co.ck'  => { name => 'Google Cook Islands',           q => 'q' },
    'google.co.cr'  => { name => 'Google Costa Rica',             q => 'q' },
    'google.co.gg'  => { name => 'Google Guernsey',               q => 'q' },
    'google.co.gl'  => { name => 'Google Greenland',              q => 'q' },
    'google.co.gy'  => { name => 'Google Guyana',                 q => 'q' },
    'google.co.hu'  => { name => 'Google Hungary',                q => 'q' },
    'google.co.id'  => { name => 'Google Indonesia',              q => 'q' },
    'google.co.il'  => { name => 'Google Israel',                 q => 'q' },
    'google.co.im'  => { name => 'Google Isle of Man',            q => 'q' },
    'google.co.in'  => { name => 'Google India',                  q => 'q' },
    'google.co.it'  => { name => 'Google Italy',                  q => 'q' },
    'google.co.je'  => { name => 'Google Jersey',                 q => 'q' },
    'google.co.jp'  => { name => 'Google Japan',                  q => 'q' },
    'google.co.ke'  => { name => 'Google Kenya',                  q => 'q' },
    'google.co.kr'  => { name => 'Google South Korea',            q => 'q' },
    'google.co.ls'  => { name => 'Google Lesotho',                q => 'q' },
    'google.co.ma'  => { name => 'Google Morocco',                q => 'q' },
    'google.co.mu'  => { name => 'Google Mauritius',              q => 'q' },
    'google.co.mw'  => { name => 'Google Malawi',                 q => 'q' },
    'google.co.nz'  => { name => 'Google New Zeland',             q => 'q' },
    'google.co.pn'  => { name => 'Google Pitcairn Islands',       q => 'q' },
    'google.co.th'  => { name => 'Google Thailand',               q => 'q' },
    'google.co.tt'  => { name => 'Google Trinidad and Tobago',    q => 'q' },
    'google.co.ug'  => { name => 'Google Uganda',                 q => 'q' },
    'google.co.uk'  => { name => 'Google UK',                     q => 'q' },
    'google.co.uz'  => { name => 'Google Uzbekistan',             q => 'q' },
    'google.co.ve'  => { name => 'Google Venezuela',              q => 'q' },
    'google.co.vi'  => { name => 'Google US Virgin Islands',      q => 'q' },
    'google.co.za'  => { name => 'Google South Africa',           q => 'q' },
    'google.co.zm'  => { name => 'Google Zambia',                 q => 'q' },
    'google.co.zw'  => { name => 'Google Zimbabwe',               q => 'q' },
    'google.com'    => { name => 'Google',                        q => 'q' },
    'google.com.af' => { name => 'Google Afghanistan',            q => 'q' },
    'google.com.ag' => { name => 'Google Antiqua and Barbuda',    q => 'q' },
    'google.com.ai' => { name => 'Google Anguilla',               q => 'q' },
    'google.com.ar' => { name => 'Google Argentina',              q => 'q' },
    'google.com.au' => { name => 'Google Australia',              q => 'q' },
    'google.com.az' => { name => 'Google Azerbaijan',             q => 'q' },
    'google.com.bd' => { name => 'Google Bangladesh',             q => 'q' },
    'google.com.bh' => { name => 'Google Bahrain',                q => 'q' },
    'google.com.bi' => { name => 'Google Burundi',                q => 'q' },
    'google.com.bn' => { name => 'Google Brunei Darussalam',      q => 'q' },
    'google.com.bo' => { name => 'Google Bolivia',                q => 'q' },
    'google.com.br' => { name => 'Google Brazil',                 q => 'q' },
    'google.com.bs' => { name => 'Google Bahamas',                q => 'q' },
    'google.com.bz' => { name => 'Google Belize',                 q => 'q' },
    'google.com.cn' => { name => 'Google China',                  q => 'q' },
    'google.com.co' => { name => 'Google',                        q => 'q' },
    'google.com.cu' => { name => 'Google Cuba',                   q => 'q' },
    'google.com.do' => { name => 'Google Dominican Rep',          q => 'q' },
    'google.com.ec' => { name => 'Google Ecuador',                q => 'q' },
    'google.com.eg' => { name => 'Google Egypt',                  q => 'q' },
    'google.com.et' => { name => 'Google Ethiopia',               q => 'q' },
    'google.com.fj' => { name => 'Google Fiji',                   q => 'q' },
    'google.com.ge' => { name => 'Google Georgia',                q => 'q' },
    'google.com.gh' => { name => 'Google Ghana',                  q => 'q' },
    'google.com.gi' => { name => 'Google Gibraltar',              q => 'q' },
    'google.com.gl' => { name => 'Google Greenland',              q => 'q' },
    'google.com.gp' => { name => 'Google Guadeloupe',             q => 'q' },
    'google.com.gr' => { name => 'Google Greece',                 q => 'q' },
    'google.com.gt' => { name => 'Google Guatemala',              q => 'q' },
    'google.com.gy' => { name => 'Google Guyana',                 q => 'q' },
    'google.com.hk' => { name => 'Google Hong Kong',              q => 'q' },
    'google.com.hn' => { name => 'Google Honduras',               q => 'q' },
    'google.com.hr' => { name => 'Google Croatia',                q => 'q' },
    'google.com.jm' => { name => 'Google Jamaica',                q => 'q' },
    'google.com.jo' => { name => 'Google Jordan',                 q => 'q' },
    'google.com.kg' => { name => 'Google Kyrgyzstan',             q => 'q' },
    'google.com.kh' => { name => 'Google Cambodia',               q => 'q' },
    'google.com.ki' => { name => 'Google Kiribati',               q => 'q' },
    'google.com.kz' => { name => 'Google Kazakhstan',             q => 'q' },
    'google.com.lk' => { name => 'Google Sri Lanka',              q => 'q' },
    'google.com.lv' => { name => 'Google Latvia',                 q => 'q' },
    'google.com.ly' => { name => 'Google Libya',                  q => 'q' },
    'google.com.mt' => { name => 'Google Malta',                  q => 'q' },
    'google.com.mu' => { name => 'Google Mauritius',              q => 'q' },
    'google.com.mw' => { name => 'Google Malawi',                 q => 'q' },
    'google.com.mx' => { name => 'Google Mexico',                 q => 'q' },
    'google.com.my' => { name => 'Google Malaysia',               q => 'q' },
    'google.com.na' => { name => 'Google Namibia',                q => 'q' },
    'google.com.nf' => { name => 'Google Norfolk Island',         q => 'q' },
    'google.com.ng' => { name => 'Google Nigeria',                q => 'q' },
    'google.com.ni' => { name => 'Google Nicaragua',              q => 'q' },
    'google.com.np' => { name => 'Google Nepal',                  q => 'q' },
    'google.com.nr' => { name => 'Google Nauru',                  q => 'q' },
    'google.com.om' => { name => 'Google Oman',                   q => 'q' },
    'google.com.pa' => { name => 'Google Panama',                 q => 'q' },
    'google.com.pe' => { name => 'Google Peru',                   q => 'q' },
    'google.com.ph' => { name => 'Google Philipines',             q => 'q' },
    'google.com.pk' => { name => 'Google Pakistan',               q => 'q' },
    'google.com.pl' => { name => 'Google Poland',                 q => 'q' },
    'google.com.pr' => { name => 'Google Puerto Rico',            q => 'q' },
    'google.com.pt' => { name => 'Google Portugal',               q => 'q' },
    'google.com.py' => { name => 'Google Paraguay',               q => 'q' },
    'google.com.qa' => { name => 'Google',                        q => 'q' },
    'google.com.ru' => { name => 'Google Russia',                 q => 'q' },
    'google.com.sa' => { name => 'Google Saudi Arabia',           q => 'q' },
    'google.com.sb' => { name => 'Google Solomon Islands',        q => 'q' },
    'google.com.sc' => { name => 'Google Seychelles',             q => 'q' },
    'google.com.sg' => { name => 'Google Singapore',              q => 'q' },
    'google.com.sv' => { name => 'Google El Savador',             q => 'q' },
    'google.com.tj' => { name => 'Google Tajikistan',             q => 'q' },
    'google.com.tr' => { name => 'Google Turkey',                 q => 'q' },
    'google.com.tt' => { name => 'Google Trinidad and Tobago',    q => 'q' },
    'google.com.tw' => { name => 'Google Taiwan',                 q => 'q' },
    'google.com.ua' => { name => 'Google Ukraine',                q => 'q' },
    'google.com.uy' => { name => 'Google Uruguay',                q => 'q' },
    'google.com.uz' => { name => 'Google Uzbekistan',             q => 'q' },
    'google.com.ve' => { name => 'Google Venezuela',              q => 'q' },
    'google.com.vi' => { name => 'Google US Virgin Islands',      q => 'q' },
    'google.com.vn' => { name => 'Google Vietnam',                q => 'q' },
    'google.com.ws' => { name => 'Google Samoa',                  q => 'q' },
    'google.cz'     => { name => 'Google Czech Rep',              q => 'q' },
    'google.de'     => { name => 'Google Germany',                q => 'q' },
    'google.dj'     => { name => 'Google Djubouti',               q => 'q' },
    'google.dk'     => { name => 'Google Denmark',                q => 'q' },
    'google.dm'     => { name => 'Google Dominica',               q => 'q' },
    'google.ec'     => { name => 'Google Ecuador',                q => 'q' },
    'google.ee'     => { name => 'Google Estonia',                q => 'q' },
    'google.es'     => { name => 'Google Spain',                  q => 'q' },
    'google.fi'     => { name => 'Google Finland',                q => 'q' },
    'google.fm'     => { name => 'Google Micronesia',             q => 'q' },
    'google.fr'     => { name => 'Google France',                 q => 'q' },
    'google.gd'     => { name => 'Google Grenada',                q => 'q' },
    'google.ge'     => { name => 'Google Georgia',                q => 'q' },
    'google.gf'     => { name => 'Google French Guiana',          q => 'q' },
    'google.gg'     => { name => 'Google Guernsey',               q => 'q' },
    'google.gl'     => { name => 'Google Greenland',              q => 'q' },
    'google.gm'     => { name => 'Google Gambia',                 q => 'q' },
    'google.gp'     => { name => 'Google Guadeloupe',             q => 'q' },
    'google.gr'     => { name => 'Google Greece',                 q => 'q' },
    'google.gy'     => { name => 'Google Guyana',                 q => 'q' },
    'google.hk'     => { name => 'Google Hong Kong',              q => 'q' },
    'google.hn'     => { name => 'Google Honduras',               q => 'q' },
    'google.hr'     => { name => 'Google Croatia',                q => 'q' },
    'google.ht'     => { name => 'Google Haiti',                  q => 'q' },
    'google.hu'     => { name => 'Google Hungary',                q => 'q' },
    'google.ie'     => { name => 'Google Ireland',                q => 'q' },
    'google.im'     => { name => 'Google Isle of Man',            q => 'q' },
    'google.in'     => { name => 'Google India',                  q => 'q' },
    'google.info'   => { name => 'Google dot info',               q => 'q' },
    'google.is'     => { name => 'Google Iceland',                q => 'q' },
    'google.it'     => { name => 'Google Italy',                  q => 'q' },
    'google.je'     => { name => 'Google Jersey',                 q => 'q' },
    'google.jo'     => { name => 'Google Jordan',                 q => 'q' },
    'google.jobs'   => { name => 'Google dot jobs',               q => 'q' },
    'google.jp'     => { name => 'Google Japan',                  q => 'q' },
    'google.kg'     => { name => 'Google Kyrgyzstan',             q => 'q' },
    'google.ki'     => { name => 'Google Kiribati',               q => 'q' },
    'google.kz'     => { name => 'Google Kazakhstan',             q => 'q' },
    'google.la'     => { name => 'Google Laos',                   q => 'q' },
    'google.li'     => { name => 'Google Liechtenstein',          q => 'q' },
    'google.lk'     => { name => 'Google Sri Lanka',              q => 'q' },
    'google.lt'     => { name => 'Google Lithuania',              q => 'q' },
    'google.lu'     => { name => 'Google Luxembourg',             q => 'q' },
    'google.lv'     => { name => 'Google Latvia',                 q => 'q' },
    'google.ma'     => { name => 'Google Morocco',                q => 'q' },
    'google.md'     => { name => 'Google Moldova',                q => 'q' },
    'google.mn'     => { name => 'Google Mongolia',               q => 'q' },
    'google.mobi'   => { name => 'Google dot mobi',               q => 'q' },
    'google.ms'     => { name => 'Google Montserrat',             q => 'q' },
    'google.mu'     => { name => 'Google Mauritius',              q => 'q' },
    'google.mv'     => { name => 'Google Maldives',               q => 'q' },
    'google.mw'     => { name => 'Google Malawi',                 q => 'q' },
    'google.net'    => { name => 'Google dot net',                q => 'q' },
    'google.nf'     => { name => 'Google Norfolk Island',         q => 'q' },
    'google.nl'     => { name => 'Google Netherlands',            q => 'q' },
    'google.no'     => { name => 'Google Norway',                 q => 'q' },
    'google.nr'     => { name => 'Google Nauru',                  q => 'q' },
    'google.nu'     => { name => 'Google Niue',                   q => 'q' },
    'google.off.ai' => { name => 'Google Anguilla',               q => 'q' },
    'google.ph'     => { name => 'Google Philipines',             q => 'q' },
    'google.pk'     => { name => 'Google Pakistan',               q => 'q' },
    'google.pl'     => { name => 'Google Poland',                 q => 'q' },
    'google.pn'     => { name => 'Google Pitcairn Islands',       q => 'q' },
    'google.pr'     => { name => 'Google Puerto Rico',            q => 'q' },
    'google.pt'     => { name => 'Google Portugal',               q => 'q' },
    'google.ro'     => { name => 'Google Romania',                q => 'q' },
    'google.ru'     => { name => 'Google Russia',                 q => 'q' },
    'google.rw'     => { name => 'Google Rwanda',                 q => 'q' },
    'google.sc'     => { name => 'Google Seychelles',             q => 'q' },
    'google.se'     => { name => 'Google Sweden',                 q => 'q' },
    'google.sg'     => { name => 'Google Singapore',              q => 'q' },
    'google.sh'     => { name => 'Google Saint Helena',           q => 'q' },
    'google.si'     => { name => 'Google Slovenia',               q => 'q' },
    'google.sk'     => { name => 'Google Slovakia',               q => 'q' },
    'google.sm'     => { name => 'Google San Marino',             q => 'q' },
    'google.sn'     => { name => 'Google Senegal',                q => 'q' },
    'google.sr'     => { name => 'Google Suriname',               q => 'q' },
    'google.st'     => { name => 'Google Sao Tome',               q => 'q' },
    'google.tk'     => { name => 'Google Tokelau',                q => 'q' },
    'google.tm'     => { name => 'Google Turkmenistan',           q => 'q' },
    'google.to'     => { name => 'Google Tonga',                  q => 'q' },
    'google.tp'     => { name => 'Google East Timor',             q => 'q' },
    'google.tt'     => { name => 'Google Trinidad and Tobago',    q => 'q' },
    'google.tv'     => { name => 'Google Tuvalu',                 q => 'q' },
    'google.tw'     => { name => 'Google Taiwan',                 q => 'q' },
    'google.ug'     => { name => 'Google Uganda',                 q => 'q' },
    'google.us'     => { name => 'Google US',                     q => 'q' },
    'google.uz'     => { name => 'Google Uzbekistan',             q => 'q' },
    'google.vg'     => { name => 'Google British Virgin Islands', q => 'q' },
    'google.vn'     => { name => 'Google Vietnam',                q => 'q' },
    'google.vu'     => { name => 'Google Vanuatu',                q => 'q' },
    'google.ws'     => { name => 'Google Samoa',                  q => 'q' },

    # more general engines
    'hotbot.com'          => { name => 'HotBot',        q => 'query' },
    'in.gr'               => { name => 'In GR',         q => 'q' },
    'mamma.com'           => { name => 'Mamma',         q => 'query' },
    'mahalo.com'          => { name => 'Mahalo',        q => 'search' },
    'megasearching.net'   => { name => 'Megasearching', q => 's' },
    'mirago.co.uk'        => { name => 'Mirago UK',     q => 'qry' },
    'netscape.com'        => { name => 'Netscape',      q => 's' },
    'community.paglo.com' => { name => 'Paglo',         q => 'q' },
    'pathfinder.gr'       => { name => 'Pathfinder GR', q => 'q' },
    'phantis.com'         => { name => 'Phantis GR',    q => 'q' },
    'robby.gr'            => { name => 'Robby GR',      q => 'searchstr' },
    'sproose.com'         => { name => 'Sproose',       q => 'query' },
    'technorati.com'      => { name => 'Technorati',    q => 'q' },
    'tesco.net'           => { name => 'Tesco Search',  q => 'q' },
    'tiscali.co.uk'       => { name => 'Tiscali UK',    q => 'query' },
    'bing.com'            => { name => 'Bing',          q => 'q' },

    # Brazilian engines
    'acbusca.com'           => { name => 'ACBusca',          q => 'query' },
    'atalhocerto.com.br'    => { name => 'Atalho Certo',     q => 'keyword' },
    'bastaclicar.com.br'    => { name => 'Basta Clicar',     q => 'search' },
    'bemrapido.com.br'      => { name => 'Bem Rapido',       q => 'chave' },
    'br.altavista.com'      => { name => 'AltaVista Brasil', q => 'q' },
    'br.search.yahoo.com'   => { name => 'Yahoo Brazil',     q => 'p' },
    'busca.uol.com.br'      => { name => 'Radar UOL',        q => 'q' },
    'buscaaqui.com.br'      => { name => 'Busca Aqui',       q => 'q' },
    'buscador.terra.com.br' => { name => 'Terra Busca',      q => 'query' },
    'cade.search.yahoo.com' => { name => 'Cadê',             q => 'p' },
    'clickgratis.com.br'    => { name => 'Click Gratis',     q => 'query' },
    'entrada.com.br'        => { name => 'Entrada',          q => 'q' },
    'gigabusca.com.br'      => { name => 'Giga Busca',       q => 'what' },
    'internetica.com.br'    => { name => 'Internetica',      q => 'busca' },
    'katatudo.com.br'       => { name => 'KataTudo',         q => 'q' },
    'minasplanet.com.br'    => { name => 'Minas Planet',     q => 'term' },
    'speedybusca.com.br'    => { name => 'SpeedyBusca',      q => 'q' },
    'vaibuscar.com.br'      => { name => 'Vai Busca',        q => 'q' },

    # 'search.conduit.com' was listed a second time here with the same
    # values; the duplicate key has been dropped.
    'in.search.yahoo.com' => { name => 'Yahoo India', q => 'p' },
    'rediff.com'          => { name => 'Rediff',      q => 'MT' },
    'guruji.com'          => { name => 'Guruji',      q => 'q' },

    'isohunt.com'  => { name => 'Isohunt',   q => 'ihq' },
    'btjunkie.org' => { name => 'BT Junkie', q => 'q' },
    'torrentz.eu'  => { name => 'Torrentz',  q => 'f' },

};
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
# Constructor: builds a hash-based object blessed into the invoking class.
# The 'engines' slot holds a reference to the file-level $RH_LOOKUPS table,
# so the table is shared by reference rather than copied.
sub new {
    my ($class) = @_;

    my $object = bless { engines => $RH_LOOKUPS }, $class;
    return $object;
}
|
416
|
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=head2 parse_search_string |
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
This module provides a simple function to parse and extract search engine query strings. It was designed and tested with |
|
420
|
|
|
|
|
|
|
Apache referrer logs in mind. It can be used for a wide number of purposes, including tracking down what keywords people use |
|
421
|
|
|
|
|
|
|
on popular search engines before they land on a site. Although a number of modules and scripts already exist for this purpose, |
|
422
|
|
|
|
|
|
|
the majority of them are outdated, relying on obsolete search strings for each engine. |
|
423
|
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
The default function exported is "parse_search_string" which accepts an unquoted referrer string as input and returns the |
|
425
|
|
|
|
|
|
|
search engine query contained within. It currently works with both escaped and un-escaped queries and will translate the search |
|
426
|
|
|
|
|
|
|
terms before returning them in the latter case. The function returns undef in all other cases and on errors. |
|
427
|
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
For example: |
|
429
|
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
my $ref = 'http://www.google.com/search?hl=en&q=a+simple+test&btnG=Google+Search'; |
|
431
|
|
|
|
|
|
|
my $terms = |
|
432
|
|
|
|
|
|
|
$uparse->parse_search_string( $ref ); |
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
would return I<'a simple test'> |
|
435
|
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
whereas |
|
437
|
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
my $ref = 'http://www.mamma.com/Mamma?utfout=1&qtype=0&query=a+more%21+complex_+search%24&Submit=%C2%A0%C2%A0Search%C2%A0%C2%A0'; |
|
439
|
|
|
|
|
|
|
my $terms = |
|
440
|
|
|
|
|
|
|
$uparse->parse_search_string( $ref ); |
|
441
|
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
would return I<'a more! complex_ search$'> |
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
=cut |
|
445
|
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
=head2 se_term |
|
447
|
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
Same as parse_search_string(). |
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
=cut |
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
sub se_term { |
|
453
|
124
|
|
|
124
|
1
|
67158
|
my $self = shift ; |
|
454
|
124
|
|
|
|
|
207
|
my $string = shift ; |
|
455
|
124
|
50
|
|
|
|
321
|
return unless defined $string ; |
|
456
|
124
|
|
|
|
|
271
|
return $self->parse_search_string($string) ; |
|
457
|
|
|
|
|
|
|
} |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
## internal method for creating a URI object |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
sub _uri { |
|
462
|
264
|
|
|
264
|
|
282
|
my $self = shift; |
|
463
|
264
|
|
|
|
|
311
|
my $string = shift; |
|
464
|
|
|
|
|
|
|
|
|
465
|
264
|
50
|
|
|
|
503
|
return unless defined($string); |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
## create a new URI object |
|
468
|
|
|
|
|
|
|
## and return unless it's http or https |
|
469
|
|
|
|
|
|
|
|
|
470
|
264
|
|
|
|
|
961
|
my $uri = URI->new( $string ); |
|
471
|
|
|
|
|
|
|
return |
|
472
|
264
|
100
|
100
|
|
|
29332
|
unless (defined($uri) |
|
|
|
|
33
|
|
|
|
|
|
473
|
|
|
|
|
|
|
&& (ref($uri) eq 'URI::http' || ref($uri) eq 'URI::https')); |
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
## feedster and technorati do not follow |
|
476
|
|
|
|
|
|
|
## the usual search patterns thus we extract the query |
|
477
|
|
|
|
|
|
|
## terms by taking the last element from the path segments |
|
478
|
|
|
|
|
|
|
|
|
479
|
260
|
|
|
|
|
881
|
my $host = $uri->host; |
|
480
|
|
|
|
|
|
|
|
|
481
|
260
|
100
|
100
|
|
|
13750
|
return unless defined($host) && $host; |
|
482
|
|
|
|
|
|
|
|
|
483
|
256
|
100
|
|
|
|
870
|
if ( $host =~ m/(feedster|technorati)\.com$/ ){ |
|
484
|
4
|
|
|
|
|
24
|
$uri->query_form( q => ( $uri->path_segments)[-1]); |
|
485
|
|
|
|
|
|
|
} |
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
## clean up the host until it matches |
|
488
|
|
|
|
|
|
|
## something we already know about |
|
489
|
|
|
|
|
|
|
|
|
490
|
256
|
|
|
|
|
1399
|
while( ! defined $self->{'engines'}{ $host }){ |
|
491
|
134
|
|
|
|
|
242
|
my $c = index($host, '.'); |
|
492
|
134
|
100
|
|
|
|
270
|
last if $c <0; |
|
493
|
132
|
|
|
|
|
686
|
$host= substr($host, $c+1); |
|
494
|
|
|
|
|
|
|
} |
|
495
|
|
|
|
|
|
|
|
|
496
|
256
|
|
|
|
|
707
|
return ($uri, $host); |
|
497
|
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
} |
|
499
|
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
sub parse_search_string { |
|
502
|
134
|
|
|
134
|
1
|
4178
|
my $self = shift ; |
|
503
|
134
|
|
|
|
|
157
|
my $string = shift ; |
|
504
|
134
|
50
|
|
|
|
299
|
return unless defined($string); |
|
505
|
|
|
|
|
|
|
|
|
506
|
134
|
|
|
|
|
283
|
my ($uri,$host) = $self->_uri( $string ); |
|
507
|
134
|
100
|
|
|
|
327
|
return unless defined($uri); |
|
508
|
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
## get rid of the www |
|
510
|
132
|
|
|
|
|
184
|
$host =~ m!^www\.!; |
|
511
|
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
## find the query parameter the engine uses |
|
513
|
132
|
|
|
|
|
445
|
my $q = $self->{'engines'}{$host}{'q'}; |
|
514
|
132
|
100
|
|
|
|
297
|
return unless defined $q; |
|
515
|
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
## return the string passed to the query parameter |
|
517
|
128
|
|
|
|
|
398
|
my %h_query = $uri->query_form; |
|
518
|
|
|
|
|
|
|
|
|
519
|
128
|
|
|
|
|
12082
|
return $h_query{$q} |
|
520
|
|
|
|
|
|
|
} |
|
521
|
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=head2 findEngine |
|
523
|
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
Returns a list with the hostname of the search engine as the first element and |
|
525
|
|
|
|
|
|
|
the canonical name as the second element. |
|
526
|
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
my $ref = 'http://www.google.com/search?hl=en&q=a+simple+test&btnG=Google+Search'; |
|
528
|
|
|
|
|
|
|
my ($hostname, $canonical) = $uparse->findEngine( $ref ) ; |
|
529
|
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
This will return 'google.com' as the search engine hostname and 'Google' as the name. |
|
531
|
|
|
|
|
|
|
This function will return I<undef> on error. |
|
532
|
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
=cut |
|
534
|
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
sub findEngine { |
|
536
|
130
|
|
|
130
|
1
|
169
|
my $self = shift ; |
|
537
|
130
|
|
|
|
|
154
|
my $string = shift ; |
|
538
|
|
|
|
|
|
|
|
|
539
|
130
|
50
|
|
|
|
254
|
return unless defined($string); |
|
540
|
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
## create a URI object |
|
542
|
|
|
|
|
|
|
|
|
543
|
130
|
|
|
|
|
250
|
my ($uri,$hostname) = $self->_uri( $string ); |
|
544
|
130
|
100
|
66
|
|
|
701
|
return unless defined($uri) && $uri; |
|
545
|
124
|
50
|
33
|
|
|
1824
|
return unless defined($hostname) && $hostname; |
|
546
|
|
|
|
|
|
|
|
|
547
|
124
|
|
|
|
|
314
|
my $canonical = $self->{'engines'}->{$hostname}->{'name'}; |
|
548
|
|
|
|
|
|
|
|
|
549
|
124
|
|
|
|
|
324
|
return ($hostname,$canonical); |
|
550
|
|
|
|
|
|
|
} |
|
551
|
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
=head2 se_host |
|
553
|
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
Wrapper around findEngine - returns just the hostname. |
|
555
|
|
|
|
|
|
|
This function will return I<undef> on error. |
|
556
|
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
=cut |
|
558
|
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
sub se_host { |
|
560
|
130
|
|
|
130
|
1
|
1904
|
my $self = shift ; |
|
561
|
130
|
|
|
|
|
176
|
my $string = shift ; |
|
562
|
130
|
50
|
|
|
|
330
|
return unless defined($string) ; |
|
563
|
130
|
|
|
|
|
280
|
my ($host,$name) = $self->findEngine($string) ; |
|
564
|
130
|
|
|
|
|
469
|
return $host ; |
|
565
|
|
|
|
|
|
|
} |
|
566
|
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
=head2 se_name |
|
568
|
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
Wrapper around findEngine - returns just the canonical name. |
|
570
|
|
|
|
|
|
|
This function will return I<undef> on error. |
|
571
|
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
=cut |
|
573
|
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
sub se_name { |
|
575
|
0
|
|
|
0
|
1
|
|
my $self = shift ; |
|
576
|
0
|
|
|
|
|
|
my $string = shift ; |
|
577
|
0
|
0
|
|
|
|
|
return unless defined($string); |
|
578
|
0
|
|
|
|
|
|
my ($host,$name) = $self->findEngine($string) ; |
|
579
|
0
|
|
|
|
|
|
return $name ; |
|
580
|
|
|
|
|
|
|
} |
|
581
|
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
=head1 SUPPORTED ENGINES |
|
583
|
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
Currently supported search engines include: Sproose, Google Namibia, Google Ivory Coast, Google Oman, Technorati, Google Ecuador, |
|
585
|
|
|
|
|
|
|
Google Norfolk Island, Mahalo, Google UK, Yahoo! UK, Google Micronesia, Google Bahrain, Basta Clicar, |
|
586
|
|
|
|
|
|
|
Giga Busca, Google Greece, Google Belgium, Google Egypt, Google Chile, Godado (IT), Google Australia, |
|
587
|
|
|
|
|
|
|
Google Uruguay, Google India, Google Taiwan, Google Ukraine, Google US, Terra ES, |
|
588
|
|
|
|
|
|
|
Tesco Search, Megasearching, SAPO videos, Google Nepal, Google Israel, Google US Virgin Islands, Google Hungary, |
|
589
|
|
|
|
|
|
|
Google San Marino, Google Croatia, Google dot jobs, Google Panama, Google Malaysia, Internetica, Google Brunei Darussalam, |
|
590
|
|
|
|
|
|
|
Google Denmark, Google Pakistan, Google Solomon Islands, Google dot biz, Google Lesotho, IceRocket, Google Greenland, Fireball DE, |
|
591
|
|
|
|
|
|
|
Rtp, Google Portugal, Google Samoa, Google Kazakhstan, Google Blogsearch, Google Thailand, Google, Google Antiqua and Barbuda, |
|
592
|
|
|
|
|
|
|
Google Germany, Google Moldova, Google Zambia, Google Greece, Google Sri Lanka, Google Ireland, Google Austria, |
|
593
|
|
|
|
|
|
|
Google Peru, Google Guatemala, ICQ dot com, AOL UK, Google Guyana, In GR, Google dot info, MyWay, Pathfinder GR, Google Costa Rica, |
|
594
|
|
|
|
|
|
|
KataTudo, Google Jamaica, Google Vietnam, Google Morocco, Google Gambia, Google Singapore, Google Mauritius, Altavista, Google Afghanistan, |
|
595
|
|
|
|
|
|
|
Google Cote dIvoire, Google Kazakhstan, Google Czech Rep, Phantis GR, Google Bahamas, Google United Arab Emirates, Google East Timor, Ozu ES, |
|
596
|
|
|
|
|
|
|
Google Venezuela, Google Puerto Rico, Google Armenia, Google Croatia, Google Botswana, Google Tuvalu, Ask UK, Google Singapore, Mirago UK, |
|
597
|
|
|
|
|
|
|
Google Greenland, MSN Arabia, Google Nauru, Publico, Robby GR, Minas Planet, Pesquisa Iol, Google Romania, Google South Korea, Google Jersey, |
|
598
|
|
|
|
|
|
|
Netscape, Busca Aqui, Google Bulgaria, Google Uzbekistan, Tiscali UK, Ithaki, Cadê, Lycos IT, Google Suriname, Excite IT, Google Hong Kong, |
|
599
|
|
|
|
|
|
|
Kataweb IT, Google Burundi, Click Gratis, Google Vietnam, MSN, Alice.it, Google Honduras, Google Trinidad and Tobago, Google Uganda, XL, |
|
600
|
|
|
|
|
|
|
Jornal Noticias, Google Cook Islands, Google Japan, Google Ecuador, Google Ghana, Google Guadeloupe, Google Libya, Google Kenya, Fastbrowsersearch, |
|
601
|
|
|
|
|
|
|
Aeiou, Google Niue, Jornal Record, HotBot, Google Honduras, Google Georgia, Google Fiji, Google Philipines, BBC Search, Google, Google Laos, |
|
602
|
|
|
|
|
|
|
Soso, AltaVista Brasil, Lycos UK, SAPO fotos, Ask dot com, Google Netherlands, Google Philipines, Google Trinidad and Tobago, Google Turkey, |
|
603
|
|
|
|
|
|
|
AllTheWeb, Google Japan, Google Argentina, Google Vanuatu, Blueyonder, Google Greenland, Google Samoa, Google Georgia, Google Slovakia, |
|
604
|
|
|
|
|
|
|
Google Sri Lanka, Pesquisa SAPO, Google Latvia, Google Latvia, Correio Manha, Terra Busca, Google El Savador, Google Cambodia, |
|
605
|
|
|
|
|
|
|
Google Mauritius, Google China, AOL Search, Google Tokelau, Google Tonga, Correio da Manha, Radar UOL, Google Jordan, Godado, Google Jordan, |
|
606
|
|
|
|
|
|
|
Google Pitcairn Islands, Categorico IT, Google Morocco, Google Dominican Rep, Google France, Abacho, Google Azerbaijan, Google Andorra, Google Belize, |
|
607
|
|
|
|
|
|
|
Google Paraguay, Simpatico IT, Google Ethiopia, Google Uganda, Google Poland, Google Bolivia, Google Hungary, Google Russia, Diario Noticias, |
|
608
|
|
|
|
|
|
|
Google Puerto Rico, Google Montserrat, Yahoo! Japan, Google Seychelles, Mamma, Google Pitcairn Islands, Google South Africa, Paglo, Google Malta, |
|
609
|
|
|
|
|
|
|
Google Azerbaijan, Google New Zeland, Google China, Google Norway, Google Bosnia and Herzegovina, Google Indonesia, SpeedyBusca, Entrada, Google Anguilla, |
|
610
|
|
|
|
|
|
|
Google Rep of Congo, Google Dominica, Google Finland, Altavista UK, Google Guyana, MSN UK, Yahoo Answers, Google British Virgin Islands, Google Guadeloupe, |
|
611
|
|
|
|
|
|
|
Google Lithuania, Google Antiqua and Barbuda, Google Bahamas, Google Malawi, MSN Prodigy, Bing, Google Bolivia, Google Djubouti, Google Uzbekistan, Fastweb IT, |
|
612
|
|
|
|
|
|
|
Google Tajikistan, Virgin Search, Google Nigeria, Yahoo Japan, Pesquisa Clix, Google Grenada, Google Haiti, Google American Samoa, Google Pakistan, |
|
613
|
|
|
|
|
|
|
Google Cocos Islands, Google Hong Kong, NTLWorld, ilMotore, Google Belize, Google Guernsey, Google Sweden, Google Anguilla, Google Bangladesh, Google Isle of Man, |
|
614
|
|
|
|
|
|
|
Google Guernsey, Google Kyrgyzstan, Google Dem Rep of Congo, Google Malawi, Orange Search, Google Seychelles, Google Guyana, Google Gibraltar, |
|
615
|
|
|
|
|
|
|
Google Italy, Google Kiribati, TheSpider IT, Google Nicaragua, Google Russia, Google Venezuela, Google Poland, Google Brazil, Google Senegal, Conduit, Lycos, |
|
616
|
|
|
|
|
|
|
Google Isle of Man, Live.com, Google Italy, Libero IT, Google Canada, Google Nauru, Google Liechtenstein, Google Afghanistan, Cuil, Google Zimbabwe, Google Mauritius, |
|
617
|
|
|
|
|
|
|
Orange ES, Google Burundi, Google Portugal, ACBusca, Bem Rapido, Atalho Certo, Excite, Clusty, Yahoo Brazil, My Web Search, Google Spain, Google Uzbekistan, Google, |
|
618
|
|
|
|
|
|
|
Google Mexico, T-Online, Google dot mobi, Google Luxembourg, Google Austria, Yahoo!, Google Kiribati, Sweetim, Vai Busca, Google Mongolia, Google Saudi Arabia, Google dot net, |
|
619
|
|
|
|
|
|
|
Google Maldives, Google Trinidad and Tobago, Google Jersey, Feedster, Google Turkmenistan, Google Switzerland, Google Norfolk Island, Suche DE, Google Malawi, Google Rwanda, |
|
620
|
|
|
|
|
|
|
Lycos ES, Google Burundi, Google French Guiana, Google Kyrgyzstan, Google Saint Helena, VirginMedia, Google Iceland, SAPO sabores, Google India, Google Cuba, |
|
621
|
|
|
|
|
|
|
Google US Virgin Islands, Google Taiwan, Google Sao Tome, Google Slovenia, Starware, Google Estonia, Conduit, Yahoo India, Rediff, Guruji |
|
622
|
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
=head1 AUTHOR |
|
624
|
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
Spiros Denaxas, C<< <s.denaxas at gmail.com> >> |
|
626
|
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
=head1 SOURCE CODE |
|
628
|
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
The source code can be found on GitHub at L<https://github.com/spiros/URI-ParseSearchString> |
|
630
|
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
=head1 BUGS |
|
632
|
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
This is my first CPAN module so I encourage you to send all comments, especially bad ones, |
|
634
|
|
|
|
|
|
|
to my email address. |
|
635
|
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
This would not have been possible without the support of my co-workers at |
|
637
|
|
|
|
|
|
|
http://nestoria.co.uk - the easiest way of finding UK property. |
|
638
|
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
=head1 SUPPORT |
|
640
|
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
For more information, you could also visit my blog: |
|
642
|
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
http://blog.ffffruit.com |
|
644
|
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
=over 4 |
|
646
|
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
=back |
|
648
|
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
=head1 COPYRIGHT & LICENSE |
|
650
|
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
Copyright 2011 Spiros Denaxas, all rights reserved. |
|
652
|
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
|
654
|
|
|
|
|
|
|
under the same terms as Perl itself. |
|
655
|
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
=cut |
|
657
|
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
1; # End of URI::ParseSearchString |