| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package URI::ParseSearchString; | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | require Exporter; | 
| 4 |  |  |  |  |  |  | @ISA = (Exporter); | 
| 5 |  |  |  |  |  |  | @EXPORT = ( qw (parse_search_string findEngine se_host se_name se_term) ); | 
| 6 |  |  |  |  |  |  |  | 
| 7 | 3 |  |  | 3 |  | 30280 | use warnings; | 
|  | 3 |  |  |  |  | 8 |  | 
|  | 3 |  |  |  |  | 116 |  | 
| 8 | 3 |  |  | 3 |  | 19 | use strict; | 
|  | 3 |  |  |  |  | 4 |  | 
|  | 3 |  |  |  |  | 100 |  | 
| 9 | 3 |  |  | 3 |  | 2832 | use URI; | 
|  | 3 |  |  |  |  | 18044 |  | 
|  | 3 |  |  |  |  | 109 |  | 
| 10 | 3 |  |  | 3 |  | 34686 | use Data::Dumper; | 
|  | 3 |  |  |  |  | 23182 |  | 
|  | 3 |  |  |  |  | 12954 |  | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | =encoding utf8 | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | =head1 NAME | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | URI::ParseSearchString - parse search engine referrer URLs and extract keywords used | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | =head1 VERSION | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | Version 3.51  (Diablo 3 edition) | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | =cut | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | our $VERSION = '3.51'; | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | use URI::ParseSearchString ; | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | my $uparse = URI::ParseSearchString->new(); | 
| 31 |  |  |  |  |  |  | my $ref    = 'http://www.google.com/search?hl=en&q=a+simple+test&btnG=Google+Search'; | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | my $query_terms = $uparse->se_term( $ref ); | 
| 34 |  |  |  |  |  |  | my $canonical   = $uparse->se_name( $ref ); | 
| 35 |  |  |  |  |  |  | my $hostname    = $uparse->se_host( $ref ); | 
| 36 |  |  |  |  |  |  |  | 
| 37 |  |  |  |  |  |  | =head1 FUNCTIONS | 
| 38 |  |  |  |  |  |  |  | 
| 39 |  |  |  |  |  |  | =head2 new | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | Creates a new URI::ParseSearchString object. | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | my $uparse = URI::ParseSearchString->new(); | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | =cut | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | my $RH_LOOKUPS = { | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | 'answers.yahoo.com'     => { name => 'Yahoo Answers', q=>'p' }, | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | 'sapo.pt'                => { name => 'Pesquisa SAPO', q => 'q'}, | 
| 52 |  |  |  |  |  |  | 'iol.pt'                 => { name => 'Pesquisa Iol',  q => 'q'}, | 
| 53 |  |  |  |  |  |  | 'pesquisa.clix.pt'       => { name => 'Pesquisa Clix', q => 'question'}, | 
| 54 |  |  |  |  |  |  | 'aeiou.pt'               => { name => 'Aeiou',         q => 'q'}, | 
| 55 |  |  |  |  |  |  | 'cuil.pt'                => { name => 'Cuil PT',       q => 'q' }, | 
| 56 |  |  |  |  |  |  |  | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  | 'fotos.sapo.pt'          => { name => 'SAPO fotos',    q => 'word'}, | 
| 59 |  |  |  |  |  |  | 'videos.sapo.pt'         => { name => 'SAPO videos',   q => 'word'}, | 
| 60 |  |  |  |  |  |  | 'sabores.sapo.pt'        => { name => 'SAPO sabores',  q => 'cxSearch'}, | 
| 61 |  |  |  |  |  |  | 'jn.sapo.pt'             => { name => 'Jornal Noticias', q => 'Pesquisa'}, | 
| 62 |  |  |  |  |  |  | 'dn.sapo.pt'             => { name => 'Diario Noticias', q => 'Pesquisa'}, | 
| 63 |  |  |  |  |  |  |  | 
| 64 |  |  |  |  |  |  |  | 
| 65 |  |  |  |  |  |  | 'rtp.pt'                 => { name => 'Rtp',           q => 'search'}, | 
| 66 |  |  |  |  |  |  | 'record.pt'              => { name => 'Jornal Record', q => 'q'}, | 
| 67 |  |  |  |  |  |  | 'correiodamanha.pt'      => { name => 'Correio da Manha',        q => 'pesquisa'}, | 
| 68 |  |  |  |  |  |  | 'correiomanha.pt'        => { name => 'Correio Manha',        q => 'pesquisa'}, | 
| 69 |  |  |  |  |  |  | 'publico.clix.pt'        => { name => 'Publico',       q => 'q'}, | 
| 70 |  |  |  |  |  |  | 'xl.pt'                  => { name => 'XL',            q => 'pesquisa'}, | 
| 71 |  |  |  |  |  |  |  | 
| 72 |  |  |  |  |  |  | 'abacho.com'             => { name => 'Abacho',        q => 'q'}, | 
| 73 |  |  |  |  |  |  | 'alice.it'               => { name => 'Alice.it',      q => 'qs' }, | 
| 74 |  |  |  |  |  |  | 'altavista.com'          => { name => 'Altavista',     q => 'q' }, | 
| 75 |  |  |  |  |  |  | 'aolsearch.aol.com'      => { name => 'AOL Search',    q => 'query' }, | 
| 76 |  |  |  |  |  |  | 'as.starware.com'        => { name => 'Starware',      q => 'qry' }, | 
| 77 |  |  |  |  |  |  | 'blogs.icerocket.com'    => { name => 'IceRocket',     q => 'q' }, | 
| 78 |  |  |  |  |  |  | 'blogsearch.google.com'  => { name => 'Google Blogsearch', q => 'q' }, | 
| 79 |  |  |  |  |  |  | 'busca.orange.es'        => { name => 'Orange ES',     q => 'buscar' }, | 
| 80 |  |  |  |  |  |  | 'buscador.lycos.es'      => { name => 'Lycos ES',      q => 'query' }, | 
| 81 |  |  |  |  |  |  | 'buscador.terra.es'      => { name => 'Terra ES',      q => 'query' }, | 
| 82 |  |  |  |  |  |  | 'buscar.ozu.es'          => { name => 'Ozu ES',        q => 'q' }, | 
| 83 |  |  |  |  |  |  | 'categorico.it'          => { name => 'Categorico IT', q => 'q' }, | 
| 84 |  |  |  |  |  |  | 'cuil.com'               => { name => 'Cuil',          q => 'q' }, | 
| 85 |  |  |  |  |  |  | 'clusty.com'             => { name => 'Clusty',        q => 'query' }, | 
| 86 |  |  |  |  |  |  | 'excite.com'             => { name => 'Excite',        q => 'q' }, | 
| 87 |  |  |  |  |  |  | 'excite.it'              => { name => 'Excite IT',     q => 'q' }, | 
| 88 |  |  |  |  |  |  | 'fastweb.it'             => { name => 'Fastweb IT',    q => 'q' }, | 
| 89 |  |  |  |  |  |  | 'fastbrowsersearch.com'  => { name => 'Fastbrowsersearch', q=> 'q' }, | 
| 90 |  |  |  |  |  |  | 'godado.com'             => { name => 'Godado',        q => 'key' }, | 
| 91 |  |  |  |  |  |  | 'godado.it'              => { name => 'Godado (IT)',   q => 'key' }, | 
| 92 |  |  |  |  |  |  | 'gps.virgin.net'         => { name => 'Virgin Search', q => 'q' }, | 
| 93 |  |  |  |  |  |  | 'ilmotore.com'           => { name => 'ilMotore',      q => 'query' }, | 
| 94 |  |  |  |  |  |  | 'ithaki.net'             => { name => 'Ithaki',        q => 'query' }, | 
| 95 |  |  |  |  |  |  | 'kataweb.it'             => { name => 'Kataweb IT',    q => 'q' }, | 
| 96 |  |  |  |  |  |  | 'libero.it'              => { name => 'Libero IT',     q => 'query' }, | 
| 97 |  |  |  |  |  |  | 'lycos.it'               => { name => 'Lycos IT',      q => 'query' }, | 
| 98 |  |  |  |  |  |  | 'search.aol.co.uk'       => { name => 'AOL UK',        q => 'query' }, | 
| 99 |  |  |  |  |  |  | 'search.arabia.msn.com'  => { name => 'MSN Arabia',    q => 'q' }, | 
| 100 |  |  |  |  |  |  | 'search.bbc.co.uk'       => { name => 'BBC Search',    q => 'q' }, | 
| 101 |  |  |  |  |  |  | 'search.conduit.com'     => { name => 'Conduit',       q => 'q' }, | 
| 102 |  |  |  |  |  |  | 'search.icq.com'         => { name => 'ICQ dot com',   q => 'q' }, | 
| 103 |  |  |  |  |  |  | 'search.live.com'        => { name => 'Live.com',      q => 'q' }, | 
| 104 |  |  |  |  |  |  | 'search.lycos.co.uk'     => { name => 'Lycos UK',      q => 'query' }, | 
| 105 |  |  |  |  |  |  | 'search.lycos.com'       => { name => 'Lycos',         q => 'query' }, | 
| 106 |  |  |  |  |  |  | 'search.msn.co.uk'       => { name => 'MSN UK',        q => 'q' }, | 
| 107 |  |  |  |  |  |  | 'search.msn.com'         => { name => 'MSN',           q => 'q' }, | 
| 108 |  |  |  |  |  |  | 'search.myway.com'       => { name => 'MyWay',         q => 'searchfor' }, | 
| 109 |  |  |  |  |  |  | 'search.mywebsearch.com' => { name => 'My Web Search', q => 'searchfor' }, | 
| 110 |  |  |  |  |  |  | 'search.ntlworld.com'    => { name => 'NTLWorld',      q => 'q' }, | 
| 111 |  |  |  |  |  |  | 'search.orange.co.uk'    => { name => 'Orange Search', q => 'q' }, | 
| 112 |  |  |  |  |  |  | 'search.prodigy.msn.com' => { name => 'MSN Prodigy',   q => 'q' }, | 
| 113 |  |  |  |  |  |  | 'search.sweetim.com'     => { name => 'Sweetim',       q => 'q' }, | 
| 114 |  |  |  |  |  |  | 'search.virginmedia.com' => { name => 'VirginMedia',   q => 'q' }, | 
| 115 |  |  |  |  |  |  | 'search.yahoo.co.jp'     => { name => 'Yahoo Japan',   q => 'p' }, | 
| 116 |  |  |  |  |  |  | 'search.yahoo.com'       => { name => 'Yahoo!',        q => 'p' }, | 
| 117 |  |  |  |  |  |  | 'search.yahoo.jp'        => { name => 'Yahoo! Japan',  q => 'p' }, | 
| 118 |  |  |  |  |  |  | 'simpatico.ws'           => { name => 'Simpatico IT',  q => 'query' }, | 
| 119 |  |  |  |  |  |  | 'soso.com'               => { name => 'Soso',          q => 'w' }, | 
| 120 |  |  |  |  |  |  | 'suche.fireball.de'      => { name => 'Fireball DE',   q => 'query' }, | 
| 121 |  |  |  |  |  |  | 'suche.web.de'           => { name => 'Suche DE',      q => 'su' }, | 
| 122 |  |  |  |  |  |  | 'suche.t-online.de'      => { name => 'T-Online',      q => 'q' }, | 
| 123 |  |  |  |  |  |  | 'thespider.it'           => { name => 'TheSpider IT',  q => 'q' }, | 
| 124 |  |  |  |  |  |  | 'uk.altavista.com'       => { name => 'Altavista UK',  q => 'q' }, | 
| 125 |  |  |  |  |  |  | 'uk.ask.com'             => { name => 'Ask UK',        q => 'q' }, | 
| 126 |  |  |  |  |  |  | 'uk.search.yahoo.com'    => { name => 'Yahoo! UK',     q => 'p' }, | 
| 127 |  |  |  |  |  |  | 'alltheweb.com'          => { name => 'AllTheWeb',     q => 'q' }, | 
| 128 |  |  |  |  |  |  | 'ask.com'                => { name => 'Ask dot com',   q => 'q' }, | 
| 129 |  |  |  |  |  |  | 'blueyonder.co.uk'       => { name => 'Blueyonder',    q => 'q' }, | 
| 130 |  |  |  |  |  |  | 'feedster.com'           => { name => 'Feedster',      q => 'q' }, | 
| 131 |  |  |  |  |  |  | 'google.ad'              => { name => 'Google Andorra',q => 'q' }, | 
| 132 |  |  |  |  |  |  | 'google.ae'              => { name => 'Google United Arab Emirates', q => 'q' }, | 
| 133 |  |  |  |  |  |  | 'google.af'              => { name => 'Google Afghanistan',          q => 'q' }, | 
| 134 |  |  |  |  |  |  | 'google.ag'              => { name => 'Google Antiqua and Barbuda',  q => 'q' }, | 
| 135 |  |  |  |  |  |  | 'google.am'              => { name => 'Google Armenia',              q => 'q' }, | 
| 136 |  |  |  |  |  |  | 'google.as'              => { name => 'Google American Samoa',       q => 'q' }, | 
| 137 |  |  |  |  |  |  | 'google.at'              => { name => 'Google Austria',    q => 'q' }, | 
| 138 |  |  |  |  |  |  | 'google.az'              => { name => 'Google Azerbaijan', q => 'q' }, | 
| 139 |  |  |  |  |  |  | 'google.ba'              => { name => 'Google Bosnia and Herzegovina', q => 'q' }, | 
| 140 |  |  |  |  |  |  | 'google.be'              => { name => 'Google Belgium', q => 'q' }, | 
| 141 |  |  |  |  |  |  | 'google.bg'              => { name => 'Google Bulgaria',q => 'q' }, | 
| 142 |  |  |  |  |  |  | 'google.bi'              => { name => 'Google Burundi', q => 'q' }, | 
| 143 |  |  |  |  |  |  | 'google.biz'             => { name => 'Google dot biz', q => 'q' }, | 
| 144 |  |  |  |  |  |  | 'google.bo'              => { name => 'Google Bolivia', q => 'q' }, | 
| 145 |  |  |  |  |  |  | 'google.bs'              => { name => 'Google Bahamas', q => 'q' }, | 
| 146 |  |  |  |  |  |  | 'google.bz'              => { name => 'Google Belize',  q => 'q' }, | 
| 147 |  |  |  |  |  |  | 'google.ca'              => { name => 'Google Canada',  q => 'q' }, | 
| 148 |  |  |  |  |  |  | 'google.cc'              => { name => 'Google Cocos Islands',    q => 'q' }, | 
| 149 |  |  |  |  |  |  | 'google.cd'              => { name => 'Google Dem Rep of Congo', q => 'q' }, | 
| 150 |  |  |  |  |  |  | 'google.cg'              => { name => 'Google Rep of Congo',     q => 'q' }, | 
| 151 |  |  |  |  |  |  | 'google.ch'              => { name => 'Google Switzerland',      q => 'q' }, | 
| 152 |  |  |  |  |  |  | 'google.ci'              => { name => 'Google Cote dIvoire',     q => 'q' }, | 
| 153 |  |  |  |  |  |  | 'google.cl'              => { name => 'Google Chile',    q => 'q' }, | 
| 154 |  |  |  |  |  |  | 'google.cn'              => { name => 'Google China',    q => 'q' }, | 
| 155 |  |  |  |  |  |  | 'google.co.at'           => { name => 'Google Austria',  q => 'q' }, | 
| 156 |  |  |  |  |  |  | 'google.co.bi'           => { name => 'Google Burundi',  q => 'q' }, | 
| 157 |  |  |  |  |  |  | 'google.co.bw'           => { name => 'Google Botswana', q => 'q' }, | 
| 158 |  |  |  |  |  |  | 'google.co.ci'           => { name => 'Google Ivory Coast',  q => 'q' }, | 
| 159 |  |  |  |  |  |  | 'google.co.ck'           => { name => 'Google Cook Islands', q => 'q' }, | 
| 160 |  |  |  |  |  |  | 'google.co.cr'           => { name => 'Google Costa Rica',   q => 'q' }, | 
| 161 |  |  |  |  |  |  | 'google.co.gg'           => { name => 'Google Guernsey',     q => 'q' }, | 
| 162 |  |  |  |  |  |  | 'google.co.gl'           => { name => 'Google Greenland',    q => 'q' }, | 
| 163 |  |  |  |  |  |  | 'google.co.gy'           => { name => 'Google Guyana',       q => 'q' }, | 
| 164 |  |  |  |  |  |  | 'google.co.hu'           => { name => 'Google Hungary',      q => 'q' }, | 
| 165 |  |  |  |  |  |  | 'google.co.id'           => { name => 'Google Indonesia',    q => 'q' }, | 
| 166 |  |  |  |  |  |  | 'google.co.il'           => { name => 'Google Israel',       q => 'q' }, | 
| 167 |  |  |  |  |  |  | 'google.co.im'           => { name => 'Google Isle of Man',  q => 'q' }, | 
| 168 |  |  |  |  |  |  | 'google.co.in'           => { name => 'Google India',        q => 'q' }, | 
| 169 |  |  |  |  |  |  | 'google.co.it'           => { name => 'Google Italy',        q => 'q' }, | 
| 170 |  |  |  |  |  |  | 'google.co.je'           => { name => 'Google Jersey',       q => 'q' }, | 
| 171 |  |  |  |  |  |  | 'google.co.jp'           => { name => 'Google Japan',        q => 'q' }, | 
| 172 |  |  |  |  |  |  | 'google.co.ke'           => { name => 'Google Kenya',        q => 'q' }, | 
| 173 |  |  |  |  |  |  | 'google.co.kr'           => { name => 'Google South Korea',  q => 'q' }, | 
| 174 |  |  |  |  |  |  | 'google.co.ls'           => { name => 'Google Lesotho',      q => 'q' }, | 
| 175 |  |  |  |  |  |  | 'google.co.ma'           => { name => 'Google Morocco',      q => 'q' }, | 
| 176 |  |  |  |  |  |  | 'google.co.mu'           => { name => 'Google Mauritius',    q => 'q' }, | 
| 177 |  |  |  |  |  |  | 'google.co.mw'           => { name => 'Google Malawi',       q => 'q' }, | 
| 178 |  |  |  |  |  |  | 'google.co.nz'           => { name => 'Google New Zeland',   q => 'q' }, | 
| 179 |  |  |  |  |  |  | 'google.co.pn'           => { name => 'Google Pitcairn Islands',    q => 'q' }, | 
| 180 |  |  |  |  |  |  | 'google.co.th'           => { name => 'Google Thailand',            q => 'q' }, | 
| 181 |  |  |  |  |  |  | 'google.co.tt'           => { name => 'Google Trinidad and Tobago', q => 'q' }, | 
| 182 |  |  |  |  |  |  | 'google.co.ug'           => { name => 'Google Uganda',       q => 'q' }, | 
| 183 |  |  |  |  |  |  | 'google.co.uk'           => { name => 'Google UK',           q => 'q' }, | 
| 184 |  |  |  |  |  |  | 'google.co.uz'           => { name => 'Google Uzbekistan',   q => 'q' }, | 
| 185 |  |  |  |  |  |  | 'google.co.ve'           => { name => 'Google Venezuela',    q => 'q' }, | 
| 186 |  |  |  |  |  |  | 'google.co.vi'           => { name => 'Google US Virgin Islands', q => 'q' }, | 
| 187 |  |  |  |  |  |  | 'google.co.za'           => { name => 'Google  South Africa',q => 'q' }, | 
| 188 |  |  |  |  |  |  | 'google.co.zm'           => { name => 'Google Zambia',       q => 'q' }, | 
| 189 |  |  |  |  |  |  | 'google.co.zw'           => { name => 'Google Zimbabwe',     q => 'q' }, | 
| 190 |  |  |  |  |  |  | 'google.com'             => { name => 'Google',              q => 'q' }, | 
| 191 |  |  |  |  |  |  | 'google.com.af'          => { name => 'Google Afghanistan',  q => 'q' }, | 
| 192 |  |  |  |  |  |  | 'google.com.ag'          => { name => 'Google Antiqua and Barbuda', q => 'q' }, | 
| 193 |  |  |  |  |  |  | 'google.com.ai'          => { name => 'Google Anguilla',    q => 'q' }, | 
| 194 |  |  |  |  |  |  | 'google.com.ar'          => { name => 'Google Argentina',   q => 'q' }, | 
| 195 |  |  |  |  |  |  | 'google.com.au'          => { name => 'Google Australia',   q => 'q' }, | 
| 196 |  |  |  |  |  |  | 'google.com.az'          => { name => 'Google Azerbaijan',  q => 'q' }, | 
| 197 |  |  |  |  |  |  | 'google.com.bd'          => { name => 'Google Bangladesh',  q => 'q' }, | 
| 198 |  |  |  |  |  |  | 'google.com.bh'          => { name => 'Google Bahrain',     q => 'q' }, | 
| 199 |  |  |  |  |  |  | 'google.com.bi'          => { name => 'Google Burundi',     q => 'q' }, | 
| 200 |  |  |  |  |  |  | 'google.com.bn'          => { name => 'Google Brunei Darussalam', q => 'q' }, | 
| 201 |  |  |  |  |  |  | 'google.com.bo'          => { name => 'Google Bolivia',     q => 'q' }, | 
| 202 |  |  |  |  |  |  | 'google.com.br'          => { name => 'Google Brazil',      q => 'q' }, | 
| 203 |  |  |  |  |  |  | 'google.com.bs'          => { name => 'Google Bahamas',     q => 'q' }, | 
| 204 |  |  |  |  |  |  | 'google.com.bz'          => { name => 'Google Belize',      q => 'q' }, | 
| 205 |  |  |  |  |  |  | 'google.com.cn'          => { name => 'Google China',       q => 'q' }, | 
| 206 |  |  |  |  |  |  | 'google.com.co'          => { name => 'Google',             q => 'q' }, | 
| 207 |  |  |  |  |  |  | 'google.com.cu'          => { name => 'Google Cuba',        q => 'q' }, | 
| 208 |  |  |  |  |  |  | 'google.com.do'          => { name => 'Google Dominican Rep', q => 'q' }, | 
| 209 |  |  |  |  |  |  | 'google.com.ec'          => { name => 'Google Ecuador',     q => 'q' }, | 
| 210 |  |  |  |  |  |  | 'google.com.eg'          => { name => 'Google Egypt',       q => 'q' }, | 
| 211 |  |  |  |  |  |  | 'google.com.et'          => { name => 'Google Ethiopia',    q => 'q' }, | 
| 212 |  |  |  |  |  |  | 'google.com.fj'          => { name => 'Google Fiji',        q => 'q' }, | 
| 213 |  |  |  |  |  |  | 'google.com.ge'          => { name => 'Google Georgia',     q => 'q' }, | 
| 214 |  |  |  |  |  |  | 'google.com.gh'          => { name => 'Google Ghana',       q => 'q' }, | 
| 215 |  |  |  |  |  |  | 'google.com.gi'          => { name => 'Google Gibraltar',   q => 'q' }, | 
| 216 |  |  |  |  |  |  | 'google.com.gl'          => { name => 'Google Greenland',   q => 'q' }, | 
| 217 |  |  |  |  |  |  | 'google.com.gp'          => { name => 'Google Guadeloupe',  q => 'q' }, | 
| 218 |  |  |  |  |  |  | 'google.com.gr'          => { name => 'Google Greece',      q => 'q' }, | 
| 219 |  |  |  |  |  |  | 'google.com.gt'          => { name => 'Google Guatemala',   q => 'q' }, | 
| 220 |  |  |  |  |  |  | 'google.com.gy'          => { name => 'Google Guyana',      q => 'q' }, | 
| 221 |  |  |  |  |  |  | 'google.com.hk'          => { name => 'Google Hong Kong',   q => 'q' }, | 
| 222 |  |  |  |  |  |  | 'google.com.hn'          => { name => 'Google Honduras',    q => 'q' }, | 
| 223 |  |  |  |  |  |  | 'google.com.hr'          => { name => 'Google Croatia',     q => 'q' }, | 
| 224 |  |  |  |  |  |  | 'google.com.jm'          => { name => 'Google Jamaica',     q => 'q' }, | 
| 225 |  |  |  |  |  |  | 'google.com.jo'          => { name => 'Google Jordan',      q => 'q' }, | 
| 226 |  |  |  |  |  |  | 'google.com.kg'          => { name => 'Google Kyrgyzstan',  q => 'q' }, | 
| 227 |  |  |  |  |  |  | 'google.com.kh'          => { name => 'Google Cambodia',    q => 'q' }, | 
| 228 |  |  |  |  |  |  | 'google.com.ki'          => { name => 'Google Kiribati',    q => 'q' }, | 
| 229 |  |  |  |  |  |  | 'google.com.kz'          => { name => 'Google Kazakhstan',  q => 'q' }, | 
| 230 |  |  |  |  |  |  | 'google.com.lk'          => { name => 'Google Sri Lanka',   q => 'q' }, | 
| 231 |  |  |  |  |  |  | 'google.com.lv'          => { name => 'Google Latvia',      q => 'q' }, | 
| 232 |  |  |  |  |  |  | 'google.com.ly'          => { name => 'Google Libya',       q => 'q' }, | 
| 233 |  |  |  |  |  |  | 'google.com.mt'          => { name => 'Google Malta',       q => 'q' }, | 
| 234 |  |  |  |  |  |  | 'google.com.mu'          => { name => 'Google Mauritius',   q => 'q' }, | 
| 235 |  |  |  |  |  |  | 'google.com.mw'          => { name => 'Google Malawi',      q => 'q' }, | 
| 236 |  |  |  |  |  |  | 'google.com.mx'          => { name => 'Google Mexico',      q => 'q' }, | 
| 237 |  |  |  |  |  |  | 'google.com.my'          => { name => 'Google Malaysia',    q => 'q' }, | 
| 238 |  |  |  |  |  |  | 'google.com.na'          => { name => 'Google Namibia',     q => 'q' }, | 
| 239 |  |  |  |  |  |  | 'google.com.nf'          => { name => 'Google Norfolk Island', q => 'q' }, | 
| 240 |  |  |  |  |  |  | 'google.com.ng'          => { name => 'Google Nigeria',        q => 'q' }, | 
| 241 |  |  |  |  |  |  | 'google.com.ni'          => { name => 'Google Nicaragua',   q => 'q' }, | 
| 242 |  |  |  |  |  |  | 'google.com.np'          => { name => 'Google Nepal',       q => 'q' }, | 
| 243 |  |  |  |  |  |  | 'google.com.nr'          => { name => 'Google Nauru',       q => 'q' }, | 
| 244 |  |  |  |  |  |  | 'google.com.om'          => { name => 'Google Oman',        q => 'q' }, | 
| 245 |  |  |  |  |  |  | 'google.com.pa'          => { name => 'Google Panama',      q => 'q' }, | 
| 246 |  |  |  |  |  |  | 'google.com.pe'          => { name => 'Google Peru',        q => 'q' }, | 
| 247 |  |  |  |  |  |  | 'google.com.ph'          => { name => 'Google Philipines',  q => 'q' }, | 
| 248 |  |  |  |  |  |  | 'google.com.pk'          => { name => 'Google Pakistan',    q => 'q' }, | 
| 249 |  |  |  |  |  |  | 'google.com.pl'          => { name => 'Google Poland',      q => 'q' }, | 
| 250 |  |  |  |  |  |  | 'google.com.pr'          => { name => 'Google Puerto Rico', q => 'q' }, | 
| 251 |  |  |  |  |  |  | 'google.com.pt'          => { name => 'Google Portugal',    q => 'q' }, | 
| 252 |  |  |  |  |  |  | 'google.com.py'          => { name => 'Google Paraguay',    q => 'q' }, | 
| 253 |  |  |  |  |  |  | 'google.com.qa'          => { name => 'Google',             q => 'q' }, | 
| 254 |  |  |  |  |  |  | 'google.com.ru'          => { name => 'Google Russia',      q => 'q' }, | 
| 255 |  |  |  |  |  |  | 'google.com.sa'          => { name => 'Google Saudi Arabia',    q => 'q' }, | 
| 256 |  |  |  |  |  |  | 'google.com.sb'          => { name => 'Google Solomon Islands', q => 'q' }, | 
| 257 |  |  |  |  |  |  | 'google.com.sc'          => { name => 'Google Seychelles',      q => 'q' }, | 
| 258 |  |  |  |  |  |  | 'google.com.sg'          => { name => 'Google Singapore',   q => 'q' }, | 
| 259 |  |  |  |  |  |  | 'google.com.sv'          => { name => 'Google El Savador',  q => 'q' }, | 
| 260 |  |  |  |  |  |  | 'google.com.tj'          => { name => 'Google Tajikistan',  q => 'q' }, | 
| 261 |  |  |  |  |  |  | 'google.com.tr'          => { name => 'Google Turkey',      q => 'q' }, | 
| 262 |  |  |  |  |  |  | 'google.com.tt'          => { name => 'Google Trinidad and Tobago', q => 'q' }, | 
| 263 |  |  |  |  |  |  | 'google.com.tw'          => { name => 'Google Taiwan',      q => 'q' }, | 
| 264 |  |  |  |  |  |  | 'google.com.ua'          => { name => 'Google Ukraine',      q => 'q' }, | 
| 265 |  |  |  |  |  |  | 'google.com.uy'          => { name => 'Google Uruguay',     q => 'q' }, | 
| 266 |  |  |  |  |  |  | 'google.com.uz'          => { name => 'Google Uzbekistan',  q => 'q' }, | 
| 267 |  |  |  |  |  |  | 'google.com.ve'          => { name => 'Google Venezuela',   q => 'q' }, | 
| 268 |  |  |  |  |  |  | 'google.com.vi'          => { name => 'Google US Virgin Islands', q => 'q' }, | 
| 269 |  |  |  |  |  |  | 'google.com.vn'          => { name => 'Google Vietnam',     q => 'q' }, | 
| 270 |  |  |  |  |  |  | 'google.com.ws'          => { name => 'Google Samoa',       q => 'q' }, | 
| 271 |  |  |  |  |  |  | 'google.cz'              => { name => 'Google Czech Rep',   q => 'q' }, | 
| 272 |  |  |  |  |  |  | 'google.de'              => { name => 'Google Germany',     q => 'q' }, | 
| 273 |  |  |  |  |  |  | 'google.dj'              => { name => 'Google Djubouti',    q => 'q' }, | 
| 274 |  |  |  |  |  |  | 'google.dk'              => { name => 'Google Denmark',     q => 'q' }, | 
| 275 |  |  |  |  |  |  | 'google.dm'              => { name => 'Google Dominica',    q => 'q' }, | 
| 276 |  |  |  |  |  |  | 'google.ec'              => { name => 'Google Ecuador',     q => 'q' }, | 
| 277 |  |  |  |  |  |  | 'google.ee'              => { name => 'Google Estonia',     q => 'q' }, | 
| 278 |  |  |  |  |  |  | 'google.es'              => { name => 'Google Spain',       q => 'q' }, | 
| 279 |  |  |  |  |  |  | 'google.fi'              => { name => 'Google Finland',     q => 'q' }, | 
| 280 |  |  |  |  |  |  | 'google.fm'              => { name => 'Google Micronesia',  q => 'q' }, | 
| 281 |  |  |  |  |  |  | 'google.fr'              => { name => 'Google France',      q => 'q' }, | 
| 282 |  |  |  |  |  |  | 'google.gd'              => { name => 'Google Grenada',     q => 'q' }, | 
| 283 |  |  |  |  |  |  | 'google.ge'              => { name => 'Google Georgia',     q => 'q' }, | 
| 284 |  |  |  |  |  |  | 'google.gf'              => { name => 'Google French Guiana', q => 'q' }, | 
| 285 |  |  |  |  |  |  | 'google.gg'              => { name => 'Google Guernsey',      q => 'q' }, | 
| 286 |  |  |  |  |  |  | 'google.gl'              => { name => 'Google Greenland',     q => 'q' }, | 
| 287 |  |  |  |  |  |  | 'google.gm'              => { name => 'Google Gambia',        q => 'q' }, | 
| 288 |  |  |  |  |  |  | 'google.gp'              => { name => 'Google Guadeloupe',    q => 'q' }, | 
| 289 |  |  |  |  |  |  | 'google.gr'              => { name => 'Google Greece',        q => 'q' }, | 
| 290 |  |  |  |  |  |  | 'google.gy'              => { name => 'Google Guyana',        q => 'q' }, | 
| 291 |  |  |  |  |  |  | 'google.hk'              => { name => 'Google Hong Kong',     q => 'q' }, | 
| 292 |  |  |  |  |  |  | 'google.hn'              => { name => 'Google Honduras',      q => 'q' }, | 
| 293 |  |  |  |  |  |  | 'google.hr'              => { name => 'Google Croatia',       q => 'q' }, | 
| 294 |  |  |  |  |  |  | 'google.ht'              => { name => 'Google Haiti',         q => 'q' }, | 
| 295 |  |  |  |  |  |  | 'google.hu'              => { name => 'Google Hungary',       q => 'q' }, | 
| 296 |  |  |  |  |  |  | 'google.ie'              => { name => 'Google Ireland',       q => 'q' }, | 
| 297 |  |  |  |  |  |  | 'google.im'              => { name => 'Google Isle of Man',   q => 'q' }, | 
| 298 |  |  |  |  |  |  | 'google.in'              => { name => 'Google India',         q => 'q' }, | 
| 299 |  |  |  |  |  |  | 'google.info'            => { name => 'Google dot info',      q => 'q' }, | 
| 300 |  |  |  |  |  |  | 'google.is'              => { name => 'Google Iceland',       q => 'q' }, | 
| 301 |  |  |  |  |  |  | 'google.it'              => { name => 'Google Italy',         q => 'q' }, | 
| 302 |  |  |  |  |  |  | 'google.je'              => { name => 'Google Jersey',        q => 'q' }, | 
| 303 |  |  |  |  |  |  | 'google.jo'              => { name => 'Google Jordan',        q => 'q' }, | 
| 304 |  |  |  |  |  |  | 'google.jobs'            => { name => 'Google dot jobs',      q => 'q' }, | 
| 305 |  |  |  |  |  |  | 'google.jp'              => { name => 'Google Japan',         q => 'q' }, | 
| 306 |  |  |  |  |  |  | 'google.kg'              => { name => 'Google Kyrgyzstan',    q => 'q' }, | 
| 307 |  |  |  |  |  |  | 'google.ki'              => { name => 'Google Kiribati',      q => 'q' }, | 
| 308 |  |  |  |  |  |  | 'google.kz'              => { name => 'Google Kazakhstan',    q => 'q' }, | 
| 309 |  |  |  |  |  |  | 'google.la'              => { name => 'Google Laos',          q => 'q' }, | 
| 310 |  |  |  |  |  |  | 'google.li'              => { name => 'Google Liechtenstein', q => 'q' }, | 
| 311 |  |  |  |  |  |  | 'google.lk'              => { name => 'Google Sri Lanka',     q => 'q' }, | 
| 312 |  |  |  |  |  |  | 'google.lt'              => { name => 'Google Lithuania',     q => 'q' }, | 
| 313 |  |  |  |  |  |  | 'google.lu'              => { name => 'Google Luxembourg',    q => 'q' }, | 
| 314 |  |  |  |  |  |  | 'google.lv'              => { name => 'Google Latvia',        q => 'q' }, | 
| 315 |  |  |  |  |  |  | 'google.ma'              => { name => 'Google Morocco',       q => 'q' }, | 
| 316 |  |  |  |  |  |  | 'google.md'              => { name => 'Google Moldova',       q => 'q' }, | 
| 317 |  |  |  |  |  |  | 'google.mn'              => { name => 'Google Mongolia',      q => 'q' }, | 
| 318 |  |  |  |  |  |  | 'google.mobi'            => { name => 'Google dot mobi',      q => 'q' }, | 
| 319 |  |  |  |  |  |  | 'google.ms'              => { name => 'Google Montserrat',    q => 'q' }, | 
| 320 |  |  |  |  |  |  | 'google.mu'              => { name => 'Google Mauritius',     q => 'q' }, | 
| 321 |  |  |  |  |  |  | 'google.mv'              => { name => 'Google Maldives',      q => 'q' }, | 
| 322 |  |  |  |  |  |  | 'google.mw'              => { name => 'Google Malawi',        q => 'q' }, | 
| 323 |  |  |  |  |  |  | 'google.net'             => { name => 'Google dot net',       q => 'q' }, | 
| 324 |  |  |  |  |  |  | 'google.nf'              => { name => 'Google Norfolk Island', q => 'q' }, | 
| 325 |  |  |  |  |  |  | 'google.nl'              => { name => 'Google Netherlands',    q => 'q' }, | 
| 326 |  |  |  |  |  |  | 'google.no'              => { name => 'Google Norway',        q => 'q' }, | 
| 327 |  |  |  |  |  |  | 'google.nr'              => { name => 'Google Nauru',         q => 'q' }, | 
| 328 |  |  |  |  |  |  | 'google.nu'              => { name => 'Google Niue',          q => 'q' }, | 
| 329 |  |  |  |  |  |  | 'google.off.ai'          => { name => 'Google Anguilla',      q => 'q' }, | 
| 330 |  |  |  |  |  |  | 'google.ph'              => { name => 'Google Philipines',    q => 'q' }, | 
| 331 |  |  |  |  |  |  | 'google.pk'              => { name => 'Google Pakistan',      q => 'q' }, | 
| 332 |  |  |  |  |  |  | 'google.pl'              => { name => 'Google Poland',        q => 'q' }, | 
| 333 |  |  |  |  |  |  | 'google.pn'              => { name => 'Google Pitcairn Islands', q => 'q' }, | 
| 334 |  |  |  |  |  |  | 'google.pr'              => { name => 'Google Puerto Rico',   q => 'q' }, | 
| 335 |  |  |  |  |  |  | 'google.pt'              => { name => 'Google Portugal',      q => 'q' }, | 
| 336 |  |  |  |  |  |  | 'google.ro'              => { name => 'Google Romania',       q => 'q' }, | 
| 337 |  |  |  |  |  |  | 'google.ru'              => { name => 'Google Russia',        q => 'q' }, | 
| 338 |  |  |  |  |  |  | 'google.rw'              => { name => 'Google Rwanda',        q => 'q' }, | 
| 339 |  |  |  |  |  |  | 'google.sc'              => { name => 'Google Seychelles',    q => 'q' }, | 
| 340 |  |  |  |  |  |  | 'google.se'              => { name => 'Google Sweden',        q => 'q' }, | 
| 341 |  |  |  |  |  |  | 'google.sg'              => { name => 'Google Singapore',     q => 'q' }, | 
| 342 |  |  |  |  |  |  | 'google.sh'              => { name => 'Google Saint Helena',  q => 'q' }, | 
| 343 |  |  |  |  |  |  | 'google.si'              => { name => 'Google Slovenia',      q => 'q' }, | 
| 344 |  |  |  |  |  |  | 'google.sk'              => { name => 'Google Slovakia',      q => 'q' }, | 
| 345 |  |  |  |  |  |  | 'google.sm'              => { name => 'Google San Marino',    q => 'q' }, | 
| 346 |  |  |  |  |  |  | 'google.sn'              => { name => 'Google Senegal',       q => 'q' }, | 
| 347 |  |  |  |  |  |  | 'google.sr'              => { name => 'Google Suriname',      q => 'q' }, | 
| 348 |  |  |  |  |  |  | 'google.st'              => { name => 'Google Sao Tome',      q => 'q' }, | 
| 349 |  |  |  |  |  |  | 'google.tk'              => { name => 'Google Tokelau',       q => 'q' }, | 
| 350 |  |  |  |  |  |  | 'google.tm'              => { name => 'Google Turkmenistan',  q => 'q' }, | 
| 351 |  |  |  |  |  |  | 'google.to'              => { name => 'Google Tonga',        q => 'q' }, | 
| 352 |  |  |  |  |  |  | 'google.tp'              => { name => 'Google East Timor',   q => 'q' }, | 
| 353 |  |  |  |  |  |  | 'google.tt'              => { name => 'Google Trinidad and Tobago', q => 'q' }, | 
| 354 |  |  |  |  |  |  | 'google.tv'              => { name => 'Google Tuvalu', q => 'q' }, | 
| 355 |  |  |  |  |  |  | 'google.tw'              => { name => 'Google Taiwan', q => 'q' }, | 
| 356 |  |  |  |  |  |  | 'google.ug'              => { name => 'Google Uganda', q => 'q' }, | 
| 357 |  |  |  |  |  |  | 'google.us'              => { name => 'Google US',     q => 'q' }, | 
| 358 |  |  |  |  |  |  | 'google.uz'              => { name => 'Google Uzbekistan',             q => 'q' }, | 
| 359 |  |  |  |  |  |  | 'google.vg'              => { name => 'Google British Virgin Islands', q => 'q' }, | 
| 360 |  |  |  |  |  |  | 'google.vn'              => { name => 'Google Vietnam', q => 'q' }, | 
| 361 |  |  |  |  |  |  | 'google.vu'              => { name => 'Google Vanuatu', q => 'q' }, | 
| 362 |  |  |  |  |  |  | 'google.ws'              => { name => 'Google Samoa',  q => 'q' }, | 
| 363 |  |  |  |  |  |  | 'hotbot.com'             => { name => 'HotBot',        q => 'query' }, | 
| 364 |  |  |  |  |  |  | 'in.gr'                  => { name => 'In GR',         q => 'q' }, | 
| 365 |  |  |  |  |  |  | 'mamma.com'              => { name => 'Mamma',         q => 'query' }, | 
| 366 |  |  |  |  |  |  | 'mahalo.com'             => { name => 'Mahalo',        q => 'search' }, | 
| 367 |  |  |  |  |  |  | 'megasearching.net'      => { name => 'Megasearching', q => 's' }, | 
| 368 |  |  |  |  |  |  | 'mirago.co.uk'           => { name => 'Mirago UK',     q => 'qry' }, | 
| 369 |  |  |  |  |  |  | 'netscape.com'           => { name => 'Netscape',      q => 's' }, | 
| 370 |  |  |  |  |  |  | 'community.paglo.com'    => { name => 'Paglo',         q => 'q' }, | 
| 371 |  |  |  |  |  |  | 'pathfinder.gr'          => { name => 'Pathfinder GR', q => 'q' }, | 
| 372 |  |  |  |  |  |  | 'phantis.com'            => { name => 'Phantis GR' ,   q => 'q'}, | 
| 373 |  |  |  |  |  |  | 'robby.gr'               => { name => 'Robby GR'     , q => 'searchstr' }, | 
| 374 |  |  |  |  |  |  | 'sproose.com'            => { name => 'Sproose',       q => 'query' }, | 
| 375 |  |  |  |  |  |  | 'technorati.com'         => { name => 'Technorati',    q => 'q' }, | 
| 376 |  |  |  |  |  |  | 'tesco.net'              => { name => 'Tesco Search',  q => 'q' }, | 
| 377 |  |  |  |  |  |  | 'tiscali.co.uk'          => { name => 'Tiscali UK',    q => 'query' }, | 
| 378 |  |  |  |  |  |  | 'bing.com'               => { name => 'Bing',          q => 'q' }, | 
| 379 |  |  |  |  |  |  |  | 
| 380 |  |  |  |  |  |  | 'acbusca.com'            => { name => 'ACBusca',          q => 'query' }, | 
| 381 |  |  |  |  |  |  | 'atalhocerto.com.br'     => { name => 'Atalho Certo',     q => 'keyword' }, | 
| 382 |  |  |  |  |  |  | 'bastaclicar.com.br'     => { name => 'Basta Clicar',     q => 'search' }, | 
| 383 |  |  |  |  |  |  | 'bemrapido.com.br'       => { name => 'Bem Rapido',       q => 'chave' }, | 
| 384 |  |  |  |  |  |  | 'br.altavista.com'       => { name => 'AltaVista Brasil', q => 'q' }, | 
| 385 |  |  |  |  |  |  | 'br.search.yahoo.com'    => { name => 'Yahoo Brazil',     q => 'p' }, | 
| 386 |  |  |  |  |  |  | 'busca.uol.com.br'       => { name => 'Radar UOL',        q => 'q' }, | 
| 387 |  |  |  |  |  |  | 'buscaaqui.com.br'       => { name => 'Busca Aqui',       q => 'q' }, | 
| 388 |  |  |  |  |  |  | 'buscador.terra.com.br'  => { name => 'Terra Busca',      q => 'query' }, | 
| 389 |  |  |  |  |  |  | 'cade.search.yahoo.com'  => { name => 'Cadê',             q => 'p' }, | 
| 390 |  |  |  |  |  |  | 'clickgratis.com.br'     => { name => 'Click Gratis',     q => 'query' }, | 
| 391 |  |  |  |  |  |  | 'entrada.com.br'         => { name => 'Entrada',          q => 'q' }, | 
| 392 |  |  |  |  |  |  | 'gigabusca.com.br'       => { name => 'Giga Busca',       q => 'what' }, | 
| 393 |  |  |  |  |  |  | 'internetica.com.br'     => { name => 'Internetica',      q => 'busca' }, | 
| 394 |  |  |  |  |  |  | 'katatudo.com.br'        => { name => 'KataTudo',         q => 'q' }, | 
| 395 |  |  |  |  |  |  | 'minasplanet.com.br'     => { name => 'Minas Planet',     q => 'term' }, | 
| 396 |  |  |  |  |  |  | 'speedybusca.com.br'     => { name => 'SpeedyBusca',      q => 'q' }, | 
| 397 |  |  |  |  |  |  | 'vaibuscar.com.br'       => { name => 'Vai Busca',        q => 'q' }, | 
| 398 |  |  |  |  |  |  |  | 
| 399 |  |  |  |  |  |  | 'search.conduit.com'     => { name => 'Conduit',          q=>'q'   }, | 
| 400 |  |  |  |  |  |  | 'in.search.yahoo.com'    => { name => 'Yahoo India',      q => 'p'  }, | 
| 401 |  |  |  |  |  |  | 'rediff.com'             => { name => 'Rediff',           q => 'MT' }, | 
| 402 |  |  |  |  |  |  | 'guruji.com'             => { name => 'Guruji',           q => 'q'  }, | 
| 403 |  |  |  |  |  |  |  | 
| 404 |  |  |  |  |  |  | 'isohunt.com'            => { name => 'Isohunt',          q => 'ihq' }, | 
| 405 |  |  |  |  |  |  | 'btjunkie.org'           => { name => 'BT Junkie',        q => 'q' }, | 
| 406 |  |  |  |  |  |  | 'torrentz.eu'            => { name => 'Torrentz',         q => 'f' } | 
| 407 |  |  |  |  |  |  |  | 
| 408 |  |  |  |  |  |  | }; | 
| 409 |  |  |  |  |  |  |  | 
| 410 |  |  |  |  |  |  | sub new { | 
| 411 | 1 |  |  | 1 | 1 | 820 | my $class        = shift ; | 
| 412 | 1 |  |  |  |  | 2 | my $self         = { } ; | 
| 413 | 1 |  |  |  |  | 3 | $self->{engines} = $RH_LOOKUPS; | 
| 414 | 1 |  |  |  |  | 4 | return bless $self, $class ; | 
| 415 |  |  |  |  |  |  | } | 
| 416 |  |  |  |  |  |  |  | 
| 417 |  |  |  |  |  |  | =head2 parse_search_string | 
| 418 |  |  |  |  |  |  |  | 
| 419 |  |  |  |  |  |  | This module provides a simple function to parse and extract search engine query strings. It was designed and tested having | 
| 420 |  |  |  |  |  |  | Apache referrer logs in mind. It can be used for a wide number of purposes, including tracking down what keywords people use | 
| 421 |  |  |  |  |  |  | on popular search engines before they land on a site. Although a number of modules and scripts already exist for this purpose, | 
| 422 |  |  |  |  |  |  | the majority of them are outdated, using obsolete search strings associated with each engine. | 
| 423 |  |  |  |  |  |  |  | 
| 424 |  |  |  |  |  |  | The default function exported is "parse_search_string" which accepts an unquoted referrer string as input and returns the | 
| 425 |  |  |  |  |  |  | search engine query contained within. It currently works with both escaped and un-escaped queries and will decode the search | 
| 426 |  |  |  |  |  |  | terms before returning them in the former case. The function returns undef in all other cases, including errors. | 
| 427 |  |  |  |  |  |  |  | 
| 428 |  |  |  |  |  |  | for example: | 
| 429 |  |  |  |  |  |  |  | 
| 430 |  |  |  |  |  |  | my $ref   = 'http://www.google.com/search?hl=en&q=a+simple+test&btnG=Google+Search'; | 
| 431 |  |  |  |  |  |  | my $terms = | 
| 432 |  |  |  |  |  |  | $uparse->parse_search_string( $ref ); | 
| 433 |  |  |  |  |  |  |  | 
| 434 |  |  |  |  |  |  | would return I<'a simple test'> | 
| 435 |  |  |  |  |  |  |  | 
| 436 |  |  |  |  |  |  | whereas | 
| 437 |  |  |  |  |  |  |  | 
| 438 |  |  |  |  |  |  | my $ref   = 'http://www.mamma.com/Mamma?utfout=1&qtype=0&query=a+more%21+complex_+search%24&Submit=%C2%A0%C2%A0Search%C2%A0%C2%A0'; | 
| 439 |  |  |  |  |  |  | my $terms = | 
| 440 |  |  |  |  |  |  | $uparse->parse_search_string( $ref ); | 
| 441 |  |  |  |  |  |  |  | 
| 442 |  |  |  |  |  |  | would return I<'a more! complex_ search$'> | 
| 443 |  |  |  |  |  |  |  | 
| 444 |  |  |  |  |  |  | =cut | 
| 445 |  |  |  |  |  |  |  | 
| 446 |  |  |  |  |  |  | =head2 se_term | 
| 447 |  |  |  |  |  |  |  | 
| 448 |  |  |  |  |  |  | Same as parse_search_string(). | 
| 449 |  |  |  |  |  |  |  | 
| 450 |  |  |  |  |  |  | =cut | 
| 451 |  |  |  |  |  |  |  | 
| 452 |  |  |  |  |  |  | sub se_term { | 
| 453 | 124 |  |  | 124 | 1 | 67158 | my $self   = shift ; | 
| 454 | 124 |  |  |  |  | 207 | my $string = shift ; | 
| 455 | 124 | 50 |  |  |  | 321 | return unless defined $string ; | 
| 456 | 124 |  |  |  |  | 271 | return $self->parse_search_string($string) ; | 
| 457 |  |  |  |  |  |  | } | 
| 458 |  |  |  |  |  |  |  | 
| 459 |  |  |  |  |  |  | ## internal method for creating a URI object | 
| 460 |  |  |  |  |  |  |  | 
| 461 |  |  |  |  |  |  | sub _uri { | 
| 462 | 264 |  |  | 264 |  | 282 | my $self   = shift; | 
| 463 | 264 |  |  |  |  | 311 | my $string = shift; | 
| 464 |  |  |  |  |  |  |  | 
| 465 | 264 | 50 |  |  |  | 503 | return unless defined($string); | 
| 466 |  |  |  |  |  |  |  | 
| 467 |  |  |  |  |  |  | ## create a new URI object | 
| 468 |  |  |  |  |  |  | ## and return unless its http or https | 
| 469 |  |  |  |  |  |  |  | 
| 470 | 264 |  |  |  |  | 961 | my $uri = URI->new( $string ); | 
| 471 |  |  |  |  |  |  | return | 
| 472 | 264 | 100 | 100 |  |  | 29332 | unless (defined($uri) | 
|  |  |  | 33 |  |  |  |  | 
| 473 |  |  |  |  |  |  | && (ref($uri) eq 'URI::http' || ref($uri) eq 'URI::https')); | 
| 474 |  |  |  |  |  |  |  | 
| 475 |  |  |  |  |  |  | ## feedster and technorati as they do not follow | 
| 476 |  |  |  |  |  |  | ## the usual search patterns thus we extract the query | 
| 477 |  |  |  |  |  |  | ## terms by taking the last element from the path segments | 
| 478 |  |  |  |  |  |  |  | 
| 479 | 260 |  |  |  |  | 881 | my $host = $uri->host; | 
| 480 |  |  |  |  |  |  |  | 
| 481 | 260 | 100 | 100 |  |  | 13750 | return unless defined($host) && $host; | 
| 482 |  |  |  |  |  |  |  | 
| 483 | 256 | 100 |  |  |  | 870 | if ( $host =~ m/(feedster|technorati)\.com$/ ){ | 
| 484 | 4 |  |  |  |  | 24 | $uri->query_form( q => ( $uri->path_segments)[-1]); | 
| 485 |  |  |  |  |  |  | } | 
| 486 |  |  |  |  |  |  |  | 
| 487 |  |  |  |  |  |  | ## clean up the host until it matches | 
| 488 |  |  |  |  |  |  | ## something we already know about | 
| 489 |  |  |  |  |  |  |  | 
| 490 | 256 |  |  |  |  | 1399 | while( ! defined $self->{'engines'}{ $host }){ | 
| 491 | 134 |  |  |  |  | 242 | my $c = index($host, '.'); | 
| 492 | 134 | 100 |  |  |  | 270 | last if $c <0; | 
| 493 | 132 |  |  |  |  | 686 | $host= substr($host, $c+1); | 
| 494 |  |  |  |  |  |  | } | 
| 495 |  |  |  |  |  |  |  | 
| 496 | 256 |  |  |  |  | 707 | return ($uri, $host); | 
| 497 |  |  |  |  |  |  |  | 
| 498 |  |  |  |  |  |  | } | 
| 499 |  |  |  |  |  |  |  | 
| 500 |  |  |  |  |  |  |  | 
| 501 |  |  |  |  |  |  | sub parse_search_string { | 
| 502 | 134 |  |  | 134 | 1 | 4178 | my $self   = shift ; | 
| 503 | 134 |  |  |  |  | 157 | my $string = shift ; | 
| 504 | 134 | 50 |  |  |  | 299 | return unless defined($string); | 
| 505 |  |  |  |  |  |  |  | 
| 506 | 134 |  |  |  |  | 283 | my ($uri,$host) = $self->_uri( $string ); | 
| 507 | 134 | 100 |  |  |  | 327 | return unless defined($uri); | 
| 508 |  |  |  |  |  |  |  | 
| 509 |  |  |  |  |  |  | ## get rid of the www | 
| 510 | 132 |  |  |  |  | 184 | $host =~ m!^www\.!; | 
| 511 |  |  |  |  |  |  |  | 
| 512 |  |  |  |  |  |  | ## find the query parameter the engine uses | 
| 513 | 132 |  |  |  |  | 445 | my $q = $self->{'engines'}{$host}{'q'}; | 
| 514 | 132 | 100 |  |  |  | 297 | return unless defined $q; | 
| 515 |  |  |  |  |  |  |  | 
| 516 |  |  |  |  |  |  | ## return the string passed to the query parameter | 
| 517 | 128 |  |  |  |  | 398 | my %h_query = $uri->query_form; | 
| 518 |  |  |  |  |  |  |  | 
| 519 | 128 |  |  |  |  | 12082 | return $h_query{$q} | 
| 520 |  |  |  |  |  |  | } | 
| 521 |  |  |  |  |  |  |  | 
| 522 |  |  |  |  |  |  | =head2 findEngine | 
| 523 |  |  |  |  |  |  |  | 
| 524 |  |  |  |  |  |  | Returns a list with the hostname of the search engine as the first element and | 
| 525 |  |  |  |  |  |  | the canonical name as the second element. | 
| 526 |  |  |  |  |  |  |  | 
| 527 |  |  |  |  |  |  | my $ref = 'http://www.google.com/search?hl=en&q=a+simple+test&btnG=Google+Search'; | 
| 528 |  |  |  |  |  |  | my ($hostname, $canonical) = $uparse->findEngine( $ref ) ; | 
| 529 |  |  |  |  |  |  |  | 
| 530 |  |  |  |  |  |  | This will return 'google.com' as the search engine hostname and 'Google' as the name. | 
| 531 |  |  |  |  |  |  | This function will return I<undef> on error. | 
| 532 |  |  |  |  |  |  |  | 
| 533 |  |  |  |  |  |  | =cut | 
| 534 |  |  |  |  |  |  |  | 
| 535 |  |  |  |  |  |  | sub findEngine { | 
| 536 | 130 |  |  | 130 | 1 | 169 | my $self    = shift ; | 
| 537 | 130 |  |  |  |  | 154 | my $string  = shift ; | 
| 538 |  |  |  |  |  |  |  | 
| 539 | 130 | 50 |  |  |  | 254 | return unless defined($string); | 
| 540 |  |  |  |  |  |  |  | 
| 541 |  |  |  |  |  |  | ## create a URI object | 
| 542 |  |  |  |  |  |  |  | 
| 543 | 130 |  |  |  |  | 250 | my ($uri,$hostname) = $self->_uri( $string ); | 
| 544 | 130 | 100 | 66 |  |  | 701 | return unless defined($uri) && $uri; | 
| 545 | 124 | 50 | 33 |  |  | 1824 | return unless defined($hostname) && $hostname; | 
| 546 |  |  |  |  |  |  |  | 
| 547 | 124 |  |  |  |  | 314 | my $canonical = $self->{'engines'}->{$hostname}->{'name'}; | 
| 548 |  |  |  |  |  |  |  | 
| 549 | 124 |  |  |  |  | 324 | return ($hostname,$canonical); | 
| 550 |  |  |  |  |  |  | } | 
| 551 |  |  |  |  |  |  |  | 
| 552 |  |  |  |  |  |  | =head2 se_host | 
| 553 |  |  |  |  |  |  |  | 
| 554 |  |  |  |  |  |  | Wrapper around findEngine - returns just the hostname. | 
| 555 |  |  |  |  |  |  | This function will return I<undef> on error. | 
| 556 |  |  |  |  |  |  |  | 
| 557 |  |  |  |  |  |  | =cut | 
| 558 |  |  |  |  |  |  |  | 
| 559 |  |  |  |  |  |  | sub se_host { | 
| 560 | 130 |  |  | 130 | 1 | 1904 | my $self   = shift ; | 
| 561 | 130 |  |  |  |  | 176 | my $string = shift ; | 
| 562 | 130 | 50 |  |  |  | 330 | return unless defined($string) ; | 
| 563 | 130 |  |  |  |  | 280 | my ($host,$name) = $self->findEngine($string) ; | 
| 564 | 130 |  |  |  |  | 469 | return $host ; | 
| 565 |  |  |  |  |  |  | } | 
| 566 |  |  |  |  |  |  |  | 
| 567 |  |  |  |  |  |  | =head2 se_name | 
| 568 |  |  |  |  |  |  |  | 
| 569 |  |  |  |  |  |  | Wrapper around findEngine - returns just the canonical name. | 
| 570 |  |  |  |  |  |  | This function will return I<undef> on error. | 
| 571 |  |  |  |  |  |  |  | 
| 572 |  |  |  |  |  |  | =cut | 
| 573 |  |  |  |  |  |  |  | 
| 574 |  |  |  |  |  |  | sub se_name { | 
| 575 | 0 |  |  | 0 | 1 |  | my $self   = shift ; | 
| 576 | 0 |  |  |  |  |  | my $string = shift ; | 
| 577 | 0 | 0 |  |  |  |  | return unless defined($string); | 
| 578 | 0 |  |  |  |  |  | my ($host,$name) = $self->findEngine($string) ; | 
| 579 | 0 |  |  |  |  |  | return $name ; | 
| 580 |  |  |  |  |  |  | } | 
| 581 |  |  |  |  |  |  |  | 
| 582 |  |  |  |  |  |  | =head1 SUPPORTED ENGINES | 
| 583 |  |  |  |  |  |  |  | 
| 584 |  |  |  |  |  |  | Currently supported search engines include: Sproose, Google Namibia, Google Ivory Coast, Google Oman, Technorati, Google Ecuador, | 
| 585 |  |  |  |  |  |  | Google Norfolk Island, Mahalo, Google UK, Yahoo! UK, Google Micronesia, Google Bahrain, Basta Clicar, | 
| 586 |  |  |  |  |  |  | Giga Busca, Google Greece, Google Belgium, Google Egypt, Google Chile, Godado (IT), Google Australia, | 
| 587 |  |  |  |  |  |  | Google Uruguay, Google India, Google Taiwan, Google Ukraine, Google US, Terra ES, | 
| 588 |  |  |  |  |  |  | Tesco Search, Megasearching, SAPO videos, Google Nepal, Google Israel, Google US Virgin Islands, Google Hungary, | 
| 589 |  |  |  |  |  |  | Google San Marino, Google Croatia, Google dot jobs, Google Panama, Google Malaysia, Internetica, Google Brunei Darussalam, | 
| 590 |  |  |  |  |  |  | Google Denmark, Google Pakistan, Google Solomon Islands, Google dot biz, Google Lesotho, IceRocket, Google Greenland, Fireball DE, | 
| 591 |  |  |  |  |  |  | Rtp, Google Portugal, Google Samoa, Google Kazakhstan, Google Blogsearch, Google Thailand, Google, Google Antiqua and Barbuda, | 
| 592 |  |  |  |  |  |  | Google Germany, Google Moldova, Google Zambia, Google Greece, Google Sri Lanka, Google Ireland, Google Austria, | 
| 593 |  |  |  |  |  |  | Google Peru, Google Guatemala, ICQ dot com, AOL UK, Google Guyana, In GR, Google dot info, MyWay, Pathfinder GR, Google Costa Rica, | 
| 594 |  |  |  |  |  |  | KataTudo, Google Jamaica, Google Vietnam, Google Morocco, Google Gambia, Google Singapore, Google Mauritius, Altavista, Google Afghanistan, | 
| 595 |  |  |  |  |  |  | Google Cote dIvoire, Google Kazakhstan, Google Czech Rep, Phantis GR, Google Bahamas, Google United Arab Emirates, Google East Timor, Ozu ES, | 
| 596 |  |  |  |  |  |  | Google Venezuela, Google Puerto Rico, Google Armenia, Google Croatia, Google Botswana, Google Tuvalu, Ask UK, Google Singapore, Mirago UK, | 
| 597 |  |  |  |  |  |  | Google Greenland, MSN Arabia, Google Nauru, Publico, Robby GR, Minas Planet, Pesquisa Iol, Google Romania, Google South Korea, Google Jersey, | 
| 598 |  |  |  |  |  |  | Netscape, Busca Aqui, Google Bulgaria, Google Uzbekistan, Tiscali UK, Ithaki, Cadê, Lycos IT, Google Suriname, Excite IT, Google Hong Kong, | 
| 599 |  |  |  |  |  |  | Kataweb IT, Google Burundi, Click Gratis, Google Vietnam, MSN, Alice.it, Google Honduras, Google Trinidad and Tobago, Google Uganda, XL, | 
| 600 |  |  |  |  |  |  | Jornal Noticias, Google Cook Islands, Google Japan, Google Ecuador, Google Ghana, Google Guadeloupe, Google Libya, Google Kenya, Fastbrowsersearch, | 
| 601 |  |  |  |  |  |  | Aeiou, Google Niue, Jornal Record, HotBot, Google Honduras, Google Georgia, Google Fiji, Google Philipines, BBC Search, Google, Google Laos, | 
| 602 |  |  |  |  |  |  | Soso, AltaVista Brasil, Lycos UK, SAPO fotos, Ask dot com, Google Netherlands, Google Philipines, Google Trinidad and Tobago, Google Turkey, | 
| 603 |  |  |  |  |  |  | AllTheWeb, Google Japan, Google Argentina, Google Vanuatu, Blueyonder, Google Greenland, Google Samoa, Google Georgia, Google Slovakia, | 
| 604 |  |  |  |  |  |  | Google Sri Lanka, Pesquisa SAPO, Google Latvia, Google Latvia, Correio Manha, Terra Busca, Google El Savador, Google Cambodia, | 
| 605 |  |  |  |  |  |  | Google Mauritius, Google China, AOL Search, Google Tokelau, Google Tonga, Correio da Manha, Radar UOL, Google Jordan, Godado, Google Jordan, | 
| 606 |  |  |  |  |  |  | Google Pitcairn Islands, Categorico IT, Google Morocco, Google Dominican Rep, Google France, Abacho, Google Azerbaijan, Google Andorra, Google Belize, | 
| 607 |  |  |  |  |  |  | Google Paraguay, Simpatico IT, Google Ethiopia, Google Uganda, Google Poland, Google Bolivia, Google Hungary, Google Russia, Diario Noticias, | 
| 608 |  |  |  |  |  |  | Google Puerto Rico, Google Montserrat, Yahoo! Japan, Google Seychelles, Mamma, Google Pitcairn Islands, Google  South Africa, Paglo, Google Malta, | 
| 609 |  |  |  |  |  |  | Google Azerbaijan, Google New Zeland, Google China, Google Norway, Google Bosnia and Herzegovina, Google Indonesia, SpeedyBusca, Entrada, Google Anguilla, | 
| 610 |  |  |  |  |  |  | Google Rep of Congo, Google Dominica, Google Finland, Altavista UK, Google Guyana, MSN UK, Yahoo Answers, Google British Virgin Islands, Google Guadeloupe, | 
| 611 |  |  |  |  |  |  | Google Lithuania, Google Antiqua and Barbuda, Google Bahamas, Google Malawi, MSN Prodigy, Bing, Google Bolivia, Google Djubouti, Google Uzbekistan, Fastweb IT, | 
| 612 |  |  |  |  |  |  | Google Tajikistan, Virgin Search, Google Nigeria, Yahoo Japan, Pesquisa Clix, Google Grenada, Google Haiti, Google American Samoa, Google Pakistan, | 
| 613 |  |  |  |  |  |  | Google Cocos Islands, Google Hong Kong, NTLWorld, ilMotore, Google Belize, Google Guernsey, Google Sweden, Google Anguilla, Google Bangladesh, Google Isle of Man, | 
| 614 |  |  |  |  |  |  | Google Guernsey, Google Kyrgyzstan, Google Dem Rep of Congo, Google Malawi, Orange Search, Google Seychelles, Google Guyana, Google Gibraltar, | 
| 615 |  |  |  |  |  |  | Google Italy, Google Kiribati, TheSpider IT, Google Nicaragua, Google Russia, Google Venezuela, Google Poland, Google Brazil, Google Senegal, Conduit, Lycos, | 
| 616 |  |  |  |  |  |  | Google Isle of Man, Live.com, Google Italy, Libero IT, Google Canada, Google Nauru, Google Liechtenstein, Google Afghanistan, Cuil, Google Zimbabwe, Google Mauritius, | 
| 617 |  |  |  |  |  |  | Orange ES, Google Burundi, Google Portugal, ACBusca, Bem Rapido, Atalho Certo, Excite, Clusty, Yahoo Brazil, My Web Search, Google Spain, Google Uzbekistan, Google, | 
| 618 |  |  |  |  |  |  | Google Mexico, T-Online, Google dot mobi, Google Luxembourg, Google Austria, Yahoo!, Google Kiribati, Sweetim, Vai Busca, Google Mongolia, Google Saudi Arabia, Google dot net, | 
| 619 |  |  |  |  |  |  | Google Maldives, Google Trinidad and Tobago, Google Jersey, Feedster, Google Turkmenistan, Google Switzerland, Google Norfolk Island, Suche DE, Google Malawi, Google Rwanda, | 
| 620 |  |  |  |  |  |  | Lycos ES, Google Burundi, Google French Guiana, Google Kyrgyzstan, Google Saint Helena, VirginMedia, Google Iceland, SAPO sabores, Google India, Google Cuba, | 
| 621 |  |  |  |  |  |  | Google US Virgin Islands, Google Taiwan, Google Sao Tome, Google Slovenia, Starware, Google Estonia, Conduit, Yahoo India, Rediff, Guruji | 
| 622 |  |  |  |  |  |  |  | 
| 623 |  |  |  |  |  |  | =head1 AUTHOR | 
| 624 |  |  |  |  |  |  |  | 
| 625 |  |  |  |  |  |  | Spiros Denaxas, C<< <s.denaxas at gmail.com> >> | 
| 626 |  |  |  |  |  |  |  | 
| 627 |  |  |  |  |  |  | =head1 SOURCE CODE | 
| 628 |  |  |  |  |  |  |  | 
| 629 |  |  |  |  |  |  | The source code can be found on GitHub at L<https://github.com/spiros/URI-ParseSearchString> | 
| 630 |  |  |  |  |  |  |  | 
| 631 |  |  |  |  |  |  | =head1 BUGS | 
| 632 |  |  |  |  |  |  |  | 
| 633 |  |  |  |  |  |  | This is my first CPAN module so I encourage you to send all comments, especially critical ones, | 
| 634 |  |  |  |  |  |  | to my email address. | 
| 635 |  |  |  |  |  |  |  | 
| 636 |  |  |  |  |  |  | This could not have been possible without the support of my co-workers at | 
| 637 |  |  |  |  |  |  | http://nestoria.co.uk - the easiest way of finding UK property. | 
| 638 |  |  |  |  |  |  |  | 
| 639 |  |  |  |  |  |  | =head1 SUPPORT | 
| 640 |  |  |  |  |  |  |  | 
| 641 |  |  |  |  |  |  | For more information, you could also visit my blog: | 
| 642 |  |  |  |  |  |  |  | 
| 643 |  |  |  |  |  |  | http://blog.ffffruit.com | 
| 644 |  |  |  |  |  |  |  | 
| 645 |  |  |  |  |  |  | =over 4 | 
| 646 |  |  |  |  |  |  |  | 
| 647 |  |  |  |  |  |  | =back | 
| 648 |  |  |  |  |  |  |  | 
| 649 |  |  |  |  |  |  | =head1 COPYRIGHT & LICENSE | 
| 650 |  |  |  |  |  |  |  | 
| 651 |  |  |  |  |  |  | Copyright 2011 Spiros Denaxas, all rights reserved. | 
| 652 |  |  |  |  |  |  |  | 
| 653 |  |  |  |  |  |  | This program is free software; you can redistribute it and/or modify it | 
| 654 |  |  |  |  |  |  | under the same terms as Perl itself. | 
| 655 |  |  |  |  |  |  |  | 
| 656 |  |  |  |  |  |  | =cut | 
| 657 |  |  |  |  |  |  |  | 
| 658 |  |  |  |  |  |  | 1; # End of URI::ParseSearchString |