| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package WWW::BookBot::Chinese; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 1 |  |  | 1 |  | 755 | use 5.008; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 38 |  | 
| 4 | 1 |  |  | 1 |  | 6 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 31 |  | 
| 5 | 1 |  |  | 1 |  | 5 | use warnings; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 35 |  | 
| 6 | 1 |  |  | 1 |  | 5 | no warnings qw(uninitialized); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 48 |  | 
| 7 | 1 |  |  | 1 |  | 5 | use base qw(WWW::BookBot); | 
|  | 1 |  |  |  |  | 7 |  | 
|  | 1 |  |  |  |  | 808 |  | 
| 8 | 1 |  |  | 1 |  | 13 | use vars qw($VERSION); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 764 |  | 
| 9 |  |  |  |  |  |  | $VERSION = '0.12'; | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | #------------------------------------------------------------- | 
| 12 |  |  |  |  |  |  | # Default settings | 
| 13 |  |  |  |  |  |  | #	$class->default_settings						=> \%settings | 
| 14 |  |  |  |  |  |  | #------------------------------------------------------------- | 
| 15 |  |  |  |  |  |  | sub default_settings { | 
| 16 | 7 |  |  | 7 | 0 | 46 | my $self = shift->SUPER::default_settings; | 
| 17 | 7 |  |  |  |  | 19 | $self->{get_language}='zh-cn'; | 
| 18 | 7 |  |  |  |  | 13 | $self->{language_decode}='gbk'; | 
| 19 | 7 |  |  |  |  | 14 | $self->{language_encode}='gbk'; | 
| 20 | 7 |  |  |  |  | 17 | $self; | 
| 21 |  |  |  |  |  |  | } | 
| 22 |  |  |  |  |  |  |  | 
| 23 |  |  |  |  |  |  | #------------------------------------------------------------- | 
| 24 |  |  |  |  |  |  | # Redefined functions | 
| 25 |  |  |  |  |  |  | #	$bot->decode_entity($content_dein_deout)			=> N/A | 
| 26 |  |  |  |  |  |  | #	$bot->trandict_init								=> $bot->{translate_dict} | 
| 27 |  |  |  |  |  |  | #	$bot->msg_init									=> $bot->{messages} | 
| 28 |  |  |  |  |  |  | #------------------------------------------------------------- | 
| 29 |  |  |  |  |  |  | sub decode_entity { | 
| 30 |  |  |  |  |  |  | #Chinese novels sometimes add \x{FF1B} after an unknown unicode string | 
| 31 | 8 |  |  | 8 | 0 | 23 | $_[1]=~s/(?:&\#(\d{1,5});?\x{FF1B}?)/chr($1)/esg; | 
|  | 2 |  |  |  |  | 10 |  | 
| 32 | 8 |  |  |  |  | 15 | $_[1]=~s/(?:&\#[xX]([0-9a-fA-F]{1,5});?\x{FF1B}?)/chr(hex($1))/esg; | 
|  | 0 |  |  |  |  | 0 |  | 
| 33 | 8 | 50 |  |  |  | 16 | $_[1]=~s/(&([0-9a-zA-Z]{1,9});?)/$WWW::BookBot::entity2char{$2} or $1/esg; | 
|  | 1 |  |  |  |  | 9 |  | 
| 34 |  |  |  |  |  |  | #normalize middle dot | 
| 35 | 8 |  |  |  |  | 21 | $_[1]=~s/\x{2022}/\x{00B7}/sg; | 
| 36 |  |  |  |  |  |  | } | 
| 37 |  |  |  |  |  |  | sub trandict_init { | 
| 38 | 7 |  |  | 7 | 0 | 57 | shift->{translate_dict} = { | 
| 39 |  |  |  |  |  |  | 'log'		=> "日志", | 
| 40 |  |  |  |  |  |  | 'result'	=> "结果", | 
| 41 |  |  |  |  |  |  | 'DB'		=> "数据", | 
| 42 |  |  |  |  |  |  | 'debug'		=> "调试", | 
| 43 |  |  |  |  |  |  | } | 
| 44 |  |  |  |  |  |  | } | 
| 45 |  |  |  |  |  |  | sub msg_init { | 
| 46 | 7 |  |  | 7 | 0 | 18 | my $skip_info="\n".'$pargs->{levelspace}  url=$pargs->{url}'."\n"; | 
| 47 | 7 |  |  |  |  | 259 | shift->{messages} = { | 
| 48 |  |  |  |  |  |  | TestMsg			=> '测试: $pargs->{TestInfo} $pargs->{TestNum}', | 
| 49 |  |  |  |  |  |  | BookStart		=> '$pargs->{levelspace} [$pargs->{bpos_limit}/$pargs->{book_num}] $pargs->{title_limit} ', | 
| 50 |  |  |  |  |  |  | BookBinaryOK	=> '$pargs->{data_len_KB} $pargs->{write_file}'."\n", | 
| 51 |  |  |  |  |  |  | BookChapterErr	=> ' - 无法分析'.$skip_info, | 
| 52 |  |  |  |  |  |  | BookChapterMany	=> '[$pargs->{chapter_num_limit}章]', | 
| 53 |  |  |  |  |  |  | BookChapterOne	=> '[单章节]', | 
| 54 |  |  |  |  |  |  | BookChapterOK	=> '$pargs->{data_len_KB}'."\n", | 
| 55 |  |  |  |  |  |  | BookTOCFinish	=> '$pargs->{TOC_len_KB}'."\n", | 
| 56 |  |  |  |  |  |  | CatalogInfo		=> '取书目: ', | 
| 57 |  |  |  |  |  |  | CatalogResultErr=> ' 0套书'."\n", | 
| 58 |  |  |  |  |  |  | CatalogResultOK	=> ' $pargs->{book_num}套书'."\n", | 
| 59 |  |  |  |  |  |  | CatalogURL		=> '$pargs->{url}', | 
| 60 |  |  |  |  |  |  | CatalogURLEmpty	=> '[失败] 索引的URL为空'."\n", | 
| 61 |  |  |  |  |  |  | DBBookErr		=> "\t".' \$bot->go_book({$pargs->{allargs}});'."\t#错误\n", | 
| 62 |  |  |  |  |  |  | DBBookOK		=> "\t".'#\$bot->go_book({$pargs->{allargs}});'."\n", | 
| 63 |  |  |  |  |  |  | DBCatalogErr	=> ' \$bot->go_catalog({$pargs->{allargs}});'."\t#错误\n", | 
| 64 |  |  |  |  |  |  | DBCatalogOK		=> '#\$bot->go_catalog({$pargs->{allargs}});'."\n", | 
| 65 |  |  |  |  |  |  | DBHead			=> <<'DATA', | 
| 66 |  |  |  |  |  |  | #!$pargs->{perlcmd} | 
| 67 |  |  |  |  |  |  | ##====================================== | 
| 68 |  |  |  |  |  |  | ## 自动生成的数据文件,用于$pargs->{classname} | 
| 69 |  |  |  |  |  |  | ##    生成时间: $pargs->{createtime} | 
| 70 |  |  |  |  |  |  | ##====================================== | 
| 71 |  |  |  |  |  |  |  | 
| 72 |  |  |  |  |  |  | use $pargs->{classname}; | 
| 73 |  |  |  |  |  |  | my \$bot = new $pargs->{classname}; | 
| 74 |  |  |  |  |  |  |  | 
| 75 |  |  |  |  |  |  | DATA | 
| 76 |  |  |  |  |  |  | FailClearDB		=> '无法清除数据文件$pargs->{filename}: $pargs->{errmsg}', | 
| 77 |  |  |  |  |  |  | FailClose	 	=> '无法关闭$self->{translate_dict}->{$pargs->{filetype}}文件$pargs->{filename}: $pargs->{errmsg}', | 
| 78 |  |  |  |  |  |  | FailMkDir		=> '建目录$pargs->{dir}失败: $pargs->{errmsg}', | 
| 79 |  |  |  |  |  |  | FailOpen	 	=> '无法打开$self->{translate_dict}->{$pargs->{filetype}}文件$pargs->{filename}: $pargs->{errmsg}', | 
| 80 |  |  |  |  |  |  | FailWrite	 	=> '无法写入$self->{translate_dict}->{$pargs->{filetype}}文件$pargs->{filename}: $pargs->{errmsg}', | 
| 81 |  |  |  |  |  |  | GetFail404		=> <<'DATA', | 
| 82 |  |  |  |  |  |  | [$pargs->{code},失败] 找不到文件 | 
| 83 |  |  |  |  |  |  | $pargs->{url_real} | 
| 84 |  |  |  |  |  |  | DATA | 
| 85 |  |  |  |  |  |  | GetFail404Detail=> <<'DATA', | 
| 86 |  |  |  |  |  |  | [$pargs->{code},失败] 找不到文件 | 
| 87 |  |  |  |  |  |  | >>>>请求 | 
| 88 |  |  |  |  |  |  | $pargs->{req_content}<<<<响应 | 
| 89 |  |  |  |  |  |  | $pargs->{status_line} | 
| 90 |  |  |  |  |  |  |  | 
| 91 |  |  |  |  |  |  | DATA | 
| 92 |  |  |  |  |  |  | GetFailRetries	=> <<'DATA', | 
| 93 |  |  |  |  |  |  | [$pargs->{code},失败] 重试太多,放弃 | 
| 94 |  |  |  |  |  |  | $pargs->{url_real} | 
| 95 |  |  |  |  |  |  | DATA | 
| 96 |  |  |  |  |  |  | GetFailRetriesDetail	=> <<'DATA', | 
| 97 |  |  |  |  |  |  | [$pargs->{code},失败] 重试太多,放弃 | 
| 98 |  |  |  |  |  |  | >>>>请求 | 
| 99 |  |  |  |  |  |  | $pargs->{req_content}<<<<响应 | 
| 100 |  |  |  |  |  |  | $pargs->{status_line} | 
| 101 |  |  |  |  |  |  | $pargs->{res_content} | 
| 102 |  |  |  |  |  |  |  | 
| 103 |  |  |  |  |  |  | DATA | 
| 104 |  |  |  |  |  |  | GetURLSuccess	=> '$pargs->{len_KB} ', | 
| 105 |  |  |  |  |  |  | GetURLRetry		=> '[$pargs->{code},重试] ', | 
| 106 |  |  |  |  |  |  | GetWait			=> '等待..', | 
| 107 |  |  |  |  |  |  | SkipMaxLevel	=> '[跳过]层数>$self->{book_max_levels}'.$skip_info, | 
| 108 |  |  |  |  |  |  | SkipMedia		=> '[跳过]媒体文件'.$skip_info, | 
| 109 |  |  |  |  |  |  | SkipTitleEmpty	=> '[跳过]标题为空'.$skip_info, | 
| 110 |  |  |  |  |  |  | SkipUrlEmpty	=> '[跳过]地址为空'."\n", | 
| 111 |  |  |  |  |  |  | SkipVisited		=> '[跳过]已访问过'."\n", | 
| 112 |  |  |  |  |  |  | SkipZip			=> '[跳过]压缩文件'.$skip_info, | 
| 113 |  |  |  |  |  |  | }; | 
| 114 |  |  |  |  |  |  | } | 
| 115 |  |  |  |  |  |  |  | 
| 116 |  |  |  |  |  |  | #------------------------------------------------------------- | 
| 117 |  |  |  |  |  |  | # patterns | 
| 118 |  |  |  |  |  |  | #------------------------------------------------------------- | 
| 119 |  |  |  |  |  |  | sub getpattern_space2_data { | 
| 120 | 7 |  |  | 7 | 0 | 72 | <<'DATA'; | 
| 121 |  |  |  |  |  |  | [ ] | 
| 122 |  |  |  |  |  |  | DATA | 
| 123 |  |  |  |  |  |  | } | 
| 124 |  |  |  |  |  |  | sub getpattern_line_head_data { | 
| 125 | 7 |  |  | 7 | 0 | 23 | '  '; | 
| 126 |  |  |  |  |  |  | } | 
| 127 |  |  |  |  |  |  | sub getpattern_parentheses_data { | 
| 128 | 7 |  |  | 7 | 0 | 37 | shift->SUPER::getpattern_parentheses_data().<<'DATA'; | 
| 129 |  |  |  |  |  |  | 〃 〃 | 
| 130 |  |  |  |  |  |  | ‘ ’ | 
| 131 |  |  |  |  |  |  | “ ” | 
| 132 |  |  |  |  |  |  | 〔 〕 | 
| 133 |  |  |  |  |  |  | 〈 〉 | 
| 134 |  |  |  |  |  |  | 《 》 | 
| 135 |  |  |  |  |  |  | 「 」 | 
| 136 |  |  |  |  |  |  | 『 』 | 
| 137 |  |  |  |  |  |  | 〖 〗 | 
| 138 |  |  |  |  |  |  | 【 】 | 
| 139 |  |  |  |  |  |  | ′ ′ | 
| 140 |  |  |  |  |  |  | ″ ″ | 
| 141 |  |  |  |  |  |  | " " | 
| 142 |  |  |  |  |  |  | ' ' | 
| 143 |  |  |  |  |  |  | ( ) | 
| 144 |  |  |  |  |  |  | < > | 
| 145 |  |  |  |  |  |  | [ ] | 
| 146 |  |  |  |  |  |  | ` ` | 
| 147 |  |  |  |  |  |  | ` ' | 
| 148 |  |  |  |  |  |  | { } | 
| 149 |  |  |  |  |  |  | ︵ ︶ | 
| 150 |  |  |  |  |  |  | ︹ ︺ | 
| 151 |  |  |  |  |  |  | ︿ ﹀ | 
| 152 |  |  |  |  |  |  | ︽ ︾ | 
| 153 |  |  |  |  |  |  | ﹁ ﹂ | 
| 154 |  |  |  |  |  |  | ﹃ ﹄ | 
| 155 |  |  |  |  |  |  | ︻ ︼ | 
| 156 |  |  |  |  |  |  | ︷ ︸ | 
| 157 |  |  |  |  |  |  | ˋ ˊ | 
| 158 |  |  |  |  |  |  | ‵ ‵ | 
| 159 |  |  |  |  |  |  | 〝 〞 | 
| 160 |  |  |  |  |  |  | ﹙ ﹚ | 
| 161 |  |  |  |  |  |  | ﹛ ﹜ | 
| 162 |  |  |  |  |  |  | ﹝ ﹞ | 
| 163 |  |  |  |  |  |  | ﹤ ﹥ | 
| 164 |  |  |  |  |  |  | DATA | 
| 165 |  |  |  |  |  |  | } | 
| 166 |  |  |  |  |  |  | sub getpattern_mark_dash_data { | 
| 167 | 7 |  |  | 7 | 0 | 20 | <<'DATA'; | 
| 168 |  |  |  |  |  |  | [#-&\*\+\-=@_~ˉ—~‖…×÷∷⊙≡≈∽∞$¤¢‰§#%&*+-=@_|–―‥∣¦‐ー─-♂〇〓※︱-︴﹉-﹏﹡﹢﹣﹦﹩﹪﹫] | 
| 169 |  |  |  |  |  |  | DATA | 
| 170 |  |  |  |  |  |  | } | 
| 171 |  |  |  |  |  |  | sub getpattern_mark_wordsplit_data { | 
| 172 | 7 |  |  | 7 | 0 | 24 | <<'DATA'; | 
| 173 |  |  |  |  |  |  | [\.\,\?\!\:\;∶、。·!,.:;?︰﹐﹑﹒﹔﹕﹖﹗] | 
| 174 |  |  |  |  |  |  | DATA | 
| 175 |  |  |  |  |  |  | } | 
| 176 |  |  |  |  |  |  | sub getpattern_word_finish_data { | 
| 177 | 7 |  |  | 7 | 0 | 22 | <<'DATA'; | 
| 178 |  |  |  |  |  |  | (?:全[文书]|)[完终] | 
| 179 |  |  |  |  |  |  | DATA | 
| 180 |  |  |  |  |  |  | } | 
| 181 |  |  |  |  |  |  | sub getpattern_remove_line_by_end_data { | 
| 182 | 7 |  |  | 7 | 0 | 23 | <<'DATA'; | 
| 183 |  |  |  |  |  |  | (case) | 
| 184 |  |  |  |  |  |  | [报网社讯] | 
| 185 |  |  |  |  |  |  | [连重排整出提推扫校较编书世视文科在讨小工转][学幻论作]?(?:[载贴排版理品供出入校较描正对者屋库城路界苑线区组室]|海洋|望远镜|桃花源|-K12)(?:完成|) | 
| 186 |  |  |  |  |  |  | 请(?:申请授权|保留站台信息)[。.﹒\.!﹗]? | 
| 187 |  |  |  |  |  |  | 制作 | 
| 188 |  |  |  |  |  |  | [OoOo][CcCc][RrRr] | 
| 189 |  |  |  |  |  |  | 采编中心 | 
| 190 |  |  |  |  |  |  | 亦凡公益图书馆 | 
| 191 |  |  |  |  |  |  | 龙的天空 | 
| 192 |  |  |  |  |  |  | 失落的星辰 | 
| 193 |  |  |  |  |  |  | 书香门第 | 
| 194 |  |  |  |  |  |  | 旧雨楼 | 
| 195 |  |  |  |  |  |  | 一剑小天下 | 
| 196 |  |  |  |  |  |  | 竹露荷风 | 
| 197 |  |  |  |  |  |  | 扬剑轩居士 | 
| 198 |  |  |  |  |  |  | 幻想时代 | 
| 199 |  |  |  |  |  |  | 冒险者天堂 | 
| 200 |  |  |  |  |  |  | 信息中心 | 
| 201 |  |  |  |  |  |  | cnread[\.。.·﹒]net | 
| 202 |  |  |  |  |  |  | ezla[\.。.·﹒]com?[\.。.·﹒]tw | 
| 203 |  |  |  |  |  |  | thebook[\.。.·﹒]yeah[\.。.·﹒]net | 
| 204 |  |  |  |  |  |  | y(?:esho[\.。.·﹒]com/wenxue|uzispy[\.。.·﹒]yeah[\.。.·﹒]net) | 
| 205 |  |  |  |  |  |  | www[\.。.·﹒](?:v-war|oldrain)[\.。.·﹒](?:net|com) | 
| 206 |  |  |  |  |  |  | DATA | 
| 207 |  |  |  |  |  |  | } | 
| 208 |  |  |  |  |  |  | sub getpattern_remove_line_by_end_special_data { | 
| 209 | 7 |  |  | 7 | 0 | 26 | <<'DATA'; | 
| 210 |  |  |  |  |  |  | 报网社讯 | 
| 211 |  |  |  |  |  |  | DATA | 
| 212 |  |  |  |  |  |  | } | 
| 213 |  |  |  |  |  |  |  | 
| 214 |  |  |  |  |  |  | 1; | 
| 215 |  |  |  |  |  |  | __END__ |