| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package AozoraBunko::Checkerkun; | 
| 2 |  |  |  |  |  |  | our $VERSION = "0.10"; | 
| 3 |  |  |  |  |  |  |  | 
| 4 | 7 |  |  | 7 |  | 10055 | use 5.008001; | 
|  | 7 |  |  |  |  | 23 |  | 
| 5 | 7 |  |  | 7 |  | 34 | use strict; | 
|  | 7 |  |  |  |  | 10 |  | 
|  | 7 |  |  |  |  | 143 |  | 
| 6 | 7 |  |  | 7 |  | 40 | use warnings; | 
|  | 7 |  |  |  |  | 12 |  | 
|  | 7 |  |  |  |  | 163 |  | 
| 7 | 7 |  |  | 7 |  | 970 | use utf8; | 
|  | 7 |  |  |  |  | 21 |  | 
|  | 7 |  |  |  |  | 34 |  | 
| 8 |  |  |  |  |  |  |  | 
| 9 | 7 |  |  | 7 |  | 163 | use Carp           qw//; | 
|  | 7 |  |  |  |  | 10 |  | 
|  | 7 |  |  |  |  | 156 |  | 
| 10 | 7 |  |  | 7 |  | 5420 | use File::ShareDir qw//; | 
|  | 7 |  |  |  |  | 42715 |  | 
|  | 7 |  |  |  |  | 156 |  | 
| 11 | 7 |  |  | 7 |  | 6402 | use YAML::Tiny     qw//; | 
|  | 7 |  |  |  |  | 40926 |  | 
|  | 7 |  |  |  |  | 194 |  | 
| 12 | 7 |  |  | 7 |  | 6154 | use Encode         qw//; | 
|  | 7 |  |  |  |  | 79200 |  | 
|  | 7 |  |  |  |  | 177 |  | 
| 13 | 7 |  |  | 7 |  | 5340 | use Lingua::JA::Halfwidth::Katakana; | 
|  | 7 |  |  |  |  | 1445 |  | 
|  | 7 |  |  |  |  | 9039 |  | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | my $YAML_FILE = File::ShareDir::dist_file('AozoraBunko-Checkerkun', 'hiden_no_tare.yml'); | 
| 16 |  |  |  |  |  |  | my $YAML = YAML::Tiny->read($YAML_FILE)->[0]; | 
| 17 |  |  |  |  |  |  | my $ENC = Encode::find_encoding("Shift_JIS"); | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | my %VALID_OUTPUT_FORMAT; | 
| 20 |  |  |  |  |  |  | @VALID_OUTPUT_FORMAT{qw/plaintext html/} = (); | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | # [78hosetsu_tekiyo] 78互換包摂の対象となる不要な外字注記をチェックする | 
| 23 |  |  |  |  |  |  | our $KUTENMEN_78HOSETSU_TEKIYO = $YAML->{'kutenmen_78hosetsu_tekiyo'}; | 
| 24 |  |  |  |  |  |  |  | 
| 25 |  |  |  |  |  |  | # [hosetsu_tekiyo] 包摂の対象となる不要な外字注記をチェックする | 
| 26 |  |  |  |  |  |  | our $KUTENMEN_HOSETSU_TEKIYO = $YAML->{'kutenmen_hosetsu_tekiyo'}; | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | # 新JIS漢字で包摂基準の適用除外となる104字 | 
| 29 |  |  |  |  |  |  | our $JYOGAI = $YAML->{'jyogai'}; | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | # 78互換文字 | 
| 32 |  |  |  |  |  |  | our $J78 = $YAML->{'j78'}; | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  | # 間違えやすい文字 | 
| 35 |  |  |  |  |  |  | # かとうかおりさんの「誤認識されやすい文字リスト」から | 
| 36 |  |  |  |  |  |  | # http://plaza.users.to/katokao/digipr/digipr_charlist.html | 
| 37 |  |  |  |  |  |  | our $GONIN1 = $YAML->{'gonin1'}; | 
| 38 |  |  |  |  |  |  |  | 
| 39 |  |  |  |  |  |  | # 誤認2 | 
| 40 |  |  |  |  |  |  | our $GONIN2 = $YAML->{'gonin2'}; | 
| 41 |  |  |  |  |  |  |  | 
| 42 |  |  |  |  |  |  | # 誤認3 | 
| 43 |  |  |  |  |  |  | # (砂場清隆さんの入力による) | 
| 44 |  |  |  |  |  |  | our $GONIN3 = $YAML->{'gonin3'}; | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | # 新字体・旧字体対応リスト | 
| 47 |  |  |  |  |  |  | our $KYUJI = $YAML->{'kyuji'}; | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | # 異体字 | 
| 50 |  |  |  |  |  |  | our $ITAIJI = $YAML->{'itaiji'}; | 
| 51 |  |  |  |  |  |  |  | 
| 52 |  |  |  |  |  |  | sub _default_options | 
| 53 |  |  |  |  |  |  | { | 
| 54 |  |  |  |  |  |  | return { | 
| 55 | 66 |  |  | 66 |  | 508 | 'gaiji'            => 1, # JIS外字をチェックする | 
| 56 |  |  |  |  |  |  | 'hansp'            => 1, # 半角スペースをチェックする | 
| 57 |  |  |  |  |  |  | 'hanpar'           => 1, # 半角カッコをチェックする | 
| 58 |  |  |  |  |  |  | 'zensp'            => 0, # 全角スペースをチェックする | 
| 59 |  |  |  |  |  |  | '78hosetsu_tekiyo' => 1, # 78互換包摂の対象となる不要な外字注記をチェックする | 
| 60 |  |  |  |  |  |  | 'hosetsu_tekiyo'   => 1, # 包摂の対象となる不要な外字注記をチェックする | 
| 61 |  |  |  |  |  |  | '78'               => 0, # 78互換包摂29字をチェックする | 
| 62 |  |  |  |  |  |  | 'jyogai'           => 0, # 新JIS漢字で包摂規準の適用除外となる104字をチェックする | 
| 63 |  |  |  |  |  |  | 'gonin1'           => 0, # 誤認しやすい文字をチェックする(1) | 
| 64 |  |  |  |  |  |  | 'gonin2'           => 0, # 誤認しやすい文字をチェックする(2) | 
| 65 |  |  |  |  |  |  | 'gonin3'           => 0, # 誤認しやすい文字をチェックする(3) | 
| 66 |  |  |  |  |  |  | 'simplesp'         => 0, # 半角スペースは「_」で、全角スペースは「□」で出力する | 
| 67 |  |  |  |  |  |  | 'kouetsukun'       => 0, # 旧字体置換可能チェッカー「校閲君」を有効にする | 
| 68 |  |  |  |  |  |  | 'output_format'    => 'plaintext', # plaintext または html | 
| 69 |  |  |  |  |  |  | }; | 
| 70 |  |  |  |  |  |  | } | 
| 71 |  |  |  |  |  |  |  | 
| 72 |  |  |  |  |  |  | sub new | 
| 73 |  |  |  |  |  |  | { | 
| 74 | 66 |  |  | 66 | 1 | 45811 | my $class = shift; | 
| 75 | 66 | 100 |  |  |  | 222 | my %args  = (ref $_[0] eq 'HASH' ? %{$_[0]} : @_); | 
|  | 60 |  |  |  |  | 416 |  | 
| 76 |  |  |  |  |  |  |  | 
| 77 | 66 |  |  |  |  | 220 | my $options = $class->_default_options; | 
| 78 |  |  |  |  |  |  |  | 
| 79 | 66 |  |  |  |  | 227 | for my $key (keys %args) | 
| 80 |  |  |  |  |  |  | { | 
| 81 | 859 | 100 |  |  |  | 1453 | if ( ! exists $options->{$key} ) { Carp::croak "Unknown option: '$key'"; } | 
|  | 2 |  |  |  |  | 392 |  | 
| 82 |  |  |  |  |  |  | else | 
| 83 |  |  |  |  |  |  | { | 
| 84 | 857 | 100 |  |  |  | 1560 | if ($key eq 'output_format') | 
| 85 |  |  |  |  |  |  | { | 
| 86 | 62 | 100 |  |  |  | 272 | Carp::croak "Output format option must be 'plaintext' or 'html'" unless exists $VALID_OUTPUT_FORMAT{ $args{$key} }; | 
| 87 |  |  |  |  |  |  | } | 
| 88 |  |  |  |  |  |  |  | 
| 89 | 856 |  |  |  |  | 1405 | $options->{$key} = $args{$key}; | 
| 90 |  |  |  |  |  |  | } | 
| 91 |  |  |  |  |  |  | } | 
| 92 |  |  |  |  |  |  |  | 
| 93 | 63 |  |  |  |  | 327 | bless $options, $class; | 
| 94 |  |  |  |  |  |  | } | 
| 95 |  |  |  |  |  |  |  | 
| 96 |  |  |  |  |  |  | sub _tag_html | 
| 97 |  |  |  |  |  |  | { | 
| 98 | 4052 |  |  | 4052 |  | 7134 | my ($plaintext, $tag_name, $msg) = @_; | 
| 99 |  |  |  |  |  |  |  | 
| 100 | 4052 | 100 |  |  |  | 7425 | return qq|$plaintext| unless defined $msg; | 
| 101 | 4044 |  |  |  |  | 18787 | return qq|$plaintext|; | 
| 102 |  |  |  |  |  |  | } | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | # 例: | 
| 105 |  |  |  |  |  |  | # | 
| 106 |  |  |  |  |  |  | # ※[#「口+亞」、第3水準1-15-8、144-上-9] | 
| 107 |  |  |  |  |  |  | # が | 
| 108 |  |  |  |  |  |  | # ※[#「口+亞」、第3水準1-15-8、144-上-9] → [78hosetsu_tekiyo]【唖】 | 
| 109 |  |  |  |  |  |  | # に変換され、 | 
| 110 |  |  |  |  |  |  | # | 
| 111 |  |  |  |  |  |  | # ※[#「にんべん+曾」、第3水準1-14-41、144-上-9] | 
| 112 |  |  |  |  |  |  | # が | 
| 113 |  |  |  |  |  |  | # ※[#「にんべん+曾」、第3水準1-14-41、144-上-9]→[hosetsu_tekiyo]【僧】 | 
| 114 |  |  |  |  |  |  | # に変換される。 | 
| 115 |  |  |  |  |  |  | # | 
| 116 |  |  |  |  |  |  | sub _check_all_hosetsu_tekiyo | 
| 117 |  |  |  |  |  |  | { | 
| 118 | 4008 |  |  | 4008 |  | 5568 | my ($self, $chars_ref, $index) = @_; | 
| 119 |  |  |  |  |  |  |  | 
| 120 | 4008 |  |  |  |  | 4334 | my ($replace, $usedlen); | 
| 121 |  |  |  |  |  |  |  | 
| 122 | 4008 |  |  |  |  | 5174 | my $rear_index = $index + 80; | 
| 123 | 4008 | 100 |  |  |  | 4307 | $rear_index = $#{$chars_ref} if $rear_index > $#{$chars_ref}; | 
|  | 14 |  |  |  |  | 21 |  | 
|  | 4008 |  |  |  |  | 9039 |  | 
| 124 |  |  |  |  |  |  |  | 
| 125 | 4008 | 50 |  |  |  | 10620 | if ( join("", @{$chars_ref}[$index .. $rear_index]) =~ /^(※[#.*?水準(\d+\-\d+\-\d+).*?])/ ) | 
|  | 4008 |  |  |  |  | 41970 |  | 
| 126 |  |  |  |  |  |  | { | 
| 127 | 4008 |  |  |  |  | 9074 | my ($match, $kutenmen) = ($1, $2); | 
| 128 |  |  |  |  |  |  |  | 
| 129 | 4008 | 100 | 66 |  |  | 19963 | if ( $self->{'78hosetsu_tekiyo'} && exists $KUTENMEN_78HOSETSU_TEKIYO->{$kutenmen} ) | 
|  |  | 50 | 33 |  |  |  |  | 
| 130 |  |  |  |  |  |  | { | 
| 131 | 2004 | 100 |  |  |  | 5090 | if ($self->{'output_format'} eq 'plaintext') | 
|  |  | 50 |  |  |  |  |  | 
| 132 |  |  |  |  |  |  | { | 
| 133 | 1002 |  |  |  |  | 2417 | $replace = "$match→[78hosetsu_tekiyo]【$KUTENMEN_78HOSETSU_TEKIYO->{$kutenmen}】"; | 
| 134 |  |  |  |  |  |  | } | 
| 135 |  |  |  |  |  |  | elsif ($self->{'output_format'} eq 'html') | 
| 136 |  |  |  |  |  |  | { | 
| 137 | 1002 |  |  |  |  | 2296 | $replace = _tag_html($match, '78hosetsuTekiyo', $KUTENMEN_78HOSETSU_TEKIYO->{$kutenmen}); | 
| 138 |  |  |  |  |  |  | } | 
| 139 |  |  |  |  |  |  |  | 
| 140 | 2004 |  |  |  |  | 4737 | $usedlen = length $match; | 
| 141 |  |  |  |  |  |  | } | 
| 142 |  |  |  |  |  |  | elsif ( $self->{'hosetsu_tekiyo'} && exists $KUTENMEN_HOSETSU_TEKIYO->{$kutenmen} ) | 
| 143 |  |  |  |  |  |  | { | 
| 144 | 2004 | 100 |  |  |  | 4941 | if ($self->{'output_format'} eq 'plaintext') | 
|  |  | 50 |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | { | 
| 146 | 1002 |  |  |  |  | 2541 | $replace = "$match→[hosetsu_tekiyo]【$KUTENMEN_HOSETSU_TEKIYO->{$kutenmen}】"; | 
| 147 |  |  |  |  |  |  | } | 
| 148 |  |  |  |  |  |  | elsif ($self->{'output_format'} eq 'html') | 
| 149 |  |  |  |  |  |  | { | 
| 150 | 1002 |  |  |  |  | 2550 | $replace = _tag_html($match, 'hosetsuTekiyo', $KUTENMEN_HOSETSU_TEKIYO->{$kutenmen}); | 
| 151 |  |  |  |  |  |  | } | 
| 152 |  |  |  |  |  |  |  | 
| 153 | 2004 |  |  |  |  | 4735 | $usedlen = length $match; | 
| 154 |  |  |  |  |  |  | } | 
| 155 |  |  |  |  |  |  | } | 
| 156 |  |  |  |  |  |  |  | 
| 157 | 4008 |  |  |  |  | 16432 | return ($replace, $usedlen); | 
| 158 |  |  |  |  |  |  | } | 
| 159 |  |  |  |  |  |  |  | 
| 160 |  |  |  |  |  |  | sub _is_gaiji | 
| 161 |  |  |  |  |  |  | { | 
| 162 | 4016 |  |  | 4016 |  | 5315 | my $char = shift; # コピーしないと、encode のタイミングで元の文字が消失してしまう。 | 
| 163 |  |  |  |  |  |  |  | 
| 164 |  |  |  |  |  |  | # UTF-8からSJISに変換できなければ外字と判定 | 
| 165 | 4016 |  |  |  |  | 5045 | eval { $ENC->encode($char, Encode::FB_CROAK) }; | 
|  | 4016 |  |  |  |  | 27949 |  | 
| 166 | 4016 | 100 |  |  |  | 19786 | return 1 if $@; | 
| 167 | 12 |  |  |  |  | 42 | return 0; | 
| 168 |  |  |  |  |  |  | } | 
| 169 |  |  |  |  |  |  |  | 
| 170 |  |  |  |  |  |  | sub check | 
| 171 |  |  |  |  |  |  | { | 
| 172 | 59 |  |  | 59 | 1 | 263 | my ($self, $text) = @_; | 
| 173 |  |  |  |  |  |  |  | 
| 174 | 59 | 100 |  |  |  | 199 | return undef unless defined $text; | 
| 175 |  |  |  |  |  |  |  | 
| 176 | 58 |  |  |  |  | 110 | my $output_format = $self->{'output_format'}; | 
| 177 |  |  |  |  |  |  |  | 
| 178 | 58 |  |  |  |  | 18064 | my @chars = split(//, $text); | 
| 179 |  |  |  |  |  |  |  | 
| 180 | 58 |  |  |  |  | 91 | my $checked_text = ''; | 
| 181 |  |  |  |  |  |  |  | 
| 182 | 58 |  |  |  |  | 172 | for (my $i = 0; $i < @chars; $i++) | 
| 183 |  |  |  |  |  |  | { | 
| 184 | 8668 |  |  |  |  | 12494 | my $char = $chars[$i]; | 
| 185 |  |  |  |  |  |  |  | 
| 186 | 8668 | 100 | 100 |  |  | 18120 | if ( $self->{simplesp} && ($char eq "\x{0020}" || $char eq "\x{3000}") ) | 
|  |  |  | 66 |  |  |  |  | 
| 187 |  |  |  |  |  |  | { | 
| 188 | 16 | 100 |  |  |  | 42 | if ($output_format eq 'plaintext') | 
|  |  | 50 |  |  |  |  |  | 
| 189 |  |  |  |  |  |  | { | 
| 190 | 8 | 100 |  |  |  | 22 | if ($char eq "\x{0020}") { $checked_text .= '_';  } | 
|  | 4 | 50 |  |  |  | 10 |  | 
| 191 | 4 |  |  |  |  | 5 | elsif ($char eq "\x{3000}") { $checked_text .= '□'; } | 
| 192 |  |  |  |  |  |  | } | 
| 193 |  |  |  |  |  |  | elsif ($output_format eq 'html') | 
| 194 |  |  |  |  |  |  | { | 
| 195 | 8 | 100 |  |  |  | 23 | if ($char eq "\x{0020}") { $checked_text .= _tag_html('_', 'simplesp');  } | 
|  | 4 | 50 |  |  |  | 7 |  | 
| 196 | 4 |  |  |  |  | 8 | elsif ($char eq "\x{3000}") { $checked_text .= _tag_html('□', 'simplesp'); } | 
| 197 |  |  |  |  |  |  | } | 
| 198 |  |  |  |  |  |  |  | 
| 199 | 16 |  |  |  |  | 46 | next; | 
| 200 |  |  |  |  |  |  | } | 
| 201 |  |  |  |  |  |  |  | 
| 202 | 8652 | 100 | 100 |  |  | 83505 | if ($char =~ /[\x{0000}-\x{0009}\x{000B}\x{000C}\x{000E}-\x{001F}\x{007F}-\x{009F}]/) | 
|  |  | 100 | 100 |  |  |  |  | 
|  |  | 100 | 100 |  |  |  |  | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  | 100 | 66 |  |  |  |  | 
| 203 |  |  |  |  |  |  | { | 
| 204 |  |  |  |  |  |  | # 改行は含まない | 
| 205 |  |  |  |  |  |  |  | 
| 206 | 8 | 100 |  |  |  | 25 | if ($output_format eq 'plaintext') | 
|  |  | 50 |  |  |  |  |  | 
| 207 |  |  |  |  |  |  | { | 
| 208 | 4 |  |  |  |  | 32 | $checked_text .= $char . '[ctrl](' . sprintf("U+%04X", ord $char) . ')'; | 
| 209 |  |  |  |  |  |  | } | 
| 210 |  |  |  |  |  |  | elsif ($output_format eq 'html') | 
| 211 |  |  |  |  |  |  | { | 
| 212 | 4 |  |  |  |  | 23 | $checked_text .= _tag_html($char, 'ctrl', sprintf("U+%04X", ord $char)); | 
| 213 |  |  |  |  |  |  | } | 
| 214 |  |  |  |  |  |  | } | 
| 215 | 7 |  |  | 7 |  | 98 | elsif ($char =~ /\p{InHalfwidthKatakana}/) | 
|  | 7 |  |  |  |  | 12 |  | 
|  | 7 |  |  |  |  | 101 |  | 
| 216 |  |  |  |  |  |  | { | 
| 217 | 8 | 100 |  |  |  | 27 | if ($output_format eq 'plaintext') | 
|  |  | 50 |  |  |  |  |  | 
| 218 |  |  |  |  |  |  | { | 
| 219 | 4 |  |  |  |  | 18 | $checked_text .= $char . '[hankata]'; | 
| 220 |  |  |  |  |  |  | } | 
| 221 |  |  |  |  |  |  | elsif ($output_format eq 'html') | 
| 222 |  |  |  |  |  |  | { | 
| 223 | 4 |  |  |  |  | 9 | $checked_text .= _tag_html($char, 'hankata', '半角カタカナ'); | 
| 224 |  |  |  |  |  |  | } | 
| 225 |  |  |  |  |  |  | } | 
| 226 |  |  |  |  |  |  | elsif ($self->{'hansp'} && $char eq "\x{0020}") | 
| 227 |  |  |  |  |  |  | { | 
| 228 | 4 | 100 |  |  |  | 17 | if ($output_format eq 'plaintext') | 
|  |  | 50 |  |  |  |  |  | 
| 229 |  |  |  |  |  |  | { | 
| 230 | 2 |  |  |  |  | 9 | $checked_text .= $char . '[hansp]'; | 
| 231 |  |  |  |  |  |  | } | 
| 232 |  |  |  |  |  |  | elsif ($output_format eq 'html') | 
| 233 |  |  |  |  |  |  | { | 
| 234 | 2 |  |  |  |  | 5 | $checked_text .= _tag_html($char, 'hansp', '半角スペース'); | 
| 235 |  |  |  |  |  |  | } | 
| 236 |  |  |  |  |  |  | } | 
| 237 |  |  |  |  |  |  | elsif ($self->{'zensp'} && $char eq "\x{3000}") | 
| 238 |  |  |  |  |  |  | { | 
| 239 | 4 | 100 |  |  |  | 12 | if ($output_format eq 'plaintext') | 
|  |  | 50 |  |  |  |  |  | 
| 240 |  |  |  |  |  |  | { | 
| 241 | 2 |  |  |  |  | 9 | $checked_text .= $char . '[zensp]'; | 
| 242 |  |  |  |  |  |  | } | 
| 243 |  |  |  |  |  |  | elsif ($output_format eq 'html') | 
| 244 |  |  |  |  |  |  | { | 
| 245 | 2 |  |  |  |  | 6 | $checked_text .= _tag_html($char, 'zensp', '全角スペース'); | 
| 246 |  |  |  |  |  |  | } | 
| 247 |  |  |  |  |  |  | } | 
| 248 |  |  |  |  |  |  | elsif ( $self->{hanpar} && ($char eq '(' || $char eq ')') ) | 
| 249 |  |  |  |  |  |  | { | 
| 250 | 8 | 100 |  |  |  | 24 | if ($output_format eq 'plaintext') | 
|  |  | 50 |  |  |  |  |  | 
| 251 |  |  |  |  |  |  | { | 
| 252 | 4 |  |  |  |  | 15 | $checked_text .= $char . '[hanpar]'; | 
| 253 |  |  |  |  |  |  | } | 
| 254 |  |  |  |  |  |  | elsif ($output_format eq 'html') | 
| 255 |  |  |  |  |  |  | { | 
| 256 | 4 |  |  |  |  | 7 | $checked_text .= _tag_html($char, 'hanpar', '半角括弧'); | 
| 257 |  |  |  |  |  |  | } | 
| 258 |  |  |  |  |  |  | } | 
| 259 |  |  |  |  |  |  | elsif ( $char eq '※' && ($self->{'78hosetsu_tekiyo'} || $self->{'hosetsu_tekiyo'}) ) | 
| 260 |  |  |  |  |  |  | { | 
| 261 | 4008 |  |  |  |  | 9097 | my ($replace, $usedlen) = $self->_check_all_hosetsu_tekiyo(\@chars, $i); | 
| 262 |  |  |  |  |  |  |  | 
| 263 | 4008 | 50 |  |  |  | 8678 | if ($replace) | 
| 264 |  |  |  |  |  |  | { | 
| 265 | 4008 |  |  |  |  | 6390 | $checked_text .= $replace; | 
| 266 | 4008 |  |  |  |  | 5158 | $i += ($usedlen - 1); | 
| 267 | 4008 |  |  |  |  | 10702 | next; | 
| 268 |  |  |  |  |  |  | } | 
| 269 |  |  |  |  |  |  | } | 
| 270 |  |  |  |  |  |  | else | 
| 271 |  |  |  |  |  |  | { | 
| 272 |  |  |  |  |  |  | # 秘伝のタレによるチェック | 
| 273 |  |  |  |  |  |  | #  複数のタグに該当する文字でも↓のif文で真っ先にマッチした1つのタグしかつかないことに注意。 | 
| 274 |  |  |  |  |  |  | #  複数タグに対応してもいいが、複数タグに該当する文字は9字で、その9字のためにコードと出力結果を複雑化させるのも微妙なところ。 | 
| 275 |  |  |  |  |  |  | # | 
| 276 | 4612 | 100 | 66 |  |  | 60773 | if ($self->{'78'} && $J78->{$char}) | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  | 100 | 100 |  |  |  |  | 
| 277 |  |  |  |  |  |  | { | 
| 278 | 4 | 100 |  |  |  | 14 | if ($output_format eq 'plaintext') | 
|  |  | 50 |  |  |  |  |  | 
| 279 |  |  |  |  |  |  | { | 
| 280 | 2 |  |  |  |  | 10 | $checked_text .= $char . '[78](' . $J78->{$char} . ')'; | 
| 281 |  |  |  |  |  |  | } | 
| 282 |  |  |  |  |  |  | elsif ($output_format eq 'html') | 
| 283 |  |  |  |  |  |  | { | 
| 284 | 2 |  |  |  |  | 6 | $checked_text .= _tag_html($char, '78', $J78->{$char}); | 
| 285 |  |  |  |  |  |  | } | 
| 286 |  |  |  |  |  |  | } | 
| 287 |  |  |  |  |  |  | elsif ($self->{'jyogai'} && $JYOGAI->{$char}) | 
| 288 |  |  |  |  |  |  | { | 
| 289 | 4 | 100 |  |  |  | 15 | if ($output_format eq 'plaintext') | 
|  |  | 50 |  |  |  |  |  | 
| 290 |  |  |  |  |  |  | { | 
| 291 | 2 |  |  |  |  | 9 | $checked_text .= $char . '[jyogai]'; | 
| 292 |  |  |  |  |  |  | } | 
| 293 |  |  |  |  |  |  | elsif ($output_format eq 'html') | 
| 294 |  |  |  |  |  |  | { | 
| 295 | 2 |  |  |  |  | 4 | $checked_text .= _tag_html($char, 'jyogai', '新JIS漢字で包摂規準の適用除外となる'); | 
| 296 |  |  |  |  |  |  | } | 
| 297 |  |  |  |  |  |  | } | 
| 298 |  |  |  |  |  |  | elsif ($self->{'kouetsukun'} && $KYUJI->{$char}) | 
| 299 |  |  |  |  |  |  | { | 
| 300 | 8 | 100 |  |  |  | 20 | if ($output_format eq 'plaintext') | 
|  |  | 50 |  |  |  |  |  | 
| 301 |  |  |  |  |  |  | { | 
| 302 | 4 |  |  |  |  | 18 | $checked_text .= "▼$char$KYUJI->{$char}▲"; | 
| 303 |  |  |  |  |  |  | } | 
| 304 |  |  |  |  |  |  | elsif ($output_format eq 'html') | 
| 305 |  |  |  |  |  |  | { | 
| 306 | 4 |  |  |  |  | 9 | $checked_text .= _tag_html($char, 'kyuji', $KYUJI->{$char}); | 
| 307 |  |  |  |  |  |  | } | 
| 308 |  |  |  |  |  |  | } | 
| 309 |  |  |  |  |  |  | elsif ($self->{'kouetsukun'} && $ITAIJI->{$char}) | 
| 310 |  |  |  |  |  |  | { | 
| 311 | 4 | 100 |  |  |  | 10 | if ($output_format eq 'plaintext') | 
|  |  | 50 |  |  |  |  |  | 
| 312 |  |  |  |  |  |  | { | 
| 313 | 2 |  |  |  |  | 10 | $checked_text .= "▼$char$ITAIJI->{$char}▲"; | 
| 314 |  |  |  |  |  |  | } | 
| 315 |  |  |  |  |  |  | elsif ($output_format eq 'html') | 
| 316 |  |  |  |  |  |  | { | 
| 317 | 2 |  |  |  |  | 5 | $checked_text .= _tag_html($char, 'itaiji', $ITAIJI->{$char}); | 
| 318 |  |  |  |  |  |  | } | 
| 319 |  |  |  |  |  |  | } | 
| 320 |  |  |  |  |  |  | elsif ($self->{'gonin1'} && $GONIN1->{$char}) | 
| 321 |  |  |  |  |  |  | { | 
| 322 | 8 | 100 |  |  |  | 24 | if ($output_format eq 'plaintext') | 
|  |  | 50 |  |  |  |  |  | 
| 323 |  |  |  |  |  |  | { | 
| 324 | 4 |  |  |  |  | 20 | $checked_text .= $char . '[gonin1](' . $GONIN1->{$char} . ')'; | 
| 325 |  |  |  |  |  |  | } | 
| 326 |  |  |  |  |  |  | elsif ($output_format eq 'html') | 
| 327 |  |  |  |  |  |  | { | 
| 328 | 4 |  |  |  |  | 9 | $checked_text .= _tag_html($char, 'gonin1', $GONIN1->{$char}); | 
| 329 |  |  |  |  |  |  | } | 
| 330 |  |  |  |  |  |  | } | 
| 331 |  |  |  |  |  |  | elsif ($self->{'gonin2'} && $GONIN2->{$char}) | 
| 332 |  |  |  |  |  |  | { | 
| 333 | 8 | 100 |  |  |  | 23 | if ($output_format eq 'plaintext') | 
|  |  | 50 |  |  |  |  |  | 
| 334 |  |  |  |  |  |  | { | 
| 335 | 4 |  |  |  |  | 19 | $checked_text .= $char . '[gonin2](' . $GONIN2->{$char} . ')'; | 
| 336 |  |  |  |  |  |  | } | 
| 337 |  |  |  |  |  |  | elsif ($output_format eq 'html') | 
| 338 |  |  |  |  |  |  | { | 
| 339 | 4 |  |  |  |  | 9 | $checked_text .= _tag_html($char, 'gonin2', $GONIN2->{$char}); | 
| 340 |  |  |  |  |  |  | } | 
| 341 |  |  |  |  |  |  | } | 
| 342 |  |  |  |  |  |  | elsif ($self->{'gonin3'} && $GONIN3->{$char}) | 
| 343 |  |  |  |  |  |  | { | 
| 344 | 8 | 100 |  |  |  | 25 | if ($output_format eq 'plaintext') | 
|  |  | 50 |  |  |  |  |  | 
| 345 |  |  |  |  |  |  | { | 
| 346 | 4 |  |  |  |  | 19 | $checked_text .= $char . '[gonin3](' . $GONIN3->{$char} . ')'; | 
| 347 |  |  |  |  |  |  | } | 
| 348 |  |  |  |  |  |  | elsif ($output_format eq 'html') | 
| 349 |  |  |  |  |  |  | { | 
| 350 | 4 |  |  |  |  | 10 | $checked_text .= _tag_html($char, 'gonin3', $GONIN3->{$char}); | 
| 351 |  |  |  |  |  |  | } | 
| 352 |  |  |  |  |  |  | } | 
| 353 |  |  |  |  |  |  | elsif ( $self->{'gaiji'} && _is_gaiji($char) ) | 
| 354 |  |  |  |  |  |  | { | 
| 355 |  |  |  |  |  |  | # 秘伝のタレに外字が含まれていないことがテストで保証されているのでこの位置で問題ない | 
| 356 |  |  |  |  |  |  | # コントロール文字に外字があるが、コントロール文字なら必ず 'ctrl' とタグ付けされるのでそれで良しとする。 | 
| 357 | 4004 | 100 |  |  |  | 8964 | if ($output_format eq 'plaintext') | 
|  |  | 50 |  |  |  |  |  | 
| 358 |  |  |  |  |  |  | { | 
| 359 | 2002 |  |  |  |  | 7380 | $checked_text .= $char . '[gaiji]'; | 
| 360 |  |  |  |  |  |  | } | 
| 361 |  |  |  |  |  |  | elsif ($output_format eq 'html') | 
| 362 |  |  |  |  |  |  | { | 
| 363 | 2002 |  |  |  |  | 3564 | $checked_text .= _tag_html($char, 'gaiji', 'JIS外字'); | 
| 364 |  |  |  |  |  |  | } | 
| 365 |  |  |  |  |  |  | } | 
| 366 | 564 |  |  |  |  | 1575 | else { $checked_text .= $char; } | 
| 367 |  |  |  |  |  |  | } | 
| 368 |  |  |  |  |  |  | } | 
| 369 |  |  |  |  |  |  |  | 
| 370 | 58 |  |  |  |  | 8167 | return $checked_text; | 
| 371 |  |  |  |  |  |  | } | 
| 372 |  |  |  |  |  |  |  | 
| 373 |  |  |  |  |  |  | 1; | 
| 374 |  |  |  |  |  |  |  | 
| 375 |  |  |  |  |  |  | __END__ |