|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Lingua::JA::Gairaigo::Fuzzy;  | 
| 
2
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
12407
 | 
 use warnings;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
    | 
| 
3
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
3
 | 
 use strict;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
56
 | 
    | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 require Exporter;  | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @ISA = qw(Exporter);  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @EXPORT_OK = qw/same_gairaigo/;  | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our %EXPORT_TAGS = (  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     all => \@EXPORT_OK,  | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 );  | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION = '0.07';  | 
| 
11
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
491
 | 
 use utf8;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
13
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
405
 | 
 use Text::Fuzzy 'fuzzy_index';  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
906
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
48
 | 
    | 
| 
14
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
481
 | 
 use Lingua::JA::Moji 'kana2romaji';  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26864
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
826
 | 
    | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub same_gairaigo  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
18
 | 
13
 | 
 
 | 
 
 | 
  
13
  
 | 
  
1
  
 | 
62
 | 
     my ($kana, $n, $debug) = @_;  | 
| 
19
 | 
13
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
24
 | 
     if ($kana eq $n) {  | 
| 
20
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	return 1;  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
22
 | 
12
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
22
 | 
     if (chouon ($kana, $n)) {  | 
| 
23
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 	my $gotcha = usual_suspect ($kana, $n, $debug);  | 
| 
24
 | 
11
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
17
 | 
 	if ($gotcha) {  | 
| 
25
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
 	    return 1;  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
28
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     return undef;  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Check a few likely things  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub usual_suspect  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
35
 | 
11
 | 
 
 | 
 
 | 
  
11
  
 | 
  
0
  
 | 
11
 | 
     my ($kana, $n, $debug) = @_;  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # The following is an undocumented routine in Text::Fuzzy.  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
39
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
     my ($dist, $edits) = fuzzy_index ($kana, $n, 1);  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Is this a likely candidate?  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
43
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4147
 | 
     my $gotcha;  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
45
 | 
11
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
32
 | 
     if ($edits =~ /ii|dd|rr/) {  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# A double delete, double insertion, or double replace means  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# this is unlikely to be the same word.  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
50
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	if ($debug) {  | 
| 
51
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    print "$kana has double delete, insert, or replace; rejecting.\n";  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
54
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 	return undef;  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
56
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     my @kana = split //, $kana;  | 
| 
57
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
     my @nkana = split //, $n;  | 
| 
58
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
     my @edits = split //, $edits;  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
60
 | 
10
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     if ($debug) {  | 
| 
61
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	printf ("Lengths of a and b: %d %d\n", scalar (@kana), scalar (@nkana));  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # $i is the offset in @kana, and $j is the offset in @nkana. Note  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # that @kana and @nkana may have different lengths and the offsets  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # are adjusted as we look through which edit is necessary to change  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # "$kana" to "$n".  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
69
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     my $i = 0;  | 
| 
70
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     my $j = 0;  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
72
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     for my $edit (@edits) {  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
74
 | 
62
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
60
 | 
 	if ($debug) {  | 
| 
75
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    print "Offsets i = $i, j = $j, edit = $edit\n";  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
77
 | 
62
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
150
 | 
 	if ($edit eq 'r') {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # Replaced $k with $q.  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
81
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	    my $k = $kana[$i];  | 
| 
82
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	    my $q = $nkana[$j];  | 
| 
83
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	    if ($debug) {  | 
| 
84
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		print "Replace $k with $q\n";  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
86
 | 
6
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
38
 | 
 	    if ($k =~ /[ーィイ]/ && $q =~ /[ーィイ]/) {  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# Check whether the previous kana ends in "e", so it  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# is something like "ヘイ" and "ヘー".  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
91
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 		if (ends_in_e (\@kana, $i)) {  | 
| 
92
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 		    $gotcha = 1;  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
94
 | 
3
 | 
  
 50
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
20
 | 
 		if (($k eq 'ー' && $q eq 'イ') ||  | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    ($q eq 'ー' && $k eq 'イ')) {  | 
| 
96
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 		    if (ends_in_i (\@kana, $i)) {  | 
| 
97
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
 			$gotcha = 1;  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    }  | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
101
 | 
6
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
25
 | 
 	    if ($k =~ /[ーッ]/ && $q =~ /[ーッ]/) {  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# A chouon has been replaced with a sokuon, or  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# vice-versa.  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
106
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
 		$gotcha = 1;  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
108
 | 
6
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
30
 | 
 	    if (($k eq 'ー' && $q eq 'ウ') ||  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		($q eq 'ー' && $k eq 'ウ')) {  | 
| 
110
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		if (ends_in_ou (\@kana, $i)) {  | 
| 
111
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
 		    $gotcha = 1;  | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # Whatever we had, increment $i and $j equally because a  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # character was replaced.  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
118
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	    $i++;  | 
| 
119
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	    $j++;  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($edit eq 'd') {  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # Character $k was deleted from $kana to get $n, so we  | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # just increment $i.  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
126
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	    my $k = $kana[$i];  | 
| 
127
 | 
5
 | 
  
 50
  
 | 
  
100
  
 | 
 
 | 
 
 | 
26
 | 
 	    if ($k eq 'ー' || $k eq '・' || $k eq 'ッ') {  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# A chouon, nakaguro, or sokuon was deleted from $kana  | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# to get $n.  | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
132
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		$gotcha = 1;  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # Check $j is still inside @kana (NOTE(review): $j is otherwise the @nkana offset; confirm).  | 
| 
135
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	    if ($j < scalar (@kana)) {  | 
| 
136
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 		my $q = $kana[$j];  | 
| 
137
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
 		if ($q =~ /[ーィイ]/) {  | 
| 
138
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 		    if (ends_in_e (\@kana, $i)) {  | 
| 
139
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$gotcha = 1;  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    }  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
143
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	    $i++;  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($edit eq 'i') {  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # Character $k was inserted into $n, so we just increment  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # $j, not $i.  | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
150
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    my $k = $nkana[$j];  | 
| 
151
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 	    if ($k eq 'ー' || $k eq '・' || $k eq 'ッ') {  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# A chouon, nakaguro, or sokuon was inserted into  | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# $kana to get $n.  | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
156
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$gotcha = 1;  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
158
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    $j++;  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($edit eq 'k') {  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # The two strings are the same at this point, so do not do  | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # any checking but just increment the offsets.  | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
165
 | 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
 	    $i++;  | 
| 
166
 | 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
 	    $j++;  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Check we did not make a mistake scanning the two strings.  | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
172
 | 
10
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     if ($i != scalar @kana) {  | 
| 
173
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	warn "Bug: Mismatch $i";  | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
175
 | 
10
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
12
 | 
     if ($j != scalar @nkana) {  | 
| 
176
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	warn "Bug: Mismatch $j";  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
178
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
     return $gotcha;  | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Work out whether the kana before the one at $i ends in "e".  | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub ends_in_e  | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
185
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
0
  
 | 
17
 | 
     my ($kana_ref, $i) = @_;  | 
| 
186
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
     my $prev;  | 
| 
187
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     if ($i >= 1) {  | 
| 
188
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	$prev = $kana_ref->[$i - 1];  | 
| 
189
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 	$prev = kana2romaji ($prev);  | 
| 
190
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1966
 | 
 	if ($prev =~ /e$/) {  | 
| 
191
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	    return 1;  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
194
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     return undef;  | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Work out whether the kana before the one at $i ends in "o" or "u".  | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub ends_in_ou  | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
201
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
0
  
 | 
3
 | 
     my ($kana_ref, $i) = @_;  | 
| 
202
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
     my $prev;  | 
| 
203
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     if ($i >= 1) {  | 
| 
204
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	$prev = $kana_ref->[$i - 1];  | 
| 
205
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	$prev = kana2romaji ($prev);  | 
| 
206
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
415
 | 
 	if ($prev =~ /[ou]$/) {  | 
| 
207
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	    return 1;  | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
210
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return undef;  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Work out whether the kana before the one at $i ends in "i".  | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub ends_in_i  | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
216
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
0
  
 | 
3
 | 
     my ($kana_ref, $i) = @_;  | 
| 
217
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     my $prev;  | 
| 
218
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     if ($i >= 1) {  | 
| 
219
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	$prev = $kana_ref->[$i - 1];  | 
| 
220
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	$prev = kana2romaji ($prev);  | 
| 
221
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
590
 | 
 	if ($prev =~ /i$/) {  | 
| 
222
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	    return 1;  | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
225
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     return undef;  | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Work out whether $x and $y differ in the ways we expect.  | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # The name "chouon" is a misnomer.  | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub chouon  | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
233
 | 
12
 | 
 
 | 
 
 | 
  
12
  
 | 
  
0
  
 | 
11
 | 
     my ($x, $y) = @_;  | 
| 
234
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     my %xa = alph ($x);  | 
| 
235
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     my %ya = alph ($y);  | 
| 
236
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     my $found;  | 
| 
237
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
     my $mismatch = check (\%xa, \%ya, \$found);  | 
| 
238
 | 
12
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
16
 | 
     if ($mismatch) {  | 
| 
239
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	return undef;  | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
241
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
     $mismatch = check (\%ya, \%xa, \$found);  | 
| 
242
 | 
11
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
16
 | 
     if ($mismatch) {  | 
| 
243
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return undef;  | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
245
 | 
11
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     if ($found) {  | 
| 
246
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
 	return 1;  | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
248
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return undef;  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Given a word $x, make an alphabet of its constituent letters.  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub alph  | 
| 
254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
255
 | 
24
 | 
 
 | 
 
 | 
  
24
  
 | 
  
0
  
 | 
18
 | 
     my ($x) = @_;  | 
| 
256
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     my %xa;  | 
| 
257
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
49
 | 
     my @xl = split //, $x;  | 
| 
258
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
78
 | 
     @xa{@xl} = @xl;  | 
| 
259
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
98
 | 
     return %xa;  | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Go through the keys of $ya, and check whether the keys which are not  | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # in $xa are the right kind of keys.  | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub check  | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
267
 | 
23
 | 
 
 | 
 
 | 
  
23
  
 | 
  
0
  
 | 
18
 | 
     my ($xa, $ya, $found) = @_;  | 
| 
268
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     my $ok;  | 
| 
269
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
     for my $k (keys %$ya) {  | 
| 
270
 | 
128
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
153
 | 
 	next if $xa->{$k};  | 
| 
271
 | 
18
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
98
 | 
 	if ($k eq 'ー' ||  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    $k eq 'イ' ||  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    $k eq 'ィ' ||  | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    $k eq '・' ||  | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    $k eq 'ッ' ||   | 
| 
276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    $k eq 'ウ') {  | 
| 
277
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 	    $ok = 1;  | 
| 
278
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
 	    next;  | 
| 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
280
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	return $k;  | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
282
 | 
22
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
32
 | 
     if ($ok) {  | 
| 
283
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 	$$found = $ok;  | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
285
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
     return;  | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  |