line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package ORDB::Unihan; |
2
|
|
|
|
|
|
|
{ |
3
|
|
|
|
|
|
|
$ORDB::Unihan::VERSION = '0.03'; |
4
|
|
|
|
|
|
|
} |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
# ABSTRACT: An ORM for the published Unihan database |
7
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
26221
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
42
|
|
9
|
1
|
|
|
1
|
|
7
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
31
|
|
10
|
1
|
|
|
1
|
|
5
|
use Carp (); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
20
|
|
11
|
1
|
|
|
1
|
|
13
|
use File::Spec 0.80 (); |
|
1
|
|
|
|
|
35
|
|
|
1
|
|
|
|
|
22
|
|
12
|
1
|
|
|
1
|
|
8
|
use File::Path 2.04 (); |
|
1
|
|
|
|
|
20
|
|
|
1
|
|
|
|
|
31
|
|
13
|
1
|
|
|
1
|
|
252153
|
use File::Remove 1.42 (); |
|
1
|
|
|
|
|
2615
|
|
|
1
|
|
|
|
|
31
|
|
14
|
1
|
|
|
1
|
|
74555
|
use File::HomeDir 0.69 (); |
|
1
|
|
|
|
|
11204
|
|
|
1
|
|
|
|
|
34
|
|
15
|
1
|
|
|
1
|
|
1427
|
use LWP::Online (); |
|
1
|
|
|
|
|
129095
|
|
|
1
|
|
|
|
|
30
|
|
16
|
1
|
|
|
1
|
|
1242
|
use Params::Util 0.33 qw{ _STRING _NONNEGINT _HASH }; |
|
1
|
|
|
|
|
4526
|
|
|
1
|
|
|
|
|
80
|
|
17
|
1
|
|
|
1
|
|
8263
|
use DBI; |
|
1
|
|
|
|
|
47315
|
|
|
1
|
|
|
|
|
88
|
|
18
|
1
|
|
|
1
|
|
1257
|
use ORLite 1.22 (); |
|
1
|
|
|
|
|
29443
|
|
|
1
|
|
|
|
|
35
|
|
19
|
1
|
|
|
1
|
|
16
|
use vars qw{@ISA}; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
191
|
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# Inherit from ORLite at compile time, so that the import() defined in this
# package can delegate to ORLite's import() through SUPER::.
BEGIN {
    @ISA = ('ORLite');
}

# Location of the published Unihan archive that import() mirrors locally.
my $url = 'http://www.unicode.org/Public/UNIDATA/Unihan.zip';
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# Return the per-user directory that holds the downloaded Unihan archive,
# the extracted text files and the generated SQLite database.
#
# The path is <File::HomeDir data dir>/<Perl|.perl>/ORDB-Unihan, where the
# middle component is 'Perl' on MSWin32 and '.perl' everywhere else.
sub dir {
    my $perl_subdir = $^O eq 'MSWin32' ? 'Perl' : '.perl';

    return File::Spec->catdir(
        File::HomeDir->my_data,
        $perl_subdir,
        'ORDB-Unihan',
    );
}
32
|
0
|
|
|
0
|
1
|
|
# Return the full path of the generated SQLite database file, which lives
# directly inside the directory returned by dir().
sub sqlite_path {
    my $base_dir = dir();
    return File::Spec->catfile( $base_dir, 'Unihan.sqlite' );
}
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# Import hook: make sure a local SQLite mirror of the Unihan data exists and
# is current, then hand off to ORLite's import() with that database file.
#
# Arguments (after the class name):
#   - either a HASH reference or a flat key/value list of parameters;
#   - an optional trailing '-DEBUG' string, which enables progress messages
#     on STDERR and is forwarded to ORLite.
#
# Parameters handled here:
#   useragent - object used for the HEAD and mirror requests; when absent an
#               LWP::UserAgent with a 30 second timeout is created.
# The 'file' and 'readonly' parameters are always overwritten before the
# hand-off. Everything else is passed through untouched.
#
# Croaks when the database is missing and there is no internet connection,
# when the archive cannot be fetched or extracted, or when a local file
# cannot be read or written.
sub import {
    my $self  = shift;
    my $class = ref $self || $self;

    # Check for debug mode
    my $DEBUG = 0;
    if ( @_ and defined _STRING( $_[-1] ) and $_[-1] eq '-DEBUG' ) {
        $DEBUG = 1;
        pop @_;
    }

    # Accept the parameters either as a hash reference or as a flat list
    my %params;
    if ( _HASH( $_[0] ) ) {
        %params = %{ $_[0] };
    }
    else {
        %params = @_;
    }

    # Determine the database directory, creating it if needed
    my $dir = dir();
    unless ( -e $dir ) {
        File::Path::mkpath( $dir, { verbose => 0 } );
    }

    # Determine the files we work with
    my $db            = sqlite_path();
    my $zip_path      = File::Spec->catfile( $dir, 'Unihan.zip' );
    my $last_mod_file = File::Spec->catfile( $dir, 'last_mod.txt' );

    # Use the caller's useragent, or create the default one
    my $useragent = delete $params{useragent};
    unless ($useragent) {
        # Loaded on demand: this file does not load LWP::UserAgent itself
        require LWP::UserAgent;
        $useragent = LWP::UserAgent->new(
            timeout       => 30,
            show_progress => $DEBUG,
        );
    }

    # Without a connection we can only carry on with an existing database
    my $online = LWP::Online::online();
    unless ( $online or -f $db ) {
        # Don't have the file and can't get it
        Carp::croak("Cannot fetch database without an internet connection");
    }

    # Refetch the .zip when the server copy differs from the one we have.
    # The server is only contacted when we are online, so an offline run
    # never disturbs the recorded stamp or the existing database.
    my $refetched = 0;
    if ($online) {
        my $last_mod_local = _read_last_mod($last_mod_file);

        my $res = $useragent->head($url);
        my $last_mod =
            $res->is_success ? $res->header('last-modified') : undef;

        my $up_to_date =
            ( defined $last_mod and $last_mod_local eq $last_mod ) ? 1 : 0;

        if ( !$up_to_date or !-e $zip_path ) {
            if ( $DEBUG and !$up_to_date ) {
                my $shown = defined $last_mod ? $last_mod : 'unknown';
                print STDERR
                    "Unihan.zip last-modified $shown, we have $last_mod_local\n";
            }
            print STDERR "Mirror $url to $zip_path\n" if $DEBUG;

            # Fetch the archive
            my $response = $useragent->mirror( $url => $zip_path );
            unless ( $response->is_success or $response->code == 304 ) {
                Carp::croak( "Error: Failed to fetch $url: "
                        . $response->status_line );
            }

            # Record the stamp only once the archive is safely on disk, so
            # a failed download is retried on the next run.
            _write_last_mod( $last_mod_file, $last_mod )
                if defined $last_mod;
            $refetched = 1;
        }
    }

    # Remove the single Unihan.txt from the old archive layout so that it
    # is not loaded alongside the newer per-topic files
    my $old_txt_file = File::Spec->catfile( $dir, 'Unihan.txt' );
    unlink($old_txt_file) if -e $old_txt_file;

    # Regenerate the .sqlite after a refetch, or when it does not exist yet
    if ( $refetched or !-e $db ) {

        # Extract the .txt files
        my $txt_path = File::Spec->catfile( $dir, 'Unihan_Readings.txt' );
        if ( $refetched or !-e $txt_path ) {
            print STDERR "Extract $zip_path to $dir\n" if $DEBUG;
            require Archive::Extract;
            my $ae = Archive::Extract->new( archive => $zip_path )
                or Carp::croak("Error: Failed to read .zip");
            $ae->extract( to => $dir )
                or Carp::croak("Error: Failed to read .zip");
            unless ( -e $txt_path ) {
                Carp::croak("Error: Failed to extract .zip");
            }
        }

        _build_sqlite( $dir, $db, $DEBUG );
    }

    $params{file}     = $db;
    $params{readonly} = 1;

    # Hand off to the main ORLite class.
    $class->SUPER::import( \%params, $DEBUG ? '-DEBUG' : () );
}

# Read the Last-Modified stamp recorded by a previous download.
# Returns 'N/A' when the stamp file cannot be opened and 0 when it is empty.
sub _read_last_mod {
    my ($stamp_file) = @_;

    open( my $fh, '<', $stamp_file ) or return 'N/A';
    flock( $fh, 1 );    # 1 == LOCK_SH
    my $stamp = <$fh>;
    close($fh);

    return 0 unless defined $stamp;
    chomp($stamp);
    return $stamp || 0;
}

# Record the Last-Modified stamp of the archive that was just downloaded.
# Croaks when the stamp file cannot be written.
sub _write_last_mod {
    my ( $stamp_file, $stamp ) = @_;

    open( my $fh, '>', $stamp_file )
        or Carp::croak("Error: Failed to write $stamp_file: $!");
    flock( $fh, 2 );    # 2 == LOCK_EX
    print {$fh} $stamp;
    close($fh)
        or Carp::croak("Error: Failed to write $stamp_file: $!");

    return;
}

# Build the SQLite database at $db from every .txt file in $dir (except the
# last_mod.txt stamp file).
#
# The database is assembled in a temporary file and only moved into place
# once every row has been committed, so an interrupted build never leaves a
# half-filled database where later runs would pick it up.
sub _build_sqlite {
    my ( $dir, $db, $DEBUG ) = @_;

    my $tmp_db = "$db.tmp";
    unlink($tmp_db);

    # RaiseError makes every failing DBI call below die with the DBI error
    my $dbh = DBI->connect(
        "DBI:SQLite:$tmp_db",
        undef, undef,
        {
            RaiseError => 1,
            PrintError => 1,
        }
    );
    $dbh->do('PRAGMA synchronous=OFF');
    $dbh->do('PRAGMA count_changes=OFF');
    $dbh->do('PRAGMA journal_mode=MEMORY');
    $dbh->do('PRAGMA temp_store=MEMORY');
    $dbh->do(<<'SQL');
CREATE TABLE unihan (
    "hex" CHAR(5) NOT NULL,
    "type" VARCHAR(18) NOT NULL,
    "val" VARCHAR(255),
    PRIMARY KEY ("hex", "type")
)
SQL
    my $sql =
        'INSERT INTO "unihan" ("hex", "type", "val") VALUES (?, ?, ?)';
    my $sth = $dbh->prepare($sql);

    opendir( my $dh, $dir )
        or Carp::croak("Error: Failed to read directory $dir: $!");
    my @files =
        sort grep { /\.txt\z/ and $_ ne 'last_mod.txt' } readdir($dh);
    closedir($dh);

    # One transaction for the whole load instead of one commit per row
    $dbh->begin_work;
    foreach my $file (@files) {
        print STDERR "Populate $dir/$file\n" if $DEBUG;
        open( my $fh, '<:utf8', "$dir/$file" )
            or Carp::croak("Error: Failed to read $dir/$file: $!");
        flock( $fh, 1 );    # 1 == LOCK_SH
        while ( my $line = <$fh> ) {
            next if ( $line =~ /^\#/ );     # comment line
            next if ( $line =~ /^\s+$/ );   # blank line
            chomp($line);
            my ( $hex, $type, $val ) = split( /\t/, $line, 3 );
            next unless defined $type;      # not a "code<TAB>field" record
            $hex  =~ s/^U\+//;
            $type =~ s/^k//;

            # Trim the value, but keep the spaces inside it: they separate
            # the individual readings and the words of a definition.
            $val =~ s/\A\s+|\s+\z//g if defined $val;

            $sth->execute( $hex, $type, $val );
        }
        close($fh);
    }
    $sth->finish;
    $dbh->commit;
    $dbh->disconnect;

    # Swap the finished database into place
    unlink($db);
    rename( $tmp_db, $db )
        or Carp::croak("Error: Failed to move $tmp_db to $db: $!");

    return;
}
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
1; |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
__END__ |