| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package ORDB::Unihan; |
|
2
|
|
|
|
|
|
|
{ |
|
3
|
|
|
|
|
|
|
$ORDB::Unihan::VERSION = '0.03'; |
|
4
|
|
|
|
|
|
|
} |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
# ABSTRACT: An ORM for the published Unihan database |
|
7
|
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
26221
|
use strict; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
42
|
|
|
9
|
1
|
|
|
1
|
|
7
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
31
|
|
|
10
|
1
|
|
|
1
|
|
5
|
use Carp (); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
20
|
|
|
11
|
1
|
|
|
1
|
|
13
|
use File::Spec 0.80 (); |
|
|
1
|
|
|
|
|
35
|
|
|
|
1
|
|
|
|
|
22
|
|
|
12
|
1
|
|
|
1
|
|
8
|
use File::Path 2.04 (); |
|
|
1
|
|
|
|
|
20
|
|
|
|
1
|
|
|
|
|
31
|
|
|
13
|
1
|
|
|
1
|
|
252153
|
use File::Remove 1.42 (); |
|
|
1
|
|
|
|
|
2615
|
|
|
|
1
|
|
|
|
|
31
|
|
|
14
|
1
|
|
|
1
|
|
74555
|
use File::HomeDir 0.69 (); |
|
|
1
|
|
|
|
|
11204
|
|
|
|
1
|
|
|
|
|
34
|
|
|
15
|
1
|
|
|
1
|
|
1427
|
use LWP::Online (); |
|
|
1
|
|
|
|
|
129095
|
|
|
|
1
|
|
|
|
|
30
|
|
|
16
|
1
|
|
|
1
|
|
1242
|
use Params::Util 0.33 qw{ _STRING _NONNEGINT _HASH }; |
|
|
1
|
|
|
|
|
4526
|
|
|
|
1
|
|
|
|
|
80
|
|
|
17
|
1
|
|
|
1
|
|
8263
|
use DBI; |
|
|
1
|
|
|
|
|
47315
|
|
|
|
1
|
|
|
|
|
88
|
|
|
18
|
1
|
|
|
1
|
|
1257
|
use ORLite 1.22 (); |
|
|
1
|
|
|
|
|
29443
|
|
|
|
1
|
|
|
|
|
35
|
|
|
19
|
1
|
|
|
1
|
|
16
|
use vars qw{@ISA}; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
191
|
|
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
BEGIN { |
|
22
|
1
|
|
|
1
|
|
1837
|
@ISA = 'ORLite'; |
|
23
|
|
|
|
|
|
|
} |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
my $url = 'http://www.unicode.org/Public/UNIDATA/Unihan.zip'; |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
sub dir { |
|
28
|
0
|
0
|
|
0
|
0
|
|
File::Spec->catdir( File::HomeDir->my_data, |
|
29
|
|
|
|
|
|
|
( $^O eq 'MSWin32' ? 'Perl' : '.perl' ), |
|
30
|
|
|
|
|
|
|
'ORDB-Unihan', ); |
|
31
|
|
|
|
|
|
|
} |
|
32
|
0
|
|
|
0
|
1
|
|
sub sqlite_path { File::Spec->catfile( dir(), 'Unihan.sqlite' ) } |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
sub import { |
|
35
|
0
|
|
|
0
|
|
|
my $self = shift; |
|
36
|
0
|
|
0
|
|
|
|
my $class = ref $self || $self; |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# Check for debug mode |
|
39
|
0
|
|
|
|
|
|
my $DEBUG = 0; |
|
40
|
0
|
0
|
0
|
|
|
|
if ( scalar @_ and defined _STRING( $_[-1] ) and $_[-1] eq '-DEBUG' ) { |
|
|
|
|
0
|
|
|
|
|
|
41
|
0
|
|
|
|
|
|
$DEBUG = 1; |
|
42
|
0
|
|
|
|
|
|
pop @_; |
|
43
|
|
|
|
|
|
|
} |
|
44
|
0
|
|
|
|
|
|
my %params; |
|
45
|
0
|
0
|
|
|
|
|
if ( _HASH( $_[0] ) ) { |
|
46
|
0
|
|
|
|
|
|
%params = %{ $_[0] }; |
|
|
0
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
} |
|
48
|
|
|
|
|
|
|
else { |
|
49
|
0
|
|
|
|
|
|
%params = @_; |
|
50
|
|
|
|
|
|
|
} |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# where we save .sqlite to? |
|
53
|
|
|
|
|
|
|
# Determine the database directory |
|
54
|
0
|
|
|
|
|
|
my $dir = dir(); |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
# Create it if needed |
|
57
|
0
|
0
|
|
|
|
|
unless ( -e $dir ) { |
|
58
|
0
|
|
|
|
|
|
File::Path::mkpath( $dir, { verbose => 0 } ); |
|
59
|
|
|
|
|
|
|
} |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# Determine the mirror database file |
|
62
|
0
|
|
|
|
|
|
my $db = sqlite_path(); |
|
63
|
0
|
|
|
|
|
|
my $zip_path = File::Spec->catfile( $dir, 'Unihan.zip' ); |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# Create the default useragent |
|
66
|
0
|
|
|
|
|
|
my $show_progress = $DEBUG; |
|
67
|
0
|
|
|
|
|
|
my $useragent = delete $params{useragent}; |
|
68
|
0
|
0
|
|
|
|
|
unless ($useragent) { |
|
69
|
0
|
|
|
|
|
|
$useragent = LWP::UserAgent->new( |
|
70
|
|
|
|
|
|
|
timeout => 30, |
|
71
|
|
|
|
|
|
|
show_progress => $show_progress, |
|
72
|
|
|
|
|
|
|
); |
|
73
|
|
|
|
|
|
|
} |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# Do we need refecth? |
|
76
|
0
|
|
|
|
|
|
my $need_refetch = 1; |
|
77
|
|
|
|
|
|
|
{ |
|
78
|
0
|
|
|
|
|
|
my $last_mod_file = File::Spec->catfile( $dir, 'last_mod.txt' ); |
|
|
0
|
|
|
|
|
|
|
|
79
|
0
|
|
|
|
|
|
my $last_mod_local = 'N/A'; |
|
80
|
0
|
0
|
|
|
|
|
if ( open( my $fh, '<', $last_mod_file ) ) { |
|
81
|
0
|
|
|
|
|
|
flock( $fh, 1 ); |
|
82
|
0
|
|
|
|
|
|
$last_mod_local = <$fh>; |
|
83
|
0
|
|
|
|
|
|
chomp($last_mod_local); |
|
84
|
0
|
|
0
|
|
|
|
$last_mod_local ||= 0; |
|
85
|
0
|
|
|
|
|
|
close($fh); |
|
86
|
|
|
|
|
|
|
} |
|
87
|
|
|
|
|
|
|
|
|
88
|
0
|
|
|
|
|
|
my $res = $useragent->head($url); |
|
89
|
0
|
|
|
|
|
|
my $last_mod = $res->header('last-modified'); |
|
90
|
0
|
0
|
|
|
|
|
if ( $last_mod_local eq $last_mod ) { |
|
91
|
0
|
|
|
|
|
|
$need_refetch = 0; |
|
92
|
|
|
|
|
|
|
} |
|
93
|
|
|
|
|
|
|
else { |
|
94
|
0
|
0
|
|
|
|
|
print STDERR |
|
95
|
|
|
|
|
|
|
"Unihan.zip last-modified $last_mod, we have $last_mod_local\n" |
|
96
|
|
|
|
|
|
|
if $DEBUG; |
|
97
|
0
|
|
|
|
|
|
open( my $fh, '>', $last_mod_file ); |
|
98
|
0
|
|
|
|
|
|
flock( $fh, 2 ); |
|
99
|
0
|
|
|
|
|
|
print $fh $last_mod; |
|
100
|
0
|
|
|
|
|
|
close($fh); |
|
101
|
|
|
|
|
|
|
} |
|
102
|
|
|
|
|
|
|
} |
|
103
|
|
|
|
|
|
|
|
|
104
|
0
|
|
|
|
|
|
my $online = LWP::Online::online(); |
|
105
|
0
|
0
|
0
|
|
|
|
unless ( $online or -f $db ) { |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# Don't have the file and can't get it |
|
108
|
0
|
|
|
|
|
|
Carp::croak("Cannot fetch database without an internet connection"); |
|
109
|
|
|
|
|
|
|
} |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# refetch the .zip |
|
112
|
0
|
|
|
|
|
|
my $regenerated_sqlite = 0; |
|
113
|
0
|
0
|
0
|
|
|
|
if ( $need_refetch or !-e $zip_path ) { |
|
114
|
0
|
0
|
|
|
|
|
print STDERR "Mirror $url to $zip_path\n" if $DEBUG; |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# Fetch the archive |
|
117
|
0
|
|
|
|
|
|
my $response = $useragent->mirror( $url => $zip_path ); |
|
118
|
0
|
0
|
0
|
|
|
|
unless ( $response->is_success or $response->code == 304 ) { |
|
119
|
0
|
|
|
|
|
|
Carp::croak("Error: Failed to fetch $url"); |
|
120
|
|
|
|
|
|
|
} |
|
121
|
0
|
|
|
|
|
|
$regenerated_sqlite = 1; |
|
122
|
|
|
|
|
|
|
} |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
# Extract .txt file |
|
125
|
0
|
|
|
|
|
|
my $old_txt_file = File::Spec->catfile( $dir, 'Unihan.txt' ); |
|
126
|
0
|
0
|
|
|
|
|
unlink($old_txt_file) if -e $old_txt_file; |
|
127
|
0
|
|
|
|
|
|
my $txt_path = File::Spec->catfile( $dir, 'Unihan_Readings.txt' ); |
|
128
|
0
|
0
|
0
|
|
|
|
if ( $regenerated_sqlite or !-e $txt_path ) { |
|
129
|
0
|
0
|
|
|
|
|
print STDERR "Extract $zip_path to $dir\n" if $DEBUG; |
|
130
|
0
|
|
|
|
|
|
require Archive::Extract; |
|
131
|
0
|
|
|
|
|
|
my $ae = Archive::Extract->new( archive => $zip_path ); |
|
132
|
0
|
|
|
|
|
|
my $ok = $ae->extract( to => $dir ); |
|
133
|
0
|
0
|
|
|
|
|
unless ($ok) { |
|
134
|
0
|
|
|
|
|
|
Carp::croak("Error: Failed to read .zip"); |
|
135
|
|
|
|
|
|
|
} |
|
136
|
0
|
0
|
|
|
|
|
unless ( -e $txt_path ) { |
|
137
|
0
|
|
|
|
|
|
Carp::croak("Error: Failed to extract .zip"); |
|
138
|
|
|
|
|
|
|
} |
|
139
|
|
|
|
|
|
|
} |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# regenerate the .sqlite |
|
142
|
0
|
0
|
0
|
|
|
|
if ( $regenerated_sqlite or !-e $db ) { |
|
143
|
0
|
|
|
|
|
|
unlink($db); |
|
144
|
0
|
|
|
|
|
|
my $dbh = DBI->connect( |
|
145
|
|
|
|
|
|
|
"DBI:SQLite:$db", |
|
146
|
|
|
|
|
|
|
undef, undef, |
|
147
|
|
|
|
|
|
|
{ |
|
148
|
|
|
|
|
|
|
RaiseError => 1, |
|
149
|
|
|
|
|
|
|
PrintError => 1, |
|
150
|
|
|
|
|
|
|
} |
|
151
|
|
|
|
|
|
|
); |
|
152
|
0
|
|
|
|
|
|
$dbh->do('PRAGMA synchronous=OFF'); |
|
153
|
0
|
|
|
|
|
|
$dbh->do('PRAGMA count_changes=OFF'); |
|
154
|
0
|
|
|
|
|
|
$dbh->do('PRAGMA journal_mode=MEMORY'); |
|
155
|
0
|
|
|
|
|
|
$dbh->do('PRAGMA temp_store=MEMORY'); |
|
156
|
0
|
|
|
|
|
|
$dbh->do(<<'SQL'); |
|
157
|
|
|
|
|
|
|
CREATE TABLE unihan ( |
|
158
|
|
|
|
|
|
|
"hex" CHAR(5) NOT NULL, |
|
159
|
|
|
|
|
|
|
"type" VARCHAR(18) NOT NULL, |
|
160
|
|
|
|
|
|
|
"val" VARCHAR(255), |
|
161
|
|
|
|
|
|
|
PRIMARY KEY ("hex", "type") |
|
162
|
|
|
|
|
|
|
) |
|
163
|
|
|
|
|
|
|
SQL |
|
164
|
0
|
|
|
|
|
|
my $sql = |
|
165
|
|
|
|
|
|
|
'INSERT INTO "unihan" ("hex", "type", "val") VALUES (?, ?, ?)'; |
|
166
|
0
|
|
|
|
|
|
my $sth = $dbh->prepare($sql); |
|
167
|
|
|
|
|
|
|
|
|
168
|
0
|
|
|
|
|
|
opendir( my $fdir, $dir ); |
|
169
|
0
|
|
|
|
|
|
my @files = grep { /.txt$/ } readdir($fdir); |
|
|
0
|
|
|
|
|
|
|
|
170
|
0
|
|
|
|
|
|
closedir($fdir); |
|
171
|
0
|
|
|
|
|
|
foreach my $file (@files) { |
|
172
|
0
|
0
|
|
|
|
|
next if $file eq 'last_mod.txt'; |
|
173
|
0
|
0
|
|
|
|
|
print STDERR "Populate $dir/$file\n" if $DEBUG; |
|
174
|
0
|
|
|
|
|
|
open( my $fh, '<:utf8', "$dir/$file" ); |
|
175
|
0
|
|
|
|
|
|
flock( $fh, 1 ); |
|
176
|
0
|
|
|
|
|
|
while ( my $line = <$fh> ) { |
|
177
|
0
|
0
|
|
|
|
|
next if ( $line =~ /^\#/ ); # comment line |
|
178
|
0
|
0
|
|
|
|
|
next if ( $line =~ /^\s+$/ ); # blank line |
|
179
|
0
|
|
|
|
|
|
chomp($line); |
|
180
|
0
|
|
|
|
|
|
my ( $hex, $type, $val ) = split( /\t/, $line, 3 ); |
|
181
|
0
|
|
|
|
|
|
$hex =~ s/^U\+//; |
|
182
|
0
|
|
|
|
|
|
$type =~ s/^k//; |
|
183
|
0
|
|
|
|
|
|
$val =~ s/(^\s|\s+)//g; |
|
184
|
0
|
0
|
|
|
|
|
$sth->execute( $hex, $type, $val ) |
|
185
|
|
|
|
|
|
|
or die "$dbh:errstr $type, $hex, $val"; |
|
186
|
|
|
|
|
|
|
} |
|
187
|
0
|
|
|
|
|
|
close($fh); |
|
188
|
|
|
|
|
|
|
} |
|
189
|
|
|
|
|
|
|
} |
|
190
|
|
|
|
|
|
|
|
|
191
|
0
|
|
|
|
|
|
$params{file} = $db; |
|
192
|
0
|
|
|
|
|
|
$params{readonly} = 1; |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# Hand off to the main ORLite class. |
|
195
|
0
|
0
|
|
|
|
|
$class->SUPER::import( \%params, $DEBUG ? '-DEBUG' : () ); |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
} |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
1; |
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
__END__ |