line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::ZH::CEDICT::Textfile; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# Copyright (c) 2002-2005 Christian Renz |
4
|
|
|
|
|
|
|
# This module is free software; you can redistribute it and/or modify |
5
|
|
|
|
|
|
|
# it under the same terms as Perl itself. |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
# $Id: Textfile.pm,v 1.3 2002/08/13 20:57:45 crenz Exp $ |
8
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
6
|
use bytes; |
|
1
|
|
|
|
|
8
|
|
|
1
|
|
|
|
|
28
|
|
10
|
1
|
|
|
1
|
|
41
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
39
|
|
11
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
43
|
|
12
|
1
|
|
|
1
|
|
6
|
use vars qw($VERSION @ISA); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
563
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
$VERSION = '0.03'; |
15
|
|
|
|
|
|
|
@ISA = qw(Lingua::ZH::CEDICT); |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# Constructor: builds the object from a flat key => value argument list and
# fills in defaults. No file is opened here; loading happens in init().
#
# Recognised arguments (all optional):
#   filename       - path of the CEDICT text file   (default "cedict_ts.u8")
#   sourceCharset  - charset the file is written in (default "UTF-8")
#   targetCharset  - charset entries are stored in  (default "UTF-8")
#
# The spellings sourceCharSet / targetCharSet are accepted as aliases, since
# earlier versions of this constructor stored their defaults under those keys.
#
# Returns the hash reference blessed into $class.
sub new {
    my $class = shift;
    my $self  = +{@_};

    $self->{filename} ||= "cedict_ts.u8";

    # init() looks up the "Charset" spelling, so the defaults must live under
    # those keys or a caller who passes only one of the two charsets silently
    # gets no conversion. Both spellings are kept in sync so that code reading
    # either one sees the effective value.
    for my $kind (qw(source target)) {
        my $charset = $self->{"${kind}Charset"}
                   || $self->{"${kind}CharSet"}
                   || "UTF-8";
        $self->{"${kind}Charset"} = $charset;
        $self->{"${kind}CharSet"} = $charset;
    }

    return bless $self, $class;
}
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# Loads the dictionary file named by $self->{filename} into memory.
#
# The first line of the file is stored verbatim (including its newline) in
# $self->{version}. Every following line that contains a word character must
# have the form
#
#     TRAD SIMP [pin1 yin1] /english 1/english 2/
#
# and is stored in $self->{entry} as
#     [ TRAD, SIMP, pinyin, pinyin-without-tones, english ]
# where the toneless form comes from $self->removePinyinTones (not defined in
# this block; the object must provide it). Lines without a word character
# are skipped.
#
# If source and target charsets are both set and differ, each line is run
# through Text::Iconv before parsing. Text::Iconv is only loaded in that case.
#
# Dies if the file cannot be opened or a line does not match the expected
# format; with raise_error enabled, Text::Iconv is expected to die on a failed
# conversion as well.
#
# Returns the number of entries read (also stored in $self->{numEntries}).
sub init {
    my ($self) = @_;

    # Accept both spellings of the charset keys: the "CharSet" form is what
    # the constructor historically populated, the "Charset" form is what this
    # method has always consulted.
    my $source = $self->{sourceCharset} || $self->{sourceCharSet};
    my $target = $self->{targetCharset} || $self->{targetCharSet};

    my $iconv;
    if ($source && $target && ($source ne $target)) {
        require Text::Iconv;
        Text::Iconv->raise_error(1);
        $iconv = Text::Iconv->new($source, $target);
    }

    $self->{entry} = [];

    # Three-argument open: the file name is taken literally, so surrounding
    # whitespace or mode characters in it cannot change what gets opened.
    open my $fh, "<", $self->{filename}
        or die "Can't open $self->{filename}: $!\n";

    $self->{version} = <$fh>;

    # A lexical line variable keeps the caller's $_ intact.
    while (my $line = <$fh>) {
        next unless $line =~ /\w/;
        $line = $iconv->convert($line) if defined $iconv;

        # Bind the captures to names immediately instead of relying on
        # $1..$4 surviving the method call below.
        my ($trad, $simp, $pinyin, $english) =
            $line =~ m|^(\S+)\s(\S+)\s\[([a-z0-9: ]+)\]\s/(.*)/\s*$|
            or die "Line $.: Invalid entry '$line'\n";

        my $tonelessPinyin = $self->removePinyinTones($pinyin);
        push @{ $self->{entry} },
            [ $trad, $simp, $pinyin, $tonelessPinyin, $english ];
    }
    close $fh;

    $self->{numEntries} = scalar @{ $self->{entry} };
    return $self->{numEntries};
}
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
1; |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
__END__ |