| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Lingua::ZH::CEDICT::Textfile; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# Copyright (c) 2002-2005 Christian Renz |
|
4
|
|
|
|
|
|
|
# This module is free software; you can redistribute it and/or modify |
|
5
|
|
|
|
|
|
|
# it under the same terms as Perl itself. |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
# $Id: Textfile.pm,v 1.3 2002/08/13 20:57:45 crenz Exp $ |
|
8
|
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
6
|
use bytes; |
|
|
1
|
|
|
|
|
8
|
|
|
|
1
|
|
|
|
|
28
|
|
|
10
|
1
|
|
|
1
|
|
41
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
39
|
|
|
11
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
43
|
|
|
12
|
1
|
|
|
1
|
|
6
|
use vars qw($VERSION @ISA); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
563
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
$VERSION = '0.03'; |
|
15
|
|
|
|
|
|
|
@ISA = qw(Lingua::ZH::CEDICT); |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# Constructor.
#
# Called as a class method with a flat list of key/value options, which
# become the fields of the new object. Any of the options below that is
# missing (or false) is replaced by its default:
#
#   filename       "cedict_ts.u8"
#   sourceCharSet  "UTF-8"
#   targetCharSet  "UTF-8"
#
# Returns the hash reference blessed into the invoking class.
sub new {
    my ($class, @options) = @_;
    my $self = {@options};

    my %default_for = (
        filename      => "cedict_ts.u8",
        sourceCharSet => "UTF-8",
        targetCharSet => "UTF-8",
    );

    # Same semantics as "||=": only a false value is overwritten.
    for my $option (keys %default_for) {
        next if $self->{$option};
        $self->{$option} = $default_for{$option};
    }

    return bless $self, $class;
}
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# Read the dictionary text file named by $self->{filename} into memory.
#
# The first line of the file is stored verbatim in $self->{version}.
# Every following line that contains at least one word character must have
# the shape
#
#   FIRST SECOND [pinyin] /text/
#
# and is appended to @{ $self->{entry} } as
#   [ FIRST, SECOND, pinyin, toneless pinyin, text ]
# where the toneless pinyin comes from $self->removePinyinTones().
# $self->{numEntries} is set to the number of entries read.
#
# If a source and a target character set are both configured and differ,
# each line is converted with Text::Iconv before it is parsed.
#
# Dies if the file cannot be opened or if a line does not match the
# expected shape; Text::Iconv is asked to raise errors as well.
sub init {
    my ($self) = @_;

    # new() stores its defaults as "sourceCharSet"/"targetCharSet", whereas
    # this method used to look only at "sourceCharset"/"targetCharset", so
    # the defaults were never seen and a caller supplying just one side got
    # no conversion at all. Accept both spellings; an explicitly supplied
    # lower-case key wins, even when it is empty (which disables conversion).
    my $source_charset = exists $self->{sourceCharset}
        ? $self->{sourceCharset}
        : $self->{sourceCharSet};
    my $target_charset = exists $self->{targetCharset}
        ? $self->{targetCharset}
        : $self->{targetCharSet};

    my $iconv;
    if ($source_charset && $target_charset
        && ($source_charset ne $target_charset)) {
        require Text::Iconv;
        Text::Iconv->raise_error(1);
        $iconv = Text::Iconv->new($source_charset, $target_charset);
    }

    $self->{entry} = [];

    # Three-argument open: the file name is taken literally, so leading or
    # trailing whitespace and mode characters in it are not interpreted.
    open my $fh, '<', $self->{filename}
        or die "Can't open $self->{filename}: $!\n";

    $self->{version} = <$fh>;

    while (<$fh>) {
        next unless /\w/;
        $_ = $iconv->convert($_) if defined $iconv;

        # Copy the captures into lexicals right away instead of relying on
        # $1..$4 staying intact across the method call below.
        my ($first_word, $second_word, $pinyin, $slash_text) =
            m|^(\S+)\s(\S+)\s\[([a-z0-9: ]+)\]\s/(.*)/\s*$|
            or die "Line $.: Invalid entry '$_'\n";

        my $tonelessPinyin = $self->removePinyinTones($pinyin);
        push @{$self->{entry}},
            [ $first_word, $second_word, $pinyin, $tonelessPinyin, $slash_text ];
    }
    close $fh;

    $self->{numEntries} = scalar @{$self->{entry}};
}
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
1; |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
__END__ |