line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Treex::PML::Schema::CDATA; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
3
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
21
|
|
4
|
1
|
|
|
1
|
|
3
|
use warnings; |
|
1
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
19
|
|
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
3
|
use vars qw($VERSION); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
35
|
|
7
|
|
|
|
|
|
|
BEGIN { |
8
|
1
|
|
|
1
|
|
12
|
$VERSION='2.22'; # version template |
9
|
|
|
|
|
|
|
} |
10
|
1
|
|
|
1
|
|
3
|
no warnings 'uninitialized'; |
|
1
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
24
|
|
11
|
1
|
|
|
1
|
|
2
|
use Carp; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
43
|
|
12
|
|
|
|
|
|
|
|
13
|
1
|
|
|
1
|
|
6
|
use Treex::PML::Schema::Constants; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
66
|
|
14
|
1
|
|
|
1
|
|
3
|
use base qw( Treex::PML::Schema::Decl ); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
3050
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 NAME |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
Treex::PML::Schema::CDATA - implements cdata declaration. |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 INHERITANCE |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
This class inherits from L. |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 METHODS |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
See the super-class for the complete list. |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=over 3 |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=item $decl->is_atomic () |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
Returns 1. |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=item $decl->get_decl_type () |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
Returns the constant PML_CDATA_DECL. |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=item $decl->get_decl_type_str () |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
Returns the string 'cdata'. |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=item $decl->get_format () |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
Return identifier of the data format. |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=item $decl->set_format (format) |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
Set format to a given format identifier. |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=item $decl->check_string_format (string, format-id?) |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
If the C argument is specified, return 1 if the string |
53
|
|
|
|
|
|
|
confirms to the given format. If the C argument is |
54
|
|
|
|
|
|
|
omitted, return 1 if the string conforms to the format specified in |
55
|
|
|
|
|
|
|
the type declaration in the PML schema. Otherwise return 0. |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=item $decl->validate_object($object) |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
See C in L. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=item $decl->supported_formats |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
Returns a list of formats for which the current implementation |
64
|
|
|
|
|
|
|
of C provides a reasonable validator. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
Currently all formats defined in the PML Schema specification revision |
67
|
|
|
|
|
|
|
1.1.2 are supported, namely: |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
any, anyURI, base64Binary, boolean, byte, date, dateTime, decimal, |
70
|
|
|
|
|
|
|
double, duration, float, gDay, gMonth, gMonthDay, gYear, gYearMonth, |
71
|
|
|
|
|
|
|
hexBinary, ID, IDREF, IDREFS, int, integer, language, long, Name, |
72
|
|
|
|
|
|
|
NCName, negativeInteger, NMTOKEN, NMTOKENS, nonNegativeInteger, |
73
|
|
|
|
|
|
|
nonPositiveInteger, normalizedString, PMLREF, positiveInteger, short, |
74
|
|
|
|
|
|
|
string, time, token, unsignedByte, unsignedInt, unsignedLong, |
75
|
|
|
|
|
|
|
unsignedShort |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=item $decl->get_content_decl () |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
Returns undef. |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=back |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=cut |
85
|
|
|
|
|
|
|
|
86
|
0
|
|
|
0
|
1
|
|
sub is_atomic { 1 } |
87
|
0
|
|
|
0
|
1
|
|
sub get_decl_type { return PML_CDATA_DECL; } |
88
|
0
|
|
|
0
|
1
|
|
sub get_decl_type_str { return 'cdata'; } |
89
|
0
|
|
|
0
|
1
|
|
sub get_content_decl { return(undef); } |
90
|
0
|
|
|
0
|
1
|
|
sub get_format { return $_[0]->{format} } |
91
|
0
|
|
|
0
|
1
|
|
sub set_format { $_[0]->{format} = $_[1] } |
92
|
|
|
|
|
|
|
sub init { |
93
|
0
|
|
|
0
|
0
|
|
my ($self,$opts)=@_; |
94
|
0
|
|
|
|
|
|
$self->{-parent}{-decl} = 'cdata'; |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
{ |
98
|
|
|
|
|
|
|
our %format_re = ( |
99
|
|
|
|
|
|
|
any => sub { 1 }, # to make it appear in the list of supported formats |
100
|
|
|
|
|
|
|
nonNegativeInteger => qr(^\s*(?:[+]?\d+|-0+)\s*$), |
101
|
|
|
|
|
|
|
positiveInteger => qr(^\s*[+]?\d*[1-9]\d*\s*$), # ? is zero allowed lexically |
102
|
|
|
|
|
|
|
negativeInteger => qr(^\s*-\d*[1-9]\d*\s*$), # ? is zero allowed lexically |
103
|
|
|
|
|
|
|
nonPositiveInteger => qr(^\s*(?:-\d+|[+]?0+)\s*$), |
104
|
|
|
|
|
|
|
decimal => qr(^\s*[+-]?\d+(?:\.\d*)?\s*$), |
105
|
|
|
|
|
|
|
boolean => qr(^(?:[01]|true|false)$), |
106
|
|
|
|
|
|
|
); |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
my $BaseChar = '\x{0041}-\x{005A}\x{0061}-\x{007A}\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}'. |
109
|
|
|
|
|
|
|
'\x{00F8}-\x{00FF}\x{0100}-\x{0131}\x{0134}-\x{013E}\x{0141}-\x{0148}\x{014A}-\x{017E}'. |
110
|
|
|
|
|
|
|
'\x{0180}-\x{01C3}\x{01CD}-\x{01F0}\x{01F4}-\x{01F5}\x{01FA}-\x{0217}\x{0250}-\x{02A8}'. |
111
|
|
|
|
|
|
|
'\x{02BB}-\x{02C1}\x{0386}\x{0388}-\x{038A}\x{038C}\x{038E}-\x{03A1}\x{03A3}-\x{03CE}'. |
112
|
|
|
|
|
|
|
'\x{03D0}-\x{03D6}\x{03DA}\x{03DC}\x{03DE}\x{03E0}\x{03E2}-\x{03F3}\x{0401}-\x{040C}'. |
113
|
|
|
|
|
|
|
'\x{040E}-\x{044F}\x{0451}-\x{045C}\x{045E}-\x{0481}\x{0490}-\x{04C4}\x{04C7}-\x{04C8}'. |
114
|
|
|
|
|
|
|
'\x{04CB}-\x{04CC}\x{04D0}-\x{04EB}\x{04EE}-\x{04F5}\x{04F8}-\x{04F9}\x{0531}-\x{0556}'. |
115
|
|
|
|
|
|
|
'\x{0559}\x{0561}-\x{0586}\x{05D0}-\x{05EA}\x{05F0}-\x{05F2}\x{0621}-\x{063A}\x{0641}-'. |
116
|
|
|
|
|
|
|
'\x{064A}\x{0671}-\x{06B7}\x{06BA}-\x{06BE}\x{06C0}-\x{06CE}\x{06D0}-\x{06D3}\x{06D5}\x{06E5}-'. |
117
|
|
|
|
|
|
|
'\x{06E6}\x{0905}-\x{0939}\x{093D}\x{0958}-\x{0961}\x{0985}-\x{098C}\x{098F}-\x{0990}\x{0993}-'. |
118
|
|
|
|
|
|
|
'\x{09A8}\x{09AA}-\x{09B0}\x{09B2}\x{09B6}-\x{09B9}\x{09DC}-\x{09DD}\x{09DF}-\x{09E1}\x{09F0}-'. |
119
|
|
|
|
|
|
|
'\x{09F1}\x{0A05}-\x{0A0A}\x{0A0F}-\x{0A10}\x{0A13}-\x{0A28}\x{0A2A}-\x{0A30}\x{0A32}-'. |
120
|
|
|
|
|
|
|
'\x{0A33}\x{0A35}-\x{0A36}\x{0A38}-\x{0A39}\x{0A59}-\x{0A5C}\x{0A5E}\x{0A72}-\x{0A74}\x{0A85}-'. |
121
|
|
|
|
|
|
|
'\x{0A8B}\x{0A8D}\x{0A8F}-\x{0A91}\x{0A93}-\x{0AA8}\x{0AAA}-\x{0AB0}\x{0AB2}-\x{0AB3}\x{0AB5}-'. |
122
|
|
|
|
|
|
|
'\x{0AB9}\x{0ABD}\x{0AE0}\x{0B05}-\x{0B0C}\x{0B0F}-\x{0B10}\x{0B13}-\x{0B28}\x{0B2A}-\x{0B30}'. |
123
|
|
|
|
|
|
|
'\x{0B32}-\x{0B33}\x{0B36}-\x{0B39}\x{0B3D}\x{0B5C}-\x{0B5D}\x{0B5F}-\x{0B61}\x{0B85}-'. |
124
|
|
|
|
|
|
|
'\x{0B8A}\x{0B8E}-\x{0B90}\x{0B92}-\x{0B95}\x{0B99}-\x{0B9A}\x{0B9C}\x{0B9E}-\x{0B9F}\x{0BA3}-'. |
125
|
|
|
|
|
|
|
'\x{0BA4}\x{0BA8}-\x{0BAA}\x{0BAE}-\x{0BB5}\x{0BB7}-\x{0BB9}\x{0C05}-\x{0C0C}\x{0C0E}-'. |
126
|
|
|
|
|
|
|
'\x{0C10}\x{0C12}-\x{0C28}\x{0C2A}-\x{0C33}\x{0C35}-\x{0C39}\x{0C60}-\x{0C61}\x{0C85}-'. |
127
|
|
|
|
|
|
|
'\x{0C8C}\x{0C8E}-\x{0C90}\x{0C92}-\x{0CA8}\x{0CAA}-\x{0CB3}\x{0CB5}-\x{0CB9}\x{0CDE}\x{0CE0}-'. |
128
|
|
|
|
|
|
|
'\x{0CE1}\x{0D05}-\x{0D0C}\x{0D0E}-\x{0D10}\x{0D12}-\x{0D28}\x{0D2A}-\x{0D39}\x{0D60}-'. |
129
|
|
|
|
|
|
|
'\x{0D61}\x{0E01}-\x{0E2E}\x{0E30}\x{0E32}-\x{0E33}\x{0E40}-\x{0E45}\x{0E81}-\x{0E82}\x{0E84}'. |
130
|
|
|
|
|
|
|
'\x{0E87}-\x{0E88}\x{0E8A}\x{0E8D}\x{0E94}-\x{0E97}\x{0E99}-\x{0E9F}\x{0EA1}-\x{0EA3}\x{0EA5}'. |
131
|
|
|
|
|
|
|
'\x{0EA7}\x{0EAA}-\x{0EAB}\x{0EAD}-\x{0EAE}\x{0EB0}\x{0EB2}-\x{0EB3}\x{0EBD}\x{0EC0}-\x{0EC4}'. |
132
|
|
|
|
|
|
|
'\x{0F40}-\x{0F47}\x{0F49}-\x{0F69}\x{10A0}-\x{10C5}\x{10D0}-\x{10F6}\x{1100}\x{1102}-'. |
133
|
|
|
|
|
|
|
'\x{1103}\x{1105}-\x{1107}\x{1109}\x{110B}-\x{110C}\x{110E}-\x{1112}\x{113C}\x{113E}\x{1140}'. |
134
|
|
|
|
|
|
|
'\x{114C}\x{114E}\x{1150}\x{1154}-\x{1155}\x{1159}\x{115F}-\x{1161}\x{1163}\x{1165}\x{1167}'. |
135
|
|
|
|
|
|
|
'\x{1169}\x{116D}-\x{116E}\x{1172}-\x{1173}\x{1175}\x{119E}\x{11A8}\x{11AB}\x{11AE}-\x{11AF}'. |
136
|
|
|
|
|
|
|
'\x{11B7}-\x{11B8}\x{11BA}\x{11BC}-\x{11C2}\x{11EB}\x{11F0}\x{11F9}\x{1E00}-\x{1E9B}\x{1EA0}-'. |
137
|
|
|
|
|
|
|
'\x{1EF9}\x{1F00}-\x{1F15}\x{1F18}-\x{1F1D}\x{1F20}-\x{1F45}\x{1F48}-\x{1F4D}\x{1F50}-'. |
138
|
|
|
|
|
|
|
'\x{1F57}\x{1F59}\x{1F5B}\x{1F5D}\x{1F5F}-\x{1F7D}\x{1F80}-\x{1FB4}\x{1FB6}-\x{1FBC}\x{1FBE}'. |
139
|
|
|
|
|
|
|
'\x{1FC2}-\x{1FC4}\x{1FC6}-\x{1FCC}\x{1FD0}-\x{1FD3}\x{1FD6}-\x{1FDB}\x{1FE0}-\x{1FEC}'. |
140
|
|
|
|
|
|
|
'\x{1FF2}-\x{1FF4}\x{1FF6}-\x{1FFC}\x{2126}\x{212A}-\x{212B}\x{212E}\x{2180}-\x{2182}\x{3041}-'. |
141
|
|
|
|
|
|
|
'\x{3094}\x{30A1}-\x{30FA}\x{3105}-\x{312C}\x{AC00}-\x{D7A3}'; |
142
|
|
|
|
|
|
|
my $Ideographic = '\x{4E00}-\x{9FA5}\x{3007}\x{3021}-\x{3029}'; |
143
|
|
|
|
|
|
|
my $Letter = "$BaseChar$Ideographic"; |
144
|
|
|
|
|
|
|
my $Digit = |
145
|
|
|
|
|
|
|
'\x{0030}-\x{0039}\x{0660}-\x{0669}\x{06F0}-\x{06F9}\x{0966}-\x{096F}\x{09E6}-\x{09EF}'. |
146
|
|
|
|
|
|
|
'\x{0A66}-\x{0A6F}\x{0AE6}-\x{0AEF}\x{0B66}-\x{0B6F}\x{0BE7}-\x{0BEF}\x{0C66}-\x{0C6F}'. |
147
|
|
|
|
|
|
|
'\x{0CE6}-\x{0CEF}\x{0D66}-\x{0D6F}\x{0E50}-\x{0E59}\x{0ED0}-\x{0ED9}\x{0F20}-\x{0F29}'; |
148
|
|
|
|
|
|
|
my $CombiningChar = |
149
|
|
|
|
|
|
|
'\x{0300}-\x{0345}\x{0360}-\x{0361}\x{0483}-\x{0486}\x{0591}-\x{05A1}\x{05A3}-\x{05B9}'. |
150
|
|
|
|
|
|
|
'\x{05BB}-\x{05BD}\x{05BF}\x{05C1}-\x{05C2}\x{05C4}\x{064B}-\x{0652}\x{0670}\x{06D6}-\x{06DC}'. |
151
|
|
|
|
|
|
|
'\x{06DD}-\x{06DF}\x{06E0}-\x{06E4}\x{06E7}-\x{06E8}\x{06EA}-\x{06ED}\x{0901}-\x{0903}'. |
152
|
|
|
|
|
|
|
'\x{093C}\x{093E}-\x{094C}\x{094D}\x{0951}-\x{0954}\x{0962}-\x{0963}\x{0981}-\x{0983}\x{09BC}'. |
153
|
|
|
|
|
|
|
'\x{09BE}\x{09BF}\x{09C0}-\x{09C4}\x{09C7}-\x{09C8}\x{09CB}-\x{09CD}\x{09D7}\x{09E2}-\x{09E3}'. |
154
|
|
|
|
|
|
|
'\x{0A02}\x{0A3C}\x{0A3E}\x{0A3F}\x{0A40}-\x{0A42}\x{0A47}-\x{0A48}\x{0A4B}-\x{0A4D}\x{0A70}-'. |
155
|
|
|
|
|
|
|
'\x{0A71}\x{0A81}-\x{0A83}\x{0ABC}\x{0ABE}-\x{0AC5}\x{0AC7}-\x{0AC9}\x{0ACB}-\x{0ACD}\x{0B01}-'. |
156
|
|
|
|
|
|
|
'\x{0B03}\x{0B3C}\x{0B3E}-\x{0B43}\x{0B47}-\x{0B48}\x{0B4B}-\x{0B4D}\x{0B56}-\x{0B57}\x{0B82}-'. |
157
|
|
|
|
|
|
|
'\x{0B83}\x{0BBE}-\x{0BC2}\x{0BC6}-\x{0BC8}\x{0BCA}-\x{0BCD}\x{0BD7}\x{0C01}-\x{0C03}\x{0C3E}-'. |
158
|
|
|
|
|
|
|
'\x{0C44}\x{0C46}-\x{0C48}\x{0C4A}-\x{0C4D}\x{0C55}-\x{0C56}\x{0C82}-\x{0C83}\x{0CBE}-'. |
159
|
|
|
|
|
|
|
'\x{0CC4}\x{0CC6}-\x{0CC8}\x{0CCA}-\x{0CCD}\x{0CD5}-\x{0CD6}\x{0D02}-\x{0D03}\x{0D3E}-'. |
160
|
|
|
|
|
|
|
'\x{0D43}\x{0D46}-\x{0D48}\x{0D4A}-\x{0D4D}\x{0D57}\x{0E31}\x{0E34}-\x{0E3A}\x{0E47}-\x{0E4E}'. |
161
|
|
|
|
|
|
|
'\x{0EB1}\x{0EB4}-\x{0EB9}\x{0EBB}-\x{0EBC}\x{0EC8}-\x{0ECD}\x{0F18}-\x{0F19}\x{0F35}\x{0F37}'. |
162
|
|
|
|
|
|
|
'\x{0F39}\x{0F3E}\x{0F3F}\x{0F71}-\x{0F84}\x{0F86}-\x{0F8B}\x{0F90}-\x{0F95}\x{0F97}\x{0F99}-'. |
163
|
|
|
|
|
|
|
'\x{0FAD}\x{0FB1}-\x{0FB7}\x{0FB9}\x{20D0}-\x{20DC}\x{20E1}\x{302A}-\x{302F}\x{3099}\x{309A}'; |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
my $Extender = |
166
|
|
|
|
|
|
|
'\x{00B7}\x{02D0}\x{02D1}\x{0387}\x{0640}\x{0E46}\x{0EC6}\x{3005}\x{3031}-\x{3035}\x{309D}-'. |
167
|
|
|
|
|
|
|
'\x{309E}\x{30FC}-\x{30FE}'; |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
our $NameChar = "[-._:$Letter$Digit$CombiningChar$Extender]"; |
170
|
|
|
|
|
|
|
our $NCNameChar = "[-._$Letter$Digit$CombiningChar$Extender]"; |
171
|
|
|
|
|
|
|
our $Name = "(?:[_:$Letter]$NameChar*)"; |
172
|
|
|
|
|
|
|
our $NCName = "(?:[_$Letter]$NCNameChar*)"; |
173
|
|
|
|
|
|
|
our $NmToken = "(?:$NameChar+)"; |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
$format_re{ID} = $format_re{IDREF} = $format_re{NCName} = qr(^$NCName$)o; |
176
|
|
|
|
|
|
|
$format_re{PMLREF} = qr(^$NCName(?:\#$NCName)?$)o; |
177
|
|
|
|
|
|
|
$format_re{Name} = qr(^$Name$)o; |
178
|
|
|
|
|
|
|
$format_re{NMTOKEN} = qr(^$NameChar+$)o; |
179
|
|
|
|
|
|
|
$format_re{NMTOKENS} = qr(^$NmToken(?:\x20$NmToken)*$)o; |
180
|
|
|
|
|
|
|
$format_re{IDREFS} = qr(^\s*$NCName(?:\s+$NCName)*\s*$)o; |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
our $Space = '[\x20]'; |
183
|
|
|
|
|
|
|
our $TokChar = '(?:[\x21-\x{D7FF}]|[\x{E000}-\x{FFFD}]|[\x{10000}-\x{10FFFF}])'; # [\x10000-\x10FFFF] |
184
|
|
|
|
|
|
|
our $NoNorm = '\x09|\x0a|\x0d'; |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
our $NormChar = "(?:$Space|$TokChar)"; |
187
|
|
|
|
|
|
|
our $Char = "(?:$NoNorm|$NormChar)"; |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
$format_re{string} = qr(^$Char*$)o; |
190
|
|
|
|
|
|
|
$format_re{normalizedString} = qr(^$NormChar*$)o; |
191
|
|
|
|
|
|
|
# Token :no \x9,\xA,\xD, no leading/trailing space, |
192
|
|
|
|
|
|
|
# no internal sequence of two or more spaces |
193
|
|
|
|
|
|
|
$format_re{token} = qr(^(?:$TokChar(?:$TokChar*(?:$Space$TokChar)?)*)?$)o; |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
our $B64 = '[A-Za-z0-9+/]'; |
196
|
|
|
|
|
|
|
our $B16 = '[AEIMQUYcgkosw048]'; |
197
|
|
|
|
|
|
|
our $B04 = '[AQgw]'; |
198
|
|
|
|
|
|
|
our $B04S = "$B04\x20?"; |
199
|
|
|
|
|
|
|
our $B16S = "$B16\x20?"; |
200
|
|
|
|
|
|
|
our $B64S = "$B64\x20?"; |
201
|
|
|
|
|
|
|
our $Base64Binary = "(?:(?:$B64S$B64S$B64S$B64S)*(?:(?:$B64S$B64S$B64S$B64)|(?:$B64S$B64S$B16S=)|(?:$B64S$B04S=\x20?=)))?"; |
202
|
|
|
|
|
|
|
$format_re{base64Binary} = qr(^$Base64Binary$)o; |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
# URI (RFC 2396, RFC 2732) |
205
|
|
|
|
|
|
|
our $digit = '[0-9]'; |
206
|
|
|
|
|
|
|
our $upalpha = '[A-Z]'; |
207
|
|
|
|
|
|
|
our $lowalpha = '[a-z]'; |
208
|
|
|
|
|
|
|
our $alpha = "(?:$lowalpha | $upalpha)"; |
209
|
|
|
|
|
|
|
our $alphanum = "(?:$alpha | $digit)"; |
210
|
|
|
|
|
|
|
our $hex = "(?:$digit | [A-Fa-f])"; |
211
|
|
|
|
|
|
|
our $escaped = "(?:[%] $hex $hex)"; |
212
|
|
|
|
|
|
|
our $mark = "[-_.!~*'()]"; |
213
|
|
|
|
|
|
|
our $unreserved = "(?:$alphanum | $mark)"; |
214
|
|
|
|
|
|
|
our $reserved = '(?:[][;/?:@&=+] | [\$,])'; |
215
|
|
|
|
|
|
|
our $uric = "(?:$reserved | $unreserved | $escaped)"; |
216
|
|
|
|
|
|
|
our $fragment = "(?:$uric*)"; |
217
|
|
|
|
|
|
|
our $query = "(?:$uric*)"; |
218
|
|
|
|
|
|
|
our $pchar = "(?:$unreserved | $escaped | [:@&=+\$,])"; |
219
|
|
|
|
|
|
|
our $param = "(?:$pchar*)"; |
220
|
|
|
|
|
|
|
our $segment = "(?:$pchar* (?: [;] $param )*)"; |
221
|
|
|
|
|
|
|
our $path_segments= "(?:$segment (?: [/] $segment )*)"; |
222
|
|
|
|
|
|
|
our $port = "(?:$digit*)"; |
223
|
|
|
|
|
|
|
our $IPv4_address = "(?:${digit}{1,3} [.] ${digit}{1,3} [.] ${digit}{1,3} [.] ${digit}{1,3})"; |
224
|
|
|
|
|
|
|
our $hex4 = "(?:${hex}{1,4})"; |
225
|
|
|
|
|
|
|
our $hexseq = "(?:$hex4 (?: : hex4)*)"; |
226
|
|
|
|
|
|
|
our $hexpart = "(?:$hexseq | $hexseq :: $hexseq ? | :: $hexseq ?)"; |
227
|
|
|
|
|
|
|
our $IPv6prefix = "(?:$hexpart / ${digit}{1,2})"; |
228
|
|
|
|
|
|
|
our $IPv6_address = "(?:$hexpart (?: : IPv4address )?)"; |
229
|
|
|
|
|
|
|
our $ipv6reference ="(?:[[](?:$IPv6_address)[]])"; |
230
|
|
|
|
|
|
|
our $toplabel = "(?:$alpha | $alpha (?: $alphanum | [-] )* $alphanum)"; |
231
|
|
|
|
|
|
|
our $domainlabel = "(?:$alphanum | $alphanum (?: $alphanum | [-] )* $alphanum)"; |
232
|
|
|
|
|
|
|
our $hostname = "(?:(?: ${domainlabel} [.] )* $toplabel (?: [.] )?)"; |
233
|
|
|
|
|
|
|
our $host = "(?:$hostname | $IPv4_address | $ipv6reference)"; |
234
|
|
|
|
|
|
|
our $hostport = "(?:$host (?: [:] $port )?)"; |
235
|
|
|
|
|
|
|
our $userinfo = "(?:(?: $unreserved | $escaped | [;:&=+\$,] )*)"; |
236
|
|
|
|
|
|
|
our $server = "(?:(?: (?: ${userinfo} [@] )? $hostport )?)"; |
237
|
|
|
|
|
|
|
our $reg_name = "(?:(?: $unreserved | $escaped | [\$,] | [;:@&=+] )+)"; |
238
|
|
|
|
|
|
|
our $authority = "(?:$server | $reg_name)"; |
239
|
|
|
|
|
|
|
our $scheme = "(?:$alpha (?: $alpha | $digit | [-+.] )*)"; |
240
|
|
|
|
|
|
|
our $rel_segment = "(?:(?: $unreserved | $escaped | [;@&=+\$,] )+)"; |
241
|
|
|
|
|
|
|
our $abs_path = "(?: / $path_segments)"; |
242
|
|
|
|
|
|
|
our $rel_path = "(?:$rel_segment (?: $abs_path )?)"; |
243
|
|
|
|
|
|
|
our $net_path = "(?: // $authority (?: $abs_path )?)"; |
244
|
|
|
|
|
|
|
our $uric_no_slash= "(?:$unreserved | $escaped | [;?:@] | [&=+\$,])"; |
245
|
|
|
|
|
|
|
our $opaque_part = "(?:$uric_no_slash $uric*)"; |
246
|
|
|
|
|
|
|
our $path = "(?:(?: $abs_path | $opaque_part )?)"; |
247
|
|
|
|
|
|
|
our $hier_part = "(?:(?: $net_path | $abs_path ) (?: [?] $query )?)"; |
248
|
|
|
|
|
|
|
our $relativeURI = "(?:(?: $net_path | $abs_path | $rel_path ) (?: [?] $query )?)"; |
249
|
|
|
|
|
|
|
our $absoluteURI = "(?:${scheme} [:] (?: $hier_part | $opaque_part ))"; |
250
|
|
|
|
|
|
|
our $URI_reference = "(?:$absoluteURI|$relativeURI)?(?:[#]$fragment)?"; |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
$format_re{anyURI} = qr(^ $URI_reference $)x; |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
$format_re{hexBinary} = qr(^(?:$hex$hex)*$)o; |
255
|
|
|
|
|
|
|
$format_re{language} = qr(^(?:[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*)$)o; |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
sub _parse_real { |
258
|
0
|
|
|
0
|
|
|
my ($value,$exp) = @_; |
259
|
0
|
0
|
0
|
|
|
|
return 0 unless |
260
|
|
|
|
|
|
|
($value ne q{} and |
261
|
|
|
|
|
|
|
$value =~ / |
262
|
|
|
|
|
|
|
^ |
263
|
|
|
|
|
|
|
(?:[+-])? # sign |
264
|
|
|
|
|
|
|
(?: |
265
|
|
|
|
|
|
|
(?:INF) # infinity |
266
|
|
|
|
|
|
|
| (?:NaN) # not a number |
267
|
|
|
|
|
|
|
| (?:\d+(?:\.\d+)?) # mantissa |
268
|
|
|
|
|
|
|
(?:[eE] # exponent |
269
|
|
|
|
|
|
|
([+-])? # sign ($1) |
270
|
|
|
|
|
|
|
(\d+) # value ($2) |
271
|
|
|
|
|
|
|
)? |
272
|
|
|
|
|
|
|
) |
273
|
|
|
|
|
|
|
$ |
274
|
|
|
|
|
|
|
/x); |
275
|
|
|
|
|
|
|
# TODO: need to test bounds of mantissa ( < 2^24 ) |
276
|
0
|
0
|
0
|
|
|
|
$$exp = ($1 || '') . ($2 || '') if ref($exp); |
|
|
|
0
|
|
|
|
|
277
|
0
|
|
|
|
|
|
return 1; |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
$format_re{double} = sub { |
281
|
|
|
|
|
|
|
my $exp; |
282
|
|
|
|
|
|
|
return 0 unless _parse_real(shift,\$exp); |
283
|
|
|
|
|
|
|
return 0 if $exp && ($exp < -1075 || $exp > 970); |
284
|
|
|
|
|
|
|
return 1; |
285
|
|
|
|
|
|
|
}; |
286
|
|
|
|
|
|
|
$format_re{float} = sub { |
287
|
|
|
|
|
|
|
my $exp; |
288
|
|
|
|
|
|
|
return 0 unless _parse_real(shift,\$exp); |
289
|
|
|
|
|
|
|
return 0 if $exp && ($exp < -149 || $exp > 104); |
290
|
|
|
|
|
|
|
return 1; |
291
|
|
|
|
|
|
|
}; |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
$format_re{duration} = sub { |
294
|
|
|
|
|
|
|
my $value = shift; |
295
|
|
|
|
|
|
|
return 0 |
296
|
|
|
|
|
|
|
unless length $value and $value =~ / |
297
|
|
|
|
|
|
|
^ |
298
|
|
|
|
|
|
|
-? # sign |
299
|
|
|
|
|
|
|
P # date |
300
|
|
|
|
|
|
|
(?:\d+Y)? # years |
301
|
|
|
|
|
|
|
(?:\d+M)? # months |
302
|
|
|
|
|
|
|
(?:\d+D)? # days |
303
|
|
|
|
|
|
|
(?:T # time |
304
|
|
|
|
|
|
|
(?:\d+H)? # hours |
305
|
|
|
|
|
|
|
(?:\d+M)? # minutes |
306
|
|
|
|
|
|
|
(?:\d(?:\.\d+)?S)? # seconds |
307
|
|
|
|
|
|
|
)? |
308
|
|
|
|
|
|
|
$ |
309
|
|
|
|
|
|
|
/x; |
310
|
|
|
|
|
|
|
}; |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
my $integer = $format_re{integer} = qr(^\s*[+-]?\d+\s*$); |
313
|
|
|
|
|
|
|
$format_re{long} = sub { |
314
|
|
|
|
|
|
|
my $val = shift; |
315
|
|
|
|
|
|
|
return ($val =~ $integer and |
316
|
|
|
|
|
|
|
$val >= -9223372036854775808 and |
317
|
|
|
|
|
|
|
$val <= 9223372036854775807) ? 1 : 0; |
318
|
|
|
|
|
|
|
}; |
319
|
|
|
|
|
|
|
$format_re{int} = sub { |
320
|
|
|
|
|
|
|
my $val = shift; |
321
|
|
|
|
|
|
|
return ($val =~ $integer and |
322
|
|
|
|
|
|
|
$val >= -2147483648 and |
323
|
|
|
|
|
|
|
$val <= 2147483647) ? 1 : 0; |
324
|
|
|
|
|
|
|
}; |
325
|
|
|
|
|
|
|
$format_re{short} = sub { |
326
|
|
|
|
|
|
|
my $val = shift; |
327
|
|
|
|
|
|
|
return ($val =~ $integer and |
328
|
|
|
|
|
|
|
$val >= -32768 and |
329
|
|
|
|
|
|
|
$val <= 32767) ? 1 : 0; |
330
|
|
|
|
|
|
|
}; |
331
|
|
|
|
|
|
|
$format_re{byte} = sub { |
332
|
|
|
|
|
|
|
my $val = shift; |
333
|
|
|
|
|
|
|
return ($val =~ $integer and |
334
|
|
|
|
|
|
|
$val >= -128 and |
335
|
|
|
|
|
|
|
$val <= 127) ? 1 : 0; |
336
|
|
|
|
|
|
|
}; |
337
|
|
|
|
|
|
|
my $nonNegativeInteger=$format_re{nonNegativeInteger}; |
338
|
|
|
|
|
|
|
$format_re{unsignedLong} = sub { |
339
|
|
|
|
|
|
|
my $val = shift; |
340
|
|
|
|
|
|
|
return ($val =~ $nonNegativeInteger and |
341
|
|
|
|
|
|
|
$val <= 18446744073709551615) |
342
|
|
|
|
|
|
|
}; |
343
|
|
|
|
|
|
|
$format_re{unsignedInt} = sub { |
344
|
|
|
|
|
|
|
my $val = shift; |
345
|
|
|
|
|
|
|
return ($val =~ $nonNegativeInteger and |
346
|
|
|
|
|
|
|
$val <= 4294967295) |
347
|
|
|
|
|
|
|
}; |
348
|
|
|
|
|
|
|
$format_re{unsignedShort} = sub { |
349
|
|
|
|
|
|
|
my $val = shift; |
350
|
|
|
|
|
|
|
return ($val =~ $nonNegativeInteger and |
351
|
|
|
|
|
|
|
$val <= 65535) |
352
|
|
|
|
|
|
|
}; |
353
|
|
|
|
|
|
|
$format_re{unsignedByte} = sub { |
354
|
|
|
|
|
|
|
my $val = shift; |
355
|
|
|
|
|
|
|
return ($val =~ $nonNegativeInteger and |
356
|
|
|
|
|
|
|
$val <= 255) |
357
|
|
|
|
|
|
|
}; |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
sub _check_time { |
360
|
0
|
|
|
0
|
|
|
my $value = shift; |
361
|
0
|
|
|
|
|
|
my $no_hour24 = shift; |
362
|
|
|
|
|
|
|
return |
363
|
0
|
0
|
0
|
|
|
|
((length($value) and |
364
|
|
|
|
|
|
|
$value =~ m(^ |
365
|
|
|
|
|
|
|
(\d{2}):(\d{2}):(\d{2})(?:\.(\d+))? # hour:min:sec |
366
|
|
|
|
|
|
|
(?:Z|[-+]\d{2}:\d{2})? # zone |
367
|
|
|
|
|
|
|
$)x and |
368
|
|
|
|
|
|
|
((!$no_hour24 and $1 == 24 and $2 == 0 and $3 == 0 and $4 == 0) or |
369
|
|
|
|
|
|
|
0 <= $1 and $1 <= 23 and |
370
|
|
|
|
|
|
|
0 <= $2 and $2 <= 59 and |
371
|
|
|
|
|
|
|
0 <= $3 and $3 <= 59) |
372
|
|
|
|
|
|
|
) ? 1 : 0); |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
sub _check_date { |
375
|
0
|
|
|
0
|
|
|
my $value = shift; |
376
|
|
|
|
|
|
|
return |
377
|
0
|
0
|
0
|
|
|
|
(length($value) and |
378
|
|
|
|
|
|
|
$value =~ / |
379
|
|
|
|
|
|
|
^ |
380
|
|
|
|
|
|
|
[-+]? # sign |
381
|
|
|
|
|
|
|
(?:[1-9]\d{4,}|\d{4}) # year |
382
|
|
|
|
|
|
|
-(\d{2}) # month ($1) |
383
|
|
|
|
|
|
|
-(\d{2}) # day ($2) |
384
|
|
|
|
|
|
|
$ |
385
|
|
|
|
|
|
|
/x |
386
|
|
|
|
|
|
|
and $1>=1 and $1<=12 |
387
|
|
|
|
|
|
|
and $2>= 1 and $2<=31 |
388
|
|
|
|
|
|
|
) ? 1 : 0; |
389
|
|
|
|
|
|
|
} |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
$format_re{time} = \&_check_time; |
392
|
|
|
|
|
|
|
$format_re{date} = \&_check_date; |
393
|
|
|
|
|
|
|
$format_re{dateTime} = sub { |
394
|
|
|
|
|
|
|
my $value = shift; |
395
|
|
|
|
|
|
|
return 0 unless length $value; |
396
|
|
|
|
|
|
|
return 0 unless $value =~ /^(.*)T(.*)$/; |
397
|
|
|
|
|
|
|
my ($date,$time)=($1,$2); |
398
|
|
|
|
|
|
|
return _check_date($date) && _check_time($time,1) ? 1 : 0; |
399
|
|
|
|
|
|
|
}; |
400
|
|
|
|
|
|
|
$format_re{gYearMonth} = sub { |
401
|
|
|
|
|
|
|
my $value = shift; |
402
|
|
|
|
|
|
|
return |
403
|
|
|
|
|
|
|
(length($value) and |
404
|
|
|
|
|
|
|
$value =~ / |
405
|
|
|
|
|
|
|
^ |
406
|
|
|
|
|
|
|
[-+]? # sign |
407
|
|
|
|
|
|
|
(?:[1-9]\d{4,}|\d{4}) # year |
408
|
|
|
|
|
|
|
-(\d{2}) # month ($1) |
409
|
|
|
|
|
|
|
$ |
410
|
|
|
|
|
|
|
/x |
411
|
|
|
|
|
|
|
and $1>=1 and $1<=12 |
412
|
|
|
|
|
|
|
) ? 1 : 0; |
413
|
|
|
|
|
|
|
}; |
414
|
|
|
|
|
|
|
$format_re{gYear} = sub { |
415
|
|
|
|
|
|
|
my $value = shift; |
416
|
|
|
|
|
|
|
return |
417
|
|
|
|
|
|
|
(length($value) and |
418
|
|
|
|
|
|
|
$value =~ / |
419
|
|
|
|
|
|
|
^ |
420
|
|
|
|
|
|
|
[-+]? # sign |
421
|
|
|
|
|
|
|
(?:[1-9]\d{4,}|\d{4}) # year |
422
|
|
|
|
|
|
|
$ |
423
|
|
|
|
|
|
|
/x) ? 1 : 0; |
424
|
|
|
|
|
|
|
}; |
425
|
|
|
|
|
|
|
$format_re{gMonthDay} = sub { |
426
|
|
|
|
|
|
|
my $value = shift; |
427
|
|
|
|
|
|
|
return |
428
|
|
|
|
|
|
|
(length($value) and |
429
|
|
|
|
|
|
|
$value =~ /^--(\d{2})-(\d{2})$/ # --MM-DD |
430
|
|
|
|
|
|
|
and $1>=1 and $1<=12 |
431
|
|
|
|
|
|
|
and $2>= 1 and $2<=31 |
432
|
|
|
|
|
|
|
) ? 1 : 0; |
433
|
|
|
|
|
|
|
}; |
434
|
|
|
|
|
|
|
$format_re{gDay} = sub { |
435
|
|
|
|
|
|
|
my $value = shift; |
436
|
|
|
|
|
|
|
return |
437
|
|
|
|
|
|
|
(length($value) and |
438
|
|
|
|
|
|
|
$value =~ /^---(\d{2})$/ # ---DD |
439
|
|
|
|
|
|
|
and $1>= 1 and $1<=31 |
440
|
|
|
|
|
|
|
) ? 1 : 0; |
441
|
|
|
|
|
|
|
}; |
442
|
|
|
|
|
|
|
$format_re{gMonth} = sub { |
443
|
|
|
|
|
|
|
my $value = shift; |
444
|
|
|
|
|
|
|
return |
445
|
|
|
|
|
|
|
(length($value) and |
446
|
|
|
|
|
|
|
$value =~ /^--(\d{2})$/ # --MM |
447
|
|
|
|
|
|
|
and $1>=1 and $1<=12 |
448
|
|
|
|
|
|
|
) ? 1 : 0; |
449
|
|
|
|
|
|
|
}; |
450
|
0
|
|
0
|
0
|
|
|
sub _get_format_checker { return $format_re{ $_[1] || $_[0]->{format} } } |
451
|
|
|
|
|
|
|
sub supported_formats { |
452
|
0
|
|
|
0
|
1
|
|
return sort keys %format_re; |
453
|
|
|
|
|
|
|
} |
454
|
|
|
|
|
|
|
} |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
sub check_string_format { |
458
|
0
|
|
|
0
|
1
|
|
my ($self, $string, $format) = @_; |
459
|
0
|
|
0
|
|
|
|
$format ||= $self->get_format; |
460
|
0
|
0
|
|
|
|
|
return 1 if $format eq 'any'; |
461
|
0
|
|
|
|
|
|
my $re = $self->_get_format_checker($format); |
462
|
0
|
0
|
|
|
|
|
if (defined $re) { |
463
|
0
|
0
|
0
|
|
|
|
if ((ref($re) eq 'CODE' and !$re->($string)) |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
464
|
|
|
|
|
|
|
or (ref($re) ne 'CODE' and $string !~ $re)) { |
465
|
0
|
|
|
|
|
|
return 0 |
466
|
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
|
} else { |
468
|
|
|
|
|
|
|
# warn "format $format not supported ??"; |
469
|
|
|
|
|
|
|
} |
470
|
0
|
|
|
|
|
|
return 1; |
471
|
|
|
|
|
|
|
} |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
sub validate_object { |
474
|
0
|
|
|
0
|
1
|
|
my ($self, $object, $opts) = @_; |
475
|
0
|
|
|
|
|
|
my $err = undef; |
476
|
0
|
|
|
|
|
|
my $format = $self->get_format; |
477
|
0
|
0
|
|
|
|
|
if (ref($object)) { |
|
|
0
|
|
|
|
|
|
478
|
0
|
|
|
|
|
|
$err = "expected CDATA, got: ".ref($object); |
479
|
|
|
|
|
|
|
} elsif (!$self->check_string_format($object,$format)) { |
480
|
0
|
|
|
|
|
|
$err = "CDATA value not formatted as $format: '$object'"; |
481
|
|
|
|
|
|
|
} |
482
|
0
|
0
|
0
|
|
|
|
if ($err and ref($opts) and ref($opts->{log})) { |
|
|
|
0
|
|
|
|
|
483
|
0
|
|
|
|
|
|
my $path = $opts->{path}; |
484
|
0
|
|
|
|
|
|
my $tag = $opts->{tag}; |
485
|
0
|
0
|
|
|
|
|
$path.="/".$tag if $tag ne q{}; |
486
|
0
|
|
|
|
|
|
push @{$opts->{log}}, "$path: ".$err; |
|
0
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
} |
488
|
0
|
0
|
|
|
|
|
return $err ? 0 : 1; |
489
|
|
|
|
|
|
|
} |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
1; |
493
|
|
|
|
|
|
|
__END__ |