line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#include "EXTERN.h" |
2
|
|
|
|
|
|
|
#include "perl.h" |
3
|
|
|
|
|
|
|
#include "XSUB.h" |
4
|
|
|
|
|
|
|
#include "perliol.h" |
5
|
|
|
|
|
|
|
#include "ppport.h" |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
#define UTF8_MAX_BYTES 4 |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
static const U8 xs_utf8_sequence_len[0x100] = { |
10
|
|
|
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x00-0x0F */ |
11
|
|
|
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x10-0x1F */ |
12
|
|
|
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x20-0x2F */ |
13
|
|
|
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x30-0x3F */ |
14
|
|
|
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x40-0x4F */ |
15
|
|
|
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x50-0x5F */ |
16
|
|
|
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x60-0x6F */ |
17
|
|
|
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x70-0x7F */ |
18
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x8F */ |
19
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90-0x9F */ |
20
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xA0-0xAF */ |
21
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xB0-0xBF */ |
22
|
|
|
|
|
|
|
0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xC0-0xCF */ |
23
|
|
|
|
|
|
|
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xD0-0xDF */ |
24
|
|
|
|
|
|
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* 0xE0-0xEF */ |
25
|
|
|
|
|
|
|
4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, /* 0xF0-0xFF */ |
26
|
|
|
|
|
|
|
}; |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
typedef enum { STRICT_UTF8=0, ALLOW_SURROGATES=1, ALLOW_NONCHARACTERS=2, ALLOW_NONSHORTEST=4 } utf8_flags; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
|
32
|
3634
|
|
|
|
|
|
static STRLEN skip_sequence(const U8 *cur, const STRLEN len) { |
33
|
3634
|
|
|
|
|
|
STRLEN i, n = xs_utf8_sequence_len[*cur]; |
34
|
|
|
|
|
|
|
|
35
|
3634
|
100
|
|
|
|
|
if (n < 1 || len < 2) |
|
|
100
|
|
|
|
|
|
36
|
1035
|
|
|
|
|
|
return 1; |
37
|
|
|
|
|
|
|
|
38
|
2599
|
|
|
|
|
|
switch (cur[0]) { |
39
|
2
|
50
|
|
|
|
|
case 0xE0: if ((cur[1] & 0xE0) != 0xA0) return 1; break; |
40
|
2050
|
100
|
|
|
|
|
case 0xED: if ((cur[1] & 0xE0) != 0x80) return 1; break; |
41
|
33
|
100
|
|
|
|
|
case 0xF4: if ((cur[1] & 0xF0) != 0x80) return 1; break; |
42
|
100
|
100
|
|
|
|
|
case 0xF0: if ((cur[1] & 0xF0) == 0x80) return 1; /* FALLTROUGH */ |
43
|
510
|
50
|
|
|
|
|
default: if ((cur[1] & 0xC0) != 0x80) return 1; break; |
44
|
|
|
|
|
|
|
} |
45
|
|
|
|
|
|
|
|
46
|
544
|
50
|
|
|
|
|
if (n > len) |
47
|
544
|
|
|
|
|
|
n = len; |
48
|
1056
|
100
|
|
|
|
|
for (i = 2; i < n; i++) |
49
|
512
|
50
|
|
|
|
|
if ((cur[i] & 0xC0) != 0x80) |
50
|
0
|
|
|
|
|
|
break; |
51
|
544
|
|
|
|
|
|
return i; |
52
|
|
|
|
|
|
|
} |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
#if defined(PERL_STATIC_NO_RET) && defined(__attribute__noreturn__) |
55
|
|
|
|
|
|
|
PERL_STATIC_NO_RET void report_illformed(pTHX_ const U8 *cur, STRLEN len, bool eof) __attribute__noreturn__; |
56
|
|
|
|
|
|
|
#elif defined(__attribute__noreturn__) |
57
|
|
|
|
|
|
|
static void report_illformed(pTHX_ const U8 *cur, STRLEN len, bool eof) __attribute__noreturn__; |
58
|
|
|
|
|
|
|
#endif |
59
|
|
|
|
|
|
|
|
60
|
3360
|
|
|
|
|
|
static void report_illformed(pTHX_ const U8 *cur, STRLEN len, bool eof) { |
61
|
|
|
|
|
|
|
static const char *hex = "0123456789ABCDEF"; |
62
|
|
|
|
|
|
|
const char *fmt; |
63
|
|
|
|
|
|
|
char seq[UTF8_MAX_BYTES * 3]; |
64
|
3360
|
|
|
|
|
|
char *d = seq; |
65
|
|
|
|
|
|
|
|
66
|
3360
|
100
|
|
|
|
|
if (eof) |
67
|
272
|
|
|
|
|
|
fmt = "Can't decode ill-formed UTF-8 octet sequence <%s> at end of file"; |
68
|
|
|
|
|
|
|
else |
69
|
3088
|
|
|
|
|
|
fmt = "Can't decode ill-formed UTF-8 octet sequence <%s>"; |
70
|
|
|
|
|
|
|
|
71
|
7247
|
100
|
|
|
|
|
while (len-- > 0) { |
72
|
3887
|
|
|
|
|
|
const U8 c = *cur++; |
73
|
3887
|
|
|
|
|
|
*d++ = hex[c >> 4]; |
74
|
3887
|
|
|
|
|
|
*d++ = hex[c & 15]; |
75
|
3887
|
100
|
|
|
|
|
if (len) |
76
|
527
|
|
|
|
|
|
*d++ = ' '; |
77
|
|
|
|
|
|
|
} |
78
|
3360
|
|
|
|
|
|
*d = 0; |
79
|
3360
|
|
|
|
|
|
Perl_croak(aTHX_ fmt, seq); |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
#if defined(PERL_STATIC_NO_RET) && defined(__attribute__noreturn__) |
83
|
|
|
|
|
|
|
PERL_STATIC_NO_RET void report_noncharacter(pTHX_ UV usv) __attribute__noreturn__; |
84
|
|
|
|
|
|
|
#elif defined(__attribute__noreturn__) |
85
|
|
|
|
|
|
|
static void report_noncharacter(pTHX_ UV usv) __attribute__noreturn__; |
86
|
|
|
|
|
|
|
#endif |
87
|
|
|
|
|
|
|
|
88
|
66
|
|
|
|
|
|
static void report_noncharacter(pTHX_ UV usv) { |
89
|
|
|
|
|
|
|
static const char *fmt = "Can't interchange noncharacter code point U+%"UVXf; |
90
|
66
|
|
|
|
|
|
Perl_croak(aTHX_ fmt, usv); |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
|
93
|
6090
|
|
|
|
|
|
static STRLEN validate(pTHX_ const U8 *buf, const U8 *end, const int flags, PerlIO* handle) { |
94
|
6090
|
|
|
|
|
|
const bool eof = PerlIO_eof(handle); |
95
|
6090
|
|
|
|
|
|
const U8 *cur = buf; |
96
|
6090
|
|
|
|
|
|
const U8 *end4 = end - UTF8_MAX_BYTES; |
97
|
6090
|
|
|
|
|
|
STRLEN skip = 0; |
98
|
|
|
|
|
|
|
U32 v; |
99
|
|
|
|
|
|
|
|
100
|
17203
|
100
|
|
|
|
|
while (cur < end4) { |
101
|
12761
|
100
|
|
|
|
|
while (cur < end4 && *cur < 0x80) |
|
|
100
|
|
|
|
|
|
102
|
3013
|
|
|
|
|
|
cur++; |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
check: |
105
|
14267
|
|
|
|
|
|
switch (xs_utf8_sequence_len[*cur]) { |
106
|
|
|
|
|
|
|
case 0: |
107
|
1033
|
|
|
|
|
|
goto illformed; |
108
|
|
|
|
|
|
|
case 1: |
109
|
11
|
|
|
|
|
|
cur += 1; |
110
|
11
|
|
|
|
|
|
break; |
111
|
|
|
|
|
|
|
case 2: |
112
|
|
|
|
|
|
|
/* 110xxxxx 10xxxxxx */ |
113
|
313
|
50
|
|
|
|
|
if ((cur[1] & 0xC0) != 0x80) |
114
|
0
|
|
|
|
|
|
goto illformed; |
115
|
313
|
|
|
|
|
|
cur += 2; |
116
|
313
|
|
|
|
|
|
break; |
117
|
|
|
|
|
|
|
case 3: |
118
|
25170
|
|
|
|
|
|
v = ((U32)cur[0] << 16) |
119
|
12585
|
|
|
|
|
|
| ((U32)cur[1] << 8) |
120
|
12585
|
|
|
|
|
|
| ((U32)cur[2]); |
121
|
|
|
|
|
|
|
/* 1110xxxx 10xxxxxx 10xxxxxx */ |
122
|
12585
|
50
|
|
|
|
|
if ((v & 0x00F0C0C0) != 0x00E08080 || |
|
|
100
|
|
|
|
|
|
123
|
|
|
|
|
|
|
/* Non-shortest form */ |
124
|
|
|
|
|
|
|
v < 0x00E0A080) |
125
|
|
|
|
|
|
|
goto illformed; |
126
|
|
|
|
|
|
|
/* Surrogates U+D800..U+DFFF */ |
127
|
12583
|
100
|
|
|
|
|
if (!(flags & ALLOW_SURROGATES) && (v & 0x00EFA080) == 0x00EDA080) |
|
|
100
|
|
|
|
|
|
128
|
2048
|
|
|
|
|
|
goto illformed; |
129
|
|
|
|
|
|
|
/* Non-characters U+FDD0..U+FDEF, U+FFFE..U+FFFF */ |
130
|
10535
|
100
|
|
|
|
|
if (!(flags & ALLOW_NONCHARACTERS) && v >= 0x00EFB790 && (v <= 0x00EFB7AF || v >= 0x00EFBFBE)) |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
131
|
|
|
|
|
|
|
goto noncharacter; |
132
|
10501
|
|
|
|
|
|
cur += 3; |
133
|
10501
|
|
|
|
|
|
break; |
134
|
|
|
|
|
|
|
case 4: |
135
|
650
|
|
|
|
|
|
v = ((U32)cur[0] << 24) |
136
|
325
|
|
|
|
|
|
| ((U32)cur[1] << 16) |
137
|
325
|
|
|
|
|
|
| ((U32)cur[2] << 8) |
138
|
325
|
|
|
|
|
|
| ((U32)cur[3]); |
139
|
|
|
|
|
|
|
/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ |
140
|
325
|
50
|
|
|
|
|
if ((v & 0xF8C0C0C0) != 0xF0808080 || |
|
|
100
|
|
|
|
|
|
141
|
|
|
|
|
|
|
/* Non-shortest form */ |
142
|
321
|
100
|
|
|
|
|
v < 0xF0908080 || |
143
|
|
|
|
|
|
|
/* Greater than U+10FFFF */ |
144
|
|
|
|
|
|
|
v > 0xF48FBFBF) |
145
|
|
|
|
|
|
|
goto illformed; |
146
|
|
|
|
|
|
|
/* Non-characters U+nFFFE..U+nFFFF on plane 1-16 */ |
147
|
320
|
100
|
|
|
|
|
if (!(flags & ALLOW_NONCHARACTERS) && (v & 0x000FBFBE) == 0x000FBFBE) |
|
|
100
|
|
|
|
|
|
148
|
32
|
|
|
|
|
|
goto noncharacter; |
149
|
288
|
|
|
|
|
|
cur += 4; |
150
|
288
|
|
|
|
|
|
break; |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
} |
153
|
|
|
|
|
|
|
|
154
|
7455
|
100
|
|
|
|
|
if (cur < end) { |
155
|
5065
|
100
|
|
|
|
|
if (cur + xs_utf8_sequence_len[*cur] <= end) |
156
|
4519
|
|
|
|
|
|
goto check; |
157
|
546
|
|
|
|
|
|
skip = skip_sequence(cur, end - cur); |
158
|
546
|
100
|
|
|
|
|
if (eof || cur + skip < end) |
|
|
50
|
|
|
|
|
|
159
|
|
|
|
|
|
|
goto illformed; |
160
|
|
|
|
|
|
|
} |
161
|
2664
|
|
|
|
|
|
return cur - buf; |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
illformed: |
164
|
3360
|
100
|
|
|
|
|
if (!skip) |
165
|
3088
|
|
|
|
|
|
skip = skip_sequence(cur, end - cur); |
166
|
3360
|
|
|
|
|
|
PerlIOBase(handle)->flags |= PERLIO_F_ERROR; |
167
|
3360
|
|
|
|
|
|
report_illformed(aTHX_ cur, skip, eof); |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
noncharacter: |
170
|
66
|
100
|
|
|
|
|
if (v < 0xF0808080) |
171
|
34
|
|
|
|
|
|
v = (v & 0x3F) | (v & 0x3F00) >> 2 | (v & 0x0F0000) >> 4; |
172
|
|
|
|
|
|
|
else |
173
|
32
|
|
|
|
|
|
v = (v & 0x3F) | (v & 0x3F00) >> 2 | (v & 0x3F0000) >> 4 | (v & 0x07000000) >> 6; |
174
|
66
|
|
|
|
|
|
PerlIOBase(handle)->flags |= PERLIO_F_ERROR; |
175
|
66
|
|
|
|
|
|
report_noncharacter(aTHX_ v); |
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
typedef struct { |
179
|
|
|
|
|
|
|
PerlIOBuf buf; |
180
|
|
|
|
|
|
|
STDCHAR leftovers[UTF8_MAX_BYTES]; |
181
|
|
|
|
|
|
|
size_t leftover_length; |
182
|
|
|
|
|
|
|
utf8_flags flags; |
183
|
|
|
|
|
|
|
} PerlIOUnicode; |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
static struct { |
186
|
|
|
|
|
|
|
const char* name; |
187
|
|
|
|
|
|
|
size_t length; |
188
|
|
|
|
|
|
|
utf8_flags value; |
189
|
|
|
|
|
|
|
} map[] = { |
190
|
|
|
|
|
|
|
{ STR_WITH_LEN("allow_surrogates"), ALLOW_SURROGATES }, |
191
|
|
|
|
|
|
|
{ STR_WITH_LEN("allow_noncharacters"), ALLOW_NONCHARACTERS }, |
192
|
|
|
|
|
|
|
{ STR_WITH_LEN("allow_nonshortest"), ALLOW_NONSHORTEST }, |
193
|
|
|
|
|
|
|
{ STR_WITH_LEN("strict"), 0 }, |
194
|
|
|
|
|
|
|
{ STR_WITH_LEN("loose"), ALLOW_SURROGATES | ALLOW_NONCHARACTERS | ALLOW_NONSHORTEST }, |
195
|
|
|
|
|
|
|
}; |
196
|
|
|
|
|
|
|
|
197
|
2114
|
|
|
|
|
|
static utf8_flags lookup_parameter(pTHX_ const char* ptr, size_t len) { |
198
|
|
|
|
|
|
|
unsigned i; |
199
|
2180
|
50
|
|
|
|
|
for (i = 0; i < sizeof map / sizeof *map; ++i) { |
200
|
2180
|
100
|
|
|
|
|
if (map[i].length == len && memcmp(ptr, map[i].name, len) == 0) |
|
|
50
|
|
|
|
|
|
201
|
2114
|
|
|
|
|
|
return map[i].value; |
202
|
|
|
|
|
|
|
} |
203
|
0
|
|
|
|
|
|
Perl_croak(aTHX_ "Unknown argument to :utf8_strict: %*s", (int)len, ptr); |
204
|
|
|
|
|
|
|
} |
205
|
5817
|
|
|
|
|
|
static utf8_flags parse_parameters(pTHX_ SV* param) { |
206
|
|
|
|
|
|
|
STRLEN len; |
207
|
|
|
|
|
|
|
const char *begin, *delim; |
208
|
5817
|
50
|
|
|
|
|
if (!param || !SvOK(param)) |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
209
|
3703
|
|
|
|
|
|
return 0; |
210
|
|
|
|
|
|
|
|
211
|
2114
|
50
|
|
|
|
|
begin = SvPV(param, len); |
212
|
2114
|
|
|
|
|
|
delim = strchr(begin, ','); |
213
|
2114
|
50
|
|
|
|
|
if(delim) { |
214
|
0
|
|
|
|
|
|
utf8_flags ret = 0; |
215
|
0
|
|
|
|
|
|
const char* end = begin + len; |
216
|
|
|
|
|
|
|
do { |
217
|
0
|
|
|
|
|
|
ret |= lookup_parameter(aTHX_ begin, delim - begin); |
218
|
0
|
|
|
|
|
|
begin = delim + 1; |
219
|
0
|
|
|
|
|
|
delim = strchr(begin, ','); |
220
|
0
|
0
|
|
|
|
|
} while (delim); |
221
|
0
|
0
|
|
|
|
|
if (begin < end) |
222
|
0
|
|
|
|
|
|
ret |= lookup_parameter(aTHX_ begin, end - begin); |
223
|
0
|
|
|
|
|
|
return ret; |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
else { |
226
|
5817
|
|
|
|
|
|
return lookup_parameter(aTHX_ begin, len); |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
#define line_buffered(flags) ((flags & (PERLIO_F_LINEBUF | PERLIO_F_CANWRITE)) == (PERLIO_F_LINEBUF | PERLIO_F_CANWRITE)) |
231
|
|
|
|
|
|
|
|
232
|
0
|
|
|
|
|
|
void PerlIOBase_flush_linebuf(pTHX) { |
233
|
|
|
|
|
|
|
#ifdef dVAR |
234
|
|
|
|
|
|
|
dVAR; |
235
|
|
|
|
|
|
|
#endif |
236
|
0
|
|
|
|
|
|
PerlIOl **table = &PL_perlio; |
237
|
|
|
|
|
|
|
PerlIOl *f; |
238
|
0
|
0
|
|
|
|
|
while ((f = *table)) { |
239
|
|
|
|
|
|
|
int i; |
240
|
0
|
|
|
|
|
|
table = (PerlIOl **) (f++); |
241
|
0
|
0
|
|
|
|
|
for (i = 1; i < 64; i++) { |
242
|
0
|
0
|
|
|
|
|
if (f->next && line_buffered(PerlIOBase(&(f->next))->flags)) |
|
|
0
|
|
|
|
|
|
243
|
0
|
|
|
|
|
|
PerlIO_flush(&(f->next)); |
244
|
0
|
|
|
|
|
|
f++; |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
} |
247
|
0
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
|
249
|
5817
|
|
|
|
|
|
static IV PerlIOUnicode_pushed(pTHX_ PerlIO* f, const char* mode, SV* arg, PerlIO_funcs* tab) { |
250
|
5817
|
|
|
|
|
|
utf8_flags flags = parse_parameters(aTHX_ arg); |
251
|
5817
|
50
|
|
|
|
|
if (PerlIOBuf_pushed(aTHX_ f, mode, arg, tab) == 0) { |
252
|
5817
|
|
|
|
|
|
PerlIOBase(f)->flags |= PERLIO_F_UTF8; |
253
|
5817
|
|
|
|
|
|
PerlIOSelf(f, PerlIOUnicode)->flags = flags; |
254
|
5817
|
|
|
|
|
|
return 0; |
255
|
|
|
|
|
|
|
} |
256
|
0
|
|
|
|
|
|
return -1; |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
|
259
|
8478
|
|
|
|
|
|
static IV PerlIOUnicode_fill(pTHX_ PerlIO* f) { |
260
|
8478
|
|
|
|
|
|
PerlIOUnicode * const u = PerlIOSelf(f, PerlIOUnicode); |
261
|
8478
|
|
|
|
|
|
PerlIOBuf * const b = &u->buf; |
262
|
8478
|
|
|
|
|
|
PerlIO *n = PerlIONext(f); |
263
|
|
|
|
|
|
|
SSize_t avail; |
264
|
8478
|
|
|
|
|
|
Size_t read_bytes = 0; |
265
|
|
|
|
|
|
|
STDCHAR *end; |
266
|
|
|
|
|
|
|
SSize_t fit; |
267
|
|
|
|
|
|
|
|
268
|
8478
|
50
|
|
|
|
|
if (PerlIO_flush(f) != 0) |
269
|
0
|
|
|
|
|
|
return -1; |
270
|
8478
|
50
|
|
|
|
|
if (PerlIOBase(f)->flags & PERLIO_F_TTY) |
271
|
0
|
|
|
|
|
|
PerlIOBase_flush_linebuf(aTHX); |
272
|
|
|
|
|
|
|
|
273
|
8478
|
50
|
|
|
|
|
if (!b->buf) |
274
|
0
|
|
|
|
|
|
PerlIO_get_base(f); |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
assert(b->buf); |
277
|
|
|
|
|
|
|
|
278
|
8478
|
100
|
|
|
|
|
if (u->leftover_length) { |
279
|
274
|
|
|
|
|
|
Copy(u->leftovers, b->buf, u->leftover_length, STDCHAR); |
280
|
274
|
|
|
|
|
|
b->end = b->buf + u->leftover_length; |
281
|
274
|
|
|
|
|
|
read_bytes = u->leftover_length; |
282
|
274
|
|
|
|
|
|
u->leftover_length = 0; |
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
else { |
285
|
8204
|
|
|
|
|
|
b->ptr = b->end = b->buf; |
286
|
|
|
|
|
|
|
} |
287
|
8478
|
|
|
|
|
|
fit = (SSize_t)b->bufsiz - (b->end - b->buf); |
288
|
|
|
|
|
|
|
|
289
|
8478
|
50
|
|
|
|
|
if (!PerlIOValid(n)) { |
|
|
50
|
|
|
|
|
|
290
|
0
|
|
|
|
|
|
PerlIOBase(f)->flags |= PERLIO_F_EOF; |
291
|
0
|
|
|
|
|
|
return -1; |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
|
294
|
8478
|
50
|
|
|
|
|
if (PerlIO_fast_gets(n)) { |
295
|
|
|
|
|
|
|
/* |
296
|
|
|
|
|
|
|
* Layer below is also buffered. We do _NOT_ want to call its |
297
|
|
|
|
|
|
|
* ->Read() because that will loop till it gets what we asked for |
298
|
|
|
|
|
|
|
* which may hang on a pipe etc. Instead take anything it has to |
299
|
|
|
|
|
|
|
* hand, or ask it to fill _once_. |
300
|
|
|
|
|
|
|
*/ |
301
|
8478
|
|
|
|
|
|
avail = PerlIO_get_cnt(n); |
302
|
8478
|
50
|
|
|
|
|
if (avail <= 0) { |
303
|
8478
|
|
|
|
|
|
avail = PerlIO_fill(n); |
304
|
8478
|
100
|
|
|
|
|
if (avail == 0) |
305
|
5818
|
|
|
|
|
|
avail = PerlIO_get_cnt(n); |
306
|
|
|
|
|
|
|
else { |
307
|
2660
|
50
|
|
|
|
|
if (!PerlIO_error(n) && PerlIO_eof(n)) |
|
|
50
|
|
|
|
|
|
308
|
2660
|
|
|
|
|
|
avail = 0; |
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
} |
311
|
8478
|
100
|
|
|
|
|
if (avail > 0) { |
312
|
5818
|
|
|
|
|
|
STDCHAR *ptr = PerlIO_get_ptr(n); |
313
|
5818
|
|
|
|
|
|
const SSize_t cnt = avail; |
314
|
5818
|
100
|
|
|
|
|
if (avail > fit) |
315
|
1
|
|
|
|
|
|
avail = fit; |
316
|
5818
|
|
|
|
|
|
Copy(ptr, b->end, avail, STDCHAR); |
317
|
5818
|
|
|
|
|
|
PerlIO_set_ptrcnt(n, ptr + avail, cnt - avail); |
318
|
8478
|
|
|
|
|
|
read_bytes += avail; |
319
|
|
|
|
|
|
|
} |
320
|
|
|
|
|
|
|
} |
321
|
|
|
|
|
|
|
else { |
322
|
0
|
|
|
|
|
|
avail = PerlIO_read(n, b->end, fit); |
323
|
0
|
0
|
|
|
|
|
if (avail > 0) |
324
|
0
|
|
|
|
|
|
read_bytes += avail; |
325
|
|
|
|
|
|
|
} |
326
|
8478
|
100
|
|
|
|
|
if (avail <= 0) { |
327
|
2660
|
50
|
|
|
|
|
if (avail < 0 || (read_bytes == 0 && PerlIO_eof(n))) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
328
|
2388
|
50
|
|
|
|
|
PerlIOBase(f)->flags |= (avail == 0) ? PERLIO_F_EOF : PERLIO_F_ERROR; |
329
|
2388
|
|
|
|
|
|
return -1; |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
} |
332
|
6090
|
|
|
|
|
|
end = b->buf + read_bytes; |
333
|
6090
|
|
|
|
|
|
b->end = b->buf + validate(aTHX_ (const U8 *)b->buf, (const U8 *)end, u->flags, n); |
334
|
2664
|
100
|
|
|
|
|
if (b->end < end) { |
335
|
274
|
|
|
|
|
|
size_t len = b->buf + read_bytes - b->end; |
336
|
274
|
|
|
|
|
|
Copy(b->end, u->leftovers, len, char); |
337
|
274
|
|
|
|
|
|
u->leftover_length = len; |
338
|
|
|
|
|
|
|
} |
339
|
2664
|
|
|
|
|
|
PerlIOBase(f)->flags |= PERLIO_F_RDBUF; |
340
|
|
|
|
|
|
|
|
341
|
2664
|
|
|
|
|
|
return 0; |
342
|
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
PERLIO_FUNCS_DECL(PerlIO_utf8_strict) = { |
345
|
|
|
|
|
|
|
sizeof(PerlIO_funcs), |
346
|
|
|
|
|
|
|
"utf8_strict", |
347
|
|
|
|
|
|
|
sizeof(PerlIOUnicode), |
348
|
|
|
|
|
|
|
PERLIO_K_BUFFERED|PERLIO_K_UTF8, |
349
|
|
|
|
|
|
|
PerlIOUnicode_pushed, |
350
|
|
|
|
|
|
|
PerlIOBuf_popped, |
351
|
|
|
|
|
|
|
PerlIOBuf_open, |
352
|
|
|
|
|
|
|
PerlIOBase_binmode, |
353
|
|
|
|
|
|
|
NULL, |
354
|
|
|
|
|
|
|
PerlIOBase_fileno, |
355
|
|
|
|
|
|
|
PerlIOBuf_dup, |
356
|
|
|
|
|
|
|
PerlIOBuf_read, |
357
|
|
|
|
|
|
|
PerlIOBase_unread, |
358
|
|
|
|
|
|
|
PerlIOBuf_write, |
359
|
|
|
|
|
|
|
PerlIOBuf_seek, |
360
|
|
|
|
|
|
|
PerlIOBuf_tell, |
361
|
|
|
|
|
|
|
PerlIOBuf_close, |
362
|
|
|
|
|
|
|
PerlIOBuf_flush, |
363
|
|
|
|
|
|
|
PerlIOUnicode_fill, |
364
|
|
|
|
|
|
|
PerlIOBase_eof, |
365
|
|
|
|
|
|
|
PerlIOBase_error, |
366
|
|
|
|
|
|
|
PerlIOBase_clearerr, |
367
|
|
|
|
|
|
|
PerlIOBase_setlinebuf, |
368
|
|
|
|
|
|
|
PerlIOBuf_get_base, |
369
|
|
|
|
|
|
|
PerlIOBuf_bufsiz, |
370
|
|
|
|
|
|
|
PerlIOBuf_get_ptr, |
371
|
|
|
|
|
|
|
PerlIOBuf_get_cnt, |
372
|
|
|
|
|
|
|
PerlIOBuf_set_ptrcnt, |
373
|
|
|
|
|
|
|
}; |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
MODULE = PerlIO::utf8_strict |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
PROTOTYPES: DISABLE |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
BOOT: |
380
|
8
|
|
|
|
|
|
PerlIO_define_layer(aTHX_ (PerlIO_funcs*)&PerlIO_utf8_strict); |
381
|
|
|
|
|
|
|
|