line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#include "EXTERN.h" |
2
|
|
|
|
|
|
|
#include "perl.h" |
3
|
|
|
|
|
|
|
#include "XSUB.h" |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
#include "ppport.h" |
6
|
|
|
|
|
|
|
#include |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
#define YES 1
#define NO 0

/*
 * How csv_build() will render a single argument:
 *   CSV_NULL    - written as an empty, unquoted field
 *   CSV_NUMERIC - written bare (no surrounding quotes)
 *   CSV_STRING  - written inside double quotes
 */
typedef enum {
    CSV_NULL,
    CSV_NUMERIC,
    CSV_STRING
} CSVTYPE;

/*
 * One field queued for output by csv_build(). `string` is NULL when
 * there is no text to copy (CSV_NULL fields and empty strings).
 */
typedef struct csvfield {
    char *string;
    CSVTYPE type;
} CSVFIELD;
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
MODULE = Text::CSV::Easy_XS PACKAGE = Text::CSV::Easy_XS |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
PROTOTYPES: DISABLE |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
SV * |
26
|
|
|
|
|
|
|
csv_build(...) |
27
|
|
|
|
|
|
|
CODE: |
28
|
|
|
|
|
|
|
// we will keep track of exactly how long the final string |
29
|
|
|
|
|
|
|
// needs to be. |
30
|
5
|
|
|
|
|
|
int finallength = 0; |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
// turn on the UTF8 flag if we detect any UTF8 strings. |
33
|
5
|
|
|
|
|
|
bool isutf8 = NO; |
34
|
|
|
|
|
|
|
|
35
|
5
|
|
|
|
|
|
CSVFIELD fields[items]; |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
int i; |
38
|
18
|
100
|
|
|
|
|
for (i = 0; i < items; i++) { |
39
|
13
|
|
|
|
|
|
svtype svt = SvTYPE(ST(i)); |
40
|
|
|
|
|
|
|
|
41
|
13
|
50
|
|
|
|
|
if (SvROK(ST(i))) croak("not a string"); |
42
|
|
|
|
|
|
|
|
43
|
13
|
100
|
|
|
|
|
if (SvUTF8(ST(i))) isutf8 = YES; |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
// SVt_NULL will be treated as an undef. |
46
|
13
|
100
|
|
|
|
|
if (svt == SVt_NULL) { |
47
|
1
|
|
|
|
|
|
CSVFIELD field = {NULL,CSV_NULL}; |
48
|
1
|
|
|
|
|
|
fields[i] = field; |
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
else { |
51
|
|
|
|
|
|
|
STRLEN length; |
52
|
12
|
100
|
|
|
|
|
char *string = SvPV(ST(i), length); |
53
|
12
|
50
|
|
|
|
|
if (string == NULL) croak("could not find a string for argument %d", i + 1); |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
// if the length is zero, we'll treat it as an empty string. |
56
|
12
|
100
|
|
|
|
|
if (length == 0) { |
57
|
1
|
|
|
|
|
|
CSVFIELD field = {NULL,CSV_STRING}; |
58
|
1
|
|
|
|
|
|
fields[i] = field; |
59
|
|
|
|
|
|
|
|
60
|
1
|
|
|
|
|
|
finallength += 2; // beginning and trailing quote |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
else { |
63
|
11
|
|
|
|
|
|
CSVTYPE csvtype = CSV_NUMERIC; |
64
|
|
|
|
|
|
|
char *ptr; |
65
|
51
|
100
|
|
|
|
|
for (ptr = string; *ptr != '\0'; ptr++) { |
66
|
40
|
100
|
|
|
|
|
if (!isdigit(*ptr)) { |
67
|
34
|
|
|
|
|
|
csvtype = CSV_STRING; |
68
|
|
|
|
|
|
|
} |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
// if we encounter a double quote, we'll need to escape it, so add |
71
|
|
|
|
|
|
|
// one to the length to account for it. |
72
|
40
|
100
|
|
|
|
|
if (csvtype == CSV_STRING && *ptr == '"') length++; |
|
|
100
|
|
|
|
|
|
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
|
75
|
11
|
|
|
|
|
|
CSVFIELD field = {string,csvtype}; |
76
|
11
|
|
|
|
|
|
fields[i] = field; |
77
|
|
|
|
|
|
|
|
78
|
11
|
|
|
|
|
|
finallength += length; |
79
|
12
|
100
|
|
|
|
|
if (csvtype == CSV_STRING) finallength += 2; // beginning and trailing quote |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
5
|
|
|
|
|
|
finallength += (items - 1); // commas |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
char *outstring; |
87
|
5
|
|
|
|
|
|
Newx(outstring, finallength + 1, char); |
88
|
|
|
|
|
|
|
|
89
|
5
|
|
|
|
|
|
char *optr = outstring; |
90
|
18
|
100
|
|
|
|
|
for (i = 0; i < items; i++) { |
91
|
|
|
|
|
|
|
// record separator |
92
|
13
|
100
|
|
|
|
|
if (i != 0) { |
93
|
8
|
|
|
|
|
|
*optr++ = ','; |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
|
96
|
13
|
|
|
|
|
|
CSVFIELD field = fields[i]; |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
// we will quote all strings. |
99
|
13
|
100
|
|
|
|
|
if (field.type == CSV_STRING) *optr++ = '"'; |
100
|
|
|
|
|
|
|
|
101
|
13
|
100
|
|
|
|
|
if (field.string != NULL) { |
102
|
|
|
|
|
|
|
char *ptr; |
103
|
51
|
100
|
|
|
|
|
for (ptr = field.string; *ptr != '\0'; ptr++) { |
104
|
40
|
|
|
|
|
|
*optr++ = *ptr; |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
// if we encounter a quote, we need to escape it. |
107
|
40
|
100
|
|
|
|
|
if (*ptr == '"') { |
108
|
2
|
|
|
|
|
|
*optr++ = '"'; |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
// closing quote |
114
|
13
|
100
|
|
|
|
|
if (field.type == CSV_STRING) *optr++ = '"'; |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
|
117
|
5
|
|
|
|
|
|
*optr = '\0'; |
118
|
|
|
|
|
|
|
|
119
|
5
|
|
|
|
|
|
SV *retval = newSVpvn(outstring, optr - outstring); |
120
|
5
|
|
|
|
|
|
Safefree(outstring); |
121
|
|
|
|
|
|
|
|
122
|
5
|
100
|
|
|
|
|
if (isutf8) SvUTF8_on(retval); |
123
|
|
|
|
|
|
|
|
124
|
5
|
|
|
|
|
|
RETVAL = retval; |
125
|
|
|
|
|
|
|
OUTPUT: |
126
|
|
|
|
|
|
|
RETVAL |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
void |
129
|
|
|
|
|
|
|
csv_parse(string) |
130
|
|
|
|
|
|
|
SV *string |
131
|
|
|
|
|
|
|
PPCODE: |
132
|
|
|
|
|
|
|
{ |
133
|
|
|
|
|
|
|
// do not allow references |
134
|
25
|
50
|
|
|
|
|
if (SvROK(string)) croak("not a string"); |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
// get the string and verify we have length > 0 |
137
|
|
|
|
|
|
|
STRLEN len; |
138
|
25
|
50
|
|
|
|
|
char *str = SvPV(string, len); |
139
|
25
|
50
|
|
|
|
|
if (len == 0) XSRETURN(0); |
140
|
|
|
|
|
|
|
|
141
|
25
|
|
|
|
|
|
int st_pos = 0; // keep track for ST(x) |
142
|
25
|
|
|
|
|
|
char *ptr = NULL; // tracks character in string |
143
|
25
|
|
|
|
|
|
char *field = NULL; // tracks current field being parsed |
144
|
|
|
|
|
|
|
|
145
|
25
|
|
|
|
|
|
bool isutf8 = SvUTF8(string) != 0; // SvUTF8 doesn't typecast consistently to bool across various archs |
146
|
25
|
|
|
|
|
|
bool quoted = NO; // is the field quoted? |
147
|
25
|
|
|
|
|
|
bool requires_unescape = NO; // did we encounter an escaped quote, e.g. some ""quote"" |
148
|
|
|
|
|
|
|
|
149
|
227
|
100
|
|
|
|
|
for ( ptr = str; *ptr != '\0'; ptr++ ) { |
150
|
214
|
100
|
|
|
|
|
if ( field == NULL ) { |
151
|
53
|
|
|
|
|
|
field = ptr; |
152
|
|
|
|
|
|
|
|
153
|
53
|
|
|
|
|
|
quoted = NO; |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
// a quoted string: "one","two","three" |
156
|
53
|
100
|
|
|
|
|
if (*ptr == '"') { |
157
|
19
|
|
|
|
|
|
quoted = YES; |
158
|
19
|
|
|
|
|
|
requires_unescape = NO; |
159
|
19
|
|
|
|
|
|
field++; |
160
|
19
|
|
|
|
|
|
continue; |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
// an undef value: one,,three |
163
|
34
|
100
|
|
|
|
|
else if (*ptr == ',') { |
164
|
1
|
50
|
|
|
|
|
EXTEND( SP, st_pos + 1 ); |
|
|
50
|
|
|
|
|
|
165
|
1
|
|
|
|
|
|
ST(st_pos++) = &PL_sv_undef; |
166
|
1
|
|
|
|
|
|
field = NULL; |
167
|
1
|
|
|
|
|
|
continue; |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
// an undef at the end with a trailing newline |
170
|
33
|
100
|
|
|
|
|
else if ( |
171
|
1
|
50
|
|
|
|
|
( *ptr == '\n' && *(ptr+1) == '\0' ) |
172
|
32
|
100
|
|
|
|
|
|| ( *ptr == '\r' && *(ptr+1) == '\n' && *(ptr+2) == '\0' ) |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
173
|
|
|
|
|
|
|
) { |
174
|
|
|
|
|
|
|
// undef is added later |
175
|
4
|
|
|
|
|
|
field = NULL; |
176
|
4
|
|
|
|
|
|
break; |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
// an unquoted string or number: one,2,3 |
179
|
|
|
|
|
|
|
else { |
180
|
|
|
|
|
|
|
// do nothing |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
|
184
|
190
|
100
|
|
|
|
|
if ( !quoted ) { |
185
|
100
|
|
|
|
|
|
switch (*ptr) { |
186
|
|
|
|
|
|
|
case ',': |
187
|
21
|
50
|
|
|
|
|
EXTEND( SP, st_pos + 1 ); |
|
|
50
|
|
|
|
|
|
188
|
21
|
|
|
|
|
|
ST(st_pos++) = sv_2mortal( newSVpvn( field, ptr - field ) ); |
189
|
21
|
|
|
|
|
|
field = NULL; |
190
|
21
|
|
|
|
|
|
break; |
191
|
|
|
|
|
|
|
case '"': |
192
|
2
|
|
|
|
|
|
croak("quote found in middle of the field: %s\n", field); |
193
|
|
|
|
|
|
|
break; |
194
|
|
|
|
|
|
|
case '\n': { |
195
|
|
|
|
|
|
|
// allow an optional trailing newline |
196
|
3
|
100
|
|
|
|
|
if (*(ptr+1) == '\0') { |
197
|
|
|
|
|
|
|
// handle the case when the provide a CRLF |
198
|
2
|
50
|
|
|
|
|
if (ptr > field && *(ptr-1) == '\r') { |
|
|
100
|
|
|
|
|
|
199
|
1
|
|
|
|
|
|
ptr--; |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
// goto is evil, but in this case, use it to exit |
203
|
|
|
|
|
|
|
// a nested loop. I prefer a switch here, and I don't |
204
|
|
|
|
|
|
|
// want to add additional logic to the for conditional. |
205
|
|
|
|
|
|
|
// I feel guilty if that makes you feel any better. |
206
|
2
|
|
|
|
|
|
goto outsidefor; |
207
|
|
|
|
|
|
|
} |
208
|
|
|
|
|
|
|
else { |
209
|
96
|
|
|
|
|
|
croak("newline found in unquoted string: %s\n", field); |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
break; |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
else { |
217
|
90
|
100
|
|
|
|
|
if ( *ptr == '"' ) { |
218
|
|
|
|
|
|
|
// see if the quote is part of an escaped quote |
219
|
26
|
100
|
|
|
|
|
if ( *(ptr + 1) == '"' ) { |
220
|
8
|
|
|
|
|
|
requires_unescape = YES; |
221
|
8
|
|
|
|
|
|
ptr++; // increment to get past the escaped quote |
222
|
8
|
|
|
|
|
|
continue; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
// reached the end of the field |
225
|
18
|
100
|
|
|
|
|
else if ( *(ptr + 1) == ',' |
226
|
14
|
100
|
|
|
|
|
|| *(ptr + 1) == '\0' |
227
|
5
|
100
|
|
|
|
|
|| ( *(ptr + 1) == '\n' && *(ptr + 2) == '\0' ) // trailing newline |
|
|
50
|
|
|
|
|
|
228
|
3
|
100
|
|
|
|
|
|| ( *(ptr + 1) == '\r' && *(ptr + 2) == '\n' && *(ptr + 3) == '\0' ) // trailing CRLF |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
229
|
|
|
|
|
|
|
) { |
230
|
17
|
100
|
|
|
|
|
if (!requires_unescape) { |
231
|
|
|
|
|
|
|
// no additional processing required. just create a string. |
232
|
13
|
|
|
|
|
|
SV *tmp = sv_2mortal( newSVpvn( field, ptr - field ) ); |
233
|
13
|
100
|
|
|
|
|
if (isutf8) SvUTF8_on(tmp); |
234
|
13
|
50
|
|
|
|
|
EXTEND( SP, st_pos + 1 ); |
|
|
50
|
|
|
|
|
|
235
|
13
|
|
|
|
|
|
ST(st_pos++) = tmp; |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
else { |
238
|
|
|
|
|
|
|
// we need to convert any double quotes to single quotes |
239
|
4
|
|
|
|
|
|
int field_len = ptr - field; |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
char *tmp; |
242
|
4
|
|
|
|
|
|
Newx(tmp, field_len + 1, char); |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
int i; |
245
|
|
|
|
|
|
|
char *fieldptr; |
246
|
38
|
100
|
|
|
|
|
for (i = 0, fieldptr = field; fieldptr < ptr; fieldptr++) { |
247
|
34
|
|
|
|
|
|
tmp[i++] = *fieldptr; |
248
|
34
|
100
|
|
|
|
|
if (*fieldptr == '"') { |
249
|
8
|
|
|
|
|
|
fieldptr++; |
250
|
|
|
|
|
|
|
} |
251
|
|
|
|
|
|
|
} |
252
|
4
|
|
|
|
|
|
tmp[i] = '\0'; |
253
|
|
|
|
|
|
|
|
254
|
4
|
|
|
|
|
|
SV *tmpsv = sv_2mortal( newSVpvn( tmp, i ) ); |
255
|
4
|
100
|
|
|
|
|
if (isutf8) SvUTF8_on(tmpsv); |
256
|
4
|
50
|
|
|
|
|
EXTEND( SP, st_pos + 1 ); |
|
|
50
|
|
|
|
|
|
257
|
4
|
|
|
|
|
|
ST(st_pos++) = tmpsv; |
258
|
|
|
|
|
|
|
|
259
|
4
|
|
|
|
|
|
Safefree(tmp); |
260
|
|
|
|
|
|
|
} |
261
|
|
|
|
|
|
|
|
262
|
17
|
|
|
|
|
|
field = NULL; |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
// allow trailing newline. |
265
|
17
|
100
|
|
|
|
|
if (*(ptr+1) == '\n') break; |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
// move the pointer ahead so we don't process the comma |
268
|
15
|
100
|
|
|
|
|
if (*(ptr+1) == ',') ptr++; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
else { |
271
|
|
|
|
|
|
|
// put the quote back to make it easier to for the user. |
272
|
1
|
|
|
|
|
|
croak("invalid field: \"%s\n", field); |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
// No I don't, deal with it! |
279
|
|
|
|
|
|
|
// This label should only be used to break out of the switch inside the for |
280
|
|
|
|
|
|
|
// loop. |
281
|
|
|
|
|
|
|
outsidefor: |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
// if we hit the end of the string, the last field will not have been |
284
|
|
|
|
|
|
|
// added if it's a non-quoted string. |
285
|
21
|
100
|
|
|
|
|
if (field != NULL && !quoted) { |
|
|
100
|
|
|
|
|
|
286
|
5
|
50
|
|
|
|
|
EXTEND( SP, st_pos + 1 ); |
|
|
50
|
|
|
|
|
|
287
|
5
|
|
|
|
|
|
ST(st_pos++) = sv_2mortal( newSVpvn( field, ptr - field ) ); |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
// if field is not NULL, it means the string never terminated. |
290
|
16
|
100
|
|
|
|
|
else if (field != NULL) { |
291
|
1
|
|
|
|
|
|
croak("unterminated string: %s\n", str); |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
// if there was a trailing comma, add an undef |
294
|
15
|
100
|
|
|
|
|
else if (*(ptr-1) == ',') { |
295
|
2
|
50
|
|
|
|
|
EXTEND( SP, st_pos + 1 ); |
|
|
50
|
|
|
|
|
|
296
|
2
|
|
|
|
|
|
ST(st_pos++) = &PL_sv_undef; |
297
|
|
|
|
|
|
|
} |
298
|
|
|
|
|
|
|
|
299
|
20
|
|
|
|
|
|
XSRETURN(st_pos); |
300
|
|
|
|
|
|
|
} |