| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#include "EXTERN.h" |
|
2
|
|
|
|
|
|
|
#include "perl.h" |
|
3
|
|
|
|
|
|
|
#include "XSUB.h" |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
#include "ppport.h" |
|
6
|
|
|
|
|
|
|
#include |
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
/* Truth values assigned to the bool flags used by the XSUBs below. */
#define YES 1
#define NO 0

/*
 * How csv_build renders one argument:
 *   CSV_NULL    - the argument's SV type is SVt_NULL; nothing is emitted
 *   CSV_NUMERIC - every character is a decimal digit; emitted without quotes
 *   CSV_STRING  - anything else (including the empty string); emitted quoted
 */
typedef enum {
    CSV_NULL,
    CSV_NUMERIC,
    CSV_STRING
} CSVTYPE;

/*
 * One field queued for output by csv_build.  `string` points at the
 * argument's character data (it is not a copy) and is NULL when there is
 * nothing to copy (CSV_NULL fields and empty strings).
 */
typedef struct csvfield {
    char *string;
    CSVTYPE type;
} CSVFIELD;
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
MODULE = Text::CSV::Easy_XS PACKAGE = Text::CSV::Easy_XS |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
PROTOTYPES: DISABLE |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
SV * |
|
26
|
|
|
|
|
|
|
csv_build(...) |
|
27
|
|
|
|
|
|
|
CODE: |
|
28
|
|
|
|
|
|
|
// we will keep track of exactly how long the final string |
|
29
|
|
|
|
|
|
|
// needs to be. |
|
30
|
5
|
|
|
|
|
|
int finallength = 0; |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
// turn on the UTF8 flag if we detect any UTF8 strings. |
|
33
|
5
|
|
|
|
|
|
bool isutf8 = NO; |
|
34
|
|
|
|
|
|
|
|
|
35
|
5
|
|
|
|
|
|
CSVFIELD fields[items]; |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
int i; |
|
38
|
18
|
100
|
|
|
|
|
for (i = 0; i < items; i++) { |
|
39
|
13
|
|
|
|
|
|
svtype svt = SvTYPE(ST(i)); |
|
40
|
|
|
|
|
|
|
|
|
41
|
13
|
50
|
|
|
|
|
if (SvROK(ST(i))) croak("not a string"); |
|
42
|
|
|
|
|
|
|
|
|
43
|
13
|
100
|
|
|
|
|
if (SvUTF8(ST(i))) isutf8 = YES; |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
// SVt_NULL will be treated as an undef. |
|
46
|
13
|
100
|
|
|
|
|
if (svt == SVt_NULL) { |
|
47
|
1
|
|
|
|
|
|
CSVFIELD field = {NULL,CSV_NULL}; |
|
48
|
1
|
|
|
|
|
|
fields[i] = field; |
|
49
|
|
|
|
|
|
|
} |
|
50
|
|
|
|
|
|
|
else { |
|
51
|
|
|
|
|
|
|
STRLEN length; |
|
52
|
12
|
100
|
|
|
|
|
char *string = SvPV(ST(i), length); |
|
53
|
12
|
50
|
|
|
|
|
if (string == NULL) croak("could not find a string for argument %d", i + 1); |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
// if the length is zero, we'll treat it as an empty string. |
|
56
|
12
|
100
|
|
|
|
|
if (length == 0) { |
|
57
|
1
|
|
|
|
|
|
CSVFIELD field = {NULL,CSV_STRING}; |
|
58
|
1
|
|
|
|
|
|
fields[i] = field; |
|
59
|
|
|
|
|
|
|
|
|
60
|
1
|
|
|
|
|
|
finallength += 2; // beginning and trailing quote |
|
61
|
|
|
|
|
|
|
} |
|
62
|
|
|
|
|
|
|
else { |
|
63
|
11
|
|
|
|
|
|
CSVTYPE csvtype = CSV_NUMERIC; |
|
64
|
|
|
|
|
|
|
char *ptr; |
|
65
|
51
|
100
|
|
|
|
|
for (ptr = string; *ptr != '\0'; ptr++) { |
|
66
|
40
|
100
|
|
|
|
|
if (!isdigit(*ptr)) { |
|
67
|
34
|
|
|
|
|
|
csvtype = CSV_STRING; |
|
68
|
|
|
|
|
|
|
} |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
// if we encounter a double quote, we'll need to escape it, so add |
|
71
|
|
|
|
|
|
|
// one to the length to account for it. |
|
72
|
40
|
100
|
|
|
|
|
if (csvtype == CSV_STRING && *ptr == '"') length++; |
|
|
|
100
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
} |
|
74
|
|
|
|
|
|
|
|
|
75
|
11
|
|
|
|
|
|
CSVFIELD field = {string,csvtype}; |
|
76
|
11
|
|
|
|
|
|
fields[i] = field; |
|
77
|
|
|
|
|
|
|
|
|
78
|
11
|
|
|
|
|
|
finallength += length; |
|
79
|
12
|
100
|
|
|
|
|
if (csvtype == CSV_STRING) finallength += 2; // beginning and trailing quote |
|
80
|
|
|
|
|
|
|
} |
|
81
|
|
|
|
|
|
|
} |
|
82
|
|
|
|
|
|
|
} |
|
83
|
|
|
|
|
|
|
|
|
84
|
5
|
|
|
|
|
|
finallength += (items - 1); // commas |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
char *outstring; |
|
87
|
5
|
|
|
|
|
|
Newx(outstring, finallength + 1, char); |
|
88
|
|
|
|
|
|
|
|
|
89
|
5
|
|
|
|
|
|
char *optr = outstring; |
|
90
|
18
|
100
|
|
|
|
|
for (i = 0; i < items; i++) { |
|
91
|
|
|
|
|
|
|
// record separator |
|
92
|
13
|
100
|
|
|
|
|
if (i != 0) { |
|
93
|
8
|
|
|
|
|
|
*optr++ = ','; |
|
94
|
|
|
|
|
|
|
} |
|
95
|
|
|
|
|
|
|
|
|
96
|
13
|
|
|
|
|
|
CSVFIELD field = fields[i]; |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
// we will quote all strings. |
|
99
|
13
|
100
|
|
|
|
|
if (field.type == CSV_STRING) *optr++ = '"'; |
|
100
|
|
|
|
|
|
|
|
|
101
|
13
|
100
|
|
|
|
|
if (field.string != NULL) { |
|
102
|
|
|
|
|
|
|
char *ptr; |
|
103
|
51
|
100
|
|
|
|
|
for (ptr = field.string; *ptr != '\0'; ptr++) { |
|
104
|
40
|
|
|
|
|
|
*optr++ = *ptr; |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
// if we encounter a quote, we need to escape it. |
|
107
|
40
|
100
|
|
|
|
|
if (*ptr == '"') { |
|
108
|
2
|
|
|
|
|
|
*optr++ = '"'; |
|
109
|
|
|
|
|
|
|
} |
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
} |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
// closing quote |
|
114
|
13
|
100
|
|
|
|
|
if (field.type == CSV_STRING) *optr++ = '"'; |
|
115
|
|
|
|
|
|
|
} |
|
116
|
|
|
|
|
|
|
|
|
117
|
5
|
|
|
|
|
|
*optr = '\0'; |
|
118
|
|
|
|
|
|
|
|
|
119
|
5
|
|
|
|
|
|
SV *retval = newSVpvn(outstring, optr - outstring); |
|
120
|
5
|
|
|
|
|
|
Safefree(outstring); |
|
121
|
|
|
|
|
|
|
|
|
122
|
5
|
100
|
|
|
|
|
if (isutf8) SvUTF8_on(retval); |
|
123
|
|
|
|
|
|
|
|
|
124
|
5
|
|
|
|
|
|
RETVAL = retval; |
|
125
|
|
|
|
|
|
|
OUTPUT: |
|
126
|
|
|
|
|
|
|
RETVAL |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
void |
|
129
|
|
|
|
|
|
|
csv_parse(string) |
|
130
|
|
|
|
|
|
|
SV *string |
|
131
|
|
|
|
|
|
|
PPCODE: |
|
132
|
|
|
|
|
|
|
{ |
|
133
|
|
|
|
|
|
|
// do not allow references |
|
134
|
25
|
50
|
|
|
|
|
if (SvROK(string)) croak("not a string"); |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
// get the string and verify we have length > 0 |
|
137
|
|
|
|
|
|
|
STRLEN len; |
|
138
|
25
|
50
|
|
|
|
|
char *str = SvPV(string, len); |
|
139
|
25
|
50
|
|
|
|
|
if (len == 0) XSRETURN(0); |
|
140
|
|
|
|
|
|
|
|
|
141
|
25
|
|
|
|
|
|
int st_pos = 0; // keep track for ST(x) |
|
142
|
25
|
|
|
|
|
|
char *ptr = NULL; // tracks character in string |
|
143
|
25
|
|
|
|
|
|
char *field = NULL; // tracks current field being parsed |
|
144
|
|
|
|
|
|
|
|
|
145
|
25
|
|
|
|
|
|
bool isutf8 = SvUTF8(string) != 0; // SvUTF8 doesn't typecast consistently to bool across various archs |
|
146
|
25
|
|
|
|
|
|
bool quoted = NO; // is the field quoted? |
|
147
|
25
|
|
|
|
|
|
bool requires_unescape = NO; // did we encounter an escaped quote, e.g. some ""quote"" |
|
148
|
|
|
|
|
|
|
|
|
149
|
227
|
100
|
|
|
|
|
for ( ptr = str; *ptr != '\0'; ptr++ ) { |
|
150
|
214
|
100
|
|
|
|
|
if ( field == NULL ) { |
|
151
|
53
|
|
|
|
|
|
field = ptr; |
|
152
|
|
|
|
|
|
|
|
|
153
|
53
|
|
|
|
|
|
quoted = NO; |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
// a quoted string: "one","two","three" |
|
156
|
53
|
100
|
|
|
|
|
if (*ptr == '"') { |
|
157
|
19
|
|
|
|
|
|
quoted = YES; |
|
158
|
19
|
|
|
|
|
|
requires_unescape = NO; |
|
159
|
19
|
|
|
|
|
|
field++; |
|
160
|
19
|
|
|
|
|
|
continue; |
|
161
|
|
|
|
|
|
|
} |
|
162
|
|
|
|
|
|
|
// an undef value: one,,three |
|
163
|
34
|
100
|
|
|
|
|
else if (*ptr == ',') { |
|
164
|
1
|
50
|
|
|
|
|
EXTEND( SP, st_pos + 1 ); |
|
|
|
50
|
|
|
|
|
|
|
165
|
1
|
|
|
|
|
|
ST(st_pos++) = &PL_sv_undef; |
|
166
|
1
|
|
|
|
|
|
field = NULL; |
|
167
|
1
|
|
|
|
|
|
continue; |
|
168
|
|
|
|
|
|
|
} |
|
169
|
|
|
|
|
|
|
// an undef at the end with a trailing newline |
|
170
|
33
|
100
|
|
|
|
|
else if ( |
|
171
|
1
|
50
|
|
|
|
|
( *ptr == '\n' && *(ptr+1) == '\0' ) |
|
172
|
32
|
100
|
|
|
|
|
|| ( *ptr == '\r' && *(ptr+1) == '\n' && *(ptr+2) == '\0' ) |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
) { |
|
174
|
|
|
|
|
|
|
// undef is added later |
|
175
|
4
|
|
|
|
|
|
field = NULL; |
|
176
|
4
|
|
|
|
|
|
break; |
|
177
|
|
|
|
|
|
|
} |
|
178
|
|
|
|
|
|
|
// an unquoted string or number: one,2,3 |
|
179
|
|
|
|
|
|
|
else { |
|
180
|
|
|
|
|
|
|
// do nothing |
|
181
|
|
|
|
|
|
|
} |
|
182
|
|
|
|
|
|
|
} |
|
183
|
|
|
|
|
|
|
|
|
184
|
190
|
100
|
|
|
|
|
if ( !quoted ) { |
|
185
|
100
|
|
|
|
|
|
switch (*ptr) { |
|
186
|
|
|
|
|
|
|
case ',': |
|
187
|
21
|
50
|
|
|
|
|
EXTEND( SP, st_pos + 1 ); |
|
|
|
50
|
|
|
|
|
|
|
188
|
21
|
|
|
|
|
|
ST(st_pos++) = sv_2mortal( newSVpvn( field, ptr - field ) ); |
|
189
|
21
|
|
|
|
|
|
field = NULL; |
|
190
|
21
|
|
|
|
|
|
break; |
|
191
|
|
|
|
|
|
|
case '"': |
|
192
|
2
|
|
|
|
|
|
croak("quote found in middle of the field: %s\n", field); |
|
193
|
|
|
|
|
|
|
break; |
|
194
|
|
|
|
|
|
|
case '\n': { |
|
195
|
|
|
|
|
|
|
// allow an optional trailing newline |
|
196
|
3
|
100
|
|
|
|
|
if (*(ptr+1) == '\0') { |
|
197
|
|
|
|
|
|
|
// handle the case when the provide a CRLF |
|
198
|
2
|
50
|
|
|
|
|
if (ptr > field && *(ptr-1) == '\r') { |
|
|
|
100
|
|
|
|
|
|
|
199
|
1
|
|
|
|
|
|
ptr--; |
|
200
|
|
|
|
|
|
|
} |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
// goto is evil, but in this case, use it to exit |
|
203
|
|
|
|
|
|
|
// a nested loop. I prefer a switch here, and I don't |
|
204
|
|
|
|
|
|
|
// want to add additional logic to the for conditional. |
|
205
|
|
|
|
|
|
|
// I feel guilty if that makes you feel any better. |
|
206
|
2
|
|
|
|
|
|
goto outsidefor; |
|
207
|
|
|
|
|
|
|
} |
|
208
|
|
|
|
|
|
|
else { |
|
209
|
96
|
|
|
|
|
|
croak("newline found in unquoted string: %s\n", field); |
|
210
|
|
|
|
|
|
|
} |
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
break; |
|
213
|
|
|
|
|
|
|
} |
|
214
|
|
|
|
|
|
|
} |
|
215
|
|
|
|
|
|
|
} |
|
216
|
|
|
|
|
|
|
else { |
|
217
|
90
|
100
|
|
|
|
|
if ( *ptr == '"' ) { |
|
218
|
|
|
|
|
|
|
// see if the quote is part of an escaped quote |
|
219
|
26
|
100
|
|
|
|
|
if ( *(ptr + 1) == '"' ) { |
|
220
|
8
|
|
|
|
|
|
requires_unescape = YES; |
|
221
|
8
|
|
|
|
|
|
ptr++; // increment to get past the escaped quote |
|
222
|
8
|
|
|
|
|
|
continue; |
|
223
|
|
|
|
|
|
|
} |
|
224
|
|
|
|
|
|
|
// reached the end of the field |
|
225
|
18
|
100
|
|
|
|
|
else if ( *(ptr + 1) == ',' |
|
226
|
14
|
100
|
|
|
|
|
|| *(ptr + 1) == '\0' |
|
227
|
5
|
100
|
|
|
|
|
|| ( *(ptr + 1) == '\n' && *(ptr + 2) == '\0' ) // trailing newline |
|
|
|
50
|
|
|
|
|
|
|
228
|
3
|
100
|
|
|
|
|
|| ( *(ptr + 1) == '\r' && *(ptr + 2) == '\n' && *(ptr + 3) == '\0' ) // trailing CRLF |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
) { |
|
230
|
17
|
100
|
|
|
|
|
if (!requires_unescape) { |
|
231
|
|
|
|
|
|
|
// no additional processing required. just create a string. |
|
232
|
13
|
|
|
|
|
|
SV *tmp = sv_2mortal( newSVpvn( field, ptr - field ) ); |
|
233
|
13
|
100
|
|
|
|
|
if (isutf8) SvUTF8_on(tmp); |
|
234
|
13
|
50
|
|
|
|
|
EXTEND( SP, st_pos + 1 ); |
|
|
|
50
|
|
|
|
|
|
|
235
|
13
|
|
|
|
|
|
ST(st_pos++) = tmp; |
|
236
|
|
|
|
|
|
|
} |
|
237
|
|
|
|
|
|
|
else { |
|
238
|
|
|
|
|
|
|
// we need to convert any double quotes to single quotes |
|
239
|
4
|
|
|
|
|
|
int field_len = ptr - field; |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
char *tmp; |
|
242
|
4
|
|
|
|
|
|
Newx(tmp, field_len + 1, char); |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
int i; |
|
245
|
|
|
|
|
|
|
char *fieldptr; |
|
246
|
38
|
100
|
|
|
|
|
for (i = 0, fieldptr = field; fieldptr < ptr; fieldptr++) { |
|
247
|
34
|
|
|
|
|
|
tmp[i++] = *fieldptr; |
|
248
|
34
|
100
|
|
|
|
|
if (*fieldptr == '"') { |
|
249
|
8
|
|
|
|
|
|
fieldptr++; |
|
250
|
|
|
|
|
|
|
} |
|
251
|
|
|
|
|
|
|
} |
|
252
|
4
|
|
|
|
|
|
tmp[i] = '\0'; |
|
253
|
|
|
|
|
|
|
|
|
254
|
4
|
|
|
|
|
|
SV *tmpsv = sv_2mortal( newSVpvn( tmp, i ) ); |
|
255
|
4
|
100
|
|
|
|
|
if (isutf8) SvUTF8_on(tmpsv); |
|
256
|
4
|
50
|
|
|
|
|
EXTEND( SP, st_pos + 1 ); |
|
|
|
50
|
|
|
|
|
|
|
257
|
4
|
|
|
|
|
|
ST(st_pos++) = tmpsv; |
|
258
|
|
|
|
|
|
|
|
|
259
|
4
|
|
|
|
|
|
Safefree(tmp); |
|
260
|
|
|
|
|
|
|
} |
|
261
|
|
|
|
|
|
|
|
|
262
|
17
|
|
|
|
|
|
field = NULL; |
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
// allow trailing newline. |
|
265
|
17
|
100
|
|
|
|
|
if (*(ptr+1) == '\n') break; |
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
// move the pointer ahead so we don't process the comma |
|
268
|
15
|
100
|
|
|
|
|
if (*(ptr+1) == ',') ptr++; |
|
269
|
|
|
|
|
|
|
} |
|
270
|
|
|
|
|
|
|
else { |
|
271
|
|
|
|
|
|
|
// put the quote back to make it easier to for the user. |
|
272
|
1
|
|
|
|
|
|
croak("invalid field: \"%s\n", field); |
|
273
|
|
|
|
|
|
|
} |
|
274
|
|
|
|
|
|
|
} |
|
275
|
|
|
|
|
|
|
} |
|
276
|
|
|
|
|
|
|
} |
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
// No I don't, deal with it! |
|
279
|
|
|
|
|
|
|
// This label should only be used to break out of the switch inside the for |
|
280
|
|
|
|
|
|
|
// loop. |
|
281
|
|
|
|
|
|
|
outsidefor: |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
// if we hit the end of the string, the last field will not have been |
|
284
|
|
|
|
|
|
|
// added if it's a non-quoted string. |
|
285
|
21
|
100
|
|
|
|
|
if (field != NULL && !quoted) { |
|
|
|
100
|
|
|
|
|
|
|
286
|
5
|
50
|
|
|
|
|
EXTEND( SP, st_pos + 1 ); |
|
|
|
50
|
|
|
|
|
|
|
287
|
5
|
|
|
|
|
|
ST(st_pos++) = sv_2mortal( newSVpvn( field, ptr - field ) ); |
|
288
|
|
|
|
|
|
|
} |
|
289
|
|
|
|
|
|
|
// if field is not NULL, it means the string never terminated. |
|
290
|
16
|
100
|
|
|
|
|
else if (field != NULL) { |
|
291
|
1
|
|
|
|
|
|
croak("unterminated string: %s\n", str); |
|
292
|
|
|
|
|
|
|
} |
|
293
|
|
|
|
|
|
|
// if there was a trailing comma, add an undef |
|
294
|
15
|
100
|
|
|
|
|
else if (*(ptr-1) == ',') { |
|
295
|
2
|
50
|
|
|
|
|
EXTEND( SP, st_pos + 1 ); |
|
|
|
50
|
|
|
|
|
|
|
296
|
2
|
|
|
|
|
|
ST(st_pos++) = &PL_sv_undef; |
|
297
|
|
|
|
|
|
|
} |
|
298
|
|
|
|
|
|
|
|
|
299
|
20
|
|
|
|
|
|
XSRETURN(st_pos); |
|
300
|
|
|
|
|
|
|
} |