line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
/* |
2
|
|
|
|
|
|
|
* Copyright (C) the libgit2 contributors. All rights reserved. |
3
|
|
|
|
|
|
|
* |
4
|
|
|
|
|
|
|
* This file is part of libgit2, distributed under the GNU GPL v2 with |
5
|
|
|
|
|
|
|
* a Linking Exception. For full terms see the included COPYING file. |
6
|
|
|
|
|
|
|
*/ |
7
|
|
|
|
|
|
|
#include "buf_text.h" |
8
|
|
|
|
|
|
|
|
9
|
3
|
|
|
|
|
|
int git_buf_text_puts_escaped( |
10
|
|
|
|
|
|
|
git_buf *buf, |
11
|
|
|
|
|
|
|
const char *string, |
12
|
|
|
|
|
|
|
const char *esc_chars, |
13
|
|
|
|
|
|
|
const char *esc_with) |
14
|
|
|
|
|
|
|
{ |
15
|
|
|
|
|
|
|
const char *scan; |
16
|
3
|
|
|
|
|
|
size_t total = 0, esc_len = strlen(esc_with), count, alloclen; |
17
|
|
|
|
|
|
|
|
18
|
3
|
50
|
|
|
|
|
if (!string) |
19
|
0
|
|
|
|
|
|
return 0; |
20
|
|
|
|
|
|
|
|
21
|
9
|
100
|
|
|
|
|
for (scan = string; *scan; ) { |
22
|
|
|
|
|
|
|
/* count run of non-escaped characters */ |
23
|
6
|
|
|
|
|
|
count = strcspn(scan, esc_chars); |
24
|
6
|
|
|
|
|
|
total += count; |
25
|
6
|
|
|
|
|
|
scan += count; |
26
|
|
|
|
|
|
|
/* count run of escaped characters */ |
27
|
6
|
|
|
|
|
|
count = strspn(scan, esc_chars); |
28
|
6
|
|
|
|
|
|
total += count * (esc_len + 1); |
29
|
6
|
|
|
|
|
|
scan += count; |
30
|
|
|
|
|
|
|
} |
31
|
|
|
|
|
|
|
|
32
|
3
|
50
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, total, 1); |
|
|
50
|
|
|
|
|
|
33
|
3
|
50
|
|
|
|
|
if (git_buf_grow_by(buf, alloclen) < 0) |
34
|
0
|
|
|
|
|
|
return -1; |
35
|
|
|
|
|
|
|
|
36
|
9
|
100
|
|
|
|
|
for (scan = string; *scan; ) { |
37
|
6
|
|
|
|
|
|
count = strcspn(scan, esc_chars); |
38
|
|
|
|
|
|
|
|
39
|
6
|
|
|
|
|
|
memmove(buf->ptr + buf->size, scan, count); |
40
|
6
|
|
|
|
|
|
scan += count; |
41
|
6
|
|
|
|
|
|
buf->size += count; |
42
|
|
|
|
|
|
|
|
43
|
9
|
100
|
|
|
|
|
for (count = strspn(scan, esc_chars); count > 0; --count) { |
44
|
|
|
|
|
|
|
/* copy escape sequence */ |
45
|
3
|
|
|
|
|
|
memmove(buf->ptr + buf->size, esc_with, esc_len); |
46
|
3
|
|
|
|
|
|
buf->size += esc_len; |
47
|
|
|
|
|
|
|
/* copy character to be escaped */ |
48
|
3
|
|
|
|
|
|
buf->ptr[buf->size] = *scan; |
49
|
3
|
|
|
|
|
|
buf->size++; |
50
|
3
|
|
|
|
|
|
scan++; |
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
} |
53
|
|
|
|
|
|
|
|
54
|
3
|
|
|
|
|
|
buf->ptr[buf->size] = '\0'; |
55
|
|
|
|
|
|
|
|
56
|
3
|
|
|
|
|
|
return 0; |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
|
59
|
32
|
|
|
|
|
|
void git_buf_text_unescape(git_buf *buf) |
60
|
|
|
|
|
|
|
{ |
61
|
32
|
|
|
|
|
|
buf->size = git__unescape(buf->ptr); |
62
|
32
|
|
|
|
|
|
} |
63
|
|
|
|
|
|
|
|
64
|
3
|
|
|
|
|
|
int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src) |
65
|
|
|
|
|
|
|
{ |
66
|
3
|
|
|
|
|
|
const char *scan = src->ptr; |
67
|
3
|
|
|
|
|
|
const char *scan_end = src->ptr + src->size; |
68
|
3
|
|
|
|
|
|
const char *next = memchr(scan, '\r', src->size); |
69
|
|
|
|
|
|
|
size_t new_size; |
70
|
|
|
|
|
|
|
char *out; |
71
|
|
|
|
|
|
|
|
72
|
3
|
50
|
|
|
|
|
assert(tgt != src); |
73
|
|
|
|
|
|
|
|
74
|
3
|
50
|
|
|
|
|
if (!next) |
75
|
0
|
|
|
|
|
|
return git_buf_set(tgt, src->ptr, src->size); |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
/* reduce reallocs while in the loop */ |
78
|
3
|
50
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC_ADD(&new_size, src->size, 1); |
|
|
50
|
|
|
|
|
|
79
|
3
|
50
|
|
|
|
|
if (git_buf_grow(tgt, new_size) < 0) |
80
|
0
|
|
|
|
|
|
return -1; |
81
|
|
|
|
|
|
|
|
82
|
3
|
|
|
|
|
|
out = tgt->ptr; |
83
|
3
|
|
|
|
|
|
tgt->size = 0; |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
/* Find the next \r and copy whole chunk up to there to tgt */ |
86
|
6
|
100
|
|
|
|
|
for (; next; scan = next + 1, next = memchr(scan, '\r', scan_end - scan)) { |
87
|
3
|
50
|
|
|
|
|
if (next > scan) { |
88
|
3
|
|
|
|
|
|
size_t copylen = (size_t)(next - scan); |
89
|
3
|
|
|
|
|
|
memcpy(out, scan, copylen); |
90
|
3
|
|
|
|
|
|
out += copylen; |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
/* Do not drop \r unless it is followed by \n */ |
94
|
3
|
50
|
|
|
|
|
if (next + 1 == scan_end || next[1] != '\n') |
|
|
50
|
|
|
|
|
|
95
|
0
|
|
|
|
|
|
*out++ = '\r'; |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
/* Copy remaining input into dest */ |
99
|
3
|
50
|
|
|
|
|
if (scan < scan_end) { |
100
|
3
|
|
|
|
|
|
size_t remaining = (size_t)(scan_end - scan); |
101
|
3
|
|
|
|
|
|
memcpy(out, scan, remaining); |
102
|
3
|
|
|
|
|
|
out += remaining; |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
3
|
|
|
|
|
|
tgt->size = (size_t)(out - tgt->ptr); |
106
|
3
|
|
|
|
|
|
tgt->ptr[tgt->size] = '\0'; |
107
|
|
|
|
|
|
|
|
108
|
3
|
|
|
|
|
|
return 0; |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
|
111
|
3
|
|
|
|
|
|
int git_buf_text_lf_to_crlf(git_buf *tgt, const git_buf *src) |
112
|
|
|
|
|
|
|
{ |
113
|
3
|
|
|
|
|
|
const char *start = src->ptr; |
114
|
3
|
|
|
|
|
|
const char *end = start + src->size; |
115
|
3
|
|
|
|
|
|
const char *scan = start; |
116
|
3
|
|
|
|
|
|
const char *next = memchr(scan, '\n', src->size); |
117
|
|
|
|
|
|
|
size_t alloclen; |
118
|
|
|
|
|
|
|
|
119
|
3
|
50
|
|
|
|
|
assert(tgt != src); |
120
|
|
|
|
|
|
|
|
121
|
3
|
50
|
|
|
|
|
if (!next) |
122
|
0
|
|
|
|
|
|
return git_buf_set(tgt, src->ptr, src->size); |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
/* attempt to reduce reallocs while in the loop */ |
125
|
3
|
50
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, src->size, src->size >> 4); |
|
|
50
|
|
|
|
|
|
126
|
3
|
50
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, alloclen, 1); |
|
|
50
|
|
|
|
|
|
127
|
3
|
50
|
|
|
|
|
if (git_buf_grow(tgt, alloclen) < 0) |
128
|
0
|
|
|
|
|
|
return -1; |
129
|
3
|
|
|
|
|
|
tgt->size = 0; |
130
|
|
|
|
|
|
|
|
131
|
6
|
100
|
|
|
|
|
for (; next; scan = next + 1, next = memchr(scan, '\n', end - scan)) { |
132
|
3
|
|
|
|
|
|
size_t copylen = next - scan; |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
/* if we find mixed line endings, carry on */ |
135
|
3
|
50
|
|
|
|
|
if (copylen && next[-1] == '\r') |
|
|
50
|
|
|
|
|
|
136
|
0
|
|
|
|
|
|
copylen--; |
137
|
|
|
|
|
|
|
|
138
|
3
|
50
|
|
|
|
|
GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, copylen, 3); |
|
|
50
|
|
|
|
|
|
139
|
3
|
50
|
|
|
|
|
if (git_buf_grow_by(tgt, alloclen) < 0) |
140
|
0
|
|
|
|
|
|
return -1; |
141
|
|
|
|
|
|
|
|
142
|
3
|
50
|
|
|
|
|
if (copylen) { |
143
|
3
|
|
|
|
|
|
memcpy(tgt->ptr + tgt->size, scan, copylen); |
144
|
3
|
|
|
|
|
|
tgt->size += copylen; |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
|
147
|
3
|
|
|
|
|
|
tgt->ptr[tgt->size++] = '\r'; |
148
|
3
|
|
|
|
|
|
tgt->ptr[tgt->size++] = '\n'; |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
|
151
|
3
|
|
|
|
|
|
tgt->ptr[tgt->size] = '\0'; |
152
|
3
|
|
|
|
|
|
return git_buf_put(tgt, scan, end - scan); |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
36
|
|
|
|
|
|
int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strings) |
156
|
|
|
|
|
|
|
{ |
157
|
|
|
|
|
|
|
size_t i; |
158
|
|
|
|
|
|
|
const char *str, *pfx; |
159
|
|
|
|
|
|
|
|
160
|
36
|
|
|
|
|
|
git_buf_clear(buf); |
161
|
|
|
|
|
|
|
|
162
|
36
|
50
|
|
|
|
|
if (!strings || !strings->count) |
|
|
50
|
|
|
|
|
|
163
|
0
|
|
|
|
|
|
return 0; |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
/* initialize common prefix to first string */ |
166
|
36
|
50
|
|
|
|
|
if (git_buf_sets(buf, strings->strings[0]) < 0) |
167
|
0
|
|
|
|
|
|
return -1; |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
/* go through the rest of the strings, truncating to shared prefix */ |
170
|
36
|
100
|
|
|
|
|
for (i = 1; i < strings->count; ++i) { |
171
|
|
|
|
|
|
|
|
172
|
3
|
50
|
|
|
|
|
for (str = strings->strings[i], pfx = buf->ptr; |
173
|
3
|
50
|
|
|
|
|
*str && *str == *pfx; str++, pfx++) |
174
|
|
|
|
|
|
|
/* scanning */; |
175
|
|
|
|
|
|
|
|
176
|
3
|
|
|
|
|
|
git_buf_truncate(buf, pfx - buf->ptr); |
177
|
|
|
|
|
|
|
|
178
|
3
|
50
|
|
|
|
|
if (!buf->size) |
179
|
3
|
|
|
|
|
|
break; |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
|
182
|
36
|
|
|
|
|
|
return 0; |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
|
185
|
10
|
|
|
|
|
|
bool git_buf_text_is_binary(const git_buf *buf) |
186
|
|
|
|
|
|
|
{ |
187
|
10
|
|
|
|
|
|
const char *scan = buf->ptr, *end = buf->ptr + buf->size; |
188
|
|
|
|
|
|
|
git_bom_t bom; |
189
|
10
|
|
|
|
|
|
int printable = 0, nonprintable = 0; |
190
|
|
|
|
|
|
|
|
191
|
10
|
|
|
|
|
|
scan += git_buf_text_detect_bom(&bom, buf); |
192
|
|
|
|
|
|
|
|
193
|
10
|
50
|
|
|
|
|
if (bom > GIT_BOM_UTF8) |
194
|
0
|
|
|
|
|
|
return 1; |
195
|
|
|
|
|
|
|
|
196
|
289
|
100
|
|
|
|
|
while (scan < end) { |
197
|
279
|
|
|
|
|
|
unsigned char c = *scan++; |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
/* Printable characters are those above SPACE (0x1F) excluding DEL, |
200
|
|
|
|
|
|
|
* and including BS, ESC and FF. |
201
|
|
|
|
|
|
|
*/ |
202
|
279
|
100
|
|
|
|
|
if ((c > 0x1F && c != 127) || c == '\b' || c == '\033' || c == '\014') |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
203
|
276
|
|
|
|
|
|
printable++; |
204
|
3
|
50
|
|
|
|
|
else if (c == '\0') |
205
|
0
|
|
|
|
|
|
return true; |
206
|
3
|
50
|
|
|
|
|
else if (!git__isspace(c)) |
207
|
0
|
|
|
|
|
|
nonprintable++; |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
10
|
|
|
|
|
|
return ((printable >> 7) < nonprintable); |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
|
213
|
60
|
|
|
|
|
|
bool git_buf_text_contains_nul(const git_buf *buf) |
214
|
|
|
|
|
|
|
{ |
215
|
60
|
|
|
|
|
|
return (memchr(buf->ptr, '\0', buf->size) != NULL); |
216
|
|
|
|
|
|
|
} |
217
|
|
|
|
|
|
|
|
218
|
2755
|
|
|
|
|
|
int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf) |
219
|
|
|
|
|
|
|
{ |
220
|
|
|
|
|
|
|
const char *ptr; |
221
|
|
|
|
|
|
|
size_t len; |
222
|
|
|
|
|
|
|
|
223
|
2755
|
|
|
|
|
|
*bom = GIT_BOM_NONE; |
224
|
|
|
|
|
|
|
/* need at least 2 bytes to look for any BOM */ |
225
|
2755
|
100
|
|
|
|
|
if (buf->size < 2) |
226
|
2494
|
|
|
|
|
|
return 0; |
227
|
|
|
|
|
|
|
|
228
|
261
|
|
|
|
|
|
ptr = buf->ptr; |
229
|
261
|
|
|
|
|
|
len = buf->size; |
230
|
|
|
|
|
|
|
|
231
|
261
|
|
|
|
|
|
switch (*ptr++) { |
232
|
|
|
|
|
|
|
case 0: |
233
|
0
|
0
|
|
|
|
|
if (len >= 4 && ptr[0] == 0 && ptr[1] == '\xFE' && ptr[2] == '\xFF') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
234
|
0
|
|
|
|
|
|
*bom = GIT_BOM_UTF32_BE; |
235
|
0
|
|
|
|
|
|
return 4; |
236
|
|
|
|
|
|
|
} |
237
|
0
|
|
|
|
|
|
break; |
238
|
|
|
|
|
|
|
case '\xEF': |
239
|
0
|
0
|
|
|
|
|
if (len >= 3 && ptr[0] == '\xBB' && ptr[1] == '\xBF') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
240
|
0
|
|
|
|
|
|
*bom = GIT_BOM_UTF8; |
241
|
0
|
|
|
|
|
|
return 3; |
242
|
|
|
|
|
|
|
} |
243
|
0
|
|
|
|
|
|
break; |
244
|
|
|
|
|
|
|
case '\xFE': |
245
|
0
|
0
|
|
|
|
|
if (*ptr == '\xFF') { |
246
|
0
|
|
|
|
|
|
*bom = GIT_BOM_UTF16_BE; |
247
|
0
|
|
|
|
|
|
return 2; |
248
|
|
|
|
|
|
|
} |
249
|
0
|
|
|
|
|
|
break; |
250
|
|
|
|
|
|
|
case '\xFF': |
251
|
0
|
0
|
|
|
|
|
if (*ptr != '\xFE') |
252
|
0
|
|
|
|
|
|
break; |
253
|
0
|
0
|
|
|
|
|
if (len >= 4 && ptr[1] == 0 && ptr[2] == 0) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
254
|
0
|
|
|
|
|
|
*bom = GIT_BOM_UTF32_LE; |
255
|
0
|
|
|
|
|
|
return 4; |
256
|
|
|
|
|
|
|
} else { |
257
|
0
|
|
|
|
|
|
*bom = GIT_BOM_UTF16_LE; |
258
|
0
|
|
|
|
|
|
return 2; |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
break; |
261
|
|
|
|
|
|
|
default: |
262
|
261
|
|
|
|
|
|
break; |
263
|
|
|
|
|
|
|
} |
264
|
|
|
|
|
|
|
|
265
|
261
|
|
|
|
|
|
return 0; |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
|
268
|
63
|
|
|
|
|
|
bool git_buf_text_gather_stats( |
269
|
|
|
|
|
|
|
git_buf_text_stats *stats, const git_buf *buf, bool skip_bom) |
270
|
|
|
|
|
|
|
{ |
271
|
63
|
|
|
|
|
|
const char *scan = buf->ptr, *end = buf->ptr + buf->size; |
272
|
|
|
|
|
|
|
int skip; |
273
|
|
|
|
|
|
|
|
274
|
63
|
|
|
|
|
|
memset(stats, 0, sizeof(*stats)); |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
/* BOM detection */ |
277
|
63
|
|
|
|
|
|
skip = git_buf_text_detect_bom(&stats->bom, buf); |
278
|
63
|
50
|
|
|
|
|
if (skip_bom) |
279
|
0
|
|
|
|
|
|
scan += skip; |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
/* Ignore EOF character */ |
282
|
63
|
50
|
|
|
|
|
if (buf->size > 0 && end[-1] == '\032') |
|
|
50
|
|
|
|
|
|
283
|
0
|
|
|
|
|
|
end--; |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
/* Counting loop */ |
286
|
2003
|
100
|
|
|
|
|
while (scan < end) { |
287
|
1940
|
|
|
|
|
|
unsigned char c = *scan++; |
288
|
|
|
|
|
|
|
|
289
|
1940
|
100
|
|
|
|
|
if (c > 0x1F && c != 0x7F) |
|
|
50
|
|
|
|
|
|
290
|
1889
|
|
|
|
|
|
stats->printable++; |
291
|
51
|
|
|
|
|
|
else switch (c) { |
292
|
|
|
|
|
|
|
case '\0': |
293
|
0
|
|
|
|
|
|
stats->nul++; |
294
|
0
|
|
|
|
|
|
stats->nonprintable++; |
295
|
0
|
|
|
|
|
|
break; |
296
|
|
|
|
|
|
|
case '\n': |
297
|
48
|
|
|
|
|
|
stats->lf++; |
298
|
48
|
|
|
|
|
|
break; |
299
|
|
|
|
|
|
|
case '\r': |
300
|
3
|
|
|
|
|
|
stats->cr++; |
301
|
3
|
50
|
|
|
|
|
if (scan < end && *scan == '\n') |
|
|
50
|
|
|
|
|
|
302
|
3
|
|
|
|
|
|
stats->crlf++; |
303
|
3
|
|
|
|
|
|
break; |
304
|
|
|
|
|
|
|
case '\t': case '\f': case '\v': case '\b': case 0x1b: /*ESC*/ |
305
|
0
|
|
|
|
|
|
stats->printable++; |
306
|
0
|
|
|
|
|
|
break; |
307
|
|
|
|
|
|
|
default: |
308
|
0
|
|
|
|
|
|
stats->nonprintable++; |
309
|
0
|
|
|
|
|
|
break; |
310
|
|
|
|
|
|
|
} |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
/* Treat files with a bare CR as binary */ |
314
|
63
|
50
|
|
|
|
|
return (stats->cr != stats->crlf || stats->nul > 0 || |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
315
|
63
|
|
|
|
|
|
((stats->printable >> 7) < stats->nonprintable)); |
316
|
|
|
|
|
|
|
} |