File Coverage

deps/libgit2/src/util/utf8.c
Criterion Covered Total %
statement 14 55 25.4
branch 7 38 18.4
condition n/a
subroutine n/a
pod n/a
total 21 93 22.5


line stmt bran cond sub pod time code
1             /*
2             * Copyright (C) the libgit2 contributors. All rights reserved.
3             *
4             * This file is part of libgit2, distributed under the GNU GPL v2 with
5             * a Linking Exception. For full terms see the included COPYING file.
6             */
7              
8             #include "utf8.h"
9              
10             #include "git2_util.h"
11              
12             /*
13             * git_utf8_iterate is taken from the utf8proc project,
14             * http://www.public-software-group.org/utf8proc
15             *
16             * Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
17             *
18             * Permission is hereby granted, free of charge, to any person obtaining a
19             * copy of this software and associated documentation files (the "Software"),
20             * to deal in the Software without restriction, including without limitation
21             * the rights to use, copy, modify, merge, publish, distribute, sublicense,
22             * and/or sell copies of the Software, and to permit persons to whom the
23             * Software is furnished to do so, subject to the following conditions:
24             *
25             * The above copyright notice and this permission notice shall be included in
26             * all copies or substantial portions of the Software.
27             *
28             * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29             * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30             * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31             * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32             * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
33             * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
34             * DEALINGS IN THE SOFTWARE.
35             */
36              
37             static const uint8_t utf8proc_utf8class[256] = {
38             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
47             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50             2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
51             2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
52             3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
53             4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0
54             };
55              
56 522           static int utf8_charlen(const uint8_t *str, size_t str_len)
57             {
58             uint8_t length;
59             size_t i;
60              
61 522           length = utf8proc_utf8class[str[0]];
62 522 50         if (!length)
63 0           return -1;
64              
65 522 50         if (str_len > 0 && length > str_len)
    50          
66 0           return -1;
67              
68 522 50         for (i = 1; i < length; i++) {
69 0 0         if ((str[i] & 0xC0) != 0x80)
70 0           return -1;
71             }
72              
73 522           return (int)length;
74             }
75              
76 0           int git_utf8_iterate(uint32_t *out, const char *_str, size_t str_len)
77             {
78 0           const uint8_t *str = (const uint8_t *)_str;
79 0           uint32_t uc = 0;
80             int length;
81              
82 0           *out = 0;
83              
84 0 0         if ((length = utf8_charlen(str, str_len)) < 0)
85 0           return -1;
86              
87 0           switch (length) {
88             case 1:
89 0           uc = str[0];
90 0           break;
91             case 2:
92 0           uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F);
93 0 0         if (uc < 0x80) uc = -1;
94 0           break;
95             case 3:
96 0           uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6)
97 0           + (str[2] & 0x3F);
98 0 0         if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000) ||
    0          
    0          
    0          
99 0 0         (uc >= 0xFDD0 && uc < 0xFDF0)) uc = -1;
100 0           break;
101             case 4:
102 0           uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12)
103 0           + ((str[2] & 0x3F) << 6) + (str[3] & 0x3F);
104 0 0         if (uc < 0x10000 || uc >= 0x110000) uc = -1;
    0          
105 0           break;
106             default:
107 0           return -1;
108             }
109              
110 0 0         if ((uc & 0xFFFF) >= 0xFFFE)
111 0           return -1;
112              
113 0           *out = uc;
114 0           return length;
115             }
116              
117 0           size_t git_utf8_char_length(const char *_str, size_t str_len)
118             {
119 0           const uint8_t *str = (const uint8_t *)_str;
120 0           size_t offset = 0, count = 0;
121              
122 0 0         while (offset < str_len) {
123 0           int length = utf8_charlen(str + offset, str_len - offset);
124              
125 0 0         if (length < 0)
126 0           length = 1;
127              
128 0           offset += length;
129 0           count++;
130             }
131              
132 0           return count;
133             }
134              
135 38           size_t git_utf8_valid_buf_length(const char *_str, size_t str_len)
136             {
137 38           const uint8_t *str = (const uint8_t *)_str;
138 38           size_t offset = 0;
139              
140 560 100         while (offset < str_len) {
141 522           int length = utf8_charlen(str + offset, str_len - offset);
142              
143 522 50         if (length < 0)
144 0           break;
145              
146 522           offset += length;
147             }
148              
149 38           return offset;
150             }