File Coverage

third_party/modest/source/myhtml/parser.c
Criterion Covered Total %
statement 82 149 55.0
branch 33 74 44.5
condition n/a
subroutine n/a
pod n/a
total 115 223 51.5


line stmt bran cond sub pod time code
1             /*
2             Copyright (C) 2015-2017 Alexander Borisov
3            
4             This library is free software; you can redistribute it and/or
5             modify it under the terms of the GNU Lesser General Public
6             License as published by the Free Software Foundation; either
7             version 2.1 of the License, or (at your option) any later version.
8            
9             This library is distributed in the hope that it will be useful,
10             but WITHOUT ANY WARRANTY; without even the implied warranty of
11             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12             Lesser General Public License for more details.
13            
14             You should have received a copy of the GNU Lesser General Public
15             License along with this library; if not, write to the Free Software
16             Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17            
18             Author: lex.borisov@gmail.com (Alexander Borisov)
19             */
20              
21             #include "myhtml/parser.h"
22              
23 1685           void myhtml_parser_stream(mythread_id_t thread_id, void* ctx)
24             {
25 1685           mythread_queue_node_t *qnode = (mythread_queue_node_t*)ctx;
26            
27 1685 50         if((((myhtml_tree_t*)(qnode->context))->parse_flags & MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE) == 0) {
28 2288 100         while(myhtml_rules_tree_dispatcher(qnode->context, qnode->args)){}
29             }
30 1685           }
31              
32 186           size_t myhtml_parser_token_data_to_string_lowercase(myhtml_tree_t *tree, mycore_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length)
33             {
34 186           mycore_incoming_buffer_t *buffer = mycore_incoming_buffer_find_by_position(tree->incoming_buf_first, begin);
35 186           size_t relative_begin = begin - buffer->offset;
36            
37             // if the token data lies entirely within one buffer, append it all at once
38 186 50         if((relative_begin + length) <= buffer->size) {
39 186 50         if(tree->encoding == MyENCODING_UTF_8)
40 186           myhtml_string_append_lowercase_with_preprocessing(str, &buffer->data[relative_begin], length, proc_entry->emit_null_char);
41             else
42 0           myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res,
43 0           &buffer->data[relative_begin], length,
44 0           proc_entry->encoding, proc_entry->emit_null_char);
45            
46 186           return str->length;
47             }
48            
49 0           size_t save_position = 0;
50             // if the data are spread across multiple buffers, then join them
51 0 0         while(buffer) {
52 0 0         if((relative_begin + length) > buffer->size)
53             {
54 0           size_t relative_end = (buffer->size - relative_begin);
55 0           length -= relative_end;
56            
57 0           size_t tmp_offset = myhtml_string_before_append_any_preprocessing(str, &buffer->data[relative_begin], relative_end, save_position);
58            
59 0 0         if(relative_end > 0) {
60 0 0         if(tree->encoding == MyENCODING_UTF_8)
61 0           save_position = myhtml_string_append_lowercase_with_preprocessing(str, &buffer->data[(relative_begin + tmp_offset)], (relative_end - tmp_offset), proc_entry->emit_null_char);
62             else
63 0           save_position = myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res,
64 0           &buffer->data[(relative_begin + tmp_offset)], (relative_end - tmp_offset),
65 0           proc_entry->encoding, proc_entry->emit_null_char);
66             }
67            
68 0           relative_begin = 0;
69 0           buffer = buffer->next;
70             }
71             else {
72 0           size_t tmp_offset = myhtml_string_before_append_any_preprocessing(str, &buffer->data[relative_begin], length, save_position);
73            
74 0 0         if(length > 0) {
75 0 0         if(tree->encoding == MyENCODING_UTF_8)
76 0           myhtml_string_append_lowercase_with_preprocessing(str, &buffer->data[(relative_begin + tmp_offset)], (length - tmp_offset), proc_entry->emit_null_char);
77             else
78 0           myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res,
79 0           &buffer->data[(relative_begin + tmp_offset)], (length - tmp_offset),
80 0           proc_entry->encoding, proc_entry->emit_null_char);
81             }
82            
83 0           break;
84             }
85             }
86            
87 0           return str->length;
88             }
89              
90 61           size_t myhtml_parser_token_data_to_string(myhtml_tree_t *tree, mycore_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length)
91             {
92 61           mycore_incoming_buffer_t *buffer = mycore_incoming_buffer_find_by_position(tree->incoming_buf_first, begin);
93 61           size_t relative_begin = begin - buffer->offset;
94            
95             // if the token data lies entirely within one buffer, append it all at once
96 61 50         if((relative_begin + length) <= buffer->size) {
97 61 50         if(tree->encoding == MyENCODING_UTF_8)
98 61           myhtml_string_append_with_preprocessing(str, &buffer->data[relative_begin], length, proc_entry->emit_null_char);
99             else
100 0           myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res,
101 0           &buffer->data[relative_begin], length,
102 0           proc_entry->encoding, proc_entry->emit_null_char);
103            
104 61           return str->length;
105             }
106            
107 0           size_t save_position = 0;
108             // if the data are spread across multiple buffers, then join them
109 0 0         while(buffer) {
110 0 0         if((relative_begin + length) > buffer->size)
111             {
112 0           size_t relative_end = (buffer->size - relative_begin);
113 0           length -= relative_end;
114            
115 0           size_t tmp_offset = myhtml_string_before_append_any_preprocessing(str, &buffer->data[relative_begin], relative_end, save_position);
116            
117 0 0         if(relative_end > 0) {
118 0 0         if(tree->encoding == MyENCODING_UTF_8)
119 0           save_position = myhtml_string_append_with_preprocessing(str, &buffer->data[(relative_begin + tmp_offset)], (relative_end - tmp_offset), proc_entry->emit_null_char);
120             else
121 0           save_position = myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res,
122 0           &buffer->data[(relative_begin + tmp_offset)],
123             (relative_end - tmp_offset),
124 0           proc_entry->encoding, proc_entry->emit_null_char);
125             }
126            
127 0           relative_begin = 0;
128 0           buffer = buffer->next;
129             }
130             else {
131 0           size_t tmp_offset = myhtml_string_before_append_any_preprocessing(str, &buffer->data[relative_begin], length, save_position);
132            
133 0 0         if(length > 0) {
134 0 0         if(tree->encoding == MyENCODING_UTF_8)
135 0           myhtml_string_append_with_preprocessing(str, &buffer->data[(relative_begin + tmp_offset)], (length - tmp_offset), proc_entry->emit_null_char);
136             else
137 0           myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res,
138 0           &buffer->data[(relative_begin + tmp_offset)], (length - tmp_offset),
139 0           proc_entry->encoding, proc_entry->emit_null_char);
140             }
141            
142 0           break;
143             }
144             }
145            
146 0           return str->length;
147             }
148              
149 884           size_t myhtml_parser_token_data_to_string_charef(myhtml_tree_t *tree, mycore_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length)
150             {
151 884           mycore_incoming_buffer_t *buffer = mycore_incoming_buffer_find_by_position(tree->incoming_buf_first, begin);
152 884           size_t relative_begin = begin - buffer->offset;
153            
154             // if the token data lies entirely within one buffer, process it all at once
155 884 100         if((relative_begin + length) <= buffer->size) {
156 883           myhtml_data_process(proc_entry, str, &buffer->data[relative_begin], length);
157 883           myhtml_data_process_end(proc_entry, str);
158            
159 883           return str->length;
160             }
161            
162             // if the data are spread across multiple buffers, then join them
163 2 50         while(buffer) {
164 2 100         if((relative_begin + length) > buffer->size)
165             {
166 1           size_t relative_end = (buffer->size - relative_begin);
167 1           length -= relative_end;
168            
169 1           myhtml_data_process(proc_entry, str, &buffer->data[relative_begin], relative_end);
170            
171 1           relative_begin = 0;
172 1           buffer = buffer->next;
173             }
174             else {
175 1           myhtml_data_process(proc_entry, str, &buffer->data[relative_begin], length);
176 1           break;
177             }
178             }
179            
180 1           myhtml_data_process_end(proc_entry, str);
181            
182 1           return str->length;
183             }
184              
185 1685           void myhtml_parser_worker(mythread_id_t thread_id, void* ctx)
186             {
187 1685           mythread_queue_node_t *qnode = (mythread_queue_node_t*)ctx;
188            
189 1685           myhtml_tree_t* tree = qnode->context;
190 1685           myhtml_token_node_t* token = qnode->args;
191            
192             /*
193             * The tree cannot be built without tokens.
194             *
195             * MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN == 3
196             * MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE == 1
197             *
198             * MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN includes MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE.
199             *
200             * If only MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE is set, a check against MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN alone
201             * still evaluates to true,
202             * so both bits, 1 and 2, have to be checked.
203             */
204 1685 50         if((tree->parse_flags & MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN) &&
205 0 0         (tree->parse_flags & 2))
206             {
207 0 0         if(tree->callback_before_token)
208 0           tree->callback_before_token_ctx = tree->callback_before_token(tree, token, tree->callback_before_token_ctx);
209            
210 0           token->type |= MyHTML_TOKEN_TYPE_DONE;
211            
212 0 0         if(tree->callback_after_token)
213 0           tree->callback_after_token_ctx = tree->callback_after_token(tree, token, tree->callback_after_token_ctx);
214            
215 0           return;
216             }
217            
218             size_t mchar_node_id;
219             #ifndef MyCORE_BUILD_WITHOUT_THREADS
220 1685 100         if(tree->myhtml->thread_batch)
221 22           mchar_node_id = tree->async_args[(thread_id + tree->myhtml->thread_batch->id_increase)].mchar_node_id;
222             else
223             #endif
224 1663           mchar_node_id = tree->async_args[thread_id].mchar_node_id;
225            
226 1685 50         if(tree->callback_before_token)
227 0           tree->callback_before_token_ctx = tree->callback_before_token(tree, token, tree->callback_before_token_ctx);
228            
229 1685 100         if(token->tag_id == MyHTML_TAG__TEXT ||
230 993 100         token->tag_id == MyHTML_TAG__COMMENT)
231 753           {
232 753           mycore_string_init(tree->mchar, mchar_node_id, &token->str, (token->raw_length + 1));
233            
234 753           token->attr_first = NULL;
235 753           token->attr_last = NULL;
236            
237             myhtml_data_process_entry_t proc_entry;
238 753           myhtml_data_process_entry_clean(&proc_entry);
239            
240 753           proc_entry.encoding = tree->encoding;
241            
242 753 100         if(token->type & MyHTML_TOKEN_TYPE_DATA) {
243 692           proc_entry.emit_null_char = true;
244            
245 692           myhtml_parser_token_data_to_string_charef(tree, &token->str, &proc_entry, token->raw_begin, token->raw_length);
246             }
247 61 50         else if(token->type & MyHTML_TOKEN_TYPE_RCDATA || token->type & MyHTML_TOKEN_TYPE_CDATA) {
    50          
248 0           myhtml_parser_token_data_to_string_charef(tree, &token->str, &proc_entry, token->raw_begin, token->raw_length);
249             }
250             else
251 61           myhtml_parser_token_data_to_string(tree, &token->str, &proc_entry, token->raw_begin, token->raw_length);
252             }
253 932 100         else if(token->attr_first)
254             {
255 150           mycore_string_clean_all(&token->str);
256            
257 150           myhtml_token_attr_t* attr = token->attr_first;
258             myhtml_data_process_entry_t proc_entry;
259            
260 388 100         while(attr)
261             {
262 238 100         if(attr->raw_key_length) {
263 186           myhtml_data_process_entry_clean(&proc_entry);
264 186           proc_entry.encoding = tree->encoding;
265            
266 186           mycore_string_init(tree->mchar, mchar_node_id, &attr->key, (attr->raw_key_length + 1));
267 186           myhtml_parser_token_data_to_string_lowercase(tree, &attr->key, &proc_entry, attr->raw_key_begin, attr->raw_key_length);
268             }
269             else
270 52           mycore_string_clean_all(&attr->key);
271            
272 238 100         if(attr->raw_value_length) {
273 192           myhtml_data_process_entry_clean(&proc_entry);
274 192           proc_entry.encoding = tree->encoding;
275 192           proc_entry.is_attributes = true;
276            
277 192           mycore_string_init(tree->mchar, mchar_node_id, &attr->value, (attr->raw_value_length + 1));
278 192           myhtml_parser_token_data_to_string_charef(tree, &attr->value, &proc_entry, attr->raw_value_begin, attr->raw_value_length);
279             }
280             else
281 46           mycore_string_clean_all(&attr->value);
282            
283 238           attr = attr->next;
284             }
285             }
286             else {
287 782           token->attr_first = NULL;
288 782           token->attr_last = NULL;
289            
290 782           mycore_string_clean_all(&token->str);
291             }
292            
293 1685           token->type |= MyHTML_TOKEN_TYPE_DONE;
294            
295 1685 50         if(tree->callback_after_token)
296 0           tree->callback_after_token_ctx = tree->callback_after_token(tree, token, tree->callback_after_token_ctx);
297             }
298              
299 0           void myhtml_parser_worker_stream(mythread_id_t thread_id, void* ctx)
300             {
301 0           mythread_queue_node_t *qnode = (mythread_queue_node_t*)ctx;
302            
303 0           myhtml_parser_worker(thread_id, qnode);
304 0           myhtml_parser_stream(thread_id, qnode);
305 0           }
306              
307