File Coverage

third_party/modest/source/myhtml/parser.c

Criterion	Covered	Total	%
statement	82	149	55.0
branch	33	74	44.5
condition			n/a
subroutine			n/a
pod			n/a
total	115	223	51.5

line	stmt	bran	code
1			/*
2			Copyright (C) 2015-2017 Alexander Borisov
3
4			This library is free software; you can redistribute it and/or
5			modify it under the terms of the GNU Lesser General Public
6			License as published by the Free Software Foundation; either
7			version 2.1 of the License, or (at your option) any later version.
8
9			This library is distributed in the hope that it will be useful,
10			but WITHOUT ANY WARRANTY; without even the implied warranty of
11			MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12			Lesser General Public License for more details.
13
14			You should have received a copy of the GNU Lesser General Public
15			License along with this library; if not, write to the Free Software
16			Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
18			Author: lex.borisov@gmail.com (Alexander Borisov)
19			*/
20
21			#include "myhtml/parser.h"
22
23	1685		void myhtml_parser_stream(mythread_id_t thread_id, void* ctx)
24			{
25	1685		mythread_queue_node_t *qnode = (mythread_queue_node_t*)ctx;
26
27	1685	50	if((((myhtml_tree_t*)(qnode->context))->parse_flags & MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE) == 0) {
28	2288	100	while(myhtml_rules_tree_dispatcher(qnode->context, qnode->args)){}
29			}
30	1685		}
31
32	186		size_t myhtml_parser_token_data_to_string_lowercase(myhtml_tree_t *tree, mycore_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length)
33			{
34	186		mycore_incoming_buffer_t *buffer = mycore_incoming_buffer_find_by_position(tree->incoming_buf_first, begin);
35	186		size_t relative_begin = begin - buffer->offset;
36
37			// if token data length in one buffer then print them all at once
38	186	50	if((relative_begin + length) <= buffer->size) {
39	186	50	if(tree->encoding == MyENCODING_UTF_8)
40	186		myhtml_string_append_lowercase_with_preprocessing(str, &buffer->data[relative_begin], length, proc_entry->emit_null_char);
41			else
42	0		myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res,
43	0		&buffer->data[relative_begin], length,
44	0		proc_entry->encoding, proc_entry->emit_null_char);
45
46	186		return str->length;
47			}
48
49	0		size_t save_position = 0;
50			// if the data are spread across multiple buffers that join them
51	0	0	while(buffer) {
52	0	0	if((relative_begin + length) > buffer->size)
53			{
54	0		size_t relative_end = (buffer->size - relative_begin);
55	0		length -= relative_end;
56
57	0		size_t tmp_offset = myhtml_string_before_append_any_preprocessing(str, &buffer->data[relative_begin], relative_end, save_position);
58
59	0	0	if(relative_end > 0) {
60	0	0	if(tree->encoding == MyENCODING_UTF_8)
61	0		save_position = myhtml_string_append_lowercase_with_preprocessing(str, &buffer->data[(relative_begin + tmp_offset)], (relative_end - tmp_offset), proc_entry->emit_null_char);
62			else
63	0		save_position = myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res,
64	0		&buffer->data[(relative_begin + tmp_offset)], (relative_end - tmp_offset),
65	0		proc_entry->encoding, proc_entry->emit_null_char);
66			}
67
68	0		relative_begin = 0;
69	0		buffer = buffer->next;
70			}
71			else {
72	0		size_t tmp_offset = myhtml_string_before_append_any_preprocessing(str, &buffer->data[relative_begin], length, save_position);
73
74	0	0	if(length > 0) {
75	0	0	if(tree->encoding == MyENCODING_UTF_8)
76	0		myhtml_string_append_lowercase_with_preprocessing(str, &buffer->data[(relative_begin + tmp_offset)], (length - tmp_offset), proc_entry->emit_null_char);
77			else
78	0		myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res,
79	0		&buffer->data[(relative_begin + tmp_offset)], (length - tmp_offset),
80	0		proc_entry->encoding, proc_entry->emit_null_char);
81			}
82
83	0		break;
84			}
85			}
86
87	0		return str->length;
88			}
89
90	61		size_t myhtml_parser_token_data_to_string(myhtml_tree_t *tree, mycore_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length)
91			{
92	61		mycore_incoming_buffer_t *buffer = mycore_incoming_buffer_find_by_position(tree->incoming_buf_first, begin);
93	61		size_t relative_begin = begin - buffer->offset;
94
95			// if token data length in one buffer then print them all at once
96	61	50	if((relative_begin + length) <= buffer->size) {
97	61	50	if(tree->encoding == MyENCODING_UTF_8)
98	61		myhtml_string_append_with_preprocessing(str, &buffer->data[relative_begin], length, proc_entry->emit_null_char);
99			else
100	0		myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res,
101	0		&buffer->data[relative_begin], length,
102	0		proc_entry->encoding, proc_entry->emit_null_char);
103
104	61		return str->length;
105			}
106
107	0		size_t save_position = 0;
108			// if the data are spread across multiple buffers that join them
109	0	0	while(buffer) {
110	0	0	if((relative_begin + length) > buffer->size)
111			{
112	0		size_t relative_end = (buffer->size - relative_begin);
113	0		length -= relative_end;
114
115	0		size_t tmp_offset = myhtml_string_before_append_any_preprocessing(str, &buffer->data[relative_begin], relative_end, save_position);
116
117	0	0	if(relative_end > 0) {
118	0	0	if(tree->encoding == MyENCODING_UTF_8)
119	0		save_position = myhtml_string_append_with_preprocessing(str, &buffer->data[(relative_begin + tmp_offset)], (relative_end - tmp_offset), proc_entry->emit_null_char);
120			else
121	0		save_position = myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res,
122	0		&buffer->data[(relative_begin + tmp_offset)],
123			(relative_end - tmp_offset),
124	0		proc_entry->encoding, proc_entry->emit_null_char);
125			}
126
127	0		relative_begin = 0;
128	0		buffer = buffer->next;
129			}
130			else {
131	0		size_t tmp_offset = myhtml_string_before_append_any_preprocessing(str, &buffer->data[relative_begin], length, save_position);
132
133	0	0	if(length > 0) {
134	0	0	if(tree->encoding == MyENCODING_UTF_8)
135	0		myhtml_string_append_with_preprocessing(str, &buffer->data[(relative_begin + tmp_offset)], (length - tmp_offset), proc_entry->emit_null_char);
136			else
137	0		myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res,
138	0		&buffer->data[(relative_begin + tmp_offset)], (length - tmp_offset),
139	0		proc_entry->encoding, proc_entry->emit_null_char);
140			}
141
142	0		break;
143			}
144			}
145
146	0		return str->length;
147			}
148
149	884		size_t myhtml_parser_token_data_to_string_charef(myhtml_tree_t *tree, mycore_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length)
150			{
151	884		mycore_incoming_buffer_t *buffer = mycore_incoming_buffer_find_by_position(tree->incoming_buf_first, begin);
152	884		size_t relative_begin = begin - buffer->offset;
153
154			// if token data length in one buffer then print them all at once
155	884	100	if((relative_begin + length) <= buffer->size) {
156	883		myhtml_data_process(proc_entry, str, &buffer->data[relative_begin], length);
157	883		myhtml_data_process_end(proc_entry, str);
158
159	883		return str->length;
160			}
161
162			// if the data are spread across multiple buffers that join them
163	2	50	while(buffer) {
164	2	100	if((relative_begin + length) > buffer->size)
165			{
166	1		size_t relative_end = (buffer->size - relative_begin);
167	1		length -= relative_end;
168
169	1		myhtml_data_process(proc_entry, str, &buffer->data[relative_begin], relative_end);
170
171	1		relative_begin = 0;
172	1		buffer = buffer->next;
173			}
174			else {
175	1		myhtml_data_process(proc_entry, str, &buffer->data[relative_begin], length);
176	1		break;
177			}
178			}
179
180	1		myhtml_data_process_end(proc_entry, str);
181
182	1		return str->length;
183			}
184
185	1685		void myhtml_parser_worker(mythread_id_t thread_id, void* ctx)
186			{
187	1685		mythread_queue_node_t *qnode = (mythread_queue_node_t*)ctx;
188
189	1685		myhtml_tree_t* tree = qnode->context;
190	1685		myhtml_token_node_t* token = qnode->args;
191
192			/*
193			* Tree can not be built without tokens
194			*
195			* MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN == 3
196			* MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE == 1
197			*
198			* MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN include MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE
199			*
200			* if set only MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE and check only for MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN
201			* return true
202			* we need check both, 1 and 2
203			*/
204	1685	50	if((tree->parse_flags & MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN) &&
205	0	0	(tree->parse_flags & 2))
206			{
207	0	0	if(tree->callback_before_token)
208	0		tree->callback_before_token_ctx = tree->callback_before_token(tree, token, tree->callback_before_token_ctx);
209
210	0		token->type |= MyHTML_TOKEN_TYPE_DONE;
211
212	0	0	if(tree->callback_after_token)
213	0		tree->callback_after_token_ctx = tree->callback_after_token(tree, token, tree->callback_after_token_ctx);
214
215	0		return;
216			}
217
218			size_t mchar_node_id;
219			#ifndef MyCORE_BUILD_WITHOUT_THREADS
220	1685	100	if(tree->myhtml->thread_batch)
221	22		mchar_node_id = tree->async_args[(thread_id + tree->myhtml->thread_batch->id_increase)].mchar_node_id;
222			else
223			#endif
224	1663		mchar_node_id = tree->async_args[thread_id].mchar_node_id;
225
226	1685	50	if(tree->callback_before_token)
227	0		tree->callback_before_token_ctx = tree->callback_before_token(tree, token, tree->callback_before_token_ctx);
228
229	1685	100	if(token->tag_id == MyHTML_TAG__TEXT ||
230	993	100	token->tag_id == MyHTML_TAG__COMMENT)
231	753		{
232	753		mycore_string_init(tree->mchar, mchar_node_id, &token->str, (token->raw_length + 1));
233
234	753		token->attr_first = NULL;
235	753		token->attr_last = NULL;
236
237			myhtml_data_process_entry_t proc_entry;
238	753		myhtml_data_process_entry_clean(&proc_entry);
239
240	753		proc_entry.encoding = tree->encoding;
241
242	753	100	if(token->type & MyHTML_TOKEN_TYPE_DATA) {
243	692		proc_entry.emit_null_char = true;
244
245	692		myhtml_parser_token_data_to_string_charef(tree, &token->str, &proc_entry, token->raw_begin, token->raw_length);
246			}
247	61	50	else if(token->type & MyHTML_TOKEN_TYPE_RCDATA || token->type & MyHTML_TOKEN_TYPE_CDATA) {
		50
248	0		myhtml_parser_token_data_to_string_charef(tree, &token->str, &proc_entry, token->raw_begin, token->raw_length);
249			}
250			else
251	61		myhtml_parser_token_data_to_string(tree, &token->str, &proc_entry, token->raw_begin, token->raw_length);
252			}
253	932	100	else if(token->attr_first)
254			{
255	150		mycore_string_clean_all(&token->str);
256
257	150		myhtml_token_attr_t* attr = token->attr_first;
258			myhtml_data_process_entry_t proc_entry;
259
260	388	100	while(attr)
261			{
262	238	100	if(attr->raw_key_length) {
263	186		myhtml_data_process_entry_clean(&proc_entry);
264	186		proc_entry.encoding = tree->encoding;
265
266	186		mycore_string_init(tree->mchar, mchar_node_id, &attr->key, (attr->raw_key_length + 1));
267	186		myhtml_parser_token_data_to_string_lowercase(tree, &attr->key, &proc_entry, attr->raw_key_begin, attr->raw_key_length);
268			}
269			else
270	52		mycore_string_clean_all(&attr->key);
271
272	238	100	if(attr->raw_value_length) {
273	192		myhtml_data_process_entry_clean(&proc_entry);
274	192		proc_entry.encoding = tree->encoding;
275	192		proc_entry.is_attributes = true;
276
277	192		mycore_string_init(tree->mchar, mchar_node_id, &attr->value, (attr->raw_value_length + 1));
278	192		myhtml_parser_token_data_to_string_charef(tree, &attr->value, &proc_entry, attr->raw_value_begin, attr->raw_value_length);
279			}
280			else
281	46		mycore_string_clean_all(&attr->value);
282
283	238		attr = attr->next;
284			}
285			}
286			else {
287	782		token->attr_first = NULL;
288	782		token->attr_last = NULL;
289
290	782		mycore_string_clean_all(&token->str);
291			}
292
293	1685		token->type |= MyHTML_TOKEN_TYPE_DONE;
294
295	1685	50	if(tree->callback_after_token)
296	0		tree->callback_after_token_ctx = tree->callback_after_token(tree, token, tree->callback_after_token_ctx);
297			}
298
299	0		void myhtml_parser_worker_stream(mythread_id_t thread_id, void* ctx)
300			{
301	0		mythread_queue_node_t *qnode = (mythread_queue_node_t*)ctx;
302
303	0		myhtml_parser_worker(thread_id, qnode);
304	0		myhtml_parser_stream(thread_id, qnode);
305	0		}
306
307