line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#ifndef _XH_X2H_H_ |
2
|
|
|
|
|
|
|
#define _XH_X2H_H_ |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
#include "xh_config.h" |
5
|
|
|
|
|
|
|
#include "xh_core.h" |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
/* Maximum xpath length; xh_x2h_ctx_t.xpath is sized XH_X2H_XPATH_MAX_LEN + 1. */
#define XH_X2H_XPATH_MAX_LEN            1023

/*
 * Single-bit flags, each occupying a distinct bit so they can be OR-ed
 * together.  NOTE(review): presumably stored in xh_x2h_ctx_t.flags --
 * confirm against the parser implementation.
 */
#define XH_X2H_NORMALIZE_REF            0x01
#define XH_X2H_NORMALIZE_LINE_FEED      0x02
#define XH_X2H_FILTER_ENABLED           0x04
#define XH_X2H_FILTER_MATCHED           0x08
#define XH_X2H_ROOT_FOUND               0x10
#define XH_X2H_IS_NOT_BLANK             0x20
#define XH_X2H_DOCTYPE_FOUND            0x40
#define XH_X2H_TEXT_NODE                0x80

/* Mask covering both normalisation bits. */
#define XH_X2H_NEED_NORMALIZE                                                 \
    (XH_X2H_NORMALIZE_REF | XH_X2H_NORMALIZE_LINE_FEED)

/*
 * Non-zero when, in flag word f, XH_X2H_FILTER_ENABLED is set and
 * XH_X2H_FILTER_MATCHED is clear; all other bits of f are ignored.
 */
#define XH_X2H_FILTER_SEARCH(f)                                               \
    (((f) & (XH_X2H_FILTER_ENABLED | XH_X2H_FILTER_MATCHED)) == XH_X2H_FILTER_ENABLED)
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
/*
 * Master list of parser states.  Every entry is wrapped in
 * XH_X2H_PROCESS_STATE(), which must be defined by whoever expands the list.
 * The order of the entries fixes the numeric value of each enum constant
 * below, so entries must not be reordered.
 */
#define XH_X2H_PARSER_STATE_LIST                                                      \
    XH_X2H_PROCESS_STATE(CONTENT_START)                                               \
    XH_X2H_PROCESS_STATE(PARSE_ELEMENT_START)                                         \
    XH_X2H_PROCESS_STATE(XML_COMMENT_NODE_OR_CDATA_START)                             \
    XH_X2H_PROCESS_STATE(COMMENT1_START)                                              \
    XH_X2H_PROCESS_STATE(END_COMMENT1_START)                                          \
    XH_X2H_PROCESS_STATE(END_COMMENT2_START)                                          \
    XH_X2H_PROCESS_STATE(END_COMMENT3_START)                                          \
    XH_X2H_PROCESS_STATE(CDATA_1_1_START)                                             \
    XH_X2H_PROCESS_STATE(CDATA_1_2_START)                                             \
    XH_X2H_PROCESS_STATE(CDATA_1_3_START)                                             \
    XH_X2H_PROCESS_STATE(CDATA_2_1_START)                                             \
    XH_X2H_PROCESS_STATE(CDATA_2_2_START)                                             \
    XH_X2H_PROCESS_STATE(CDATA_2_3_START)                                             \
    XH_X2H_PROCESS_STATE(END_CDATA1_START)                                            \
    XH_X2H_PROCESS_STATE(END_CDATA2_START)                                            \
    XH_X2H_PROCESS_STATE(END_CDATA3_START)                                            \
    XH_X2H_PROCESS_STATE(DOCTYPE_1_1_START)                                           \
    XH_X2H_PROCESS_STATE(DOCTYPE_1_2_START)                                           \
    XH_X2H_PROCESS_STATE(DOCTYPE_1_3_START)                                           \
    XH_X2H_PROCESS_STATE(DOCTYPE_2_1_START)                                           \
    XH_X2H_PROCESS_STATE(DOCTYPE_2_2_START)                                           \
    XH_X2H_PROCESS_STATE(DOCTYPE_2_3_START)                                           \
    XH_X2H_PROCESS_STATE(DOCTYPE_NAME_START)                                          \
    XH_X2H_PROCESS_STATE(DOCTYPE_NAME_START_START)                                    \
    XH_X2H_PROCESS_STATE(DOCTYPE_NAME_END_START)                                      \
    XH_X2H_PROCESS_STATE(DOCTYPE_NAME_BLANK_START)                                    \
    XH_X2H_PROCESS_STATE(DOCTYPE_SYSTEM_1_1_START)                                    \
    XH_X2H_PROCESS_STATE(DOCTYPE_SYSTEM_1_2_START)                                    \
    XH_X2H_PROCESS_STATE(DOCTYPE_SYSTEM_1_3_START)                                    \
    XH_X2H_PROCESS_STATE(DOCTYPE_SYSTEM_2_1_START)                                    \
    XH_X2H_PROCESS_STATE(DOCTYPE_SYSTEM_2_2_START)                                    \
    XH_X2H_PROCESS_STATE(DOCTYPE_SYSTEM_LOCATION_DOCTYPE_DELIM_START)                 \
    XH_X2H_PROCESS_STATE(DOCTYPE_SYSTEM_LOCATION_DOCTYPE_DELIM_SKIP_BLANK_START)      \
    XH_X2H_PROCESS_STATE(DOCTYPE_SYSTEM_1_END_OF_LITERAL_START)                       \
    XH_X2H_PROCESS_STATE(DOCTYPE_SYSTEM_2_END_OF_LITERAL_START)                       \
    XH_X2H_PROCESS_STATE(DOCTYPE_PUBLIC_1_1_START)                                    \
    XH_X2H_PROCESS_STATE(DOCTYPE_PUBLIC_1_2_START)                                    \
    XH_X2H_PROCESS_STATE(DOCTYPE_PUBLIC_1_3_START)                                    \
    XH_X2H_PROCESS_STATE(DOCTYPE_PUBLIC_2_1_START)                                    \
    XH_X2H_PROCESS_STATE(DOCTYPE_PUBLIC_2_2_START)                                    \
    XH_X2H_PROCESS_STATE(DOCTYPE_PUBLIC_ID_DOCTYPE_DELIM_START)                       \
    XH_X2H_PROCESS_STATE(DOCTYPE_PUBLIC_ID_DOCTYPE_DELIM_SKIP_BLANK_START)            \
    XH_X2H_PROCESS_STATE(DOCTYPE_PUBLIC_LOCATION_1_DOCTYPE_DELIM_START)               \
    XH_X2H_PROCESS_STATE(DOCTYPE_PUBLIC_LOCATION_1_DOCTYPE_DELIM_SKIP_BLANK_START)    \
    XH_X2H_PROCESS_STATE(DOCTYPE_PUBLIC_LOCATION_1_1_END_OF_LITERAL_START)            \
    XH_X2H_PROCESS_STATE(DOCTYPE_PUBLIC_LOCATION_1_2_END_OF_LITERAL_START)            \
    XH_X2H_PROCESS_STATE(DOCTYPE_PUBLIC_LOCATION_2_DOCTYPE_DELIM_START)               \
    XH_X2H_PROCESS_STATE(DOCTYPE_PUBLIC_LOCATION_2_DOCTYPE_DELIM_SKIP_BLANK_START)    \
    XH_X2H_PROCESS_STATE(DOCTYPE_PUBLIC_LOCATION_2_1_END_OF_LITERAL_START)            \
    XH_X2H_PROCESS_STATE(DOCTYPE_PUBLIC_LOCATION_2_2_END_OF_LITERAL_START)            \
    XH_X2H_PROCESS_STATE(DOCTYPE_PUBLIC_1_END_OF_LITERAL_START)                       \
    XH_X2H_PROCESS_STATE(DOCTYPE_PUBLIC_2_END_OF_LITERAL_START)                       \
    XH_X2H_PROCESS_STATE(DOCTYPE_END_START)                                           \
    XH_X2H_PROCESS_STATE(DOCTYPE_INTSUBSET_START)                                     \
    XH_X2H_PROCESS_STATE(DOCTYPE_INTSUBSET_START_START)                               \
    XH_X2H_PROCESS_STATE(CDATA_WITH_TRIM_1_1_START)                                   \
    XH_X2H_PROCESS_STATE(CDATA_WITH_TRIM_1_2_START)                                   \
    XH_X2H_PROCESS_STATE(CDATA_WITH_TRIM_1_3_START)                                   \
    XH_X2H_PROCESS_STATE(CDATA_WITH_TRIM_2_1_START)                                   \
    XH_X2H_PROCESS_STATE(CDATA_WITH_TRIM_2_2_START)                                   \
    XH_X2H_PROCESS_STATE(CDATA_WITH_TRIM_2_3_START)                                   \
    XH_X2H_PROCESS_STATE(END_CDATA_WITH_TRIM1_START)                                  \
    XH_X2H_PROCESS_STATE(END_CDATA_WITH_TRIM2_START)                                  \
    XH_X2H_PROCESS_STATE(END_CDATA_WITH_TRIM3_START)                                  \
    XH_X2H_PROCESS_STATE(PARSE_CLOSING_TAG_START)                                     \
    XH_X2H_PROCESS_STATE(SEARCH_CLOSING_END_TAG_START)                                \
    XH_X2H_PROCESS_STATE(PARSE_OPENING_TAG_START)                                     \
    XH_X2H_PROCESS_STATE(SEARCH_OPENING_END_TAG_START)                                \
    XH_X2H_PROCESS_STATE(NODE_SEARCH_ATTR_START)                                      \
    XH_X2H_PROCESS_STATE(SEARCH_END_TAG_START)                                        \
    XH_X2H_PROCESS_STATE(NODE_PARSE_ATTR_NAME_START)                                  \
    XH_X2H_PROCESS_STATE(NODE_ATTR_SKIP_BLANK_START)                                  \
    XH_X2H_PROCESS_STATE(NODE_PARSE_ATTR_VALUE_START)                                 \
    XH_X2H_PROCESS_STATE(NODE_1_END_ATTR_VALUE_START)                                 \
    XH_X2H_PROCESS_STATE(NODE_2_END_ATTR_VALUE_START)                                 \
    XH_X2H_PROCESS_STATE(XML_DECL_1_START)                                            \
    XH_X2H_PROCESS_STATE(XML_DECL_2_START)                                            \
    XH_X2H_PROCESS_STATE(XML_DECL_3_START)                                            \
    XH_X2H_PROCESS_STATE(XML_DECL_ATTR_START)                                         \
    XH_X2H_PROCESS_STATE(XML_DECL_ATTR_SEARCH_ATTR_START)                             \
    XH_X2H_PROCESS_STATE(XML_DECL_SEARCH_END_TAG2_START)                              \
    XH_X2H_PROCESS_STATE(XML_DECL_ATTR_PARSE_ATTR_NAME_START)                         \
    XH_X2H_PROCESS_STATE(XML_DECL_ATTR_ATTR_SKIP_BLANK_START)                         \
    XH_X2H_PROCESS_STATE(XML_DECL_ATTR_PARSE_ATTR_VALUE_START)                        \
    XH_X2H_PROCESS_STATE(XML_DECL_ATTR_1_END_ATTR_VALUE_START)                        \
    XH_X2H_PROCESS_STATE(XML_DECL_ATTR_2_END_ATTR_VALUE_START)

/*
 * Parser state enumeration.  PARSER_ST_NONE is 0; the listed states follow
 * consecutively from 1 in list order; XML_DECL_FOUND and PARSER_ST_DONE take
 * the next two values after the last listed state.
 */
#define XH_X2H_PROCESS_STATE(name) name,
typedef enum {
    PARSER_ST_NONE = 0,
    XH_X2H_PARSER_STATE_LIST
    XML_DECL_FOUND,
    PARSER_ST_DONE
} xh_x2h_state_t;
/* Release the generator macro so the list can be expanded differently later. */
#undef XH_X2H_PROCESS_STATE
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
#define XH_X2H_NODE_FLAG_NONE 0 |
121
|
|
|
|
|
|
|
#define XH_X2H_NODE_FLAG_FORCE_ARRAY 1 |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
typedef struct { |
124
|
|
|
|
|
|
|
SV **lval; |
125
|
|
|
|
|
|
|
unsigned int flags; |
126
|
|
|
|
|
|
|
} xh_x2h_node_t; |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
typedef struct { |
129
|
|
|
|
|
|
|
xh_opts_t opts; |
130
|
|
|
|
|
|
|
SV *hash; |
131
|
|
|
|
|
|
|
xh_char_t encoding[XH_PARAM_LEN]; |
132
|
|
|
|
|
|
|
xh_char_t *tmp; |
133
|
|
|
|
|
|
|
size_t tmp_size; |
134
|
|
|
|
|
|
|
xh_char_t *node, *end, *content; |
135
|
|
|
|
|
|
|
unsigned int flags; |
136
|
|
|
|
|
|
|
xh_x2h_node_t *nodes; |
137
|
|
|
|
|
|
|
SV **lval; |
138
|
|
|
|
|
|
|
unsigned int depth, real_depth, code; |
139
|
|
|
|
|
|
|
xh_x2h_state_t state; |
140
|
|
|
|
|
|
|
xh_reader_t reader; |
141
|
|
|
|
|
|
|
SV *result, *input; |
142
|
|
|
|
|
|
|
xh_char_t xpath[XH_X2H_XPATH_MAX_LEN + 1]; |
143
|
|
|
|
|
|
|
} xh_x2h_ctx_t; |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
SV *xh_x2h(xh_x2h_ctx_t *ctx); |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
XH_INLINE void |
148
|
16
|
|
|
|
|
|
xh_x2h_destroy_ctx(xh_x2h_ctx_t *ctx) |
149
|
|
|
|
|
|
|
{ |
150
|
16
|
50
|
|
|
|
|
if (ctx->nodes != NULL) free(ctx->nodes); |
151
|
16
|
100
|
|
|
|
|
if (ctx->tmp != NULL) free(ctx->tmp); |
152
|
|
|
|
|
|
|
|
153
|
16
|
|
|
|
|
|
xh_destroy_opts(&ctx->opts); |
154
|
16
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
XH_INLINE void |
157
|
16
|
|
|
|
|
|
xh_x2h_init_ctx(xh_x2h_ctx_t *ctx, I32 ax, I32 items) |
158
|
|
|
|
|
|
|
{ |
159
|
16
|
|
|
|
|
|
xh_opts_t *opts = NULL; |
160
|
16
|
|
|
|
|
|
xh_int_t nparam = 0; |
161
|
|
|
|
|
|
|
|
162
|
16
|
|
|
|
|
|
memset(ctx, 0, sizeof(xh_x2h_ctx_t)); |
163
|
|
|
|
|
|
|
|
164
|
16
|
|
|
|
|
|
opts = (xh_opts_t *) xh_get_obj_param(&nparam, ax, items, "XML::Hash::XS"); |
165
|
16
|
|
|
|
|
|
ctx->input = xh_get_str_param(&nparam, ax, items); |
166
|
16
|
|
|
|
|
|
xh_merge_opts(&ctx->opts, opts, nparam, ax, items); |
167
|
|
|
|
|
|
|
|
168
|
16
|
50
|
|
|
|
|
if ((ctx->nodes = malloc(sizeof(xh_x2h_node_t) * ctx->opts.max_depth)) == NULL) { |
169
|
0
|
|
|
|
|
|
croak("Memory allocation error"); |
170
|
|
|
|
|
|
|
} |
171
|
16
|
|
|
|
|
|
memset(ctx->nodes, 0, sizeof(xh_x2h_node_t) * ctx->opts.max_depth); |
172
|
16
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
#endif /* _XH_X2H_H_ */ |