line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
/* vim: set sw=8 ts=8 si noet: */ |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
/* written by Guido Socher. |
4
|
|
|
|
|
|
|
* |
5
|
|
|
|
|
|
|
* This program is free software; you can redistribute it |
6
|
|
|
|
|
|
|
* and/or modify it under the same terms as Perl itself. |
7
|
|
|
|
|
|
|
*/ |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
/* read the following man pages to learn how to use XS and access |
10
|
|
|
|
|
|
|
* perl from C: |
11
|
|
|
|
|
|
|
* perlxs Perl XS application programming interface |
12
|
|
|
|
|
|
|
* perlxstut Perl XS tutorial |
13
|
|
|
|
|
|
|
* perlguts Perl internal functions, variables, data structures for |
14
|
|
|
|
|
|
|
* C programmer |
15
|
|
|
|
|
|
|
* perlcall Perl calling conventions from C |
16
|
|
|
|
|
|
|
*/ |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
#ifdef __cplusplus |
19
|
|
|
|
|
|
|
extern "C" { |
20
|
|
|
|
|
|
|
#endif |
21
|
|
|
|
|
|
|
#include "EXTERN.h" |
22
|
|
|
|
|
|
|
#include "perl.h" |
23
|
|
|
|
|
|
|
#include "XSUB.h" |
24
|
|
|
|
|
|
|
#include |
25
|
|
|
|
|
|
|
#include |
26
|
|
|
|
|
|
|
#include |
27
|
|
|
|
|
|
|
#ifdef __cplusplus |
28
|
|
|
|
|
|
|
} |
29
|
|
|
|
|
|
|
#endif |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
/* tags longer than TAGREADER_MAX_TAGLEN produce a warning about |
33
|
|
|
|
|
|
|
* not terminated tags, must be much smaler than BUFFLEN */ |
34
|
|
|
|
|
|
|
#define TAGREADER_MAX_TAGLEN 400 |
35
|
|
|
|
|
|
|
/* BUFFLEN is the units in which we re-allocate mem, must be much bigger than |
36
|
|
|
|
|
|
|
* TAGREADER_MAX_TAGLEN */ |
37
|
|
|
|
|
|
|
#define BUFFLEN 6000 |
38
|
|
|
|
|
|
|
#define TAGREADER_TAGTYPELEN 20 |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
typedef struct trstuct{ |
41
|
|
|
|
|
|
|
char *filename; |
42
|
|
|
|
|
|
|
int fileline; |
43
|
|
|
|
|
|
|
int tagline; /* file line where the tag starts */ |
44
|
|
|
|
|
|
|
int charpos; /* character pos in the line */ |
45
|
|
|
|
|
|
|
int tagcharpos; /* character pos where tag starts */ |
46
|
|
|
|
|
|
|
int currbuflen; |
47
|
|
|
|
|
|
|
PerlIO *fd; |
48
|
|
|
|
|
|
|
char tagtype[TAGREADER_TAGTYPELEN + 1]; |
49
|
|
|
|
|
|
|
char *buffer; |
50
|
|
|
|
|
|
|
} *HTML__TagReader; |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
/* WIN32 stuff from: DH , |
53
|
|
|
|
|
|
|
* http://testers.cpan.org/ */ |
54
|
|
|
|
|
|
|
#ifdef WIN32 |
55
|
|
|
|
|
|
|
#define THEINLINE __forceinline |
56
|
|
|
|
|
|
|
#else |
57
|
|
|
|
|
|
|
#define THEINLINE inline |
58
|
|
|
|
|
|
|
#endif |
59
|
|
|
|
|
|
|
/* start of a html tag (first char in the tag) */ |
60
|
100
|
|
|
|
|
|
static THEINLINE int is_start_of_tag(int ch){ |
61
|
100
|
100
|
|
|
|
|
if (ch=='!' || ch=='/' || ch=='?' || isalnum(ch)){ |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
62
|
84
|
|
|
|
|
|
return(1); |
63
|
|
|
|
|
|
|
} |
64
|
16
|
|
|
|
|
|
return(0); |
65
|
|
|
|
|
|
|
} |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
MODULE = HTML::TagReader PACKAGE = HTML::TagReader PREFIX = tr_ |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
PROTOTYPES: ENABLE |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
HTML::TagReader |
72
|
|
|
|
|
|
|
tr_new(class, filename) |
73
|
|
|
|
|
|
|
SV *class |
74
|
|
|
|
|
|
|
SV *filename |
75
|
|
|
|
|
|
|
CODE: |
76
|
|
|
|
|
|
|
STRLEN i; // int |
77
|
|
|
|
|
|
|
char *str; |
78
|
3
|
50
|
|
|
|
|
if (!SvPOKp(filename)){ |
79
|
0
|
|
|
|
|
|
croak("ERROR: filename must be a string scalar"); |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
/* malloc and zero the struct */ |
82
|
3
|
|
|
|
|
|
Newz(0, RETVAL, 1, struct trstuct ); |
83
|
3
|
50
|
|
|
|
|
str=SvPV(filename,i); |
84
|
|
|
|
|
|
|
/* malloc */ |
85
|
3
|
|
|
|
|
|
New(0, RETVAL->filename, i+1, char ); |
86
|
3
|
|
|
|
|
|
strncpy(RETVAL->filename,str,i); |
87
|
|
|
|
|
|
|
/* malloc initial buffer */ |
88
|
3
|
|
|
|
|
|
New(0, RETVAL->buffer, BUFFLEN+1, char ); |
89
|
3
|
|
|
|
|
|
RETVAL->currbuflen=BUFFLEN; |
90
|
|
|
|
|
|
|
/* put a zero at the end of the string, perl might not do it */ |
91
|
3
|
|
|
|
|
|
*(RETVAL->filename + i )=(char)0; |
92
|
3
|
|
|
|
|
|
RETVAL->fd=PerlIO_open(str,"r"); |
93
|
3
|
50
|
|
|
|
|
if (RETVAL->fd == NULL){ |
94
|
0
|
|
|
|
|
|
croak("ERROR: Can not read file \"%s\" ",str); |
95
|
|
|
|
|
|
|
} |
96
|
3
|
|
|
|
|
|
RETVAL->charpos=0; |
97
|
3
|
|
|
|
|
|
RETVAL->tagcharpos=0; |
98
|
3
|
|
|
|
|
|
RETVAL->fileline=1; |
99
|
3
|
|
|
|
|
|
RETVAL->tagline=0; |
100
|
|
|
|
|
|
|
OUTPUT: |
101
|
|
|
|
|
|
|
RETVAL |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
void |
104
|
|
|
|
|
|
|
DESTROY(self) |
105
|
|
|
|
|
|
|
HTML::TagReader self |
106
|
|
|
|
|
|
|
CODE: |
107
|
3
|
|
|
|
|
|
Safefree(self->filename); |
108
|
3
|
|
|
|
|
|
Safefree(self->buffer); |
109
|
3
|
|
|
|
|
|
PerlIO_close(self->fd); |
110
|
3
|
|
|
|
|
|
Safefree(self); |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
void |
113
|
|
|
|
|
|
|
tr_gettag(self,showerrors) |
114
|
|
|
|
|
|
|
HTML::TagReader self |
115
|
|
|
|
|
|
|
SV *showerrors |
116
|
|
|
|
|
|
|
PREINIT: |
117
|
|
|
|
|
|
|
int bufpos; |
118
|
|
|
|
|
|
|
char ch; |
119
|
|
|
|
|
|
|
char chn; |
120
|
|
|
|
|
|
|
int state; |
121
|
|
|
|
|
|
|
PPCODE: |
122
|
4
|
50
|
|
|
|
|
if (! self->fileline){ |
123
|
0
|
|
|
|
|
|
croak("Object not initialized"); |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
/* initialize */ |
126
|
4
|
|
|
|
|
|
state=0; |
127
|
4
|
|
|
|
|
|
bufpos=0; |
128
|
4
|
|
|
|
|
|
ch=(char)0; |
129
|
4
|
|
|
|
|
|
chn=(char)0; |
130
|
4
|
|
|
|
|
|
self->tagline=self->fileline; |
131
|
|
|
|
|
|
|
/* find the next tag */ |
132
|
74
|
100
|
|
|
|
|
while(state != 3 && (chn=PerlIO_getc(self->fd))!=EOF ){ |
|
|
100
|
|
|
|
|
|
133
|
70
|
|
|
|
|
|
self->charpos++; |
134
|
70
|
100
|
|
|
|
|
if (ch==0){ /* read one more character ahead so we have always 2 */ |
135
|
3
|
|
|
|
|
|
ch=chn; |
136
|
3
|
|
|
|
|
|
continue; |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
/* we can not run out of mem because TAGREADER_MAX_TAGLEN |
139
|
|
|
|
|
|
|
* is much smaller than BUFFLEN */ |
140
|
67
|
50
|
|
|
|
|
if (bufpos > TAGREADER_MAX_TAGLEN){ |
141
|
0
|
0
|
|
|
|
|
if (SvTRUE(showerrors)){ |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
142
|
0
|
|
|
|
|
|
PerlIO_printf(PerlIO_stderr(),"%s:%d:%d: Warning, tag not terminated or too long.\n",self->filename,self->tagline,self->charpos); |
143
|
|
|
|
|
|
|
} |
144
|
0
|
|
|
|
|
|
self->buffer[bufpos]=ch;bufpos++; |
145
|
0
|
|
|
|
|
|
self->buffer[bufpos]=(char)0;bufpos++; |
146
|
0
|
|
|
|
|
|
state=3; |
147
|
0
|
|
|
|
|
|
continue; /* jump out of while */ |
148
|
|
|
|
|
|
|
} |
149
|
67
|
100
|
|
|
|
|
if (ch=='\n') { |
150
|
3
|
|
|
|
|
|
self->fileline++; |
151
|
3
|
|
|
|
|
|
self->charpos=0; |
152
|
|
|
|
|
|
|
} |
153
|
67
|
100
|
|
|
|
|
if (ch=='\n'|| ch=='\r' || ch=='\t' || ch==' ') { |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
154
|
11
|
|
|
|
|
|
ch=' '; |
155
|
11
|
100
|
|
|
|
|
if (chn=='\n'|| chn=='\r' || chn=='\t' || chn==' '){ |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
156
|
|
|
|
|
|
|
/* delete mupltiple spaces */ |
157
|
3
|
|
|
|
|
|
ch=chn; /* shift next char */ |
158
|
3
|
|
|
|
|
|
continue; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
} |
161
|
64
|
|
|
|
|
|
switch (state) { |
162
|
|
|
|
|
|
|
/*---*/ |
163
|
|
|
|
|
|
|
case 0: |
164
|
|
|
|
|
|
|
/* outside of tag and we start tag here*/ |
165
|
11
|
100
|
|
|
|
|
if (ch=='<') { |
166
|
4
|
100
|
|
|
|
|
if (is_start_of_tag(chn)) { |
167
|
3
|
|
|
|
|
|
self->buffer[0]=(char)0; |
168
|
3
|
|
|
|
|
|
bufpos=0; |
169
|
3
|
|
|
|
|
|
self->tagcharpos=self->charpos; |
170
|
|
|
|
|
|
|
/*line where tag starts*/ |
171
|
3
|
|
|
|
|
|
self->tagline=self->fileline; |
172
|
3
|
|
|
|
|
|
self->buffer[bufpos]=ch;bufpos++; |
173
|
3
|
|
|
|
|
|
state=1; |
174
|
|
|
|
|
|
|
}else{ |
175
|
1
|
50
|
|
|
|
|
if (SvTRUE(showerrors)){ |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
176
|
0
|
|
|
|
|
|
PerlIO_printf(PerlIO_stderr(),"%s:%d:%d: Warning, single \'<\' should be written as <\n",self->filename,self->fileline,self->charpos); |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
} |
180
|
11
|
|
|
|
|
|
break; |
181
|
|
|
|
|
|
|
/*---*/ |
182
|
|
|
|
|
|
|
case 1: |
183
|
38
|
|
|
|
|
|
self->buffer[bufpos]=ch;bufpos++; |
184
|
38
|
100
|
|
|
|
|
if (ch=='!' && chn=='-' && self->buffer[bufpos-2]=='<'){ |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
185
|
|
|
|
|
|
|
/* start of comment handling */ |
186
|
1
|
|
|
|
|
|
state=30; |
187
|
|
|
|
|
|
|
} |
188
|
38
|
100
|
|
|
|
|
if (ch=='>'){ |
189
|
2
|
|
|
|
|
|
state=3; /* note the exit state is hardcoded |
190
|
|
|
|
|
|
|
* as well in the while loop above */ |
191
|
2
|
|
|
|
|
|
self->buffer[bufpos]=(char)0;bufpos++; |
192
|
|
|
|
|
|
|
} |
193
|
38
|
50
|
|
|
|
|
if(ch=='<'){ |
194
|
|
|
|
|
|
|
/* the tag that we were reading was not terminated but instead we ge a new opening */ |
195
|
0
|
0
|
|
|
|
|
if (SvTRUE(showerrors)){ |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
196
|
0
|
|
|
|
|
|
PerlIO_printf(PerlIO_stderr(),"%s:%d:%d: Warning, \'>\' inside a tag should be written as >\n",self->filename,self->tagline,self->charpos); |
197
|
|
|
|
|
|
|
} |
198
|
0
|
|
|
|
|
|
state=1; |
199
|
0
|
|
|
|
|
|
bufpos=0; |
200
|
0
|
|
|
|
|
|
self->buffer[bufpos]=ch;bufpos++; |
201
|
0
|
|
|
|
|
|
self->tagline=self->fileline; |
202
|
|
|
|
|
|
|
} |
203
|
38
|
|
|
|
|
|
break; |
204
|
|
|
|
|
|
|
/*---*/ |
205
|
|
|
|
|
|
|
case 30: /*comment handling, |
206
|
|
|
|
|
|
|
*we have found " |