| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
/* vim: set sw=8 ts=8 si noet: */ |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
/* written by Guido Socher. |
|
4
|
|
|
|
|
|
|
* |
|
5
|
|
|
|
|
|
|
* This program is free software; you can redistribute it |
|
6
|
|
|
|
|
|
|
* and/or modify it under the same terms as Perl itself. |
|
7
|
|
|
|
|
|
|
*/ |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
/* read the following man pages to learn how to use XS and access perl from C: |
|
10
|
|
|
|
|
|
|
* perlxs Perl XS application programming interface |
|
11
|
|
|
|
|
|
|
* perlxstut Perl XS tutorial |
|
12
|
|
|
|
|
|
|
* perlguts Perl internal functions, variables, data structures for |
|
13
|
|
|
|
|
|
|
* C programmer |
|
14
|
|
|
|
|
|
|
* perlcall Perl calling conventions from C |
|
15
|
|
|
|
|
|
|
* perlapio IO abstraction interface |
|
16
|
|
|
|
|
|
|
* perlapi Perl C api |
|
17
|
|
|
|
|
|
|
*/ |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
// we use perlio not stdio: |
|
20
|
|
|
|
|
|
|
#define PERLIO_NOT_STDIO 0 |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
#ifdef __cplusplus |
|
23
|
|
|
|
|
|
|
extern "C" { |
|
24
|
|
|
|
|
|
|
#endif |
|
25
|
|
|
|
|
|
|
#include "EXTERN.h" |
|
26
|
|
|
|
|
|
|
#include "perl.h" |
|
27
|
|
|
|
|
|
|
#include "XSUB.h" |
|
28
|
|
|
|
|
|
|
#include |
|
29
|
|
|
|
|
|
|
#include |
|
30
|
|
|
|
|
|
|
#include |
|
31
|
|
|
|
|
|
|
#ifdef __cplusplus |
|
32
|
|
|
|
|
|
|
} |
|
33
|
|
|
|
|
|
|
#endif |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
/* tags longer than TAGREADER_MAX_TAGLEN produce a warning about |
|
37
|
|
|
|
|
|
|
* not terminated tags, must be much smaler than BUFFLEN */ |
|
38
|
|
|
|
|
|
|
#define TAGREADER_MAX_TAGLEN 400 |
|
39
|
|
|
|
|
|
|
/* BUFFLEN is the units in which we re-allocate mem, must be much bigger than |
|
40
|
|
|
|
|
|
|
* TAGREADER_MAX_TAGLEN */ |
|
41
|
|
|
|
|
|
|
#define BUFFLEN 6000 |
|
42
|
|
|
|
|
|
|
#define TAGREADER_TAGTYPELEN 25 |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
typedef struct trstuct{ |
|
45
|
|
|
|
|
|
|
char *filename; |
|
46
|
|
|
|
|
|
|
int fileline; |
|
47
|
|
|
|
|
|
|
int tagline; /* file line where the tag starts */ |
|
48
|
|
|
|
|
|
|
int charpos; /* character pos in the line */ |
|
49
|
|
|
|
|
|
|
int tagcharpos; /* character pos where tag starts */ |
|
50
|
|
|
|
|
|
|
int currbuflen; |
|
51
|
|
|
|
|
|
|
PerlIO *fd; |
|
52
|
|
|
|
|
|
|
char tagtype[TAGREADER_TAGTYPELEN + 1]; |
|
53
|
|
|
|
|
|
|
char *buffer; |
|
54
|
|
|
|
|
|
|
} *HTML__TagReader; |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
/* WIN32 stuff from: DH , |
|
57
|
|
|
|
|
|
|
* http://testers.cpan.org/ */ |
|
58
|
|
|
|
|
|
|
#ifdef WIN32 |
|
59
|
|
|
|
|
|
|
#define THEINLINE __forceinline |
|
60
|
|
|
|
|
|
|
#else |
|
61
|
|
|
|
|
|
|
#define THEINLINE inline |
|
62
|
|
|
|
|
|
|
#endif |
|
63
|
|
|
|
|
|
|
/* start of a html tag (first char in the tag) */ |
|
64
|
143
|
|
|
|
|
|
static THEINLINE int is_start_of_tag(int ch){ |
|
65
|
143
|
100
|
|
|
|
|
if (ch=='!' || ch=='/' || ch=='?' || isalnum(ch)){ |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
66
|
122
|
|
|
|
|
|
return(1); |
|
67
|
|
|
|
|
|
|
} |
|
68
|
21
|
|
|
|
|
|
return(0); |
|
69
|
|
|
|
|
|
|
} |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
MODULE = HTML::TagReader PACKAGE = HTML::TagReader PREFIX = tr_ |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
PROTOTYPES: ENABLE |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
HTML::TagReader |
|
76
|
|
|
|
|
|
|
tr_new(class, filename) |
|
77
|
|
|
|
|
|
|
SV *class |
|
78
|
|
|
|
|
|
|
SV *filename |
|
79
|
|
|
|
|
|
|
CODE: |
|
80
|
|
|
|
|
|
|
STRLEN i; // int |
|
81
|
|
|
|
|
|
|
char *str; |
|
82
|
3
|
50
|
|
|
|
|
if (!SvPOKp(filename)){ |
|
83
|
0
|
|
|
|
|
|
croak("ERROR: filename must be a string scalar"); |
|
84
|
|
|
|
|
|
|
} |
|
85
|
|
|
|
|
|
|
/* malloc and zero the struct */ |
|
86
|
3
|
|
|
|
|
|
Newz(0, RETVAL, 1, struct trstuct ); |
|
87
|
3
|
50
|
|
|
|
|
str=SvPV(filename,i); |
|
88
|
|
|
|
|
|
|
/* malloc */ |
|
89
|
3
|
|
|
|
|
|
New(0, RETVAL->filename, i+1, char ); |
|
90
|
3
|
|
|
|
|
|
strncpy(RETVAL->filename,str,i); |
|
91
|
|
|
|
|
|
|
/* malloc initial buffer */ |
|
92
|
3
|
|
|
|
|
|
New(0, RETVAL->buffer, BUFFLEN+1, char ); |
|
93
|
3
|
|
|
|
|
|
RETVAL->currbuflen=BUFFLEN; |
|
94
|
|
|
|
|
|
|
/* put a zero at the end of the string, perl might not do it */ |
|
95
|
3
|
|
|
|
|
|
*(RETVAL->filename + i )=(char)0; |
|
96
|
3
|
|
|
|
|
|
RETVAL->fd=PerlIO_open(str,"r"); |
|
97
|
3
|
50
|
|
|
|
|
if (RETVAL->fd == NULL){ |
|
98
|
0
|
|
|
|
|
|
croak("ERROR: Can not read file \"%s\" ",str); |
|
99
|
|
|
|
|
|
|
} |
|
100
|
3
|
|
|
|
|
|
RETVAL->charpos=0; |
|
101
|
3
|
|
|
|
|
|
RETVAL->tagcharpos=0; |
|
102
|
3
|
|
|
|
|
|
RETVAL->fileline=1; |
|
103
|
3
|
|
|
|
|
|
RETVAL->tagline=0; |
|
104
|
|
|
|
|
|
|
OUTPUT: |
|
105
|
|
|
|
|
|
|
RETVAL |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
HTML::TagReader |
|
108
|
|
|
|
|
|
|
tr_new_from_iofh(class, fh) |
|
109
|
|
|
|
|
|
|
SV *class |
|
110
|
|
|
|
|
|
|
PerlIO *fh |
|
111
|
|
|
|
|
|
|
CODE: |
|
112
|
|
|
|
|
|
|
STRLEN i; // int |
|
113
|
2
|
|
|
|
|
|
char str[]="iofh"; |
|
114
|
|
|
|
|
|
|
char c; |
|
115
|
2
|
50
|
|
|
|
|
if (fh == NULL){ |
|
116
|
0
|
|
|
|
|
|
croak("ERROR: invalid PerlIO fh"); |
|
117
|
|
|
|
|
|
|
} |
|
118
|
|
|
|
|
|
|
// let's do some test to see if we will be able to read on this io filehandle: |
|
119
|
2
|
|
|
|
|
|
c=PerlIO_getc(fh); |
|
120
|
|
|
|
|
|
|
// c is EOF in case of error or end of file |
|
121
|
2
|
50
|
|
|
|
|
if (c==EOF){ |
|
122
|
0
|
0
|
|
|
|
|
if (PerlIO_error(fh)){ |
|
123
|
0
|
|
|
|
|
|
croak("ERROR: can not read from IO filehandle"); |
|
124
|
|
|
|
|
|
|
} |
|
125
|
|
|
|
|
|
|
// no ungetc in case of EOF |
|
126
|
|
|
|
|
|
|
}else{ |
|
127
|
2
|
50
|
|
|
|
|
if (PerlIO_ungetc(fh,c)==EOF){ |
|
128
|
0
|
|
|
|
|
|
croak("ERROR: ungetc on filehandle failed"); |
|
129
|
|
|
|
|
|
|
} |
|
130
|
|
|
|
|
|
|
} |
|
131
|
2
|
|
|
|
|
|
i=strlen(str); |
|
132
|
|
|
|
|
|
|
// malloc and zero the struct |
|
133
|
2
|
|
|
|
|
|
Newz(0, RETVAL, 1, struct trstuct ); |
|
134
|
|
|
|
|
|
|
// malloc filename, we need it for some error printouts |
|
135
|
2
|
|
|
|
|
|
New(0, RETVAL->filename, i+1, char ); |
|
136
|
2
|
|
|
|
|
|
strncpy(RETVAL->filename,str,i); |
|
137
|
|
|
|
|
|
|
// put a zero at the end of the string, perl might not do it |
|
138
|
2
|
|
|
|
|
|
*(RETVAL->filename + i )=(char)0; |
|
139
|
|
|
|
|
|
|
// malloc initial buffer |
|
140
|
2
|
|
|
|
|
|
New(0, RETVAL->buffer, BUFFLEN+1, char ); |
|
141
|
2
|
|
|
|
|
|
RETVAL->currbuflen=BUFFLEN; |
|
142
|
2
|
|
|
|
|
|
RETVAL->fd=fh; |
|
143
|
2
|
|
|
|
|
|
RETVAL->charpos=0; |
|
144
|
2
|
|
|
|
|
|
RETVAL->tagcharpos=0; |
|
145
|
2
|
|
|
|
|
|
RETVAL->fileline=1; |
|
146
|
2
|
|
|
|
|
|
RETVAL->tagline=0; |
|
147
|
|
|
|
|
|
|
OUTPUT: |
|
148
|
|
|
|
|
|
|
RETVAL |
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
void |
|
151
|
|
|
|
|
|
|
DESTROY(self) |
|
152
|
|
|
|
|
|
|
HTML::TagReader self |
|
153
|
|
|
|
|
|
|
CODE: |
|
154
|
5
|
|
|
|
|
|
Safefree(self->filename); |
|
155
|
5
|
|
|
|
|
|
Safefree(self->buffer); |
|
156
|
5
|
|
|
|
|
|
PerlIO_close(self->fd); |
|
157
|
5
|
|
|
|
|
|
Safefree(self); |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
void |
|
160
|
|
|
|
|
|
|
tr_gettag(self,showerrors) |
|
161
|
|
|
|
|
|
|
HTML::TagReader self |
|
162
|
|
|
|
|
|
|
SV *showerrors |
|
163
|
|
|
|
|
|
|
PREINIT: |
|
164
|
|
|
|
|
|
|
int bufpos; |
|
165
|
|
|
|
|
|
|
char ch; |
|
166
|
|
|
|
|
|
|
char chn; |
|
167
|
|
|
|
|
|
|
int state; |
|
168
|
|
|
|
|
|
|
PPCODE: |
|
169
|
9
|
50
|
|
|
|
|
if (! self->fileline){ |
|
170
|
0
|
|
|
|
|
|
croak("Object not initialized"); |
|
171
|
|
|
|
|
|
|
} |
|
172
|
|
|
|
|
|
|
/* initialize */ |
|
173
|
9
|
|
|
|
|
|
state=0; |
|
174
|
9
|
|
|
|
|
|
bufpos=0; |
|
175
|
9
|
|
|
|
|
|
ch=(char)0; |
|
176
|
9
|
|
|
|
|
|
chn=(char)0; |
|
177
|
9
|
|
|
|
|
|
self->tagline=self->fileline; |
|
178
|
|
|
|
|
|
|
/* find the next tag */ |
|
179
|
119
|
100
|
|
|
|
|
while(state != 3 && (chn=PerlIO_getc(self->fd))!=EOF ){ |
|
|
|
100
|
|
|
|
|
|
|
180
|
110
|
|
|
|
|
|
self->charpos++; |
|
181
|
110
|
100
|
|
|
|
|
if (ch==0){ /* read one more character ahead so we have always 2 */ |
|
182
|
8
|
|
|
|
|
|
ch=chn; |
|
183
|
8
|
|
|
|
|
|
continue; |
|
184
|
|
|
|
|
|
|
} |
|
185
|
|
|
|
|
|
|
/* we can not run out of mem because TAGREADER_MAX_TAGLEN |
|
186
|
|
|
|
|
|
|
* is much smaller than BUFFLEN */ |
|
187
|
102
|
50
|
|
|
|
|
if (bufpos > TAGREADER_MAX_TAGLEN){ |
|
188
|
0
|
0
|
|
|
|
|
if (SvTRUE(showerrors)){ |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
189
|
0
|
|
|
|
|
|
PerlIO_printf(PerlIO_stderr(),"%s:%d:%d: Warning, tag not terminated or too long.\n",self->filename,self->tagline,self->charpos); |
|
190
|
|
|
|
|
|
|
} |
|
191
|
0
|
|
|
|
|
|
self->buffer[bufpos]=ch;bufpos++; |
|
192
|
0
|
|
|
|
|
|
self->buffer[bufpos]=(char)0;bufpos++; |
|
193
|
0
|
|
|
|
|
|
state=3; |
|
194
|
0
|
|
|
|
|
|
continue; /* jump out of while */ |
|
195
|
|
|
|
|
|
|
} |
|
196
|
102
|
100
|
|
|
|
|
if (ch=='\n') { |
|
197
|
5
|
|
|
|
|
|
self->fileline++; |
|
198
|
5
|
|
|
|
|
|
self->charpos=0; |
|
199
|
|
|
|
|
|
|
} |
|
200
|
102
|
100
|
|
|
|
|
if (ch=='\n'|| ch=='\r' || ch=='\t' || ch==' ') { |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
201
|
15
|
|
|
|
|
|
ch=' '; |
|
202
|
15
|
100
|
|
|
|
|
if (chn=='\n'|| chn=='\r' || chn=='\t' || chn==' '){ |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
/* delete mupltiple spaces */ |
|
204
|
3
|
|
|
|
|
|
ch=chn; /* shift next char */ |
|
205
|
3
|
|
|
|
|
|
continue; |
|
206
|
|
|
|
|
|
|
} |
|
207
|
|
|
|
|
|
|
} |
|
208
|
99
|
|
|
|
|
|
switch (state) { |
|
209
|
|
|
|
|
|
|
/*---*/ |
|
210
|
|
|
|
|
|
|
case 0: |
|
211
|
|
|
|
|
|
|
/* outside of tag and we start tag here*/ |
|
212
|
32
|
100
|
|
|
|
|
if (ch=='<') { |
|
213
|
9
|
100
|
|
|
|
|
if (is_start_of_tag(chn)) { |
|
214
|
8
|
|
|
|
|
|
self->buffer[0]=(char)0; |
|
215
|
8
|
|
|
|
|
|
bufpos=0; |
|
216
|
8
|
|
|
|
|
|
self->tagcharpos=self->charpos; |
|
217
|
|
|
|
|
|
|
/*line where tag starts*/ |
|
218
|
8
|
|
|
|
|
|
self->tagline=self->fileline; |
|
219
|
8
|
|
|
|
|
|
self->buffer[bufpos]=ch;bufpos++; |
|
220
|
8
|
|
|
|
|
|
state=1; |
|
221
|
|
|
|
|
|
|
}else{ |
|
222
|
1
|
50
|
|
|
|
|
if (SvTRUE(showerrors)){ |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
223
|
0
|
|
|
|
|
|
PerlIO_printf(PerlIO_stderr(),"%s:%d:%d: Warning, single \'<\' should be written as <\n",self->filename,self->fileline,self->charpos); |
|
224
|
|
|
|
|
|
|
} |
|
225
|
|
|
|
|
|
|
} |
|
226
|
|
|
|
|
|
|
} |
|
227
|
32
|
|
|
|
|
|
break; |
|
228
|
|
|
|
|
|
|
/*---*/ |
|
229
|
|
|
|
|
|
|
case 1: |
|
230
|
52
|
|
|
|
|
|
self->buffer[bufpos]=ch;bufpos++; |
|
231
|
52
|
100
|
|
|
|
|
if (ch=='!' && chn=='-' && self->buffer[bufpos-2]=='<'){ |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
/* start of comment handling */ |
|
233
|
1
|
|
|
|
|
|
state=30; |
|
234
|
|
|
|
|
|
|
} |
|
235
|
52
|
100
|
|
|
|
|
if (ch=='>'){ |
|
236
|
6
|
|
|
|
|
|
state=3; /* note the exit state is hardcoded |
|
237
|
|
|
|
|
|
|
* as well in the while loop above */ |
|
238
|
6
|
|
|
|
|
|
self->buffer[bufpos]=(char)0;bufpos++; |
|
239
|
|
|
|
|
|
|
} |
|
240
|
52
|
50
|
|
|
|
|
if(ch=='<'){ |
|
241
|
|
|
|
|
|
|
/* the tag that we were reading was not terminated but instead we ge a new opening */ |
|
242
|
0
|
0
|
|
|
|
|
if (SvTRUE(showerrors)){ |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
243
|
0
|
|
|
|
|
|
PerlIO_printf(PerlIO_stderr(),"%s:%d:%d: Warning, \'>\' inside a tag should be written as >\n",self->filename,self->tagline,self->charpos); |
|
244
|
|
|
|
|
|
|
} |
|
245
|
0
|
|
|
|
|
|
state=1; |
|
246
|
0
|
|
|
|
|
|
bufpos=0; |
|
247
|
0
|
|
|
|
|
|
self->buffer[bufpos]=ch;bufpos++; |
|
248
|
0
|
|
|
|
|
|
self->tagline=self->fileline; |
|
249
|
|
|
|
|
|
|
} |
|
250
|
52
|
|
|
|
|
|
break; |
|
251
|
|
|
|
|
|
|
/*---*/ |
|
252
|
|
|
|
|
|
|
case 30: /*comment handling, |
|
253
|
|
|
|
|
|
|
*we have found " |