blib/lib/HTML/SBC.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 205 | 246 | 83.3 |
branch | 67 | 96 | 69.7 |
condition | 20 | 25 | 80.0 |
subroutine | 36 | 46 | 78.2 |
pod | 12 | 12 | 100.0 |
total | 340 | 425 | 80.0 |
line | stmt | bran | cond | sub | pod | time | code |
---|---|---|---|---|---|---|---|
1 | package HTML::SBC; | ||||||
2 | |||||||
3 | =head1 NAME | ||||||
4 | |||||||
5 | HTML::SBC - simple blog code for valid (X)HTML | ||||||
6 | |||||||
7 | =head1 VERSION | ||||||
8 | |||||||
9 | Version 0.15 | ||||||
10 | |||||||
11 | =cut | ||||||
12 | |||||||
13 | our $VERSION = '0.15'; | ||||||
14 | |||||||
15 | 6 | 6 | 134822 | use warnings; | |||
6 | 15 | ||||||
6 | 198 | ||||||
16 | 6 | 6 | 35 | use strict; | |||
6 | 12 | ||||||
6 | 193 | ||||||
17 | 6 | 6 | 31 | use Carp; | |||
6 | 16 | ||||||
6 | 495 | ||||||
18 | 6 | 6 | 32 | use Scalar::Util qw( blessed ); | |||
6 | 9 | ||||||
6 | 580 | ||||||
19 | 6 | 6 | 34 | use Exporter; | |||
6 | 8 | ||||||
6 | 300 | ||||||
20 | |||||||
21 | # "vintage" interface | ||||||
22 | my @vintage = qw( | ||||||
23 | sbc_translate sbc_translate_inline sbc_quote sbc_description | ||||||
24 | ); | ||||||
25 | 6 | 6 | 28 | use base qw( Exporter ); | |||
6 | 8 | ||||||
6 | 20145 | ||||||
26 | our @EXPORT_OK = (@vintage, ); | ||||||
27 | our %EXPORT_TAGS = (all => \@EXPORT_OK, vintage => \@vintage); | ||||||
28 | |||||||
29 | =head1 SYNOPSIS | ||||||
30 | |||||||
31 | use HTML::SBC; | ||||||
32 | my $translator = HTML::SBC->new(); | ||||||
33 | my $html = $translator->sbc($text); | ||||||
34 | |||||||
35 | or with vintage interface: | ||||||
36 | |||||||
37 | use HTML::SBC qw(sbc_translate); | ||||||
38 | my $html = sbc_translate($text); | ||||||
39 | |||||||
40 | =head1 DESCRIPTION | ||||||
41 | |||||||
42 | I |
||||||
43 | books, blogs, wikis, boards and various other web applications. It produces | ||||||
44 | valid and semantic (X)HTML from input and is patterned on that tiny usenet | ||||||
45 | markups like *B |
||||||
46 | for details. | ||||||
47 | |||||||
48 | HTML::SBC tries to give useful error messages and guess the right translation | ||||||
49 | even with invalid input. It will B |
||||||
50 | |||||||
51 | =head2 OOP Interface | ||||||
52 | |||||||
53 | HTML::SBC now (since 0.10) uses an OO interface, but the old interface is still | ||||||
54 | available. See L for details. | ||||||
55 | |||||||
56 | =head3 Constructor | ||||||
57 | |||||||
58 | =over 4 | ||||||
59 | |||||||
60 | =item new | ||||||
61 | |||||||
62 | my $translator = HTML::SBC->new() | ||||||
63 | |||||||
64 | creates a translator with english language for error messages. Additionally, | ||||||
65 | you can set initial values for all attributes, e. g.: | ||||||
66 | |||||||
67 | my $translator = HTML::SBC->new({ | ||||||
68 | language => 'german', | ||||||
69 | image_support => 1, | ||||||
70 | error_callback => sub | ||||||
71 | { print " |
||||||
72 | linkcheck_callback => sub | ||||||
73 | { return $_[0] =~ m{archive}; }, | ||||||
74 | imgcheck_callback => sub | ||||||
75 | { return $_[0] =~ m{naked\d{4}\,jpg}; }, | ||||||
76 | }); | ||||||
77 | |||||||
78 | For the meaning of the attributes, see the accessor documentations below. | ||||||
79 | B |
||||||
80 | |||||||
81 | =cut | ||||||
82 | |||||||
83 | my @lang = qw( english german ); | ||||||
84 | |||||||
85 | { | ||||||
86 | my %defaults = ( | ||||||
87 | language => $lang[0], | ||||||
88 | image_support => undef, | ||||||
89 | error_callback => undef, | ||||||
90 | linkcheck_callback => undef, | ||||||
91 | imgcheck_callback => undef, | ||||||
92 | ); | ||||||
93 | |||||||
94 | sub new { | ||||||
95 | 4 | 4 | 1 | 124 | my ($class, $args) = @_; | ||
96 | 4 | 100 | 27 | $args ||= {}; | |||
97 | 4 | 50 | 26 | croak 'Arguments expected as hash ref' if ref $args ne 'HASH'; | |||
98 | 4 | 36 | my $self = bless { %defaults, %$args }, $class; | ||||
99 | 4 | 21 | $self->_init; | ||||
100 | 4 | 15 | return $self; | ||||
101 | } | ||||||
102 | } # end of lexical %defaults | ||||||
103 | |||||||
104 | sub _init { | ||||||
105 | 28 | 28 | 74 | my ($self) = @_; | |||
106 | 28 | 80 | $self->{text} = ''; | ||||
107 | 28 | 49 | $self->{result} = ''; | ||||
108 | 28 | 43 | $self->{attribute} = ''; | ||||
109 | 28 | 57 | $self->{errors} = [ ]; | ||||
110 | 28 | 47 | $self->{istack} = { }; | ||||
111 | 28 | 58 | $self->{qstack} = 0; | ||||
112 | 28 | 107 | $self->{line} = 0; | ||||
113 | } | ||||||
114 | |||||||
115 | # private error reporting sub | ||||||
116 | { | ||||||
117 | my %error = ( | ||||||
118 | no_quote_end => { | ||||||
119 | $lang[0] => q(No quote end tag ']'), | ||||||
120 | $lang[1] => q(Kein Zitatende-Zeichen ']'), | ||||||
121 | }, | ||||||
122 | no_emphasis_end => { | ||||||
123 | $lang[0] => q(No emphasis end tag '*'), | ||||||
124 | $lang[1] => q(Kein Betonungs-Endezeichen '*'), | ||||||
125 | }, | ||||||
126 | no_strong_end => { | ||||||
127 | $lang[0] => q(No strong end tag '_'), | ||||||
128 | $lang[1] => q(Kein Hervorhebungs-Endezeichen '_'), | ||||||
129 | }, | ||||||
130 | no_hyperlink_end => { | ||||||
131 | $lang[0] => q(No hyperlink end tag '>'), | ||||||
132 | $lang[1] => q(Kein Hyperlink-Endezeichen '>'), | ||||||
133 | }, | ||||||
134 | no_image_end => { | ||||||
135 | $lang[0] => q(No image end tag '}'), | ||||||
136 | $lang[1] => q(Kein Bild-Endezeichen '}'), | ||||||
137 | }, | ||||||
138 | forbidden_url => { | ||||||
139 | $lang[0] => q(Forbidden URL), | ||||||
140 | $lang[1] => q(Verbotener URL), | ||||||
141 | }, | ||||||
142 | unknown_token => { | ||||||
143 | $lang[0] => q(Unknown token), | ||||||
144 | $lang[1] => q(Unbekanntes Zeichen), | ||||||
145 | }, | ||||||
146 | line => { | ||||||
147 | $lang[0] => q(around logical line), | ||||||
148 | $lang[1] => q(um logische Zeile), | ||||||
149 | }, | ||||||
150 | ); | ||||||
151 | |||||||
152 | sub _error { | ||||||
153 | 7 | 7 | 11 | my ($self, $error, $arg) = @_; | |||
154 | 7 | 18 | my $string = join ' ', ( | ||||
155 | $error{$error}{$self->language()}, | ||||||
156 | ($arg) x ! ! $arg, # additional information to this error message | ||||||
157 | $error{line}{$self->language()}, | ||||||
158 | $self->{line}, | ||||||
159 | ); | ||||||
160 | 7 | 16 | push @{ $self->{errors} }, $string; | ||||
7 | 15 | ||||||
161 | 7 | 15 | $self->_error_callback($string, $self); | ||||
162 | } | ||||||
163 | } # end of lexical %error | ||||||
164 | |||||||
165 | sub _error_callback { | ||||||
166 | 7 | 7 | 11 | my ($self, @args) = @_; | |||
167 | 7 | 50 | 25 | $self->{error_callback}->(@args) if defined $self->{error_callback}; | |||
168 | } | ||||||
169 | |||||||
170 | sub _linkcheck_callback { | ||||||
171 | 7 | 7 | 17 | my ($self, @args) = @_; | |||
172 | 7 | 50 | 24 | if (defined $self->{linkcheck_callback}) { | |||
173 | 0 | 0 | return $self->{linkcheck_callback}->(@args); | ||||
174 | } | ||||||
175 | 7 | 22 | return 1; # all URIs are valid by default | ||||
176 | } | ||||||
177 | |||||||
178 | sub _imgcheck_callback { | ||||||
179 | 4 | 4 | 10 | my ($self, @args) = @_; | |||
180 | 4 | 50 | 21 | if (defined $self->{imgcheck_callback}) { | |||
181 | 0 | 0 | return $self->{imgcheck_callback}->(@args); | ||||
182 | } | ||||||
183 | 4 | 15 | return 1; # all IMG URIs are valid by default | ||||
184 | } | ||||||
185 | |||||||
186 | # basic html things | ||||||
187 | sub _pre { | ||||||
188 | 24 | 24 | 26 | my ($self) = @_; | |||
189 | 24 | 46 | $self->{text} =~ s/&/&/g; | ||||
190 | 24 | 1567 | $self->{text} =~ s/\\</g; | ||||
191 | 24 | 37 | $self->{text} =~ s/\\>/>/g; | ||||
192 | 24 | 35 | $self->{text} =~ s/"/"/g; | ||||
193 | 24 | 79 | $self->{text} =~ s/[\t ]+/ /g; | ||||
194 | } | ||||||
195 | |||||||
196 | # make clean... | ||||||
197 | sub _post { | ||||||
198 | 24 | 24 | 32 | my ($self) = @_; | |||
199 | 24 | 50 | $self->{result} =~ s/\\([*_<>{}\[\]#\\])/$1/g; | ||||
200 | } | ||||||
201 | |||||||
202 | # tokenizer | ||||||
203 | { | ||||||
204 | my %token = ( | ||||||
205 | EMPHASIS => qr{^\*}, | ||||||
206 | STRONG => qr{^_}, | ||||||
207 | HYPERLINK_START => qr{^<(https?://[^ >\n]+) *}, | ||||||
208 | HYPERLINK_END => qr{^>}, | ||||||
209 | IMAGE_START => qr|^\{(https?://[^ }\n]+) *|, | ||||||
210 | IMAGE_END => qr|^\}|, | ||||||
211 | QUOTE_START => qr{^\n+\[\n?}, | ||||||
212 | QUOTE_END => qr{^\] *\n+}, | ||||||
213 | QUOTE_END_CITE => qr{^\] *}, | ||||||
214 | UL_BULLET => qr{^\n+- *}, | ||||||
215 | OL_BULLET => qr{^\n+# *}, | ||||||
216 | LINEBREAK => qr{^\n+}, | ||||||
217 | PLAIN => qr{^((?:[^*_<>\{\}\[\]#\\\n]+|\\[*_<>\{\}\[\]#\\\n])*)}, | ||||||
218 | ); | ||||||
219 | |||||||
220 | sub _literal { | ||||||
221 | 635 | 635 | 806 | my ($self, $token, $replacement) = @_; | |||
222 | 635 | 100 | 1224 | $replacement = '' unless defined $replacement; | |||
223 | 635 | 858 | my $regex = $token{$token}; | ||||
224 | |||||||
225 | 635 | 2434 | my $success = $self->{text} =~ s/$regex/$replacement/; | ||||
226 | 635 | 100 | 2380 | $self->{attribute} = $1 || undef; | |||
227 | 635 | 3650 | return $success; | ||||
228 | } | ||||||
229 | } # end of lexical %token | ||||||
230 | |||||||
231 | # parser... | ||||||
232 | sub _sbc { | ||||||
233 | 19 | 19 | 21 | my ($self) = @_; | |||
234 | 19 | 23 | my $sbc = ''; | ||||
235 | 19 | 35 | while (my $block = $self->_block()) { | ||||
236 | 19 | 81 | $sbc .= $block; | ||||
237 | } | ||||||
238 | 19 | 40 | return $sbc; | ||||
239 | } | ||||||
240 | |||||||
241 | sub _block { | ||||||
242 | 38 | 38 | 43 | my ($self) = @_; | |||
243 | 38 | 100 | 65 | return( $self->_quote() | |||
244 | or $self->_ulist() | ||||||
245 | or $self->_olist() | ||||||
246 | or $self->_paragraph() | ||||||
247 | ); | ||||||
248 | } | ||||||
249 | |||||||
250 | sub _quote { | ||||||
251 | 38 | 38 | 38 | my ($self) = @_; | |||
252 | 38 | 100 | 66 | $self->_literal('QUOTE_START', "\n") or return; | |||
253 | |||||||
254 | 2 | 4 | $self->{line}++; | ||||
255 | 2 | 2 | $self->{qstack}++; | ||||
256 | 2 | 11 | my $quote = $self->_sbc(); | ||||
257 | 2 | 3 | $self->{qstack}--; | ||||
258 | |||||||
259 | 2 | 100 | 7 | if ($self->_literal('QUOTE_END', "\n")) { | |||
50 | |||||||
260 | 1 | 8 | return qq( ) |
||||
261 | . qq(\n$quote\n); |
||||||
262 | } | ||||||
263 | elsif ($self->_literal('QUOTE_END_CITE')) { | ||||||
264 | 1 | 3 | my $cite = $self->_inline(); | ||||
265 | 1 | 8 | return qq( $cite) |
||||
266 | . qq(\n$quote\n); |
||||||
267 | } | ||||||
268 | else { | ||||||
269 | 0 | 0 | $self->_error('no_quote_end'); | ||||
270 | 0 | 0 | return qq( ) |
||||
271 | . qq(\n$quote\n); |
||||||
272 | } | ||||||
273 | } | ||||||
274 | |||||||
275 | sub _ulist { | ||||||
276 | 36 | 36 | 40 | my ($self) = @_; | |||
277 | 36 | 41 | my $ulist = ''; | ||||
278 | 36 | 68 | while (my $ulitem = $self->_ulitem()) { | ||||
279 | 2 | 6 | $ulist .= $ulitem; | ||||
280 | } | ||||||
281 | 36 | 100 | 207 | return if $ulist eq ''; | |||
282 | 1 | 6 | return qq(
|
||||
283 | } | ||||||
284 | |||||||
285 | sub _ulitem { | ||||||
286 | 38 | 38 | 41 | my ($self) = @_; | |||
287 | 38 | 100 | 57 | $self->_literal('UL_BULLET') or return; | |||
288 | 2 | 3 | $self->{line}++; | ||||
289 | 2 | 5 | my $ulitem = $self->_inline(); | ||||
290 | 2 | 8 | return qq(\t |
||||
291 | } | ||||||
292 | |||||||
293 | sub _olist { | ||||||
294 | 35 | 35 | 40 | my ($self) = @_; | |||
295 | 35 | 40 | my $olist = ''; | ||||
296 | 35 | 55 | while (my $olitem = $self->_olitem()) { | ||||
297 | 2 | 6 | $olist .= $olitem; | ||||
298 | } | ||||||
299 | 35 | 100 | 188 | return if $olist eq ''; | |||
300 | 1 | 6 | return qq(
|
||||
301 | } | ||||||
302 | |||||||
303 | sub _olitem { | ||||||
304 | 37 | 37 | 40 | my ($self) = @_; | |||
305 | 37 | 100 | 59 | $self->_literal('OL_BULLET') or return; | |||
306 | 2 | 3 | $self->{line}++; | ||||
307 | 2 | 5 | my $olitem = $self->_inline(); | ||||
308 | 2 | 43 | return qq(\t |
||||
309 | } | ||||||
310 | |||||||
311 | sub _paragraph { | ||||||
312 | 34 | 34 | 39 | my ($self) = @_; | |||
313 | 34 | 100 | 58 | $self->_literal('LINEBREAK') or return; | |||
314 | 32 | 43 | $self->{line}++; | ||||
315 | 32 | 55 | my $paragraph = $self->_inline(); | ||||
316 | |||||||
317 | 32 | 100 | 100 | 109 | unless ($self->{qstack} or $self->_literal('LINEBREAK', "\n")) { | ||
318 | 17 | 22 | $self->{line}--; | ||||
319 | 17 | 68 | return; | ||||
320 | } | ||||||
321 | 15 | 50 | 46 | if ($paragraph =~ /^\s*$/) { | |||
322 | 0 | 0 | return "\n"; | ||||
323 | } | ||||||
324 | else { | ||||||
325 | 15 | 81 | return qq( $paragraph \n); |
||||
326 | } | ||||||
327 | } | ||||||
328 | |||||||
329 | sub _inline { | ||||||
330 | 61 | 61 | 74 | my ($self) = @_; | |||
331 | 61 | 82 | my $inline = ''; | ||||
332 | |||||||
333 | 61 | 60 | while (1) { # use Acme::speeed to accelerate this! | ||||
334 | 110 | 100 | 66 | 476 | if (not $self->{istack}{EMPHASIS} and | ||
100 | 66 | ||||||
100 | 66 | ||||||
100 | 100 | ||||||
100 | |||||||
335 | defined(my $emphasis = $self->_emphasis())) { | ||||||
336 | 5 | 10 | $inline .= $emphasis; next; | ||||
5 | 6 | ||||||
337 | } | ||||||
338 | elsif (not $self->{istack}{STRONG} and | ||||||
339 | defined(my $strong = $self->_strong())) { | ||||||
340 | 5 | 9 | $inline .= $strong; next; | ||||
5 | 9 | ||||||
341 | } | ||||||
342 | elsif (not $self->{istack}{HYPERLINK} and | ||||||
343 | defined(my $hyperlink = $self->_hyperlink())) { | ||||||
344 | 7 | 16 | $inline .= $hyperlink; next; | ||||
7 | 12 | ||||||
345 | } | ||||||
346 | elsif ($self->image_support() and | ||||||
347 | defined(my $image = $self->_image())) { | ||||||
348 | 4 | 463 | $inline .= $image; next; | ||||
4 | 10 | ||||||
349 | } | ||||||
350 | elsif (defined(my $plain = $self->_plain())) { | ||||||
351 | 28 | 37 | $inline .= $plain; next; | ||||
28 | 57 | ||||||
352 | } | ||||||
353 | else { | ||||||
354 | 61 | 89 | last; | ||||
355 | } | ||||||
356 | } | ||||||
357 | |||||||
358 | 61 | 147 | return $inline; | ||||
359 | } | ||||||
360 | |||||||
361 | sub _emphasis { | ||||||
362 | 100 | 100 | 115 | my ($self) = @_; | |||
363 | 100 | 100 | 231 | $self->_literal('EMPHASIS') or return; | |||
364 | 5 | 12 | $self->{istack}{EMPHASIS}++; | ||||
365 | 5 | 20 | my $emphasis = $self->_inline(); | ||||
366 | 5 | 100 | 12 | $self->_literal('EMPHASIS') or $self->_error('no_emphasis_end'); | |||
367 | 5 | 11 | $self->{istack}{EMPHASIS}--; | ||||
368 | 5 | 50 | 15 | return '' if $emphasis eq ''; | |||
369 | 5 | 17 | return qq($emphasis); | ||||
370 | } | ||||||
371 | |||||||
372 | sub _strong { | ||||||
373 | 93 | 93 | 112 | my ($self) = @_; | |||
374 | 93 | 100 | 151 | $self->_literal('STRONG') or return; | |||
375 | 5 | 14 | $self->{istack}{STRONG}++; | ||||
376 | 5 | 13 | my $strong = $self->_inline(); | ||||
377 | 5 | 100 | 13 | $self->_literal('STRONG') or $self->_error('no_strong_end'); | |||
378 | 5 | 10 | $self->{istack}{STRONG}--; | ||||
379 | 5 | 50 | 16 | return '' if $strong eq ''; | |||
380 | 5 | 28 | return qq($strong); | ||||
381 | } | ||||||
382 | |||||||
383 | sub _hyperlink { | ||||||
384 | 87 | 87 | 92 | my ($self) = @_; | |||
385 | 87 | 100 | 139 | $self->_literal('HYPERLINK_START') or return; | |||
386 | 7 | 19 | $self->{istack}{HYPERLINK}++; | ||||
387 | 7 | 12 | my $url = $self->{attribute}; | ||||
388 | 7 | 24 | my $link = $self->_inline(); | ||||
389 | 7 | 100 | 29 | $link = $url if $link =~ /^ *$/; | |||
390 | 7 | 100 | 18 | $self->_literal('HYPERLINK_END') or $self->_error('no_hyperlink_end'); | |||
391 | 7 | 13 | $self->{istack}{HYPERLINK}--; | ||||
392 | 7 | 50 | 101 | if ($self->_linkcheck_callback($url)) { | |||
393 | 7 | 29 | return qq($link); | ||||
394 | } | ||||||
395 | else { | ||||||
396 | 0 | 0 | $self->_error('forbidden_url', $url); | ||||
397 | 0 | 0 | return $link; | ||||
398 | } | ||||||
399 | } | ||||||
400 | |||||||
401 | sub _image { | ||||||
402 | 59 | 59 | 65 | my ($self) = @_; | |||
403 | 59 | 100 | 95 | $self->_literal('IMAGE_START') or return; | |||
404 | 4 | 9 | my $url = $self->{attribute}; | ||||
405 | 4 | 9 | my $alt = ''; | ||||
406 | 4 | 10 | while (my $plain = $self->_plain()) { | ||||
407 | 2 | 8 | $alt .= $plain; | ||||
408 | } | ||||||
409 | 4 | 50 | 12 | $self->_literal('IMAGE_END') or $self->_error('no_image_end'); | |||
410 | 4 | 50 | 13 | if ($self->_imgcheck_callback($url)) { | |||
411 | 4 | 23 | return qq(); | ||||
412 | } | ||||||
413 | else { | ||||||
414 | 0 | 0 | $self->_error('forbidden_url', $url); | ||||
415 | 0 | 0 | return ''; | ||||
416 | } | ||||||
417 | } | ||||||
418 | |||||||
419 | sub _plain { | ||||||
420 | 95 | 95 | 103 | my ($self) = @_; | |||
421 | 95 | 50 | 171 | $self->_literal('PLAIN') and return $self->{attribute}; | |||
422 | } | ||||||
423 | |||||||
424 | =back | ||||||
425 | |||||||
426 | =head3 Accessor methods | ||||||
427 | |||||||
428 | =over 4 | ||||||
429 | |||||||
430 | =item language | ||||||
431 | |||||||
432 | Accessor method for the C |
||||||
433 | messages. All accessors are both setter and getter: | ||||||
434 | |||||||
435 | $language = $translator->language(); | ||||||
436 | $translator->language($new_language); | ||||||
437 | |||||||
438 | Valid languages: 'english' (default), 'german'. | ||||||
439 | |||||||
440 | =item image_support | ||||||
441 | |||||||
442 | Accessor method for the C |
||||||
443 | parsed or not. Image markup is translated if and only if this field has a true | ||||||
444 | value, so for this field all values are valid. | ||||||
445 | |||||||
446 | =item error_callback | ||||||
447 | |||||||
448 | Accessor method for the C |
||||||
449 | is called on every error that occurs while parsing your SBC input. It gets the | ||||||
450 | error message as first argument and a reference to the translator object as | ||||||
451 | second argument. Valid values are: undef, coderefs. | ||||||
452 | |||||||
453 | =item linkcheck_callback | ||||||
454 | |||||||
455 | Accessor method for the C |
||||||
456 | callback is called if there is hyperlink markup in your SBC input. It gets the | ||||||
457 | URL as first argument and has to return a true value if that URL is considered | ||||||
458 | valid, false otherwise. Valid values are: undef, coderefs. | ||||||
459 | |||||||
460 | =item imgcheck_callback | ||||||
461 | |||||||
462 | Accessor method for the C |
||||||
463 | callback is called if there is image markup in your SBC input. It gets the URL | ||||||
464 | as first argument and has to return a true value if that URL is considered | ||||||
465 | valid, false otherwise. Valid values are: undef, coderefs. | ||||||
466 | |||||||
467 | =cut | ||||||
468 | |||||||
469 | { | ||||||
470 | # accessor checks | ||||||
471 | my %checks = ( | ||||||
472 | language => sub { my ($l) = @_; | ||||||
473 | scalar grep { $_ eq $l } @lang | ||||||
474 | }, | ||||||
475 | image_support => sub { | ||||||
476 | 1; | ||||||
477 | }, | ||||||
478 | error_callback => sub { | ||||||
479 | ! blessed($_[0]) && ref $_[0] eq 'CODE' || ! defined $_[0] | ||||||
480 | }, | ||||||
481 | linkcheck_callback => sub { | ||||||
482 | ! blessed($_[0]) && ref $_[0] eq 'CODE' || ! defined $_[0] | ||||||
483 | }, | ||||||
484 | imgcheck_callback => sub { | ||||||
485 | ! blessed($_[0]) && ref $_[0] eq 'CODE' || ! defined $_[0] | ||||||
486 | }, | ||||||
487 | ); | ||||||
488 | |||||||
489 | # accessor generation | ||||||
490 | while (my ($field, $valid) = each %checks) { | ||||||
491 | 6 | 6 | 55 | no strict 'refs'; | |||
6 | 12 | ||||||
6 | 7289 | ||||||
492 | *$field = sub { | ||||||
493 | 117 | 117 | 3177 | my $self = shift; | |||
494 | 117 | 100 | 216 | if (@_) { | |||
495 | 5 | 7 | my $new = shift; | ||||
496 | 5 | 50 | 33 | 55 | if (defined $valid and not $valid->($new)) { | ||
497 | 0 | 0 | croak "Invalid value for $field: $new"; | ||||
498 | } | ||||||
499 | 5 | 12 | $self->{$field} = $new; | ||||
500 | } | ||||||
501 | 117 | 404 | return $self->{$field}; | ||||
502 | }; | ||||||
503 | } | ||||||
504 | } # end of lexical %check | ||||||
505 | |||||||
506 | =back | ||||||
507 | |||||||
508 | =head3 Translation methods | ||||||
509 | |||||||
510 | =over 4 | ||||||
511 | |||||||
512 | =item sbc | ||||||
513 | |||||||
514 | my $html = $translator->sbc($text); | ||||||
515 | |||||||
516 | Returns some valid HTML block elements which represent the given SBC C<$text>. | ||||||
517 | |||||||
518 | =cut | ||||||
519 | |||||||
520 | sub sbc { | ||||||
521 | 17 | 17 | 1 | 6794 | my ($self, $text) = @_; | ||
522 | 17 | 50 | 37 | return undef unless defined $text; | |||
523 | 17 | 50 | 67 | return '' if $text =~ /^\s*$/; | |||
524 | 17 | 36 | $self->_init(); | ||||
525 | 17 | 22 | $self->{text} = $text; | ||||
526 | 17 | 99 | $self->_pre(); | ||||
527 | 17 | 45 | $self->{text} = "\n$self->{text}\n"; | ||||
528 | 17 | 78 | $self->{text} =~ s/[\r\n]+/\n/g; | ||||
529 | 17 | 39 | $self->{result} = $self->_sbc(); | ||||
530 | 17 | 31 | $self->_post(); | ||||
531 | 17 | 24 | $self->{result} =~ s/\\\n/ /g; |
||||
532 | 17 | 50 | 106 | $self->_error('unknown_token') unless $self->{text} =~ /^\n*$/; | |||
533 | 17 | 80 | return $self->{result}; | ||||
534 | } | ||||||
535 | |||||||
536 | =item sbc_inline | ||||||
537 | |||||||
538 | my $line = $translator->sbc_inline($text); | ||||||
539 | |||||||
540 | Returns some valid HTML inline content which represents the given SBC C<$text>. | ||||||
541 | C<$text> may only contain inline SBC markup. | ||||||
542 | |||||||
543 | =cut | ||||||
544 | |||||||
545 | sub sbc_inline { | ||||||
546 | 7 | 7 | 1 | 4281 | my ($self, $text) = @_; | ||
547 | 7 | 50 | 22 | return undef unless defined $text; | |||
548 | 7 | 50 | 28 | return '' if $text =~ /^\s*$/; | |||
549 | 7 | 17 | $self->_init(); | ||||
550 | 7 | 13 | $self->{text} = $text; | ||||
551 | 7 | 18 | $self->_pre(); | ||||
552 | 7 | 12 | $self->{text} =~ s/[\r\n]+/ /g; | ||||
553 | 7 | 17 | $self->{result} = $self->_inline(); | ||||
554 | 7 | 16 | $self->_post(); | ||||
555 | 7 | 50 | 27 | $self->_error('unknown_token') unless $self->{text} =~ /^\n*$/; | |||
556 | 7 | 2030 | return $self->{result}; | ||||
557 | } | ||||||
558 | |||||||
559 | =back | ||||||
560 | |||||||
561 | =head3 Error handling methods | ||||||
562 | |||||||
563 | After translation you can look for errors in your SBC input: | ||||||
564 | |||||||
565 | =over 4 | ||||||
566 | |||||||
567 | =item errors | ||||||
568 | |||||||
569 | my @errors = $translator->errors(); | ||||||
570 | |||||||
571 | returns a list of warnings/errors in the chosen language. | ||||||
572 | |||||||
573 | =cut | ||||||
574 | |||||||
575 | sub errors { | ||||||
576 | 0 | 0 | 1 | 0 | my ($self) = @_; | ||
577 | 0 | 0 | return @{$self->{errors}}; | ||||
0 | 0 | ||||||
578 | } | ||||||
579 | |||||||
580 | =item next_error | ||||||
581 | |||||||
582 | while (my $error = $translator->next_error()) { | ||||||
583 | do_something_with($error); | ||||||
584 | } | ||||||
585 | |||||||
586 | Implements an iterator interface to your error messages. It will return the next | ||||||
587 | error message or undef if there's nothing left. | ||||||
588 | |||||||
589 | =cut | ||||||
590 | |||||||
591 | sub next_error { | ||||||
592 | 0 | 0 | 1 | 0 | my ($self) = @_; | ||
593 | 0 | 0 | return shift @{ $self->{errors} }; | ||||
0 | 0 | ||||||
594 | } | ||||||
595 | |||||||
596 | =back | ||||||
597 | |||||||
598 | Remember the possibility to use your own error callback method. | ||||||
599 | |||||||
600 | =head3 Class methods | ||||||
601 | |||||||
602 | There are some SBC tools implemented as class methods. | ||||||
603 | |||||||
604 | =over 4 | ||||||
605 | |||||||
606 | =item quote | ||||||
607 | |||||||
608 | my $reply = HTML::SBC->quote($original); | ||||||
609 | |||||||
610 | If you have some text in simple blog code C<$original> and you want it to be | ||||||
611 | sbc-quoted (e. g. for reply functionality in boards). You can add the author's | ||||||
612 | name as second argument: | ||||||
613 | |||||||
614 | my $reply = HTML::SBC->quote($original, $author); | ||||||
615 | |||||||
616 | =cut | ||||||
617 | |||||||
618 | sub quote { | ||||||
619 | 2 | 2 | 1 | 15 | my ($class, $sbc, $cite) = @_; | ||
620 | 2 | 100 | 7 | $cite = '' unless defined $cite; | |||
621 | 2 | 16 | return qq([\n$sbc\n]$cite\n); | ||||
622 | } | ||||||
623 | |||||||
624 | =item remove_hyperlinks | ||||||
625 | |||||||
626 | my $plain = HTML::SBC->remove_hyperlinks($sbc); | ||||||
627 | |||||||
628 | This class methods strips any hyperlink urls from given sbc input. It is often | ||||||
629 | used for search scripts which usually don't want to search within urls. It also | ||||||
630 | removes image markup. | ||||||
631 | |||||||
632 | =cut | ||||||
633 | |||||||
634 | sub remove_hyperlinks { | ||||||
635 | 4 | 4 | 1 | 8 | my ($class, $sbc) = @_; | ||
636 | 4 | 16 | $sbc =~ s{<(https?://[^ >\n]+)>}{$1}g; | ||||
637 | 4 | 10 | $sbc =~ s{ |
||||
638 | 4 | 13 | $sbc =~ s{\{https?://[^ \}\n]+\}}{}g; | ||||
639 | 4 | 9 | $sbc =~ s{\{https?://[^ \}\n]+ +([^\}\n]*)\}}{$1}g; | ||||
640 | 4 | 19 | return $sbc; | ||||
641 | } | ||||||
642 | |||||||
643 | =item description | ||||||
644 | |||||||
645 | my $description = HTML::SBC->description('german'); | ||||||
646 | |||||||
647 | If you want some newbies to use SBC, just show them our SBC language | ||||||
648 | description in your favourite language (english is default). | ||||||
649 | |||||||
650 | =cut | ||||||
651 | |||||||
652 | { | ||||||
653 | my %desc = ( | ||||||
654 | $lang[0] => < | ||||||
655 | Simple Blog Code is easy. Paragraphs are directly translated in paragraphs. Codes in paragraphs: | ||||||
656 | - _\\*foo\\*_ emphasis: *foo* | ||||||
657 | - _\\_bar\\__ strong emphasis: _bar_ | ||||||
658 | - _\\ |
||||||
659 | - _\\ |
||||||
660 | - _\\{http://www.memowe.de/pix/sbc.jpg\\}_ images without alternative text (*may be disabled*). | ||||||
661 | - _\\{http://www.memowe.de/pix/sbc.jpg SBC\\}_ images with alternative text *SBC* (*may be disabled*). | ||||||
662 | You can use unordered lists: | ||||||
663 | _- one thing\\ | ||||||
664 | - another thing_ | ||||||
665 | will be | ||||||
666 | - one thing | ||||||
667 | - another thing | ||||||
668 | Or ordered lists: | ||||||
669 | _\\# first\\ | ||||||
670 | \\# second_ | ||||||
671 | will be | ||||||
672 | # first | ||||||
673 | # second | ||||||
674 | In lists you can use the codes from paragraphs. With square brackets one can mark up quotes. A _\\[Quote\\]_ looks like this: | ||||||
675 | [Quote] | ||||||
676 | Or you can add the quote's author after the closing bracket: _\\[Quote\\] Author_: | ||||||
677 | [Quote] Author | ||||||
678 | A quote may contain paragraphs, lists and quotes. Author information may contain all codes from paragraphs. Special characters from SBC have to be *escaped* with a backslash: _\\\\\\*_, _\\\\\\__, ...; even the backslash itself: _\\\\\\\\_. | ||||||
679 | DESC_EN | ||||||
680 | $lang[1] => < | ||||||
681 | Simple Blog Code ist einfach. Absätze werden direkt in Absätze übersetzt. Codes in Absätzen: | ||||||
682 | - _\\*foo\\*_ Betonte Texte: *foo* | ||||||
683 | - _\\_bar\\__ Hervorgehobene Texte: _bar_ | ||||||
684 | - _\\ |
||||||
685 | - _\\ |
||||||
686 | - _\\{http://www.memowe.de/pix/sbc.jpg\\}_ Bilder ohne alternativen Text (*möglicherweise deaktiviert*). | ||||||
687 | - _\\{http://www.memowe.de/pix/sbc.jpg SBC\\}_ Bilder mit alternativem Text *SBC* (*möglicherweise deaktiviert*). | ||||||
688 | Statt Absätzen kann man ungeordnete Listen verwenden: | ||||||
689 | _- Einerseits\\ | ||||||
690 | - Andererseits_ | ||||||
691 | wird zu | ||||||
692 | - Einerseits | ||||||
693 | - Andererseits | ||||||
694 | Oder geordnete Listen: | ||||||
695 | _\\# Erstens\\ | ||||||
696 | \\# Zweitens_ | ||||||
697 | wird zu | ||||||
698 | # Erstens | ||||||
699 | # Zweitens | ||||||
700 | Innerhalb von Listen können die Codes von Absätzen verwendet werden. Mit eckigen Klammern kann man Zitate auszeichnen. Ein _\\[Zitat\\]_ sieht so aus: | ||||||
701 | [Zitat] | ||||||
702 | Man kann auch die Quelle des Zitats angeben, nämlich hinter der schließenden eckigen Klammer: _\\[Zitat\\]_ Quelle | ||||||
703 | [Zitat] Quelle | ||||||
704 | Ein Zitat kann wieder Absätze, Listen und Zitate enthalten, in Quellenangaben können alle Codes verwendet werden, die auch Absätze kennen. Sonderzeichen von SBC müssen mit einem Backslash codiert werden: _\\\\\\*_, _\\\\\\__, usw. und auch der Backslash selbst: _\\\\\\\\_. | ||||||
705 | DESC_DE | ||||||
706 | ); | ||||||
707 | |||||||
708 | sub description { | ||||||
709 | 0 | 0 | 1 | my ($class, $lang) = @_; | |||
710 | 0 | 0 | $lang = $lang[0] unless defined $lang; | ||||
711 | 0 | 0 | croak "Unknown language '$lang'" unless grep { $lang eq $_ } @lang; | ||||
0 | |||||||
712 | 0 | return scalar sbc_translate($desc{$lang}); | |||||
713 | } | ||||||
714 | } # end of lexical %desc | ||||||
715 | |||||||
716 | =back | ||||||
717 | |||||||
718 | =head2 Vintage interface | ||||||
719 | |||||||
720 | For backward compatibility, HTML::SBC implements its vintage non-OO interface | ||||||
721 | (versions < 0.10) so you can use newer versions of HTML::SBC without any changes | ||||||
722 | in your source code, for example: | ||||||
723 | |||||||
724 | use HTML::SBC qw( sbc_translate ); | ||||||
725 | HTML::SBC::german(); | ||||||
726 | my ($html, $errors) = sbc_translate($text); | ||||||
727 | print "$_\n" for @$errors; | ||||||
728 | |||||||
729 | To import this vintage interface, | ||||||
730 | |||||||
731 | use HTML::SBC qw( sbc_translate sbc_description ); | ||||||
732 | |||||||
733 | or import everything (except language getter): | ||||||
734 | |||||||
735 | use HTML::SBC qw( :vintage ); | ||||||
736 | |||||||
737 | =cut | ||||||
738 | |||||||
739 | { | ||||||
740 | my $static_transl; # for vintage interface | ||||||
741 | |||||||
742 | sub _static { | ||||||
743 | 0 | 0 | 0 | unless (defined $static_transl) { | |||
744 | 0 | $static_transl = HTML::SBC->new({ | |||||
745 | image_support => 0, # no image support in versions < 0.10 | ||||||
746 | }); | ||||||
747 | } | ||||||
748 | 0 | return $static_transl; | |||||
749 | } | ||||||
750 | } # end of lexical $static_transl | ||||||
751 | |||||||
752 | sub _static_lang { | ||||||
753 | 0 | 0 | my $transl = _static(); | ||||
754 | 0 | return $transl->language(); | |||||
755 | } | ||||||
756 | |||||||
757 | =over 4 | ||||||
758 | |||||||
759 | =item english | ||||||
760 | |||||||
761 | C |
||||||
762 | |||||||
763 | =item german | ||||||
764 | |||||||
765 | C |
||||||
766 | |||||||
767 | =item sbc_translate | ||||||
768 | |||||||
769 | my ($html, $errors) = sbc_translate($text); | ||||||
770 | |||||||
771 | C |
||||||
772 | messages. To ignore the errors, just evaluate C |
||||||
773 | context. | ||||||
774 | |||||||
775 | =item sbc_translate_inline | ||||||
776 | |||||||
777 | my ($inline_html, $errors) = sbc_translate_inline($inline_text); | ||||||
778 | |||||||
779 | does the same with inline content (see C |
||||||
780 | |||||||
781 | =item sbc_quote | ||||||
782 | |||||||
783 | my $reply = sbc_quote($original); | ||||||
784 | |||||||
785 | If you have some text in simple blog code C<$original> and you want it to be | ||||||
786 | sbc-quoted (e. g. for reply functionality in boards), just use this. You can | ||||||
787 | add the author's name as second argument: | ||||||
788 | |||||||
789 | my $reply = sbc_quote($original, $author); | ||||||
790 | |||||||
791 | =item sbc_description | ||||||
792 | |||||||
793 | my $description = sbc_description(); | ||||||
794 | |||||||
795 | If you want some newbies to use SBC, just show them our SBC language | ||||||
796 | description. | ||||||
797 | |||||||
798 | =cut | ||||||
799 | |||||||
800 | foreach my $lang (@lang) { | ||||||
801 | 6 | 6 | 49 | no strict 'refs'; | |||
6 | 18 | ||||||
6 | 2344 | ||||||
802 | *$lang = sub { | ||||||
803 | 0 | 0 | my $static_obj = _static(); | ||||
804 | 0 | $static_obj->language($lang); | |||||
805 | }; | ||||||
806 | } | ||||||
807 | |||||||
808 | sub sbc_translate { | ||||||
809 | 0 | 0 | 1 | my ($text) = @_; | |||
810 | 0 | my $transl = _static(); | |||||
811 | 0 | my $result = $transl->sbc($text); | |||||
812 | 0 | my @errors = $transl->errors(); | |||||
813 | 0 | 0 | return wantarray ? ($result, \@errors) : $result; | ||||
814 | } | ||||||
815 | |||||||
816 | sub sbc_translate_inline { | ||||||
817 | 0 | 0 | 1 | my ($line) = @_; | |||
818 | 0 | my $transl = _static(); | |||||
819 | 0 | my $result = $transl->sbc_inline($line); | |||||
820 | 0 | my @errors = $transl->errors(); | |||||
821 | 0 | 0 | return wantarray ? ($result, \@errors) : $result; | ||||
822 | } | ||||||
823 | |||||||
824 | sub sbc_quote { | ||||||
825 | 0 | 0 | 1 | my ($sbc, $cite) = @_; | |||
826 | 0 | return HTML::SBC->quote($sbc, $cite); | |||||
827 | } | ||||||
828 | |||||||
829 | sub sbc_description { | ||||||
830 | 0 | 0 | 1 | return HTML::SBC->description(_static_lang()); | |||
831 | } | ||||||
832 | |||||||
833 | =back | ||||||
834 | |||||||
835 | =head2 Language | ||||||
836 | |||||||
837 | I |
||||||
838 | between newlines) are translated in (X)HTML P elements. In paragraphs, some | ||||||
839 | |||||||
840 | =head3 inline elements | ||||||
841 | |||||||
842 | are allowed as follows: | ||||||
843 | |||||||
844 | =over 4 | ||||||
845 | |||||||
846 | =item C<*emphasis*> | ||||||
847 | |||||||
848 | emphasis | ||||||
849 | |||||||
850 | =item C<_strong emphasis_> | ||||||
851 | |||||||
852 | strong emphasis | ||||||
853 | |||||||
854 | =item C<< |
||||||
855 | |||||||
856 | http://www.example.org/ | ||||||
857 | |||||||
858 | =item C<< |
||||||
859 | |||||||
860 | hyperlink | ||||||
861 | |||||||
862 | =item C<< {http://www.example.org/foo.jpg} >> B<(optional, only in oo)> | ||||||
863 | |||||||
864 | |||||||
865 | |||||||
866 | =item C<< {http://www.example.org/foo.jpg image} >> B<(optional, only in oo)> | ||||||
867 | |||||||
868 | |||||||
869 | |||||||
870 | =back | ||||||
871 | |||||||
872 | There are some elements on block level which don't have to be in paragraphs. | ||||||
873 | |||||||
874 | =head3 block level elements | ||||||
875 | |||||||
876 | =over 4 | ||||||
877 | |||||||
878 | =item C<[nice quote]> | ||||||
879 | |||||||
880 | |
||||||
881 | |
||||||
882 | nice quote | ||||||
883 | |||||||
884 | |||||||
885 | |||||||
886 | =item C<[another nice quote] author> | ||||||
887 | |||||||
888 | |
||||||
889 | author | ||||||
890 | |
||||||
891 | another nice quote | ||||||
892 | |||||||
893 | |||||||
894 | |||||||
895 | =item C<- first\n- second\n- third\n> | ||||||
896 | |||||||
897 | |
||||||
898 | |
||||||
899 | |
||||||
900 | |
||||||
901 | |||||||
902 | |||||||
903 | =item C<# first\n# second\n# third\n> | ||||||
904 | |||||||
905 | |
||||||
906 | |
||||||
907 | |
||||||
908 | |
||||||
909 | |||||||
910 | |||||||
911 | =back | ||||||
912 | |||||||
913 | Block level elements have to be started in new lines. In quotes, you can use | ||||||
914 | block level elements, e. g. | ||||||
915 | |||||||
916 | [ | ||||||
917 | \[...\] the three great virtues of a programmer: | ||||||
918 | - laziness, | ||||||
919 | - impatience and | ||||||
920 | - hubris. | ||||||
921 | ] Larry Wall | ||||||
922 | |||||||
923 | You'll get the nice quote from Larry with an inner list. You can see here, that | ||||||
924 | characters with a special meaning have to be escaped in SBC. You would use "\*" | ||||||
925 | to get an asterisk, for example. | ||||||
926 | |||||||
927 | =head1 AUTHOR | ||||||
928 | |||||||
929 | Mirko Westermeier, C<< |
||||||
930 | |||||||
931 | =head1 BUGS | ||||||
932 | |||||||
933 | Please report any bugs or feature requests to | ||||||
934 | C |
||||||
935 | L |
||||||
936 | I will be notified, and then you'll automatically be notified of progress on | ||||||
937 | your bug as I make changes. | ||||||
938 | |||||||
939 | I love feedback. :-) | ||||||
940 | |||||||
941 | =head1 SUPPORT | ||||||
942 | |||||||
943 | You can find documentation for this module with the perldoc command. | ||||||
944 | |||||||
945 | perldoc HTML::SBC | ||||||
946 | |||||||
947 | =head1 ACKNOWLEDGEMENTS | ||||||
948 | |||||||
949 | Thanks to Florian Ragwitz (rafl) for many helpful comments and suggestions. | ||||||
950 | |||||||
951 | =head1 COPYRIGHT & LICENSE | ||||||
952 | |||||||
953 | Copyright 2006 Mirko Westermeier, all rights reserved. | ||||||
954 | |||||||
955 | This program is free software; you can redistribute it and/or modify it | ||||||
956 | under the same terms as Perl itself. | ||||||
957 | |||||||
958 | =cut | ||||||
959 | |||||||
960 | 1; # End of HTML::SBC |