File Coverage

blib/lib/Syntax/Highlight/HTML.pm
Criterion Covered Total %
statement 46 49 93.8
branch 6 10 60.0
condition 7 9 77.7
subroutine 7 7 100.0
pod 2 2 100.0
total 68 77 88.3


line stmt bran cond sub pod time code
1             package Syntax::Highlight::HTML;
2 3     3   74187 use strict;
  3         8  
  3         145  
3 3     3   6580 use HTML::Parser;
  3         40506  
  3         155  
4              
5 3     3   38 { no strict;
  3         9  
  3         3152  
6               $VERSION = '0.04';
7               @ISA = qw(HTML::Parser);
8             }
9              
10             =head1 NAME
11            
12             Syntax::Highlight::HTML - Highlight HTML syntax
13            
14             =head1 VERSION
15            
16             Version 0.04
17            
18             =cut
19              
20             my %classes = ( # CSS class name emitted for each kind of highlighted element
21                 declaration => 'h-decl', # declaration <!DOCTYPE ...>
22                 process => 'h-pi', # process instruction <?xml ...?>
23                 comment => 'h-com', # comment <!-- ... -->
24                 angle_bracket => 'h-ab', # the characters '<' and '>' as tag delimiters
25                 tag_name => 'h-tag', # the tag name of an element
26                 attr_name => 'h-attr', # the attribute name
27                 attr_value => 'h-attv', # the attribute value
28                 entity => 'h-ent', # any entities: &eacute; &#171;
29                 line_number => 'h-lno', # line number
30             );
31              
32             my %defaults = ( # option defaults; new() copies these and lets caller arguments override them
33                 pre => 1, # add <pre>...</pre> around the result? (default: yes)
34                 nnn => 0, # add line numbers (default: no)
35             );
36              
37             =head1 SYNOPSIS
38            
39             use Syntax::Highlight::HTML;
40            
41             my $highlighter = new Syntax::Highlight::HTML;
42             $output = $highlighter->parse($html);
43            
44             If C<$html> contains the following HTML fragment:
45            
46             <!-- a description list -->
47             <dl compact="compact">
48             <dt>some word</dt>
49             <dd>the description of the word. Plus some <a href="/definitions/other_word"
50             >reference</a> towards another definition. </dd>
51             </dl>
52            
53             then the resulting HTML contained in C<$output> will render like this:
54            
55             =begin html
56            
57             <style type="text/css">
58             <!--
59             .h-decl { color: #336699; font-style: italic; } /* doctype declaration */
60             .h-pi { color: #336699; } /* process instruction */
61             .h-com { color: #338833; font-style: italic; } /* comment */
62             .h-ab { color: #000000; font-weight: bold; } /* angles as tag delim. */
63             .h-tag { color: #993399; font-weight: bold; } /* tag name */
64             .h-attr { color: #000000; font-weight: bold; } /* attribute name */
65             .h-attv { color: #333399; } /* attribute value */
66             .h-ent { color: #cc3333; } /* entity */
67             .h-lno { color: #aaaaaa; background: #f7f7f7;} /* line numbers */
68             -->
69             </style>
70            
71             <pre>
72             <span class="h-com">&lt;!-- a description list --&gt;</span>
73             <span class="h-ab">&lt;</span><span class="h-tag">dl</span> <span class="h-attr">compact</span>=<span class="h-attv">"compact</span>"<span class="h-ab">&gt;</span>
74             <span class="h-ab">&lt;</span><span class="h-tag">dt</span><span class="h-ab">&gt;</span>some word<span class="h-ab">&lt;/</span><span class="h-tag">dt</span><span class="h-ab">&gt;</span>
75             <span class="h-ab">&lt;</span><span class="h-tag">dd</span><span class="h-ab">&gt;</span>the description of the word. Plus some <span class="h-ab">&lt;</span><span class="h-tag">a</span> <span class="h-attr">href</span>=<span class="h-attv">"/definitions/other_word</span>"
76             <span class="h-ab">&gt;</span>reference<span class="h-ab">&lt;/</span><span class="h-tag">a</span><span class="h-ab">&gt;</span> towards another definition. <span class="h-ab">&lt;/</span><span class="h-tag">dd</span><span class="h-ab">&gt;</span>
77             <span class="h-ab">&lt;/</span><span class="h-tag">dl</span><span class="h-ab">&gt;</span>
78             </pre>
79            
80             =end html
81            
82             =head1 DESCRIPTION
83            
84             This module is designed to take raw HTML input and highlight it (using a CSS
85             stylesheet, see L</NOTES> for the classes). The returned HTML code is ready
86             for inclusion in a web page.
87            
88             It is intended to be used as a highlighting filter, and as such does not reformat
89             or reindent the original HTML code.
90            
91             =head1 METHODS
92            
93             =over 4
94            
95             =item new()
96            
97             The constructor. Returns a C<Syntax::Highlight::HTML> object, which derives from
98             C<HTML::Parser>. As such, any C<HTML::Parser> method can be called on this object
99             (that is, except for C<parse()>, which is overridden here).
100            
101             B<Options>
102            
103             =over 4
104            
105             =item *
106            
107             C<nnn> - Activate line numbering. Default value: 0 (disabled).
108            
109             =item *
110            
111             C<pre> - Surround result by C<< <pre>...</pre> >> tags. Default value: 1 (enabled).
112            
113             =back
114            
115             B<Example>
116            
117             To avoid surrounding the result by the C<< <pre>...</pre> >> tags:
118            
119             my $highlighter = Syntax::Highlight::HTML->new(pre => 0);
120            
121             =cut
122              
123             sub new { # constructor: takes optional key => value options (pre, nnn), returns the new object
124 2     2 1 1173     my $self = __PACKAGE__->SUPER::new( # build the parser through the parent class; it is re-blessed below
125             # API version passed to the parent constructor
126                     api_version => 3,
127              
128             # Options passed to the parent constructor
129                     case_sensitive => 1,
130                     attr_encoded => 1,
131              
132             # Handlers: markup events go to _highlight_tag, text and everything else to _highlight_text
133                     declaration_h => [ \&_highlight_tag, 'self, event, tagname, attr, text' ],
134                     process_h => [ \&_highlight_tag, 'self, event, tagname, attr, text' ],
135                     comment_h => [ \&_highlight_tag, 'self, event, tagname, attr, text' ],
136                     start_h => [ \&_highlight_tag, 'self, event, tagname, attr, text' ],
137                     end_h => [ \&_highlight_tag, 'self, event, tagname, attr, text' ],
138                     text_h => [ \&_highlight_text, 'self, text' ],
139                     default_h => [ \&_highlight_text, 'self, text' ],
140                 );
141                 
142 2   33     368     my $class = ref $_[0] || $_[0]; shift; # accept a class name or an object as invocant, then drop it from @_
  2         5  
143 2         5     bless $self, $class; # re-bless into the invocant's class
144                 
145 2         24     $self->{options} = { %defaults }; # per-object copy of the default options
146                 
147 2         7     my %args = @_; # remaining arguments are key => value options
148 2         8     for my $arg (keys %defaults) { # only keys known to %defaults are considered; others are ignored
149 4 50       18         $self->{options}{$arg} = $args{$arg} if defined $args{$arg} # undefined values keep the default
150                 }
151                 
152 2         8     $self->{output} = ''; # buffer the handlers append highlighted markup to
153                 
154 2         8     return $self
155             }
156              
157             =item parse()
158            
159             Parses the HTML code given as argument and returns the highlighted HTML code,
160             ready for inclusion in a web page.
161            
162             B<Example>
163            
164             $highlighter->parse("<p>Hello, world.</p>");
165            
166             =cut
167              
168             sub parse { # highlight the HTML passed as first argument; returns the generated markup as a string
169 10     10 1 26     my $self = shift;
170                 
171             ## parse the HTML fragment; the handlers append the highlighted markup to $self->{output}
172 10         23     $self->{output} = ''; # start from an empty buffer on every call
173 10         88     $self->SUPER::parse($_[0]);
174 10         56     $self->eof; # presumably flushes pending input by signalling end of document - see HTML::Parser
175                 
176             ## add line numbering? (only when the nnn option is true)
177 10 50       31     if($self->{options}{nnn}) {
178 0         0         my $i = 1;
179 0         0         $self->{output} =~ s|^|<span class="$classes{line_number}">@{[sprintf '%3d', $i++]}</span> |gm; # prefix each line start with a '%3d' counter
  0         0  
180                 }
181                 
182             ## add <pre>...</pre>? (only when the pre option is true)
183 10 50       43     $self->{output} = "<pre>\n" . $self->{output} . "</pre>\n" if $self->{options}{pre};
184                 
185 10         59     return $self->{output}
186             }
187              
188             =back
189            
190             =head2 Internal Methods
191            
192             The following methods are for internal use only.
193            
194             =over 4
195            
196             =item _highlight_tag()
197            
198             C<HTML::Parser> tags handler: highlights a tag.
199            
200             =cut
201              
202             sub _highlight_tag { # handler registered in new() with arguments: self, event, tagname, attr, text
203 16     16   28     my $self = shift;
204 16         19     my $event = shift;
205 16         17     my $tagname = shift;
206 16         20     my $attr = shift;
207                 # after the four shifts, $_[0] is the text argument; it is edited in place below
208 16         25     $_[0] =~ s|&([^;]+;)|<span class="$classes{entity}">&amp;$1</span>|g; # wrap entities and escape their '&'
209                 
210 16 100 100     108     if($event eq 'declaration' or $event eq 'process' or $event eq 'comment') { # these are wrapped whole in one span
      100        
211 3         13         $_[0] =~ s/</&lt;/g;
212 3         12         $_[0] =~ s/>/&gt;/g;
213 3         25         $self->{output} .= qq|<span class="$classes{$event}">| . $_[0] . '</span>' # the event name doubles as the %classes key
214                 
215                 } else { # any other event: mark up the tag name, the angle brackets and the attributes
216 13         166         $_[0] =~ s|^<$tagname|<<span class="$classes{tag_name}">$tagname</span>|; # tag name after a leading '<'
217 13         89         $_[0] =~ s|^</$tagname|</<span class="$classes{tag_name}">$tagname</span>|; # tag name after a leading '</'
218 13         81         $_[0] =~ s|^<(/?)|<span class="$classes{angle_bracket}">&lt;$1</span>|; # leading '<' or '</' delimiter
219 13         83         $_[0] =~ s|(/?)>$|<span class="$classes{angle_bracket}">$1&gt;</span>|; # trailing '>' or '/>' delimiter
220                     # NOTE(review): $tagname above and $attr_name below are interpolated into patterns without \Q...\E
221 13         42         for my $attr_name (keys %$attr) { # only name=value pairs with a single- or double-quoted value match below
222 5 50       33             next if $attr_name eq '/';
223 5         150             $_[0] =~ s{$attr_name=(["'])\Q$$attr{$attr_name}\E\1}
224             {<span class="$classes{attr_name}">$attr_name</span>=<span class="$classes{attr_value}">$1$$attr{$attr_name}</span>$1}
225                     } # NOTE(review): the replacement puts the closing quote outside the attr_value span
226                     
227 13         89         $self->{output} .= $_[0];
228                 }
229             }
230              
231             =item _highlight_text()
232            
233             C<HTML::Parser> text handler: highlights text.
234            
235             =cut
236              
237             sub _highlight_text { # handler registered in new() with arguments: self, text
238 34     34   45     my $self = shift; # after this shift, $_[0] is the text argument
239 34         43     $_[0] =~ s|&([^;]+;)|<span class="$classes{entity}">&amp;$1</span>|g; # wrap entities and escape their '&' (edits $_[0] in place)
240 34         170     $self->{output} .= $_[0]; # everything else is appended unchanged
241             }
242              
243             =back
244            
245             =head1 NOTES
246            
247             The resulting HTML uses CSS to colourize the syntax. Here are the classes
248             that you can define in your stylesheet.
249            
250             =over 4
251            
252             =item *
253            
254             C<.h-decl> - for a markup declaration; in an HTML document, the only
255             markup declaration is the C<DOCTYPE>, like:
256             C<< <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"> >>
257            
258             =item *
259            
260             C<.h-pi> - for a processing instruction like C<< <?html ...> >>
261             or C<< <?xml ...?> >>
262            
263             =item *
264            
265             C<.h-com> - for a comment, C<< <!-- ... --> >>
266            
267             =item *
268            
269             C<.h-ab> - for the characters C<< '<' >> and C<< '>' >> as tag delimiters
270            
271             =item *
272            
273             C<.h-tag> - for the tag name of an element
274            
275             =item *
276            
277             C<.h-attr> - for the attribute name
278            
279             =item *
280            
281             C<.h-attv> - for the attribute value
282            
283             =item *
284            
285             C<.h-ent> - for any entities: C<&eacute;> C<&#171;>
286            
287             =item *
288            
289             C<.h-lno> - for the line numbers
290            
291             =back
292            
293             An example stylesheet can be found in F<eg/html-syntax.css>.
294            
295             =head1 EXAMPLE
296            
297             Here is an example of generated HTML output. It was generated with the
298             script F<eg/highlight.pl>.
299            
300             The following HTML fragment (which is the beginning of
301             L<http://search.cpan.org/~saper/>)
302            
303             <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
304             <html>
305             <head>
306             <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
307             <link rel="stylesheet" href="/s/style.css" type="text/css">
308             <title>search.cpan.org: S&#233;bastien Aperghis-Tramoni</title>
309             </head>
310             <body id="cpansearch">
311             <center><div class="logo"><a href="/"><img src="/s/img/cpan_banner.png" alt="CPAN"></a></div></center>
312             <div class="menubar">
313             <a href="/">Home</a>
314             &middot; <a href="/author/">Authors</a>
315             &middot; <a href="/recent">Recent</a>
316             &middot; <a href="/news">News</a>
317             &middot; <a href="/mirror">Mirrors</a>
318             &middot; <a href="/faq.html">FAQ</a>
319             &middot; <a href="/feedback">Feedback</a>
320             </div>
321             <form method="get" action="/search" name="f" class="searchbox">
322             <input type="text" name="query" value="" size="35">
323             <br>in <select name="mode">
324             <option value="all">All</option>
325             <option value="module" >Modules</option>
326             <option value="dist" >Distributions</option>
327             <option value="author" >Authors</option>
328             </select>&nbsp;<input type="submit" value="CPAN Search">
329             </form>
330            
331             will be rendered like this (using the CSS stylesheet F<eg/html-syntax.css>):
332            
333             =begin html
334            
335             <pre>
336             <span class="h-lno"> 1</span> <span class="h-decl">&lt;!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"&gt;</span>
337             <span class="h-lno"> 2</span> <span class="h-ab">&lt;</span><span class="h-tag">html</span><span class="h-ab">&gt;</span>
338             <span class="h-lno"> 3</span> <span class="h-ab">&lt;</span><span class="h-tag">head</span><span class="h-ab">&gt;</span>
339             <span class="h-lno"> 4</span> <span class="h-ab">&lt;</span><span class="h-tag">meta</span> <span class="h-attr">http-equiv</span>=<span class="h-attv">"Content-Type</span>" <span class="h-attr">content</span>=<span class="h-attv">"text/html; charset=iso-8859-1</span>"<span class="h-ab">&gt;</span>
340             <span class="h-lno"> 5</span> <span class="h-ab">&lt;</span><span class="h-tag">link</span> <span class="h-attr">rel</span>=<span class="h-attv">"stylesheet</span>" <span class="h-attr">href</span>=<span class="h-attv">"/s/style.css</span>" <span class="h-attr">type</span>=<span class="h-attv">"text/css</span>"<span class="h-ab">&gt;</span>
341             <span class="h-lno"> 6</span> <span class="h-ab">&lt;</span><span class="h-tag">title</span><span class="h-ab">&gt;</span>search.cpan.org: S<span class="h-ent">&amp;#233;</span>bastien Aperghis-Tramoni<span class="h-ab">&lt;/</span><span class="h-tag">title</span><span class="h-ab">&gt;</span>
342             <span class="h-lno"> 7</span> <span class="h-ab">&lt;/</span><span class="h-tag">head</span><span class="h-ab">&gt;</span>
343             <span class="h-lno"> 8</span> <span class="h-ab">&lt;</span><span class="h-tag">body</span> <span class="h-attr">id</span>=<span class="h-attv">"cpansearch</span>"<span class="h-ab">&gt;</span>
344             <span class="h-lno"> 9</span> <span class="h-ab">&lt;</span><span class="h-tag">center</span><span class="h-ab">&gt;</span><span class="h-ab">&lt;</span><span class="h-tag">div</span> <span class="h-attr">class</span>=<span class="h-attv">"logo</span>"<span class="h-ab">&gt;</span><span class="h-ab">&lt;</span><span class="h-tag">a</span> <span class="h-attr">href</span>=<span class="h-attv">"/</span>"<span class="h-ab">&gt;</span><span class="h-ab">&lt;</span><span class="h-tag">img</span> <span class="h-attr">src</span>=<span class="h-attv">"/s/img/cpan_banner.png</span>" <span class="h-attr">alt</span>=<span class="h-attv">"CPAN</span>"<span class="h-ab">&gt;</span><span class="h-ab">&lt;/</span><span class="h-tag">a</span><span class="h-ab">&gt;</span><span class="h-ab">&lt;/</span><span class="h-tag">div</span><span class="h-ab">&gt;</span><span class="h-ab">&lt;/</span><span class="h-tag">center</span><span class="h-ab">&gt;</span>
345             <span class="h-lno"> 10</span> <span class="h-ab">&lt;</span><span class="h-tag">div</span> <span class="h-attr">class</span>=<span class="h-attv">"menubar</span>"<span class="h-ab">&gt;</span>
346             <span class="h-lno"> 11</span> <span class="h-ab">&lt;</span><span class="h-tag">a</span> <span class="h-attr">href</span>=<span class="h-attv">"/</span>"<span class="h-ab">&gt;</span>Home<span class="h-ab">&lt;/</span><span class="h-tag">a</span><span class="h-ab">&gt;</span>
347             <span class="h-lno"> 12</span> <span class="h-ent">&amp;middot;</span> <span class="h-ab">&lt;</span><span class="h-tag">a</span> <span class="h-attr">href</span>=<span class="h-attv">"/author/</span>"<span class="h-ab">&gt;</span>Authors<span class="h-ab">&lt;/</span><span class="h-tag">a</span><span class="h-ab">&gt;</span>
348             <span class="h-lno"> 13</span> <span class="h-ent">&amp;middot;</span> <span class="h-ab">&lt;</span><span class="h-tag">a</span> <span class="h-attr">href</span>=<span class="h-attv">"/recent</span>"<span class="h-ab">&gt;</span>Recent<span class="h-ab">&lt;/</span><span class="h-tag">a</span><span class="h-ab">&gt;</span>
349             <span class="h-lno"> 14</span> <span class="h-ent">&amp;middot;</span> <span class="h-ab">&lt;</span><span class="h-tag">a</span> <span class="h-attr">href</span>=<span class="h-attv">"/news</span>"<span class="h-ab">&gt;</span>News<span class="h-ab">&lt;/</span><span class="h-tag">a</span><span class="h-ab">&gt;</span>
350             <span class="h-lno"> 15</span> <span class="h-ent">&amp;middot;</span> <span class="h-ab">&lt;</span><span class="h-tag">a</span> <span class="h-attr">href</span>=<span class="h-attv">"/mirror</span>"<span class="h-ab">&gt;</span>Mirrors<span class="h-ab">&lt;/</span><span class="h-tag">a</span><span class="h-ab">&gt;</span>
351             <span class="h-lno"> 16</span> <span class="h-ent">&amp;middot;</span> <span class="h-ab">&lt;</span><span class="h-tag">a</span> <span class="h-attr">href</span>=<span class="h-attv">"/faq.html</span>"<span class="h-ab">&gt;</span>FAQ<span class="h-ab">&lt;/</span><span class="h-tag">a</span><span class="h-ab">&gt;</span>
352             <span class="h-lno"> 17</span> <span class="h-ent">&amp;middot;</span> <span class="h-ab">&lt;</span><span class="h-tag">a</span> <span class="h-attr">href</span>=<span class="h-attv">"/feedback</span>"<span class="h-ab">&gt;</span>Feedback<span class="h-ab">&lt;/</span><span class="h-tag">a</span><span class="h-ab">&gt;</span>
353             <span class="h-lno"> 18</span> <span class="h-ab">&lt;/</span><span class="h-tag">div</span><span class="h-ab">&gt;</span>
354             <span class="h-lno"> 19</span> <span class="h-ab">&lt;</span><span class="h-tag">form</span> <span class="h-attr">method</span>=<span class="h-attv">"get</span>" <span class="h-attr">action</span>=<span class="h-attv">"/search</span>" <span class="h-attr">name</span>=<span class="h-attv">"f</span>" <span class="h-attr">class</span>=<span class="h-attv">"searchbox</span>"<span class="h-ab">&gt;</span>
355             <span class="h-lno"> 20</span> <span class="h-ab">&lt;</span><span class="h-tag">input</span> <span class="h-attr">type</span>=<span class="h-attv">"text</span>" <span class="h-attr">name</span>=<span class="h-attv">"query</span>" <span class="h-attr">value</span>=<span class="h-attv">"</span>" <span class="h-attr">size</span>=<span class="h-attv">"35</span>"<span class="h-ab">&gt;</span>
356             <span class="h-lno"> 21</span> <span class="h-ab">&lt;</span><span class="h-tag">br</span><span class="h-ab">&gt;</span>in <span class="h-ab">&lt;</span><span class="h-tag">select</span> <span class="h-attr">name</span>=<span class="h-attv">"mode</span>"<span class="h-ab">&gt;</span>
357             <span class="h-lno"> 22</span> <span class="h-ab">&lt;</span><span class="h-tag">option</span> <span class="h-attr">value</span>=<span class="h-attv">"all</span>"<span class="h-ab">&gt;</span>All<span class="h-ab">&lt;/</span><span class="h-tag">option</span><span class="h-ab">&gt;</span>
358             <span class="h-lno"> 23</span> <span class="h-ab">&lt;</span><span class="h-tag">option</span> <span class="h-attr">value</span>=<span class="h-attv">"module</span>" <span class="h-ab">&gt;</span>Modules<span class="h-ab">&lt;/</span><span class="h-tag">option</span><span class="h-ab">&gt;</span>
359             <span class="h-lno"> 24</span> <span class="h-ab">&lt;</span><span class="h-tag">option</span> <span class="h-attr">value</span>=<span class="h-attv">"dist</span>" <span class="h-ab">&gt;</span>Distributions<span class="h-ab">&lt;/</span><span class="h-tag">option</span><span class="h-ab">&gt;</span>
360             <span class="h-lno"> 25</span> <span class="h-ab">&lt;</span><span class="h-tag">option</span> <span class="h-attr">value</span>=<span class="h-attv">"author</span>" <span class="h-ab">&gt;</span>Authors<span class="h-ab">&lt;/</span><span class="h-tag">option</span><span class="h-ab">&gt;</span>
361             <span class="h-lno"> 26</span> <span class="h-ab">&lt;/</span><span class="h-tag">select</span><span class="h-ab">&gt;</span><span class="h-ent">&amp;nbsp;</span><span class="h-ab">&lt;</span><span class="h-tag">input</span> <span class="h-attr">type</span>=<span class="h-attv">"submit</span>" <span class="h-attr">value</span>=<span class="h-attv">"CPAN Search</span>"<span class="h-ab">&gt;</span>
362             <span class="h-lno"> 27</span> <span class="h-ab">&lt;/</span><span class="h-tag">form</span><span class="h-ab">&gt;</span>
363             </pre>
364            
365             =end html
366            
367             =head1 CAVEATS
368            
369             C<Syntax::Highlight::HTML> relies on C<HTML::Parser> for parsing the HTML
370             and therefore suffers from the same limitations.
371            
372             =head1 SEE ALSO
373            
374             L<HTML::Parser>
375            
376             =head1 AUTHORS
377            
378             SE<eacute>bastien Aperghis-Tramoni, E<lt>sebastien@aperghis.netE<gt>
379            
380             =head1 BUGS
381            
382             Please report any bugs or feature requests to
383             C<bug-syntax-highlight-html@rt.cpan.org>, or through the web interface at
384             L<https://rt.cpan.org/NoAuth/ReportBug.html?Queue=Syntax-Highlight-HTML>.
385             I will be notified, and then you'll automatically be notified of
386             progress on your bug as I make changes.
387            
388             =head1 COPYRIGHT & LICENSE
389            
390             Copyright (C)2004 SE<eacute>bastien Aperghis-Tramoni, All Rights Reserved.
391            
392             This program is free software; you can redistribute it and/or modify it
393             under the same terms as Perl itself.
394            
395             =cut
396              
397             1; # End of Syntax::Highlight::HTML
398