| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package HTML::TokeParser::Simple; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 5 |  |  | 5 |  | 153280 | use strict; | 
|  | 5 |  |  |  |  | 11 |  | 
|  | 5 |  |  |  |  | 216 |  | 
| 4 | 5 |  |  | 5 |  | 5239 | use HTML::TokeParser; | 
|  | 5 |  |  |  |  | 63654 |  | 
|  | 5 |  |  |  |  | 160 |  | 
| 5 | 5 |  |  | 5 |  | 4804 | use HTML::TokeParser::Simple::Token; | 
|  | 5 |  |  |  |  | 14 |  | 
|  | 5 |  |  |  |  | 135 |  | 
| 6 | 5 |  |  | 5 |  | 3773 | use HTML::TokeParser::Simple::Token::Tag; | 
|  | 5 |  |  |  |  | 12 |  | 
|  | 5 |  |  |  |  | 167 |  | 
| 7 | 5 |  |  | 5 |  | 2568 | use HTML::TokeParser::Simple::Token::Tag::Start; | 
|  | 5 |  |  |  |  | 10 |  | 
|  | 5 |  |  |  |  | 127 |  | 
| 8 | 5 |  |  | 5 |  | 2473 | use HTML::TokeParser::Simple::Token::Tag::End; | 
|  | 5 |  |  |  |  | 11 |  | 
|  | 5 |  |  |  |  | 126 |  | 
| 9 | 5 |  |  | 5 |  | 8332 | use HTML::TokeParser::Simple::Token::Text; | 
|  | 5 |  |  |  |  | 11 |  | 
|  | 5 |  |  |  |  | 122 |  | 
| 10 | 5 |  |  | 5 |  | 2738 | use HTML::TokeParser::Simple::Token::Comment; | 
|  | 5 |  |  |  |  | 11 |  | 
|  | 5 |  |  |  |  | 119 |  | 
| 11 | 5 |  |  | 5 |  | 2228 | use HTML::TokeParser::Simple::Token::Declaration; | 
|  | 5 |  |  |  |  | 39 |  | 
|  | 5 |  |  |  |  | 102 |  | 
| 12 | 5 |  |  | 5 |  | 2160 | use HTML::TokeParser::Simple::Token::ProcessInstruction; | 
|  | 5 |  |  |  |  | 10 |  | 
|  | 5 |  |  |  |  | 167 |  | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | our $VERSION  = '3.16'; | 
| 15 | 5 |  |  | 5 |  | 22 | use base 'HTML::TokeParser'; | 
|  | 5 |  |  |  |  | 7 |  | 
|  | 5 |  |  |  |  | 3841 |  | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | # constructors | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | my %FACTORY_CLASSES = ( | 
| 20 |  |  |  |  |  |  | S   => 'HTML::TokeParser::Simple::Token::Tag::Start', | 
| 21 |  |  |  |  |  |  | E   => 'HTML::TokeParser::Simple::Token::Tag::End', | 
| 22 |  |  |  |  |  |  | T   => 'HTML::TokeParser::Simple::Token::Text', | 
| 23 |  |  |  |  |  |  | C   => 'HTML::TokeParser::Simple::Token::Comment', | 
| 24 |  |  |  |  |  |  | D   => 'HTML::TokeParser::Simple::Token::Declaration', | 
| 25 |  |  |  |  |  |  | PI  => 'HTML::TokeParser::Simple::Token::ProcessInstruction', | 
| 26 |  |  |  |  |  |  | ); | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | sub _croak { | 
| 29 | 3 |  |  | 3 |  | 8 | my ($proto, $message) = @_; | 
| 30 | 3 |  |  |  |  | 23 | require Carp; | 
| 31 | 3 |  |  |  |  | 765 | Carp::croak($message); | 
| 32 |  |  |  |  |  |  | } | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  | sub new { | 
| 35 | 19 |  |  | 19 | 1 | 130031 | my ($class, @args) = @_; | 
| 36 | 19 | 100 |  |  |  | 158 | return 1 == @args | 
| 37 |  |  |  |  |  |  | ? $class->SUPER::new(@args) | 
| 38 |  |  |  |  |  |  | : $class->_init(@args); | 
| 39 |  |  |  |  |  |  | } | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | sub _init { | 
| 42 | 8 |  |  | 8 |  | 143 | my ($class, $source_type, $source) = @_; | 
| 43 |  |  |  |  |  |  | my %sources = ( | 
| 44 | 3 |  |  | 3 |  | 16 | file   => sub { $source }, | 
| 45 | 1 |  |  | 1 |  | 4 | handle => sub { $source }, | 
| 46 | 1 |  |  | 1 |  | 3 | string => sub { \$source }, | 
| 47 |  |  |  |  |  |  | url    => sub { | 
| 48 | 2 |  |  | 2 |  | 100 | eval "require LWP::Simple"; | 
| 49 | 2 | 50 |  |  |  | 11 | $class->_croak("Cannot load LWP::Simple: $@") if $@; | 
| 50 | 2 |  |  |  |  | 7 | my $content = LWP::Simple::get($source); | 
| 51 | 2 | 100 |  |  |  | 16 | $class->_croak("Could not fetch content from ($source)") | 
| 52 |  |  |  |  |  |  | unless defined $content; | 
| 53 | 1 |  |  |  |  | 6 | return \$content; | 
| 54 |  |  |  |  |  |  | }, | 
| 55 | 8 |  |  |  |  | 111 | ); | 
| 56 | 8 | 100 |  |  |  | 32 | unless (exists $sources{$source_type}) { | 
| 57 | 1 |  |  |  |  | 6 | $class->_croak("Unknown source type ($source_type)"); | 
| 58 |  |  |  |  |  |  | } | 
| 59 | 7 |  |  |  |  | 22 | return $class->new($sources{$source_type}->()); | 
| 60 |  |  |  |  |  |  | } | 
| 61 |  |  |  |  |  |  |  | 
| 62 |  |  |  |  |  |  | sub get_token { | 
| 63 | 226 |  |  | 226 | 1 | 16232 | my $self = shift; | 
| 64 | 226 |  |  |  |  | 450 | my @args = @_; | 
| 65 | 226 |  |  |  |  | 667 | my $token = $self->SUPER::get_token( @args ); | 
| 66 | 226 | 100 |  |  |  | 4273 | return unless defined $token; | 
| 67 | 223 | 50 |  |  |  | 542 | if (my $factory_class = $FACTORY_CLASSES{$token->[0]}) { | 
| 68 | 223 |  |  |  |  | 965 | return $factory_class->new($token); | 
| 69 |  |  |  |  |  |  | } | 
| 70 |  |  |  |  |  |  | else { | 
| 71 |  |  |  |  |  |  | # this should never happen | 
| 72 | 0 |  |  |  |  | 0 | $self->_croak("Cannot determine token class for token (@$token)"); | 
| 73 |  |  |  |  |  |  | } | 
| 74 |  |  |  |  |  |  | } | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | sub get_tag { | 
| 77 | 12 |  |  | 12 | 1 | 21104 | my $self = shift; | 
| 78 | 12 |  |  |  |  | 31 | my @args = @_; | 
| 79 | 12 |  |  |  |  | 150 | my $token = $self->SUPER::get_tag( @args ); | 
| 80 | 12 | 50 |  |  |  | 144 | return unless defined $token; | 
| 81 | 12 | 100 |  |  |  | 68 | return $token->[0] =~ /^\// | 
| 82 |  |  |  |  |  |  | ?  HTML::TokeParser::Simple::Token::Tag::End->new($token) | 
| 83 |  |  |  |  |  |  | :  HTML::TokeParser::Simple::Token::Tag::Start->new($token); | 
| 84 |  |  |  |  |  |  | } | 
| 85 |  |  |  |  |  |  |  | 
| 86 |  |  |  |  |  |  | sub peek { | 
| 87 | 7 |  |  | 7 | 1 | 12 | my ($self, $count) = @_; | 
| 88 | 7 |  | 100 |  |  | 22 | $count  ||= 1; | 
| 89 |  |  |  |  |  |  |  | 
| 90 | 7 | 100 |  |  |  | 31 | unless ($count =~ /^\d+$/) { | 
| 91 | 1 |  |  |  |  | 5 | $self->_croak("Argument to peek() must be a positive integer, not ($count)"); | 
| 92 |  |  |  |  |  |  | } | 
| 93 |  |  |  |  |  |  |  | 
| 94 | 6 |  |  |  |  | 7 | my $items = 0; | 
| 95 | 6 |  |  |  |  | 10 | my $html  = ''; | 
| 96 | 6 |  |  |  |  | 7 | my @tokens; | 
| 97 | 6 |  | 100 |  |  | 22 | while ( $items++ < $count && defined ( my $token = $self->get_token ) ) { | 
| 98 | 61 |  |  |  |  | 166 | $html .= $token->as_is; | 
| 99 | 61 |  |  |  |  | 237 | push @tokens, $token; | 
| 100 |  |  |  |  |  |  | } | 
| 101 | 6 |  |  |  |  | 25 | $self->unget_token(@tokens); | 
| 102 | 6 |  |  |  |  | 60 | return $html; | 
| 103 |  |  |  |  |  |  | } | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | 1; | 
| 106 |  |  |  |  |  |  |  | 
| 107 |  |  |  |  |  |  | __END__ | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | =head1 NAME | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | HTML::TokeParser::Simple - Easy to use C<HTML::TokeParser> interface | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 114 |  |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | use HTML::TokeParser::Simple; | 
| 116 |  |  |  |  |  |  | my $p = HTML::TokeParser::Simple->new( $somefile ); | 
| 117 |  |  |  |  |  |  |  | 
| 118 |  |  |  |  |  |  | while ( my $token = $p->get_token ) { | 
| 119 |  |  |  |  |  |  | # This prints all text in an HTML doc (i.e., it strips the HTML) | 
| 120 |  |  |  |  |  |  | next unless $token->is_text; | 
| 121 |  |  |  |  |  |  | print $token->as_is; | 
| 122 |  |  |  |  |  |  | } | 
| 123 |  |  |  |  |  |  |  | 
| 124 |  |  |  |  |  |  |  | 
| 125 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 126 |  |  |  |  |  |  |  | 
| 127 |  |  |  |  |  |  | C<HTML::TokeParser> is an excellent module that's often used for parsing HTML. | 
| 128 |  |  |  |  |  |  | However, the tokens returned are not exactly intuitive to parse: | 
| 129 |  |  |  |  |  |  |  | 
| 130 |  |  |  |  |  |  | ["S",  $tag, $attr, $attrseq, $text] | 
| 131 |  |  |  |  |  |  | ["E",  $tag, $text] | 
| 132 |  |  |  |  |  |  | ["T",  $text, $is_data] | 
| 133 |  |  |  |  |  |  | ["C",  $text] | 
| 134 |  |  |  |  |  |  | ["D",  $text] | 
| 135 |  |  |  |  |  |  | ["PI", $token0, $text] | 
| 136 |  |  |  |  |  |  |  | 
| 137 |  |  |  |  |  |  | To simplify this, C<HTML::TokeParser::Simple> allows the user ask more | 
| 138 |  |  |  |  |  |  | intuitive (read: more self-documenting) questions about the tokens returned. | 
| 139 |  |  |  |  |  |  |  | 
| 140 |  |  |  |  |  |  | You can also rebuild some tags on the fly.  Frequently, the attributes | 
| 141 |  |  |  |  |  |  | associated with start tags need to be altered, added to, or deleted.  This | 
| 142 |  |  |  |  |  |  | functionality is built in. | 
| 143 |  |  |  |  |  |  |  | 
| 144 |  |  |  |  |  |  | Since this is a subclass of C<HTML::TokeParser>, all C<HTML::TokeParser> | 
| 145 |  |  |  |  |  |  | methods are available.  To truly appreciate the power of this module, please | 
| 146 |  |  |  |  |  |  | read the documentation for C<HTML::TokeParser> and C<HTML::Parser>. | 
| 147 |  |  |  |  |  |  |  | 
| 148 |  |  |  |  |  |  | =head1 CONTRUCTORS | 
| 149 |  |  |  |  |  |  |  | 
| 150 |  |  |  |  |  |  | =head2 C<new($source)> | 
| 151 |  |  |  |  |  |  |  | 
| 152 |  |  |  |  |  |  | The constructor for C<HTML::TokeParser::Simple> can be used just like | 
| 153 |  |  |  |  |  |  | C<HTML::TokeParser>'s constructor: | 
| 154 |  |  |  |  |  |  |  | 
| 155 |  |  |  |  |  |  | my $parser = HTML::TokeParser::Simple->new($filename); | 
| 156 |  |  |  |  |  |  | # or | 
| 157 |  |  |  |  |  |  | my $parser = HTML::TokeParser::Simple->new($filehandle); | 
| 158 |  |  |  |  |  |  | # or | 
| 159 |  |  |  |  |  |  | my $parser = HTML::TokeParser::Simple->new(\$html_string); | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | =head2 C<new($source_type, $source)> | 
| 162 |  |  |  |  |  |  |  | 
| 163 |  |  |  |  |  |  | If you wish to be more explicit, there is a new style of | 
| 164 |  |  |  |  |  |  | constructor available. | 
| 165 |  |  |  |  |  |  |  | 
| 166 |  |  |  |  |  |  | my $parser = HTML::TokeParser::Simple->new(file   => $filename); | 
| 167 |  |  |  |  |  |  | # or | 
| 168 |  |  |  |  |  |  | my $parser = HTML::TokeParser::Simple->new(handle => $filehandle); | 
| 169 |  |  |  |  |  |  | # or | 
| 170 |  |  |  |  |  |  | my $parser = HTML::TokeParser::Simple->new(string => $html_string); | 
| 171 |  |  |  |  |  |  |  | 
| 172 |  |  |  |  |  |  | Note that you do not have to provide a reference for the string if using the | 
| 173 |  |  |  |  |  |  | string constructor. | 
| 174 |  |  |  |  |  |  |  | 
| 175 |  |  |  |  |  |  | As a convenience, you can also attempt to fetch the HTML directly from a URL. | 
| 176 |  |  |  |  |  |  |  | 
| 177 |  |  |  |  |  |  | my $parser = HTML::TokeParser::Simple->new(url => 'http://some.url'); | 
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  | This method relies on C<LWP::Simple>.  If this module is not found or the page | 
| 180 |  |  |  |  |  |  | cannot be fetched, the constructor will C<croak()>. | 
| 181 |  |  |  |  |  |  |  | 
| 182 |  |  |  |  |  |  | =head1 PARSER METHODS | 
| 183 |  |  |  |  |  |  |  | 
| 184 |  |  |  |  |  |  | =head2 get_token | 
| 185 |  |  |  |  |  |  |  | 
| 186 |  |  |  |  |  |  | This method will return the next token that C<HTML::TokeParser::get_token()> | 
| 187 |  |  |  |  |  |  | method would return.  However, it will be blessed into a class appropriate | 
| 188 |  |  |  |  |  |  | which represents the token type. | 
| 189 |  |  |  |  |  |  |  | 
| 190 |  |  |  |  |  |  | =head2 get_tag | 
| 191 |  |  |  |  |  |  |  | 
| 192 |  |  |  |  |  |  | This method will return the next token that C<HTML::TokeParser::get_tag()> | 
| 193 |  |  |  |  |  |  | method would return.  However, it will be blessed into either the | 
| 194 |  |  |  |  |  |  | L<HTML::TokeParser::Simple::Token::Tag::Start> or | 
| 195 |  |  |  |  |  |  | L<HTML::TokeParser::Simple::Token::Tag::End> class. | 
| 196 |  |  |  |  |  |  |  | 
| 197 |  |  |  |  |  |  | =head2 peek | 
| 198 |  |  |  |  |  |  |  | 
| 199 |  |  |  |  |  |  | As of version C<3.14>, you can now C<peek()> at the upcomings tokens without | 
| 200 |  |  |  |  |  |  | affecting the state of the parser.  By default, C<peek()> will return the text | 
| 201 |  |  |  |  |  |  | of the next token, but specifying an integer C<$count> will return the text of | 
| 202 |  |  |  |  |  |  | the next C<$count> tokens. | 
| 203 |  |  |  |  |  |  |  | 
| 204 |  |  |  |  |  |  | This is useful when you're trying to debug where you are in a document. | 
| 205 |  |  |  |  |  |  |  | 
| 206 |  |  |  |  |  |  | warn $parser->peek(3); # show the next 3 tokens | 
| 207 |  |  |  |  |  |  |  | 
| 208 |  |  |  |  |  |  | =head1 ACCESSORS | 
| 209 |  |  |  |  |  |  |  | 
| 210 |  |  |  |  |  |  | The following methods may be called on the token object which is returned, | 
| 211 |  |  |  |  |  |  | not on the parser object. | 
| 212 |  |  |  |  |  |  |  | 
| 213 |  |  |  |  |  |  | =head2 Boolean Accessors | 
| 214 |  |  |  |  |  |  |  | 
| 215 |  |  |  |  |  |  | These accessors return true or false. | 
| 216 |  |  |  |  |  |  |  | 
| 217 |  |  |  |  |  |  | =over 4 | 
| 218 |  |  |  |  |  |  |  | 
| 219 |  |  |  |  |  |  | =item * C<is_tag([$tag])> | 
| 220 |  |  |  |  |  |  |  | 
| 221 |  |  |  |  |  |  | Use this to determine if you have any tag.  An optional "tag type" may be | 
| 222 |  |  |  |  |  |  | passed.  This will allow you to match if it's a I<particular> tag.  The | 
| 223 |  |  |  |  |  |  | supplied tag is case-insensitive. | 
| 224 |  |  |  |  |  |  |  | 
| 225 |  |  |  |  |  |  | if ( $token->is_tag ) { ... } | 
| 226 |  |  |  |  |  |  |  | 
| 227 |  |  |  |  |  |  | Optionally, you may pass a regular expression as an argument. | 
| 228 |  |  |  |  |  |  |  | 
| 229 |  |  |  |  |  |  | =item * C<is_start_tag([$tag])> | 
| 230 |  |  |  |  |  |  |  | 
| 231 |  |  |  |  |  |  | Use this to determine if you have a start tag.  An optional "tag type" may be | 
| 232 |  |  |  |  |  |  | passed.  This will allow you to match if it's a I<particular> start tag.  The | 
| 233 |  |  |  |  |  |  | supplied tag is case-insensitive. | 
| 234 |  |  |  |  |  |  |  | 
| 235 |  |  |  |  |  |  | if ( $token->is_start_tag ) { ... } | 
| 236 |  |  |  |  |  |  | if ( $token->is_start_tag( 'font' ) ) { ... } | 
| 237 |  |  |  |  |  |  |  | 
| 238 |  |  |  |  |  |  | Optionally, you may pass a regular expression as an argument.  To match all | 
| 239 |  |  |  |  |  |  | header (h1, h2, ... h6) tags: | 
| 240 |  |  |  |  |  |  |  | 
| 241 |  |  |  |  |  |  | if ( $token->is_start_tag( qr/^h[123456]$/ ) ) { ... } | 
| 242 |  |  |  |  |  |  |  | 
| 243 |  |  |  |  |  |  | =item * C<is_end_tag([$tag])> | 
| 244 |  |  |  |  |  |  |  | 
| 245 |  |  |  |  |  |  | Use this to determine if you have an end tag.  An optional "tag type" may be | 
| 246 |  |  |  |  |  |  | passed.  This will allow you to match if it's a I<particular> end tag.  The | 
| 247 |  |  |  |  |  |  | supplied tag is case-insensitive. | 
| 248 |  |  |  |  |  |  |  | 
| 249 |  |  |  |  |  |  | When testing for an end tag, the forward slash on the tag is optional. | 
| 250 |  |  |  |  |  |  |  | 
| 251 |  |  |  |  |  |  | while ( $token = $p->get_token ) { | 
| 252 |  |  |  |  |  |  | if ( $token->is_end_tag( 'form' ) ) { ... } | 
| 253 |  |  |  |  |  |  | } | 
| 254 |  |  |  |  |  |  |  | 
| 255 |  |  |  |  |  |  | Or: | 
| 256 |  |  |  |  |  |  |  | 
| 257 |  |  |  |  |  |  | while ( $token = $p->get_token ) { | 
| 258 |  |  |  |  |  |  | if ( $token->is_end_tag( '/form' ) ) { ... } | 
| 259 |  |  |  |  |  |  | } | 
| 260 |  |  |  |  |  |  |  | 
| 261 |  |  |  |  |  |  | Optionally, you may pass a regular expression as an argument. | 
| 262 |  |  |  |  |  |  |  | 
| 263 |  |  |  |  |  |  | =item * C<is_text()> | 
| 264 |  |  |  |  |  |  |  | 
| 265 |  |  |  |  |  |  | Use this to determine if you have text.  Note that this is I<not> to be | 
| 266 |  |  |  |  |  |  | confused with the C<return_text> (I<deprecated>) method described below! | 
| 267 |  |  |  |  |  |  | C<is_text> will identify text that the user typically sees display in the Web | 
| 268 |  |  |  |  |  |  | browser. | 
| 269 |  |  |  |  |  |  |  | 
| 270 |  |  |  |  |  |  | =item * C<is_comment()> | 
| 271 |  |  |  |  |  |  |  | 
| 272 |  |  |  |  |  |  | Are you still reading this?  Nobody reads POD.  Don't you know you're supposed | 
| 273 |  |  |  |  |  |  | to go to CLPM, ask a question that's answered in the POD and get flamed?  It's | 
| 274 |  |  |  |  |  |  | a rite of passage. | 
| 275 |  |  |  |  |  |  |  | 
| 276 |  |  |  |  |  |  | Really. | 
| 277 |  |  |  |  |  |  |  | 
| 278 |  |  |  |  |  |  | C<is_comment> is used to identify comments.  See the HTML::Parser documentation | 
| 279 |  |  |  |  |  |  | for more information about comments.  There's more than you might think. | 
| 280 |  |  |  |  |  |  |  | 
| 281 |  |  |  |  |  |  | =item * C<is_declaration()> | 
| 282 |  |  |  |  |  |  |  | 
| 283 |  |  |  |  |  |  | This will match the DTD at the top of your HTML. (You I<do> use DTD's, don't | 
| 284 |  |  |  |  |  |  | you?) | 
| 285 |  |  |  |  |  |  |  | 
| 286 |  |  |  |  |  |  | =item * C<is_process_instruction()> | 
| 287 |  |  |  |  |  |  |  | 
| 288 |  |  |  |  |  |  | Process Instructions are from XML.  This is very handy if you need to parse out | 
| 289 |  |  |  |  |  |  | PHP and similar things with a parser. | 
| 290 |  |  |  |  |  |  |  | 
| 291 |  |  |  |  |  |  | Currently, there appear to be some problems with process instructions.  You can | 
| 292 |  |  |  |  |  |  | override C<HTML::TokeParser::Simple::Token::ProcessInstruction> if you need to. | 
| 293 |  |  |  |  |  |  |  | 
| 294 |  |  |  |  |  |  | =item * C<is_pi()> | 
| 295 |  |  |  |  |  |  |  | 
| 296 |  |  |  |  |  |  | This is a shorthand for C<is_process_instruction()>. | 
| 297 |  |  |  |  |  |  |  | 
| 298 |  |  |  |  |  |  | =back | 
| 299 |  |  |  |  |  |  |  | 
| 300 |  |  |  |  |  |  | =head2 Data Accessors | 
| 301 |  |  |  |  |  |  |  | 
| 302 |  |  |  |  |  |  | Some of these were originally C<return_> methods, but that name was not only | 
| 303 |  |  |  |  |  |  | unwieldy, but also went against reasonable conventions.  The C<get_> methods | 
| 304 |  |  |  |  |  |  | listed below still have C<return_> methods available for backwards | 
| 305 |  |  |  |  |  |  | compatibility reasons, but they merely call their C<get_> counterpart.  For | 
| 306 |  |  |  |  |  |  | example, calling C<return_tag()> actually calls C<get_tag()> internally. | 
| 307 |  |  |  |  |  |  |  | 
| 308 |  |  |  |  |  |  | =over 4 | 
| 309 |  |  |  |  |  |  |  | 
| 310 |  |  |  |  |  |  | =item * C<get_tag()> | 
| 311 |  |  |  |  |  |  |  | 
| 312 |  |  |  |  |  |  | Do you have a start tag or end tag?  This will return the type (lower case). | 
| 313 |  |  |  |  |  |  | Note that this is I<not> the same as the C<get_tag()> method on the actual | 
| 314 |  |  |  |  |  |  | parser object. | 
| 315 |  |  |  |  |  |  |  | 
| 316 |  |  |  |  |  |  | =item * C<get_attr([$attribute])> | 
| 317 |  |  |  |  |  |  |  | 
| 318 |  |  |  |  |  |  | If you have a start tag, this will return a hash ref with the attribute names | 
| 319 |  |  |  |  |  |  | as keys and the values as the values. | 
| 320 |  |  |  |  |  |  |  | 
| 321 |  |  |  |  |  |  | If you pass in an attribute name, it will return the value for just that | 
| 322 |  |  |  |  |  |  | attribute. | 
| 323 |  |  |  |  |  |  |  | 
| 324 |  |  |  |  |  |  | Returns false if the token is not a start tag. | 
| 325 |  |  |  |  |  |  |  | 
| 326 |  |  |  |  |  |  | =item * C<get_attrseq()> | 
| 327 |  |  |  |  |  |  |  | 
| 328 |  |  |  |  |  |  | For a start tag, this is an array reference with the sequence of the | 
| 329 |  |  |  |  |  |  | attributes, if any. | 
| 330 |  |  |  |  |  |  |  | 
| 331 |  |  |  |  |  |  | Returns false if the token is not a start tag. | 
| 332 |  |  |  |  |  |  |  | 
| 333 |  |  |  |  |  |  | =item * C<return_text()> | 
| 334 |  |  |  |  |  |  |  | 
| 335 |  |  |  |  |  |  | This method has been heavily deprecated (for a couple of years) in favor of | 
| 336 |  |  |  |  |  |  | C<as_is>.  Programmers were getting confused over the difference between | 
| 337 |  |  |  |  |  |  | C<is_text>, C<return_text>, and some parser methods such as | 
| 338 |  |  |  |  |  |  | C<HTML::TokeParser::get_text> and friends. | 
| 339 |  |  |  |  |  |  |  | 
| 340 |  |  |  |  |  |  | Using this method still succeeds, but will now carp and B<will be removed> | 
| 341 |  |  |  |  |  |  | in the next major release of this module. | 
| 342 |  |  |  |  |  |  |  | 
| 343 |  |  |  |  |  |  | =item * C<as_is()> | 
| 344 |  |  |  |  |  |  |  | 
| 345 |  |  |  |  |  |  | This is the exact text of whatever the token is representing. | 
| 346 |  |  |  |  |  |  |  | 
| 347 |  |  |  |  |  |  | =item * C<get_token0()> | 
| 348 |  |  |  |  |  |  |  | 
| 349 |  |  |  |  |  |  | For processing instructions, this will return the token found immediately after | 
| 350 |  |  |  |  |  |  | the opening tag.  Example:  For <?php, "php" will be the start of the returned | 
| 351 |  |  |  |  |  |  | string. | 
| 352 |  |  |  |  |  |  |  | 
| 353 |  |  |  |  |  |  | Note that process instruction handling appears to be incomplete in | 
| 354 |  |  |  |  |  |  | C<HTML::TokeParser>. | 
| 355 |  |  |  |  |  |  |  | 
| 356 |  |  |  |  |  |  | Returns false if the token is not a process instruction. | 
| 357 |  |  |  |  |  |  |  | 
| 358 |  |  |  |  |  |  | =back | 
| 359 |  |  |  |  |  |  |  | 
| 360 |  |  |  |  |  |  | =head1 MUTATORS | 
| 361 |  |  |  |  |  |  |  | 
| 362 |  |  |  |  |  |  | The C<delete_attr()> and C<set_attr()> methods allow the programmer to rewrite | 
| 363 |  |  |  |  |  |  | start tag attributes on the fly.  It should be noted that bad HTML will be | 
| 364 |  |  |  |  |  |  | "corrected" by this.  Specifically, the new tag will have all attributes | 
| 365 |  |  |  |  |  |  | lower-cased with the values properly quoted. | 
| 366 |  |  |  |  |  |  |  | 
| 367 |  |  |  |  |  |  | Self-closing tags (e.g. E<lt>hr /E<gt>) are also handled correctly.  Some older | 
| 368 |  |  |  |  |  |  | browsers require a space prior to the final slash in a self-closed tag.  If | 
| 369 |  |  |  |  |  |  | such a space is detected in the original HTML, it will be preserved. | 
| 370 |  |  |  |  |  |  |  | 
| 371 |  |  |  |  |  |  | Calling a mutator on an token type that does not support that property is a | 
| 372 |  |  |  |  |  |  | no-op.  For example: | 
| 373 |  |  |  |  |  |  |  | 
| 374 |  |  |  |  |  |  | if ($token->is_comment) { | 
| 375 |  |  |  |  |  |  | $token->set_attr(foo => 'bar'); # does nothing | 
| 376 |  |  |  |  |  |  | } | 
| 377 |  |  |  |  |  |  |  | 
| 378 |  |  |  |  |  |  | =over 4 | 
| 379 |  |  |  |  |  |  |  | 
| 380 |  |  |  |  |  |  | =item * C<delete_attr($name)> | 
| 381 |  |  |  |  |  |  |  | 
| 382 |  |  |  |  |  |  | This method attempts to delete the attribute specified.  It will silently fail | 
| 383 |  |  |  |  |  |  | if called on anything other than a start tag.  The argument is | 
| 384 |  |  |  |  |  |  | case-insensitive, but must otherwise be an exact match of the attribute you are | 
| 385 |  |  |  |  |  |  | attempting to delete.  If the attribute is not found, the method will return | 
| 386 |  |  |  |  |  |  | without changing the tag. | 
| 387 |  |  |  |  |  |  |  | 
| 388 |  |  |  |  |  |  | # <body bgcolor="#FFFFFF"> | 
| 389 |  |  |  |  |  |  | $token->delete_attr('bgcolor'); | 
| 390 |  |  |  |  |  |  | print $token->as_is; | 
| 391 |  |  |  |  |  |  | # <body> | 
| 392 |  |  |  |  |  |  |  | 
| 393 |  |  |  |  |  |  | After this method is called, if successful, the C<as_is()>, C<get_attr()> | 
| 394 |  |  |  |  |  |  | and C<get_attrseq()> methods will all return updated results. | 
| 395 |  |  |  |  |  |  |  | 
| 396 |  |  |  |  |  |  | =item * C<set_attr($name,$value)> | 
| 397 |  |  |  |  |  |  |  | 
| 398 |  |  |  |  |  |  | This method will set the value of an attribute.  If the attribute is not found, | 
| 399 |  |  |  |  |  |  | then C<get_attrseq()> will have the new attribute listed at the end. | 
| 400 |  |  |  |  |  |  |  | 
| 401 |  |  |  |  |  |  | # <p> | 
| 402 |  |  |  |  |  |  | $token->set_attr(class => 'some_class'); | 
| 403 |  |  |  |  |  |  | print $token->as_is; | 
| 404 |  |  |  |  |  |  | # <p class="some_class"> | 
| 405 |  |  |  |  |  |  |  | 
| 406 |  |  |  |  |  |  | # <body bgcolor="#FFFFFF"> | 
| 407 |  |  |  |  |  |  | $token->set_attr('bgcolor','red'); | 
| 408 |  |  |  |  |  |  | print $token->as_is; | 
| 409 |  |  |  |  |  |  | # <body bgcolor="red"> | 
| 410 |  |  |  |  |  |  |  | 
| 411 |  |  |  |  |  |  | After this method is called, if successful, the C<as_is()>, C<get_attr()> | 
| 412 |  |  |  |  |  |  | and C<get_attrseq()> methods will all return updated results. | 
| 413 |  |  |  |  |  |  |  | 
| 414 |  |  |  |  |  |  | =item * C<set_attr($hashref)> | 
| 415 |  |  |  |  |  |  |  | 
| 416 |  |  |  |  |  |  | Under the premise that C<set_> methods should accept what their corresponding | 
| 417 |  |  |  |  |  |  | C<get_> methods emit, the following works: | 
| 418 |  |  |  |  |  |  |  | 
| 419 |  |  |  |  |  |  | $tag->set_attr($tag->get_attr); | 
| 420 |  |  |  |  |  |  |  | 
| 421 |  |  |  |  |  |  | Theoretically that's a no-op and for purposes of rendering HTML, it should be. | 
| 422 |  |  |  |  |  |  | However, internally this calls C<$tag-E<gt>rewrite_tag>, so see that method to | 
| 423 |  |  |  |  |  |  | understand how this may affect you. | 
| 424 |  |  |  |  |  |  |  | 
| 425 |  |  |  |  |  |  | Of course, this is useless if you want to actually change the attributes, so you | 
| 426 |  |  |  |  |  |  | can do this: | 
| 427 |  |  |  |  |  |  |  | 
| 428 |  |  |  |  |  |  | my $attrs = { | 
| 429 |  |  |  |  |  |  | class  => 'headline', | 
| 430 |  |  |  |  |  |  | valign => 'top' | 
| 431 |  |  |  |  |  |  | }; | 
| 432 |  |  |  |  |  |  | $token->set_attr($attrs) | 
| 433 |  |  |  |  |  |  | if $token->is_start_tag('td') &&  $token->get_attr('class') eq 'stories'; | 
| 434 |  |  |  |  |  |  |  | 
| 435 |  |  |  |  |  |  | =item * C<rewrite_tag()> | 
| 436 |  |  |  |  |  |  |  | 
| 437 |  |  |  |  |  |  | This method rewrites the tag.  The tag name and the name of all attributes will | 
| 438 |  |  |  |  |  |  | be lower-cased.  Values that are not quoted with double quotes will be.  This | 
| 439 |  |  |  |  |  |  | may be called on both start or end tags.  Note that both C<set_attr()> and | 
| 440 |  |  |  |  |  |  | C<delete_attr()> call this method prior to returning. | 
| 441 |  |  |  |  |  |  |  | 
| 442 |  |  |  |  |  |  | If called on a token that is not a tag, it simply returns.  Regardless of how | 
| 443 |  |  |  |  |  |  | it is called, it returns the token. | 
| 444 |  |  |  |  |  |  |  | 
| 445 |  |  |  |  |  |  | # <body alink=#0000ff BGCOLOR=#ffffff class='none'> | 
| 446 |  |  |  |  |  |  | $token->rewrite_tag; | 
| 447 |  |  |  |  |  |  | print $token->as_is; | 
| 448 |  |  |  |  |  |  | # <body alink="#0000ff" bgcolor="#ffffff" class="none"> | 
| 449 |  |  |  |  |  |  |  | 
| 450 |  |  |  |  |  |  | A quick cleanup of sloppy HTML is now the following: | 
| 451 |  |  |  |  |  |  |  | 
| 452 |  |  |  |  |  |  | my $parser = HTML::TokeParser::Simple->new( string => $ugly_html ); | 
| 453 |  |  |  |  |  |  | while (my $token = $parser->get_token) { | 
| 454 |  |  |  |  |  |  | $token->rewrite_tag; | 
| 455 |  |  |  |  |  |  | print $token->as_is; | 
| 456 |  |  |  |  |  |  | } | 
| 457 |  |  |  |  |  |  |  | 
| 458 |  |  |  |  |  |  | =back | 
| 459 |  |  |  |  |  |  |  | 
| 460 |  |  |  |  |  |  | =head1 PARSER VERSUS TOKENS | 
| 461 |  |  |  |  |  |  |  | 
| 462 |  |  |  |  |  |  | The parser returns tokens that are blessed into appropriate classes.  Some | 
| 463 |  |  |  |  |  |  | people get confused and try to call parser methods on tokens and token methods | 
| 464 |  |  |  |  |  |  | on the parser.  To prevent this, C<HTML::TokeParser::Simple> versions 1.4 and | 
| 465 |  |  |  |  |  |  | above now bless all tokens into appropriate token classes.  Please keep this in | 
| 466 |  |  |  |  |  |  | mind while using this module (and many thanks to PodMaster | 
| 467 |  |  |  |  |  |  | L<http://www.perlmonks.org/index.pl?node_id=107642> for pointing out this issue | 
| 468 |  |  |  |  |  |  | to me.) | 
| 469 |  |  |  |  |  |  |  | 
| 470 |  |  |  |  |  |  | =head1 EXAMPLES | 
| 471 |  |  |  |  |  |  |  | 
| 472 |  |  |  |  |  |  | =head2 Finding comments | 
| 473 |  |  |  |  |  |  |  | 
| 474 |  |  |  |  |  |  | For some strange reason, your Pointy-Haired Boss (PHB) is convinced that the | 
| 475 |  |  |  |  |  |  | graphics department is making fun of him by embedding rude things about him in | 
| 476 |  |  |  |  |  |  | HTML comments.  You need to get all HTML comments from the HTML. | 
| 477 |  |  |  |  |  |  |  | 
| 478 |  |  |  |  |  |  | use strict; | 
| 479 |  |  |  |  |  |  | use HTML::TokeParser::Simple; | 
| 480 |  |  |  |  |  |  |  | 
| 481 |  |  |  |  |  |  | my @html_docs = glob( "*.html" ); | 
| 482 |  |  |  |  |  |  |  | 
| 483 |  |  |  |  |  |  | open PHB, "> phbreport.txt" or die "Cannot open phbreport for writing: $!"; | 
| 484 |  |  |  |  |  |  |  | 
| 485 |  |  |  |  |  |  | foreach my $doc ( @html_docs ) { | 
| 486 |  |  |  |  |  |  | print "Processing $doc\n"; | 
| 487 |  |  |  |  |  |  | my $p = HTML::TokeParser::Simple->new( file => $doc ); | 
| 488 |  |  |  |  |  |  | while ( my $token = $p->get_token ) { | 
| 489 |  |  |  |  |  |  | next unless $token->is_comment; | 
| 490 |  |  |  |  |  |  | print PHB $token->as_is, "\n"; | 
| 491 |  |  |  |  |  |  | } | 
| 492 |  |  |  |  |  |  | } | 
| 493 |  |  |  |  |  |  |  | 
| 494 |  |  |  |  |  |  | close PHB; | 
| 495 |  |  |  |  |  |  |  | 
| 496 |  |  |  |  |  |  | =head2 Stripping Comments | 
| 497 |  |  |  |  |  |  |  | 
| 498 |  |  |  |  |  |  | Uh oh.  Turns out that your PHB was right for a change.  Many of the comments | 
| 499 |  |  |  |  |  |  | in the HTML weren't very polite.  Since your entire graphics department was | 
| 500 |  |  |  |  |  |  | just fired, it falls on you need to strip those comments from the HTML. | 
| 501 |  |  |  |  |  |  |  | 
| 502 |  |  |  |  |  |  | use strict; | 
| 503 |  |  |  |  |  |  | use HTML::TokeParser::Simple; | 
| 504 |  |  |  |  |  |  |  | 
| 505 |  |  |  |  |  |  | my $new_folder = 'no_comment/'; | 
| 506 |  |  |  |  |  |  | my @html_docs  = glob( "*.html" ); | 
| 507 |  |  |  |  |  |  |  | 
| 508 |  |  |  |  |  |  | foreach my $doc ( @html_docs ) { | 
| 509 |  |  |  |  |  |  | print "Processing $doc\n"; | 
| 510 |  |  |  |  |  |  | my $new_file = "$new_folder$doc"; | 
| 511 |  |  |  |  |  |  |  | 
| 512 |  |  |  |  |  |  | open PHB, "> $new_file" or die "Cannot open $new_file for writing: $!"; | 
| 513 |  |  |  |  |  |  |  | 
| 514 |  |  |  |  |  |  | my $p = HTML::TokeParser::Simple->new( $file => doc ); | 
| 515 |  |  |  |  |  |  | while ( my $token = $p->get_token ) { | 
| 516 |  |  |  |  |  |  | next if $token->is_comment; | 
| 517 |  |  |  |  |  |  | print PHB $token->as_is; | 
| 518 |  |  |  |  |  |  | } | 
| 519 |  |  |  |  |  |  | close PHB; | 
| 520 |  |  |  |  |  |  | } | 
| 521 |  |  |  |  |  |  |  | 
| 522 |  |  |  |  |  |  | =head2 Changing form tags | 
| 523 |  |  |  |  |  |  |  | 
| 524 |  |  |  |  |  |  | Your company was foo.com and now is bar.com.  Unfortunately, whoever wrote your | 
| 525 |  |  |  |  |  |  | HTML decided to hardcode "http://www.foo.com/" into the C<action> attribute of | 
| 526 |  |  |  |  |  |  | the form tags.  You need to change it to "http://www.bar.com/". | 
| 527 |  |  |  |  |  |  |  | 
| 528 |  |  |  |  |  |  | use strict; | 
| 529 |  |  |  |  |  |  | use HTML::TokeParser::Simple; | 
| 530 |  |  |  |  |  |  |  | 
| 531 |  |  |  |  |  |  | my $new_folder = 'new_html/'; | 
| 532 |  |  |  |  |  |  | my @html_docs  = glob( "*.html" ); | 
| 533 |  |  |  |  |  |  |  | 
| 534 |  |  |  |  |  |  | foreach my $doc ( @html_docs ) { | 
| 535 |  |  |  |  |  |  | print "Processing $doc\n"; | 
| 536 |  |  |  |  |  |  | my $new_file = "$new_folder$doc"; | 
| 537 |  |  |  |  |  |  |  | 
| 538 |  |  |  |  |  |  | open FILE, "> $new_file" or die "Cannot open $new_file for writing: $!"; | 
| 539 |  |  |  |  |  |  |  | 
| 540 |  |  |  |  |  |  | my $p = HTML::TokeParser::Simple->new( file => $doc ); | 
| 541 |  |  |  |  |  |  | while ( my $token = $p->get_token ) { | 
| 542 |  |  |  |  |  |  | if ( $token->is_start_tag('form') ) { | 
| 543 |  |  |  |  |  |  | my $action = $token->get_attr(action); | 
| 544 |  |  |  |  |  |  | $action =~ s/www\.foo\.com/www.bar.com/; | 
| 545 |  |  |  |  |  |  | $token->set_attr('action', $action); | 
| 546 |  |  |  |  |  |  | } | 
| 547 |  |  |  |  |  |  | print FILE $token->as_is; | 
| 548 |  |  |  |  |  |  | } | 
| 549 |  |  |  |  |  |  | close FILE; | 
| 550 |  |  |  |  |  |  | } | 
| 551 |  |  |  |  |  |  |  | 
| 552 |  |  |  |  |  |  | =head1 CAVEATS | 
| 553 |  |  |  |  |  |  |  | 
| 554 |  |  |  |  |  |  | For compatibility reasons with C<HTML::TokeParser>, methods that return | 
| 555 |  |  |  |  |  |  | references are violating encapsulation and altering the references directly | 
| 556 |  |  |  |  |  |  | B<will> alter the state of the object.  Subsequent calls to C<rewrite_tag()> | 
| 557 |  |  |  |  |  |  | can thus have unexpected results.  Do not alter these references directly | 
| 558 |  |  |  |  |  |  | unless you are following behavior described in these docs.  In the future, | 
| 559 |  |  |  |  |  |  | certain methods such as C<get_attr>, C<get_attrseq> and others may return a | 
| 560 |  |  |  |  |  |  | copy of the reference rather than the original reference.  This behavior has | 
| 561 |  |  |  |  |  |  | not yet been changed in order to maintain compatibility with previous versions | 
| 562 |  |  |  |  |  |  | of this module.  At the present time, your author is not aware of anyone taking | 
| 563 |  |  |  |  |  |  | advantage of this "feature," but it's better to be safe than sorry. | 
| 564 |  |  |  |  |  |  |  | 
| 565 |  |  |  |  |  |  | Use of C<$HTML::Parser::VERSION> which is less than 3.25 may result in | 
| 566 |  |  |  |  |  |  | incorrect behavior as older versions do not always handle XHTML correctly.  It | 
| 567 |  |  |  |  |  |  | is the programmer's responsibility to verify that the behavior of this code | 
| 568 |  |  |  |  |  |  | matches the programmer's needs. | 
| 569 |  |  |  |  |  |  |  | 
| 570 |  |  |  |  |  |  | Note that C<HTML::Parser> processes text in 512 byte chunks.  This sometimes | 
| 571 |  |  |  |  |  |  | will cause strange behavior and cause text to be broken into more than one | 
| 572 |  |  |  |  |  |  | token.  You can suppress this behavior with the following command: | 
| 573 |  |  |  |  |  |  |  | 
| 574 |  |  |  |  |  |  | $p->unbroken_text( [$bool] ); | 
| 575 |  |  |  |  |  |  |  | 
| 576 |  |  |  |  |  |  | See the C<HTML::Parser> documentation and | 
| 577 |  |  |  |  |  |  | http://www.perlmonks.org/index.pl?node_id=230667 for more information. | 
| 578 |  |  |  |  |  |  |  | 
| 579 |  |  |  |  |  |  | =head1 BUGS | 
| 580 |  |  |  |  |  |  |  | 
| 581 |  |  |  |  |  |  | There are no known bugs, but that's no guarantee. | 
| 582 |  |  |  |  |  |  |  | 
| 583 |  |  |  |  |  |  | Address bug reports and comments to: E<lt>eop_divo_sitruc@yahoo.comE<gt>.  When | 
| 584 |  |  |  |  |  |  | sending bug reports, please provide the version of C<HTML::Parser>, | 
| 585 |  |  |  |  |  |  | C<HTML::TokeParser>, C<HTML::TokeParser::Simple>, the version of Perl, and the | 
| 586 |  |  |  |  |  |  | version of the operating system you are using. | 
| 587 |  |  |  |  |  |  |  | 
| 588 |  |  |  |  |  |  | Reverse the name to email the author. | 
| 589 |  |  |  |  |  |  |  | 
| 590 |  |  |  |  |  |  | =head1 SUBCLASSING | 
| 591 |  |  |  |  |  |  |  | 
| 592 |  |  |  |  |  |  | You may wish to change the behavior of this module.  You probably do not want | 
| 593 |  |  |  |  |  |  | to subclass C<HTML::TokeParser::Simple>.  Instead, you'll want to subclass one | 
| 594 |  |  |  |  |  |  | of the token classes.  C<HTML::TokeParser::Simple::Token> is the base class for | 
| 595 |  |  |  |  |  |  | all tokens.  Global behavioral changes should go there.  Otherwise, see the | 
| 596 |  |  |  |  |  |  | appropriate token class for the behavior you wish to alter. | 
| 597 |  |  |  |  |  |  |  | 
| 598 |  |  |  |  |  |  | =head1 SEE ALSO | 
| 599 |  |  |  |  |  |  |  | 
| 600 |  |  |  |  |  |  | L<HTML::TokeParser::Simple::Token> | 
| 601 |  |  |  |  |  |  |  | 
| 602 |  |  |  |  |  |  | L<HTML::TokeParser::Simple::Token::Tag> | 
| 603 |  |  |  |  |  |  |  | 
| 604 |  |  |  |  |  |  | L<HTML::TokeParser::Simple::Token::Text> | 
| 605 |  |  |  |  |  |  |  | 
| 606 |  |  |  |  |  |  | L<HTML::TokeParser::Simple::Token::Comment> | 
| 607 |  |  |  |  |  |  |  | 
| 608 |  |  |  |  |  |  | L<HTML::TokeParser::Simple::Token::Declaration> | 
| 609 |  |  |  |  |  |  |  | 
| 610 |  |  |  |  |  |  | L<HTML::TokeParser::Simple::Token::ProcessInstruction> | 
| 611 |  |  |  |  |  |  |  | 
| 612 |  |  |  |  |  |  | =head1 COPYRIGHT | 
| 613 |  |  |  |  |  |  |  | 
| 614 |  |  |  |  |  |  | Copyright (c) 2004 by Curtis "Ovid" Poe.  All rights reserved.  This program is | 
| 615 |  |  |  |  |  |  | free software; you may redistribute it and/or modify it under the same terms as | 
| 616 |  |  |  |  |  |  | Perl itself | 
| 617 |  |  |  |  |  |  |  | 
| 618 |  |  |  |  |  |  | =head1 AUTHOR | 
| 619 |  |  |  |  |  |  |  | 
| 620 |  |  |  |  |  |  | Curtis "Ovid" Poe E<lt>eop_divo_sitruc@yahoo.comE<gt> | 
| 621 |  |  |  |  |  |  |  | 
| 622 |  |  |  |  |  |  | Reverse the name to email the author. | 
| 623 |  |  |  |  |  |  |  | 
| 624 |  |  |  |  |  |  | =cut |