File Coverage

blib/lib/PPI/Token/BOM.pm
Criterion Covered Total %
statement 15 16 93.7
branch 4 6 66.6
condition n/a
subroutine 3 3 100.0
pod n/a
total 22 25 88.0


line stmt bran cond sub pod time code
1             package PPI::Token::BOM;
2              
3             =pod
4              
5             =head1 NAME
6              
7             PPI::Token::BOM - Tokens representing Unicode byte order marks
8              
9             =head1 INHERITANCE
10              
11             PPI::Token::BOM
12             isa PPI::Token
13             isa PPI::Element
14              
15             =head1 DESCRIPTION
16              
17             This is a special token in that it can only occur at the beginning of
18             documents. If a byte order mark occurs elsewhere in a file, it should
19             be treated as L<PPI::Token::Whitespace>. We recognize the byte order
20             marks identified at this URL:
21             L<https://web.archive.org/web/https://www.unicode.org/faq/utf_bom.html#BOM>
22              
23             UTF-32, big-endian 00 00 FE FF
24             UTF-32, little-endian FF FE 00 00
25             UTF-16, big-endian FE FF
26             UTF-16, little-endian FF FE
27             UTF-8 EF BB BF
28              
29             Note that as of this writing, PPI only has support for UTF-8
30             (namely, in POD and strings) and no support for UTF-16 or UTF-32. The
31             BOMs of the latter two are recognized only to report them as unsupported.
32              
33             The BOM is considered non-significant, like white space.
34              
35             =head1 METHODS
36              
37             There are no additional methods beyond those provided by the parent
38             L<PPI::Token> and L<PPI::Element> classes.
39              
40             =cut
41              
42 67     67   306 use strict;
  67         107  
  67         1798  
43 67     67   209 use PPI::Token ();
  67         69  
  67         18952  
44              
45             our $VERSION = '1.284';
46              
47             our @ISA = "PPI::Token";
48              
49             sub significant() { '' }
50              
51              
52              
53              
54              
55             #####################################################################
56             # Parsing Methods
57              
58             my %bom_types = (
59             "\x00\x00\xfe\xff" => 'UTF-32',
60             "\xff\xfe\x00\x00" => 'UTF-32',
61             "\xfe\xff" => 'UTF-16',
62             "\xff\xfe" => 'UTF-16',
63             "\xef\xbb\xbf" => 'UTF-8',
64             );
65              
66             sub __TOKENIZER__on_line_start {
67 16887     16887   21657 my $t = $_[1];
68 16887         27458 $_ = $t->{line};
69              
70 16887 100       48641 if (m/^(\x00\x00\xfe\xff | # UTF-32, big-endian
71             \xff\xfe\x00\x00 | # UTF-32, little-endian
72             \xfe\xff | # UTF-16, big-endian
73             \xff\xfe | # UTF-16, little-endian
74             \xef\xbb\xbf) # UTF-8
75             /xs) {
76 2         5 my $bom = $1;
77              
78 2 50       6 if ($bom_types{$bom} ne 'UTF-8') {
79 0         0 return $t->_error("$bom_types{$bom} is not supported");
80             }
81              
82 2 50       7 $t->_new_token('BOM', $bom) or return undef;
83 2         4 $t->{line_cursor} += length $bom;
84             }
85              
86             # Continue just as if there was no BOM
87 16887         27409 $t->{class} = 'PPI::Token::Whitespace';
88 16887         55827 return $t->{class}->__TOKENIZER__on_line_start($t);
89             }
90              
91             1;
92              
93             =pod
94              
95             =head1 SUPPORT
96              
97             See the L<support section|PPI/SUPPORT> in the main module.
98              
99             =head1 AUTHOR
100              
101             Chris Dolan E<lt>cdolan@cpan.orgE<gt>
102              
103             =head1 COPYRIGHT
104              
105             Copyright 2001 - 2011 Adam Kennedy.
106              
107             This program is free software; you can redistribute
108             it and/or modify it under the same terms as Perl itself.
109              
110             The full text of the license can be found in the
111             LICENSE file included with this module.
112              
113             =cut