File Coverage

blib/lib/Courriel/Headers.pm

Criterion	Covered	Total	%
statement	113	166	68.0
branch	17	34	50.0
condition	4	18	22.2
subroutine	24	29	82.7
pod	9	9	100.0
total	167	256	65.2

line	stmt	bran	cond	sub	pod	time	code
1							package Courriel::Headers;
2
3	8			8		2255	use strict;
	8					16
	8					313
4	8			8		40	use warnings;
	8					41
	8					269
5	8			8		35	use namespace::autoclean;
	8					11
	8					70
6
7							our $VERSION = '0.44';
8
9	8			8		3262	use Courriel::Header;
	8					24
	8					441
10	8			8		4428	use Courriel::Header::ContentType;
	8					24
	8					394
11	8			8		4671	use Courriel::Header::Disposition;
	8					28
	8					529
12							use Courriel::Types
13	8			8		69	qw( ArrayRef Defined HashRef HeaderArray NonEmptyStr Str Streamable StringRef );
	8					17
	8					162
14	8			8		80838	use Encode qw( decode );
	8					18
	8					581
15	8			8		43	use MIME::Base64 qw( decode_base64 );
	8					14
	8					425
16	8			8		6252	use MIME::QuotedPrint qw( decode_qp );
	8					2156
	8					509
17	8			8		54	use Params::ValidationCompiler qw( validation_for );
	8					15
	8					380
18	8			8		45	use Scalar::Util qw( blessed reftype );
	8					15
	8					388
19
20	8			8		42	use Moose;
	8					14
	8					122
21	8			8		31332	use MooseX::StrictConstructor;
	8					18
	8					78
22
23							with 'Courriel::Role::Streams' => { -exclude => ['stream_to'] };
24
25							has _headers => (
26							traits => ['Array'],
27							is => 'ro',
28							isa => HeaderArray,
29							default => sub { [] },
30							init_arg => 'headers',
31							handles => {
32							headers => 'elements',
33							},
34							);
35
36							# The _key_indices field, along with all the complicated code to
37							# get/add/remove headers below, is necessary because RFC 5322 says:
38							#
39							# However, for the purposes of this specification, header fields SHOULD NOT
40							# be reordered when a message is transported or transformed. More
41							# importantly, the trace header fields and resent header fields MUST NOT be
42							# reordered, and SHOULD be kept in blocks prepended to the message.
43							#
44							# So we store headers as an array ref. When we add additional values for a
45							# header, we will put them after the last header of the same name in the array
46							# ref. If no such header exists yet, then we just put them at the end of the
47							# arrayref.
48
49							has _key_indices => (
50							traits => ['Hash'],
51							isa => HashRef [ ArrayRef [NonEmptyStr] ],
52							init_arg => undef,
53							lazy => 1,
54							builder => '_build_key_indices',
55							clearer => '_clear_key_indices',
56							handles => {
57							__key_indices_for => 'get',
58							},
59							);
60
61							override BUILDARGS => sub {
62							my $class = shift;
63
64							my $p = super();
65
66							return $p unless $p->{headers};
67
68							# Could this be done as a coercion for the HeaderArray type? Maybe, but
69							# it'd probably need structured types, which seems like as much of a
70							# hassle as just doing this.
71							if ( reftype( $p->{headers} ) eq 'ARRAY' ) {
72							my $headers = $p->{headers};
73
74							## no critic (ControlStructures::ProhibitCStyleForLoops)
75							for ( my $i = 1; $i < @{$headers}; $i += 2 ) {
76							next if blessed( $headers->[ $i - 1 ] );
77
78							my $name = $headers->[ $i - 1 ];
79
80							next unless defined $name;
81
82							$headers->[$i] = $class->_inflate_header( $name, $headers->[$i] );
83							}
84							}
85							elsif ( reftype( $p->{headers} ) eq 'HASH' ) {
86							for my $name ( keys %{ $p->{headers} } ) {
87							next if blessed( $p->{headers}{$name} );
88
89							$p->{headers}{$name}
90							= $class->_inflate_header( $name, $p->{headers}{$name} );
91							}
92							}
93
94							return $p;
95							};
96
97							sub _inflate_header {
98	2210			2210		2468	my $class = shift;
99	2210					2214	my $name = shift;
100	2210					2330	my $value = shift;
101
102	2210	100				6747	my ( $header_class, $method )
		100
103							= lc $name eq 'content-type'
104							? ( 'Courriel::Header::ContentType', 'new_from_value' )
105							: lc $name eq 'content-disposition'
106							? ( 'Courriel::Header::Disposition', 'new_from_value' )
107							: ( 'Courriel::Header', 'new' );
108
109	2210					71248	return $header_class->$method(
110							name => $name,
111							value => $value,
112							);
113							}
114
115							sub _build_key_indices {
116	356			356		427	my $self = shift;
117
118	356					10895	my $headers = $self->_headers;
119
120	356					449	my %indices;
121							## no critic (ControlStructures::ProhibitCStyleForLoops)
122	356					490	for ( my $i = 0; $i < @{$headers}; $i += 2 ) {
	4553					7587
123	4197					3108	push @{ $indices{ lc $headers->[$i] } }, $i + 1;
	4197					9721
124							}
125
126	356					14463	return \%indices;
127							}
128
129							{
130							my $validator = validation_for(
131							params => [ { type => NonEmptyStr } ],
132							);
133
134							sub get {
135	309			309	1	469	my $self = shift;
136	309					6970	my ($name) = $validator->(@_);
137
138	309					5333	return @{ $self->_headers }[ $self->_key_indices_for($name) ];
	309					10438
139							}
140							}
141
142							{
143							my $validator = validation_for(
144							params => [ { type => NonEmptyStr } ],
145							);
146
147							sub get_values {
148	0			0	1	0	my $self = shift;
149	0					0	my ($name) = $validator->(@_);
150
151							return
152	0					0	map { $_->value }
153	0					0	@{ $self->_headers }[ $self->_key_indices_for($name) ];
	0					0
154							}
155							}
156
157							sub _key_indices_for {
158	655			655		961	my $self = shift;
159	655					816	my $name = shift;
160
161	655	100				703	return @{ $self->__key_indices_for( lc $name ) \|\| [] };
	655					27602
162							}
163
164							{
165							my $validator = validation_for(
166							params => [
167							{ type => NonEmptyStr },
168							{ type => Defined },
169							],
170							);
171
172							sub add {
173	173			173	1	236	my $self = shift;
174	173					3724	my ( $name, $value ) = $validator->(@_);
175
176	173					8132	my $headers = $self->_headers;
177
178	173					457	my $last_index = ( $self->_key_indices_for($name) )[-1];
179
180	173	100	66			1195	my $header
181							= blessed($value)
182							&& $value->isa('Courriel::Header')
183							? $value
184							: $self->_inflate_header( $name, $value );
185
186	173	50				348	if ($last_index) {
187	0					0	splice @{$headers}, $last_index + 1, 0, ( $name => $header );
	0					0
188							}
189							else {
190	173					182	push @{$headers}, ( $name => $header );
	173					423
191							}
192
193	173					7126	$self->_clear_key_indices;
194
195	173					374	return;
196							}
197							}
198
199							{
200							my $validator = validation_for(
201							params => [
202							{ type => NonEmptyStr },
203							{ type => Defined },
204							],
205							);
206
207							# Used to add things like Resent or Received headers
208
209							## no critic (Subroutines::ProhibitBuiltinHomonyms)
210							sub unshift {
211	0			0	1	0	my $self = shift;
212	0					0	my ( $name, $value ) = $validator->(@_);
213
214	0					0	my $headers = $self->_headers;
215
216	0	0	0			0	my $header
217							= blessed($value)
218							&& $value->isa('Courriel::Header')
219							? $value
220							: $self->_inflate_header( $name, $value );
221
222	0					0	unshift @{$headers}, ( $name => $header );
	0					0
223
224	0					0	return;
225							}
226							}
227
228							{
229							my $validator = validation_for(
230							params => [
231							{ type => NonEmptyStr },
232							],
233							);
234
235							sub remove {
236	173			173	1	237	my $self = shift;
237	173					3626	my ($name) = $validator->(@_);
238
239	173					7742	my $headers = $self->_headers;
240
241	173					405	for my $idx ( reverse $self->_key_indices_for($name) ) {
242	107					131	splice @{$headers}, $idx - 1, 2;
	107					2698
243							}
244
245	173					6545	$self->_clear_key_indices;
246
247	173					330	return;
248							}
249							}
250
251							{
252							my $validator = validation_for(
253							params => [
254							{ type => NonEmptyStr },
255							{ type => Defined },
256							],
257							);
258
259							sub replace {
260	173			173	1	207	my $self = shift;
261	173					3651	my ( $name, $value ) = $validator->(@_);
262
263	173					3519	$self->remove($name);
264	173					471	$self->add( $name => $value );
265
266	173					367	return;
267							}
268							}
269
270							{
271							my $horiz_text = qr/[^\x0a\x0d]/;
272							my $horiz_ws = qr/[ \t]/;
273							my $line_re = qr/
274							(?:
275							([^\s:][^:\n\r]*) # a header name
276							: # followed by a colon
277							$horiz_ws*
278							($horiz_text*) # header value - can be empty
279							)
280							\|
281							$horiz_ws+(\S$horiz_text*)? # continuation line
282							/x;
283
284							my $validator = validation_for(
285							params => [ text => { type => StringRef } ],
286							named_to_list => 1,
287							);
288
289							sub parse {
290	142			142	1	214	my $class = shift;
291	142					3822	my ($text) = $validator->(@_);
292
293	142					18804	my @headers;
294
295	142					496	$class->_maybe_fix_broken_headers($text);
296
297	142					204	while ( ${$text} =~ /\G${line_re}$Courriel::Helpers::LINE_SEP_RE/gc )
	2844					15600
298							{
299	2702	100				4628	if ( defined $1 ) {
300	2091					5145	push @headers, $1, $2;
301							}
302							else {
303	611	50				1260	die
304							'Header text contains a continuation line before a header name has been seen.'
305							unless @headers;
306
307	611		50			1071	$headers[-1] //= q{};
308
309							# RFC 5322 says:
310							#
311							# Runs of FWS, comment, or CFWS that occur between lexical tokens in a
312							# structured header field are semantically interpreted as a single
313							# space character.
314	611	50				1307	$headers[-1] .= q{ } if length $headers[-1];
315	611	100				1833	$headers[-1] .= $3 if defined $3;
316							}
317							}
318
319	142		50			217	my $pos = pos ${$text} // 0;
	142					526
320	142	50				224	if ( $pos != length ${$text} ) {
	142					421
321							my @lines = split $Courriel::Helpers::LINE_SEP_RE,
322	0					0	substr( ${$text}, 0, $pos );
	0					0
323	0					0	my $count = ( scalar @lines ) + 1;
324
325	0					0	my $line = ( split $Courriel::Helpers::LINE_SEP_RE, ${$text} )
	0					0
326							[ $count - 1 ];
327
328	0	0				0	die defined $line
329							? "Found an unparseable chunk in the header text starting at line $count:\n $line"
330							: 'Could not parse headers at all';
331							}
332
333							## no critic (ControlStructures::ProhibitCStyleForLoops)
334	142					570	for ( my $i = 1; $i < @headers; $i += 2 ) {
335	2091					3043	$headers[$i] = $class->_mime_decode( $headers[$i] );
336							}
337
338	142					5237	return $class->new( headers => \@headers );
339							}
340							}
341
342							sub _maybe_fix_broken_headers {
343	142			142		214	my $class = shift;
344	142					215	my $text = shift;
345
346							# Some broken email messages have a newline in the headers that isn't
347							# acting as a continuation, it's just an arbitrary line break. See
348							# t/data/stress-test/mbox_mime_applemail_1xb.txt
349	142					168	${$text}
	142					3433
350							=~ s/$Courriel::Helpers::LINE_SEP_RE([^\s:][^:]+$Courriel::Helpers::LINE_SEP_RE)/$1/g;
351
352	142					276	return;
353							}
354
355							{
356							my $validator = validation_for(
357							params => [
358							output => { type => Streamable },
359							skip => {
360							type => ArrayRef [NonEmptyStr], default => sub { [] }
361							},
362							charset => { type => NonEmptyStr, default => 'utf8' },
363							],
364							named_to_list => 1,
365							);
366
367							sub stream_to {
368	0			0	1	0	my $self = shift;
369	0					0	my ( $output, $skip, $charset ) = $validator->(@_);
370
371	0					0	my %skip = map { lc $_ => 1 } @{$skip};
	0					0
	0					0
372
373	0					0	for my $header ( grep { blessed($_) } @{ $self->_headers } ) {
	0					0
	0					0
374	0	0				0	next if $skip{ lc $header->name };
375
376	0					0	$header->stream_to( charset => $charset, output => $output );
377							}
378
379	0					0	return;
380							}
381							}
382
383							sub as_string {
384	0			0	1	0	my $self = shift;
385
386	0					0	my $string = q{};
387
388	0					0	$self->stream_to( output => $self->_string_output( \$string ), @_ );
389
390	0					0	return $string;
391							}
392
393							{
394							my $mime_word = qr/
395							(?:
396							=\? # begin encoded word
397							(?<charset>[-0-9A-Za-z_]+) # charset (encoding)
398							(?:\[A-Za-z]{1,8}(?:-[A-Za-z]{1,8}))? # language (RFC 2231)
399							\?
400							(?<encoding>[QqBb]) # encoding type
401							\?
402							(?<content>.*?) # Base64-encoded contents
403							\?= # end encoded word
404							\|
405							(?<unencoded>\S+)
406							)
407							(?<ws>[ \t]+)?
408							/x;
409
410							sub _mime_decode {
411	2091			2091		1823	my $self = shift;
412	2091					1990	my $text = shift;
413
414	2091	50				7006	return $text unless $text =~ /=\?[\w-]+\?[BQ]\?/i;
415
416	0						my @chunks;
417
418							# If a MIME encoded word is followed by _another_ such word, we ignore any
419							# intervening whitespace, otherwise we preserve the whitespace between a
420							# MIME encoded word and an unencoded word. See RFC 2047 for details on
421							# this.
422	0						while ( $text =~ /\G$mime_word/g ) {
423	0	0					if ( defined $+{charset} ) {
424							push @chunks, {
425							content => $self->_decode_one_word(
426							@+{ 'charset', 'encoding', 'content' }
427							),
428							ws => $+{ws},
429	0						is_mime => 1,
430							};
431							}
432							else {
433							push @chunks, {
434							content => $+{unencoded},
435							ws => $+{ws},
436	0						is_mime => 0,
437							};
438							}
439							}
440
441	0						my $result = q{};
442
443	0						for my $i ( 0 .. $#chunks ) {
444	0						$result .= $chunks[$i]{content};
445							$result .= ( $chunks[$i]{ws} // q{} )
446							unless $chunks[$i]{is_mime}
447							&& $chunks[ $i + 1 ]
448	0	0	0				&& $chunks[ $i + 1 ]{is_mime};
			0
			0
449							}
450
451	0						return $result;
452							}
453							}
454
455							sub _decode_one_word {
456	0			0			my $self = shift;
457	0						my $charset = shift;
458	0						my $encoding = shift;
459	0						my $content = shift;
460
461	0	0					if ( uc $encoding eq 'B' ) {
462	0						return decode( $charset, decode_base64($content) );
463							}
464							else {
465	0						$content =~ tr/_/ /;
466	0						return decode( $charset, decode_qp($content) );
467							}
468							}
469
470							__PACKAGE__->meta->make_immutable;
471
472							1;
473
474							# ABSTRACT: The headers for an email part
475
476							__END__
477
478							=pod
479
480							=encoding UTF-8
481
482							=head1 NAME
483
484							Courriel::Headers - The headers for an email part
485
486							=head1 VERSION
487
488							version 0.44
489
490							=head1 SYNOPSIS
491
492							my $email = Courriel->parse( text => ... );
493							my $headers = $email->headers;
494
495							print "$_\n" for $headers->get('Received');
496
497							=head1 DESCRIPTION
498
499							This class represents the headers of an email.
500
501							Any sub part of an email can have its own headers, so every part has an
502							associated object representing its headers. This class makes no distinction
503							between top-level headers and headers for a sub part.
504
505							Each individual header name/value pair is represented internally by a
506							L<Courriel::Header> object. Some headers have their own special
507							subclass. These are:
508
509							=over 4
510
511							=item * Content-Type
512
513							This is stored as a L<Courriel::Header::ContentType> object.
514
515							=item * Content-Disposition
516
517							This is stored as a L<Courriel::Header::Disposition> object.
518
519							=back
520
521							=head1 API
522
523							This class supports the following methods:
524
525							=head2 Courriel::Headers->parse( ... )
526
527							This method creates a new object by parsing a string. It accepts the following
528							parameters:
529
530							=over 4
531
532							=item * text
533
534							The text to parse. This can either be a plain scalar or a reference to a
535							scalar. If you pass a reference, the underlying scalar may be modified.
536
537							=back
538
539							Header parsing unfolds folded headers, and decodes any MIME-encoded values as
540							described in RFC 2047. Parsing also decodes header attributes encoded as
541							described in RFC 2231.
542
543							=head2 Courriel::Headers->new( headers => [ ... ] )
544
545							This method creates a new object. It accepts one parameter, C<headers>, which
546							should be an array reference of header names and values.
547
548							A given header key can appear multiple times.
549
550							This object does not (yet, perhaps) enforce RFC restrictions on repetition of
551							certain headers.
552
553							Header order is preserved, per RFC 5322.
554
555							=head2 $headers->get($name)
556
557							Given a header name, this returns a list of the L<Courriel::Header> objects
558							found for the header. Each occurrence of the header is returned as a separate
559							object.
560
561							=head2 $headers->get_values($name)
562
563							Given a header name, this returns a list of the string values found for the
564							header. Each occurrence of the header is returned as a separate string.
565
566							=head2 $headers->add( $name => $value )
567
568							Given a header name and value, this adds the headers to the object. If any of
569							the headers already have values in the object, then new values are added after
570							the existing values, rather than at the end of headers.
571
572							The value can be provided as a string or a L<Courriel::Header> object.
573
574							=head2 $headers->unshift( $name => $value )
575
576							This is like C<add()>, but this pushes the headers onto the front of the
577							internal headers array. This is useful if you are adding "Received" headers,
578							which per RFC 5322, should always be added at the I<top> of the headers.
579
580							The value can be provided as a string or a L<Courriel::Header> object.
581
582							=head2 $headers->remove($name)
583
584							Given a header name, this removes all instances of that header from the object.
585
586							=head2 $headers->replace( $name => $value )
587
588							A shortcut for calling C<remove()> and C<add()>.
589
590							The value can be provided as a string or a L<Courriel::Header> object.
591
592							=head2 $headers->as_string( skip => ...., charset => ... )
593
594							This returns a string representing the headers in the object. The values will
595							be folded and/or MIME-encoded as needed.
596
597							The C<skip> parameter should be an array reference containing the name of
598							headers that should be skipped. This parameter is optional, and the default is
599							to include all headers.
600
601							The C<charset> parameter specifies what character set to use for MIME-encoding
602							non-ASCII values. This defaults to "utf8". The charset name must be one
603							recognized by the L<Encode> module.
604
605							MIME encoding is always done using the "B" (Base64) encoding, never the "Q"
606							encoding.
607
608							=head2 $headers->stream_to( output => $output, skip => ...., charset => ... )
609
610							This method will send the stringified headers to the specified output.
611
612							See the C<as_string()> method for documentation on the C<skip> and C<charset>
613							parameters.
614
615							=head1 ROLES
616
617							This class does the C<Courriel::Role::Streams> role.
618
619							=head1 SUPPORT
620
621							Bugs may be submitted through L<the RT bug tracker\|http://rt.cpan.org/Public/Dist/Display.html?Name=Courriel>
622							(or L<bug-courriel@rt.cpan.org\|mailto:bug-courriel@rt.cpan.org>).
623
624							I am also usually active on IRC as 'autarch' on C<irc://irc.perl.org>.
625
626							=head1 AUTHOR
627
628							Dave Rolsky <autarch@urth.org>
629
630							=head1 COPYRIGHT AND LICENSE
631
632							This software is Copyright (c) 2016 by Dave Rolsky.
633
634							This is free software, licensed under:
635
636							The Artistic License 2.0 (GPL Compatible)
637
638							=cut