File Coverage

lib/Mail/Make/Headers/Subject.pm

Criterion	Covered	Total	%
statement	100	107	93.4
branch	20	26	76.9
condition	3	8	37.5
subroutine	23	25	92.0
pod	6	8	75.0
total	152	174	87.3

line	stmt	bran	cond	sub	pod	time	code
1							##----------------------------------------------------------------------------
2							## MIME Email Builder - ~/lib/Mail/Make/Headers/Subject.pm
3							## Version v0.1.1
4							## Copyright(c) 2026 DEGUEST Pte. Ltd.
5							## Author: Jacques Deguest <jack@deguest.jp>
6							## Created 2026/03/03
7							## Modified 2026/03/05
8							## All rights reserved
9							##
10							## This program is free software; you can redistribute it and/or modify it
11							## under the same terms as Perl itself.
12							##----------------------------------------------------------------------------
13							package Mail::Make::Headers::Subject;
14							BEGIN
15	0					0	{
16	9			9		146483	use strict;
	9					18
	9					371
17	9			9		39	use warnings;
	9					15
	9					547
18	9			9		94	warnings::register_categories( 'Mail::Make' );
19	9			9		42	use parent qw( Module::Generic );
	9					29
	9					58
20	9			9		259016	use vars qw( $VERSION $EXCEPTION_CLASS );
	9					21
	9					479
21	9			9		41	use utf8;
	9					27
	9					60
22	9			9		374	use Encode ();
	9					14
	9					178
23	9			9		515	use Mail::Make::Exception;
	9					16
	9					73
24	9			9		6136	use MIME::Base64 ();
	9					5638
	9					531
25							use overload(
26							'""' => 'as_string',
27	128			128		5825	'bool' => sub { 1 },
28	9			9		54	);
	9					13
	9					77
29	9					13	our $EXCEPTION_CLASS = 'Mail::Make::Exception';
30	9					245	our $VERSION = 'v0.1.1';
31							# RFC 2047 §2: an encoded-word must not exceed 75 characters total.
32							# =?UTF-8?B? ... ?= - the wrapper is 12 chars, leaving 63 for base64 text.
33							# 63 base64 chars encode floor(63 * 3/4) = 47 raw bytes.
34							# We use 45 bytes per chunk (multiple of 3) to keep base64 clean.
35							use constant
36							{
37	9					854	CHARSET => 'UTF-8',
38							EW_MAX_BYTES => 45,
39							FOLD_SEP => "\015\012 ",
40	9			9		1381	};
	9					17
41							};
42
43	9			9		56	use strict;
	9					15
	9					244
44	9			9		33	use warnings;
	9					13
	9					10789
45
46							sub init
47							{
48	64			64	1	370931	my $self = shift( @_ );
49	64					750	$self->{_raw} = undef; # original Perl string, not yet encoded
50	64					314	$self->{_encoded} = undef; # RFC 2047 form, cached after first encode
51	64					283	$self->{_exception_class} = $EXCEPTION_CLASS;
52	64	50				318	$self->SUPER::init( @_ ) \|\| return( $self->pass_error );
53	64					171	return( $self );
54							}
55
56							# as_string() - returns the RFC 2047 encoded form suitable for the wire.
57							# Pure ASCII values are returned unchanged.
58							sub as_string
59							{
60	60			60	1	148	my $self = shift( @_ );
61	60	50				194	return( '' ) if( !defined( $self->{_raw} ) );
62	60	50				198	return( $self->{_encoded} ) if( defined( $self->{_encoded} ) );
63	60					266	$self->{_encoded} = _encode_subject( $self->{_raw} );
64	60					555	return( $self->{_encoded} );
65							}
66
67							# decode( $encoded_string ) - class or instance method.
68							# Decodes an RFC 2047 encoded Subject value back to a Perl string.
69							sub decode
70							{
71	9			9	1	146	my $self = shift( @_ );
72	9		50			26	my $str = shift( @_ ) // return( $self->error( "No value to decode." ) );
73	9					25	return( _decode_subject( $str ) );
74							}
75
76							# field_name() - always returns 'Subject'
77	1			1	1	13	sub field_name { return( 'Subject' ); }
78
79							# raw() - returns the decoded Perl string (the original value before encoding)
80	1			1	1	6	sub raw { return( shift->{_raw} ); }
81
82							# value( [$text] ) - sets or gets the subject text.
83							# On assignment: stores the raw Perl string; clears the encoded cache.
84							# On retrieval: returns the decoded Perl string (i.e. human-readable).
85							sub value
86							{
87	61			61	1	245	my $self = shift( @_ );
88	61	100				183	if( @_ )
89							{
90	60					131	my $text = shift( @_ );
91	60	50				172	unless( defined( $text ) )
92							{
93	0					0	return( $self->error( "Subject value must be a defined scalar." ) );
94							}
95	60					138	$self->{_raw} = $text;
96	60					114	$self->{_encoded} = undef; # invalidate cache
97	60					137	return( $self );
98							}
99	1					7	return( $self->{_raw} );
100							}
101
102							# _decode_ew( $charset, $encoding, $text ) → decoded Perl string fragment
103							sub _decode_ew
104							{
105	16			16		74	my( $charset, $enc, $text ) = @_;
106	16					149	my $bytes;
107	16	100				48	if( uc( $enc ) eq 'B' )
108							{
109	15					44	$bytes = MIME::Base64::decode_base64( $text );
110							}
111							else
112							{
113							# Q encoding: _ → space, =XX → byte
114	1					4	$text =~ s/_/ /g;
115	1					5	$text =~ s/=([0-9A-Fa-f]{2})/chr( hex( $1 ) )/ge;
	2					10
116	1					3	$bytes = $text;
117							}
118	16					25	local $@;
119	16					29	my $decoded = eval{ Encode::decode( $charset, $bytes ) };
	16					75
120	16	50				790	return( $@ ? $bytes : $decoded );
121							}
122
123							# _decode_subject( $wire_string ) → $perl_string
124							# Decodes all RFC 2047 encoded-words in $wire_string.
125							# Handles both ?B? (Base64) and ?Q? (Quoted-Printable) forms.
126							sub _decode_subject
127							{
128	9			9		18	my $str = shift( @_ );
129	9	100	66			64	return( $str ) unless( defined( $str ) && $str =~ /=\?/ );
130
131							# Collapse folding whitespace between consecutive encoded-words:
132							# RFC 2047 §6.2: whitespace between two encoded-words is ignored.
133	8					50	$str =~ s/\?=[ \t](?:\015\012)?[ \t]=\?/?==?/g;
134
135	8					63	$str =~ s/=\?([A-Za-z0-9_-]+)\?([BbQq])\?([^?]*)\?=/_decode_ew( $1, $2, $3 )/ge;
	16					41
136	8					35	return( $str );
137							}
138
139							# _encode_subject( $perl_string ) → $wire_string
140							# Returns the string as-is if it is pure printable ASCII (RFC 2822 §2.2).
141							# Otherwise encodes the UTF-8 bytes in one or more RFC 2047 Base64
142							# encoded-words, folded with CRLF SP between them.
143							sub _encode_subject
144							{
145	60			60		198	my $text = shift( @_ );
146
147							# Pure printable ASCII + tab/space: no encoding needed.
148							# We also reject bare CRs and LFs here; they are illegal in a header.
149	60	100				450	return( $text ) unless( $text =~ /[^\x09\x20-\x7E]/ );
150
151							# Ensure the string carries Perl's UTF-8 flag before encoding to bytes.
152							# If the flag is off (raw octets from a context without Encode::decode),
153							# Encode::encode treats each octet as Latin-1, producing double-encoding:
154							# e.g. U+2014 (E2 80 94) becomes C3 A2 C2 80 C2 94 instead of E2 80 94.
155	17	100				87	utf8::decode( $text ) unless( utf8::is_utf8( $text ) );
156
157							# Encode the entire string to UTF-8 bytes, then split into safe chunks.
158							# Crucially: we split on byte boundaries that do NOT break a multi-byte UTF-8 sequence.
159							# Because we encode the whole string first we work purely in bytes and the split is
160							# safe.
161	17					163	my $bytes = Encode::encode( CHARSET, $text );
162
163	17					879	my @words;
164	17					39	my $offset = 0;
165	17					45	my $total = length( $bytes );
166	17					63	while( $offset < $total )
167							{
168							# Grab up to EW_MAX_BYTES bytes, but do not cut inside a multi-byte sequence.
169							# UTF-8 continuation bytes are 0x80..0xBF; a leading byte of a multi-byte sequence
170							# is 0xC0..0xFF. We back up until we sit at a leading byte boundary (or the very end).
171	31					50	my $len = EW_MAX_BYTES;
172	31	100				86	$len = $total - $offset if( $offset + $len > $total );
173	31					72	my $chunk = substr( $bytes, $offset, $len );
174
175							# If the byte immediately after our chunk is a UTF-8 continuation byte (0x80–0xBF),
176							# the last character of our chunk is split.c
177							# Walk backwards to a safe cut point.
178	31	100				81	if( $offset + $len < $total )
179							{
180	14					28	my $next = ord( substr( $bytes, $offset + $len, 1 ) );
181	14	50				37	if( ( $next & 0xC0 ) == 0x80 )
182							{
183							# Find last leading byte in chunk
184	0		0			0	while( $len > 0 && ( ord( substr( $chunk, $len - 1, 1 ) ) & 0xC0 ) == 0x80 )
185							{
186	0					0	$len--;
187							}
188	0					0	$chunk = substr( $bytes, $offset, $len );
189							}
190							}
191
192	31					116	my $b64 = MIME::Base64::encode_base64( $chunk, '' );
193	31					85	push( @words, '=?' . CHARSET . '?B?' . $b64 . '?=' );
194	31					76	$offset += $len;
195							}
196
197	17					94	return( join( FOLD_SEP, @words ) );
198							}
199
200							# NOTE: STORABLE support
201	0			0	0		sub STORABLE_freeze { CORE::return( CORE::shift->FREEZE( @_ ) ); }
202
203	0			0	0		sub STORABLE_thaw { CORE::return( CORE::shift->THAW( @_ ) ); }
204
205							1;
206							# NOTE: POD
207							__END__
208
209							=encoding utf-8
210
211							=head1 NAME
212
213							Mail::Make::Headers::Subject - RFC 2047 Aware Subject Header for Mail::Make
214
215							=head1 SYNOPSIS
216
217							use Mail::Make::Headers::Subject;
218
219							# Pure ASCII: passed through unchanged
220							my $s = Mail::Make::Headers::Subject->new;
221							$s->value( 'Quarterly Report' );
222							print $s->as_string;
223							# Quarterly Report
224
225							# Non-ASCII: automatically encoded per RFC 2047
226							$s->value( "Yamato, Inc. - Newsletter" );
227							print $s->as_string;
228							# =?UTF-8?B?QW5nZWxzLCBJbmMuIOKAlCBOZXdzbGV0dGVy?=
229
230							# Long Japanese subject: folded into multiple encoded-words
231							$s->value( "株式会社ヤマト・インク　第3四半期ニュースレター　2026年3月号" );
232							print $s->as_string;
233							# =?UTF-8?B?...?=\r\n =?UTF-8?B?...?=\r\n =?UTF-8?B?...?=
234
235							# Round-trip decode
236							my $decoded = $s->decode( $s->as_string );
237							# "株式会社ヤマト・インク　第3四半期ニュースレター　2026年3月号"
238
239							=head1 VERSION
240
241							v0.1.1
242
243							=head1 DESCRIPTION
244
245							A typed header object for the C<Subject> field that implements RFC 2047 encoded-word encoding and decoding.
246
247							Key properties:
248
249							=over 4
250
251							=item *
252
253							Pure printable ASCII subjects are passed through without modification.
254
255							No unnecessary C<=?UTF-8?B?...?=> wrapping.
256
257							=item *
258
259							Non-ASCII subjects are encoded as one or more C<=?UTF-8?B?...?=> encoded-words using Base64.
260
261							=item *
262
263							Long values are split into chunks of at most 45 UTF-8 bytes each, keeping every encoded-word within the RFC 2047 maximum of 75 characters.
264
265							=item *
266
267							Chunks are joined with C<CRLF SP> (header folding), producing a correctly folded multi-line header value.
268
269							=item *
270
271							Chunk boundaries are chosen so as never to split a multi-byte UTF-8 sequence.
272
273							=item *
274
275							The L</decode> method handles both C<?B?> (Base64) and C<?Q?> (Quoted-Printable) encoded-words, and collapses inter-word whitespace per RFC 2047 §6.2.
276
277							=back
278
279							=head1 METHODS
280
281							=head2 new
282
283							Instantiates a new object. Optionally accepts a subject string via the C<value> key in a hash argument, consistent with L<Module::Generic> C<init> conventions.
284
285							=head2 as_string
286
287							Returns the encoded form of the subject, suitable for the wire. Pure ASCII values are returned unchanged. Non-ASCII values are encoded in RFC 2047 C<=?UTF-8?B?...?=> form, folded at CRLF SP as required.
288
289							This method is also invoked by the C<""> overload.
290
291							=head2 decode( $encoded_string )
292
293							Class or instance method. Decodes all RFC 2047 encoded-words present in C<$encoded_string> and returns the result as a Perl Unicode string.
294
295							=head2 field_name
296
297							Returns the string C<Subject>.
298
299							=head2 raw
300
301							Returns the stored Perl Unicode string, before any RFC 2047 encoding.
302
303							=head2 value( [ $text ] )
304
305							Sets or gets the subject text as a Perl Unicode string. On assignment, the encoded cache is invalidated so that the next call to L</as_string> re-encodes the new value.
306
307							=head1 STANDARDS
308
309							=over 4
310
311							=item RFC 2047 - MIME Part Three: Message Header Extensions for Non-ASCII Text
312
313							=item RFC 2822 §2.2 - Header Fields
314
315							=back
316
317							=head1 AUTHOR
318
319							Jacques Deguest E<lt>F<jack@deguest.jp>E<gt>
320
321							=head1 SEE ALSO
322
323							L<Mail::Make>, L<Mail::Make::Headers>, L<Mail::Make::Headers::Generic>
324
325							=head1 COPYRIGHT & LICENSE
326
327							Copyright(c) 2026 DEGUEST Pte. Ltd.
328
329							All rights reserved.
330
331							This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
332
333							=cut