File Coverage

lib/Mail/Make/Headers/Subject.pm
Criterion Covered Total %
statement 100 107 93.4
branch 20 26 76.9
condition 3 8 37.5
subroutine 23 25 92.0
pod 6 8 75.0
total 152 174 87.3


line stmt bran cond sub pod time code
1             ##----------------------------------------------------------------------------
2             ## MIME Email Builder - ~/lib/Mail/Make/Headers/Subject.pm
3             ## Version v0.1.1
4             ## Copyright(c) 2026 DEGUEST Pte. Ltd.
5             ## Author: Jacques Deguest <jack@deguest.jp>
6             ## Created 2026/03/03
7             ## Modified 2026/03/05
8             ## All rights reserved
9             ##
10             ## This program is free software; you can redistribute it and/or modify it
11             ## under the same terms as Perl itself.
12             ##----------------------------------------------------------------------------
13             package Mail::Make::Headers::Subject;
14             BEGIN
15 0         0 {
16 9     9   146483 use strict;
  9         18  
  9         371  
17 9     9   39 use warnings;
  9         15  
  9         547  
18 9     9   94 warnings::register_categories( 'Mail::Make' );
19 9     9   42 use parent qw( Module::Generic );
  9         29  
  9         58  
20 9     9   259016 use vars qw( $VERSION $EXCEPTION_CLASS );
  9         21  
  9         479  
21 9     9   41 use utf8;
  9         27  
  9         60  
22 9     9   374 use Encode ();
  9         14  
  9         178  
23 9     9   515 use Mail::Make::Exception;
  9         16  
  9         73  
24 9     9   6136 use MIME::Base64 ();
  9         5638  
  9         531  
25             use overload(
26             '""' => 'as_string',
27 128     128   5825 'bool' => sub { 1 },
28 9     9   54 );
  9         13  
  9         77  
29 9         13 our $EXCEPTION_CLASS = 'Mail::Make::Exception';
30 9         245 our $VERSION = 'v0.1.1';
31             # RFC 2047 §2: an encoded-word must not exceed 75 characters total.
32             # =?UTF-8?B? ... ?= - the wrapper is 12 chars, leaving 63 for base64 text.
33             # 63 base64 chars encode floor(63 * 3/4) = 47 raw bytes.
34             # We use 45 bytes per chunk (multiple of 3) to keep base64 clean.
35             use constant
36             {
37 9         854 CHARSET => 'UTF-8',
38             EW_MAX_BYTES => 45,
39             FOLD_SEP => "\015\012 ",
40 9     9   1381 };
  9         17  
41             };
42              
43 9     9   56 use strict;
  9         15  
  9         244  
44 9     9   33 use warnings;
  9         13  
  9         10789  
45              
46             sub init
47             {
48 64     64 1 370931 my $self = shift( @_ );
49 64         750 $self->{_raw} = undef; # original Perl string, not yet encoded
50 64         314 $self->{_encoded} = undef; # RFC 2047 form, cached after first encode
51 64         283 $self->{_exception_class} = $EXCEPTION_CLASS;
52 64 50       318 $self->SUPER::init( @_ ) || return( $self->pass_error );
53 64         171 return( $self );
54             }
55              
56             # as_string() - returns the RFC 2047 encoded form suitable for the wire.
57             # Pure ASCII values are returned unchanged.
58             sub as_string
59             {
60 60     60 1 148 my $self = shift( @_ );
61 60 50       194 return( '' ) if( !defined( $self->{_raw} ) );
62 60 50       198 return( $self->{_encoded} ) if( defined( $self->{_encoded} ) );
63 60         266 $self->{_encoded} = _encode_subject( $self->{_raw} );
64 60         555 return( $self->{_encoded} );
65             }
66              
67             # decode( $encoded_string ) - class or instance method.
68             # Decodes an RFC 2047 encoded Subject value back to a Perl string.
69             sub decode
70             {
71 9     9 1 146 my $self = shift( @_ );
72 9   50     26 my $str = shift( @_ ) // return( $self->error( "No value to decode." ) );
73 9         25 return( _decode_subject( $str ) );
74             }
75              
76             # field_name() - always returns 'Subject'
77 1     1 1 13 sub field_name { return( 'Subject' ); }
78              
79             # raw() - returns the decoded Perl string (the original value before encoding)
80 1     1 1 6 sub raw { return( shift->{_raw} ); }
81              
82             # value( [$text] ) - sets or gets the subject text.
83             # On assignment: stores the raw Perl string; clears the encoded cache.
84             # On retrieval: returns the decoded Perl string (i.e. human-readable).
85             sub value
86             {
87 61     61 1 245 my $self = shift( @_ );
88 61 100       183 if( @_ )
89             {
90 60         131 my $text = shift( @_ );
91 60 50       172 unless( defined( $text ) )
92             {
93 0         0 return( $self->error( "Subject value must be a defined scalar." ) );
94             }
95 60         138 $self->{_raw} = $text;
96 60         114 $self->{_encoded} = undef; # invalidate cache
97 60         137 return( $self );
98             }
99 1         7 return( $self->{_raw} );
100             }
101              
102             # _decode_ew( $charset, $encoding, $text ) → decoded Perl string fragment
103             sub _decode_ew
104             {
105 16     16   74 my( $charset, $enc, $text ) = @_;
106 16         149 my $bytes;
107 16 100       48 if( uc( $enc ) eq 'B' )
108             {
109 15         44 $bytes = MIME::Base64::decode_base64( $text );
110             }
111             else
112             {
113             # Q encoding: _ → space, =XX → byte
114 1         4 $text =~ s/_/ /g;
115 1         5 $text =~ s/=([0-9A-Fa-f]{2})/chr( hex( $1 ) )/ge;
  2         10  
116 1         3 $bytes = $text;
117             }
118 16         25 local $@;
119 16         29 my $decoded = eval{ Encode::decode( $charset, $bytes ) };
  16         75  
120 16 50       790 return( $@ ? $bytes : $decoded );
121             }
122              
123             # _decode_subject( $wire_string ) → $perl_string
124             # Decodes all RFC 2047 encoded-words in $wire_string.
125             # Handles both ?B? (Base64) and ?Q? (Quoted-Printable) forms.
126             sub _decode_subject
127             {
128 9     9   18 my $str = shift( @_ );
129 9 100 66     64 return( $str ) unless( defined( $str ) && $str =~ /=\?/ );
130              
131             # Collapse folding whitespace between consecutive encoded-words:
132             # RFC 2047 §6.2: whitespace between two encoded-words is ignored.
133 8         50 $str =~ s/\?=[ \t]*(?:\015\012)?[ \t]*=\?/?==?/g;
134              
135 8         63 $str =~ s/=\?([A-Za-z0-9_-]+)\?([BbQq])\?([^?]*)\?=/_decode_ew( $1, $2, $3 )/ge;
  16         41  
136 8         35 return( $str );
137             }
138              
139             # _encode_subject( $perl_string ) → $wire_string
140             # Returns the string as-is if it is pure printable ASCII (RFC 2822 §2.2).
141             # Otherwise encodes the UTF-8 bytes in one or more RFC 2047 Base64
142             # encoded-words, folded with CRLF SP between them.
143             sub _encode_subject
144             {
145 60     60   198 my $text = shift( @_ );
146              
147             # Pure printable ASCII + tab/space: no encoding needed.
148             # We also reject bare CRs and LFs here; they are illegal in a header.
149 60 100       450 return( $text ) unless( $text =~ /[^\x09\x20-\x7E]/ );
150              
151             # Ensure the string carries Perl's UTF-8 flag before encoding to bytes.
152             # If the flag is off (raw octets from a context without Encode::decode),
153             # Encode::encode treats each octet as Latin-1, producing double-encoding:
154             # e.g. U+2014 (E2 80 94) becomes C3 A2 C2 80 C2 94 instead of E2 80 94.
155 17 100       87 utf8::decode( $text ) unless( utf8::is_utf8( $text ) );
156              
157             # Encode the entire string to UTF-8 bytes, then split into safe chunks.
158             # Crucially: we split on byte boundaries that do NOT break a multi-byte UTF-8 sequence.
159             # Because we encode the whole string first we work purely in bytes and the split is
160             # safe.
161 17         163 my $bytes = Encode::encode( CHARSET, $text );
162              
163 17         879 my @words;
164 17         39 my $offset = 0;
165 17         45 my $total = length( $bytes );
166 17         63 while( $offset < $total )
167             {
168             # Grab up to EW_MAX_BYTES bytes, but do not cut inside a multi-byte sequence.
169             # UTF-8 continuation bytes are 0x80..0xBF; a leading byte of a multi-byte sequence
170             # is 0xC0..0xFF. We back up until we sit at a leading byte boundary (or the very end).
171 31         50 my $len = EW_MAX_BYTES;
172 31 100       86 $len = $total - $offset if( $offset + $len > $total );
173 31         72 my $chunk = substr( $bytes, $offset, $len );
174              
175             # If the byte immediately after our chunk is a UTF-8 continuation byte (0x80–0xBF),
176             # the last character of our chunk is split.c
177             # Walk backwards to a safe cut point.
178 31 100       81 if( $offset + $len < $total )
179             {
180 14         28 my $next = ord( substr( $bytes, $offset + $len, 1 ) );
181 14 50       37 if( ( $next & 0xC0 ) == 0x80 )
182             {
183             # Find last leading byte in chunk
184 0   0     0 while( $len > 0 && ( ord( substr( $chunk, $len - 1, 1 ) ) & 0xC0 ) == 0x80 )
185             {
186 0         0 $len--;
187             }
188 0         0 $chunk = substr( $bytes, $offset, $len );
189             }
190             }
191              
192 31         116 my $b64 = MIME::Base64::encode_base64( $chunk, '' );
193 31         85 push( @words, '=?' . CHARSET . '?B?' . $b64 . '?=' );
194 31         76 $offset += $len;
195             }
196              
197 17         94 return( join( FOLD_SEP, @words ) );
198             }
199              
200             # NOTE: STORABLE support
201 0     0 0   sub STORABLE_freeze { CORE::return( CORE::shift->FREEZE( @_ ) ); }
202              
203 0     0 0   sub STORABLE_thaw { CORE::return( CORE::shift->THAW( @_ ) ); }
204              
205             1;
206             # NOTE: POD
207             __END__
208              
209             =encoding utf-8
210              
211             =head1 NAME
212              
213             Mail::Make::Headers::Subject - RFC 2047 Aware Subject Header for Mail::Make
214              
215             =head1 SYNOPSIS
216              
217             use Mail::Make::Headers::Subject;
218              
219             # Pure ASCII: passed through unchanged
220             my $s = Mail::Make::Headers::Subject->new;
221             $s->value( 'Quarterly Report' );
222             print $s->as_string;
223             # Quarterly Report
224              
225             # Non-ASCII: automatically encoded per RFC 2047
226             $s->value( "Yamato, Inc. - Newsletter" );
227             print $s->as_string;
228             # =?UTF-8?B?QW5nZWxzLCBJbmMuIOKAlCBOZXdzbGV0dGVy?=
229              
230             # Long Japanese subject: folded into multiple encoded-words
231             $s->value( "株式会社ヤマト・インク 第3四半期ニュースレター 2026年3月号" );
232             print $s->as_string;
233             # =?UTF-8?B?...?=\r\n =?UTF-8?B?...?=\r\n =?UTF-8?B?...?=
234              
235             # Round-trip decode
236             my $decoded = $s->decode( $s->as_string );
237             # "株式会社ヤマト・インク 第3四半期ニュースレター 2026年3月号"
238              
239             =head1 VERSION
240              
241             v0.1.1
242              
243             =head1 DESCRIPTION
244              
245             A typed header object for the C<Subject> field that implements RFC 2047 encoded-word encoding and decoding.
246              
247             Key properties:
248              
249             =over 4
250              
251             =item *
252              
253             Pure printable ASCII subjects are passed through without modification.
254              
255             No unnecessary C<=?UTF-8?B?...?=> wrapping.
256              
257             =item *
258              
259             Non-ASCII subjects are encoded as one or more C<=?UTF-8?B?...?=> encoded-words using Base64.
260              
261             =item *
262              
263             Long values are split into chunks of at most 45 UTF-8 bytes each, keeping every encoded-word within the RFC 2047 maximum of 75 characters.
264              
265             =item *
266              
267             Chunks are joined with C<CRLF SP> (header folding), producing a correctly folded multi-line header value.
268              
269             =item *
270              
271             Chunk boundaries are chosen so as never to split a multi-byte UTF-8 sequence.
272              
273             =item *
274              
275             The L</decode> method handles both C<?B?> (Base64) and C<?Q?> (Quoted-Printable) encoded-words, and collapses inter-word whitespace per RFC 2047 §6.2.
276              
277             =back
278              
279             =head1 METHODS
280              
281             =head2 new
282              
283             Instantiates a new object. Optionally accepts a subject string via the C<value> key in a hash argument, consistent with L<Module::Generic> C<init> conventions.
284              
285             =head2 as_string
286              
287             Returns the encoded form of the subject, suitable for the wire. Pure ASCII values are returned unchanged. Non-ASCII values are encoded in RFC 2047 C<=?UTF-8?B?...?=> form, folded at CRLF SP as required.
288              
289             This method is also invoked by the C<""> overload.
290              
291             =head2 decode( $encoded_string )
292              
293             Class or instance method. Decodes all RFC 2047 encoded-words present in C<$encoded_string> and returns the result as a Perl Unicode string.
294              
295             =head2 field_name
296              
297             Returns the string C<Subject>.
298              
299             =head2 raw
300              
301             Returns the stored Perl Unicode string, before any RFC 2047 encoding.
302              
303             =head2 value( [ $text ] )
304              
305             Sets or gets the subject text as a Perl Unicode string. On assignment, the encoded cache is invalidated so that the next call to L</as_string> re-encodes the new value.
306              
307             =head1 STANDARDS
308              
309             =over 4
310              
311             =item RFC 2047 - MIME Part Three: Message Header Extensions for Non-ASCII Text
312              
313             =item RFC 2822 §2.2 - Header Fields
314              
315             =back
316              
317             =head1 AUTHOR
318              
319             Jacques Deguest E<lt>F<jack@deguest.jp>E<gt>
320              
321             =head1 SEE ALSO
322              
323             L<Mail::Make>, L<Mail::Make::Headers>, L<Mail::Make::Headers::Generic>
324              
325             =head1 COPYRIGHT & LICENSE
326              
327             Copyright(c) 2026 DEGUEST Pte. Ltd.
328              
329             All rights reserved.
330              
331             This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
332              
333             =cut