File Coverage

blib/lib/Text/Fold.pm

Criterion	Covered	Total	%
statement	76	77	98.7
branch	40	42	95.2
condition	20	22	90.9
subroutine	6	6	100.0
pod	1	1	100.0
total	143	148	96.6

line	stmt	bran	cond	sub	pod	time	code
1							package Text::Fold;
2
3	3			3		58479	use strict;
	3					9
	3					230
4	3			3		15	use warnings;
	3					5
	3					83
5	3			3		2857	use String::UnicodeUTF8;
	3					14402
	3					21
6
7							$Text::Fold::VERSION = '0.5';
8
9							sub import {
10	3			3		125	no strict 'refs';
	3					6
	3					2871
11	2			2		19	*{ caller() . '::fold_text' } = \&fold_text;
	2					33
12							}
13
14							sub fold_text {
15	31			31	1	274	my ( $orig_line, $width, $join ) = @_;
16
17	31					33	my $conf;
18	31	100	100			127	if ( defined $width && ref($width) eq 'HASH' ) {
19	2					3	$conf = $width;
20	2					3	$width = undef;
21							}
22	31	100	50			74	$width = defined $width ? abs( int($width) ) \|\| 78 : 78;
23
24	31	100	100			166	if ( !defined $conf && defined $join && ref($join) eq 'HASH' ) {
			100
25	10					12	$conf = $join;
26	10					12	$join = undef;
27							}
28	31		100			91	$conf \|\|= {};
29	31	100				64	$join = $conf->{'join'} if exists $conf->{'join'};
30
31	31					34	my $soft_hyphen_threshold = 0;
32	31	100				57	if ( exists $conf->{'soft_hyphen_threshold'} ) {
33
34							# Zero-but-true means default to ~20% of width
35							# Since this should be a number this works, if it could be a string then this would not work
36	6	100	66			37	if ( $conf->{'soft_hyphen_threshold'} && $conf->{'soft_hyphen_threshold'} == 0 ) {
37	1					3	$soft_hyphen_threshold = int( $width / 5 );
38							}
39							else {
40	5					9	$soft_hyphen_threshold = abs( int( $conf->{'soft_hyphen_threshold'} ) );
41							}
42
43	6	100	100			24	if ( $soft_hyphen_threshold < 3 \|\| $soft_hyphen_threshold > $width ) {
44	2					4	$soft_hyphen_threshold = $width;
45							}
46							}
47
48	31	100				75	my $turn_back_into_byte_string = String::UnicodeUTF8::is_unicode($orig_line) ? 0 : 1;
49	31					69	my $line = String::UnicodeUTF8::get_unicode($orig_line);
50
51							# split(/\n/, "foo\nbar\nbaz\n") is (foo, bar, baz) not (foo, bar, baz, '')
52							# split(/\n/, "foo\nbar\nbaz\n\n\n") is (foo, bar, baz) not (foo, bar, baz, '', '', '')
53							# So we need to count the trailing newlines in order to add them back at the end.
54							#
55							# This trailing newline count is a corner case where `perldoc -q count` did not do the trick.
56							# If you have a better/faster way I'm all ears!
57							#
58							# Removing them (i.e. s///) is safe since the split() will essentially be removing them anyway.
59	31					347	my $trailing_newlines_count = 0;
60	31					144	while ( $line =~ s/(?:\015\012\|\012\|\015)\z//g ) {
61	14					57	$trailing_newlines_count++;
62							}
63
64							# It was entirely newlines
65	31	100				59	if ( $line eq '' ) {
66	5	50				31	return defined $join ? $join : "\n" x $trailing_newlines_count;
67							}
68
69	26					29	my @aggregate_tokens;
70							my $part; # buffer
71
72							LINE:
73	26					178	for $part ( split( /(?:\015\012\|\012\|\015)/, $line ) ) {
74							PARSE_PART:
75							{
76	189	100				204	if ( $part eq '' ) {
	189					373
77	14					17	push @aggregate_tokens, $part;
78	14					24	next LINE;
79							}
80
81	175					993	my @tokens = ( $part =~ m/.{1,$width}/g );
82
83							# unpack(A) lops off trailing spaces on each chunk, if there's a better way I'm all ears!
84							# my @tokens = unpack( "A$width" x ( CORE::length($part) / $width ) . ' A*', $part );
85
86	175					206	my $n; # buffer
87	175					255	my $last_index = $#tokens;
88	175					260	for $n ( 0 .. $last_index ) {
89	190	100				432	if ( $n < $last_index ) {
90	115	100	100			689	if ( $tokens[$n] =~ m/[^ \t\f]\z/ && $tokens[ $n + 1 ] =~ m/\A[^ \t\f]/ ) {
91
92	100	100				161	if ($soft_hyphen_threshold) {
93	17					106	my ($end_chunk) = $tokens[$n] =~ m/([^ \t\f]+)\z/;
94	17					63	my ($beg_chunk) = $tokens[ $n + 1 ] =~ m/\A([^ \t\f]+)/;
95
96	17	100				41	if ( CORE::length("$end_chunk$beg_chunk") <= $soft_hyphen_threshold ) {
97	6					157	$tokens[$n] =~ s/[^ \t\f]+\z//;
98	6					20	$tokens[ $n + 1 ] =~ s/\A[^ \t\f]+//;
99	6					14	push @aggregate_tokens, @tokens[ 0 .. $n ];
100	6					17	$part = join( '', $end_chunk, $beg_chunk, @tokens[ $n + 1 .. $last_index ] );
101	6					90	goto PARSE_PART;
102							}
103							else {
104	11					63	goto SOFT_HYPHEN;
105							}
106							}
107							else {
108	94	100				423	SOFT_HYPHEN:
109							my $last_chr = CORE::substr( $tokens[$n], -2, 1 ) =~ m/[ \t\f]/ ? CORE::substr( $tokens[$n], -1, 1, " " ) : CORE::substr( $tokens[$n], -1, 1, "-" );
110
111	94	100				145	if ($n) {
112	5					14	push @aggregate_tokens, @tokens[ 0 .. $n - 1 ], $tokens[$n];
113							}
114							else {
115	89					153	push @aggregate_tokens, $tokens[$n];
116							}
117
118	94					236	$part = join( '', $last_chr, @tokens[ $n + 1 .. $last_index ] );
119	94					1444	goto PARSE_PART;
120							}
121							}
122							}
123							}
124
125							# unpack will return an empty token as last token if the 2nd to last token
126							# was exactly $width long, so we need to pop the last element if it's empty
127	75	50				191	if ( $tokens[-1] eq '' ) {
128	0					0	pop @tokens;
129							}
130
131	75					164	push @aggregate_tokens, @tokens;
132							}
133							}
134
135	26	100				58	if ($turn_back_into_byte_string) {
136	25					69	for ( 0 .. $#aggregate_tokens ) {
137	173					1240	$aggregate_tokens[$_] = String::UnicodeUTF8::get_utf8( $aggregate_tokens[$_] );
138							}
139							}
140
141	26	100				215	if ($trailing_newlines_count) {
142	2					4	for ( 1 .. $trailing_newlines_count ) {
143	6					9	push @aggregate_tokens, '';
144							}
145							}
146
147	26	100				269	return join( defined $join ? $join : "\n", @aggregate_tokens );
148							}
149
150							1;
151
152							__END__