line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::Stripper; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# DOCUMENT: Text-Stripper, strips of text. |
4
|
|
|
|
|
|
|
# VERSION: $Revision: 1.18 $ |
5
|
|
|
|
|
|
|
# DATE: $Date: 2007-06-14 20:00:01 $ |
6
|
|
|
|
|
|
|
# AUTHOR: M. Beranek |
7
|
|
|
|
|
|
|
# COPYRIGHT: M. Beranek |
8
|
|
|
|
|
|
|
|
9
|
3
|
|
|
3
|
|
97132
|
use 5.006001; |
|
3
|
|
|
|
|
11
|
|
|
3
|
|
|
|
|
126
|
|
10
|
3
|
|
|
3
|
|
18
|
use strict; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
105
|
|
11
|
3
|
|
|
3
|
|
17
|
use warnings; |
|
3
|
|
|
|
|
17
|
|
|
3
|
|
|
|
|
13981
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
require Exporter; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# Items to export into callers namespace by default. Note: do not export |
18
|
|
|
|
|
|
|
# names by default without a very good reason. Use EXPORT_OK instead. |
19
|
|
|
|
|
|
|
# Do not simply export all your public functions/methods/constants. |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# This allows declaration use Text::Stripper ':all'; |
22
|
|
|
|
|
|
|
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK |
23
|
|
|
|
|
|
|
# will save memory. |
24
|
|
|
|
|
|
|
#our %EXPORT_TAGS = ( 'all' => [ qw( |
25
|
|
|
|
|
|
|
# stripof |
26
|
|
|
|
|
|
|
#) ] ); |
27
|
|
|
|
|
|
|
# |
28
|
|
|
|
|
|
|
#our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
our @EXPORT_OK = qw( |
31
|
|
|
|
|
|
|
stripof |
32
|
|
|
|
|
|
|
breakpoints |
33
|
|
|
|
|
|
|
); |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
#my $cvsRev = '$Revision: 1.18 $'; |
36
|
|
|
|
|
|
|
#$cvsRev =~ s/\$Revision:\s//g; |
37
|
|
|
|
|
|
|
#$cvsRev =~ s/\s\$//g; |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
our $VERSION = '1.16'; |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# Possible breakpoints: |
42
|
|
|
|
|
|
|
our @breakpoints = ( ' ', '\t', '\.', ',', ';', ':', '!', |
43
|
|
|
|
|
|
|
'-', '\?', '\n', '\r', '\/', '\|', '\(', '\)' ); |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
# ---------------------------------------------------------------------- |
46
|
|
|
|
|
|
|
# Shortens text at a possible position. |
47
|
|
|
|
|
|
|
# $text - text to be shortend |
48
|
|
|
|
|
|
|
# $len - minimum length |
49
|
|
|
|
|
|
|
# $tol - maximum tolerance |
50
|
|
|
|
|
|
|
# $max - 0 = shorten as early as possible | 1 = shorten as late as possible |
51
|
|
|
|
|
|
|
# $dots - 0 = no dots after shortening | 1 add dots after shortening |
52
|
|
|
|
|
|
|
# ---------------------------------------------------------------------- |
53
|
|
|
|
|
|
|
sub stripof { |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# Parameter: |
56
|
35
|
50
|
|
35
|
0
|
92
|
my $text = shift or return; |
57
|
35
|
50
|
|
|
|
63
|
my $len = shift or return $text; |
58
|
35
|
50
|
|
|
|
65
|
my $tol = shift or return substr($text, $len); |
59
|
35
|
100
|
|
|
|
61
|
my $max = shift or 0; |
60
|
35
|
50
|
|
|
|
60
|
my $dots = shift or 0; |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# Possible breakpoints: |
63
|
|
|
|
|
|
|
#my @breakpoints = ( ' ', '\t', '.', ',', ';', ':', '!', '-', '?', '/', '|', '(', ')' ); |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# minimum / maximum length: |
66
|
35
|
|
|
|
|
44
|
my $maxLen = $len + $tol; |
67
|
35
|
|
|
|
|
31
|
my $minLen = $len; |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# current length: |
70
|
35
|
|
|
|
|
37
|
my $textLen = length( $text ); |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# if search for latest break: |
73
|
35
|
100
|
|
|
|
81
|
if( $max ){ |
74
|
|
|
|
|
|
|
# stop, if text is shorter than maximum: |
75
|
20
|
100
|
|
|
|
38
|
if( $textLen <= $maxLen ){ |
76
|
7
|
|
|
|
|
22
|
return $text; |
77
|
|
|
|
|
|
|
} |
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# shortest possible text (will be in returned string always): |
81
|
28
|
|
|
|
|
33
|
my $minText = substr( $text, 0, $minLen ); |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# longest possible text: |
84
|
28
|
|
|
|
|
45
|
my $maxText = substr( $text, 0, $maxLen ); |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# text between minimum and maximum: |
87
|
28
|
|
|
|
|
37
|
my $restText = substr( $text, $minLen, $tol ); |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
# buffer for return-string: |
90
|
28
|
|
|
|
|
30
|
my $shortText = ""; |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# buffer for additional-text: |
93
|
28
|
|
|
|
|
29
|
my $addText = ""; |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# find breakpoint as late as possible: |
96
|
28
|
100
|
|
|
|
44
|
if( $max ){ |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
# we're just working on $resttext: |
99
|
13
|
|
|
|
|
14
|
$addText = $restText; |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# previously hardcoded regexp: |
102
|
|
|
|
|
|
|
# $addText =~ s/(.*)[ ,\t\.;:!-\?\/\|\(\)].+/$1/gi; |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# use regexp to find possible breakpoints. regexps are greedy, |
105
|
|
|
|
|
|
|
# so they will find the last possible space: |
106
|
13
|
|
|
|
|
28
|
my $regexpBreakpoints = join '', @breakpoints; |
107
|
|
|
|
|
|
|
# print "X:".$regexpBreakpoints.":X"; |
108
|
13
|
|
|
|
|
137
|
$addText =~ s/(.*)[$regexpBreakpoints].*/$1/g; |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# if no space was found: |
111
|
13
|
50
|
|
|
|
36
|
if( $addText eq '' ){ |
112
|
|
|
|
|
|
|
# return complete text: |
113
|
0
|
|
|
|
|
0
|
$addText = $restText; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
# return minimum + additional: |
116
|
13
|
|
|
|
|
28
|
$shortText = "$minText$addText"; |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# search for first possible break: |
120
|
|
|
|
|
|
|
else { |
121
|
|
|
|
|
|
|
# emty additional text: |
122
|
15
|
|
|
|
|
17
|
$addText = ""; |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
# find first break: |
125
|
|
|
|
|
|
|
# test all characters in the $restText |
126
|
15
|
|
|
|
|
37
|
for( my $idx = 0; $idx < $tol; $idx ++ ){ |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
# current character: |
129
|
39
|
|
|
|
|
46
|
my $char = substr( $restText, $idx, 1 ); |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
# is character a space? |
132
|
39
|
|
|
|
|
40
|
my $isSpace = 0; |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# test if character matches on of the |
135
|
|
|
|
|
|
|
# space-characters defined in @breakpoints: |
136
|
39
|
|
|
|
|
51
|
foreach( @breakpoints ){ |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# if caharcter matches space: |
139
|
245
|
100
|
|
|
|
417
|
if( $char eq $_ ){ |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# mark as space, skip rest of @breakpoints: |
142
|
15
|
|
|
|
|
16
|
$isSpace = 1; |
143
|
15
|
|
|
|
|
16
|
last; |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
# if we didn't find a space: |
150
|
39
|
100
|
|
|
|
63
|
if( ! $isSpace ){ |
151
|
|
|
|
|
|
|
# append the character to the buffer: |
152
|
24
|
|
|
|
|
54
|
$addText .= $char; |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
# if we found a space: |
155
|
|
|
|
|
|
|
else { |
156
|
|
|
|
|
|
|
# stop here: |
157
|
15
|
|
|
|
|
17
|
last; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
# return text = minimum-text + additional-text: |
162
|
15
|
|
|
|
|
21
|
$shortText = "$minText$addText"; |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# if we want some dots on the shortened text: |
166
|
28
|
50
|
|
|
|
54
|
if( $dots ){ |
167
|
|
|
|
|
|
|
# only if text is really shorter than the original text: |
168
|
28
|
50
|
|
|
|
58
|
if( length($shortText) < length($text) ){ |
169
|
|
|
|
|
|
|
# append dots: |
170
|
28
|
|
|
|
|
35
|
$shortText .= "..."; |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# return the shortened text: |
175
|
28
|
|
|
|
|
152
|
return $shortText; |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
1; |
181
|
|
|
|
|
|
|
__END__ |