| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Text::Stripper; | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | # DOCUMENT:  Text-Stripper, strips of text. | 
| 4 |  |  |  |  |  |  | # VERSION:   $Revision: 1.18 $ | 
| 5 |  |  |  |  |  |  | # DATE:      $Date: 2007-06-14 20:00:01 $ | 
| 6 |  |  |  |  |  |  | # AUTHOR:    M. Beranek | 
| 7 |  |  |  |  |  |  | # COPYRIGHT: M. Beranek | 
| 8 |  |  |  |  |  |  |  | 
| 9 | 3 |  |  | 3 |  | 97132 | use 5.006001; | 
|  | 3 |  |  |  |  | 11 |  | 
|  | 3 |  |  |  |  | 126 |  | 
| 10 | 3 |  |  | 3 |  | 18 | use strict; | 
|  | 3 |  |  |  |  | 8 |  | 
|  | 3 |  |  |  |  | 105 |  | 
| 11 | 3 |  |  | 3 |  | 17 | use warnings; | 
|  | 3 |  |  |  |  | 17 |  | 
|  | 3 |  |  |  |  | 13981 |  | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | require Exporter; | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | our @ISA = qw(Exporter); | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | # Items to export into callers namespace by default. Note: do not export | 
| 18 |  |  |  |  |  |  | # names by default without a very good reason. Use EXPORT_OK instead. | 
| 19 |  |  |  |  |  |  | # Do not simply export all your public functions/methods/constants. | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | # This allows declaration	use Text::Stripper ':all'; | 
| 22 |  |  |  |  |  |  | # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK | 
| 23 |  |  |  |  |  |  | # will save memory. | 
| 24 |  |  |  |  |  |  | #our %EXPORT_TAGS = ( 'all' => [ qw( | 
| 25 |  |  |  |  |  |  | #	stripof | 
| 26 |  |  |  |  |  |  | #) ] ); | 
| 27 |  |  |  |  |  |  | # | 
| 28 |  |  |  |  |  |  | #our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | our @EXPORT_OK = qw( | 
| 31 |  |  |  |  |  |  | stripof | 
| 32 |  |  |  |  |  |  | breakpoints | 
| 33 |  |  |  |  |  |  | ); | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  | #my $cvsRev = '$Revision: 1.18 $'; | 
| 36 |  |  |  |  |  |  | #$cvsRev =~ s/\$Revision:\s//g; | 
| 37 |  |  |  |  |  |  | #$cvsRev =~ s/\s\$//g; | 
| 38 |  |  |  |  |  |  |  | 
| 39 |  |  |  |  |  |  | our $VERSION = '1.16'; | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | # Possible breakpoints: | 
| 42 |  |  |  |  |  |  | our @breakpoints = ( ' ', '\t', '\.', ',', ';', ':', '!', | 
| 43 |  |  |  |  |  |  | '-', '\?', '\n', '\r', '\/', '\|', '\(', '\)' ); | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | # ---------------------------------------------------------------------- | 
| 46 |  |  |  |  |  |  | # Shortens text at a possible position. | 
| 47 |  |  |  |  |  |  | # $text - text to be shortend | 
| 48 |  |  |  |  |  |  | # $len  - minimum length | 
| 49 |  |  |  |  |  |  | # $tol  - maximum tolerance | 
| 50 |  |  |  |  |  |  | # $max  - 0 = shorten as early as possible | 1 = shorten as late as possible | 
| 51 |  |  |  |  |  |  | # $dots - 0 = no dots after shortening | 1 add dots after shortening | 
| 52 |  |  |  |  |  |  | # ---------------------------------------------------------------------- | 
| 53 |  |  |  |  |  |  | sub stripof { | 
| 54 |  |  |  |  |  |  |  | 
| 55 |  |  |  |  |  |  | # Parameter: | 
| 56 | 35 | 50 |  | 35 | 0 | 92 | my $text = shift or return; | 
| 57 | 35 | 50 |  |  |  | 63 | my $len  = shift or return $text; | 
| 58 | 35 | 50 |  |  |  | 65 | my $tol  = shift or return substr($text, $len); | 
| 59 | 35 | 100 |  |  |  | 61 | my $max  = shift or 0; | 
| 60 | 35 | 50 |  |  |  | 60 | my $dots = shift or 0; | 
| 61 |  |  |  |  |  |  |  | 
| 62 |  |  |  |  |  |  | # Possible breakpoints: | 
| 63 |  |  |  |  |  |  | #my @breakpoints = ( ' ', '\t', '.', ',', ';', ':', '!', '-', '?', '/', '|', '(', ')' ); | 
| 64 |  |  |  |  |  |  |  | 
| 65 |  |  |  |  |  |  | # minimum / maximum length: | 
| 66 | 35 |  |  |  |  | 44 | my $maxLen = $len + $tol; | 
| 67 | 35 |  |  |  |  | 31 | my $minLen = $len; | 
| 68 |  |  |  |  |  |  |  | 
| 69 |  |  |  |  |  |  | # current length: | 
| 70 | 35 |  |  |  |  | 37 | my $textLen = length( $text ); | 
| 71 |  |  |  |  |  |  |  | 
| 72 |  |  |  |  |  |  | # if search for latest break: | 
| 73 | 35 | 100 |  |  |  | 81 | if( $max ){ | 
| 74 |  |  |  |  |  |  | # stop, if text is shorter than maximum: | 
| 75 | 20 | 100 |  |  |  | 38 | if( $textLen <= $maxLen ){ | 
| 76 | 7 |  |  |  |  | 22 | return $text; | 
| 77 |  |  |  |  |  |  | } | 
| 78 |  |  |  |  |  |  | } | 
| 79 |  |  |  |  |  |  |  | 
| 80 |  |  |  |  |  |  | # shortest possible text (will be in returned string always): | 
| 81 | 28 |  |  |  |  | 33 | my $minText  = substr( $text, 0, $minLen ); | 
| 82 |  |  |  |  |  |  |  | 
| 83 |  |  |  |  |  |  | # longest possible text: | 
| 84 | 28 |  |  |  |  | 45 | my $maxText  = substr( $text, 0, $maxLen ); | 
| 85 |  |  |  |  |  |  |  | 
| 86 |  |  |  |  |  |  | # text between minimum and maximum: | 
| 87 | 28 |  |  |  |  | 37 | my $restText = substr( $text, $minLen, $tol ); | 
| 88 |  |  |  |  |  |  |  | 
| 89 |  |  |  |  |  |  | # buffer for return-string: | 
| 90 | 28 |  |  |  |  | 30 | my $shortText = ""; | 
| 91 |  |  |  |  |  |  |  | 
| 92 |  |  |  |  |  |  | # buffer for additional-text: | 
| 93 | 28 |  |  |  |  | 29 | my $addText = ""; | 
| 94 |  |  |  |  |  |  |  | 
| 95 |  |  |  |  |  |  | # find breakpoint as late as possible: | 
| 96 | 28 | 100 |  |  |  | 44 | if( $max ){ | 
| 97 |  |  |  |  |  |  |  | 
| 98 |  |  |  |  |  |  | # we're just working on $resttext: | 
| 99 | 13 |  |  |  |  | 14 | $addText = $restText; | 
| 100 |  |  |  |  |  |  |  | 
| 101 |  |  |  |  |  |  | # previously hardcoded regexp: | 
| 102 |  |  |  |  |  |  | # $addText =~ s/(.*)[ ,\t\.;:!-\?\/\|\(\)].+/$1/gi; | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | # use regexp to find possible breakpoints. regexps are greedy, | 
| 105 |  |  |  |  |  |  | # so they will find the last possible space: | 
| 106 | 13 |  |  |  |  | 28 | my $regexpBreakpoints = join '', @breakpoints; | 
| 107 |  |  |  |  |  |  | # print "X:".$regexpBreakpoints.":X"; | 
| 108 | 13 |  |  |  |  | 137 | $addText =~ s/(.*)[$regexpBreakpoints].*/$1/g; | 
| 109 |  |  |  |  |  |  |  | 
| 110 |  |  |  |  |  |  | # if no space was found: | 
| 111 | 13 | 50 |  |  |  | 36 | if( $addText eq '' ){ | 
| 112 |  |  |  |  |  |  | # return complete text: | 
| 113 | 0 |  |  |  |  | 0 | $addText = $restText; | 
| 114 |  |  |  |  |  |  | } | 
| 115 |  |  |  |  |  |  | # return minimum + additional: | 
| 116 | 13 |  |  |  |  | 28 | $shortText = "$minText$addText"; | 
| 117 |  |  |  |  |  |  | } | 
| 118 |  |  |  |  |  |  |  | 
| 119 |  |  |  |  |  |  | # search for first possible break: | 
| 120 |  |  |  |  |  |  | else { | 
| 121 |  |  |  |  |  |  | # emty additional text: | 
| 122 | 15 |  |  |  |  | 17 | $addText = ""; | 
| 123 |  |  |  |  |  |  |  | 
| 124 |  |  |  |  |  |  | # find first break: | 
| 125 |  |  |  |  |  |  | # test all characters in the $restText | 
| 126 | 15 |  |  |  |  | 37 | for( my $idx = 0; $idx < $tol; $idx ++ ){ | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  | # current character: | 
| 129 | 39 |  |  |  |  | 46 | my $char = substr( $restText, $idx, 1 ); | 
| 130 |  |  |  |  |  |  |  | 
| 131 |  |  |  |  |  |  | # is character a space? | 
| 132 | 39 |  |  |  |  | 40 | my $isSpace = 0; | 
| 133 |  |  |  |  |  |  |  | 
| 134 |  |  |  |  |  |  | # test if character matches on of the | 
| 135 |  |  |  |  |  |  | # space-characters defined in @breakpoints: | 
| 136 | 39 |  |  |  |  | 51 | foreach( @breakpoints ){ | 
| 137 |  |  |  |  |  |  |  | 
| 138 |  |  |  |  |  |  | # if caharcter matches space: | 
| 139 | 245 | 100 |  |  |  | 417 | if( $char eq $_ ){ | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  | # mark as space, skip rest of @breakpoints: | 
| 142 | 15 |  |  |  |  | 16 | $isSpace = 1; | 
| 143 | 15 |  |  |  |  | 16 | last; | 
| 144 |  |  |  |  |  |  | } | 
| 145 |  |  |  |  |  |  |  | 
| 146 |  |  |  |  |  |  |  | 
| 147 |  |  |  |  |  |  | } | 
| 148 |  |  |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | # if we didn't find a space: | 
| 150 | 39 | 100 |  |  |  | 63 | if( ! $isSpace ){ | 
| 151 |  |  |  |  |  |  | # append the character to the buffer: | 
| 152 | 24 |  |  |  |  | 54 | $addText .= $char; | 
| 153 |  |  |  |  |  |  | } | 
| 154 |  |  |  |  |  |  | # if we found a space: | 
| 155 |  |  |  |  |  |  | else { | 
| 156 |  |  |  |  |  |  | # stop here: | 
| 157 | 15 |  |  |  |  | 17 | last; | 
| 158 |  |  |  |  |  |  | } | 
| 159 |  |  |  |  |  |  |  | 
| 160 |  |  |  |  |  |  | } | 
| 161 |  |  |  |  |  |  | # return text = minimum-text + additional-text: | 
| 162 | 15 |  |  |  |  | 21 | $shortText = "$minText$addText"; | 
| 163 |  |  |  |  |  |  | } | 
| 164 |  |  |  |  |  |  |  | 
| 165 |  |  |  |  |  |  | # if we want some dots on the shortened text: | 
| 166 | 28 | 50 |  |  |  | 54 | if( $dots ){ | 
| 167 |  |  |  |  |  |  | # only if text is really shorter than the original text: | 
| 168 | 28 | 50 |  |  |  | 58 | if( length($shortText) < length($text) ){ | 
| 169 |  |  |  |  |  |  | # append dots: | 
| 170 | 28 |  |  |  |  | 35 | $shortText .= "..."; | 
| 171 |  |  |  |  |  |  | } | 
| 172 |  |  |  |  |  |  | } | 
| 173 |  |  |  |  |  |  |  | 
| 174 |  |  |  |  |  |  | # return the shortened text: | 
| 175 | 28 |  |  |  |  | 152 | return $shortText; | 
| 176 |  |  |  |  |  |  |  | 
| 177 |  |  |  |  |  |  | } | 
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  |  | 
| 180 |  |  |  |  |  |  | 1; | 
| 181 |  |  |  |  |  |  | __END__ |