File Coverage

blib/lib/NetSDS/Util/String.pm

Criterion	Covered	Total	%
statement	24	68	35.2
branch	0	28	0.0
condition	0	30	0.0
subroutine	8	17	47.0
pod	9	9	100.0
total	41	152	26.9

line	stmt	bran	cond	sub	pod	time	code
1							#===============================================================================
2							#
3							# FILE: String.pm
4							#
5							# DESCRIPTION: Utilities for easy string processing
6							#
7							# NOTE: This module ported from Wono framework
8							# AUTHOR: Michael Bochkaryov (Rattler), <misha@rattler.kiev.ua>
9							# COMPANY: Net.Style
10							# VERSION: 1.044
11							# CREATED: 03.08.2008 15:04:22 EEST
12							#===============================================================================
13
14							=head1 NAME
15
16							NetSDS::Util::String - string prcessing routines
17
18							=head1 SYNOPSIS
19
20							use NetSDS::Util::String qw();
21
22							# Read from standard input
23							my $string = <STDIN>;
24
25							# Encode string to internal structure
26							$string = string_encode($tring);
27
28
29							=head1 DESCRIPTION
30
31							C<NetSDS::Util::String> module contains functions may be used to quickly solve
32							string processing tasks like parsing, recoding, formatting.
33
34							As in other NetSDS modules standard encoding is UTF-8.
35
36							=cut
37
38							package NetSDS::Util::String;
39
40	2			2		13325	use 5.8.0;
	2					9
	2					115
41	2			2		16	use warnings 'all';
	2					5
	2					86
42	2			2		13	use strict;
	2					13
	2					89
43
44	2			2		13	use base 'Exporter';
	2					4
	2					287
45
46	2			2		14	use version; our $VERSION = '1.044';
	2					5
	2					21
47
48							our @EXPORT = qw(
49							str_encode
50							str_decode
51							str_recode
52							str_trim
53							str_trim_left
54							str_trim_right
55							str_clean
56							str_camelize
57							str_decamelize
58							);
59
60	2			2		219	use POSIX;
	2					4
	2					19
61	2					258	use Encode qw(
62							encode
63							decode
64							encode_utf8
65							decode_utf8
66							from_to
67							is_utf8
68	2			2		8336	);
	2					12376
69
70							my $BLANK = "[:blank:][:space:][:cntrl:]";
71
72	2			2		14	use constant DEFAULT_ENCODING => 'UTF-8';
	2					6
	2					1764
73
74							#***********************************************************************
75							#
76							# ENCODING/DECODING/RECODING FUNCTIONS
77							#
78							#***********************************************************************
79
80							=head1 EXPORTED FUNCTIONS
81
82							=over
83
84							=item B<str_encode($str[, $encoding])> - encode string to internal UTF-8
85
86							By default this function treat first argument as byte string in UTF-8
87							and return it's internal Unicode representation.
88
89							In case of external character set isn't UTF-8 it should be added as second
90							argument of function.
91
92
93							# Convert UTF-8 byte string to internal Unicode representation
94							$uni_string = str_encode($byte_string);
95
96							# Convert KOI8-U byte string to internal
97							$uni_string = str_encode($koi8_string, 'KOI8-U');
98
99							After C<str_encode()> it's possible to process this string correctly
100							including regular expressions. All characters will be understood
101							as UTF-8 symbols instead of byte sequences.
102
103							=cut
104
105							#-----------------------------------------------------------------------
106							sub str_encode {
107	0			0	1		my ( $txt, $enc ) = @_;
108
109	0	0	0				if ( defined($txt) and ( $txt ne '' ) ) {
110	0	0					unless ( is_utf8($txt) ) {
111	0		0				$txt = decode( $enc \|\| DEFAULT_ENCODING, $txt );
112							}
113							}
114
115	0						return $txt;
116							}
117
118							#***********************************************************************
119
120							=item B<str_decode($str[, $encoding])> - decode internal UTF-8 to byte string
121
122							By default this function treat first argument as string in internal UTF-8
123							and return it in byte string (external) representation.
124
125							In case of external character set isn't UTF-8 it should be added as second
126							argument of function.
127
128
129							# Get UTF-8 byte string from internal Unicode representation
130							$byte_string = str_decode($uni_string);
131
132							# Convert to KOI8-U byte string from internal Unicode
133							$koi8_string = str_encode($uni_string, 'KOI8-U');
134
135							It's recommended to use C<str_encode()> when preparing data for
136							communication with external systems (especially networking).
137
138							=cut
139
140							#-----------------------------------------------------------------------
141							sub str_decode {
142	0			0	1		my ( $txt, $enc ) = @_;
143
144	0	0	0				if ( defined($txt) and ( $txt ne '' ) ) {
145	0	0					if ( is_utf8($txt) ) {
146	0		0				$txt = encode( $enc \|\| DEFAULT_ENCODING, $txt );
147							}
148							}
149
150	0						return $txt;
151							}
152
153							#***********************************************************************
154
155							=item B<str_recode($str, $FROM_ENC[, $TO_ENC])> - recode string
156
157							Translate string between different encodings.
158							If target encoding is not set UTF-8 used as default one.
159
160							=cut
161
162							#-----------------------------------------------------------------------
163							sub str_recode {
164	0			0	1		my ( $txt, $enc, $trg ) = @_;
165
166	0	0	0				if ( defined($txt) and ( $txt ne '' ) ) {
167	0	0					if ($enc) {
168	0		0				my $len = from_to( $txt, $enc, $trg \|\| DEFAULT_ENCODING );
169	0	0					unless ( defined($len) ) {
170	0						$txt = undef;
171							}
172							}
173							}
174
175	0						return $txt;
176							}
177
178							#***********************************************************************
179							#
180							# CLEANING STRINGS
181							#
182							#***********************************************************************
183
184							=item B<str_trim($str)> - remove leading/trailing space characters
185
186							$orig_str = " string with spaces ";
187							$new_str = str_trim($orig_str);
188
189							# Output: "string with spaces"
190							print $new_str;
191
192							=cut
193
194							#-----------------------------------------------------------------------
195							sub str_trim {
196	0			0	1		my ($s) = @_;
197
198	0	0	0				if ( defined($s) and ( $s ne '' ) ) {
199	0						$s =~ s/^[$BLANK]+//s;
200	0						$s =~ s/[$BLANK]+$//s;
201							}
202
203	0						return $s;
204							}
205
206							#***********************************************************************
207
208							=item B<str_trim_left($str)> - removes leading whitespaces
209
210							This function is similar to C<str_trim()> except of it removes only
211							leading space characters and leave trailing ones.
212
213							=cut
214
215							#-----------------------------------------------------------------------
216							sub str_trim_left {
217	0	0		0	1		my ($s) = @_ ? @_ : $_;
218
219	0	0	0				if ( defined($s) and ( $s ne '' ) ) {
220	0						$s =~ s/^[$BLANK]+//s;
221							}
222
223	0						return $s;
224							}
225
226							#***********************************************************************
227
228							=item B<str_trim_right($str)> - removes trailing whitespaces
229
230							This function is similar to C<str_trim()> except of it removes only
231							trailing space characters and leave leading ones.
232
233							=cut
234
235							#-----------------------------------------------------------------------
236							sub str_trim_right {
237	0	0		0	1		my ($s) = @_ ? @_ : $_;
238
239	0	0	0				if ( defined($s) and ( $s ne '' ) ) {
240	0						$s =~ s/[$BLANK]+$//s;
241							}
242
243	0						return $s;
244							}
245
246							#***********************************************************************
247
248							=item B<str_clean($str)> - clean string from extra spaces
249
250							Function is similar to C<str_trim()> but also changes all spacing chains
251							inside string to single spaces.
252
253							=cut
254
255							#-----------------------------------------------------------------------
256							sub str_clean {
257
258	0			0	1		my ($txt) = @_;
259
260	0	0	0				if ( defined($txt) and ( $txt ne '' ) ) {
261	0						$txt =~ s/^[$BLANK]+//s;
262	0						$txt =~ s/[$BLANK]+$//s;
263	0						$txt =~ s/[$BLANK]+/ /gs;
264							}
265
266	0						return $txt;
267							}
268
269							#**************************************************************************
270
271							=item B<str_camelize($strin)>
272
273							If pass undef - return undef.
274							If pass '' - return ''.
275
276							Examples:
277
278							# returns 'getValue'
279							str_camelize( 'get_value' )
280
281							# returns 'addUserAction'
282							str_camelize( 'ADD_User_actION' )
283
284							=cut
285
286							#-----------------------------------------------------------------------
287							sub str_camelize {
288
289	0			0	1		my $s = shift;
290
291	0	0	0				if ( defined($s) and ( $s ne '' ) ) {
292	0						$s = lc($s);
293	0						$s =~ s/_([0-9a-z])/\U$1/g;
294							}
295
296	0						return $s;
297							}
298
299							#**************************************************************************
300
301							=item B<str_decamelize(...)>
302
303							If pass undef - return undef.
304							If pass '' - return ''.
305
306							Examples:
307
308							# returns 'get_value'
309							str_decamelize( 'getValue' )
310
311							=cut
312
313							#-----------------------------------------------------------------------
314							sub str_decamelize {
315
316	0			0	1		my $s = shift;
317
318	0						$s =~ s/([A-Z])/_\L$1/g;
319
320	0						return lc($s);
321							}
322
323							1;
324							__END__
325
326							=back
327
328							=head1 EXAMPLES
329
330							None yet
331
332							=head1 BUGS
333
334							Unknown yet
335
336							=head1 TODO
337
338							Implement examples and tests.
339
340							=head1 SEE ALSO
341
342							L<Encode>, L<perlunicode>
343
344							=head1 AUTHORS
345
346							Valentyn Solomko <pere@pere.org.ua>
347
348							Michael Bochkaryov <misha@rattler.kiev.ua>
349
350							=cut