|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Data::Random::Structure::UTF8;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
317351
 | 
 use 5.8.0;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
    | 
| 
4
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
24
 | 
 use strict;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
81
 | 
    | 
| 
5
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
31
 | 
 use warnings;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
216
 | 
    | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION='0.06';  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
9
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
2004
 | 
 use parent 'Data::Random::Structure';  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1213
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
    | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
11
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
70460
 | 
 use Scalar::Util qw( looks_like_number );  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5201
 | 
    | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub	new {  | 
| 
14
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
4504
 | 
 	my $class = shift;  | 
| 
15
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
 	my %options = @_;  | 
| 
16
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 	my $only_unicode = 0;  | 
| 
17
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
22
 | 
 	if( exists $options{'only-unicode'} ){  | 
| 
18
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 		if( defined $options{'only-unicode'} ){  | 
| 
19
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 			$only_unicode = $options{'only-unicode'}  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# do not pass our options to parent it may get confused and croak  | 
| 
22
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 		delete $options{'only-unicode'}  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
24
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
 	my $self = $class->SUPER::new(%options);  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# at this point our _init() will be called via parent's  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# constructor. Our _init() will call parent's _init()  | 
| 
27
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
 	$self->only_unicode($only_unicode);  | 
| 
28
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
 	return $self  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub	_reset {  | 
| 
31
 | 
12
 | 
 
 | 
 
 | 
  
12
  
 | 
 
 | 
22
 | 
 	my $self = shift;  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# we are interfering with the internals of the parent... not good  | 
| 
33
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
 	$#{$self->{_types}} = -1;  | 
| 
 
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
    | 
| 
34
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
 	$#{$self->{_scalar_types}} = -1;  | 
| 
 
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
    | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub	_init {  | 
| 
37
 | 
12
 | 
 
 | 
 
 | 
  
12
  
 | 
 
 | 
112
 | 
 	my $self = shift;  | 
| 
38
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
 	$self->_reset();  | 
| 
39
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
53
 | 
 	$self->SUPER::_init(@_);  | 
| 
40
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
130
 | 
 	push @{$self->{_scalar_types}}, 'string-UTF8'  | 
| 
 
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
    | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub	only_unicode {  | 
| 
43
 | 
14
 | 
 
 | 
 
 | 
  
14
  
 | 
  
1
  
 | 
3586
 | 
 	my $self = $_[0];  | 
| 
44
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
 	my $m = $_[1];  | 
| 
45
 | 
14
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
79
 | 
 	return $self->{'_only-unicode'} unless defined $m;  | 
| 
46
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
 	$self->_init();  | 
| 
47
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
 	$self->{'_only-unicode'} = $m;  | 
| 
48
 | 
7
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
34
 | 
 	if( $m == 1 ){  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# delete just the 'string' type  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# we will get various types but the strings will  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# be exclusively unicode  | 
| 
52
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
 		my @idx = grep { $self->{'_scalar_types'}->[$_] eq 'string' }  | 
| 
53
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 			reverse 0 .. $#{$self->{_scalar_types}}  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		;  | 
| 
55
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 		splice(@{$self->{_scalar_types}}, $_, 1) for @idx;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	} elsif( $m > 1 ){  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# delete ALL the _scalar_types and leave just our unicode string  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# we will get only unicode strings no other scalar type  | 
| 
59
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		$#{$self->{_scalar_types}} = -1;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
60
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		push @{$self->{_scalar_types}}, 'string-UTF8'  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
62
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
 	return $m  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub	random_char_UTF8 {  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# the crucial part borrowed from The Effective Perler:  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# https://www.effectiveperlprogramming.com/2018/08/find-the-new-emojis-in-perls-unicode-support/  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	my $achar;  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	for(my $trials=100;$trials-->0;){  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #		$achar = chr(int(rand(0x10FFF+1)));  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #		return $achar if $achar =~ /\p{Present_In: 8.0}/;  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	}  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# just greek and coptic no holes  | 
| 
74
 | 
17822
 | 
 
 | 
 
 | 
  
17822
  
 | 
  
1
  
 | 
50354
 | 
 	return chr(0x03B0+int(rand(0x03F0-0x03B0)));  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
76
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my $arand = rand();  | 
| 
77
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	if( $arand < 0.2 ){  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
78
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return chr(0x03B0+int(rand(0x03F0-0x03B0)))  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	} elsif( $arand < 0.4 ){  | 
| 
80
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return chr(0x0400+int(rand(0x040F-0x0400)))  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	} elsif( $arand < 0.6 ){  | 
| 
82
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return chr(0x13A0+int(rand(0x13EF-0x13A0)))  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	} elsif( $arand < 0.8 ){  | 
| 
84
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return chr(0x1200+int(rand(0x137F-0x1200)))  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
86
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return chr(0xA980+int(rand(0xA9DF-0xA980)))  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub	random_chars_UTF8 {  | 
| 
89
 | 
8911
 | 
 
 | 
 
 | 
  
8911
  
 | 
  
1
  
 | 
22519
 | 
 	my %options = @_;  | 
| 
90
 | 
8911
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
19554
 | 
 	my $minl = defined($options{'min'}) ? $options{'min'} : 6;  | 
| 
91
 | 
8911
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
16525
 | 
 	my $maxl = defined($options{'max'}) ? $options{'max'} : 32;  | 
| 
92
 | 
8911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14077
 | 
 	my $ret = "";  | 
| 
93
 | 
8911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27067
 | 
 	for(1..($minl+int(rand($maxl-$minl)))){  | 
| 
94
 | 
17822
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33027
 | 
 		$ret .= random_char_UTF8()  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
96
 | 
8911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36190
 | 
 	return $ret;  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }		  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # override's parent's.  | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # first call parent's namesake and if it fails because it  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # is decided to generate UTF8 something, it will default to  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # this method which must deal with all the extenstions we introduced  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # in our own _init()  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # CAVEAT: it relies on parent croaking the message  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #   "I don't know how to generate $type\n"  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # if that chanegs (in parent) then we will no longer be able to deduce  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # $type and have to change this program.  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # if that happens please file a bug.  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # unfortunately our parent class does not allow for input params...  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub	generate_scalar {  | 
| 
110
 | 
19431
 | 
 
 | 
 
 | 
  
19431
  
 | 
  
1
  
 | 
98157
 | 
 	my $self = shift;  | 
| 
111
 | 
19431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30802
 | 
 	my $rc = eval { $self->SUPER::generate_scalar(@_) };  | 
| 
 
 | 
19431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41814
 | 
    | 
| 
112
 | 
19431
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
1839856
 | 
 	if( $@ || ! defined($rc) ){  | 
| 
113
 | 
8911
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
54310
 | 
 		if( $@ !~ /how to generate (.+?)\R/ ){  | 
| 
114
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			warn "something changed in parent class and can not parse this message any more, please file a bug: '$@'";  | 
| 
115
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			return scalar(random_chars_UTF8(min=>2,max=>2));  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
117
 | 
8911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20691
 | 
 		my $type = $1;  | 
| 
118
 | 
8911
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
19050
 | 
 		if( $type eq 'string-UTF8' ){  | 
| 
119
 | 
8911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19704
 | 
 			return scalar(random_chars_UTF8(min=>2,max=>2));  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		} else {  | 
| 
121
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			warn "child: I don't know how to generate $type, this is a bug, please file a bug and mention this: $@\n";  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# but don't die  | 
| 
123
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			return scalar(random_chars_UTF8(min=>2,max=>2));  | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
126
 | 
10520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32949
 | 
 	return $rc  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub	check_content_recursively {  | 
| 
129
 | 
1958
 | 
 
 | 
 
 | 
  
1958
  
 | 
  
1
  
 | 
21561
 | 
 	my $looking_for = $_[1]; # a hashref of types to look-for, required  | 
| 
130
 | 
1958
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2902
 | 
 	my $bitparams = 0;  | 
| 
131
 | 
1958
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
7053
 | 
 	$bitparams |= 1 if exists($looking_for->{'numbers'}) && ($looking_for->{'numbers'}==1);  | 
| 
132
 | 
1958
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
5196
 | 
 	$bitparams |= 2 if exists($looking_for->{'strings-unicode'}) && ($looking_for->{'strings-unicode'}==1);  | 
| 
133
 | 
1958
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
4896
 | 
 	$bitparams |= 4 if exists($looking_for->{'strings-plain'}) && ($looking_for->{'strings-plain'}==1);  | 
| 
134
 | 
1958
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
4592
 | 
 	$bitparams |= (2+4) if exists($looking_for->{'strings'}) && ($looking_for->{'strings'}==1);  | 
| 
135
 | 
1958
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3657
 | 
 	return _check_content_recursively($_[0], $bitparams);  | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # returns 1 if we are looking for it and it was found  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # returns 0 if what we were looking for was not found.  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # 'looking_for' can be more than one things.  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # it is a bit string, 1st bit if set looks for numbers,  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # 2nd bit, if set, looks for unicode strings,  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # 3rd bit, if set, looks for non-unicode strings (plain)  | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # if you set 'numbers'=>0, it simply means "do not check for numbers"  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # and so it will not check if it has any numbers  | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # by giving nothing to check, it return 0, nothing was found  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub	_check_content_recursively {  | 
| 
147
 | 
22565
 | 
 
 | 
 
 | 
  
22565
  
 | 
 
 | 
32824
 | 
 	my $inp = $_[0];  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# NUMBER,UNICODE_STRING,NON_UNICODE_STRING  | 
| 
149
 | 
22565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29841
 | 
 	my $looking_for = $_[1];  | 
| 
150
 | 
22565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32712
 | 
 	my $aref = ref($inp);  | 
| 
151
 | 
22565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31924
 | 
 	my ($r, $v);  | 
| 
152
 | 
22565
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
50944
 | 
 	if( ($aref eq '') || ($aref eq 'SCALAR') ){  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
153
 | 
20099
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
34748
 | 
 		if( $aref eq 'SCALAR' ){ $inp = $$inp }  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
154
 | 
20099
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
42952
 | 
 		if( looks_like_number($inp) ){  | 
| 
155
 | 
5381
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
9761
 | 
 			return 1 if $looking_for & 1; # a number  | 
| 
156
 | 
4979
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8481
 | 
 			return 0;  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
158
 | 
14718
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
22733
 | 
 		if( _has_utf8($inp) ){  | 
| 
159
 | 
13939
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
26440
 | 
 			return 1 if $looking_for & 2; # unicode string  | 
| 
160
 | 
13337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25295
 | 
 			return 0;  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
162
 | 
779
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1989
 | 
 		return 1 if $looking_for & 4; # plain string  | 
| 
163
 | 
491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
985
 | 
 		return 0;  | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	} elsif( $aref eq 'ARRAY' ){  | 
| 
165
 | 
1243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2322
 | 
 		for my $v (@$inp){  | 
| 
166
 | 
7915
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12574
 | 
 			$r = _check_content_recursively($v, $looking_for);  | 
| 
167
 | 
7915
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
16493
 | 
 			return 1 if $r;  | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	} elsif( $aref eq 'HASH' ){  | 
| 
170
 | 
1223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6795
 | 
 		for my $k (sort keys %$inp){  | 
| 
171
 | 
6535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10936
 | 
 			$r = _check_content_recursively($k, $looking_for);  | 
| 
172
 | 
6535
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
12365
 | 
 			return 1 if $r;  | 
| 
173
 | 
6157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10477
 | 
 			$r = _check_content_recursively($inp->{$k}, $looking_for);  | 
| 
174
 | 
6157
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
13884
 | 
 			return 1 if $r;  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
176
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	} else { die "don't know how to deal with this ref '$aref'" }  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
178
 | 
14718
 | 
 
 | 
 
 | 
  
14718
  
 | 
 
 | 
43065
 | 
 sub	_has_utf8 { return $_[0] =~ /[^\x00-\x7f]/ }  | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # this does not work for unicode strings  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # from https://www.perlmonks.org/?node_id=958679  | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # and https://www.perlmonks.org/?node_id=791677  | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #sub isnum ($) {  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    return 0 if $_[0] eq '';  | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    $_[0] & ~$_[0] ? 0 : 1  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #}  | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =pod  | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =encoding utf8  | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Data::Random::Structure::UTF8 - Produce nested data structures with unicode keys, values, elements.  | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 VERSION  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Version 0.06  | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This module produces random, arbitrarily deep and long,  | 
| 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 nested Perl data structures  with unicode content for the  | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 keys, values and/or array elements. Content can be forced  | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to be exclusively strings and exclusively unicode. Or  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the strings can be unicode. Or anything goes, mixed  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 unicode and non-unicode strings as well as integers, floats, etc.  | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is an object-oriented module  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 which inherits from  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L and extends its functionality by  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 providing for unicode keys and values for hashtables and  | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 unicode content for array elements or scalars, randomly mixed with the  | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 usual repertoire of L, which is  | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 non-unicode strings,  | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 numerical, boolean values and the assorted entourage to the court  | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of Emperor Computer, post-Turing.  | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For example, it produces these:  | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * unicode scalars: e.g. C<"αβγ">,  | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * mixed arrays: e.g. C<["αβγ", "123", "xyz"]>  | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * hashtables with some/all keys and/or values as unicode: e.g.  | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C<{"αβγ" => "123", "xyz" => "αβγ"}>  | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * exclusive unicode arrays or hashtables: e.g. C<["αβγ", "χψζ"]>  | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is accomplised by adding an extra  | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 type C (invisible to the user) and the  | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 respective generator method. All these are invisible to the user  | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 which will get the old functionality plus some (or maybe none  | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 because this is a random process which does not eliminate non-unicode  | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 strings, at the moment) unicode strings.  | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     use Data::Random::Structure::UTF8;  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $randomiser = Data::Random::Structure::UTF8->new(  | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         'max_depth' => 5,  | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         'max_elements' => 20,  | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # all the strings produced (keys, values, elements)  | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# will be unicode strings  | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'only-unicode' => 1,  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # all the strings produced (keys, values, elements)  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# will be a mixture of unicode and non-unicode  | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# this is the default behaviour  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#'only-unicode' => 0,  | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # only unicode strings will be produced for (keys, values, elements),  | 
| 
254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# there will be no numbers, no bool, only unicode strings  | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#'only-unicode' => 2,  | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     );  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $perl_var = $randomiser->generate() or die;  | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print pp($perl_var);  | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # which prints the usual escape mess of Dump and Dumper  | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 [  | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   "\x{7D5A}\x{4EC1}",  | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   "\x{E6E2}\x{75A4}",  | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   329076,  | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   0.255759160148987,  | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   [  | 
| 
267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "TEb97qJt",  | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     1,  | 
| 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "_ow|J\@~=6%*N;52?W3Y\$S1",  | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       "x{75A4}x{75A4}" => 123,  | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       "123" => "\x{7D5A}\x{4EC1}",  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       "xyz" => [1, 2, "\x{7D5A}\x{4EC1}"],  | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     },  | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ],  | 
| 
276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # can control the scalar type (for keys, values, items) on the fly  | 
| 
278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # this produces unicode strings in addition to  | 
| 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Data::Random::Structure's usual repertoire:  | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # non-unicode-string, numbers, bool, integer, float, etc.  | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # (see there for the list)  | 
| 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $randomiser->only_unicode(0); # the default: anything plus unicode strings  | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print $randomiser->only_unicode();  | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # this produces unicode strings in addition to  | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Data::Random::Structure's usual repertoire:  | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # numbers, bool, integer, float, etc.  | 
| 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # (see there for the list)  | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # EXCEPT non-unicode-strings, (all strings will be unicode)  | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $randomiser->only_unicode(1);  | 
| 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print $randomiser->only_unicode();  | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # this produces unicode strings ONLY  | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Data::Random::Structure's usual repertoire does not apply  | 
| 
295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # there will be no numbers, no bool, no integer, no float, no nothing  | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $randomiser->only_unicode(2);  | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print $randomiser->only_unicode();  | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 METHODS  | 
| 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is an object oriented module which has exactly the same API as  | 
| 
302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L.  | 
| 
303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 C  | 
| 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Constructor. In addition to L C<>  | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 API, it takes parameter C<< 'only-unicode' >> with  | 
| 
308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a valid value of 0, 1 or 2. Default is 0.  | 
| 
309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * 0 : keys, values, elements of the produced data structure will be  | 
| 
313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a mixture of unicode strings, plus L's full  | 
| 
314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 repertoire which includes non-unicode strings, integers, floats etc.  | 
| 
315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * 1 : keys, values, elements of the produced data structure will be  | 
| 
317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a mixture of unicode strings, plus L's full  | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 repertoire except non-unicode strings. That is, all strings will be  | 
| 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 unicode. But there will possibly be integers etc.  | 
| 
320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * 2 : keys, values, elements of the produced data structure will be  | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 only unicode strings. Nothing of L's  | 
| 
323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 repertoire applies. Only unicode strings, no integers, no nothing.  | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Controlling the scalar data types can also be done on the fly, after  | 
| 
328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the object has been created using  | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L C<>  | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 method.  | 
| 
331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Additionally, L C<>'s API reports that  | 
| 
333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the constructor takes 2 optional arguments, C and C.  | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 See L C<> for up-to-date, official information.  | 
| 
335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 C  | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Controls what scalar types to be included in the nested  | 
| 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 data structures generated. With no parameters it returns back  | 
| 
340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the current setting. Otherwise, valid input parameters and their  | 
| 
341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 meanings are listed in L C<>  | 
| 
342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 C  | 
| 
344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Generate a nested data structure according to the specification  | 
| 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 set in the constructor. See L C<> for  | 
| 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 all options. This method is not overriden by this module.  | 
| 
348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 It returns the Perl data structure as a reference.  | 
| 
350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 C  | 
| 
352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Generate a scalar which may contain unicode content.  | 
| 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 See L for  | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 all options. This method is overriden by this module but  | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 calls the parent's too.  | 
| 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 It returns a Perl string.  | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 C  | 
| 
361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Generate an array with random, possibly unicode, content.  | 
| 
363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 See L for  | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 all options. This method is not overriden by this module.  | 
| 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 It returns the Perl array as a reference.  | 
| 
367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 C  | 
| 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Generate an array with random, possibly unicode, content.  | 
| 
371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 See L for  | 
| 
372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 all options. This method is not overriden by this module.  | 
| 
373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 It returns the Perl array as a reference.  | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 C  | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return a random unicode character, guaranteed to be valid.  | 
| 
379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is a very simple method which selects characters  | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 from some pre-set code pages (Greek, Cyrillic, Cherokee,  | 
| 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Ethiopic, Javanese) with equal probability.  | 
| 
382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 These pages and ranges were selected so that there are  | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no "holes" between them which would produce an invalid  | 
| 
384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 character. Therefore, not all characters from the  | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 particular code page will be produced.  | 
| 
386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns a random unicode character guaranteed to be valid.  | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 C  | 
| 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $ret = random_chars_UTF8($optional_paramshash)  | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Arguments:  | 
| 
394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * C<$optional_paramshash> : can contain  | 
| 
398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * C<'min'> sets the minimum length of the random sequence to be returned, default is 6  | 
| 
402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * C<'max'> sets the maximum length of the random sequence to be returned, default is 32  | 
| 
404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return a random unicode-only string optionally specifying  | 
| 
410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 minimum and maximum length. See  | 
| 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L C<>  | 
| 
412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 for the range of characters it returns. The returned string  | 
| 
413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is unicode and is guaranteed all its characters are valid.  | 
| 
414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SUBROUTINES  | 
| 
416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 C  | 
| 
418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $ret = check_content_recursively($perl_var, $paramshashref)  | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Arguments:  | 
| 
422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * C<$perl_var> : a Perl variable containing an arbitrarily nested data structure  | 
| 
426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * C<$paramshashref> : can contain one or more of the following keys:  | 
| 
428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * C<'numbers'> set it to 1 to look for numbers (possibly among other things).  | 
| 
432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If set to 1 and a number C<123> or C<"123"> is found, this sub returns 1.  | 
| 
433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Set it to 0 to not look for numbers at all (and not report if  | 
| 
434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 there are no numbers) - I, that's what  | 
| 
435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 setting this to zero means.  | 
| 
436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * C<'strings-unicode'> set it to 1 to look for unicode strings (possibly among other things).  | 
| 
438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The definition of "unicode string" is that at least one its characters is unicode.  | 
| 
439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If set to 1 and a "unicode string" is found, this sub returns 1.  | 
| 
440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * C<'strings-plain'> set it to 1 to look for plain strings (possibly among other things).  | 
| 
442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The definition of "plain string" is that none of its characters is unicode.  | 
| 
443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If set to 1 and a "plain string" is found, this sub returns 1.  | 
| 
444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * C<'strings'> set it to 1 to look for plain or unicode strings (possibly among other things).  | 
| 
446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If set to 1 and a "plain string" or "unicode string" is found, this sub returns 1. Basically,  | 
| 
447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 it returns 1 when a string is found (as opposed to a "number").  | 
| 
448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
453
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 In general, by setting C<<'strings-unicode'=>1>> you are checking whether  | 
| 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the input Perl variable contains a unicode string in a key, a value,  | 
| 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 an array element, or a scalar reference.  | 
| 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 But, setting C<<'strings-unicode'=>0>>, it simply means do not look for  | 
| 
458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 this. It does not mean I.  | 
| 
459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return value: 1 or 0 depending whether what  | 
| 
461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 was looking for, was found.  | 
| 
462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is not an object-oriented method. It is called thously:  | 
| 
464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
465
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # check if ANY scalar (hash key, value, array element or scalar ref)  | 
| 
466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # contains ONLY single number (integer, float)  | 
| 
467
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # the decicion is made by Scalar::Util:looks_like_number()  | 
| 
468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if( Data::Random::Structure::UTF8::check_content_recursively(  | 
| 
469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{'abc'=>123, 'xyz'=>[1,2,3]},  | 
| 
470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# look for numbers, are there any?  | 
| 
472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		'numbers' => 1,  | 
| 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ) ){ print "data structure contains numbers\n" }  | 
| 
475
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
476
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # check if it contains no numbers but it does unicode strings  | 
| 
477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if( Data::Random::Structure::UTF8::check_content_recursively(  | 
| 
478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{'abc'=>123, 'xyz'=>[1,2,3]},  | 
| 
479
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# don't look for numbers  | 
| 
481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		'numbers' => 0,  | 
| 
482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# look for unicode strings, are there any?  | 
| 
483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		'strings-unicode' => 1,  | 
| 
484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
485
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ) ){ print "data structure contains numbers\n" }  | 
| 
486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
487
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 CAVEAT: as its name suggests, this is a recursive function. Beware  | 
| 
488
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of extremely deep data structures. Deep, not long. If you do get  | 
| 
489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C<<"Deep recursion..." warnings>>, and you do insist to go ahead,  | 
| 
490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 this will remove the warnings (but are you sure?):  | 
| 
491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         no warnings 'recursion';  | 
| 
494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if( Data::Random::Structure::UTF8::check_content_recursively(  | 
| 
495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {'abc'=>123, 'xyz'=>[1,2,3]},  | 
| 
496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		'numbers' => 1,  | 
| 
498
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) ){ print "data structure contains numbers\n" }  | 
| 
500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
502
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SEE ALSO  | 
| 
503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * The parent class L.  | 
| 
507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * L for stringifying possibly-unicode Perl data structures.  | 
| 
509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 AUTHOR  | 
| 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Andreas Hadjiprocopis, C<<  >>  | 
| 
515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 BUGS  | 
| 
517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Please report any bugs or feature requests to C, or through  | 
| 
519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the web interface at L.  I will be notified, and then you'll  | 
| 
520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 automatically be notified of progress on your bug as I make changes.  | 
| 
521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
522
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 CAVEATS  | 
| 
523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
524
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 There are two issues users should know about.  | 
| 
525
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The first issue is that the unicode produced can make  | 
| 
527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L to complain with  | 
| 
528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    Operation "lc" returns its argument for UTF-16 surrogate U+DA4B at /usr/local/share/perl5/Data/Dump.pm line 302.  | 
| 
530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
531
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This, I have found, can be fixed with the following workaround (from L, thank you):  | 
| 
532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
533
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Suppress `Operation "lc" returns its argument for UTF-16 surrogate 0xNNNN` warning  | 
| 
534
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # for the `lc()` call below; use 'utf8' instead of a more appropriate 'surrogate' pragma  | 
| 
535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # since the latter is not available in until Perl 5.14  | 
| 
536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     no warnings 'utf8';  | 
| 
537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
538
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The second issue is that this class inherits from L  | 
| 
539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and relies on it complaining about not being able to handle certain types  | 
| 
540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 which are our own extensions (the C extension). We have  | 
| 
541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no way to know that except from catching its C'ing and parsing it  | 
| 
542
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 with the following code  | 
| 
543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $rc = eval { $self->SUPER::generate_scalar(@_) };  | 
| 
545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    if( $@ || ! defined($rc) ){  | 
| 
546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      # parent doesn't know what to do, can we handle this?  | 
| 
547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      if( $@ !~ /how to generate (.+?)\R/ ){ ...  ... }  | 
| 
548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      else { print "type is $1" }  | 
| 
549
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      ...  | 
| 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 in order to extract the C which can not be handled  | 
| 
552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and handle it ourselves. So whenever the parent class (L)  | 
| 
553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 changes its C song, we will have to adopt this code  | 
| 
554
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 accordingly (in L C<>).  | 
| 
555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For the moment, I have placed a catch-all, fall-back condition  | 
| 
556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to handle this but it will be called for all kind of types  | 
| 
557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and not only the types we have added.  | 
| 
558
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 So, this issue is not going to make the module die but may make it  | 
| 
560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to skew the random results in favour of unicode strings (which  | 
| 
561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is the fallback, default action when can't parse the type).  | 
| 
562
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SUPPORT  | 
| 
564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 You can find documentation for this module with the perldoc command.  | 
| 
566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     perldoc Data::Random::Structure::UTF8  | 
| 
568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 You can also look for information at:  | 
| 
571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
574
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * RT: CPAN's request tracker (report bugs here)  | 
| 
575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
576
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
578
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * AnnoCPAN: Annotated CPAN documentation  | 
| 
579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
582
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * CPAN Ratings  | 
| 
583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
584
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
586
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * Search CPAN  | 
| 
587
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
588
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
590
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SEE ALSO  | 
| 
593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
596
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * L   | 
| 
597
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 ACKNOWLEDGEMENTS  | 
| 
601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Mark Allen who created L which is our parent class.  | 
| 
603
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DEDICATIONS AND HUGS  | 
| 
605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 !Almaz!  | 
| 
607
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
608
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 LICENSE AND COPYRIGHT  | 
| 
609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This software is Copyright (c) 2020 by Andreas Hadjiprocopis.  | 
| 
611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
612
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is free software, licensed under:  | 
| 
613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   The Artistic License 2.0 (GPL Compatible)  | 
| 
615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  |