|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package School::Code::Compare::Charset;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ABSTRACT: remove whitespace, comments and unessential chars from text  | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $School::Code::Compare::Charset::VERSION = '0.104';  | 
| 
4
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
374741
 | 
 use strict;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
124
 | 
    | 
| 
5
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
21
 | 
 use warnings;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
117
 | 
    | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
7
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
2139
 | 
 use School::Code::Compare::Charset::NumSignes;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
127
 | 
    | 
| 
8
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
1836
 | 
 use School::Code::Compare::Charset::Signes;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
131
 | 
    | 
| 
9
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
1746
 | 
 use School::Code::Compare::Charset::NoWhitespace;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
125
 | 
    | 
| 
10
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
1875
 | 
 use School::Code::Compare::Charset::NoComments::Hashy;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
123
 | 
    | 
| 
11
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
1765
 | 
 use School::Code::Compare::Charset::NoComments::Slashy;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
121
 | 
    | 
| 
12
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
1799
 | 
 use School::Code::Compare::Charset::NoComments::XMLy;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1849
 | 
    | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {  | 
| 
15
 | 
14
 | 
 
 | 
 
 | 
  
14
  
 | 
  
0
  
 | 
9722
 | 
     my $class = shift;  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
17
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
45
 | 
     my $self = {  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  language      => 'txt',  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                };  | 
| 
20
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
     bless $self, $class;  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
22
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52
 | 
     return $self;  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub set_language {  | 
| 
26
 | 
14
 | 
 
 | 
 
 | 
  
14
  
 | 
  
0
  
 | 
28
 | 
     my $self = shift;  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
28
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
     $self->{language} = shift;  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
30
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
49
 | 
     return $self;  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub sort_by_lines {  | 
| 
34
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
  
0
  
 | 
49
 | 
     my $self = shift;  | 
| 
35
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     my $lines_ref = shift;  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
37
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
     my @sorted = sort { $a cmp $b } @{$lines_ref};  | 
| 
 
 | 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
406
 | 
    | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
    | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
39
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
     return \@sorted;  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub trim_comments {  | 
| 
43
 | 
20
 | 
 
 | 
 
 | 
  
20
  
 | 
  
0
  
 | 
34
 | 
     my $self      = shift;  | 
| 
44
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
     my $lines_ref = shift;  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
46
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
     my $cleaned = '';  | 
| 
47
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
     my $lang    = $self->{language};  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
49
 | 
20
 | 
  
100
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
285
 | 
     if ($lang eq 'python'  | 
| 
 
 | 
 
 | 
  
100
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      or $lang eq 'perl'  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      or $lang eq 'bash'  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      or $lang eq 'hashy'  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      ) {  | 
| 
54
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
         $cleaned = School::Code::Compare::Charset::NoComments::Hashy->new()  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                        ->filter ( $lines_ref );  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif ($lang eq 'php'  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         or $lang eq 'js'  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         or $lang eq 'cpp'  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         or $lang eq 'cs'  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         or $lang eq 'c'  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         or $lang eq 'java'  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         or $lang eq 'slashy'  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      ) {  | 
| 
65
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
         $cleaned = School::Code::Compare::Charset::NoComments::Slashy->new()  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                        ->filter ( $lines_ref );  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif ($lang eq 'html'  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         or $lang eq 'xml'  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ) {  | 
| 
71
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $cleaned = School::Code::Compare::Charset::NoComments::XMLy->new()  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                        ->filter ( $lines_ref );  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif ($lang eq 'txt') {  | 
| 
75
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
         $cleaned = $lines_ref; # do nothing  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
78
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
71
 | 
     return $cleaned;  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub get_visibles {  | 
| 
82
 | 
11
 | 
 
 | 
 
 | 
  
11
  
 | 
  
0
  
 | 
276
 | 
     my $self      = shift;  | 
| 
83
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
     my $lines_ref = shift;  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
85
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
     my $cleaned = $self->trim_comments($lines_ref);  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
87
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
59
 | 
     return School::Code::Compare::Charset::NoWhitespace->new()  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                                        ->filter($cleaned);  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub get_numsignes {  | 
| 
92
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
0
  
 | 
886
 | 
     my $self      = shift;  | 
| 
93
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     my $lines_ref = shift;  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
95
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
     my $no_comments = $self->trim_comments($lines_ref);  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # order is important:  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # first create numsigns, then remove whitespace.  | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # otherwise "words" will loose their meaning.  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
101
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     my $ns = School::Code::Compare::Charset::NumSignes->new()  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                                       ->filter($no_comments);  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
104
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     return School::Code::Compare::Charset::NoWhitespace->new()  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                                        ->filter($ns);  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub get_signes {  | 
| 
109
 | 
8
 | 
 
 | 
 
 | 
  
8
  
 | 
  
0
  
 | 
3084
 | 
     my $self      = shift;  | 
| 
110
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     my $lines_ref = shift;  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
112
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     my $no_comments = $self->trim_comments($lines_ref);  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
114
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
     my $ns = School::Code::Compare::Charset::Signes->new()  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                                    ->filter($no_comments);  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
117
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
39
 | 
     return School::Code::Compare::Charset::NoWhitespace->new()  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                                        ->filter($ns);  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =pod  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =encoding UTF-8  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 School::Code::Compare::Charset - remove whitespace, comments and unessential chars from text  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 VERSION  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 version 0.104  | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 AUTHOR  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Boris Däppen <bdaeppen.perl@gmail.com>  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 COPYRIGHT AND LICENSE  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This software is copyright (c) 2020 by Boris Däppen.  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is free software; you can redistribute it and/or modify it under  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the same terms as the Perl 5 programming language system itself.  | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  |