File Coverage

blib/lib/App/CPRReporter.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1 1     1   21603 use strict;
  1         2  
  1         41  
2 1     1   6 use warnings;
  1         1  
  1         57  
3              
4             package App::CPRReporter 0.02;
5             $App::CPRReporter::VERSION = '0.03';
6 1     1   520 use Moose;
  0            
  0            
7             use namespace::autoclean;
8             use 5.012;
9             use autodie;
10              
11             use Carp qw/croak carp/;
12             use Text::ResusciAnneparser;
13             use Spreadsheet::XLSX;
14             use Text::Iconv;
15             use Data::Dumper;
16             use Text::Fuzzy::PP;
17              
18             has employees => (
19             is => 'ro',
20             isa => 'Str',
21             required => 1,
22             );
23              
24             has certificates => (
25             is => 'ro',
26             isa => 'Str',
27             required => 1,
28             );
29              
30             has course => (
31             is => 'ro',
32             isa => 'Str',
33             required => 1,
34             );
35              
36             # Actions that need to be run after the constructor
37             sub BUILD {
38             my $self = shift;
39              
40             # Add stuff here
41              
42             my $certparser =
43             Text::ResusciAnneparser->new( infile => $self->{certificates} );
44             $self->{_certificates} = $certparser->certified();
45             $self->{_training} = $certparser->in_training();
46              
47             $self->_parse_employees;
48              
49             # Make an array of employees that will be used for fuzzy matching
50             foreach my $employee ( keys %{$self->{_employees}} ) {
51             push( @{ $self->{_employee_array} }, $employee );
52             }
53              
54             #print Dumper($self->{_employee_array});
55              
56             # Only parse the course info after the array is created, the array is used in matching
57             $self->_parse_course;
58              
59             }
60              
61             # Run the application, merging the info of the certificates and the employees
62             sub run {
63             my $self = shift;
64              
65             # Certificates are here
66             my $certificate_count = 0;
67             my $certs = $self->{_certificates};
68             foreach my $date ( sort keys %{$certs} ) {
69             foreach my $certuser ( @{ $certs->{$date} } ) {
70             my $fullname = $self->_resolve_name( $certuser->{familyname},
71             $certuser->{givenname} );
72              
73             #say "Certificate found for $fullname";
74             $certificate_count++;
75              
76             # TODO Check if certificate date is already filled in and of is it keep the most recent one.
77             # Might not be required because we sort the date keys.
78             if ( defined $self->{_employees}->{$fullname} ) {
79              
80             # Fill in certificate
81             $self->{_employees}->{$fullname}->{cert} = $date;
82             } else {
83              
84             # Oops: user not found in personel database
85             #carp "Warning: employee '$fullname' not found in employee database"
86             if ( ref($fullname) ) {
87             carp "Fullname is reference, this should not be the case!";
88             }
89             push( @{ $self->{_not_in_hr}->{cert} }, $fullname );
90              
91             }
92             }
93             }
94              
95             say "$certificate_count certificates found";
96              
97             my $training_count = 0;
98             my $training = $self->{_training};
99             foreach my $traininguser ( @{$training} ) {
100             my $fullname = $self->_resolve_name( $traininguser->{familyname},
101             $traininguser->{givenname} );
102              
103             #say "Training found for $fullname";
104             # TODO deduplicate this code with a local function, see above
105             if ( defined $self->{_employees}->{$fullname} ) {
106              
107             # Fill in training if there is no certificate yet, otherwise notify!
108             if ( !defined $self->{_employees}->{$fullname}->{cert} ) {
109             $self->{_employees}->{$fullname}->{cert} = 'training';
110             $training_count++;
111             } else {
112              
113             #carp "Warning: employee '$fullname' is both in training and has a certificate from $self->{_employees}->{$fullname}->{cert}";
114             }
115             } else {
116              
117             # Oops: user not found in personel database
118             #carp "Warning: employee '$fullname' not found in employee database";
119             push( @{ $self->{_not_in_hr}->{training} }, $fullname );
120             $training_count++;
121             }
122             }
123              
124             say "$training_count people are in training";
125              
126             # Check people who are in training and that have a certificate
127             # now run the stats, for every dienst separately report
128             my $stats;
129             foreach my $employee ( keys %{$self->{_employees}} ) {
130             my $dienst = $self->{_employees}->{$employee}->{dienst};
131             my $cert = $self->{_employees}->{$employee}->{cert} || 'none';
132             my $course = $self->{_employees}->{$employee}->{course} || 'none';
133              
134             $stats->{employee_count} += 1;
135              
136             if ( $cert eq 'none' ) {
137             $stats->{$dienst}->{'not_started'}->{count} += 1;
138             push( @{ $stats->{$dienst}->{'not_started'}->{list} }, $employee );
139             } elsif ( $cert eq 'training' ) {
140             $stats->{$dienst}->{'training'}->{count} += 1;
141             push( @{ $stats->{$dienst}->{'training'}->{list} }, $employee );
142             } else {
143             $stats->{$dienst}->{'certified'}->{count} += 1;
144             push( @{ $stats->{$dienst}->{'certified'}->{list} }, $employee );
145             }
146              
147             if ( !( $course eq 'none' ) ) {
148             $stats->{$dienst}->{'course'}->{count} += 1;
149              
150             }
151             }
152              
153             #print Dumper($stats);
154              
155             # Display the results
156             say "Dienst;Certificaat;Training;Niet gestart;Theorie";
157              
158             foreach my $dienst ( sort keys %{$stats} ) {
159             next if ( $dienst eq 'employee_count' );
160              
161             if ( !defined $stats->{$dienst}->{certified}->{count} ) {
162             $stats->{$dienst}->{certified}->{count} = 0;
163             }
164             if ( !defined $stats->{$dienst}->{training}->{count} ) {
165             $stats->{$dienst}->{training}->{count} = 0;
166             }
167             if ( !defined $stats->{$dienst}->{not_started}->{count} ) {
168             $stats->{$dienst}->{not_started}->{count} = 0;
169             }
170             if ( !defined $stats->{$dienst}->{course}->{count} ) {
171             $stats->{$dienst}->{course}->{count} = 0;
172             }
173             say "$dienst;"
174             . $stats->{$dienst}->{certified}->{count} . ";"
175             . $stats->{$dienst}->{training}->{count} . ";"
176             . $stats->{$dienst}->{not_started}->{count} . ";"
177             . $stats->{$dienst}->{course}->{count};
178              
179             }
180              
181             if ( defined $self->{_not_in_hr}->{cert} ) {
182             say "";
183             say "Not found in the HR database while parsing certificates: "
184             . scalar( @{ $self->{_not_in_hr}->{cert} } );
185             foreach ( @{ $self->{_not_in_hr}->{cert} } ) {
186             say;
187             }
188             }
189              
190             if ( defined $self->{_not_in_hr}->{training} ) {
191             say "Not found in the HR database while parsing in training: "
192             . scalar( @{ $self->{_not_in_hr}->{training} } );
193             foreach ( @{ $self->{_not_in_hr}->{training} } ) {
194             say;
195             }
196             }
197              
198             if ( defined $self->{_not_in_hr}->{theory} ) {
199             say "Not found in the HR database while parsing theory: "
200             . scalar( @{ $self->{_not_in_hr}->{theory} } );
201             foreach ( @{ $self->{_not_in_hr}->{theory} } ) {
202             say;
203             }
204             }
205              
206             #say "";
207             #say "Resolved names";
208             #print Dumper($self->{_resolve});
209             }
210              
211             # Parse the employee database to extract the names and the group they are in
212             sub _parse_employees {
213              
214             my $self = shift;
215              
216             #my $converter = Text::Iconv -> new ("utf-8", "windows-1251");
217             my $excel = Spreadsheet::XLSX->new( $self->{employees} );
218              
219             my $sheet = @{ $excel->{Worksheet} }[0];
220             $sheet->{MaxRow} ||= $sheet->{MinRow};
221              
222             # Go over the rows in the sheet and extract employee info, skip first row
223             foreach my $row ( $sheet->{MinRow} + 1 .. $sheet->{MaxRow} ) {
224             my $dienst = $sheet->{Cells}[$row][0]->{Val} || next;
225             my $familyname = uc( $sheet->{Cells}[$row][2]->{Val} ) || "NotDefined_employee_$row";
226             my $givenname = uc( $sheet->{Cells}[$row][3]->{Val} ) || "NotDefined_employee_$row";
227              
228             my $name = "$familyname $givenname";
229             $self->{_employees}->{$name} = { dienst => $dienst };
230              
231             }
232              
233             }
234              
235             # Parse the course database to see when the theoretical course was followed
236             sub _parse_course {
237             my $self = shift;
238              
239             my $excel = Spreadsheet::XLSX->new( $self->{course} );
240              
241             my $sheet = @{ $excel->{Worksheet} }[0];
242             $sheet->{MaxRow} ||= $sheet->{MinRow};
243              
244             # Go over the rows in the sheet and extract employee info, skip first row
245             foreach my $row ( $sheet->{MinRow} + 1 .. $sheet->{MaxRow} ) {
246             my $familyname = $sheet->{Cells}[$row][1]->{Val} || "NotDefined_course_$row";
247             my $givenname = $sheet->{Cells}[$row][2]->{Val} || "NotDefined_course_$row";
248             $familyname = uc($familyname) || $row;
249             $givenname = uc($givenname) || $row;
250              
251             # Ensure no leading/trailing spaces are in the name
252             $familyname =~ s/^\s+//; # strip white space from the beginning
253             $familyname =~ s/\s+$//; # strip white space from the end
254             $givenname =~ s/^\s+//; # strip white space from the beginning
255             $givenname =~ s/\s+$//; # strip white space from the end
256             my $date = $sheet->{Cells}[$row][7]->{Val};
257              
258             # If the date is not filled in then date will be undefined.
259             next if ( !defined($date) );
260              
261             my $name = $self->_resolve_name( $familyname, $givenname );
262              
263             # Extract the formatted value from the cell, we can only call this function once we know the cell has a value
264             $date = $sheet->{Cells}[$row][7]->value();
265              
266             # If the employee already exists: OK, go ahead and insert training
267             if ( defined $self->{_employees}->{$name} ) {
268             $self->{_employees}->{$name}->{course} = $date;
269             } else {
270              
271             #carp "Oops: employee '$name' not found in employee database while parsing the theoretical training list";
272             push( @{ $self->{_not_in_hr}->{theory} }, $name );
273              
274             }
275             }
276              
277             }
278              
279             # Try to resolve a name in case it is not found in the personel database
280             sub _resolve_name {
281             my ( $self, $fname, $gname ) = @_;
282              
283             my $name;
284              
285             # Cleanup leading/trailing spaces
286              
287             # Straight match
288             $name = uc($fname) . " " . uc($gname);
289             if ( exists $self->{_employees}->{$name} ) {
290             $self->{_resolve}->{straight} += 1;
291             return $name;
292             }
293              
294             # First try, maybe they switched familyname and givenname?
295             my $orig = $name;
296             $name = uc($gname) . " " . uc($fname);
297             if ( exists $self->{_employees}->{$name} ) {
298             $self->_fixlog( 'switcharoo', $orig, $name );
299             return $name;
300             }
301              
302             # Exact match but missing parts?
303             $name = uc($fname) . " " . uc($gname);
304             foreach my $employee ( @{ $self->{_employee_array} } ) {
305             if ( $employee =~ /.*$name.*/ ) {
306             $self->_fixlog( 'partial', $name, $employee );
307             return $employee;
308             }
309              
310             # And the reverse could also occur
311             if ( $name =~ /.*$employee.*/ ) {
312             $self->_fixlog( 'partial', $name, $employee );
313             return $employee;
314             }
315             }
316              
317             # Check if we can find a match with fuzzy matching
318             $name = uc($fname) . " " . uc($gname);
319             my $tf = Text::Fuzzy::PP->new($name);
320             $tf->set_max_distance(3);
321             my $index = $tf->nearest( $self->{_employee_array} ) || -1;
322             if ( $index > 0 ) {
323             my $fixed = $self->{_employee_array}->[$index];
324             $self->_fixlog( 'fuzzy', $name, $fixed );
325             return $fixed;
326             }
327              
328             # People with double given name might shorten it
329             # Marie-Christine -> M.-Christine
330             if ( $gname =~ /^(\w)\w+(\-\w+)$/ ) {
331             $name = uc( $fname . " " . $1 . "." . "$2" );
332              
333             # Check if we can find a match with fuzzy matching
334             $tf = Text::Fuzzy::PP->new($name);
335             $tf->set_max_distance(3);
336             my $index = $tf->nearest( $self->{_employee_array} ) || -1;
337             if ( $index > 0 ) {
338             my $fixed = $self->{_employee_array}->[$index];
339             $self->_fixlog( 'fuzzy_short', $name, $fixed );
340             return $fixed;
341             }
342             }
343              
344             # Or maybe they left of the 'Marie-' part of their given name,
345             # try to fuzzy match after adding it
346             $name = uc( $fname . " Marie-" . $gname );
347             $tf = Text::Fuzzy::PP->new($name);
348             $tf->set_max_distance(3);
349             $index = $tf->nearest( $self->{_employee_array} ) || -1;
350             if ( $index > 0 ) {
351             my $fixed = $self->{_employee_array}->[$index];
352             $self->_fixlog( 'fuzzy_-marie', $name, $fixed );
353             return $fixed;
354             }
355              
356             # People with long given name might shorten it
357             # Match those by family name (exact match) and regexp on given name
358             foreach my $employee ( @{ $self->{_employee_array} } ) {
359             my $f_fname = uc($fname);
360             my $f_gname = uc($gname);
361             $name = $f_fname . " " . $f_gname;
362              
363             if ( $employee =~ /(\w+)\s(\w+)/ ) {
364             my $e_fname = $1;
365             my $e_gname = $2;
366              
367             if ( $e_fname =~ /$f_fname/ && $e_gname =~ /$f_gname/ ) {
368             $self->_fixlog( 'partial', $name, $employee );
369             return $employee;
370             }
371             }
372             }
373              
374             # Report no match found
375             #say "No match in employee database for '$name'";
376             $self->{_resolve}->{nomatch} += 1;
377             return $name;
378              
379             }
380              
381             # Log resolved names so that they can be used for later reference
382             sub _fixlog {
383             my ( $self, $type, $original, $fixed ) = @_;
384              
385             #say "$type match for '$original', replaced by '$fixed'";
386             $self->{_resolve}->{$type} += 1;
387             push(
388             @{ $self->{_resolve_list} },
389             { $original => { fixed => $fixed, type => $type } }
390             );
391              
392             }
393              
394             # Speed up the Moose object construction
395             __PACKAGE__->meta->make_immutable;
396             no Moose;
397             1;
398              
399             # ABSTRACT: Application to merge various datasets info an overview of who followed CPR training (cardiopulmonary resuscitation, the use of rescue breathing and chest compressions to help a person whose breathing and heartbeat have stopped)
400              
401             __END__
402              
403             =pod
404              
405             =encoding UTF-8
406              
407             =head1 NAME
408              
409             App::CPRReporter - Application to merge various datasets info an overview of who followed CPR training (cardiopulmonary resuscitation, the use of rescue breathing and chest compressions to help a person whose breathing and heartbeat have stopped)
410              
411             =head1 VERSION
412              
413             version 0.03
414              
415             =head1 SYNOPSIS
416              
417             my $object = App::CPRReporter->new(parameter => 'text.txt');
418              
419             =head1 DESCRIPTION
420              
421             This application parses various datasets and fuses the information to generate an overview of
422             who followed the theoretical part and the practical part of a course on CPR (cardiopulmonary
423             resuscitation, the use of rescue breathing and chest compressions to help a person whose
424             breathing and heartbeat have stopped).
425              
426             More specifically, this application was written to take into account the following information:
427              
428             =over
429              
430             =item An Excel document of the employee database, containing familyname (column C), given name (column D) and group (column A)
431              
432             =item An XML document extracted from the training station software (see Text::ResusciAnneparser)
433              
434             =item An Ecxel document of the people who followed training, containing familyname (column B), given name (column D) and course date (column H)
435              
436             =back
437              
438             This application solves two problems, firstly it automates the task of generating an overview of what people in what group already followed training and who not.
439             Secondly, the application also automates name resolving. The two Excel documents are generated by the personel department and hence should have to correct
440             names. However, the XML document is filled with user-typed input. Hence the name matching between all datasets needs do be done taking into account typos, inverse input, shortened names, ...
441              
442             =head1 METHODS
443              
444             =head2 C<new(%parameters)>
445              
446             This constructor returns a new App::CPRReporter object. Supported parameters are listed below
447              
448             =over
449              
450             =item employees
451              
452             The filename of the Excel document (Office 2007 format) with a full list of people that should follow the course.
453              
454             =item certificates
455              
456             The filename of the XML document extracted from the training software.
457              
458             =item course
459              
460             The filename of the Excel document (Office 2007 format) with an overview of people that followed the theoretical trainging.
461              
462             =back
463              
464             =head2 C<run()>
465              
466             Run the application and print out the result.
467              
468             =head2 BUILD
469              
470             Helper function to run custome code after the object has been created by Moose.
471              
472             =head1 TODO
473              
474             Currently, the application prints output to STDOUT in a CSV format. Future versions could write to Excel immediately.
475              
476             =head1 AUTHOR
477              
478             Lieven Hollevoet <hollie@cpan.org>
479              
480             =head1 COPYRIGHT AND LICENSE
481              
482             This software is copyright (c) 2013 by Lieven Hollevoet.
483              
484             This is free software; you can redistribute it and/or modify it under
485             the same terms as the Perl 5 programming language system itself.
486              
487             =cut