| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Alvis::NLPPlatform::Canonical; | 
| 2 | 3 |  |  | 3 |  | 13 | use strict; | 
|  | 3 |  |  |  |  | 5 |  | 
|  | 3 |  |  |  |  | 145 |  | 
| 3 | 3 |  |  | 3 |  | 13 | use warnings; | 
|  | 3 |  |  |  |  | 5 |  | 
|  | 3 |  |  |  |  | 1716 |  | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  |  | 
| 6 |  |  |  |  |  |  | our $VERSION=$Alvis::NLPPlatform::VERSION; | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | sub CleanUp | 
| 9 |  |  |  |  |  |  | { | 
| 10 |  |  |  |  |  |  |  | 
| 11 | 0 |  |  | 0 | 1 |  | my ($canonical, $preserveWhiteSpace) = @_; | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | #     my $tmp_str; | 
| 14 |  |  |  |  |  |  |  | 
| 15 | 0 | 0 |  |  |  |  | if (!$preserveWhiteSpace) { | 
| 16 | 0 |  |  |  |  |  | warn "\nRemoving White Spaces\n"; | 
| 17 |  |  |  |  |  |  |  | 
| 18 | 0 |  |  |  |  |  | $canonical =~ s/^[\s\t]*(<[^>]+>)[\s\t\n]*(\n)/$1$2/go; | 
| 19 | 0 |  |  |  |  |  | $canonical =~ s/(\n)[\s\t\n]*(<[^>]+>)[\s\t]*(\n)/$1$2$3/go; | 
| 20 | 0 |  |  |  |  |  | $canonical =~ s/(\n)[\s\t\n]*(<[^>]+>)[\s\t]*/$1$2/go; | 
| 21 |  |  |  |  |  |  | } | 
| 22 | 0 |  |  |  |  |  | $canonical =~ s/ | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  |  | 
| 25 |  |  |  |  |  |  |  | 
| 26 | 0 |  |  |  |  |  | $canonical =~ s/<\/?list>/\n/go; | 
| 27 | 0 |  |  |  |  |  | $canonical =~ s/<\/?item>/\n/go; | 
| 28 | 0 |  |  |  |  |  | $canonical =~ s/<\/?canonicalDocument>/\n/go; | 
| 29 | 0 |  |  |  |  |  | $canonical =~ s/<\/?ulink[^>]*>//go; | 
| 30 | 0 | 0 |  |  |  |  | if (!$preserveWhiteSpace) { | 
| 31 | 0 |  |  |  |  |  | $canonical =~ s/\n+/\n/go; | 
| 32 | 0 |  |  |  |  |  | $canonical =~ s/^\n//go; | 
| 33 | 0 |  |  |  |  |  | $canonical =~ s/^\n$//go; | 
| 34 |  |  |  |  |  |  | } | 
| 35 |  |  |  |  |  |  |  | 
| 36 | 0 |  |  |  |  |  | my $pos = 0; | 
| 37 | 0 |  |  |  |  |  | my $str = $canonical; | 
| 38 | 0 |  |  |  |  |  | my $pos_section = -1; | 
| 39 | 0 |  |  |  |  |  | my $pos_prec_section = 0; | 
| 40 |  |  |  |  |  |  |  | 
| 41 | 0 |  |  |  |  |  | Alvis::NLPPlatform::XMLEntities::decode($str); | 
| 42 | 0 |  |  |  |  |  | $canonical = ""; | 
| 43 |  |  |  |  |  |  |  | 
| 44 | 0 |  |  |  |  |  | while(($pos_section = index($str, "</section>", $pos_prec_section)) > -1) { | 
| 45 | 0 |  |  |  |  |  | $canonical .= substr($str, $pos_prec_section, $pos_section - $pos_prec_section); | 
| 46 | 0 |  |  |  |  |  | chomp $canonical; | 
| 47 | 0 | 0 |  |  |  |  | if ($pos_section != $pos_prec_section) { | 
| 48 | 0 |  |  |  |  |  | $canonical .= "\n"; | 
| 49 |  |  |  |  |  |  | } | 
| 50 | 0 | 0 |  |  |  |  | if (!$preserveWhiteSpace) { | 
| 51 | 0 |  |  |  |  |  | $canonical =~ s/\n+/\n/go; | 
| 52 | 0 |  |  |  |  |  | $canonical =~ s/^\n//go; | 
| 53 | 0 |  |  |  |  |  | $canonical =~ s/^\n$//go; | 
| 54 |  |  |  |  |  |  | } | 
| 55 |  |  |  |  |  |  |  | 
| 56 | 0 |  |  |  |  |  | push @Alvis::NLPPlatform::tab_end_sections_byaddr, (length($canonical) - 1); | 
| 57 | 0 |  |  |  |  |  | $pos_prec_section = $pos_section + 10; | 
| 58 |  |  |  |  |  |  | } | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  |  | 
| 61 | 0 |  |  |  |  |  | return($canonical); | 
| 62 |  |  |  |  |  |  | } | 
| 63 |  |  |  |  |  |  |  | 
| 64 |  |  |  |  |  |  | 1; | 
| 65 |  |  |  |  |  |  |  | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  | __END__ |