| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package LaTeX::Parser; | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | =head1 NAME | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  | LaTeX::Parser - Perl extension to parse LaTeX files | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | use LaTeX::Parser; | 
| 10 |  |  |  |  |  |  | my $l = new LaTeX::Parser 'file' => 'file.tex'; | 
| 11 |  |  |  |  |  |  | my $p = $l->latex; # $p now holds a reference to an array of | 
| 12 |  |  |  |  |  |  | # file.tex parsed | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | Or use it to break up LaTeX in a variable: | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | my $l = new LaTeX::Parser 'content' => | 
| 17 |  |  |  |  |  |  | '\textit{Three Lives} by Gertrude Stein.'; | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | Contents of nested braces are extracted as a single element.  Another | 
| 20 |  |  |  |  |  |  | C<LaTeX::Parser> will have to be created to parse nested braces. | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | This is a very early version of C<LaTeX::Parser>; there are many bugs. | 
| 23 |  |  |  |  |  |  | I think this will work fine with plain TeX files but I do not plan on | 
| 24 |  |  |  |  |  |  | ever supporting that. | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | For now, only simple descriptions of the module's functions. | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | =cut | 
| 31 |  |  |  |  |  |  |  | 
| 32 | 1 |  |  | 1 |  | 623 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 34 |  | 
| 33 | 1 |  |  | 1 |  | 900 | use integer; | 
|  | 1 |  |  |  |  | 10 |  | 
|  | 1 |  |  |  |  | 6 |  | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | =over 4 | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | =item LaTeX::Parser->new %hash | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | Creates a LaTeX::Parser object.  All values in C<%hash> are used to | 
| 41 |  |  |  |  |  |  | initialize the object.  The only two tested values to set are | 
| 42 |  |  |  |  |  |  | `C<file>' and `C<content>'.  `C<file>' is the name of the file to load | 
| 43 |  |  |  |  |  |  | the LaTeX from, and its contents get copied into `C<content>'.  If | 
| 44 |  |  |  |  |  |  | `C<content>' is set by C<%hash> then `C<load>' will never be called. | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | =cut | 
| 47 |  |  |  |  |  |  |  | 
| 48 |  |  |  |  |  |  | sub new { | 
| 49 | 1 |  |  | 1 | 1 | 47 | my $this = shift; | 
| 50 | 1 |  | 33 |  |  | 8 | my $class = ref($this) || $this; | 
| 51 | 1 |  |  |  |  | 4 | my %hash = @_; | 
| 52 | 1 |  |  |  |  | 2 | my $self = \%hash; | 
| 53 | 1 |  |  |  |  | 3 | bless $self, $class; | 
| 54 | 1 |  |  |  |  | 3 | return $self; | 
| 55 |  |  |  |  |  |  | } | 
| 56 |  |  |  |  |  |  |  | 
| 57 |  |  |  |  |  |  | # Function to be considered private that loads the LaTeX file and | 
| 58 |  |  |  |  |  |  | # throws out comments. | 
| 59 |  |  |  |  |  |  | sub load { | 
| 60 | 1 |  |  | 1 | 0 | 1 | my $self = shift; | 
| 61 |  |  |  |  |  |  |  | 
| 62 | 1 |  |  |  |  | 3 | $self->{'content'} = ''; | 
| 63 | 1 | 50 |  |  |  | 37 | open(FILE, $self->{'file'}) || die "Can't load `$self->{file}', $!\n"; | 
| 64 |  |  |  |  |  |  |  | 
| 65 |  |  |  |  |  |  | LINE: | 
| 66 | 1 |  |  |  |  | 27 | while (<FILE>) { | 
| 67 | 26 | 100 |  |  |  | 43 | if (m/^%/) { | 
| 68 | 1 |  |  |  |  | 4 | next LINE; | 
| 69 |  |  |  |  |  |  | } | 
| 70 | 25 |  |  |  |  | 25 | s/%.*$//; | 
| 71 | 25 |  |  |  |  | 57 | $self->{'content'} .= $_; | 
| 72 |  |  |  |  |  |  | } | 
| 73 | 1 |  |  |  |  | 34 | close(FILE); | 
| 74 | 1 |  |  |  |  | 3 | return $self; | 
| 75 |  |  |  |  |  |  | } | 
| 76 |  |  |  |  |  |  |  | 
| 77 |  |  |  |  |  |  | =item LaTeX::Parser->latex | 
| 78 |  |  |  |  |  |  |  | 
| 79 |  |  |  |  |  |  | No arguments.  Actually does all the work.  Loads the LaTeX file if no | 
| 80 |  |  |  |  |  |  | content was specified, and returns a reference to all parsed | 
| 81 |  |  |  |  |  |  | information. | 
| 82 |  |  |  |  |  |  |  | 
| 83 |  |  |  |  |  |  | =cut | 
| 84 |  |  |  |  |  |  | sub latex { | 
| 85 | 1 |  |  | 1 | 1 | 5 | my $self = shift; | 
| 86 |  |  |  |  |  |  |  | 
| 87 | 1 | 50 |  |  |  | 8 | if (!defined $self->{'content'}) { | 
| 88 | 1 |  |  |  |  | 3 | $self->load; | 
| 89 |  |  |  |  |  |  | } | 
| 90 |  |  |  |  |  |  |  | 
| 91 | 1 |  |  |  |  | 3 | my $content = $self->{'content'}; | 
| 92 |  |  |  |  |  |  |  | 
| 93 | 1 |  |  |  |  | 3 | do { | 
| 94 | 25 | 100 |  |  |  | 73 | if ($content =~ m/^(.*?)([\\\{])/s) { | 
| 95 | 24 |  |  |  |  | 37 | my $prematch = $1; | 
| 96 | 24 |  |  |  |  | 28 | my $match = $2; | 
| 97 |  |  |  |  |  |  |  | 
| 98 | 24 | 100 |  |  |  | 41 | if ($prematch ne '') { | 
| 99 | 11 |  |  |  |  | 11 | push @{$self->{'parsed'}}, $prematch; | 
|  | 11 |  |  |  |  | 19 |  | 
| 100 | 11 |  |  |  |  | 17 | $prematch = quotemeta($prematch); | 
| 101 | 11 |  |  |  |  | 68 | $content =~ s/^$prematch//s; | 
| 102 |  |  |  |  |  |  | } | 
| 103 |  |  |  |  |  |  |  | 
| 104 | 24 | 100 |  |  |  | 56 | if ($match eq '{') { | 
|  |  | 50 |  |  |  |  |  | 
| 105 | 13 |  |  |  |  | 18 | $match = &matching('{', '}', $content); | 
| 106 | 13 |  |  |  |  | 13 | push @{$self->{'parsed'}}, $match; | 
|  | 13 |  |  |  |  | 26 |  | 
| 107 | 13 |  |  |  |  | 19 | $match = quotemeta($match); | 
| 108 | 13 |  |  |  |  | 150 | $content =~ s/^$match//s; | 
| 109 |  |  |  |  |  |  | } elsif ($match eq '\\') { | 
| 110 | 11 | 50 |  |  |  | 30 | if ($content =~ m/^(\\[\w\\]+)/) { | 
|  |  | 0 |  |  |  |  |  | 
| 111 | 11 |  |  |  |  | 16 | $match = $1; | 
| 112 |  |  |  |  |  |  | } elsif ($content =~ m/^(\\.)/) { | 
| 113 | 0 |  |  |  |  | 0 | $match = $1; | 
| 114 |  |  |  |  |  |  | } else { | 
| 115 | 0 |  |  |  |  | 0 | die "A \\ Command I don't understand"; | 
| 116 |  |  |  |  |  |  | } | 
| 117 | 11 |  |  |  |  | 11 | push @{$self->{'parsed'}}, $match; | 
|  | 11 |  |  |  |  | 21 |  | 
| 118 | 11 |  |  |  |  | 16 | $match = quotemeta($match); | 
| 119 | 11 |  |  |  |  | 109 | $content =~ s/^$match//s; | 
| 120 |  |  |  |  |  |  | } else { | 
| 121 | 0 |  |  |  |  | 0 | die "Found `$match' where only `{' of `\\' should be"; | 
| 122 |  |  |  |  |  |  | } | 
| 123 |  |  |  |  |  |  |  | 
| 124 |  |  |  |  |  |  | } else { | 
| 125 | 1 |  |  |  |  | 2 | push @{$self->{'parsed'}}, $content; | 
|  | 1 |  |  |  |  | 2 |  | 
| 126 | 1 |  |  |  |  | 3 | $content = ''; | 
| 127 |  |  |  |  |  |  | } | 
| 128 |  |  |  |  |  |  | } while ($content ne ''); | 
| 129 | 1 |  |  |  |  | 4 | return $self->{'parsed'}; | 
| 130 |  |  |  |  |  |  | } | 
| 131 |  |  |  |  |  |  |  | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  |  | 
| 134 |  |  |  |  |  |  | ############## | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  | # Just a little utility program to match nested, single-character | 
| 137 |  |  |  |  |  |  | # delimited quotes.  Should make it so one can backslash-escape the | 
| 138 |  |  |  |  |  |  | # delimiter. | 
| 139 |  |  |  |  |  |  |  | 
| 140 |  |  |  |  |  |  | sub matching { | 
| 141 | 13 |  |  | 13 | 0 | 14 | my $begin = shift; | 
| 142 | 13 |  |  |  |  | 15 | my $end = shift; | 
| 143 | 13 |  |  |  |  | 13 | my $text = shift; | 
| 144 |  |  |  |  |  |  |  | 
| 145 | 13 |  |  |  |  | 14 | my $loop = 1; | 
| 146 | 13 |  |  |  |  | 14 | my $deep = 1; | 
| 147 |  |  |  |  |  |  |  | 
| 148 | 13 |  |  |  |  | 25 | until ($deep == 0) { | 
| 149 | 272 |  |  |  |  | 279 | my $c = substr($text, $loop, 1); | 
| 150 | 272 | 100 |  |  |  | 418 | if ($c eq $begin) { | 
| 151 | 2 |  |  |  |  | 3 | $deep++; | 
| 152 |  |  |  |  |  |  | } | 
| 153 | 272 | 100 |  |  |  | 381 | if ($c eq $end) { | 
| 154 | 15 |  |  |  |  | 14 | $deep--; | 
| 155 |  |  |  |  |  |  | } | 
| 156 | 272 |  |  |  |  | 439 | $loop++; | 
| 157 |  |  |  |  |  |  | } | 
| 158 | 13 |  |  |  |  | 29 | return substr($text, 0, $loop); | 
| 159 |  |  |  |  |  |  | } | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | 1; | 
| 162 |  |  |  |  |  |  | __END__ |