| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package C::Sharp::Tokener; | 
| 2 |  |  |  |  |  |  | our $ref_name; | 
| 3 |  |  |  |  |  |  | our ($ref_line, $line, $col, $current_token, $handle_get_set) = (1,1,1,0,0); | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  | our $VERSION = "0.08062001"; # MCS version number | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | =head1 NAME | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | C::Sharp::Tokener - Tokeniser for C# | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | use C::Sharp::Tokener; | 
| 14 |  |  |  |  |  |  | do { | 
| 15 |  |  |  |  |  |  | ($token, $ttype, $remainder) = C::Sharp::Tokener::tokener($input); | 
| 16 |  |  |  |  |  |  | } while ($input = $remainder) | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | use C::Sharp::Parser; | 
| 19 |  |  |  |  |  |  | $parser = new C::Sharp::Parser; | 
| 20 |  |  |  |  |  |  | $parser->YYParse(yylex => \&C::Sharp::Tokener::yy_tokener); | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | C# is the new .NET programming language from Microsoft; the Mono project | 
| 25 |  |  |  |  |  |  | is an Open Source implementation of .NET. This code, based on the Mono | 
| 26 |  |  |  |  |  |  | project, implements a tokeniser for C#. Combined with | 
| 27 |  |  |  |  |  |  | C it can be used to create a compiler for C#. | 
| 28 |  |  |  |  |  |  |  | 
| 29 |  |  |  |  |  |  | =head1 SEE ALSO | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | L | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | =head1 AUTHOR | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  | Simon Cozens (simon@cpan.org) | 
| 36 |  |  |  |  |  |  | Based very, very heavily on code by Miguel de Icaza (miguel@gnu.org) | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | =cut | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | my %keywords; | 
| 41 |  |  |  |  |  |  | my $number; | 
| 42 |  |  |  |  |  |  | my $putback_char = -1; | 
| 43 |  |  |  |  |  |  | my $val; | 
| 44 |  |  |  |  |  |  | my $error_details; | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | sub location { | 
| 47 | 0 |  | 0 | 0 | 0 | 0 | return "Line: $line Col: $col\nVirtLine: $ref_line Token: $current_token ". | 
| 48 |  |  |  |  |  |  | ($current_token eq "ERROR" && "Detail: $error_details"); | 
| 49 |  |  |  |  |  |  | } | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | sub properties { | 
| 52 | 0 | 0 |  | 0 | 0 | 0 | defined $_[0] ? $handle_get_set = $_[0] : $handle_get_set; | 
| 53 |  |  |  |  |  |  | } | 
| 54 |  |  |  |  |  |  |  | 
| 55 | 0 |  |  | 0 | 0 | 0 | sub error { $error_details } | 
| 56 | 0 |  |  | 0 | 0 | 0 | sub Line  { $line } | 
| 57 | 0 |  |  | 0 | 0 | 0 | sub Col   { $col } | 
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  | $keywords{lc($_)}=$_ for qw{ | 
| 60 |  |  |  |  |  |  | ABSTRACT AS ADD BASE BOOL BREAK BYTE CASE CATCH CHAR CHECKED CLASS CONST CONTINUE DECIMAL DEFAULT DELEGATE DO DOUBLE | 
| 61 |  |  |  |  |  |  | ELSE ENUM EVENT EXPLICIT EXTERN FALSE FINALLY FIXED FLOAT FOR FOREACH GOTO IF IMPLICIT IN INT INTERFACE INTERNAL IS | 
| 62 |  |  |  |  |  |  | LOCK LONG NAMESPACE NEW NULL OBJECT OPERATOR OUT OVERRIDE PARAMS PRIVATE PROTECTED PUBLIC READONLY REF RETURN REMOVE | 
| 63 |  |  |  |  |  |  | SBYTE SEALED SHORT SIZEOF STATIC STRING STRUCT SWITCH THIS THROW TRUE TRY TYPEOF UINT ULONG UNCHECKED UNSAFE USHORT | 
| 64 |  |  |  |  |  |  | USING VIRTUAL VOID WHILE | 
| 65 |  |  |  |  |  |  | }; | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  | sub is_keyword { | 
| 68 | 36 | 50 | 33 | 36 | 0 | 146 | return 0 if (($_ eq "get" or $_ eq "set") and not $handle_get_set); | 
|  |  |  | 33 |  |  |  |  | 
| 69 | 36 |  |  |  |  | 217 | return exists $keywords{$_[0]}; | 
| 70 |  |  |  |  |  |  | } | 
| 71 |  |  |  |  |  |  |  | 
| 72 |  |  |  |  |  |  | sub yy_tokener { | 
| 73 | 0 |  |  | 0 | 0 | 0 | my $self = shift; # This is a Parse::Yapp object | 
| 74 | 0 |  |  |  |  | 0 | my ($token,$ttype); | 
| 75 | 0 |  |  |  |  | 0 | ($token, $ttype, $self->YYData->{INPUT}) = tokener($self->YYData->{INPUT}); | 
| 76 | 0 |  |  |  |  | 0 | return ($ttype, $token); | 
| 77 |  |  |  |  |  |  | } | 
| 78 |  |  |  |  |  |  |  | 
| 79 |  |  |  |  |  |  | sub tokener { | 
| 80 | 84 |  |  | 84 | 0 | 3987 | $_ = shift; | 
| 81 | 84 |  |  |  |  | 89 | my ($allow_keyword) = 0; | 
| 82 | 84 |  |  |  |  | 180 | while ($_) { | 
| 83 | 86 |  |  |  |  | 288 | s/^\s+//sm; | 
| 84 | 86 | 100 |  |  |  | 198 | if (/^[a-zA-Z_]/) { # Check C# standard - may be Unicode aware | 
| 85 | 36 |  |  |  |  | 104 | s/(.\w*)//; | 
| 86 | 36 | 100 | 66 |  |  | 55 | return ($1, "IDENTIFIER", $_) if !is_keyword($1) or $allow_keyword; | 
| 87 | 10 |  |  |  |  | 45 | return ($1, uc($1), $_); | 
| 88 |  |  |  |  |  |  | } | 
| 89 | 50 | 100 | 66 |  |  | 203 | if (/^\.\d/ || /^\d/) { | 
| 90 | 1 |  |  |  |  | 2 | my $real = 0; | 
| 91 | 1 |  |  |  |  | 2 | my $val  = 0; | 
| 92 | 1 | 50 |  |  |  | 9 | if (s/^0[Xx]([A-Fa-f0-9]+)//) { | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 93 | 0 |  |  |  |  | 0 | $val = hex($1); | 
| 94 | 0 | 0 |  |  |  | 0 | die "Oops: [UL] not handled yet." if /^[ULul]/; | 
| 95 | 0 |  |  |  |  | 0 | return ($1, "LITERAL_INTEGER", $_); | 
| 96 |  |  |  |  |  |  | } elsif (s/^(\d+)(\.\D)/$2/) { | 
| 97 | 0 |  |  |  |  | 0 | return ($1, "LITERAL_INTEGER", $_); | 
| 98 |  |  |  |  |  |  | } elsif (s/^(\d+\.\d+)//) { | 
| 99 | 0 |  |  |  |  | 0 | $real =1; | 
| 100 | 0 |  |  |  |  | 0 | $val = $1; | 
| 101 |  |  |  |  |  |  | } else { | 
| 102 | 1 |  |  |  |  | 4 | s/(\d+)//; $val = $1; | 
|  | 1 |  |  |  |  | 2 |  | 
| 103 |  |  |  |  |  |  | } | 
| 104 | 1 | 50 |  |  |  | 4 | $val .= $1 if (s/^([eE][+-]\d+)//); | 
| 105 | 1 | 50 |  |  |  | 3 | if (s/^[fF]//) { return ($val, "LITERAL_FLOAT", $_) } | 
|  | 0 |  |  |  |  | 0 |  | 
| 106 | 1 | 50 |  |  |  | 3 | if (s/^[dD]//) { return ($val, "LITERAL_DOUBLE", $_) } | 
|  | 0 |  |  |  |  | 0 |  | 
| 107 | 1 | 50 |  |  |  | 3 | if (s/^[mM]//) { return ($val, "LITERAL_DECIMAL", $_) } | 
|  | 0 |  |  |  |  | 0 |  | 
| 108 | 1 | 50 |  |  |  | 4 | if (!$real) { | 
| 109 | 1 | 50 |  |  |  | 4 | die "Oops: [UL] not handled yet." if /^[ULul]/; | 
| 110 | 1 |  |  |  |  | 4 | return ($val, "LITERAL_INTEGER", $_); | 
| 111 |  |  |  |  |  |  | } | 
| 112 | 0 |  |  |  |  | 0 | die "Something went wrong with value $val"; | 
| 113 |  |  |  |  |  |  | } | 
| 114 | 49 | 100 |  |  |  | 128 | return (".", "DOT", $_) if s/^\.//; | 
| 115 | 44 | 100 |  |  |  | 77 | s[^//.*][] and next; | 
| 116 | 43 | 100 |  |  |  | 97 | s[^/\*.*?\*/][]ms and next; | 
| 117 |  |  |  |  |  |  | # Handle preprocessor commands here. (Honest) | 
| 118 | 42 | 100 |  |  |  | 279 | return $1, $1, $_ if s#^( | 
| 119 |  |  |  |  |  |  | [{}\[\]\(\),:;~\?] | 
| 120 |  |  |  |  |  |  | | | 
| 121 |  |  |  |  |  |  | \+[\+=]? | 
| 122 |  |  |  |  |  |  | | | 
| 123 |  |  |  |  |  |  | -[\-=>]? | 
| 124 |  |  |  |  |  |  | | | 
| 125 |  |  |  |  |  |  | [!=/%^]=? | 
| 126 |  |  |  |  |  |  | | | 
| 127 |  |  |  |  |  |  | ==? | 
| 128 |  |  |  |  |  |  | | | 
| 129 |  |  |  |  |  |  | &[&=]? | 
| 130 |  |  |  |  |  |  | | | 
| 131 |  |  |  |  |  |  | \|[\|=]? | 
| 132 |  |  |  |  |  |  | | | 
| 133 |  |  |  |  |  |  | <=? | 
| 134 |  |  |  |  |  |  | | | 
| 135 |  |  |  |  |  |  | >>?=? | 
| 136 |  |  |  |  |  |  | ) | 
| 137 |  |  |  |  |  |  | ##x; # Mighty. | 
| 138 | 2 | 100 |  |  |  | 15 | if (s/^\"([^\\\"]*(?:\\.[^\\\"]*)*)\"//) { # Thank you, Text::Balanced | 
| 139 | 1 |  |  |  |  | 2 | my $string = $1; | 
| 140 |  |  |  |  |  |  | #            $string =~ s/\\(.)/"\\$1"/eeg; | 
| 141 | 1 |  |  |  |  | 4 | return ($string, "LITERAL_STRING", $_); | 
| 142 |  |  |  |  |  |  | } | 
| 143 | 1 | 50 |  |  |  | 3 | die "Urgh" if /^\"/; | 
| 144 | 1 | 50 |  |  |  | 3 | if (s/^'//) { | 
| 145 | 0 | 0 |  |  |  | 0 | die "CS1011: Empty character literal" if /^'/; | 
| 146 | 0 |  |  |  |  | 0 | my $char; | 
| 147 | 0 | 0 |  |  |  | 0 | if (s/^\\(.)//) { $char = eval qq{"\\$1"}; } | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 148 | 0 |  |  |  |  | 0 | else {s/(.)//; $char = $1 }; | 
| 149 | 0 | 0 |  |  |  | 0 | die "CS1012: Too many characters in character literal" if not /^'/; | 
| 150 | 0 |  |  |  |  | 0 | return ($char, "LITERAL_CHARACTER", $_); | 
| 151 |  |  |  |  |  |  | } | 
| 152 | 1 | 50 |  |  |  | 4 | ($allow_keyword = 1), next if s/^@//; | 
| 153 | 1 | 50 |  |  |  | 8 | return ("","","") unless $_; | 
| 154 | 0 |  |  |  |  |  | die "Unrecognised input character: ".substr($_,0,1); | 
| 155 |  |  |  |  |  |  | } | 
| 156 |  |  |  |  |  |  | } | 
| 157 |  |  |  |  |  |  |  | 
| 158 |  |  |  |  |  |  | 1; |