|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package File::ANVL;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # XXXXXxxxx make adding a value policy-driven, eg,  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # "add" could mean (a) replace, (b) push on end array,  | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # (c) push on start of array, (d) string concatenation,  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # (d) error.  | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
8
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
139546
 | 
 use 5.006;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
187
 | 
    | 
| 
9
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
24
 | 
 use strict;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
144
 | 
    | 
| 
10
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
20
 | 
 use warnings;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
152
 | 
    | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
12
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
18
 | 
 use constant NL		=> "\n";  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
345
 | 
    | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ANVL flavors  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
16
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
18
 | 
 use constant ANVL	=> 1;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
159
 | 
    | 
| 
17
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
20
 | 
 use constant ANVLR	=> 2;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
160
 | 
    | 
| 
18
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
18
 | 
 use constant ANVLS	=> 3;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14216
 | 
    | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION;  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $VERSION = sprintf "%d.%02d", q$Name: Release-1-05 $ =~ /Release-(\d+)-(\d+)/;  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 require Exporter;  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @ISA = qw(Exporter);  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @EXPORT = qw();  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @EXPORT_OK = qw(  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	anvl_recarray anvl_arrayhash  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	anvl_name_naturalize  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	anvl_rechash anvl_valsplit  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	erc_anvl_expand_array kernel_labels  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	xgetlines trimlines  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	make_get_anvl  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	anvl_opt_defaults anvl_decode anvl_om  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	anvl_encode anvl_recsplit  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	ANVL ANVLR ANVLS ANVLSH  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 );  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our %EXPORT_TAGS = (all => [ @EXPORT_OK ]);  | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # All these symbols must be listed also in EXPORT_OK (?)  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @EXPORT_FAIL = qw(  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	ANVL ANVLR ANVLS ANVLSH  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 );  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $anvl_mode = 'ANVL';		# default mode  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # This is a magic routine that the Exporter calls for any unknown symbols.  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
54
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
0
  
 | 
490
 | 
 sub export_fail { my( $class, @symbols )=@_;  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
56
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 	$anvl_mode = $_		for (@symbols);  | 
| 
57
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2767
 | 
 	return ();  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Initialize or re-initialize options to factory defaults.  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub anvl_opt_defaults { return {  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Input options  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
66
 | 
7
 | 
 
 | 
 
 | 
  
7
  
 | 
  
0
  
 | 
61
 | 
 	autoindent	=> 1,	# yes, fix recoverably bad indention  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	comments	=> 0,	# no, don't parse comments  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elemsproc	=>	# to expand short form ERCs (if any)  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		\&File::ANVL::erc_anvl_expand_array,  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elemsprocpat	=>	# no call from anvl_om if no match  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		qr/^erc:/m,	# in rec; no call if set and matches  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	};  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # xxx     decide on good name for short form and long form ERC  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Returns a closure that calls an input reader with that's set to *ARGV  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # by default.  If $reader and $readee are defined, they are stored in the  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # closure and all reads will be performed by calling &$reader($readee).  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # The default reader collects text lines from a file and returns all the  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # lines associated with the next "record", which is considered to start  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # wherever the read pointer happens to be and continues to the first two  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # blank lines encountered that occur after "substance" is detected.  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Substance is defined to be at least one non-whitespace character  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # occurring on a non-comment line.  Comment and blank lines that precede  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # a record with substance are returned, but any such lines that follow  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # that the final record are discarded.  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
90
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
0
  
 | 
389
 | 
 sub make_get_anvl { my( $reader, $readee ) = shift;  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
92
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	unless ($reader) {  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
94
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 		my $rec;		# returned record  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		my $s;			# next increment of input  | 
| 
96
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $substance;		# boolean detecting substance  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
98
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
1393
 | 
 	    return sub { my( $filehandle ) = shift;  | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# Returns a subroutine, call it get_anvl()  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#   Usage:  $record = get_anvl( [$filehandle] );  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# It reads ANVL input records as text lines from the  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# file given by $filehandle (*ARGV by default, which can  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# process multiple files via while loop magic).  Usually,  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# the closure holds enough state information, set up by  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# make_get_anvl(), that get_anvl() can be called without  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# arguments.  get_anvl() returns the record read as a  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# string, or returns undef on end of input or error.  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#  | 
| 
112
 | 
4
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
18
 | 
 		$filehandle ||= *ARGV;  | 
| 
113
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
 		local $/ = NL.NL;	# a kind of "paragraph" input mode  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					# $/ === $INPUT_RECORD_SEPARATOR  | 
| 
115
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 		$rec = '';  | 
| 
116
 | 
4
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
118
 | 
 		1 while (  | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			defined($s = <$filehandle>) and	# read to eof and  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				($rec .= $s),	# save everything, but stop  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$substance =	# when we detect substance, ie,  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					$s =~ /^[^#\s]/m || $s =~ /^[^#].*\S/m,  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				! $substance	# non-comment with non-space  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		);  | 
| 
123
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
24
 | 
 		return $substance ?  | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$rec :	# return either collected record or undef  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			undef;	# any final blank or comment lines are tossed  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# yyy If more than one file, line numbers normally accumulate  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# across files.  Should we preserve line numbers within each  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# files?  (If so, use "close ARGV" (Perl idiom) to cause $.  | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# (linenum) to be reset between files.  | 
| 
131
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	    };  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# If we get here, $reader should reference an input method and  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# $readee is assumed to be any value (eg, BDB handle) that may  | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# permit &reader to get the next record.  Any other arguments  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# passed to the get_anvl() function below will be passed along  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# too, ie, $reader($readee, @_).  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
140
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	ref($reader) eq "CODE"		or return undef;  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
142
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my $rec;			# returned record  | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $s;				# next increment of input  | 
| 
144
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my $substance;			# boolean detecting substance  | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return sub {  | 
| 
147
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
 		$rec = '';  | 
| 
148
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 		1 while (  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# XXX should this accumulate in general??? or  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#     should we leave it to the definer of $reader?  | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			defined($s = &reader($readee, @_)) and	# read and  | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				($rec .= $s),	# save everything, but stop  | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$substance =	# when we detect substance, ie,  | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					$s =~ /^[^#\s]/m || $s =~ /^[^#].*\S/m,  | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				! $substance	# non-comment with non-space  | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		);  | 
| 
157
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return $substance ?  | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$rec :	# return either collected record or undef  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			undef;	# any final blank or comment lines are tossed  | 
| 
160
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	};  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # XXX deprecated!  see sub make_get_anvl  | 
| 
164
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
0
 | 
 sub xgetlines { my( $filehandle )=@_;  | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
166
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my $rec = '';			# returned record  | 
| 
167
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my $s;				# next increment of input  | 
| 
168
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	local $/ = NL.NL;		# a kind of "paragraph" input mode  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					# $/ === $INPUT_RECORD_SEPARATOR  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# If $filehandle is specified, use the Perl <$filehandle> idiom to  | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# return next unit of input (normally a line, but here a para).  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
174
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 	$filehandle ||= *ARGV;  | 
| 
175
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 	1 while ( defined( $s = <$filehandle> ) and	# read up to two \n's  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# If we get here, $s now contains a block to save.  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#  | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$rec .= $s,  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# We continue reading only if there's no substance,  | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# ie, no line read starts with a non-comment and no  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# non-comment line read contains non-whitespace  | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#$s !~ /^[^#\s]/m and	# if no line read starts with  | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#    $s !~ /^[^#].*\S/m	# or contains substance  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#(! ($s =~ /^[^#\s]/m || $s =~ /^[^#].*\S/m) and  | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#	$rec .= "substance found in <$s>\n"),  | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			! ($s =~ /^[^#\s]/m || $s =~ /^[^#].*\S/m)  | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		);  | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#$s !~ /^\s*[^#\s]/m ||	# match no line of susbstance  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#$rec .= $s  | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#($rec .= $s),	# only "paragraphs"; save everything  | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#$s !~ /^\s*[^#\s]/m	# but stop when substance seen  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#$s !~ /^\s*[^#\s]/m	# but stop when substance seen  | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# and while $s matches no line starting with ^#  | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# while every line in $s is either all whitespace  | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#    or all comment (ie, first non-ws char is #)  | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#$s =~ /\S/	# but stop when we see substance  | 
| 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#$s !~ /\S/	# but stop when we see substance  | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # substance means \S on a non-comment line  | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #        $s !~ /^\S|[^#].*\S/m  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ! ($s =~ /^[^#\s]/m || $s =~ /^[^#].*\S/m)  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#  | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#); # /^\s*[^#\s]/m  | 
| 
209
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 	defined($s) or  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return $rec || undef;	# almost eof or real eof  | 
| 
211
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return $rec;  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XXXX what happens when one file ends prematurely and  | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# another begins? does last record for first file get  | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# returned glued to beginning of first recond of 2nd file?  | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# If more than one file, line numbers normally just accumulate.  | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# We want to preserve line numbers within files, so we use this  | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# next Perl idiom to cause $. (linenum) to be reset between files.  | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#close ARGV	if eof;		# reset line numbers between files  | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # args: record, reference to whitespace lines, reference to real record lines  | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # xxx replace \n with NL throughout  | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # returns undef when $rec trims to nothing (EOF)  | 
| 
226
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
0
  
 | 
1076
 | 
 sub trimlines { my( $rec, $r_wslines, $r_rrlines )=@_;  | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# $rec might legitimately be undefined if called as  | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# trimlines(getlines(), ...)  | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
231
 | 
5
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
15
 | 
 	$rec ||= '';  | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
233
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
 	$rec =~ s/^(\s*)//;		# '*' guarantees $1 will be defined  | 
| 
234
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 	my $blanksection = $1;  | 
| 
235
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	my @newlines;  | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
237
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
32
 | 
 	ref($r_wslines) eq 'SCALAR' and		# if given, define it  | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$$r_wslines = scalar(@newlines = $blanksection =~ /\n/g);  | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
240
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
42
 | 
 	ref($r_rrlines) eq 'SCALAR' and		# if given, define it  | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$$r_rrlines = scalar(@newlines = $rec =~ /\n/g);  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#$$r_rrlines = scalar($rec =~ /$/gm);	# xxx why doesn't this work?  | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# At this point $r_wslines and $r_rrlines (if supplied) are safely  | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# defined and ready for return.  | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
248
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
20
 | 
 	$rec or			# empty record (but $r_wslines may be defined)  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return undef;	# signal eof-style return  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#$rec =~ /\n\n$/ and		# ok record ending -- this is  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#	return $rec;		# the usual  return  | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#$rec =~ s/\n*$/\n\n/;		# normalize premature eof ending  | 
| 
254
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	return $rec;  | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # returns empty string on success or string beginning "warning:..."  | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # third arg (0 or 1) optional  | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # elems is returned array of name value pairs  | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #DEPRECATED:  | 
| 
261
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
0
  
 | 
1
 | 
 sub anvl_recsplit { my( $record, $r_elems, $strict )=@_;  | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
263
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	! defined($record) and  | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return "needs an ANVL record";  | 
| 
265
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	ref($r_elems) ne "ARRAY" and  | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return "2nd arg must reference an array";  | 
| 
267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
268
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
 	my $strict_default = 0;  | 
| 
269
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	! defined($strict) and  | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$strict = $strict_default;  | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
272
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 	local $_ = $record;  | 
| 
273
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	s/^\s*//; s/\s*$//;		# trim both ends  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    | 
| 
274
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	/\n$/	or s/$/\n/;		# normalize end of record to \n  | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
276
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	/\n\n/ and  | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return "record should have no internal blank line(s)";  | 
| 
278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# xxx adjust regexp for ANVLR  | 
| 
279
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	! /^[^\s:][\w 	]*:/ and	# match against first element  | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return "well-formed record begins with a label and colon";  | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
282
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	$anvl_mode ne ANVLR and  | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		s/^#.*\n//gm;		# remove comments plus final \n  | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# If we're not in strict parse mode, correct for common error  | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# where continued value is not indented.  We can pretty safely  | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# assume a continued value if a line is flush left and contains  | 
| 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# no colon at all.  | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#   | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# This next substitution match needs to be multi-line to avoid  | 
| 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# more explicit looping.  | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XXX there's probably a more efficient way to do this.  | 
| 
294
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 	my $msg = "";  | 
| 
295
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	my $indented = s/^([^\s:][^:]*)$/ $1/gm;  | 
| 
296
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	if ($indented) {  | 
| 
297
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$strict and  | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			(@$r_elems = undef),  | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			return "error: $indented unindented value line(s)";  | 
| 
300
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$msg = "warning: indenting $indented value line(s)";  | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# if we get here, assume standard continuation lines, and join them  | 
| 
303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# (GRANVL style)  | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
305
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	s/\n\s+/ /g;  | 
| 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XXX should have a newline-preserving form of parse?  | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Split into array element pairs.  Toss first "false" split.  | 
| 
309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# xxx buggy limited patterns, how not to match newline  | 
| 
310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# This is the critical splitting step.  | 
| 
312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# splits line beginning  ..... xxx  | 
| 
313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
314
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	s/\n$//;			# strip final \n  | 
| 
315
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	(undef, @$r_elems) = split /\n*^([^\s:][\w 	]*):\s*/m;  | 
| 
316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
317
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	return $msg;  | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # xxxxxxxx respond to 'comments' (def. off), 'autoindent' (def. on),  | 
| 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #   'anvlr' (def. off), 'granvl' ?  | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # This is the closest thing to a reference implementation of an ANVL  | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # record parser.  | 
| 
325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # It returns "" on success, or "error: ..." or "warning: ..."  | 
| 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
327
 | 
10
 | 
 
 | 
 
 | 
  
10
  
 | 
  
0
  
 | 
6286
 | 
 sub anvl_recarray { my( $record, $r_elems, $linenum, $o )=@_;  | 
| 
328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
329
 | 
10
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
31
 | 
 	! defined($record) and  | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return "error: no input record";  | 
| 
331
 | 
10
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
32
 | 
 	ref($r_elems) ne "ARRAY" and  | 
| 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return "error: 2nd arg must reference an array";  | 
| 
333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Note: this input $linenum is pure digits, while $lineno on  | 
| 
335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# output is a combination of digits and type (':' or '#')  | 
| 
336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
337
 | 
9
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
23
 | 
 	defined($linenum)	or $linenum = 1;  | 
| 
338
 | 
9
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
30
 | 
 	$linenum =~ /\D/ and  | 
| 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return "error: 3rd arg ($linenum) must be a positive integer";  | 
| 
340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # XXX can't this be optimized a bit to keep defaults around?  | 
| 
341
 | 
9
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
34
 | 
 	$o ||= anvl_opt_defaults();  | 
| 
342
 | 
9
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
26
 | 
 	ref($o) ne "HASH" and  | 
| 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return "error: 4th arg must reference a hash";  | 
| 
344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
345
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 	local $_ = $record;	# localizing $_ prevents modifying global $_  | 
| 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
347
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
 	s/^\s*//; s/\s*$//;		# trim both ends  | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
172
 | 
    | 
| 
348
 | 
9
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
42
 | 
 	/\n$/	or s/$/\n/;		# normalize end of record to \n  | 
| 
349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#s/\n?$/\nEOR:/;	# whether record ends in \n or not, normalize  | 
| 
350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#		# end of record to \nEOR: (note no \n after \nEOR:)  | 
| 
351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Reject some malformed cases.  | 
| 
353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#/\n\n/ and  | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#	return "error: record should have no internal blank line(s)";  | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# xxx adjust regexp for ANVLR  | 
| 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # XXX fix so record can consist of nothing but comments and/or whitespace;  | 
| 
358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     comments _may_ be recognized in regular records, but not in this kind  | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#/^[^\s:][\w 	]*:/m or	# match against first element  | 
| 
360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#	return "error: record ($_) should begin with a label and colon";  | 
| 
361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Any other unindented line not containing a colon will either  | 
| 
363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# cause an error or will be automatically indented.  | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# xxx what about $anvl_mode ne ANVLR and??  | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Now we synthesize stuff (line numbers and pseudo-element names for  | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# any comments) in order to create a uniform structure on each line,  | 
| 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# so that we can finally call 'split' to bust apart that structure  | 
| 
370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# into a Perl array in which every 3-element group corresponds to  | 
| 
371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#     1. a line number,  | 
| 
372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#     2. a label, and  | 
| 
373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#     3. a value.  | 
| 
374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# First insert a line number and ":" in front of each line.  | 
| 
376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
377
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
 	my $num = $linenum;  | 
| 
378
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
55
 | 
 	s/^/ $num++ . ":" /gem;	# put a line number on each line  | 
| 
 
 | 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
121
 | 
    | 
| 
379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Remove blank lines, now that line numbers have been preserved.  | 
| 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
382
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
48
 | 
 	s/^\d+:[^\S\n]*\n//gm;  | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Now, if we're not deleting comments, insert a pseudo-element  | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# name '#:' in front of each comment while also changing the ':'  | 
| 
386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# after the line numer to '#'.  This means that all lines will  | 
| 
387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# begin with a line number followed by ':' for real elements or  | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# by '#' for comment elements.  Eg, '# foo' on line 3 becomes  | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# '3##:# foo', which conforms to the eventual split pattern we  | 
| 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# rely on (at end).  | 
| 
391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # xxx problem with line #K:value, which becomes, eg, 4##:K:value  | 
| 
393
 | 
9
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
61
 | 
 	$$o{comments} and		# if we're keeping comments  | 
| 
394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		s/^(\d+):#/$1##:/gm, 1  | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	                #    ^^^  | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	                #    123  | 
| 
397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# 1=separator, 2=pseudo-name,  | 
| 
398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# 3=original value minus '#' starts after :  | 
| 
399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	or				# else completely delete comments  | 
| 
401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		s/^\d+:#.*\n//gm	# up to and including final \n  | 
| 
402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	;  | 
| 
403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Return if nothing's left after deleting blank lines and comments.  | 
| 
405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
406
 | 
9
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
31
 | 
 	/^\s*$/s and  | 
| 
407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return "warning: record at line $linenum has no content";  | 
| 
408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
409
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 	my $msg = "";			# default return message  | 
| 
410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# If we're not in strict parse mode, correct for common error  | 
| 
412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# where continued value is not indented.  We can pretty safely  | 
| 
413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# assume a continued value if a line is flush left and contains  | 
| 
414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# no colon at all.  | 
| 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#   | 
| 
416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# This next substitution match is multi-line to avoid explicit  | 
| 
417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# looping (yyy is this an efficient way to do it?).  It indents  | 
| 
418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# by one space any line starting without a space or colon and  | 
| 
419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# that has no instance of a colon until end of line.  | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
421
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
57
 | 
 	my $indented = s/^(\d+:)([^\s:][^:]*)$/$1 $2/gm;  | 
| 
422
 | 
9
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
21
 | 
 	if ($indented) {  | 
| 
423
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 		unless ($$o{autoindent}) {  | 
| 
424
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 			@$r_elems = undef;	# XXXXX isn't this too much?  | 
| 
425
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 			return "error: $indented unindented value line(s)";  | 
| 
426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
427
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 		$msg = "warning: indenting $indented value line(s)";  | 
| 
428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Now we join the (normalized) continuation lines (GRANVL style)  | 
| 
431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# so each element-value pair is on one line.  The + in [ \t]+ is  | 
| 
432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# very important; we can't use \s+ here because \s matches a \n.  | 
| 
433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
434
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
 	s/\n\d+:[ \t]+/ /g;  | 
| 
435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#s/\n\d+:\s+/ /g;  | 
| 
436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XXX should we have a newline-preserving form of parse?  | 
| 
437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Get rid of initial whitespace from all non-comment GRANVL values.  | 
| 
439
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
66
 | 
 	s/^(\d+:[^\s:][^:]*:)[ \t]+/$1/gm;  | 
| 
440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # xxx problem with line #K:value, which becomes, eg, 4##:K:value  | 
| 
441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Split into array of element pairs.  Toss first "false" split.  | 
| 
443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# xxx buggy limited patterns, how not to match newline  | 
| 
444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# This is the critical splitting step.  | 
| 
446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# splits line beginning  ..... xxx  | 
| 
447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
448
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
 	s/\n$//;			# strip final \n  | 
| 
449
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
177
 | 
 	@$r_elems = ('', 'ANVL',	# 3rd elem of 1st triple is  | 
| 
450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# provided by first element resulting from the split  | 
| 
451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		split /\n*^(\d+[:#])([^\s:][^:]*):/m  | 
| 
453
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # xxx problem with line #K:value, which becomes, eg, 4##:K:value  | 
| 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	);  | 
| 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
457
 | 
8
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
33
 | 
 	defined($$r_elems[2]) or  | 
| 
458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return "error: split failed ($_) on '$record', " .  | 
| 
459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			"record at line $linenum";  | 
| 
460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# If there was a value with no label at the start of the record,  | 
| 
462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# we deem that interesting enough to keep even though it's not  | 
| 
463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# ANVL-compliant; the caller can prevent this by turning off  | 
| 
464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# 'autoindent', the processing for which will either flag this as  | 
| 
465
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# an error or will have inserted one space in front of the value.  | 
| 
466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
467
 | 
8
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
23
 | 
 	$$r_elems[2] =~ /^(\d+): (.*)/ and  | 
| 
468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		($$r_elems[0], $$r_elems[2]) = ($1, $2);  | 
| 
469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#(undef, @$r_elems) = split /\n*^([^\s:][\w 	]*):\s*/m;  | 
| 
471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# yyy an approach once considered but not used  | 
| 
472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# $num = $.;	# linenum  | 
| 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# s/^/ $num++ . ":" /e	while (/\n/g);  | 
| 
474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# /\G  ($N\#.*\n)+  (?=$N[^\#]) /gx	# comment block  | 
| 
475
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# /\G  ($N\S.*\n)+  (?=$N[^\S]) /gx	# element on one or more lines  | 
| 
476
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# /\G  (#.*\n)+(?=[^#])/g  | 
| 
477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# /^#.*?\n[^#]/s        # (?=lookahead)  | 
| 
478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#return "_=$_\n" . join(", ", @$r_elems);	# to check results  | 
| 
479
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
480
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
51
 | 
 	return $msg;  | 
| 
481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # XXXXXX for consolidating a:b and a:c into a:b;c, MAJOR constraint  | 
| 
484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #        is that b and c CANNOT contain '|' or we refuse...  | 
| 
485
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
486
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
0
  
 | 
2181
 | 
 sub anvl_arrayhash { my( $r_elems, $r_hash, $first_only )=@_;  | 
| 
487
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
488
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
13
 | 
 	ref($r_elems) ne "ARRAY" and  | 
| 
489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return "error: 1st arg must reference an array";  | 
| 
490
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	ref($r_hash) ne "HASH" and  | 
| 
491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return "error: 2nd arg must reference a hash";  | 
| 
492
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	defined($first_only)	or $first_only = 0;  | 
| 
493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
494
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my $num_elems = scalar @$r_elems;  | 
| 
495
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	$num_elems % 3 != 0 and  | 
| 
496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return "error: input array length must be a multiple of 3";  | 
| 
497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
498
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	$num_elems < 1		and return "";	# no elements, we're done  | 
| 
499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
500
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my $msg = '';			# xxx needed?  | 
| 
501
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 	my ($name, $value, $n, $v);  | 
| 
502
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# We know there must be at least 3 elements, so it's safe to check  | 
| 
504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# the special first triple (index 2 is the only one we look at now)  | 
| 
505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# for an initial unlabeled record element (non-standard ANVL).  | 
| 
506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# If we find something, we make up the name, '_'.  | 
| 
507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
508
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	if ($$r_elems[2]) {		# first triple is special  | 
| 
509
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 		$name = '_';  | 
| 
510
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		! defined $$r_hash{$name} and  | 
| 
511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$$r_hash{$name} = [ 0 ]		# initialize array  | 
| 
512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		or  | 
| 
513
 | 
1
 | 
  
 50
  
 | 
  
 50
  
 | 
 
 | 
 
 | 
12
 | 
 			push @{ $$r_hash{$name} }, 0	# add to array  | 
| 
514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
516
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	for ($n = 3; $n < $num_elems; $n += 3) {  | 
| 
517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
518
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 		$name = $$r_elems[$n + 1];  | 
| 
519
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		! defined $$r_hash{$name} and  | 
| 
520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$$r_hash{$name} = [ $n ]	# initialize array  | 
| 
521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		or  | 
| 
522
 | 
4
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
24
 | 
 			push @{ $$r_hash{$name} }, $n	# add to array  | 
| 
523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		;  | 
| 
524
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
525
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	return $msg;  | 
| 
526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ANVL value split  | 
| 
529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # xxx rename to anvl_valarray?  | 
| 
530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # returns empty string on success or string beginning "warning:..."  | 
| 
531
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # r_svals is reference to an array that will be filled upon return  | 
| 
532
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
0
  
 | 
5052
 | 
 sub anvl_valsplit { my( $value, $r_svals )=@_;  | 
| 
533
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
534
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
13
 | 
 	! defined($value) and  | 
| 
535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return "needs an ANVL value";  | 
| 
536
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
16
 | 
 	ref($r_svals) ne "ARRAY" and  | 
| 
537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return "2nd arg must reference an array";  | 
| 
538
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	local $_;  | 
| 
539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#xxx print "r_svals=$r_svals\n";  | 
| 
541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#xxx print "value=$value\n";  | 
| 
542
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	my $warning = "";		# xxx used?  | 
| 
543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#my $ret_subvalues = \$_[1];  | 
| 
544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Assume value is all on one line and split it.  | 
| 
546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#my @svals = split /\|/, $value;  | 
| 
547
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
 	@$r_svals = split /\|/, $value;  | 
| 
548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#$_[1] = \@svals;  | 
| 
549
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	$_ = [ split(/;/, $_) ]		# create array of arrays  | 
| 
550
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
 		for (@$r_svals);  | 
| 
551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#xxxprint("svals=", join(", ", @$_), "\n")	for (@$r_svals);  | 
| 
552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# xxxx need to look for all 3 levels:  (change spec)  | 
| 
554
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XXXXXXX  value ::= one or more svals (sval1 | sval2 | ...)  | 
| 
555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XXXXXXX  sval ::= one or more rvals (rval1 ; rval2 ; ...)  | 
| 
556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XXXXXXX  rval ::= one or more qvals (qval1 (=) qval2 (=) ...)  | 
| 
557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#   where s=sub, r=repeated, q=equivalent  | 
| 
558
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XXXXXXX  or ?? rval ::= one or more avals (aval1 (=) aval2 (=) ...)  | 
| 
559
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
19
 | 
 	return $warning ? "warning: $warning" : "";  | 
| 
560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
562
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Create record hash, elem is key, value is value  | 
| 
563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
564
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
0
  
 | 
2113
 | 
 sub anvl_rechash { my( $record, $r_hash, $strict )=@_;  | 
| 
565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
566
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	! defined($record) and  | 
| 
567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return "needs an ANVL record";  | 
| 
568
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	ref($r_hash) ne "HASH" and  | 
| 
569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return "2nd arg must reference a hash";  | 
| 
570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
571
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my $msg = "";  | 
| 
572
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
 	my @elems;  | 
| 
573
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	($msg = anvl_recsplit($record, \@elems, $strict)) and  | 
| 
574
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return "anvl_recsplit: $msg";  | 
| 
575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
576
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my ($name, $value);  | 
| 
577
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 	while (1) {  | 
| 
578
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		$name = shift @elems;  | 
| 
579
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 		last	unless defined $name; 	# nothing left  | 
| 
580
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 		$value = shift @elems;  | 
| 
581
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		if (! defined $$r_hash{$name}) {  | 
| 
582
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Nothing there, so store scalar and continue.  | 
| 
583
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 			$$r_hash{$name} = $value;	# 1st value (non-array)  | 
| 
584
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 			next;  | 
| 
585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
586
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# If we get here there's something's already there.  | 
| 
587
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# Don't overwrite if we're in $strict mode.  | 
| 
588
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# xxx document this  | 
| 
589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#  | 
| 
590
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$strict		and next;	# don't overwrite  | 
| 
591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# XXXXXxxxx make adding a value policy-driven, eg,  | 
| 
593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# "add" could mean (a) replace, (b) push on end array,  | 
| 
594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# (c) push on start of array, (d) string concatenation,  | 
| 
595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# (d) error.  | 
| 
596
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# xxx should anvl_rechash save line numbers?  | 
| 
597
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# xxx should anvl_recsplit save line numbers?  | 
| 
598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# Whatever is there could be a scalar or an array reference.  | 
| 
600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# If not a reference, create an anonymous array, put a  | 
| 
601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# scalar into it, and refer to the array.  | 
| 
602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#  | 
| 
603
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $v = $$r_hash{$name};	# add to current  | 
| 
604
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$v = [ $v ]		# make an array if currently  | 
| 
605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			unless ref $v;	# there's only one value  | 
| 
606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
607
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# If we get here, we have a reference to an array,  | 
| 
608
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# possibly empty.  Either way, we can push onto it.  | 
| 
609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#  | 
| 
610
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		push @$v, $value;  | 
| 
611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
612
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	return $msg;  | 
| 
613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # [ !"#\$%&'\(\)\*\+,/:;<=>\?@\[\\\]\|\0]  | 
| 
616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our %anvl_decoding = (  | 
| 
617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'sp'  =>  ' ',		# decodes to space (0x20)  | 
| 
619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'ex'  =>  '!',		# decodes to ! (0x21)  | 
| 
620
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'dq'  =>  '"',		# decodes to " (0x22)  | 
| 
621
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'ns'  =>  '#',		# decodes to # (0x23)  | 
| 
622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'do'  =>  '$',		# decodes to $ (0x24)  | 
| 
623
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'pe'  =>  '%',		# decodes to % (0x25)  | 
| 
624
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'am'  =>  '&',		# decodes to & (0x26)  | 
| 
625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'sq'  =>  "'",		# decodes to ' (0x27)  | 
| 
626
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'op'  =>  '(',		# decodes to ( (0x28)  | 
| 
627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'cp'  =>  ')',		# decodes to ) (0x29)  | 
| 
628
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'as'  =>  '*',		# decodes to * (0x2a)  | 
| 
629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'pl'  =>  '+',		# decodes to + (0x2b)  | 
| 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'co'  =>  ',',		# decodes to , (0x2c)  | 
| 
631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'sl'  =>  '/',		# decodes to / (0x2f)  | 
| 
632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'cn'  =>  ':',		# decodes to : (0x3a)  | 
| 
633
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'sc'  =>  ';',		# decodes to ; (0x3b)  | 
| 
634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'lt'  =>  '<',		# decodes to < (0x3c)  | 
| 
635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'eq'  =>  '=',		# decodes to = (0x3d)  | 
| 
636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'gt'  =>  '>',		# decodes to > (0x3e)  | 
| 
637
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'qu'  =>  '?',		# decodes to ? (0x3f)  | 
| 
638
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'at'  =>  '@',		# decodes to @ (0x40)  | 
| 
639
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'ox'  =>  '[',		# decodes to [ (0x5b)  | 
| 
640
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'ls'  =>  '\\',		# decodes to \ (0x5c)  | 
| 
641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'cx'  =>  ']',		# decodes to ] (0x5d)  | 
| 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'vb'  =>  '|',		# decodes to | (0x7c)  | 
| 
643
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	'nu'  =>  "\0",		# decodes to null (0x00)  | 
| 
644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 );  | 
| 
645
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # XXXXXXX need way to encode newlines (using '\n' in interim)  | 
| 
646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our %anvl_encoding;  | 
| 
648
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
649
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #%cn :  | 
| 
650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #%sc ;  | 
| 
651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # xxxxx handle these separately  | 
| 
653
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	# XXXX remove %% from erc/anvlspec?  | 
| 
654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	'%'   =>  '%pe',	# decodes to % (0x25)  xxxx do this first?  | 
| 
655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	'_'   =>  '',		# a non-character used as a syntax shim  | 
| 
656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	'{'   =>  '',		# a non-character that begins an expansion block  | 
| 
657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	'}'   =>  '',		# a non-character that ends an expansion block  | 
| 
658
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
659
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Takes a single arg.  | 
| 
660
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub anvl_decode {  | 
| 
661
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
662
 | 
5
 | 
 
 | 
  
 50
  
 | 
  
5
  
 | 
  
0
  
 | 
18
 | 
 	local $_ = shift(@_) || '';  | 
| 
663
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
664
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
 	pos() = 0;			# reset \G for $_ just to be safe  | 
| 
665
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
 	while (/(?=\%\{)/g) {		# lookahead; \G matches just before  | 
| 
666
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 		my $p = pos();		# note \G position before it changes  | 
| 
667
 | 
7
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
41
 | 
 		s/\G \%\{ (.*?) \%\}//xs	# 's' modifier makes . match \n  | 
| 
668
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			or last;	# if no closing brace, skip match  | 
| 
669
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 		my $exp_block = $1;	# save removed expansion block  | 
| 
670
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
 		$exp_block =~ s/\s+//g;	# strip it of all whitespace  | 
| 
671
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 		pos() = $p;		# revert \G to where we started and  | 
| 
672
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
 		s/\G/$exp_block/;	# re-insert changed expansion block  | 
| 
673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
674
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 	s/\%[}{]//g;			# remove any remaining unmatched  | 
| 
675
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	s/\%_//g;			# xxx %_ -> ''  | 
| 
676
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 	s/\%\%/\%pe/g;			# xxx ??? xxxx???  | 
| 
677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# decode %XY where XY together don't form a valid pair of hex digits  | 
| 
678
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
 	s/\%([g-z][a-z]|[a-z][g-z])/$anvl_decoding{$1}/g;  | 
| 
679
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
 	return $_;  | 
| 
680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
681
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # xxx encoding should be context-sensitive, eg, name, value  | 
| 
683
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
0
  
 | 
5515
 | 
 sub anvl_encode { my( $s )=@_;  | 
| 
684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
685
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XXXX just define this in the module??  | 
| 
686
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	unless (%anvl_encoding) {	# one-time definition  | 
| 
687
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# This just defines an inverse mapping so we can encode.  | 
| 
688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$anvl_encoding{$anvl_decoding{$_}} = $_  | 
| 
689
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
149
 | 
 			for (keys %anvl_decoding);  | 
| 
690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
691
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	$s =~  | 
| 
692
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 	  s/([ !\"#\$\%&'\(\)\*\+,\/:;<=>\?@\[\\\]\|\0])/\%$anvl_encoding{$1}/g;  | 
| 
693
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 	return $s;  | 
| 
694
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
695
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
696
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # return $name in natural word order, using ANVL inversion points  | 
| 
697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # repeat for each final comma present  | 
| 
698
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
  
0
  
 | 
21
 | 
 sub anvl_name_naturalize { my( $name )=@_;  | 
| 
699
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
700
 | 
6
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
14
 | 
 	$name ||= '';  | 
| 
701
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
38
 | 
 	$name =~ /^\s*$/	and return $name;	# empty  | 
| 
702
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# "McCartney, Paul, Sir,,"  | 
| 
704
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# a, b, c, d, e,,, -> e d c a, b  | 
| 
705
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
 	my $prefix = '';  | 
| 
706
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
 	while ($name =~ s/,\s*$//) {  | 
| 
707
 | 
9
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
106
 | 
 		$name =~ s/^(.*),\s*([^,]+)(,*$)/$1$3/ and  | 
| 
708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$prefix .= $2 . ' ';  | 
| 
709
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
710
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
 	return $prefix . $name;  | 
| 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
712
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
713
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
0
 | 
 sub anvl_summarize { my( @nodes )=@_; }  | 
| 
714
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
715
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # XXXXX doesn't this really belong in an ERC.pm module?  | 
| 
716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
717
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ordered list of kernel element names  | 
| 
718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @kernel_labels = qw(  | 
| 
719
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	who  | 
| 
720
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	what  | 
| 
721
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	when  | 
| 
722
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	where  | 
| 
723
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	how  | 
| 
724
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	why  | 
| 
725
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	huh  | 
| 
726
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 );  | 
| 
727
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
728
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # This routine inspects and possibly modifies in place the kind of element  | 
| 
729
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # array resulting from a call to anvl_recarray(), which splits and ANVL  | 
| 
730
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # record.  It is useful for transforming short form ERC elements into full  | 
| 
731
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # form elements, for example, to expand "erc:a|b|c|d" into the equivalent,  | 
| 
732
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # "erc:\nwho:a\nwhat:b\nwhen:c\nwhere:d".  | 
| 
733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # It returns the empty string on success, otherwise an error message.  | 
| 
734
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
735
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
0
  
 | 
1636
 | 
 sub erc_anvl_expand_array { my( $r_elems )=@_;  | 
| 
736
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
737
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
60
 | 
 	use File::ANVL;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5380
 | 
    | 
| 
738
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	my ($lineno, $name, $value, $msg, @svals, $sval);  | 
| 
739
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	my $me = 'erc_anvl_expand_array';  | 
| 
740
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	my $i = 3;		# skip first 3 elems (anvl array preamble)  | 
| 
741
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
  	while (1) {  | 
| 
742
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
 		$lineno = $$r_elems[$i++];  | 
| 
743
 | 
21
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
45
 | 
 		$name = $$r_elems[$i++] || '';  | 
| 
744
 | 
21
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
41
 | 
 		$value = $$r_elems[$i++] || '';  | 
| 
745
 | 
21
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
32
 | 
 		last	unless defined $lineno;	# end of record  | 
| 
746
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  		next			# skip unless we have erc-type thing  | 
| 
747
 | 
17
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
53
 | 
 			if ($name ne 'erc' || $value =~ /^\s*$/);  | 
| 
748
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#if ($name !~ /^erc\b/ || $value =~ /^\s*$/);  | 
| 
749
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# xxx should do this for full generality  | 
| 
750
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
751
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# If here, we have an erc-type thing with a non-empty value.  | 
| 
752
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#  | 
| 
753
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 		($msg = anvl_valsplit($value, \@svals)) and  | 
| 
754
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			return "error: $me: anvl_valsplit: $msg";  | 
| 
755
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   | 
| 
756
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# XXXX only doing straight "erc" (eg, not erc-about)  | 
| 
757
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 		my $j = 0;  | 
| 
758
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		my @extras = ();  | 
| 
759
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# If we exceed known labels, we'll re-use last known label.  | 
| 
760
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		my $unknown = $kernel_labels[$#kernel_labels];  | 
| 
761
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 		foreach $sval (@svals) {  | 
| 
762
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
763
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# xxx not (yet) tranferring subvalue structure  | 
| 
764
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#     to anvl_om or other conversion  | 
| 
765
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Recall that each $sval is itself a reference to  | 
| 
766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# an array of subvalues (often just one element).  | 
| 
767
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#  | 
| 
768
 | 
8
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
65
 | 
 			push @extras,		# trust kernel_labels order  | 
| 
769
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$lineno,  | 
| 
770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$kernel_labels[$j++] || $unknown,  | 
| 
771
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				join('; ',	# trim ends of subvalues  | 
| 
772
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					map(m/^\s*(.*?)\s*$/, @$sval)  | 
| 
773
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				);  | 
| 
774
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
775
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# Finally, replace our $value element with '' and append  | 
| 
776
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# the new extra values we've just expanded.  | 
| 
777
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
 		splice @$r_elems, $i-1, 1,  | 
| 
778
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			'',		# replaces $value we just used up  | 
| 
779
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			@extras;	# adds new elements from $value  | 
| 
780
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  	}  | 
| 
781
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 	return '';			# success  | 
| 
782
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
783
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
784
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX=============  | 
| 
785
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # xxx checkm _in_   obj1 obj2 ...  --> returns noids  | 
| 
786
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # xxx checkm _out_  id1 id2 ...  --> returns objects  | 
| 
787
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
788
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX=============  | 
| 
789
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # xxx do metadata scan of object before ingest and confirm with user that  | 
| 
790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # the object is correctly identified.  This could even be done remotely.  | 
| 
791
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Start with informal staff service for depositing objects, returning a  | 
| 
792
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # short url to a stable object, and not clogging up allstaff inboxes with  | 
| 
793
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # huge attachments.  Also applies to any number of draft docs for review  | 
| 
794
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # but in temporary storage (but stable).  | 
| 
795
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
796
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX=============  | 
| 
797
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # xxx do id generator service with 'expiring' ids.  To mint, you tell us  | 
| 
798
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # who you are first.  To get a perm. id, you agree to use your minted id  | 
| 
799
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # and bind it within N months.  We track, and warn you several times  | 
| 
800
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # until N months as elapsed and then reclaim/recycle the id.  | 
| 
801
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
802
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ############################################  | 
| 
803
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Output Multiplexer routines  | 
| 
804
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ############################################  | 
| 
805
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
806
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    #$erc = "erc: Smith, J.|The Whole Truth|2004|http://example.com/foo/bar";  | 
| 
807
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    $errmsg = File::ERC::erc_anvl2erc_turtle ($erc, $rec);  | 
| 
808
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    $errmsg and  | 
| 
809
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	print("$errmsg\n")  | 
| 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    or  | 
| 
811
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	print("turtle record:\n$rec\n")  | 
| 
812
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    ;  | 
| 
813
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
814
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # xxx anvl_fmt not consistent with om_anvl!  | 
| 
815
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
816
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Input file(s) from ARGV.  | 
| 
817
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
818
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
 
 | 
 sub anvl_om { my( $om, $o, $get_anvl )		= (shift, shift, shift);  | 
| 
819
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
820
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return "anvl_om: 1st arg not an OM object"  | 
| 
821
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		if ref($om) !~ /^File::OM::/;  | 
| 
822
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $p = $om->{outhandle};	# whether 'print' status or small  | 
| 
823
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 	$o ||= anvl_opt_defaults();  | 
| 
824
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 	$get_anvl ||= File::ANVL::make_get_anvl();	# xxx set input here?  | 
| 
825
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # XXX test return value!  | 
| 
826
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
827
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $s = '';			# output strings are returned to $s  | 
| 
828
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $st = $p ? 1 : '';		# returns (stati or strings) accumulate  | 
| 
829
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my ($msg, $allmsgs, $anvlrec, $lineno, $name, $value, $pat, $n, $nmax);  | 
| 
830
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my (%rechash, $ne, $nemax, $elem_name);	# for alt. element ordering  | 
| 
831
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $r_elem_order = $$o{elem_order};  | 
| 
832
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
833
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	$s = $om->ostream();		# open stream  | 
| 
834
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
835
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# This next line is a fast and compact (if cryptic) way to  | 
| 
836
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# accumulate $om->method calls.  Used after each method call, it  | 
| 
837
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# concatenates strings or ANDs up print statuses, depending on the  | 
| 
838
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# outhandle setting.  It makes several appearances in this routine.  | 
| 
839
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
840
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 	$p and ($st &&= $s), 1 or ($st .= $s);	# accumulate method returns  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
841
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
842
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Numbers: record, element in record, and start line  | 
| 
843
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
844
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my ($startline, $recnum, $elemnum) = (1, 0, 0);  | 
| 
845
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my ($wslines, $rrlines);  | 
| 
846
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $r_elems = $om->{elemsref};		# abbreviation  | 
| 
847
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# xxx is that reference kosher?  | 
| 
848
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
849
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	while (1) {  | 
| 
850
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
851
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# Get an ANVL record and count lines therein.  ANVL  | 
| 
852
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# records can come from anywhere, but typically from  | 
| 
853
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# a file (read in "paragraph" mode) or a BDB database.  | 
| 
854
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#  | 
| 
855
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$anvlrec = trimlines(&$get_anvl(), \$wslines, \$rrlines);  | 
| 
856
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$startline += $wslines;  | 
| 
857
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		last		unless $anvlrec;  | 
| 
858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
859
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$recnum++;		# increment record counter  | 
| 
860
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =for later  | 
| 
861
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # XXX anvl_recarray is expensive, do we _need_ to do it if the output is  | 
| 
862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     also in anvl?  Maybe call modified [2] here so "find" can work?  | 
| 
863
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		if (ref($om) eq 'File::OM::ANVL' and ! $r_elem_order) {  | 
| 
864
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# xxx do quick expand (short->long erc) here?  | 
| 
865
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#     xxx _will_ disturb input line numbering  | 
| 
866
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$$o{find} and ($anvlrec !~ /$$o{find}/m) and  | 
| 
867
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				next;		# no output has occurred  | 
| 
868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# xxx do quick check for 'show' and next  | 
| 
869
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# XXXXXXXX must define lineno for verbose case  | 
| 
870
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$s = $om->anvl_rec($anvlrec, $startline, $rrlines);  | 
| 
871
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$p and ($st &&= $s), 1 or ($st .= $s);  | 
| 
872
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			next;  | 
| 
873
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
874
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
875
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$msg = anvl_recarray($anvlrec, $r_elems, $startline, $o);  | 
| 
876
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$msg =~ /^error/	and return "anvl_recarray: $msg";  | 
| 
877
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$msg eq "" or  | 
| 
878
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#print $msg, "\n";  | 
| 
879
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#$o->{verbose} && print $msg, "\n";  | 
| 
880
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$allmsgs .= $msg . "\n";	# save other message  | 
| 
881
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
882
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# NB: apply 'find' here before possible expansion, which  | 
| 
883
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# means that a pattern like "who:\s*smith" won't work on  | 
| 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# on a short form ANVL record.  | 
| 
885
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#  | 
| 
886
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 		$$o{find} and ($anvlrec !~ /$$o{find}/m) and  | 
| 
887
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			next;		# no output has occurred  | 
| 
888
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
889
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# If caller has set $$o{elemsproc} to a code reference,  | 
| 
890
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# it is called to process the element array just returned  | 
| 
891
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# from anvl_recarray.  Typically this is used to convert  | 
| 
892
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# (with erc_anvl_expand_array) short form ERCs to long  | 
| 
893
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# form ERCs.  As an optimization, the code is not called  | 
| 
894
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# if $$o{elemsprocpat} (typically, "erc") is set and  | 
| 
895
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# doesn't match the raw record string.  | 
| 
896
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#  | 
| 
897
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 		if (ref($$o{elemsproc}) eq "CODE" and	# if code and either  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
898
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			(! ($pat = $$o{elemsprocpat}))	# no pattern or  | 
| 
899
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				|| $anvlrec =~ $pat) {	# the pattern matches  | 
| 
900
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
901
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # [2] XXX can we call elemsproc directly on the $anvlrec? so we don't need  | 
| 
902
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     to call expensive anvl_recarray first?  | 
| 
903
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			($msg = &{$$o{elemsproc}}($r_elems)) and  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
904
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				return "File::ANVL::elemsproc: $msg";  | 
| 
905
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
906
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		ref($om) eq 'File::OM::Turtle' and  | 
| 
907
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			turtle_set_subject($om, $anvlrec);  | 
| 
908
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
909
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# The orec method is given first crack at a new record.  | 
| 
910
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# It sets and/or clears a number of values for keys (eg,  | 
| 
911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# for turtle, $$o{subject}).  $recnum is useful for  | 
| 
912
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# outputting json separators (eg, no comma if $recnum eq 1)  | 
| 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# or record numbers in comments (eg, if $$o{verbose}).  | 
| 
914
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# $startline is useful for parser diagnostics (eg, "error  | 
| 
915
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# on line 5").  $r_elems and $r_elem_order are needed for  | 
| 
916
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# discovering what elements will populate CSV/PSV records.  | 
| 
917
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#  | 
| 
918
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # XXX this next isn't needed if output is anvl ?!  (assuming final NL is  | 
| 
919
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # written when closing the record  | 
| 
920
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$s = $om->orec($recnum, $startline, $r_elems, $r_elem_order);  | 
| 
921
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 		$p and ($st &&= $s), 1 or ($st .= $s);  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
922
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
923
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		if ($r_elem_order) {  | 
| 
924
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			undef %rechash;		# don't want prior indices  | 
| 
925
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			($msg = anvl_arrayhash($r_elems, \%rechash)) and  | 
| 
926
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				return "anvl_arrayhash: $msg";  | 
| 
927
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$ne = -1;		# index into $$r_elem_order  | 
| 
928
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$nemax = scalar @$r_elem_order;  | 
| 
929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		} else {  | 
| 
930
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$n = 			# index into $$r_elems  | 
| 
931
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # XXX don't reference r_elems if we haven't called anvl_recarray  | 
| 
932
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$$r_elems[2]	# if a no-label value starts  | 
| 
933
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					? -3	# rec, make sure to output it,  | 
| 
934
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					: 0;	# else skip it (normal)  | 
| 
935
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$nmax = scalar @$r_elems;  | 
| 
936
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
937
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
938
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # XXX if output is to anvl, can we not skip the entire loop below? but  | 
| 
939
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # not if it's possible to output anvl _and_ to care about element order  | 
| 
940
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # XXX but still perform $show check and skip not-shown elems  | 
| 
941
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # XXX and still perform value inversion if {invert} options  | 
| 
942
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$elemnum = 0;			# true elements, not comments  | 
| 
943
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		undef $name;  | 
| 
944
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		while (1) {  | 
| 
945
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
946
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Select next candidate element.  If we need to  | 
| 
947
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# output elements in a certain order, consult the  | 
| 
948
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# hash; otherwise, just use "found" order.  | 
| 
949
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#  | 
| 
950
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			if ($r_elem_order) {	# use specified order  | 
| 
951
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
952
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$ne++;  | 
| 
953
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$ne >= $nemax		and last;  | 
| 
954
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
955
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# For CSV and PSV, the element name at this  | 
| 
956
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# position may be deliberately undefined, or  | 
| 
957
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# may correspond to a named element missing  | 
| 
958
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# in this record, in which case we skip it.  | 
| 
959
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				#  | 
| 
960
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$elem_name = $r_elem_order->[$ne];  | 
| 
961
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 				! defined($elem_name) || ! defined(  | 
| 
962
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					#XXX ignore multiple instances for now  | 
| 
963
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					$n = $rechash{$elem_name}->[0]  | 
| 
964
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				    ) and  | 
| 
965
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					# for CSV/PSV, output an empty element  | 
| 
966
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					next;  | 
| 
967
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
968
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			} else {		# use natural array order  | 
| 
969
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$n += 3;  | 
| 
970
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$n >= $nmax		and last;  | 
| 
971
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
972
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# If we get here, $n is defined.  | 
| 
973
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
974
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$lineno = $$r_elems[$n];  | 
| 
975
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$name = $n < 3		# for special first triple  | 
| 
976
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				? '_'		# use synthesized name '_'  | 
| 
977
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				: $$r_elems[$n + 1];	# else real name  | 
| 
978
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 			$value = $$r_elems[$n + 2] || "";  | 
| 
979
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
980
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$elemnum++		unless $name eq '#';  | 
| 
981
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Skip if 'show' given and not requested.  | 
| 
983
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 			$$o{show} and ("$name: $value" !~ /$$o{show}/m) and  | 
| 
984
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				(undef $name),	# cause elem to be skipped  | 
| 
985
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				next;  | 
| 
986
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
987
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Instead of $om->oelem, $om->celem, $om->contelem,   | 
| 
988
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# combine open and close into one, but first  | 
| 
989
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# naturalize values if called upon.  | 
| 
990
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#  | 
| 
991
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 			$$o{invert} and $value =~ /,\s*$/ and  | 
| 
992
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$value = anvl_name_naturalize($value);  | 
| 
993
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
994
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		continue {  | 
| 
995
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$s = $om->elem($name, $value, $lineno);  | 
| 
996
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 			$p and ($st &&= $s), 1 or ($st .= $s);  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
997
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			undef $name;		# clean the slate  | 
| 
998
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
999
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$s = $om->crec($recnum);  | 
| 
1000
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 		$p and ($st &&= $s), 1 or ($st .= $s);  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1001
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1002
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	continue {  | 
| 
1003
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$startline += $rrlines;  | 
| 
1004
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XXX currently doing nothing with $allmsgs warnings!  | 
| 
1006
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#     should probably print if verbose mode on  | 
| 
1007
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	$s = $om->cstream();  | 
| 
1008
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 	$p and ($st &&= $s), 1 or ($st .= $s);  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1009
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1010
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $st;  | 
| 
1011
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1012
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1013
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # xxx document all om options  | 
| 
1014
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # xxx should om also have a recstring slot (for anvlrec)?  | 
| 
1015
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # xxx pass in turtle_nosubject (default)?  | 
| 
1016
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub turtle_set_subject {  | 
| 
1017
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1018
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
 
 | 
 	my ($om, $anvlrec) = (shift, shift);  | 
| 
1019
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $r_elems = $om->{elemsref};  | 
| 
1020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1021
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# In order to find the subject element for Turtle/RDF  | 
| 
1022
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# assertions, we need an element name pattern.  If one is  | 
| 
1023
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# defined in $om->{turtle_subjelpat}, use it.  If it's undefined,  | 
| 
1024
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# per-record code will use 'where' if it thinks the record  | 
| 
1025
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# is an ERC, or use 'identifier|subject' as a last resort.  | 
| 
1026
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# If no element matching subjelpat is found, $om->{subject}  | 
| 
1027
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# will default to $om->{turtle_nosubject}.  | 
| 
1028
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
1029
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 	my $subjpat = $om->{turtle_subjelpat} ||  | 
| 
1030
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		($anvlrec =~ /^erc\s*:/m  | 
| 
1031
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			? "^where\$" :	# 1st where in an 'erc', or  | 
| 
1032
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		($anvlrec =~ /^(identifier|subject)\s*:/m  | 
| 
1033
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			? "^$1\$" :	# 1st identifier or subject,  | 
| 
1034
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		($anvlrec =~ /^(.+)\s*:\s*(\n\s+)*\w/  | 
| 
1035
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			? "^$1\$" :	# or 1st non-empty element  | 
| 
1036
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		'')));			# or nothing (always matches)  | 
| 
1037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1038
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Now find a 'subject' for our Turtle/RDF assertions.  | 
| 
1039
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  | 
| 
1040
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $j = 1;	# element names in positions 1, 4, 7, ...  | 
| 
1041
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 	1 while ($j < $#$r_elems and			# quickly find it  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1042
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		@$r_elems[$j] !~ $subjpat and ($j += 3));  | 
| 
1043
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 	$om->{subject} = $j < $#$r_elems && $subjpat ?	# if found,  | 
| 
1044
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			@$r_elems[$j + 1] :		# use associated value  | 
| 
1045
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$om->{turtle_nosubject};	# else use default  | 
| 
1046
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $om->{subject};  | 
| 
1047
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1048
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1049
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
1050
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1051
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  |