line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# bibliography package for Perl |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# utility subroutines |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Dana Jacobsen (dana@acm.org) |
7
|
|
|
|
|
|
|
# 11 January 1995 |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
package bp_util; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
###### |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
$opt_complex = 1; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# The global key registry. |
17
|
|
|
|
|
|
|
%glb_keyreg = (); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# |
20
|
|
|
|
|
|
|
# mname_to_canon takes a name string and returns it back as a Canonical name. |
21
|
|
|
|
|
|
|
# |
22
|
|
|
|
|
|
|
# Example input: |
23
|
|
|
|
|
|
|
# |
24
|
|
|
|
|
|
|
# John von Jones, Jr., Ed Krol, Ludwig von Beethoven |
25
|
|
|
|
|
|
|
# |
26
|
|
|
|
|
|
|
# output: |
27
|
|
|
|
|
|
|
# |
28
|
|
|
|
|
|
|
# Jones,von,John,Jr./Krol,Ed,/Beethoven,von,Ludwig, |
29
|
|
|
|
|
|
|
# |
30
|
|
|
|
|
|
|
# (the actual seperators are $cs_sep for '/' and $cs_sep2 for ',') |
31
|
|
|
|
|
|
|
# |
32
|
|
|
|
|
|
|
# This is a total heuristic hack, and if you know where names are split, |
33
|
|
|
|
|
|
|
# use multiple calls to name_to_canon instead. Use this routine if you |
34
|
|
|
|
|
|
|
# expect the input to be some sort of free-form such that you can't |
35
|
|
|
|
|
|
|
# easily seperate the names yourself. |
36
|
|
|
|
|
|
|
# |
37
|
|
|
|
|
|
|
# This routine assumes there can be multiple authors per line, seperated by |
38
|
|
|
|
|
|
|
# "and" or commas, and it's going to try to guess how to break them up, |
39
|
|
|
|
|
|
|
# given that it can get "name1, name2, jr, name3" as a 3 name string with |
40
|
|
|
|
|
|
|
# "name2, jr" as the second name. This method precludes the ability to |
41
|
|
|
|
|
|
|
# also correctly parse "last, first" format strings. If that is the format |
42
|
|
|
|
|
|
|
# your string is in, call the function with a "1" as the second argument. |
43
|
|
|
|
|
|
|
# |
44
|
|
|
|
|
|
|
# Note that no-break-space ("tie", ~ in TeX, \0 in troff) is \240. |
45
|
|
|
|
|
|
|
# |
46
|
|
|
|
|
|
|
sub mname_to_canon { |
47
|
0
|
|
|
0
|
|
0
|
local($allnames, $revauthor) = @_; |
48
|
0
|
|
|
|
|
0
|
local($firstn, $vonn, $lastn, $jrn); |
49
|
0
|
|
|
|
|
0
|
local(@names, $name, $oname, $nname, $rest); |
50
|
0
|
|
|
|
|
0
|
local(@cnames) = (); |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# Squeeze all spaces into one space. |
53
|
0
|
|
|
|
|
0
|
$allnames =~ s/\s+/ /g; |
54
|
|
|
|
|
|
|
# remove any beginning and trailing ands. |
55
|
0
|
|
|
|
|
0
|
$allnames =~ s/^and //; |
56
|
0
|
|
|
|
|
0
|
$allnames =~ s/ and$//; |
57
|
|
|
|
|
|
|
|
58
|
0
|
|
|
|
|
0
|
@names = split(/,? and /, $allnames); |
59
|
0
|
|
|
|
|
0
|
while (@names) { |
60
|
0
|
|
|
|
|
0
|
$oname = $name = shift @names; |
61
|
0
|
|
|
|
|
0
|
$firstn = $vonn = $lastn = $jrn = ''; |
62
|
|
|
|
|
|
|
# name has no spaces at beginning or end |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# squeeze all spaces around commas. They aren't telling us anything that |
65
|
|
|
|
|
|
|
# we can rely on, and it simplifies matching. Also combine them. |
66
|
0
|
|
|
|
|
0
|
$name =~ s/,+/,/g; |
67
|
0
|
|
|
|
|
0
|
$name =~ s/ ,/,/g; |
68
|
0
|
|
|
|
|
0
|
$name =~ s/, /,/g; |
69
|
|
|
|
|
|
|
|
70
|
0
|
0
|
0
|
|
|
0
|
if ( $revauthor && ($name =~ /,/) ) { |
71
|
0
|
0
|
|
|
|
0
|
if ($name =~ s/[, ]+([sj]r\.?|I+)$//i) { |
72
|
0
|
|
|
|
|
0
|
$jrn = ",$1"; |
73
|
|
|
|
|
|
|
} |
74
|
0
|
|
|
|
|
0
|
$name =~ s/^(.*),(.*)/$2 $1$jrn/g; |
75
|
|
|
|
|
|
|
# name has no spaces at beg or end |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
|
78
|
0
|
|
|
|
|
0
|
$name =~ s/[ \240]+([sj]r\.?|\(?edi?t?o?r?s?\.?\)?|I+)(,|$)/,$1/i; |
79
|
0
|
|
|
|
|
0
|
($nname, $rest, $jrn) = split(/,([^\240])/, $name, 2); |
80
|
0
|
0
|
|
|
|
0
|
$jrn = (defined $jrn) ? "$rest$jrn" : ''; |
81
|
|
|
|
|
|
|
#$jrn =~ s/,+$//; |
82
|
|
|
|
|
|
|
# nname has no spaces at beg or end. |
83
|
|
|
|
|
|
|
# jrn has no spaces at beg or end. |
84
|
0
|
0
|
|
|
|
0
|
if ($jrn =~ / /) { |
85
|
0
|
|
|
|
|
0
|
($jrn, $rest) = $jrn =~ /([sj]r\.?|\(?edi?t?o?r?s?\.?\)?|I+)?,?(.*)$/i; |
86
|
0
|
0
|
|
|
|
0
|
unshift(@names, $rest) if defined $rest; |
87
|
0
|
0
|
|
|
|
0
|
$jrn = '' unless defined $jrn; |
88
|
|
|
|
|
|
|
} |
89
|
0
|
|
|
|
|
0
|
($firstn) = $nname =~ /^((\S* )*)/; |
90
|
0
|
|
|
|
|
0
|
$nname = substr($nname, length($firstn)); |
91
|
|
|
|
|
|
|
# nname has no spaces at beg or end. |
92
|
0
|
|
|
|
|
0
|
$lastn = $nname; |
93
|
0
|
|
|
|
|
0
|
$lastn =~ s/\240+/ /g; |
94
|
0
|
|
|
|
|
0
|
$firstn =~ s/\240+/ /g; |
95
|
0
|
|
|
|
|
0
|
$jrn =~ s/\240+/ /g; |
96
|
0
|
|
|
|
|
0
|
while ($firstn =~ / ([a-z]+ )$/) { |
97
|
0
|
|
|
|
|
0
|
$rest = $1; |
98
|
0
|
|
|
|
|
0
|
substr($vonn, 0, 0) = $rest; |
99
|
|
|
|
|
|
|
# XXXXX removed " - 1" from position argument |
100
|
0
|
|
|
|
|
0
|
substr($firstn, length($firstn) - length($rest)) = ''; |
101
|
|
|
|
|
|
|
} |
102
|
0
|
|
|
|
|
0
|
while ($lastn =~ /^([a-z]+ )/) { |
103
|
0
|
|
|
|
|
0
|
$rest = $1; |
104
|
0
|
|
|
|
|
0
|
$vonn .= $rest; |
105
|
0
|
|
|
|
|
0
|
$lastn = substr($lastn, length($rest)); |
106
|
|
|
|
|
|
|
} |
107
|
0
|
|
|
|
|
0
|
$vonn =~ s/\s+$//; |
108
|
0
|
|
|
|
|
0
|
$firstn =~ s/\s+$//; |
109
|
|
|
|
|
|
|
#print STDERR ":$vonn:$lastn:$firstn:$jrn:\n"; |
110
|
|
|
|
|
|
|
|
111
|
0
|
0
|
|
|
|
0
|
if ($jrn) { |
112
|
0
|
0
|
|
|
|
0
|
if ($jrn =~ /^(et\.? ?al\.?)|(others)$/i) { |
113
|
0
|
|
|
|
|
0
|
$jrn = ''; |
114
|
0
|
|
|
|
|
0
|
unshift(@names, "et al."); |
115
|
|
|
|
|
|
|
} |
116
|
0
|
0
|
|
|
|
0
|
if ($jrn =~ /^inc[\.]?$/i) { |
117
|
0
|
|
|
|
|
0
|
$lastn .= ", " . $jrn; |
118
|
0
|
|
|
|
|
0
|
$jrn = ''; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
} |
121
|
0
|
0
|
|
|
|
0
|
if ($lastn =~ /^(et ?al)|(others)$/i) { |
122
|
0
|
|
|
|
|
0
|
$lastn = "et al."; |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
|
125
|
0
|
|
|
|
|
0
|
push( @cnames, join($bib'cs_sep2, $lastn, $vonn, $firstn, $jrn) ); |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
|
128
|
0
|
|
|
|
|
0
|
$name = join( $bib'cs_sep, @cnames ); |
129
|
0
|
|
|
|
|
0
|
$name =~ s/\s+$//; |
130
|
0
|
|
|
|
|
0
|
$name =~ s/\s+/ /g; |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# remove any spaces before and after parts of names. |
133
|
0
|
|
|
|
|
0
|
1 while $name =~ s/ ${bib'cs_sep2}/${bib'cs_sep2}/go; |
134
|
0
|
|
|
|
|
0
|
1 while $name =~ s/${bib'cs_sep2} /${bib'cs_sep2}/go; |
135
|
|
|
|
|
|
|
|
136
|
0
|
|
|
|
|
0
|
$name; |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
######### |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# |
142
|
|
|
|
|
|
|
# name_to_canon takes a _single_ name and returns it back as a Canonical name. |
143
|
|
|
|
|
|
|
# |
144
|
|
|
|
|
|
|
# This will be faster than mname_to_canon. I also wrote it for bp, and |
145
|
|
|
|
|
|
|
# mname_to_canon is full of weird TeX things from r2b. |
146
|
|
|
|
|
|
|
# |
147
|
|
|
|
|
|
|
# Note that there are a few differences between the two. Notably, that |
148
|
|
|
|
|
|
|
# we only break out a von if it is space seperated -- a nbsp (tie) will |
149
|
|
|
|
|
|
|
# prevent us from breaking it. Note that nbsp => \240. |
150
|
|
|
|
|
|
|
# |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
sub name_to_canon { |
153
|
0
|
|
|
0
|
|
0
|
local($name, $revauthor) = @_; |
154
|
0
|
|
|
|
|
0
|
local($first, $last, $von, $jrn); |
155
|
|
|
|
|
|
|
|
156
|
0
|
0
|
|
|
|
0
|
&bib'panic("name_to_canon called with no arguments") unless defined $name; |
157
|
|
|
|
|
|
|
|
158
|
0
|
|
|
|
|
0
|
$name =~ s/\s+/ /g; |
159
|
0
|
|
|
|
|
0
|
$name =~ s/ $//; |
160
|
0
|
|
|
|
|
0
|
$von = ''; $jrn = ''; |
|
0
|
|
|
|
|
0
|
|
161
|
|
|
|
|
|
|
|
162
|
0
|
0
|
|
|
|
0
|
if ($name =~ s/[, ]+([sj]r\.?|I+)$//i) { |
163
|
0
|
|
|
|
|
0
|
$jrn = $1; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
# name has no space at end |
166
|
|
|
|
|
|
|
# jrn has no space at beg or end |
167
|
0
|
0
|
0
|
|
|
0
|
if ( $revauthor && ($name =~ /,/) ) { |
168
|
0
|
|
|
|
|
0
|
$name =~ s/^(.*)\s*,\s*(.*)/$2 $1/g; |
169
|
|
|
|
|
|
|
} |
170
|
|
|
|
|
|
|
# strip off Jr., but leave "Hunt,\0Jr." alone. |
171
|
0
|
0
|
0
|
|
|
0
|
if (($name =~ /,/) && ($name !~ /,\240/) ) { |
172
|
|
|
|
|
|
|
# XXXXX Check the logic here |
173
|
0
|
0
|
|
|
|
0
|
if (!$revauthor) { |
174
|
0
|
0
|
|
|
|
0
|
if ($jrn) { |
175
|
|
|
|
|
|
|
# possibly reversed? |
176
|
0
|
|
|
|
|
0
|
local($newname) = &name_to_canon($name, 'reverse'); |
177
|
0
|
0
|
|
|
|
0
|
if (defined $newname) { |
178
|
0
|
|
|
|
|
0
|
&bib'gotwarn("Names are in reverse order?"); |
179
|
0
|
|
|
|
|
0
|
return $newname; |
180
|
|
|
|
|
|
|
} else { |
181
|
0
|
|
|
|
|
0
|
&bib'goterror("name_to_canon already got jr!"); |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
} else { |
184
|
0
|
0
|
|
|
|
0
|
&bib'goterror("Names seem to be reversed!") if $jrn; |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
} |
187
|
0
|
|
|
|
|
0
|
($name, $jrn) = split(/ ?, ?/, $name, 2); |
188
|
|
|
|
|
|
|
} |
189
|
0
|
0
|
|
|
|
0
|
if ($name =~ / /) { |
190
|
0
|
|
|
|
|
0
|
($first, $last) = $name =~ /(.*) (\S*)$/; |
191
|
|
|
|
|
|
|
} else { |
192
|
0
|
|
|
|
|
0
|
$first = ''; |
193
|
0
|
|
|
|
|
0
|
$last = $name; |
194
|
|
|
|
|
|
|
} |
195
|
0
|
0
|
|
|
|
0
|
if ($first =~ / ([a-z].*)$/) { |
196
|
0
|
|
|
|
|
0
|
$von = $1; |
197
|
0
|
|
|
|
|
0
|
$von =~ s/\240/ /g; |
198
|
0
|
|
|
|
|
0
|
substr($first, length($first)-length($von)-1) = ''; |
199
|
|
|
|
|
|
|
#$first =~ s/ $von//; |
200
|
|
|
|
|
|
|
} |
201
|
0
|
|
|
|
|
0
|
while ($last =~ /^([a-z]+)\240/) { |
202
|
0
|
|
|
|
|
0
|
$von .= " $1"; |
203
|
0
|
|
|
|
|
0
|
substr($last, 0, length($1)+1) = ''; |
204
|
|
|
|
|
|
|
} |
205
|
0
|
|
|
|
|
0
|
$von =~ s/^ //; |
206
|
0
|
|
|
|
|
0
|
$last =~ s/\240/ /g; |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
#print STDERR ":$last:$von:$first:$jrn:\n"; |
209
|
|
|
|
|
|
|
|
210
|
0
|
|
|
|
|
0
|
$name = join( $bib'cs_sep2, $last, $von, $first, $jrn); |
211
|
|
|
|
|
|
|
|
212
|
0
|
|
|
|
|
0
|
$name =~ s/\s+$//; |
213
|
0
|
|
|
|
|
0
|
$name =~ s/\s+/ /g; |
214
|
|
|
|
|
|
|
# remove spaces before and after seperators. |
215
|
0
|
|
|
|
|
0
|
1 while $name =~ s/ ${bib'cs_sep2}/${bib'cs_sep2}/go; |
216
|
0
|
|
|
|
|
0
|
1 while $name =~ s/${bib'cs_sep2} /${bib'cs_sep2}/go; |
217
|
|
|
|
|
|
|
|
218
|
0
|
0
|
|
|
|
0
|
if ($opt_complex > 1) { |
219
|
0
|
|
|
|
|
0
|
($last, $von, $first, $jrn) = split($bib'cs_sep2, $name); |
220
|
|
|
|
|
|
|
# Look for corporations |
221
|
0
|
0
|
|
|
|
0
|
if ($jrn =~ /^Inc\.$/i) { |
222
|
0
|
|
|
|
|
0
|
$jrn = ''; |
223
|
0
|
|
|
|
|
0
|
$last = $last . ", Inc."; |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
# put it back together |
226
|
0
|
|
|
|
|
0
|
$name = join( $bib'cs_sep2, $last, $von, $first, $jrn); |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
|
229
|
0
|
|
|
|
|
0
|
$name; |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
# This routine turns a name string (possibly containing multiple names) in |
233
|
|
|
|
|
|
|
# canon format into a string suitable for output. |
234
|
|
|
|
|
|
|
# |
235
|
|
|
|
|
|
|
# The styles supported are: |
236
|
|
|
|
|
|
|
# |
237
|
|
|
|
|
|
|
# bibtex First von Last [or] von Last, First [or] von Last, Jr, First |
238
|
|
|
|
|
|
|
# |
239
|
|
|
|
|
|
|
# plain First von Last, Jr |
240
|
|
|
|
|
|
|
# |
241
|
|
|
|
|
|
|
# reverse von Last, First, Jr |
242
|
|
|
|
|
|
|
# |
243
|
|
|
|
|
|
|
# reverse2 Last, First von, Jr |
244
|
|
|
|
|
|
|
# |
245
|
|
|
|
|
|
|
# lname1 von Last, Jr, First [for first author] |
246
|
|
|
|
|
|
|
# First von Last [for subsequesent authors] |
247
|
|
|
|
|
|
|
# |
248
|
|
|
|
|
|
|
# XXXXX |
249
|
|
|
|
|
|
|
# |
250
|
|
|
|
|
|
|
# What we should do instead is have a more general solution. We could specify |
251
|
|
|
|
|
|
|
# names in the above sort of format, and have it parse that. But then how do |
252
|
|
|
|
|
|
|
# we handle BibTeX, which will make decisions based on what fields exist? But |
253
|
|
|
|
|
|
|
# for most of these, something like "FvL,J" or "vL,F,J" or "L,Fv,J" would work. |
254
|
|
|
|
|
|
|
# |
255
|
|
|
|
|
|
|
# Also, we really need a generic output form, that handles more subtle |
256
|
|
|
|
|
|
|
# variations, like when to put "et al." in place of 150 names, and a different |
257
|
|
|
|
|
|
|
# separator for the last name (", and " instead of ", "), initials, and so on. |
258
|
|
|
|
|
|
|
# |
259
|
|
|
|
|
|
|
# XXXXX Check out bibtex parsing. We look for a space, but we've tied all |
260
|
|
|
|
|
|
|
# spaces already! |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
sub canon_to_name { |
263
|
120
|
|
|
120
|
|
249
|
local($cname, $how) = @_; |
264
|
120
|
|
|
|
|
188
|
local(@names); |
265
|
120
|
|
|
|
|
127
|
local($name); |
266
|
120
|
|
|
|
|
174
|
local($n, $von, $last, $jr, $first); |
267
|
120
|
|
|
|
|
180
|
local($namenum) = 0; |
268
|
|
|
|
|
|
|
|
269
|
120
|
50
|
|
|
|
232
|
&bib'panic("canon_to_name called with no arguments") unless defined $cname; |
270
|
120
|
50
|
|
|
|
194
|
$how = 'bibtex' unless defined $how; |
271
|
|
|
|
|
|
|
|
272
|
120
|
|
|
|
|
407
|
foreach $name ( split(/$bib'cs_sep/o, $cname) ) { |
273
|
304
|
|
|
|
|
318
|
$namenum++; |
274
|
304
|
|
|
|
|
878
|
($last, $von, $first, $jr) = split(/$bib'cs_sep2/o, $name, 4); |
275
|
304
|
|
|
|
|
468
|
$last =~ s/ /\240/g; |
276
|
304
|
|
|
|
|
333
|
$von =~ s/ /\240/g; |
277
|
304
|
50
|
|
|
|
1005
|
if ($how =~ /^bibtex/) { |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
278
|
|
|
|
|
|
|
# Turn ties back into spaces. |
279
|
0
|
|
|
|
|
0
|
$last =~ s/([^,])\240/$1 /g; |
280
|
0
|
|
|
|
|
0
|
$von =~ s/\240([a-z])/ $1/g; |
281
|
|
|
|
|
|
|
# Do the minimal amount of commas |
282
|
0
|
0
|
0
|
|
|
0
|
if ($jr) { |
|
|
0
|
|
|
|
|
|
283
|
0
|
|
|
|
|
0
|
$n = $von . ' ' . $last . ', ' . $jr . ', ' . $first; |
284
|
|
|
|
|
|
|
} elsif ( ($last =~ /\S\s+\S/) && ($last !~ /^{.*}$/) ) { |
285
|
0
|
|
|
|
|
0
|
$n = $von . ' ' . $last . ', ' . $first; |
286
|
|
|
|
|
|
|
} else { |
287
|
0
|
|
|
|
|
0
|
$n = join(' ', $first, $von, $last); |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
} elsif ($how =~ /^plain/) { |
290
|
|
|
|
|
|
|
# plain: "First von Last, Jr" for each name |
291
|
304
|
|
|
|
|
464
|
$n = $first; |
292
|
304
|
100
|
|
|
|
531
|
$n .= " $von " if $von; |
293
|
304
|
100
|
|
|
|
689
|
$n .= " $last" if $last; |
294
|
304
|
50
|
|
|
|
557
|
$n .= ", $jr" if $jr; |
295
|
|
|
|
|
|
|
} elsif ($how =~ /^reverse2/) { |
296
|
|
|
|
|
|
|
# This is "Last, First von, Jr." order. |
297
|
0
|
|
|
|
|
0
|
$n = "$last"; |
298
|
0
|
0
|
0
|
|
|
0
|
$n .= "," if ($first || $von || $jr); |
|
|
|
0
|
|
|
|
|
299
|
0
|
0
|
|
|
|
0
|
$n .= " $first" if $first; |
300
|
0
|
0
|
|
|
|
0
|
$n .= " $von" if $von; |
301
|
0
|
0
|
|
|
|
0
|
$n .= ", $jr" if $jr; |
302
|
|
|
|
|
|
|
} elsif ($how =~ /^reverse/) { |
303
|
|
|
|
|
|
|
# This is "von Last, First, Jr." order. |
304
|
0
|
|
|
|
|
0
|
$n = "$von $last"; |
305
|
0
|
0
|
0
|
|
|
0
|
$n .= ", $first" if ($first || $jr); |
306
|
0
|
0
|
|
|
|
0
|
$n .= ", $jr" if $jr; |
307
|
|
|
|
|
|
|
} elsif ($how =~ /^lname1/) { |
308
|
|
|
|
|
|
|
# lname1 : First author has last name first, the rest are in normal order. |
309
|
|
|
|
|
|
|
# Personally I hate this style, but its common in ecology. |
310
|
0
|
0
|
|
|
|
0
|
$last .= ", $jr" if $jr; |
311
|
0
|
0
|
|
|
|
0
|
if ($namenum == 1) { |
312
|
0
|
0
|
|
|
|
0
|
$last = join(' ', $von, $last) if ($von); |
313
|
0
|
0
|
|
|
|
0
|
if ($first) { |
314
|
0
|
|
|
|
|
0
|
$n = join(', ', $last, $first); |
315
|
|
|
|
|
|
|
} else { |
316
|
0
|
|
|
|
|
0
|
$n = $last; |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
} else { |
319
|
0
|
|
|
|
|
0
|
$n = join(' ', $first, $von, $last); |
320
|
|
|
|
|
|
|
} |
321
|
|
|
|
|
|
|
# unknown name style |
322
|
|
|
|
|
|
|
} else { |
323
|
0
|
|
|
|
|
0
|
return &bib'goterror("canon_to_name doesn't know form: $how"); |
324
|
|
|
|
|
|
|
} |
325
|
304
|
|
|
|
|
381
|
$n =~ s/ \240/ /g; |
326
|
304
|
|
|
|
|
463
|
$n =~ s/^\s+//; |
327
|
304
|
|
|
|
|
671
|
$n =~ s/\s+$//; |
328
|
304
|
|
|
|
|
927
|
$n =~ s/\s+/ /g; |
329
|
304
|
|
|
|
|
688
|
push(@names, $n); |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
|
332
|
120
|
50
|
|
|
|
267
|
if (wantarray) { |
333
|
0
|
|
|
|
|
0
|
@names; |
334
|
|
|
|
|
|
|
} else { |
335
|
|
|
|
|
|
|
# They want the complete string accoring to the style they asked for. |
336
|
120
|
50
|
|
|
|
572
|
if ($how =~ /lname1|plain/) { |
337
|
120
|
100
|
|
|
|
316
|
if (@names <= 2) { |
338
|
64
|
|
|
|
|
142
|
$n = join(' and ', @names); |
339
|
|
|
|
|
|
|
} else { |
340
|
56
|
|
|
|
|
103
|
$lname = pop(@names); |
341
|
56
|
|
|
|
|
206
|
$n = join(', ', @names) . ', and ' . $lname; |
342
|
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
|
} else { |
344
|
0
|
|
|
|
|
0
|
$n = join(' and ', @names); |
345
|
|
|
|
|
|
|
} |
346
|
120
|
|
|
|
|
710
|
$n; |
347
|
|
|
|
|
|
|
} |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
# XXXXX Obsolete? |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
sub parsename { |
353
|
0
|
|
|
0
|
|
0
|
local($name, $how) = @_; |
354
|
|
|
|
|
|
|
|
355
|
0
|
|
|
|
|
0
|
&canon_to_name( &mname_to_canon($name), $how); |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
######### |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
# |
362
|
|
|
|
|
|
|
# parsedate takes a date and returns a list of month, year. |
363
|
|
|
|
|
|
|
# |
364
|
|
|
|
|
|
|
# taken from r2b |
365
|
|
|
|
|
|
|
# |
366
|
|
|
|
|
|
|
# date looks like month dec year |
367
|
|
|
|
|
|
|
# -------------------------------- ------------------- -- --------------- |
368
|
|
|
|
|
|
|
# 1984 84 1984 |
369
|
|
|
|
|
|
|
# 1974-1975 74 1974-1975 |
370
|
|
|
|
|
|
|
# August 1984 aug 84 1984 |
371
|
|
|
|
|
|
|
# May 1984 May 1984 may 84 1984 |
372
|
|
|
|
|
|
|
# 1976 November nov 76 1976 |
373
|
|
|
|
|
|
|
# 1976 November 1976 nov 76 1976 |
374
|
|
|
|
|
|
|
# 21 August 1984 {21 August} 84 1984 |
375
|
|
|
|
|
|
|
# August 18-21, 1984 {August 18-21} 84 1984 |
376
|
|
|
|
|
|
|
# 18-21 August 1991 {18-21 August} 91 1991 |
377
|
|
|
|
|
|
|
# July 31-August 4, 1984 1984 {July 31-August 4} 84 1984 |
378
|
|
|
|
|
|
|
# July-August 1980 {July-August} 80 1980 |
379
|
|
|
|
|
|
|
# February 1984 (revised May 1991) feb 84 1984 |
380
|
|
|
|
|
|
|
# Winter 1990 {Winter} 90 1990 |
381
|
|
|
|
|
|
|
# 1988 (in press) 88 1988 (in press) |
382
|
|
|
|
|
|
|
# to appear ?? to appear |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
sub parsedate { |
385
|
0
|
|
|
0
|
|
0
|
local($date) = @_; |
386
|
0
|
|
|
|
|
0
|
local($year) = undef; |
387
|
0
|
|
|
|
|
0
|
local($month); |
388
|
0
|
|
|
|
|
0
|
local($old_date) = $date; |
389
|
|
|
|
|
|
|
|
390
|
0
|
0
|
|
|
|
0
|
return (undef, undef) unless defined $date; |
391
|
|
|
|
|
|
|
|
392
|
0
|
|
|
|
|
0
|
$date =~ s/(\S+)\s+(\d+)\s+\1\s+\2/$1 $2/; # handle duplicate dates |
393
|
0
|
|
|
|
|
0
|
$date =~ s/^\s*(\d\d\d+)\s+(\S+)/$2 $1/; # handle 1976 November |
394
|
0
|
|
|
|
|
0
|
while ($date =~ /\s*[(]?((\d\d\d\d[-\/])?\d\d\d\d)[).]?\s*(\(.*\))?$/) { |
395
|
0
|
|
|
|
|
0
|
$year = $1; |
396
|
0
|
|
|
|
|
0
|
$date =~ s/,?\s*[(]?(\d\d\d\d[-\/])?\d\d\d\d[).]?\s*(\(.*\))?$//; |
397
|
|
|
|
|
|
|
} |
398
|
|
|
|
|
|
|
|
399
|
0
|
|
|
|
|
0
|
$month = &canon_month($date); |
400
|
|
|
|
|
|
|
|
401
|
0
|
0
|
0
|
|
|
0
|
if ($month !~ /\S/) { |
|
|
0
|
|
|
|
|
|
402
|
0
|
|
|
|
|
0
|
undef $month; |
403
|
|
|
|
|
|
|
} elsif ( (!defined $year) && ($month eq $date) ) { |
404
|
0
|
|
|
|
|
0
|
$year = $old_date; |
405
|
0
|
|
|
|
|
0
|
undef $month; |
406
|
|
|
|
|
|
|
} |
407
|
0
|
|
|
|
|
0
|
($month, $year); |
408
|
|
|
|
|
|
|
} |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
%month_table = ( |
411
|
|
|
|
|
|
|
'apr', 'April', |
412
|
|
|
|
|
|
|
'aug', 'August', |
413
|
|
|
|
|
|
|
'dec', 'December', |
414
|
|
|
|
|
|
|
'feb', 'February', |
415
|
|
|
|
|
|
|
'jan', 'January', |
416
|
|
|
|
|
|
|
'jul', 'July', |
417
|
|
|
|
|
|
|
'jun', 'June', |
418
|
|
|
|
|
|
|
'mar', 'March', |
419
|
|
|
|
|
|
|
'may', 'May', |
420
|
|
|
|
|
|
|
'nov', 'November', |
421
|
|
|
|
|
|
|
'oct', 'October', |
422
|
|
|
|
|
|
|
'sep', 'September', |
423
|
|
|
|
|
|
|
); |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
sub canon_month { |
426
|
52
|
|
|
52
|
|
113
|
local($month) = @_; |
427
|
|
|
|
|
|
|
|
428
|
52
|
100
|
|
|
|
229
|
return $month if $month =~ /[\d\/\-]/; |
429
|
|
|
|
|
|
|
|
430
|
30
|
|
|
|
|
83
|
local($canm) = substr($month, 0, 3); |
431
|
|
|
|
|
|
|
|
432
|
30
|
|
|
|
|
62
|
$canm =~ tr/A-Z/a-z/; |
433
|
|
|
|
|
|
|
|
434
|
30
|
50
|
|
|
|
109
|
return $month unless defined $month_table{$canm}; |
435
|
|
|
|
|
|
|
|
436
|
30
|
|
|
|
|
248
|
$canm; |
437
|
|
|
|
|
|
|
} |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
sub output_month { |
440
|
0
|
|
|
0
|
|
|
local($canm, $how) = @_; |
441
|
0
|
|
|
|
|
|
local($outm) = $month_table{$canm}; |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
# we don't know what they have |
444
|
0
|
0
|
|
|
|
|
return $canm unless defined $outm; |
445
|
|
|
|
|
|
|
|
446
|
0
|
0
|
0
|
|
|
|
if ( ($how eq 'short') && (length($outm) > 4) ) { |
447
|
0
|
|
|
|
|
|
substr($outm, 3) = '.'; |
448
|
|
|
|
|
|
|
} |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
# 'long' format |
451
|
0
|
|
|
|
|
|
$outm; |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
sub output_date { |
455
|
0
|
|
|
0
|
|
|
local($mo, $yr, $how) = @_; |
456
|
0
|
|
|
|
|
|
local($date); |
457
|
|
|
|
|
|
|
|
458
|
0
|
0
|
|
|
|
|
$how = 'short' unless defined $how; |
459
|
|
|
|
|
|
|
|
460
|
0
|
0
|
|
|
|
|
if (defined $mo) { |
461
|
0
|
|
|
|
|
|
$mo = &bp_util'output_month($mo, $how); |
462
|
0
|
0
|
|
|
|
|
if (defined $yr) { |
463
|
0
|
|
|
|
|
|
$date = "$mo $yr"; |
464
|
|
|
|
|
|
|
} else { |
465
|
0
|
|
|
|
|
|
$date = $mo; |
466
|
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
|
} else { |
468
|
0
|
0
|
|
|
|
|
$date = $yr if defined $yr; |
469
|
|
|
|
|
|
|
} |
470
|
|
|
|
|
|
|
|
471
|
0
|
|
|
|
|
|
$date; |
472
|
|
|
|
|
|
|
} |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
# |
475
|
|
|
|
|
|
|
# Generates a key for a canonical record. |
476
|
|
|
|
|
|
|
# |
477
|
|
|
|
|
|
|
# XXXXX This should take an option string and parse it to generate a key. |
478
|
|
|
|
|
|
|
# |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
sub genkey { |
481
|
0
|
|
|
0
|
|
|
local(%cent) = @_; |
482
|
0
|
|
|
|
|
|
local($key, $keytype, $sy); |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
# first pick out the field we're going to use |
485
|
|
|
|
|
|
|
GETKEY: { |
486
|
0
|
|
|
|
|
|
defined $cent{'Authors'} && do |
487
|
0
|
0
|
|
|
|
|
{ $keytype = 'author'; $key = $cent{'Authors'}; last GETKEY; }; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
defined $cent{'CorpAuthor'} && do |
489
|
0
|
0
|
|
|
|
|
{ $keytype = 'org'; $key = $cent{'CorpAuthor'}; last GETKEY; }; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
defined $cent{'Editors'} && do |
491
|
0
|
0
|
|
|
|
|
{ $keytype = 'author'; $key = $cent{'Editors'}; last GETKEY; }; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
defined $cent{'Publisher'} && do |
493
|
0
|
0
|
|
|
|
|
{ $keytype = 'org'; $key = $cent{'Publisher'}; last GETKEY; }; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
defined $cent{'Organization'} && do |
495
|
0
|
0
|
|
|
|
|
{ $keytype = 'org'; $key = $cent{'Organization'}; last GETKEY; }; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
# nothing defined |
497
|
0
|
|
|
|
|
|
$keytype = 'text'; $key = "Anonymous"; |
|
0
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
} |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
# next we want to reduce the name to a reasonable key |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
#print STDERR "$key -> "; |
503
|
|
|
|
|
|
|
|
504
|
0
|
0
|
|
|
|
|
if ($keytype eq 'author') { |
|
|
0
|
|
|
|
|
|
505
|
|
|
|
|
|
|
# # turn "Stephen van Rensselaer, Jr." into "vanRensselaerJr". |
506
|
|
|
|
|
|
|
# #$key =~ s/^([^\/]*)\/([^\/]*)\/([^\/]*)\/([^\|]*).*/$2$1$4/; |
507
|
|
|
|
|
|
|
# # turn "Stephen van Rensselaer, Jr." into "Rensselaer" |
508
|
|
|
|
|
|
|
# #$key =~ s/^([^\/]*)\/.*/$1/; |
509
|
|
|
|
|
|
|
# Remove everything past the first seperator |
510
|
0
|
|
|
|
|
|
local($split_sep) = index($key, $bib'cs_sep2); |
511
|
0
|
0
|
|
|
|
|
substr($key, $split_sep) = '' if $split_sep >= $[; |
512
|
|
|
|
|
|
|
} elsif ($keytype eq 'org') { |
513
|
0
|
|
|
|
|
|
$key =~ s/^(\S*).*/$1/; |
514
|
|
|
|
|
|
|
} else { |
515
|
|
|
|
|
|
|
# text |
516
|
|
|
|
|
|
|
} |
517
|
|
|
|
|
|
|
#print STDERR "$key -> "; |
518
|
0
|
|
|
|
|
|
$key = &bib'nocharset($key); |
519
|
|
|
|
|
|
|
#print STDERR "$key -> "; |
520
|
0
|
|
|
|
|
|
$key =~ tr/A-Za-z0-9\/\-//cd; |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
# reduce it to fit normal lengths |
523
|
0
|
0
|
|
|
|
|
substr($key, 14) = '' if length($key) > 14; |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
# Now find the year |
526
|
0
|
0
|
0
|
|
|
|
if ( (defined $cent{'Year'}) && ($cent{'Year'} =~ /(\d\d\d\d)/) ) { |
|
|
0
|
0
|
|
|
|
|
527
|
0
|
|
|
|
|
|
$sy = $1; |
528
|
|
|
|
|
|
|
} elsif ( (defined $cent{'Month'}) && ($cent{'Month'} =~ /(\d\d\d\d)/) ) { |
529
|
0
|
|
|
|
|
|
$sy = $1; |
530
|
|
|
|
|
|
|
} else { |
531
|
0
|
|
|
|
|
|
$sy = "????"; |
532
|
|
|
|
|
|
|
} |
533
|
|
|
|
|
|
|
# We lop off the century part |
534
|
0
|
|
|
|
|
|
substr($sy, 0, 2) = ''; |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
# and add on the shortyear to the end of our key |
537
|
0
|
|
|
|
|
|
$key .= $sy; |
538
|
|
|
|
|
|
|
|
539
|
0
|
|
|
|
|
|
$key; |
540
|
|
|
|
|
|
|
} |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
# |
543
|
|
|
|
|
|
|
# Register a key in our global key registry, returning the possibly changed |
544
|
|
|
|
|
|
|
# key. All this does is maintain a registry of keys, and if there is already |
545
|
|
|
|
|
|
|
# a key that matches, it adds letters from a -> z -> aa -> az -> ba -> bz -> ... |
546
|
|
|
|
|
|
|
# to the end of the key. A format uses these routines with something like: |
547
|
|
|
|
|
|
|
# |
548
|
|
|
|
|
|
|
# $can{'CiteKey'} = &bp_util'genkey(%can) unless defined $can{'CiteKey'}; |
549
|
|
|
|
|
|
|
# $can{'CiteKey'} = &bp_util'regkey($can{'CiteKey'}); |
550
|
|
|
|
|
|
|
# |
551
|
|
|
|
|
|
|
# in it's fromcanon routines. This generates a key if necessary, and then |
552
|
|
|
|
|
|
|
# registers it. A format may wish to do its own key generation, or even |
553
|
|
|
|
|
|
|
# throw out the citekey it was given and make a new one, so generation and |
554
|
|
|
|
|
|
|
# registration are seperate routines. |
555
|
|
|
|
|
|
|
# |
556
|
|
|
|
|
|
|
# It is recommended that keys be registered here rather than in the format, as |
557
|
|
|
|
|
|
|
# we would like one registry even for multiple formats. |
558
|
|
|
|
|
|
|
# |
559
|
|
|
|
|
|
|
# XXXXX is this necessary? This goes to an output routine after all. As long |
560
|
|
|
|
|
|
|
# as they register them all, or none, do we care? |
561
|
|
|
|
|
|
|
# |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
sub regkey { |
564
|
0
|
|
|
0
|
|
|
local($key) = @_; |
565
|
0
|
|
|
|
|
|
local($rkey, $nextkey, $rkeylen); |
566
|
|
|
|
|
|
|
|
567
|
0
|
|
|
|
|
|
$rkey = $key; |
568
|
0
|
|
|
|
|
|
$rkey =~ tr/A-Z/a-z/; |
569
|
0
|
|
|
|
|
|
$rkeylen = length($rkey); |
570
|
|
|
|
|
|
|
|
571
|
0
|
0
|
|
|
|
|
if (defined $glb_keyreg{$rkey}) { |
572
|
0
|
|
|
|
|
|
$nextkey = $key . 'a'; |
573
|
0
|
|
|
|
|
|
while (defined $glb_keyreg{$nextkey}) { |
574
|
|
|
|
|
|
|
# increment the characters after the key, 'z'+1 -> 'aa'. |
575
|
0
|
|
|
|
|
|
substr($nextkey, $rkeylen)++; |
576
|
|
|
|
|
|
|
} |
577
|
|
|
|
|
|
|
# going to put ourselves in $nextkey |
578
|
0
|
|
|
|
|
|
$glb_keyreg{$nextkey} = 1; |
579
|
|
|
|
|
|
|
# key has changed, so update it for the output. |
580
|
0
|
|
|
|
|
|
$key .= substr($nextkey, $rkeylen); |
581
|
|
|
|
|
|
|
} else { |
582
|
0
|
|
|
|
|
|
$glb_keyreg{$rkey} = 1; |
583
|
|
|
|
|
|
|
# key is unchanged |
584
|
|
|
|
|
|
|
} |
585
|
|
|
|
|
|
|
|
586
|
0
|
|
|
|
|
|
$key; |
587
|
|
|
|
|
|
|
} |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
####################### |
590
|
|
|
|
|
|
|
# end of package |
591
|
|
|
|
|
|
|
####################### |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
1; |