line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# ---------------------------------------------------------------------- |
2
|
|
|
|
|
|
|
# NAME : BibTeX/Name.pm |
3
|
|
|
|
|
|
|
# CLASSES : Text::BibTeX::Name |
4
|
|
|
|
|
|
|
# RELATIONS : |
5
|
|
|
|
|
|
|
# DESCRIPTION: Provides an object-oriented interface to the BibTeX- |
6
|
|
|
|
|
|
|
# style author names (parsing them, that is; formatting |
7
|
|
|
|
|
|
|
# them is done by the Text::BibTeX::NameFormat class). |
8
|
|
|
|
|
|
|
# CREATED : Nov 1997, Greg Ward |
9
|
|
|
|
|
|
|
# MODIFIED : |
10
|
|
|
|
|
|
|
# VERSION : $Id$ |
11
|
|
|
|
|
|
|
# COPYRIGHT : Copyright (c) 1997-2000 by Gregory P. Ward. All rights |
12
|
|
|
|
|
|
|
# reserved. |
13
|
|
|
|
|
|
|
# |
14
|
|
|
|
|
|
|
# This file is part of the Text::BibTeX library. This |
15
|
|
|
|
|
|
|
# library is free software; you may redistribute it and/or |
16
|
|
|
|
|
|
|
# modify it under the same terms as Perl itself. |
17
|
|
|
|
|
|
|
# ---------------------------------------------------------------------- |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
package Text::BibTeX::Name; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
require 5.004; |
22
|
|
|
|
|
|
|
|
23
|
13
|
|
|
13
|
|
219
|
use strict; |
|
13
|
|
|
|
|
29
|
|
|
13
|
|
|
|
|
401
|
|
24
|
13
|
|
|
13
|
|
69
|
use Carp; |
|
13
|
|
|
|
|
23
|
|
|
13
|
|
|
|
|
815
|
|
25
|
13
|
|
|
13
|
|
73
|
use vars qw'$VERSION'; |
|
13
|
|
|
|
|
24
|
|
|
13
|
|
|
|
|
695
|
|
26
|
|
|
|
|
|
|
$VERSION = 0.88; |
27
|
|
|
|
|
|
|
|
28
|
13
|
|
|
13
|
|
73
|
use Text::BibTeX; |
|
13
|
|
|
|
|
36
|
|
|
13
|
|
|
|
|
7315
|
|
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=encoding UTF-8 |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=head1 NAME |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
Text::BibTeX::Name - interface to BibTeX-style author names |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head1 SYNOPSIS |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
use Text::BibTeX::Name; |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
$name = Text::BibTeX::Name->new(); |
41
|
|
|
|
|
|
|
$name->split('J. Random Hacker'); |
42
|
|
|
|
|
|
|
# or: |
43
|
|
|
|
|
|
|
$name = Text::BibTeX::Name->new('J. Random Hacker'); |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
@firstname_tokens = $name->part ('first'); |
46
|
|
|
|
|
|
|
$lastname = join (' ', $name->part ('last')); |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
$format = Text::BibTeX::NameFormat->new(); |
49
|
|
|
|
|
|
|
# ...customize $format... |
50
|
|
|
|
|
|
|
$formatted = $name->format ($format); |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head1 DESCRIPTION |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
C<Text::BibTeX::Name> provides an abstraction for BibTeX-style names and |
55
|
|
|
|
|
|
|
some basic operations on them. A name, in the BibTeX world, consists of |
56
|
|
|
|
|
|
|
a list of I<tokens> which are divided amongst four I<parts>: `first', |
57
|
|
|
|
|
|
|
`von', `last', and `jr'. |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
Tokens are separated by whitespace or commas at brace-level zero. Thus |
60
|
|
|
|
|
|
|
the name |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
van der Graaf, Horace Q. |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
has five tokens, whereas the name |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
{Foo, Bar, and Sons} |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
consists of a single token. Skip down to L<"EXAMPLES"> for more examples, or |
69
|
|
|
|
|
|
|
read on if you want to know the exact details of how names are split into |
70
|
|
|
|
|
|
|
tokens and parts. |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
How tokens are divided into parts depends on the form of the name. If |
73
|
|
|
|
|
|
|
the name has no commas at brace-level zero (as in the second example), |
74
|
|
|
|
|
|
|
then it is assumed to be in either "first last" or "first von last" |
75
|
|
|
|
|
|
|
form. If there are no tokens that start with a lower-case letter, then |
76
|
|
|
|
|
|
|
"first last" form is assumed: the final token is the last name, and all |
77
|
|
|
|
|
|
|
other tokens form the first name. Otherwise, the earliest contiguous |
78
|
|
|
|
|
|
|
sequence of tokens with initial lower-case letters is taken as the `von' |
79
|
|
|
|
|
|
|
part; if this sequence includes the final token, then a warning is |
80
|
|
|
|
|
|
|
printed and the final token is forced to be the `last' part. |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
If a name has a single comma, then it is assumed to be in "von last, |
83
|
|
|
|
|
|
|
first" form. A leading sequence of tokens with initial lower-case |
84
|
|
|
|
|
|
|
letters, if any, forms the `von' part; tokens between the `von' and the |
85
|
|
|
|
|
|
|
comma form the `last' part; tokens following the comma form the `first' |
86
|
|
|
|
|
|
|
part. Again, if there are no tokens following a leading sequence of |
87
|
|
|
|
|
|
|
lowercase tokens, a warning is printed and the token immediately |
88
|
|
|
|
|
|
|
preceding the comma is taken to be the `last' part. |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
If a name has more than two commas, a warning is printed and the name is |
91
|
|
|
|
|
|
|
treated as though only the first two commas were present. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
Finally, if a name has two commas, it is assumed to be in "von last, jr, |
94
|
|
|
|
|
|
|
first" form. (This is the only way to represent a name with a `jr' |
95
|
|
|
|
|
|
|
part.) The parsing of the name is the same as for a one-comma name, |
96
|
|
|
|
|
|
|
except that tokens between the two commas are taken to be the `jr' part. |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=head1 CAVEAT |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
The C code that does the actual work of splitting up names takes a shortcut |
101
|
|
|
|
|
|
|
and makes a few assumptions about whitespace. In particular, there must be |
102
|
|
|
|
|
|
|
no leading whitespace, no trailing whitespace, no consecutive whitespace |
103
|
|
|
|
|
|
|
characters in the string, and no whitespace characters other than space. |
104
|
|
|
|
|
|
|
In other words, all whitespace must consist of lone internal spaces. |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=head1 EXAMPLES |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
The strings C<"John Smith"> and C<"Smith, John"> are different |
109
|
|
|
|
|
|
|
representations of the same name, so split into parts and tokens the |
110
|
|
|
|
|
|
|
same way, namely as: |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
first => ('John') |
113
|
|
|
|
|
|
|
von => () |
114
|
|
|
|
|
|
|
last => ('Smith') |
115
|
|
|
|
|
|
|
jr => () |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
Note that every part is a list of tokens, even if there is only one |
118
|
|
|
|
|
|
|
token in that part; empty parts get empty token lists. Every token is |
119
|
|
|
|
|
|
|
just a string. Writing this example in actual code is simple: |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
$name = Text::BibTeX::Name->new("John Smith"); # or "Smith, John" |
122
|
|
|
|
|
|
|
$name->part ('first'); # returns list ("John") |
123
|
|
|
|
|
|
|
$name->part ('last'); # returns list ("Smith") |
124
|
|
|
|
|
|
|
$name->part ('von'); # returns list () |
125
|
|
|
|
|
|
|
$name->part ('jr'); # returns list () |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
(We'll omit the empty parts in the rest of the examples: just assume |
128
|
|
|
|
|
|
|
that any unmentioned part is an empty list.) If more than two tokens |
129
|
|
|
|
|
|
|
are included and there's no comma, they'll go to the first name: thus |
130
|
|
|
|
|
|
|
C<"John Q. Smith"> splits into |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
first => ("John", "Q.") |
133
|
|
|
|
|
|
|
last => ("Smith") |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
and C<"J. R. R. Tolkien"> into |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
first => ("J.", "R.", "R.") |
138
|
|
|
|
|
|
|
last => ("Tolkien") |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
The ambiguous name C<"Kevin Philips Bong"> splits into |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
first => ("Kevin", "Philips") |
143
|
|
|
|
|
|
|
last => ("Bong") |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
which may or may not be the right thing, depending on the particular |
146
|
|
|
|
|
|
|
person. There's no way to know though, so if this fellow's last name is |
147
|
|
|
|
|
|
|
"Philips Bong" and not "Bong", the string representation of his name |
148
|
|
|
|
|
|
|
must disambiguate. One possibility is C<"Philips Bong, Kevin"> which |
149
|
|
|
|
|
|
|
splits into |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
first => ("Kevin") |
152
|
|
|
|
|
|
|
last => ("Philips", "Bong") |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
Alternately, C<"Kevin {Philips Bong}"> takes advantage of the fact that |
155
|
|
|
|
|
|
|
tokens are only split on whitespace I<at brace-level zero>, and becomes |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
first => ("Kevin") |
158
|
|
|
|
|
|
|
last => ("{Philips Bong}") |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
which is fine if your names are destined to be processed by TeX, but |
161
|
|
|
|
|
|
|
might be problematic in other contexts. Similarly, C<"St John-Mollusc, |
162
|
|
|
|
|
|
|
Oliver"> becomes |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
first => ("Oliver") |
165
|
|
|
|
|
|
|
last => ("St", "John-Mollusc") |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
which can also be written as C<"Oliver {St John-Mollusc}">: |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
first => ("Oliver") |
170
|
|
|
|
|
|
|
last => ("{St John-Mollusc}") |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
Since tokens are separated purely by whitespace, hyphenated names will |
173
|
|
|
|
|
|
|
work either way: both C<"Nigel Incubator-Jones"> and C<"Incubator-Jones, |
174
|
|
|
|
|
|
|
Nigel"> come out as |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
first => ("Nigel") |
177
|
|
|
|
|
|
|
last => ("Incubator-Jones") |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
Multi-token last names with lowercase components -- the "von part" -- |
180
|
|
|
|
|
|
|
work fine: both C<"Ludwig van Beethoven"> and C<"van Beethoven, Ludwig"> |
181
|
|
|
|
|
|
|
parse (correctly) into |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
first => ("Ludwig") |
184
|
|
|
|
|
|
|
von => ("van") |
185
|
|
|
|
|
|
|
last => ("Beethoven") |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
This allows these European aristocratic names to sort properly, |
188
|
|
|
|
|
|
|
i.e. I<van Beethoven> under I<B> rather than I<v>. Speaking of |
189
|
|
|
|
|
|
|
aristocratic European names, C<"Charles Louis Xavier Joseph de la |
190
|
|
|
|
|
|
|
Vall{\'e}e Poussin"> is handled just fine, and splits into |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
first => ("Charles", "Louis", "Xavier", "Joseph") |
193
|
|
|
|
|
|
|
von => ("de", "la") |
194
|
|
|
|
|
|
|
last => ("Vall{\'e}e", "Poussin") |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
so could be sorted under I<V> rather than I<d>. (Note that the sorting |
197
|
|
|
|
|
|
|
algorithm in L<Text::BibTeX::BibSort> is a slavish imitation of BibTeX |
198
|
|
|
|
|
|
|
0.99, and therefore does the wrong thing with these names: the sort key |
199
|
|
|
|
|
|
|
starts with the "von" part.) |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
However, capitalized "von parts" don't work so well: C<"R. J. Van de |
202
|
|
|
|
|
|
|
Graaff"> splits into |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
first => ("R.", "J.", "Van") |
205
|
|
|
|
|
|
|
von => ("de") |
206
|
|
|
|
|
|
|
last => ("Graaff") |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
which is clearly wrong. This name should be represented as C<"Van de |
209
|
|
|
|
|
|
|
Graaff, R. J."> |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
first => ("R.", "J.") |
212
|
|
|
|
|
|
|
last => ("Van", "de", "Graaff") |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
which is probably right. (This particular Van de Graaff was an |
215
|
|
|
|
|
|
|
American, so he probably belongs under I<V> -- which is where my |
216
|
|
|
|
|
|
|
(British) dictionary puts him. Other Van de Graaff's mileages may |
217
|
|
|
|
|
|
|
vary.) |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
Finally, many names include a suffix: "Jr.", "III", "fils", and so |
220
|
|
|
|
|
|
|
forth. These are handled, but with some limitations. If there's a |
221
|
|
|
|
|
|
|
comma before the suffix (the usual U.S. convention for "Jr."), then the |
222
|
|
|
|
|
|
|
name should be in I<last, jr, first> form, e.g. C<"Doe, Jr., John"> |
223
|
|
|
|
|
|
|
comes out (correctly) as |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
first => ("John") |
226
|
|
|
|
|
|
|
last => ("Doe") |
227
|
|
|
|
|
|
|
jr => ("Jr.") |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
but C<"John Doe, Jr."> is ambiguous and is parsed as |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
first => ("Jr.") |
232
|
|
|
|
|
|
|
last => ("John", "Doe") |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
(so don't do it that way). If there's no comma before the suffix -- the |
235
|
|
|
|
|
|
|
usual for Roman numerals, and occasionally seen with "Jr." -- then |
236
|
|
|
|
|
|
|
you're stuck and have to make the suffix part of the last name. Thus, |
237
|
|
|
|
|
|
|
C<"Gates III, William H."> comes out |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
first => ("William", "H.") |
240
|
|
|
|
|
|
|
last => ("Gates", "III") |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
but C<"William H. Gates III"> is ambiguous, and becomes |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
first => ("William", "H.", "Gates") |
245
|
|
|
|
|
|
|
last => ("III") |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
-- not what you want. Again, the curly-brace trick comes in handy, so |
248
|
|
|
|
|
|
|
C<"William H. {Gates III}"> splits into |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
first => ("William", "H.") |
251
|
|
|
|
|
|
|
last => ("{Gates III}") |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
There is no way to make a comma-less suffix the C<jr> part. (This is an |
254
|
|
|
|
|
|
|
unfortunate consequence of slavishly imitating BibTeX 0.99.) |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
Finally, names that aren't really names of people but rather are |
257
|
|
|
|
|
|
|
organization or company names should be forced into a single token by |
258
|
|
|
|
|
|
|
wrapping them in curly braces. For example, "Foo, Bar and Sons" should |
259
|
|
|
|
|
|
|
be written C<"{Foo, Bar and Sons}">, which will split as |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
last => ("{Foo, Bar and Sons}") |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
Of course, if this is one name in a BibTeX C<author> or C<editor> |
264
|
|
|
|
|
|
|
list, this name has to be wrapped in braces anyways (because of the C<" |
265
|
|
|
|
|
|
|
and ">), but that's another story. |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=head1 FORMATTING NAMES |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
Putting a split-up name back together again in a flexible, customizable |
270
|
|
|
|
|
|
|
way is the job of another module: see L<Text::BibTeX::NameFormat>. |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=head1 METHODS |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
=over 4 |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
=item new([ [OPTS,] NAME [, FILENAME, LINE, NAME_NUM]]) |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
Creates a new C<Text::BibTeX::Name> object. If NAME is supplied, it |
279
|
|
|
|
|
|
|
must be a string containing a single name, and it will be passed to |
280
|
|
|
|
|
|
|
the C<split> method for further processing. FILENAME, LINE, and |
281
|
|
|
|
|
|
|
NAME_NUM, if present, are all also passed to C<split> to allow better |
282
|
|
|
|
|
|
|
error messages. |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
If the first argument is a hash reference, it is used to define |
285
|
|
|
|
|
|
|
configuration values. At the moment the available values are: |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
=over 4 |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
=item BINMODE |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
Set the way Text::BibTeX deals with strings. By default it manages |
292
|
|
|
|
|
|
|
strings as bytes. You can set BINMODE to 'utf-8' to get NFC normalized |
293
|
|
|
|
|
|
|
UTF-8 strings and you can customise the normalization with the NORMALIZATION option. |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
Text::BibTeX::Name->new( |
296
|
|
|
|
|
|
|
{ binmode => 'utf-8', normalization => 'NFD' }, |
297
|
|
|
|
|
|
|
"Alberto Simões"); |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=back |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
=cut |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
# new([ [OPTS,] NAME [, FILENAME, LINE, NAME_NUM]])
#
# Constructor.  OPTS, when given, must be a hash reference as the first
# argument; its keys are matched case-insensitively.  Recognised options:
#   binmode       - a value matching /utf-?8/i selects 'utf-8'; anything
#                   else leaves the default 'bytes'
#   normalization - stored as given; defaults to 'NFC'
# If NAME is defined it is handed to the `split' method together with
# FILENAME, LINE and NAME_NUM.  Returns the new object.
sub new {
    my $class = shift;

    # Work on a private copy of the options so that adding the lower-cased
    # keys does not modify the hash that belongs to the caller.
    my %opts = ref $_[0] eq 'HASH' ? %{ shift() } : ();
    $opts{ lc $_ } = $opts{$_} for ( keys %opts );

    my ( $name, $filename, $line, $name_num ) = @_;

    # Allow both Class->new(...) and $object->new(...).
    $class = ref($class) || $class;
    my $self = bless { }, $class;

    $self->{binmode}       = 'bytes';
    $self->{normalization} = 'NFC';

    # `defined' (rather than `exists') keeps an explicit undef value from
    # reaching the pattern match.
    $self->{binmode} = 'utf-8'
        if defined $opts{binmode} && $opts{binmode} =~ /utf-?8/i;
    $self->{normalization} = $opts{normalization}
        if exists $opts{normalization};

    $self->split( Text::BibTeX->_process_argument($name, $self->{binmode}, $self->{normalization}),
                  $filename, $line, $name_num, 1 )
        if ( defined $name );

    return $self;
}
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
# Destructor: invokes the object's `free' method, which releases the
# C structure kept by `split'.
sub DESTROY
{
    my ($name_obj) = @_;

    $name_obj->free;
}
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
=item split (NAME [, FILENAME, LINE, NAME_NUM]) |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
Splits NAME (a string containing a single name) into tokens and |
337
|
|
|
|
|
|
|
subsequently into the four parts of a BibTeX-style name (first, von, |
338
|
|
|
|
|
|
|
last, and jr). (Each part is a list of tokens, and tokens are separated |
339
|
|
|
|
|
|
|
by whitespace or commas at brace-depth zero. See above for full details |
340
|
|
|
|
|
|
|
on how a name is split into its component parts.) |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
The token-lists that make up each part of the name are then stored in |
343
|
|
|
|
|
|
|
the C<Text::BibTeX::Name> object for later retrieval or formatting with |
344
|
|
|
|
|
|
|
the C<part> and C<format> methods. |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
=cut |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
# split (NAME [, FILENAME, LINE, NAME_NUM])
#
# Passes NAME through Text::BibTeX->_process_argument (using the object's
# binmode and normalization settings) and hands the result to the `_split'
# method.  LINE and NAME_NUM default to -1 when they are not defined;
# FILENAME is passed through untouched, and the final argument to `_split'
# is always 1.  Returns whatever `_split' returns.
sub split
{
    my $self = shift;
    my ($name, $filename, $line, $name_num) = @_;

    # Supply the default values the XSUB expects
    $line     = -1 unless defined $line;
    $name_num = -1 unless defined $name_num;

    # Collected in list context, exactly as it would be inside an
    # argument list.
    my @processed = Text::BibTeX->_process_argument
        ($name, $self->{binmode}, $self->{normalization});

    return $self->_split (@processed, $filename, $line, $name_num, 1);
}
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
=item part (PARTNAME) |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
Returns the list of tokens in part PARTNAME of a name previously split with |
363
|
|
|
|
|
|
|
C<split>. For example, suppose a C<Text::BibTeX::Name> object is created and |
364
|
|
|
|
|
|
|
initialized like this: |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
$name = Text::BibTeX::Name->new(); |
367
|
|
|
|
|
|
|
$name->split ('Charles Louis Xavier Joseph de la Vall{\'e}e Poussin'); |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
Then this code: |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
$name->part ('von'); |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
would return the list C<('de','la')>. |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
=cut |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
# part (PARTNAME)
#
# Returns the tokens stored for part PARTNAME, each passed through
# Text::BibTeX->_process_result with the object's binmode and
# normalization settings.
#
# PARTNAME must be one of 'first', 'von', 'last' or 'jr'; anything else
# croaks with "unknown name part".
#
# In list context the token list is returned; a part that is absent (or
# has no tokens) yields the empty list, so no spurious undef element
# appears.  In scalar context the result is undef for an absent or empty
# part, the token itself for a one-token part, and the token count for a
# part with several tokens.
sub part {
    my ( $self, $partname ) = @_;

    croak "unknown name part"
        unless defined $partname && $partname =~ /^(first|von|last|jr)$/;

    # Bare return: empty list in list context, undef in scalar context.
    return unless exists $self->{$partname};

    my @tokens = map { Text::BibTeX->_process_result($_, $self->{binmode}, $self->{normalization}) }
                 @{ $self->{$partname} };

    return @tokens if wantarray;
    return @tokens > 1 ? scalar @tokens : $tokens[0];
}
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
=item format (FORMAT) |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
Formats a name according to the specifications encoded in FORMAT, which |
395
|
|
|
|
|
|
|
should be a C<Text::BibTeX::NameFormat> (or descendant) object. (In short, |
396
|
|
|
|
|
|
|
it must supply a method C<apply> which takes a C<Text::BibTeX::Name> |
397
|
|
|
|
|
|
|
object as its only argument.) Returns the formatted name as a string. |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
See L<Text::BibTeX::NameFormat> for full details on formatting names. |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
=cut |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
# format (FORMAT)
#
# Delegates to FORMAT's `apply' method, passing this name object as the
# only argument, and returns whatever `apply' returns.
sub format
{
    my $self   = shift;
    my $format = shift;

    return $format->apply ($self);
}
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
1; |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
=back |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=head1 SEE ALSO |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
L<Text::BibTeX::Entry>, L<Text::BibTeX::NameFormat>, L<bt_split_names>. |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
=head1 AUTHOR |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
Greg Ward |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
=head1 COPYRIGHT |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
Copyright (c) 1997-2000 by Gregory P. Ward. All rights reserved. This file |
425
|
|
|
|
|
|
|
is part of the Text::BibTeX library. This library is free software; you |
426
|
|
|
|
|
|
|
may redistribute it and/or modify it under the same terms as Perl itself. |