File Coverage

lib/Parse/File/Taxonomy/Index.pm

Criterion	Covered	Total	%
statement	212	212	100.0
branch	81	84	96.4
condition	36	36	100.0
subroutine	18	18	100.0
pod	5	8	62.5
total	352	358	98.3

line	stmt	bran	cond	sub	pod	time	code
1							package Parse::File::Taxonomy::Index;
2	3			3		7756	use strict;
	3					5
	3					112
3	3			3		1598	use parent qw( Parse::File::Taxonomy );
	3					1068
	3					15
4	3			3		128	use Carp;
	3					5
	3					178
5	3			3		14	use Text::CSV;
	3					5
	3					15
6	3			3		68	use Scalar::Util qw( reftype );
	3					34
	3					167
7							our $VERSION = '0.04';
8	3					6540	use Parse::File::Taxonomy::Auxiliary qw(
9							path_check_fields
10							components_check_fields
11	3			3		1088	);
	3					6
12							#use Data::Dump;
13
14							=head1 NAME
15
16							Parse::File::Taxonomy::Index - Extract a taxonomy from a hierarchy inside a CSV file
17
18							=head1 SYNOPSIS
19
20							use Parse::File::Taxonomy::Index;
21
22							$source = "./t/data/alpha.csv";
23							$obj = Parse::File::Taxonomy::Index->new( {
24							file => $source,
25							} );
26
27							=cut
28
29							=head1 METHODS
30
31							=head2 C
32
33							=over 4
34
35							=item * Purpose
36
37							Parse::File::Taxonomy::Index constructor.
38
39							=item * Arguments
40
41							Single hash reference. There are two possible interfaces: C and C.
42
43							=over 4
44
45							=item 1 C interface
46
47							$source = "./t/data/delta.csv";
48							$obj = Parse::File::Taxonomy::Index->new( {
49							file => $source,
50							} );
51
52							Elements in the hash reference are keyed on:
53
54							=over 4
55
56							=item * C
57
58							Absolute or relative path to the incoming taxonomy file.
59							B for this interface.
60
61							=item * C
62
63							The name of the column in the header row under which each data record's unique
64							ID can be found. Defaults to C.
65
66							=item * C
67
68							The name of the column in the header row under which each data record's parent
69							ID can be found. (Will be empty in the case of top-level nodes, as they have
70							no parent.) Defaults to C.
71
72							=item * C
73
74							The name of the column in the header row under which, in each data record, there
75							is a found a string which differentiates that record from all other records
76							with the same parent ID. Defaults to C.
77
78							=item * Text::CSV options
79
80							Any other options which could normally be passed to Cnew()> will
81							be passed through to that module's constructor. On the recommendation of the
82							Text::CSV documentation, C is always set to a true value.
83
84							=back
85
86							=item 2 C interface
87
88							$obj = Parse::File::Taxonomy::Index->new( {
89							components => {
90							fields => $fields,
91							data_records => $data_records,
92							}
93							} );
94
95							Elements in this hash are keyed on:
96
97							=over 4
98
99							=item * C
100
101							This element is B for the C interface. The value of this
102							element is a hash reference with two keys, C and C.
103							C is a reference to an array holding the field or column names for the
104							data set. C is a reference to an array of array references,
105							each of the latter arrayrefs holding one record or row from the data set.
106
107							=back
108
109							=back
110
111							=item * Return Value
112
113							Parse::File::Taxonomy::Index object.
114
115							=item * Exceptions
116
117							C will throw an exception under any of the following conditions:
118
119							=over 4
120
121							=item * Argument to C is not a reference.
122
123							=item * Argument to C is not a hash reference.
124
125							=item * Argument to C must have either 'file' or 'components' element but not both.
126
127							=item * Lack columns in header row to match requirements.
128
129							=item * Non-numeric entry in C or C column.
130
131							=item * Duplicate entries in C column.
132
133							=item * Number of fields in a data record does not match number in header row.
134
135							=item * Empty string in a C column of a record.
136
137							=item * Unable to locate a record whose C is the C of a different record.
138
139							=item * No records with same C may share value of C column.
140
141							=item * C interface
142
143							=over 4
144
145							=item * In the C interface, unable to locate the file which is the value of the C element.
146
147							=item * The same field is found more than once in the header row of the
148							incoming taxonomy file.
149
150							=item * Unable to open or close the incoming taxonomy file for reading.
151
152							=back
153
154							=item * C interface
155
156							=over 4
157
158							=item * In the C interface, C element must be a hash reference with C and C elements.
159
160							=item * C element must be array reference.
161
162							=item * C element must be reference to array of array references.
163
164							=item * No duplicate fields in C element's array reference.
165
166							=back
167
168							=back
169
170							=back
171
172							=cut
173
174							sub new {
175	45			45	1	54294	my ($class, $args) = @_;
176	45					67	my $data;
177
178	45	100	100			784	croak "Argument to 'new()' must be hashref"
179							unless (ref($args) and reftype($args) eq 'HASH');
180	43	100	100			295	croak "Argument to 'new()' must have either 'file' or 'components' element"
181							unless ($args->{file} or $args->{components});
182	42	100	100			356	croak "Argument to 'new()' must have either 'file' or 'components' element but not both"
183							if ($args->{file} and $args->{components});
184
185	41	100				133	$data->{id_col} = $args->{id_col}
186							? delete $args->{id_col}
187							: 'id';
188	41	100				99	$data->{parent_id_col} = $args->{parent_id_col}
189							? delete $args->{parent_id_col}
190							: 'parent_id';
191	41	100				117	$data->{component_col} = $args->{component_col}
192							? delete $args->{component_col}
193							: 'name';
194
195	41	100				75	if ($args->{components}) {
196	22	100	100			323	croak "Value of 'components' element must be hashref"
197							unless (ref($args->{components}) and reftype($args->{components}) eq 'HASH');
198	20					34	for my $k ( qw\| fields data_records \| ) {
199	37	100				271	croak "Value of 'components' element must have '$k' key-value pair"
200							unless exists $args->{components}->{$k};
201	35	100	100			530	croak "Value of '$k' element must be arrayref"
202							unless (ref($args->{components}->{$k}) and
203							reftype($args->{components}->{$k}) eq 'ARRAY');
204							}
205	15					26	for my $row (@{$args->{components}->{data_records}}) {
	15					38
206	173	100	100			741	croak "Each element in 'data_records' array must be arrayref"
207							unless (ref($row) and reftype($row) eq 'ARRAY');
208							}
209	13					39	_prepare_fields($data, $args->{components}->{fields}, 1);
210	9					15	my $these_data_records = $args->{components}->{data_records};
211	9					53	delete $args->{components};
212	9					22	_prepare_data_records($data, $these_data_records, $args);
213							}
214							else {
215	19	100				556	croak "Cannot locate file '$args->{file}'"
216							unless (-f $args->{file});
217	18					57	$data->{file} = delete $args->{file};
218	18					41	$args->{binary} = 1;
219	18	50				102	my $csv = Text::CSV->new ( $args )
220							or croak "Cannot use CSV: ".Text::CSV->error_diag ();
221	18	50				2162	open my $IN, "<", $data->{file}
222							or croak "Unable to open '$data->{file}' for reading";
223	18					78	my $header_ref = $csv->getline($IN);
224	18					26761	_prepare_fields($data, $header_ref);
225
226	14					55	my $data_records = $csv->getline_all($IN);
227	14	50				58697	close $IN or croak "Unable to close after reading";
228	14					54	_prepare_data_records($data, $data_records, $args);
229							}
230
231	13					23	while (my ($k,$v) = each %{$args}) {
	21					66
232	8					20	$data->{$k} = $v;
233							}
234	13					77	return bless $data, $class;
235							}
236
237							sub _prepare_fields {
238	31			31		55	my ($data, $fields_ref, $components) = @_;
239	31	100				73	if (! $components) {
240	18					65	path_check_fields($data, $fields_ref);
241	17					47	_check_required_columns($data, $fields_ref);
242							}
243							else { # 'components' interface
244	13					47	components_check_fields($data, $fields_ref);
245	12					26	_check_required_columns($data, $fields_ref);
246							}
247	23					41	$data->{fields} = $fields_ref;
248	23					30	return $data;
249							}
250
251							sub _check_required_columns {
252	29			29		44	my ($data, $fields_ref) = @_;
253	29					53	my %col2idx = map { $fields_ref->[$_] => $_ } (0 .. $#{$fields_ref});
	228					346
	29					72
254	29					70	my %missing_columns = ();
255	29					57	my %main_columns = map { $_ => 1 } ( qw\| id_col parent_id_col component_col \| );
	87					164
256	29					83	for my $c ( keys %main_columns ) {
257	87	100				187	if (! exists $col2idx{$data->{$c}}) {
258	6					12	$missing_columns{$c} = $data->{$c};
259							}
260							}
261	29					58	my $error_msg = "Could not locate columns in header to match required arguments:";
262	29					88	for my $c (sort keys %missing_columns) {
263	6					16	$error_msg .= "\n $c: $missing_columns{$c}";
264							}
265	29	100				860	croak $error_msg if scalar keys %missing_columns;
266	23					47	$data->{fields} = $fields_ref;
267	23					47	for my $c (keys %main_columns) {
268	69					170	$data->{$c.'_idx'} = $col2idx{$data->{$c}};
269							}
270	23					81	return $data;
271							}
272
273							sub _prepare_data_records {
274	23			23		53	my ($data, $data_records, $args) = @_;
275							# Confirm no duplicate entries in 'id_col'. DONE
276							# Confirm all rows have same number of columns as header. DONE
277	23					41	my $error_msg = '';
278	23					23	my $field_count = scalar(@{$data->{fields}});
	23					53
279	23					48	my @non_numeric_id_records = ();
280	23					45	my %ids_seen = ();
281	23					38	my @bad_count_records = ();
282	23					31	my @nameless_component_records = ();
283	23					36	for my $rec (@{$data_records}) {
	23					54
284	295	100				735	if ($rec->[$data->{id_col_idx}] !~ m/^\d+$/) {
285	2					5	push @non_numeric_id_records, [ $rec->[$data->{id_col_idx}], '' ];
286							}
287	295	100	100			990	if (length($rec->[$data->{parent_id_col_idx}]) and
288							$rec->[$data->{parent_id_col_idx}] !~ m/^\d+$/
289							) {
290	2					5	push @non_numeric_id_records, [
291							$rec->[$data->{id_col_idx}],
292							$rec->[$data->{parent_id_col_idx}]
293							];
294							}
295	295					411	$ids_seen{$rec->[$data->{id_col_idx}]}++;
296	295					207	my $this_row_count = scalar(@{$rec});
	295					272
297	295	100				390	if ($this_row_count != $field_count) {
298	6					10	push @bad_count_records,
299							[ $rec->[$data->{id_col_idx}], $this_row_count ];
300							}
301	295	100				512	if (! length($rec->[$data->{component_col_idx}])) {
302	3					7	push @nameless_component_records, $rec->[$data->{id_col_idx}];
303							}
304							}
305	23					91	$error_msg = <
306							Non-numeric entries are not permitted in the '$data->{id_col}' or '$data->{parent_id_col}' columns.
307							The following records each violate this restriction one or two times:
308							NON_NUMERIC_IDS
309	23					45	for my $rec (@non_numeric_id_records) {
310	4					10	$error_msg .= " $data->{id_col}: $rec->[0]\t$data->{parent_id_col}: $rec->[1]\n";
311							}
312	23	100				191	croak $error_msg if @non_numeric_id_records;
313
314	22					40	my @dupe_ids = ();
315	22					193	for my $id (sort keys %ids_seen) {
316	278	100				430	push @dupe_ids, $id if $ids_seen{$id} > 1;
317							}
318	22					70	$error_msg = <
319							No duplicate entries are permitted in the '$data->{id_col}'column.
320							The following entries appear the number of times shown:
321							ERROR_MSG_DUPE
322	22					39	for my $id (@dupe_ids) {
323	4					15	$error_msg .= " $id:" . sprintf(" %6s\n" => $ids_seen{$id});
324							}
325	22	100				372	croak $error_msg if @dupe_ids;
326
327	20					57	$error_msg = <
328							Header row has $field_count columns. The following records
329							(identified by the value in their '$data->{id_col}' columns)
330							have different counts:
331							ERROR_MSG_WRONG_COUNT
332	20					38	for my $rec (@bad_count_records) {
333	6					16	$error_msg .= " $rec->[0]: $rec->[1]\n";
334							}
335	20	100				392	croak $error_msg if @bad_count_records;
336
337	18					47	$error_msg = <
338							Each data record must have a non-empty string in its 'component' column.
339							The following records (identified by the value in their '$data->{id_col}' columns)
340							lack valid components:
341							NAMELESS_COMPONENT
342	18					32	for my $rec (@nameless_component_records) {
343	3					7	$error_msg .= " id: $rec\n";
344							}
345	18	100				258	croak $error_msg if @nameless_component_records;
346
347	17					33	my %ids_missing_parents = ();
348	17					16	for my $rec (@{$data_records}) {
	17					26
349	217					199	my $parent_id = $rec->[$data->{parent_id_col_idx}];
350	217	100	100			605	if ( (length($parent_id)) and (! $ids_seen{$parent_id}) ) {
351	4					10	$ids_missing_parents{$rec->[$data->{id_col_idx}]} = $parent_id;
352							}
353							}
354	17					73	$error_msg = <
355							For each record with a non-null value in the '$data->{parent_id_col}' column,
356							there must be a record with that value in the '$data->{id_col}' column.
357							The following records (identified by the value in their '$data->{id_col}' columns)
358							appear to to have parent IDs which do not have records of their own:
359							ERROR_MSG_MISSING_PARENT
360	17					55	for my $k (sort {$a <=> $b} keys %ids_missing_parents) {
	2					12
361	4					14	$error_msg .= " $k: $ids_missing_parents{$k}\n";
362							}
363	17	100				424	croak $error_msg if scalar keys %ids_missing_parents;
364
365	15					23	my %families = ();
366	15					20	for my $rec (@{$data_records}) {
	15					28
367	191	100				287	if (length($rec->[$data->{parent_id_col_idx}])) {
368	133					262	$families{$rec->[$data->{parent_id_col_idx}]}{$rec->[$data->{component_col_idx}]}++;
369							}
370							}
371	15					54	$error_msg = <
372							No record with a non-null value in the '$data->{parent_id_col}' column
373							may have two children with the same value in the '$data->{component_col}' column.
374							The following are violations:
375							ERROR_MSG_SIBLINGS_NAMED_SAME
376
377	15					25	my $same_names = 0;
378	15					71	for my $k (sort {$a <=> $b} keys %families) {
	144					159
379	88					72	for my $l (sort keys %{$families{$k}}) {
	88					152
380	129	100				268	if ($families{$k}{$l} > 1) {
381	4					14	$error_msg .= " $data->{parent_id_col}: $k\|$data->{component_col}: $l\|count of $data->{component_col}s: $families{$k}{$l}\n";
382	4					7	$same_names++;
383							}
384							}
385							}
386	15	100				329	croak $error_msg if $same_names;
387
388	13					29	$data->{data_records} = $data_records;
389	13					196	return $data;
390							}
391
392							=head2 C
393
394							=over 4
395
396							=item * Purpose
397
398							Identify the names of the columns in the taxonomy.
399
400							=item * Arguments
401
402							my $fields = $self->fields();
403
404							No arguments; the information is already inside the object.
405
406							=item * Return Value
407
408							Reference to an array holding a list of the columns as they appear in the
409							header row of the incoming taxonomy file.
410
411							=item * Comment
412
413							Read-only.
414
415							=back
416
417							# Implemented in lib/Parse/File/Taxonomy.pm
418
419							=head2 C
420
421							=over 4
422
423							=item * Purpose
424
425							Once the taxonomy has been validated, get a list of its data rows as a Perl
426							data structure.
427
428							=item * Arguments
429
430							$data_records = $self->data_records;
431
432							None.
433
434							=item * Return Value
435
436							Reference to array of array references. The array will hold the data records
437							found in the incoming taxonomy file in their order in that file.
438
439							=item * Comment
440
441							Does not contain any information about the fields in the taxonomy, so you
442							should probably either (a) use in conjunction with C method above;
443							or (b) use C.
444
445							=back
446
447							# Implemented in lib/Parse/File/Taxonomy.pm
448
449							=cut
450
451							=head2 C
452
453							=over 4
454
455							=item * Purpose
456
457							Identify the index position of a given field within the header row.
458
459							=item * Arguments
460
461							$index = $obj->get_field_position('income');
462
463							Takes a single string holding the name of one of the fields (column names).
464
465							=item * Return Value
466
467							Integer representing the index position (counting from C<0>) of the field
468							provided as argument. Throws exception if the argument is not actually a
469							field.
470
471							=back
472
473							=cut
474
475							# Implemented in lib/Parse/File/Taxonomy.pm
476
477							=head2 Accessors
478
479							The following methods provide information about key columns in a
480							Parse::File::Taxonomy::Path object. The key columns are those which hold the
481							ID, parent ID and component information. They take no arguments. The methods
482							whose names end in C<_idx> return integers, as they return the index position
483							of the column in the header row. The other methods return strings.
484
485							$index_of_id_column = $self->id_col_idx;
486
487							$name_of_id_column = $self->id_col;
488
489							$index_of_parent_id_column = $self->parent_id_col_idx;
490
491							$name_of_parent_id_column = $self->parent_id_col;
492
493							$index_of_component_column = $self->component_col_idx;
494
495							$name_of_component_column = $self->component_col;
496
497							=cut
498
499							sub id_col_idx {
500	261			261	0	840	my $self = shift;
501	261					435	return $self->{id_col_idx};
502							}
503
504							sub id_col {
505	79			79	1	75	my $self = shift;
506	79					227	return $self->{id_col};
507							}
508
509							sub parent_id_col_idx {
510	134			134	0	109	my $self = shift;
511	134					203	return $self->{parent_id_col_idx};
512							}
513
514							sub parent_id_col {
515	68			68	1	62	my $self = shift;
516	68					215	return $self->{parent_id_col};
517							}
518
519							sub component_col_idx {
520	134			134	0	109	my $self = shift;
521	134					408	return $self->{component_col_idx};
522							}
523
524							sub component_col {
525	57			57	1	46	my $self = shift;
526	57					162	return $self->{component_col};
527							}
528
529							=head2 C
530
531							=over 4
532
533							=item * Purpose
534
535							Generate a new Perl data structure which holds the same information as a
536							Parse::File::Taxonomy::Index object but which expresses the route from the
537							root node to a given branch or leaf node as either a separator-delimited
538							string (as in the C column of a Parse::File::Taxonomy::Path object) or
539							as an array reference holding the list of names which delineate that route.
540
541							Another way of expressing this: Transform a taxonomy-by-index to a
542							taxonomy-by-path.
543
544							Example: Suppose we have a CSV file which serves as a taxonomy-by-index for
545							this data:
546
547							"id","parent_id","name","is_actionable"
548							"1","","Alpha","0"
549							"2","","Beta","0"
550							"3","1","Epsilon","0"
551							"4","3","Kappa","1"
552							"5","1","Zeta","0"
553							"6","5","Lambda","1"
554							"7","5","Mu","0"
555							"8","2","Eta","1"
556							"9","2","Theta","1"
557
558							Instead of having the route from the root node to a given node be represented
559							B by following Cs up the tree, suppose we want that
560							route to be represented by a string. Assuming that we work with default
561							column names, that would mean representing the information currently spread
562							out among the C, C and C columns in a single C
563							column which, by default, would hold an array reference.
564
565							$source = "./t/data/theta.csv";
566							$obj = Parse::File::Taxonomy::Index->new( {
567							file => $source,
568							} );
569
570							$taxonomy_with_path_as_array = $obj->pathify;
571
572							Yielding:
573
574							[
575							["path", "is_actionable"],
576							[["", "Alpha"], 0],
577							[["", "Beta"], 0],
578							[["", "Alpha", "Epsilon"], 0],
579							[["", "Alpha", "Epsilon", "Kappa"], 1],
580							[["", "Alpha", "Zeta"], 0],
581							[["", "Alpha", "Zeta", "Lambda"], 1],
582							[["", "Alpha", "Zeta", "Mu"], 0],
583							[["", "Beta", "Eta"], 1],
584							[["", "Beta", "Theta"], 1],
585							]
586
587							If we wanted the path information represented as a string rather than an array
588							reference, we would say:
589
590							$taxonomy_with_path_as_string = $obj->pathify( { as_string => 1 } );
591
592							Yielding:
593
594							[
595							["path", "is_actionable"],
596							["\|Alpha", 0],
597							["\|Beta", 0],
598							["\|Alpha\|Epsilon", 0],
599							["\|Alpha\|Epsilon\|Kappa", 1],
600							["\|Alpha\|Zeta", 0],
601							["\|Alpha\|Zeta\|Lambda", 1],
602							["\|Alpha\|Zeta\|Mu", 0],
603							["\|Beta\|Eta", 1],
604							["\|Beta\|Theta", 1],
605							]
606
607							If we are providing a true value to the C key, we also get to
608							choose what character to use as the separator in the C column.
609
610							$taxonomy_with_path_as_string_different_path_col_sep =
611							$obj->pathify( {
612							as_string => 1,
613							path_col_sep => '~~',
614							} );
615
616							Yields:
617
618							[
619							["path", "is_actionable"],
620							["~~Alpha", 0],
621							["~~Beta", 0],
622							["~~Alpha~~Epsilon", 0],
623							["~~Alpha~~Epsilon~~Kappa", 1],
624							["~~Alpha~~Zeta", 0],
625							["~~Alpha~~Zeta~~Lambda", 1],
626							["~~Alpha~~Zeta~~Mu", 0],
627							["~~Beta~~Eta", 1],
628							["~~Beta~~Theta", 1],
629							]
630
631							Finally, should we want the C column in the returned arrayref to be
632							named something other than I, we can provide a value to the C
633							key.
634
635							[
636							["foo", "is_actionable"],
637							[["", "Alpha"], 0],
638							[["", "Beta"], 0],
639							[["", "Alpha", "Epsilon"], 0],
640							[["", "Alpha", "Epsilon", "Kappa"], 1],
641							[["", "Alpha", "Zeta"], 0],
642							[["", "Alpha", "Zeta", "Lambda"], 1],
643							[["", "Alpha", "Zeta", "Mu"], 0],
644							[["", "Beta", "Eta"], 1],
645							[["", "Beta", "Theta"], 1],
646							]
647
648							item * Arguments
649
650							Optional single hash reference. If provided, the following keys may be used:
651
652							=over 4
653
654							=item * C
655
656							User-supplied name for column holding path information in the returned array
657							reference. Defaults to C.
658
659							=item * C
660
661							Boolean. If supplied with a true value, path information will be represented
662							as a separator-delimited string rather than an array reference.
663
664							=item * C
665
666							User-supplied string to be used to separate the parts of the route when
667							C is called with a true value. Not meaningful unless C
668							is true.
669
670							=back
671
672							=item * Return Value
673
674							Reference to an array of array references. The first element in the array
675							will be a reference to an array of field names. Each succeeding element will
676							be a reference to an array holding data for one record in the original
677							taxonomy. The path data will be represented, by default, as an array
678							reference built up from the component (C) column in the original
679							taxonomy, but if C is selected, the path data in all non-header
680							elements will be a separator-delimited string.
681
682							=back
683
684							=cut
685
686							sub pathify {
687	15			15	1	14624	my ($self, $args) = @_;
688	15	100				38	if (defined $args) {
689	10	100	100			63	unless (ref($args) and (reftype($args) eq 'HASH')) {
690	2					320	croak "Argument to pathify() must be hash ref";
691							}
692	8					15	my %permissible_args = map { $_ => 1 } ( qw\| path_col as_string path_col_sep \| );
	24					45
693	8					11	for my $k (keys %{$args}) {
	8					22
694	12	100				124	croak "'$k' is not a recognized key for pathify() argument hashref"
695							unless $permissible_args{$k};
696							}
697	7	100	100			36	if ($args->{path_col_sep} and not $args->{as_string}) {
698	1					85	croak "Supplying a value for key 'path_col_step' is only valid when also supplying true value for 'as_string'";
699							}
700							}
701	11	100				31	$args->{path_col} = defined($args->{path_col}) ? $args->{path_col} : 'path';
702	11	100				23	if ($args->{as_string}) {
703	4	100				11	$args->{path_col_sep} = defined($args->{path_col_sep}) ? $args->{path_col_sep} : '\|';
704							}
705
706	11					14	my @rewritten = ();
707	11					10	my @fields_in = @{$self->fields};
	11					35
708	11					20	my @fields_out = ( $args->{path_col} );
709	11					15	for my $f (@fields_in) {
710	72	100	100			85	unless (
			100
711							($f eq $self->id_col) or
712							($f eq $self->parent_id_col) or
713							($f eq $self->component_col)
714							) {
715	39					54	push @fields_out, $f;
716							}
717							}
718	11					19	push @rewritten, \@fields_out;
719
720	11					21	my %colsin2idx = map { $fields_in[$_] => $_ } (0 .. $#fields_in);
	72					120
721
722	127					164	my %hashed_data = map { $_->[$self->id_col_idx] => {
	11					25
723							parent_id => $_->[$self->parent_id_col_idx],
724							component => $_->[$self->component_col_idx],
725	11					20	} } @{$self->data_records};
726
727	11					22	my @this_path = ();
728	11					8	my $code;
729							$code = sub {
730	258			258		217	my $id = shift;
731	258					301	push @this_path, $hashed_data{$id}{component};
732	258					249	my $parent_id = $hashed_data{$id}{parent_id};
733	258	100				295	if (length($parent_id)) {
734	131					126	&{$code}($parent_id);
	131					183
735							}
736							else {
737	127					115	push @this_path, '';
738							}
739	258					248	return;
740	11					41	};
741	11					12	for my $rec (@{$self->data_records}) {
	11					23
742	127					103	my @new_record;
743	127					193	&{$code}($rec->[$self->id_col_idx]);
	127					155
744	127					214	my $path_as_array_ref = [ reverse @this_path ];
745	127	100				179	if ($args->{as_string}) {
746	44					80	push @new_record,
747	44					37	join($args->{path_col_sep} => @{$path_as_array_ref});
748							}
749							else {
750	83					83	push @new_record, $path_as_array_ref;
751							}
752	127					121	for my $f (grep { $_ ne $args->{path_col} } @fields_out) {
	618					729
753	491					681	push @new_record, $rec->[$colsin2idx{$f}];
754							}
755	127					149	push @rewritten, \@new_record;
756	127					203	@this_path = ();
757							}
758	11					57	return \@rewritten;
759							}
760
761							1;
762
763							# vim: formatoptions=crqot