File Coverage

lib/Text/Tradition/Collation.pm

Criterion	Covered	Total	%
statement	40	42	95.2
branch			n/a
condition			n/a
subroutine	14	14	100.0
pod			n/a
total	54	56	96.4

line	stmt	sub	time	code
1				package Text::Tradition::Collation;
2
3	10	10	72	use feature 'say';
	10		26
	10		1131
4	10	10	3670	use Encode qw( decode_utf8 );
	10		130764
	10		868
5	10	10	5252	use File::Temp;
	10		149958
	10		746
6	10	10	2659	use File::Which;
	10		8201
	10		506
7	10	10	5209	use Graph;
	10		866422
	10		455
8	10	10	7924	use IPC::Run qw( run binary );
	10		279834
	10		662
9	10	10	101	use JSON qw/ to_json /;
	10		31
	10		95
10	10	10	6353	use Text::CSV;
	10		102751
	10		512
11	10	10	3623	use Text::Tradition::Collation::Data;
	10		46
	10		479
12	10	10	4632	use Text::Tradition::Collation::Reading;
	10		113
	10		2087
13	10	10	5421	use Text::Tradition::Collation::RelationshipStore;
	10		47
	10		412
14	10	10	91	use Text::Tradition::Error;
	10		26
	10		284
15	10	10	59	use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
	10		22
	10		1265
16	10	10	15701	use XML::LibXML;
	0
	0
17				use XML::LibXML::XPathContext;
18				use Moose;
19
20				has _data => (
21				isa => 'Text::Tradition::Collation::Data',
22				is => 'ro',
23				required => 1,
24				handles => [ qw(
25				sequence
26				paths
27				_set_relations
28				relations
29				_set_start
30				_set_end
31				ac_label
32				has_cached_table
33				relationships
34				relationship_types
35				related_readings
36				get_relationship
37				del_relationship
38				equivalence
39				equivalence_graph
40				readings
41				reading
42				_add_reading
43				del_reading
44				has_reading
45				wit_list_separator
46				baselabel
47				linear
48				wordsep
49				direction
50				change_direction
51				start
52				end
53				cached_table
54				_graphcalc_done
55				has_cached_svg
56				wipe_table
57				)]
58				);
59
60				has 'tradition' => (
61				is => 'ro',
62				isa => 'Text::Tradition',
63				writer => '_set_tradition',
64				weak_ref => 1,
65				);
66
67				=encoding utf8
68
69				=head1 NAME
70
71				Text::Tradition::Collation - a software model for a text collation
72
73				=head1 SYNOPSIS
74
75				use Text::Tradition;
76				my $t = Text::Tradition->new(
77				'name' => 'this is a text',
78				'input' => 'TEI',
79				'file' => '/path/to/tei_parallel_seg_file.xml' );
80
81				my $c = $t->collation;
82				my @readings = $c->readings;
83				my @paths = $c->paths;
84				my @relationships = $c->relationships;
85
86				my $svg_variant_graph = $t->collation->as_svg();
87
88				=head1 DESCRIPTION
89
90				Text::Tradition is a library for representation and analysis of collated
91				texts, particularly medieval ones. The Collation is the central feature of
92				a Tradition, where the text, its sequence of readings, and its relationships
93				between readings are actually kept.
94
95				=head1 CONSTRUCTOR
96
97				=head2 new
98
99				The constructor. Takes a hash or hashref of the following arguments:
100
101				=over
102
103				=item * tradition - The Text::Tradition object to which the collation
104				belongs. Required.
105
106				=item * linear - Whether the collation should be linear; that is, whether
107				transposed readings should be treated as two linked readings rather than one,
108				and therefore whether the collation graph is acyclic. Defaults to true.
109
110				=item * baselabel - The default label for the path taken by a base text
111				(if any). Defaults to 'base text'.
112
113				=item * wit_list_separator - The string to join a list of witnesses for
114				purposes of making labels in display graphs. Defaults to ', '.
115
116				=item * ac_label - The extra label to tack onto a witness sigil when
117				representing another layer of path for the given witness - that is, when
118				a text has more than one possible reading due to scribal corrections or
119				the like. Defaults to ' (a.c.)'.
120
121				=item * wordsep - The string used to separate words in the original text.
122				Defaults to ' '.
123
124				=back
125
126				=head1 ACCESSORS
127
128				=head2 tradition
129
130				=head2 linear
131
132				=head2 wit_list_separator
133
134				=head2 baselabel
135
136				=head2 ac_label
137
138				=head2 wordsep
139
140				Simple accessors for collation attributes.
141
142				=head2 start
143
144				The meta-reading at the start of every witness path.
145
146				=head2 end
147
148				The meta-reading at the end of every witness path.
149
150				=head2 readings
151
152				Returns all Reading objects in the graph.
153
154				=head2 reading( $id )
155
156				Returns the Reading object corresponding to the given ID.
157
158				=head2 add_reading( $reading_args )
159
160				Adds a new reading object to the collation.
161				See L<Text::Tradition::Collation::Reading> for the available arguments.
162
163				=head2 del_reading( $object_or_id )
164
165				Removes the given reading from the collation, implicitly removing its
166				paths and relationships.
167
168				=head2 has_reading( $id )
169
170				Predicate to see whether a given reading ID is in the graph.
171
172				=head2 reading_witnesses( $object_or_id )
173
174				Returns a list of sigils whose witnesses contain the reading.
175
176				=head2 paths
177
178				Returns all reading paths within the document - that is, all edges in the
179				collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
180
181				=head2 add_path( $source, $target, $sigil )
182
183				Links the given readings in the collation in sequence, under the given witness
184				sigil. The readings may be specified by object or ID.
185
186				=head2 del_path( $source, $target, $sigil )
187
188				Links the given readings in the collation in sequence, under the given witness
189				sigil. The readings may be specified by object or ID.
190
191				=head2 has_path( $source, $target );
192
193				Returns true if the two readings are linked in sequence in any witness.
194				The readings may be specified by object or ID.
195
196				=head2 relationships
197
198				Returns all Relationship objects in the collation.
199
200				=head2 add_relationship( $reading, $other_reading, $options, $changed_readings )
201
202				Adds a new relationship of the type given in $options between the two readings,
203				which may be specified by object or ID. Returns a value of ( $status, @vectors)
204				where $status is true on success, and @vectors is a list of relationship edges
205				that were ultimately added. If an array reference is passed in as $changed_readings,
206				then any readings that were altered due to the relationship creation are added to
207				the array.
208
209				See L<Text::Tradition::Collation::Relationship> for the available options.
210
211				=cut
212
213				sub BUILDARGS {
214				my ( $class, @args ) = @_;
215				my %args = @args == 1 ? %{ $args[0] } : @args;
216				# TODO determine these from the Moose::Meta object
217				my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
218				linear wordsep direction start end cached_table _graphcalc_done);
219				my %data_args;
220				for my $attr (@delegate_attrs) {
221				$data_args{$attr} = delete $args{$attr} if exists $args{$attr};
222				}
223				$args{_data} = Text::Tradition::Collation::Data->new(%data_args);
224				return \%args;
225				}
226
227				sub BUILD {
228				my $self = shift;
229				$self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
230				$self->_set_start( $self->add_reading(
231				{ 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
232				$self->_set_end( $self->add_reading(
233				{ 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
234				}
235
236				=head2 register_relationship_type( %relationship_definition )
237
238				Add a relationship type definition to this collation. The argument can be either a
239				hash or a hashref, defining the properties of the relationship. For relationship types
240				and their properties, see L<Text::Tradition::Collation::RelationshipType>.
241
242				=head2 get_relationship_type( $relationship_name )
243
244				Retrieve the RelationshipType object for the relationship with the given name.
245
246				=cut
247
248				sub register_relationship_type {
249				my $self = shift;
250				my %args = @_ == 1 ? %{$_[0]} : @_;
251				if( $self->relations->has_type( $args{name} ) ) {
252				throw( 'Relationship type ' . $args{name} . ' already registered' );
253				}
254				$self->relations->add_type( %args );
255				}
256
257				sub get_relationship_type {
258				my( $self, $name ) = @_;
259				return $self->relations->has_type( $name )
260				? $self->relations->type( $name ) : undef;
261				}
262
263				### Reading construct/destruct functions
264
265				sub add_reading {
266				my( $self, $reading ) = @_;
267				unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
268				my %args = %$reading;
269				if( $args{'init'} ) {
270				# If we are initializing an empty collation, don't assume that we
271				# have set a tradition.
272				delete $args{'init'};
273				} elsif( $self->tradition->can('language') && $self->tradition->has_language
274				&& !exists $args{'language'} ) {
275				$args{'language'} = $self->tradition->language;
276				}
277				$reading = Text::Tradition::Collation::Reading->new(
278				'collation' => $self,
279				%args );
280				}
281				# First check to see if a reading with this ID exists.
282				if( $self->reading( $reading->id ) ) {
283				throw( "Collation already has a reading with id " . $reading->id );
284				}
285				$self->_graphcalc_done(0);
286				$self->_add_reading( $reading->id => $reading );
287				# Once the reading has been added, put it in both graphs.
288				$self->sequence->add_vertex( $reading->id );
289				$self->relations->add_reading( $reading->id );
290				return $reading;
291				};
292
293				around del_reading => sub {
294				my $orig = shift;
295				my $self = shift;
296				my $arg = shift;
297
298				if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
299				$arg = $arg->id;
300				}
301				# Remove the reading from the graphs.
302				$self->_graphcalc_done(0);
303				$self->_clear_cache; # Explicitly clear caches to GC the reading
304				$self->sequence->delete_vertex( $arg );
305				$self->relations->delete_reading( $arg );
306
307				# Carry on.
308				$self->$orig( $arg );
309				};
310
311				=head2 merge_readings( $main, $second, $concatenate, $with_str )
312
313				Merges the $second reading into the $main one. If $concatenate is true, then
314				the merged node will carry the text of both readings, concatenated with either
315				$with_str (if specified) or a sensible default (the empty string if the
316				appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
317
318				The first two arguments may be either readings or reading IDs.
319
320				=begin testing
321
322				use Test::More::UTF8;
323				use Text::Tradition;
324				use TryCatch;
325
326				my $cxfile = 't/data/Collatex-16.xml';
327				my $t = Text::Tradition->new(
328				'name' => 'inline',
329				'input' => 'CollateX',
330				'file' => $cxfile,
331				);
332				my $c = $t->collation;
333
334				my $rno = scalar $c->readings;
335				# Split n21 ('unto') for testing purposes
336				my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
337				my $old_r = $c->reading( 'n21' );
338				$old_r->alter_text( 'to' );
339				$c->del_path( 'n20', 'n21', 'A' );
340				$c->add_path( 'n20', 'n21p0', 'A' );
341				$c->add_path( 'n21p0', 'n21', 'A' );
342				$c->add_relationship( 'n21', 'n22', { type => 'collated', scope => 'local' } );
343				$c->flatten_ranks();
344				ok( $c->reading( 'n21p0' ), "New reading exists" );
345				is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
346
347				# Combine n3 and n4 ( with his )
348				$c->merge_readings( 'n3', 'n4', 1 );
349				ok( !$c->reading('n4'), "Reading n4 is gone" );
350				is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
351
352				# Collapse n9 and n10 ( rood / root )
353				$c->merge_readings( 'n9', 'n10' );
354				ok( !$c->reading('n10'), "Reading n10 is gone" );
355				is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
356
357				# Try to combine n21 and n21p0. This should break.
358				my $remaining = $c->reading('n21');
359				$remaining \|\|= $c->reading('n22'); # one of these should still exist
360				try {
361				$c->merge_readings( 'n21p0', $remaining, 1 );
362				ok( 0, "Bad reading merge changed the graph" );
363				} catch( Text::Tradition::Error $e ) {
364				like( $e->message, qr/neither concatenated nor collated/, "Expected exception from bad concatenation" );
365				} catch {
366				ok( 0, "Unexpected error on bad reading merge: $@" );
367				}
368
369				try {
370				$c->calculate_ranks();
371				ok( 1, "Graph is still evidently whole" );
372				} catch( Text::Tradition::Error $e ) {
373				ok( 0, "Caught a rank exception: " . $e->message );
374				}
375
376				# Test right-to-left reading merge.
377				my $rtl = Text::Tradition->new(
378				'name' => 'inline',
379				'input' => 'Tabular',
380				'sep_char' => ',',
381				'direction' => 'RL',
382				'file' => 't/data/arabic_snippet.csv'
383				);
384				my $rtlc = $rtl->collation;
385				is( $rtlc->reading('r8.1')->text, 'Ø³Ø¨Ø¨', "Got target first reading in RTL text" );
386				my $pt = $rtlc->path_text('A');
387				my @path = $rtlc->reading_sequence( $rtlc->start, $rtlc->end, 'A' );
388				is( $rtlc->reading('r9.1')->text, 'ØµÙ„Ø§Ø', "Got target second reading in RTL text" );
389				$rtlc->merge_readings( 'r8.1', 'r9.1', 1 );
390				is( $rtlc->reading('r8.1')->text, 'Ø³Ø¨Ø¨ ØµÙ„Ø§Ø', "Got target merged reading in RTL text" );
391				is( $rtlc->path_text('A'), $pt, "Path text is still correct" );
392				is( scalar($rtlc->reading_sequence( $rtlc->start, $rtlc->end, 'A' )),
393				scalar(@path) - 1, "Path was shortened" );
394
395				=end testing
396
397				=cut
398
399				sub merge_readings {
400				my $self = shift;
401
402				# Sanity check
403				my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
404				my $mergemeta = $kept_obj->is_meta;
405				throw( "Cannot merge meta and non-meta reading" )
406				unless ( $mergemeta && $del_obj->is_meta )
407				\|\| ( !$mergemeta && !$del_obj->is_meta );
408				if( $mergemeta ) {
409				throw( "Cannot merge with start or end node" )
410				if( $kept_obj eq $self->start \|\| $kept_obj eq $self->end
411				\|\| $del_obj eq $self->start \|\| $del_obj eq $self->end );
412				throw( "Cannot combine text of meta readings" ) if $combine;
413				}
414				# We can only merge readings in a linear graph if:
415				# - they are contiguous with only one edge between them, OR
416				# - they are at equivalent ranks in the graph.
417				if( $self->linear ) {
418				my @delpred = $del_obj->predecessors;
419				my @keptsuc = $kept_obj->successors;
420				unless ( @delpred == 1 && $delpred[0] eq $kept_obj
421				&& @keptsuc == 1 && $keptsuc[0] eq $del_obj ) {
422				my( $is_ok, $msg ) = $self->relations->relationship_valid(
423				$kept_obj, $del_obj, 'collated' );
424				unless( $is_ok ) {
425				throw( "Readings $kept_obj and $del_obj can be neither concatenated nor collated" );
426				}
427				}
428				}
429
430				# We only need the IDs for adding paths to the graph, not the reading
431				# objects themselves.
432				my $kept = $kept_obj->id;
433				my $deleted = $del_obj->id;
434				$self->_graphcalc_done(0);
435
436				# The kept reading should inherit the paths and the relationships
437				# of the deleted reading.
438				foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
439				my @vector = ( $kept );
440				push( @vector, $path->[1] ) if $path->[0] eq $deleted;
441				unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
442				next if $vector[0] eq $vector[1]; # Don't add a self loop
443				my %wits = %{$self->sequence->get_edge_attributes( @$path )};
444				$self->sequence->add_edge( @vector );
445				my $fwits = $self->sequence->get_edge_attributes( @vector );
446				@wits{keys %$fwits} = values %$fwits;
447				$self->sequence->set_edge_attributes( @vector, \%wits );
448				}
449				$self->relations->merge_readings( $kept, $deleted, $combine );
450
451				# Do the deletion deed.
452				if( $combine ) {
453				# Combine the text of the readings
454				my $joinstr = $combine_char;
455				unless( defined $joinstr ) {
456				$joinstr = '' if $kept_obj->join_next \|\| $del_obj->join_prior;
457				$joinstr = $self->wordsep unless defined $joinstr;
458				}
459				$kept_obj->_combine( $del_obj, $joinstr );
460				}
461				$self->del_reading( $deleted );
462				}
463
464				=head2 merge_related( @relationship_types )
465
466				Merge all readings linked with the relationship types given. If any of the selected type(s) is not a colocation, the graph will no longer be linear. The majority/plurality reading in each case will be the one kept.
467
468				WARNING: This operation cannot be undone.
469
470				=cut
471
472				=begin testing
473
474				use Test::Warn;
475				use Text::Tradition;
476				use TryCatch;
477
478				my $t;
479				warnings_exist {
480				$t = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
481				} [qr/Cannot set relationship on a meta reading/],
482				"Got expected relationship drop warning on parse";
483
484				my $c = $t->collation;
485				# Force the transitive propagation of all existing relationships.
486				$c->relations->propagate_all_relationships();
487
488				my %rdg_ids;
489				map { $rdg_ids{$_} = 1 } $c->readings;
490				$c->merge_related( 'orthographic' );
491				is( scalar( $c->readings ), keys( %rdg_ids ) - 9,
492				"Successfully collapsed orthographic variation" );
493				map { $rdg_ids{$_} = undef } qw/ r13.3 r11.4 r8.5 r8.2 r7.7 r7.5 r7.4 r7.3 r7.1 /;
494				foreach my $rid ( keys %rdg_ids ) {
495				my $exp = $rdg_ids{$rid};
496				is( !$c->reading( $rid ), !$exp, "Reading $rid correctly " .
497				( $exp ? "retained" : "removed" ) );
498				}
499				ok( $c->linear, "Graph is still linear" );
500				try {
501				$c->calculate_ranks; # This should succeed
502				ok( 1, "Can still calculate ranks on the new graph" );
503				} catch {
504				ok( 0, "Rank calculation on merged graph failed: $@" );
505				}
506
507				# Now add some transpositions
508				$c->add_relationship( 'r8.4', 'r10.4', { type => 'transposition' } );
509				$c->merge_related( 'transposition' );
510				is( scalar( $c->readings ), keys( %rdg_ids ) - 10,
511				"Transposed relationship is merged away" );
512				ok( !$c->reading('r8.4'), "Correct transposed reading removed" );
513				ok( !$c->linear, "Graph is no longer linear" );
514				try {
515				$c->calculate_ranks; # This should fail
516				ok( 0, "Rank calculation happened on nonlinear graph?!" );
517				} catch ( Text::Tradition::Error $e ) {
518				is( $e->message, 'Cannot calculate ranks on a non-linear graph',
519				"Rank calculation on merged graph threw an error" );
520				}
521
522				=end testing
523
524				=cut
525
526				# TODO: there should be a way to display merged without affecting the underlying data!
527
528				sub merge_related {
529				my $self = shift;
530				my %reltypehash;
531				map { $reltypehash{$_} = 1 } @_;
532
533				# Set up the filter for finding related readings
534				my $filter = sub {
535				exists $reltypehash{$_[0]->type};
536				};
537
538				# Go through all readings looking for related ones
539				foreach my $r ( $self->readings ) {
540				next unless $self->reading( "$r" ); # might have been deleted meanwhile
541				while( my @related = $self->related_readings( $r, $filter ) ) {
542				push( @related, $r );
543				@related = sort {
544				scalar $b->witnesses <=> scalar $a->witnesses
545				} @related;
546				my $keep = shift @related;
547				foreach my $delr ( @related ) {
548				$self->linear( 0 )
549				unless( $self->get_relationship( $keep, $delr )->colocated );
550				$self->merge_readings( $keep, $delr );
551				}
552				}
553				}
554				}
555
556				=head2 compress_readings
557
558				Where possible in the graph, compresses plain sequences of readings into a
559				single reading. The sequences must consist of readings with no
560				relationships to other readings, with only a single witness path between
561				them and no other witness paths from either that would skip the other. The
562				readings must also not be marked as nonsense or bad grammar.
563
564				WARNING: This operation cannot be undone.
565
566				=begin testing
567
568				use Text::Tradition;
569
570				my $t = Text::Tradition->new( input => 'CollateX', file => 't/data/Collatex-16.xml' );
571				my $c = $t->collation;
572				my $n = scalar $c->readings;
573				$c->compress_readings();
574				is( scalar $c->readings, $n - 6, "Compressing readings seems to work" );
575
576				# Now put in a join-word and make sure the thing still works.
577				my $t2 = Text::Tradition->new( input => 'CollateX', file => 't/data/Collatex-16.xml' );
578				my $c2 = $t2->collation;
579				# Split n21 ('unto') for testing purposes
580				my $new_r = $c2->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
581				my $old_r = $c2->reading( 'n21' );
582				$old_r->alter_text( 'to' );
583				$c2->del_path( 'n20', 'n21', 'A' );
584				$c2->add_path( 'n20', 'n21p0', 'A' );
585				$c2->add_path( 'n21p0', 'n21', 'A' );
586				$c2->calculate_ranks();
587				is( scalar $c2->readings, $n + 1, "We have our extra test reading" );
588				$c2->compress_readings();
589				is( scalar $c2->readings, $n - 6, "Compressing readings also works with join_next" );
590				is( $c2->reading( 'n21p0' )->text, 'unto', "The joined word has no space" );
591
592
593				=end testing
594
595				=cut
596
597				sub compress_readings {
598				my $self = shift;
599				# Sanity check: first save the original text of each witness.
600				my %origtext;
601				foreach my $wit ( $self->tradition->witnesses ) {
602				$origtext{$wit->sigil} = $self->path_text( $wit->sigil );
603				if( $wit->is_layered ) {
604				my $acsig = $wit->sigil . $self->ac_label;
605				$origtext{$acsig} = $self->path_text( $acsig );
606				}
607				}
608
609				# Now do the deed.
610				# Anywhere in the graph that there is a reading that joins only to a single
611				# successor, and neither of these have any relationships, just join the two
612				# readings.
613				foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
614				# Now look for readings that can be joined to their successors.
615				next unless $rdg->is_combinable;
616				my %seen;
617				while( $self->sequence->successors( $rdg ) == 1 ) {
618				my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
619				throw( "Infinite loop" ) if $seen{$next->id};
620				$seen{$next->id} = 1;
621				last if $self->sequence->predecessors( $next ) > 1;
622				last unless $next->is_combinable;
623				say "Joining readings $rdg and $next";
624				$self->merge_readings( $rdg, $next, 1 );
625				}
626				}
627
628				# Finally, make sure we haven't screwed anything up.
629				foreach my $wit ( $self->tradition->witnesses ) {
630				my $pathtext = $self->path_text( $wit->sigil );
631				throw( "Text differs for witness " . $wit->sigil )
632				unless $pathtext eq $origtext{$wit->sigil};
633				if( $wit->is_layered ) {
634				my $acsig = $wit->sigil . $self->ac_label;
635				$pathtext = $self->path_text( $acsig );
636				throw( "Layered text differs for witness " . $wit->sigil )
637				unless $pathtext eq $origtext{$acsig};
638				}
639				}
640
641				$self->relations->rebuild_equivalence();
642				$self->calculate_ranks();
643				}
644
645				# Helper function for manipulating the graph.
646				sub _stringify_args {
647				my( $self, $first, $second, @args ) = @_;
648				$first = $first->id
649				if ref( $first ) eq 'Text::Tradition::Collation::Reading';
650				$second = $second->id
651				if ref( $second ) eq 'Text::Tradition::Collation::Reading';
652				return( $first, $second, @args );
653				}
654
655				# Helper function for manipulating the graph.
656				sub _objectify_args {
657				my( $self, $first, $second, $arg ) = @_;
658				$first = $self->reading( $first )
659				unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
660				$second = $self->reading( $second )
661				unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
662				return( $first, $second, $arg );
663				}
664
665				=head2 duplicate_reading( $reading, @witlist )
666
667				Split the given reading into two, so that the new reading is in the path for
668				the witnesses given in @witlist. If the result is that certain non-colocated
669				relationships (e.g. transpositions) are no longer valid, these will be removed.
670				Returns the newly-created reading.
671
672				=begin testing
673
674				use Test::More::UTF8;
675				use Text::Tradition;
676				use TryCatch;
677
678				my $st = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/collatecorr.xml' );
679				is( ref( $st ), 'Text::Tradition', "Got a tradition from test file" );
680				ok( $st->has_witness('Ba96'), "Tradition has the affected witness" );
681
682				my $sc = $st->collation;
683				my $numr = 17;
684				ok( $sc->reading('n131'), "Tradition has the affected reading" );
685				is( scalar( $sc->readings ), $numr, "There are $numr readings in the graph" );
686				is( $sc->end->rank, 14, "There are fourteen ranks in the graph" );
687
688				# Detach the erroneously collated reading
689				my( $newr, @del_rdgs ) = $sc->duplicate_reading( 'n131', 'Ba96' );
690				ok( $newr, "New reading was created" );
691				ok( $sc->reading('n131_0'), "Detached the bad collation with a new reading" );
692				is( scalar( $sc->readings ), $numr + 1, "A reading was added to the graph" );
693				is( $sc->end->rank, 10, "There are now only ten ranks in the graph" );
694				my $csucc = $sc->common_successor( 'n131', 'n131_0' );
695				is( $csucc->id, 'n136', "Found correct common successor to duped reading" );
696
697				# Check that the bad transposition is gone
698				is( scalar @del_rdgs, 1, "Deleted reading was returned by API call" );
699				is( $sc->get_relationship( 'n130', 'n135' ), undef, "Bad transposition relationship is gone" );
700
701				# The collation should not be fixed
702				my @pairs = $sc->identical_readings();
703				is( scalar @pairs, 0, "Not re-collated yet" );
704				# Fix the collation
705				ok( $sc->merge_readings( 'n124', 'n131_0' ), "Collated the readings correctly" );
706				@pairs = $sc->identical_readings( start => 'n124', end => $csucc->id );
707				is( scalar @pairs, 3, "Found three more identical readings" );
708				is( $sc->end->rank, 11, "The ranks shifted appropriately" );
709				$sc->flatten_ranks();
710				is( scalar( $sc->readings ), $numr - 3, "Now we are collated correctly" );
711
712				# Check that we can't "duplicate" a reading with no wits or with all wits
713				try {
714				my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124' );
715				ok( 0, "Reading duplication without witnesses throws an error" );
716				} catch( Text::Tradition::Error $e ) {
717				like( $e->message, qr/Must specify one or more witnesses/,
718				"Reading duplication without witnesses throws the expected error" );
719				} catch {
720				ok( 0, "Reading duplication without witnesses threw the wrong error" );
721				}
722
723				try {
724				my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124', 'Ba96', 'MÃ¼11475' );
725				ok( 0, "Reading duplication with all witnesses throws an error" );
726				} catch( Text::Tradition::Error $e ) {
727				like( $e->message, qr/Cannot join all witnesses/,
728				"Reading duplication with all witnesses throws the expected error" );
729				} catch {
730				ok( 0, "Reading duplication with all witnesses threw the wrong error" );
731				}
732
733				try {
734				$sc->calculate_ranks();
735				ok( 1, "Graph is still evidently whole" );
736				} catch( Text::Tradition::Error $e ) {
737				ok( 0, "Caught a rank exception: " . $e->message );
738				}
739
740				=end testing
741
742				=cut
743
744				sub duplicate_reading {
745				my( $self, $r, @wits ) = @_;
746				# Check that we are not doing anything unwise.
747				throw( "Must specify one or more witnesses for the duplicated reading" )
748				unless @wits;
749				unless( ref( $r ) eq 'Text::Tradition::Collation::Reading' ) {
750				$r = $self->reading( $r );
751				}
752				throw( "Cannot duplicate a meta-reading" )
753				if $r->is_meta;
754				throw( "Cannot join all witnesses to the new reading" )
755				if scalar( @wits ) == scalar( $r->witnesses );
756
757				# Get all the reading attributes and duplicate them.
758				my $rmeta = Text::Tradition::Collation::Reading->meta;
759				my %args;
760				foreach my $attr( $rmeta->get_all_attributes ) {
761				next if $attr->name =~ /^_/;
762				my $acc = $attr->get_read_method;
763				if( !$acc && $attr->has_applied_traits ) {
764				my $tr = $attr->applied_traits;
765				if( $tr->[0] =~ /::(Array\|Hash)$/ ) {
766				my $which = $1;
767				my %methods = reverse %{$attr->handles};
768				$acc = $methods{elements};
769				$args{$attr->name} = $which eq 'Array'
770				? [ $r->$acc ] : { $r->$acc };
771				}
772				} elsif( $acc ) {
773				my $attrval = $r->$acc;
774				if( defined $attrval ) {
775				$args{$attr->name} = $attrval;
776				}
777				}
778				}
779				# By definition the new reading will no longer be common.
780				$args{is_common} = 0;
781				# The new reading also needs its own ID.
782				$args{id} = $self->_generate_dup_id( $r->id );
783
784				# Try to make the new reading.
785				my $newr = $self->add_reading( \%args );
786				# The old reading is also no longer common.
787				$r->is_common( 0 );
788
789				# For each of the witnesses, dissociate from the old reading and
790				# associate with the new.
791				foreach my $wit ( @wits ) {
792				my $prior = $self->prior_reading( $r, $wit );
793				my $next = $self->next_reading( $r, $wit );
794				$self->del_path( $prior, $r, $wit );
795				$self->add_path( $prior, $newr, $wit );
796				$self->del_path( $r, $next, $wit );
797				$self->add_path( $newr, $next, $wit );
798				}
799
800				# If the graph is ranked, we need to look for relationships that are now
801				# invalid (i.e. 'non-colocation' types that might now be colocated) and
802				# remove them. If not, we can skip it.
803				my $succ;
804				my %rrk;
805				my @deleted_relations;
806				if( $self->end->has_rank ) {
807				# Find the point where we can stop checking
808				$succ = $self->common_successor( $r, $newr );
809
810				# Hash the existing ranks
811				foreach my $rdg ( $self->readings ) {
812				$rrk{$rdg->id} = $rdg->rank;
813				}
814				# Calculate the new ranks
815				$self->calculate_ranks();
816
817				# Check for invalid non-colocated relationships among changed-rank readings
818				# from where the ranks start changing up to $succ
819				my $lastrank = $succ->rank;
820				foreach my $rdg ( $self->readings ) {
821				next if $rdg->rank > $lastrank;
822				next if $rdg->rank == $rrk{$rdg->id};
823				my @noncolo = $rdg->related_readings( sub { !$_[0]->colocated } );
824				next unless @noncolo;
825				foreach my $nc ( @noncolo ) {
826				unless( $self->relations->verify_or_delete( $rdg, $nc ) ) {
827				push( @deleted_relations, [ $rdg->id, $nc->id ] );
828				}
829				}
830				}
831				}
832				return ( $newr, @deleted_relations );
833				}
834
835				sub _generate_dup_id {
836				my( $self, $rid ) = @_;
837				my $newid;
838				my $i = 0;
839				while( !$newid ) {
840				$newid = $rid."_$i";
841				if( $self->has_reading( $newid ) ) {
842				$newid = '';
843				$i++;
844				}
845				}
846				return $newid;
847				}
848
849				### Path logic
850
851				sub add_path {
852				my $self = shift;
853
854				# We only need the IDs for adding paths to the graph, not the reading
855				# objects themselves.
856				my( $source, $target, $wit ) = $self->_stringify_args( @_ );
857
858				$self->_graphcalc_done(0);
859				# Connect the readings
860				unless( $self->sequence->has_edge( $source, $target ) ) {
861				$self->sequence->add_edge( $source, $target );
862				$self->relations->add_equivalence_edge( $source, $target );
863				}
864				# Note the witness in question
865				$self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
866				}
867
868				sub del_path {
869				my $self = shift;
870				my @args;
871				if( ref( $_[0] ) eq 'ARRAY' ) {
872				my $e = shift @_;
873				@args = ( @$e, @_ );
874				} else {
875				@args = @_;
876				}
877
878				# We only need the IDs for removing paths from the graph, not the reading
879				# objects themselves.
880				my( $source, $target, $wit ) = $self->_stringify_args( @args );
881
882				$self->_graphcalc_done(0);
883				if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
884				$self->sequence->delete_edge_attribute( $source, $target, $wit );
885				}
886				unless( $self->sequence->has_edge_attributes( $source, $target ) ) {
887				$self->sequence->delete_edge( $source, $target );
888				$self->relations->delete_equivalence_edge( $source, $target );
889				}
890				}
891
892
893				# Extra graph-alike utility
894				sub has_path {
895				my $self = shift;
896				my( $source, $target, $wit ) = $self->_stringify_args( @_ );
897				return undef unless $self->sequence->has_edge( $source, $target );
898				return $self->sequence->has_edge_attribute( $source, $target, $wit );
899				}
900
901				=head2 clear_witness( @sigil_list )
902
903				Clear the given witnesses out of the collation entirely, removing references
904				to them in paths, and removing readings that belong only to them. Should only
905				be called via $tradition->del_witness.
906
907				=cut
908
909				sub clear_witness {
910				my( $self, @sigils ) = @_;
911
912				$self->_graphcalc_done(0);
913				# Clear the witness(es) out of the paths
914				foreach my $e ( $self->paths ) {
915				foreach my $sig ( @sigils ) {
916				$self->del_path( $e, $sig );
917				}
918				}
919
920				# Clear out the newly unused readings
921				foreach my $r ( $self->readings ) {
922				unless( $self->reading_witnesses( $r ) ) {
923				$self->del_reading( $r );
924				}
925				}
926				}
927
928				sub add_relationship {
929				my $self = shift;
930				my( $source, $target, $opts, $altered_readings ) = $self->_stringify_args( @_ );
931				my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
932				my $did_recalc;
933				foreach my $v ( @vectors ) {
934				my $rel = $self->get_relationship( $v );
935				next unless $rel->colocated;
936				my $r1 = $self->reading( $v->[0] );
937				my $r2 = $self->reading( $v->[1] );
938				# If it's a spelling or orthographic relationship, and one is marked
939				# as a lemma, set the normal form on the non-lemma to reflect that.
940				if( $r1->does( 'Text::Tradition::Morphology' ) ) {
941				my @changed = $r1->relationship_added( $r2, $rel );
942				if( ref( $altered_readings ) eq 'ARRAY' ) {
943				push( @$altered_readings, @changed );
944				}
945				}
946				next if $did_recalc;
947				if( $r1->has_rank && $r2->has_rank && $r1->rank ne $r2->rank ) {
948				$self->_graphcalc_done(0);
949				$self->_clear_cache;
950				$did_recalc = 1;
951				}
952				}
953				return @vectors;
954				}
955
956				around qw/ get_relationship del_relationship / => sub {
957				my $orig = shift;
958				my $self = shift;
959				my @args = @_;
960				if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
961				@args = @{$_[0]};
962				}
963				my @stringargs = $self->_stringify_args( @args );
964				$self->$orig( @stringargs );
965				};
966
967				=head2 reading_witnesses( $reading )
968
969				Return a list of sigils corresponding to the witnesses in which the reading appears.
970
971				=cut
972
973				sub reading_witnesses {
974				my( $self, $reading ) = @_;
975				# We need only check either the incoming or the outgoing edges; I have
976				# arbitrarily chosen "incoming". Thus, special-case the start node.
977				if( $reading eq $self->start ) {
978				return map { $_->sigil } grep { $_->is_collated } $self->tradition->witnesses;
979				}
980				my %all_witnesses;
981				foreach my $e ( $self->sequence->edges_to( $reading ) ) {
982				my $wits = $self->sequence->get_edge_attributes( @$e );
983				@all_witnesses{ keys %$wits } = 1;
984				}
985				my $acstr = $self->ac_label;
986				foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
987				delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
988				}
989				return keys %all_witnesses;
990				}
991
992				=head1 OUTPUT METHODS
993
994				=head2 as_svg( \%options )
995
996				Returns an SVG string that represents the graph, via as_dot and graphviz.
997				See as_dot for a list of options. Must have GraphViz (dot) installed to run.
998
999				=begin testing
1000
1001				use File::Which;
1002				use Text::Tradition;
1003				use XML::LibXML;
1004				use XML::LibXML::XPathContext;
1005
1006
1007				SKIP: {
1008				skip( 'Need Graphviz installed to test graphs', 16 )
1009				unless File::Which::which( 'dot' );
1010
1011				my $datafile = 't/data/Collatex-16.xml';
1012
1013				my $tradition = Text::Tradition->new(
1014				'name' => 'inline',
1015				'input' => 'CollateX',
1016				'file' => $datafile,
1017				);
1018				my $collation = $tradition->collation;
1019
1020				# Test the svg creation
1021				my $parser = XML::LibXML->new();
1022				$parser->load_ext_dtd( 0 );
1023				my $svg = $parser->parse_string( $collation->as_svg() );
1024				is( $svg->documentElement->nodeName(), 'svg', 'Got an svg document' );
1025
1026				# Test for the correct number of nodes in the SVG
1027				my $svg_xpc = XML::LibXML::XPathContext->new( $svg->documentElement() );
1028				$svg_xpc->registerNs( 'svg', 'http://www.w3.org/2000/svg' );
1029				my @svg_nodes = $svg_xpc->findnodes( '//svg:g[@class="node"]' );
1030				is( scalar @svg_nodes, 26, "Correct number of nodes in the graph" );
1031
1032				# Test for the correct number of edges
1033				my @svg_edges = $svg_xpc->findnodes( '//svg:g[@class="edge"]' );
1034				is( scalar @svg_edges, 32, "Correct number of edges in the graph" );
1035
1036				# Test svg creation for a subgraph
1037				my $part_svg = $parser->parse_string( $collation->as_svg( { from => 15 } ) ); # start, no end
1038				is( $part_svg->documentElement->nodeName(), 'svg', "Got an svg subgraph to end" );
1039				my $part_xpc = XML::LibXML::XPathContext->new( $part_svg->documentElement() );
1040				$part_xpc->registerNs( 'svg', 'http://www.w3.org/2000/svg' );
1041				@svg_nodes = $part_xpc->findnodes( '//svg:g[@class="node"]' );
1042				is( scalar( @svg_nodes ), 9,
1043				"Correct number of nodes in the subgraph" );
1044				@svg_edges = $part_xpc->findnodes( '//svg:g[@class="edge"]' );
1045				is( scalar( @svg_edges ), 10,
1046				"Correct number of edges in the subgraph" );
1047
1048				$part_svg = $parser->parse_string( $collation->as_svg( { from => 10, to => 13 } ) ); # start, no end
1049				is( $part_svg->documentElement->nodeName(), 'svg', "Got an svg subgraph in the middle" );
1050				$part_xpc = XML::LibXML::XPathContext->new( $part_svg->documentElement() );
1051				$part_xpc->registerNs( 'svg', 'http://www.w3.org/2000/svg' );
1052				@svg_nodes = $part_xpc->findnodes( '//svg:g[@class="node"]' );
1053				is( scalar( @svg_nodes ), 9,
1054				"Correct number of nodes in the subgraph" );
1055				@svg_edges = $part_xpc->findnodes( '//svg:g[@class="edge"]' );
1056				is( scalar( @svg_edges ), 11,
1057				"Correct number of edges in the subgraph" );
1058
1059
1060				$part_svg = $parser->parse_string( $collation->as_svg( { to => 5 } ) ); # start, no end
1061				is( $part_svg->documentElement->nodeName(), 'svg', "Got an svg subgraph from start" );
1062				$part_xpc = XML::LibXML::XPathContext->new( $part_svg->documentElement() );
1063				$part_xpc->registerNs( 'svg', 'http://www.w3.org/2000/svg' );
1064				@svg_nodes = $part_xpc->findnodes( '//svg:g[@class="node"]' );
1065				is( scalar( @svg_nodes ), 7,
1066				"Correct number of nodes in the subgraph" );
1067				@svg_edges = $part_xpc->findnodes( '//svg:g[@class="edge"]' );
1068				is( scalar( @svg_edges ), 7,
1069				"Correct number of edges in the subgraph" );
1070
1071				# Test a right-to-left graph
1072				my $arabic = Text::Tradition->new(
1073				input => 'Tabular',
1074				sep_char => ',',
1075				name => 'arabic',
1076				direction => 'RL',
1077				file => 't/data/arabic_snippet.csv' );
1078				my $rl_svg = $parser->parse_string( $arabic->collation->as_svg() );
1079				is( $rl_svg->documentElement->nodeName(), 'svg', "Got an svg subgraph from start" );
1080				my $rl_xpc = XML::LibXML::XPathContext->new( $rl_svg->documentElement() );
1081				$rl_xpc->registerNs( 'svg', 'http://www.w3.org/2000/svg' );
1082				my %node_cx;
1083				foreach my $node ( $rl_xpc->findnodes( '//svg:g[@class="node"]' ) ) {
1084				my $nid = $node->getAttribute('id');
1085				$node_cx{$nid} = $rl_xpc->findvalue( './svg:ellipse/@cx', $node );
1086				}
1087				my @sorted = sort { $node_cx{$a} <=> $node_cx{$b} } keys( %node_cx );
1088				is( $sorted[0], '__END__', "End node is the leftmost" );
1089				is( $sorted[$#sorted], '__START__', "Start node is the rightmost" );
1090
1091				# Now try making it bidirectional
1092				$arabic->collation->change_direction('BI');
1093				my $bi_svg = $parser->parse_string( $arabic->collation->as_svg() );
1094				is( $bi_svg->documentElement->nodeName(), 'svg', "Got an svg subgraph from start" );
1095				my $bi_xpc = XML::LibXML::XPathContext->new( $bi_svg->documentElement() );
1096				$bi_xpc->registerNs( 'svg', 'http://www.w3.org/2000/svg' );
1097				my %node_cy;
1098				foreach my $node ( $bi_xpc->findnodes( '//svg:g[@class="node"]' ) ) {
1099				my $nid = $node->getAttribute('id');
1100				$node_cy{$nid} = $rl_xpc->findvalue( './svg:ellipse/@cy', $node );
1101				}
1102				@sorted = sort { $node_cy{$a} <=> $node_cy{$b} } keys( %node_cy );
1103				is( $sorted[0], '__START__', "Start node is the topmost" );
1104				is( $sorted[$#sorted], '__END__', "End node is the bottom-most" );
1105
1106
1107				} #SKIP
1108
1109				=end testing
1110
1111				=cut
1112
1113				sub as_svg {
1114				my( $self, $opts ) = @_;
1115				throw( "Need GraphViz installed to output SVG" )
1116				unless File::Which::which( 'dot' );
1117				my $want_subgraph = exists $opts->{'from'} \|\| exists $opts->{'to'};
1118				$self->calculate_ranks()
1119				unless( $self->_graphcalc_done \|\| $opts->{'nocalc'} \|\| !$self->linear );
1120				my @cmd = qw/dot -Tsvg/;
1121				my( $svg, $err );
1122				my $dotfile = File::Temp->new();
1123				## USE FOR DEBUGGING
1124				# $dotfile->unlink_on_destroy(0);
1125				binmode $dotfile, ':utf8';
1126				print $dotfile $self->as_dot( $opts );
1127				push( @cmd, $dotfile->filename );
1128				run( \@cmd, ">", binary(), \$svg );
1129				$svg = decode_utf8( $svg );
1130				return $svg;
1131				}
1132
1133
1134				=head2 as_dot( \%options )
1135
1136				Returns a string that is the collation graph expressed in dot
1137				(i.e. GraphViz) format. Options include:
1138
1139				=over 4
1140
1141				=item * from
1142
1143				=item * to
1144
1145				=item * color_common
1146
1147				=back
1148
1149				=cut
1150
1151				sub as_dot {
1152				my( $self, $opts ) = @_;
1153				my $startrank = $opts->{'from'} if $opts;
1154				my $endrank = $opts->{'to'} if $opts;
1155				my $color_common = $opts->{'color_common'} if $opts;
1156				my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
1157				&& $self->end->rank > 100;
1158				$STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
1159
1160				# Check the arguments
1161				if( $startrank ) {
1162				return if $endrank && $startrank > $endrank;
1163				return if $startrank > $self->end->rank;
1164				}
1165				if( defined $endrank ) {
1166				return if $endrank < 0;
1167				$endrank = undef if $endrank == $self->end->rank;
1168				}
1169
1170				my $graph_name = $self->tradition->name;
1171				$graph_name =~ s/[^\w\s]//g;
1172				$graph_name = join( '_', split( /\s+/, $graph_name ) );
1173
1174				my %graph_attrs = (
1175				'bgcolor' => 'none',
1176				);
1177				unless( $self->direction eq 'BI' ) {
1178				$graph_attrs{rankdir} = $self->direction;
1179				}
1180				my %node_attrs = (
1181				'fontsize' => 14,
1182				'fillcolor' => 'white',
1183				'style' => 'filled',
1184				'shape' => 'ellipse'
1185				);
1186				my %edge_attrs = (
1187				'arrowhead' => 'open',
1188				'color' => '#000000',
1189				'fontcolor' => '#000000',
1190				);
1191
1192				my $dot = sprintf( "digraph %s {\n", $graph_name );
1193				$dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
1194				$dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
1195
1196				# Output substitute start/end readings if necessary
1197				if( $startrank ) {
1198				$dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
1199				}
1200				if( $endrank ) {
1201				$dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
1202				}
1203				if( $STRAIGHTENHACK ) {
1204				## HACK part 1
1205				my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
1206				$dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
1207				$dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
1208				}
1209				my %used; # Keep track of the readings that actually appear in the graph
1210				# Sort the readings by rank if we have ranks; this speeds layout.
1211				my @all_readings = $self->end->has_rank
1212				? sort { $a->rank <=> $b->rank } $self->readings
1213				: $self->readings;
1214				# TODO Refrain from outputting lacuna nodes - just grey out the edges.
1215				foreach my $reading ( @all_readings ) {
1216				# Only output readings within our rank range.
1217				next if $startrank && $reading->rank < $startrank;
1218				next if $endrank && $reading->rank > $endrank;
1219				$used{$reading->id} = 1;
1220				# Need not output nodes without separate labels
1221				next if $reading->id eq $reading->text;
1222				my $rattrs;
1223				my $label = $reading->text;
1224				unless( $label =~ /^[[:punct:]]+$/ ) {
1225				$label .= '-' if $reading->join_next;
1226				$label = "-$label" if $reading->join_prior;
1227				}
1228				$label =~ s/\"/\\\"/g;
1229				$rattrs->{'label'} = $label;
1230				$rattrs->{'id'} = $reading->id;
1231				$rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
1232				$dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
1233				}
1234
1235				# Add the real edges.
1236				my @edges = $self->paths;
1237				my( %substart, %subend );
1238				foreach my $edge ( @edges ) {
1239				# Do we need to output this edge?
1240				if( $used{$edge->[0]} && $used{$edge->[1]} ) {
1241				my $label = $self->_path_display_label( $opts,
1242				$self->path_witnesses( $edge ) );
1243				my $variables = { %edge_attrs, 'label' => $label };
1244
1245				# Account for the rank gap if necessary
1246				my $rank0 = $self->reading( $edge->[0] )->rank
1247				if $self->reading( $edge->[0] )->has_rank;
1248				my $rank1 = $self->reading( $edge->[1] )->rank
1249				if $self->reading( $edge->[1] )->has_rank;
1250				if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
1251				$variables->{'minlen'} = $rank1 - $rank0;
1252				}
1253
1254				# EXPERIMENTAL: make edge width reflect no. of witnesses
1255				my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
1256				$variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
1257
1258				my $varopts = _dot_attr_string( $variables );
1259				$dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1260				$edge->[0], $edge->[1], $varopts );
1261				} elsif( $used{$edge->[0]} ) {
1262				$subend{$edge->[0]} = $edge->[1];
1263				} elsif( $used{$edge->[1]} ) {
1264				$substart{$edge->[1]} = $edge->[0];
1265				}
1266				}
1267
1268				# If we are asked to, add relationship links
1269				if( exists $opts->{show_relations} ) {
1270				my $filter = $opts->{show_relations}; # can be 'transposition' or 'all'
1271				if( $filter eq 'transposition' ) {
1272				$filter =~ qr/^transposition$/;
1273				}
1274				my %typecolors;
1275				my @types = sort( map { $_->name } $self->relations->types );
1276				if( exists $opts->{graphcolors} ) {
1277				foreach my $tdx ( 0 .. $#types ) {
1278				$typecolors{$types[$tdx]} = $opts->{graphcolors}->[$tdx];
1279				}
1280				} else {
1281				map { $typecolors{$_} = '#FFA14F' } @types;
1282				}
1283				foreach my $redge ( $self->relationships ) {
1284				if( $used{$redge->[0]} && $used{$redge->[1]} ) {
1285				my $rel = $self->get_relationship( $redge );
1286				next unless $filter eq 'all' \|\| $rel->type =~ /$filter/;
1287				my $variables = {
1288				arrowhead => 'none',
1289				color => $typecolors{$rel->type},
1290				constraint => 'false',
1291				penwidth => '3',
1292				};
1293				unless( exists $opts->{graphcolors} ) {
1294				$variables->{label} = uc( substr( $rel->type, 0, 4 ) ),
1295				}
1296				$dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1297				$redge->[0], $redge->[1], _dot_attr_string( $variables ) );
1298				}
1299				}
1300				}
1301
1302				# Add substitute start and end edges if necessary
1303				foreach my $node ( keys %substart ) {
1304				my $witstr = $self->_path_display_label( $opts,
1305				$self->path_witnesses( $substart{$node}, $node ) );
1306				my $variables = { %edge_attrs, 'label' => $witstr };
1307				my $nrdg = $self->reading( $node );
1308				if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
1309				# Substart is actually one lower than $startrank
1310				$variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
1311				}
1312				my $varopts = _dot_attr_string( $variables );
1313				$dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
1314				}
1315				foreach my $node ( keys %subend ) {
1316				my $witstr = $self->_path_display_label( $opts,
1317				$self->path_witnesses( $node, $subend{$node} ) );
1318				my $variables = { %edge_attrs, 'label' => $witstr };
1319				my $varopts = _dot_attr_string( $variables );
1320				$dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
1321				}
1322				# HACK part 2
1323				if( $STRAIGHTENHACK ) {
1324				my $endlabel = $endrank ? '__SUBEND__' : '__END__';
1325				$dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
1326				}
1327
1328				$dot .= "}\n";
1329				return $dot;
1330				}
1331
1332				sub _dot_attr_string {
1333				my( $hash ) = @_;
1334				my @attrs;
1335				foreach my $k ( sort keys %$hash ) {
1336				my $v = $hash->{$k};
1337				push( @attrs, $k.'="'.$v.'"' );
1338				}
1339				return( '[ ' . join( ', ', @attrs ) . ' ]' );
1340				}
1341
1342				=head2 path_witnesses( $edge )
1343
1344				Returns the list of sigils whose witnesses are associated with the given edge.
1345				The edge can be passed as either an array or an arrayref of ( $source, $target ).
1346
1347				=cut
1348
1349				sub path_witnesses {
1350				my( $self, @edge ) = @_;
1351				# If edge is an arrayref, cope.
1352				if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
1353				my $e = shift @edge;
1354				@edge = @$e;
1355				}
1356				my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
1357				return @wits;
1358				}
1359
1360				# Helper function. Make a display label for the given witnesses, showing a.c.
1361				# witnesses only where the main witness is not also in the list.
1362				sub _path_display_label {
1363				my $self = shift;
1364				my $opts = shift;
1365				my %wits;
1366				map { $wits{$_} = 1 } @_;
1367
1368				# If an a.c. wit is listed, remove it if the main wit is also listed.
1369				# Otherwise keep it for explicit listing.
1370				my $aclabel = $self->ac_label;
1371				my @disp_ac;
1372				foreach my $w ( sort keys %wits ) {
1373				if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
1374				if( exists $wits{$1} ) {
1375				delete $wits{$w};
1376				} else {
1377				push( @disp_ac, $w );
1378				}
1379				}
1380				}
1381
1382				if( $opts->{'explicit_wits'} ) {
1383				return join( ', ', sort keys %wits );
1384				} else {
1385				# See if we are in a majority situation.
1386				my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1387				$maj = $maj > 5 ? $maj : 5;
1388				if( scalar keys %wits > $maj ) {
1389				unshift( @disp_ac, 'majority' );
1390				return join( ', ', @disp_ac );
1391				} else {
1392				return join( ', ', sort keys %wits );
1393				}
1394				}
1395				}
1396
1397				=head2 as_adjacency_list
1398
1399				Returns a JSON structure that represents the collation sequence graph.
1400
1401				=begin testing
1402
1403				use JSON qw/ from_json /;
1404				use Text::Tradition;
1405
1406				my $t = Text::Tradition->new(
1407				'input' => 'Self',
1408				'file' => 't/data/florilegium_graphml.xml' );
1409				my $c = $t->collation;
1410
1411				# Make a connection so we can test rank preservation
1412				$c->add_relationship( 'w91', 'w92', { type => 'grammatical' } );
1413
1414				# Create an adjacency list of the whole thing; test the output.
1415				my $adj_whole = from_json( $c->as_adjacency_list() );
1416				is( scalar @$adj_whole, scalar $c->readings(),
1417				"Same number of nodes in graph and adjacency list" );
1418				my @adj_whole_edges;
1419				map { push( @adj_whole_edges, @{$_->{adjacent}} ) } @$adj_whole;
1420				is( scalar @adj_whole_edges, scalar $c->sequence->edges,
1421				"Same number of edges in graph and adjacency list" );
1422				# Find the reading whose rank should be preserved
1423				my( $test_rdg ) = grep { $_->{id} eq 'w89' } @$adj_whole;
1424				my( $test_edge ) = grep { $_->{id} eq 'w92' } @{$test_rdg->{adjacent}};
1425				is( $test_edge->{minlen}, 2, "Rank of test reading is preserved" );
1426
1427				# Now create an adjacency list of just a portion. w76 to w122
1428				my $adj_part = from_json( $c->as_adjacency_list(
1429				{ from => $c->reading('w76')->rank,
1430				to => $c->reading('w122')->rank }));
1431				is( scalar @$adj_part, 48, "Correct number of nodes in partial graph" );
1432				my @adj_part_edges;
1433				map { push( @adj_part_edges, @{$_->{adjacent}} ) } @$adj_part;
1434				is( scalar @adj_part_edges, 58,
1435				"Same number of edges in partial graph and adjacency list" );
1436				# Check for consistency
1437				my %part_nodes;
1438				map { $part_nodes{$_->{id}} = 1 } @$adj_part;
1439				foreach my $edge ( @adj_part_edges ) {
1440				my $testid = $edge->{id};
1441				ok( $part_nodes{$testid}, "ID $testid referenced in edge is given as node" );
1442				}
1443
1444				=end testing
1445
1446				=cut
1447
1448				sub as_adjacency_list {
1449				my( $self, $opts ) = @_;
1450				# Make a structure that contains all the nodes, the nodes they point to,
1451				# and the attributes of the edges that connect them.
1452				# [ { id: 'n0', label: 'Gallia', adjacent: [
1453				# { id: 'n1', label: 'P Q' } ,
1454				# { id: 'n2', label: 'R S', minlen: 2 } ] },
1455				# { id: 'n1', label: 'est', adjacent: [ ... ] },
1456				# ... ]
1457				my $startrank = $opts->{'from'} \|\| 0;
1458				my $endrank = $opts->{'to'} \|\| $self->end->rank;
1459
1460				$self->calculate_ranks()
1461				unless( $self->_graphcalc_done \|\| $opts->{'nocalc'} \|\| !$self->linear );
1462				my $list = [];
1463				foreach my $rdg ( $self->readings ) {
1464				my @successors;
1465				my $phony = '';
1466				# Figure out what the node's successors should be.
1467				if( $rdg eq $self->start && $startrank > 0 ) {
1468				# Connect the start node with all the nodes at startrank.
1469				# Lacunas should be included only if the node really has that rank.
1470				@successors = $self->readings_at_rank( $startrank, 1 );
1471				$phony = 'start';
1472				} elsif( $rdg->rank < $startrank
1473				\|\| $rdg->rank > $endrank && $rdg ne $self->end ) {
1474				next;
1475				} else {
1476				@successors = $rdg->successors;
1477				}
1478				# Make sure that the end node is at the end of the successors
1479				# list if it is needed.
1480				if( grep { $_ eq $self->end } @successors ) {
1481				my @ts = grep { $_ ne $self->end } @successors;
1482				@successors = ( @ts, $self->end );
1483				} elsif ( grep { $_->rank > $endrank } @successors ) {
1484				push( @successors, $self->end );
1485				}
1486
1487				my $listitem = { id => $rdg->id, label => $rdg->text };
1488				my $adjacent = [];
1489				my @endwits;
1490				foreach my $succ ( @successors ) {
1491				my @edgewits;
1492				if( $phony eq 'start' ) {
1493				@edgewits = $succ->witnesses;
1494				} elsif( $self->sequence->has_edge( $rdg->id, $succ->id ) ) {
1495				@edgewits = $self->path_witnesses( $rdg->id, $succ->id );
1496				}
1497
1498				if( $succ eq $self->end ) {
1499				@edgewits = @endwits;
1500				} elsif( $succ->rank > $endrank ) {
1501				# These witnesses will point to 'end' instead, not to the
1502				# actual successor.
1503				push( @endwits, @edgewits );
1504				next;
1505				}
1506				my $edgelabel = $self->_path_display_label( $opts, @edgewits );
1507				my $edgedef = { id => $succ->id, label => $edgelabel };
1508				my $rankoffset = $succ->rank - $rdg->rank;
1509				if( $rankoffset > 1 and $succ ne $self->end ) {
1510				$edgedef->{minlen} = $rankoffset;
1511				}
1512				push( @$adjacent, $edgedef );
1513				}
1514				$listitem->{adjacent} = $adjacent;
1515				push( @$list, $listitem );
1516				}
1517				return to_json( $list );
1518				}
1519
1520				=head2 as_graphml
1521
1522				Returns a GraphML representation of the collation. The GraphML will contain
1523				two graphs. The first expresses the attributes of the readings and the witness
1524				paths that link them; the second expresses the relationships that link the
1525				readings. This is the native transfer format for a tradition.
1526
1527				=begin testing
1528
1529				use Text::Tradition;
1530				use TryCatch;
1531
1532				my $READINGS = 311;
1533				my $PATHS = 361;
1534
1535				my $datafile = 't/data/florilegium_tei_ps.xml';
1536				my $tradition = Text::Tradition->new( 'input' => 'TEI',
1537				'name' => 'test0',
1538				'file' => $datafile,
1539				'linear' => 1 );
1540
1541				ok( $tradition, "Got a tradition object" );
1542				is( scalar $tradition->witnesses, 13, "Found all witnesses" );
1543				ok( $tradition->collation, "Tradition has a collation" );
1544
1545				my $c = $tradition->collation;
1546				is( scalar $c->readings, $READINGS, "Collation has all readings" );
1547				is( scalar $c->paths, $PATHS, "Collation has all paths" );
1548				is( scalar $c->relationships, 0, "Collation has all relationships" );
1549
1550				# Add a few relationships
1551				$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
1552				$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
1553				$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition',
1554				'is_significant' => 'yes' } );
1555
1556				# Now write it to GraphML and parse it again.
1557
1558				my $graphml = $c->as_graphml;
1559				my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
1560				is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
1561				is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
1562				is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
1563				my $sigrel = $st->collation->get_relationship( 'w257', 'w262' );
1564				is( $sigrel->is_significant, 'yes', "Ternary attribute value was restored" );
1565
1566				# Now add a stemma, write to GraphML, and look at the output.
1567				SKIP: {
1568				skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
1569				my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
1570				is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
1571				is( $tradition->stemmata, 1, "Tradition now has the stemma" );
1572				$graphml = $c->as_graphml;
1573				like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
1574				}
1575
1576				=end testing
1577
1578				=cut
1579
1580				## TODO MOVE this to Tradition.pm and modularize it better
1581				sub as_graphml {
1582				my( $self, $options ) = @_;
1583				$self->calculate_ranks unless $self->_graphcalc_done;
1584
1585				my $start = $options->{'from'}
1586				? $self->reading( $options->{'from'} ) : $self->start;
1587				my $end = $options->{'to'}
1588				? $self->reading( $options->{'to'} ) : $self->end;
1589				if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
1590				throw( 'Start node must be before end node' );
1591				}
1592				# The readings need to be ranked for this to work.
1593				$start = $self->start unless $start->has_rank;
1594				$end = $self->end unless $end->has_rank;
1595				my $rankoffset = 0;
1596				unless( $start eq $self->start ) {
1597				$rankoffset = $start->rank - 1;
1598				}
1599				my %use_readings;
1600
1601				# Some namespaces
1602				my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1603				my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1604				my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
1605				'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
1606
1607				# Create the document and root node
1608				require XML::LibXML;
1609				my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1610				my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1611				$graphml->setDocumentElement( $root );
1612				$root->setNamespace( $xsi_ns, 'xsi', 0 );
1613				$root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
1614
1615				# List of attribute types to save on our objects and their corresponding
1616				# GraphML types
1617				my %save_types = (
1618				'Str' => 'string',
1619				'Int' => 'int',
1620				'Bool' => 'boolean',
1621				'ReadingID' => 'string',
1622				'RelationshipType' => 'string',
1623				'RelationshipScope' => 'string',
1624				'Ternary' => 'string',
1625				);
1626
1627				# Add the data keys for the graph. Include an extra key 'version' for the
1628				# GraphML output version.
1629				my %graph_data_keys;
1630				my $gdi = 0;
1631				my %graph_attributes = ( 'version' => 'string' );
1632				# Graph attributes include those of Tradition and those of Collation.
1633				my %gattr_from;
1634				# TODO Use meta introspection method from duplicate_reading to do this
1635				# instead of naming custom keys.
1636				my $tmeta = $self->tradition->meta;
1637				my $cmeta = $self->meta;
1638				map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1639				map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1640				foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1641				next if $attr->name =~ /^_/;
1642				next unless $save_types{$attr->type_constraint->name};
1643				$graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1644				}
1645				# Extra custom keys for complex objects that should be saved in some form.
1646				# The subroutine should return a string, or undef/empty.
1647				if( $tmeta->has_method('stemmata') ) {
1648				$graph_attributes{'stemmata'} = sub {
1649				my @stemstrs;
1650				map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1651				$self->tradition->stemmata;
1652				join( "\n", @stemstrs );
1653				};
1654				}
1655
1656				if( $tmeta->has_method('user') ) {
1657				$graph_attributes{'user'} = sub {
1658				$self->tradition->user ? $self->tradition->user->id : undef
1659				};
1660				}
1661
1662				foreach my $datum ( sort keys %graph_attributes ) {
1663				$graph_data_keys{$datum} = 'dg'.$gdi++;
1664				my $key = $root->addNewChild( $graphml_ns, 'key' );
1665				my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1666				: $graph_attributes{$datum};
1667				$key->setAttribute( 'attr.name', $datum );
1668				$key->setAttribute( 'attr.type', $dtype );
1669				$key->setAttribute( 'for', 'graph' );
1670				$key->setAttribute( 'id', $graph_data_keys{$datum} );
1671				}
1672
1673				# Add the data keys for reading nodes
1674				my %reading_attributes;
1675				my $rmeta = Text::Tradition::Collation::Reading->meta;
1676				foreach my $attr( $rmeta->get_all_attributes ) {
1677				next if $attr->name =~ /^_/;
1678				next unless $save_types{$attr->type_constraint->name};
1679				$reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1680				}
1681				if( $self->start->does('Text::Tradition::Morphology' ) ) {
1682				# Extra custom key for the reading morphology
1683				$reading_attributes{'lexemes'} = 'string';
1684				}
1685
1686				my %node_data_keys;
1687				my $ndi = 0;
1688				foreach my $datum ( sort keys %reading_attributes ) {
1689				$node_data_keys{$datum} = 'dn'.$ndi++;
1690				my $key = $root->addNewChild( $graphml_ns, 'key' );
1691				$key->setAttribute( 'attr.name', $datum );
1692				$key->setAttribute( 'attr.type', $reading_attributes{$datum} );
1693				$key->setAttribute( 'for', 'node' );
1694				$key->setAttribute( 'id', $node_data_keys{$datum} );
1695				}
1696
1697				# Add the data keys for edges, that is, paths and relationships. Path
1698				# data does not come from a Moose class so is here manually.
1699				my $edi = 0;
1700				my %edge_data_keys;
1701				my %edge_attributes = (
1702				witness => 'string', # ID/label for a path
1703				extra => 'boolean', # Path key
1704				);
1705				my @path_attributes = keys %edge_attributes; # track our manual additions
1706				my $pmeta = Text::Tradition::Collation::Relationship->meta;
1707				foreach my $attr( $pmeta->get_all_attributes ) {
1708				next if $attr->name =~ /^_/;
1709				next unless $save_types{$attr->type_constraint->name};
1710				$edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1711				}
1712				foreach my $datum ( sort keys %edge_attributes ) {
1713				$edge_data_keys{$datum} = 'de'.$edi++;
1714				my $key = $root->addNewChild( $graphml_ns, 'key' );
1715				$key->setAttribute( 'attr.name', $datum );
1716				$key->setAttribute( 'attr.type', $edge_attributes{$datum} );
1717				$key->setAttribute( 'for', 'edge' );
1718				$key->setAttribute( 'id', $edge_data_keys{$datum} );
1719				}
1720
1721				# Add the collation graph itself. First, sanitize the name to a valid XML ID.
1722				my $xmlidname = $self->tradition->name;
1723				$xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1724				if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1725				$xmlidname = '_'.$xmlidname;
1726				}
1727				my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1728				$sgraph->setAttribute( 'edgedefault', 'directed' );
1729				$sgraph->setAttribute( 'id', $xmlidname );
1730				$sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1731				$sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
1732				$sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1733				$sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
1734				$sgraph->setAttribute( 'parse.order', 'nodesfirst' );
1735
1736				# Tradition/collation attribute data
1737				foreach my $datum ( keys %graph_attributes ) {
1738				my $value;
1739				if( $datum eq 'version' ) {
1740				$value = '3.2';
1741				} elsif( ref( $graph_attributes{$datum} ) ) {
1742				my $sub = $graph_attributes{$datum};
1743				$value = &$sub();
1744				} elsif( $gattr_from{$datum} eq 'Tradition' ) {
1745				$value = $self->tradition->$datum;
1746				} else {
1747				$value = $self->$datum;
1748				}
1749				_add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
1750				}
1751
1752				my $node_ctr = 0;
1753				my %node_hash;
1754				# Add our readings to the graph
1755				foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
1756				next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1757				( $n->rank < $start->rank \|\| $n->rank > $end->rank );
1758				$use_readings{$n->id} = 1;
1759				# Add to the main graph
1760				my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
1761				my $node_xmlid = 'n' . $node_ctr++;
1762				$node_hash{ $n->id } = $node_xmlid;
1763				$node_el->setAttribute( 'id', $node_xmlid );
1764				foreach my $d ( keys %reading_attributes ) {
1765				my $nval = $n->$d;
1766				# Custom serialization
1767				if( $d eq 'lexemes' ) {
1768				# If nval is a true value, we have lexemes so we need to
1769				# serialize them. Otherwise set nval to undef so that the
1770				# key is excluded from this reading.
1771				$nval = $nval ? $n->_serialize_lexemes : undef;
1772				} elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1773				$nval = undef;
1774				}
1775				if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
1776				# Adjust the ranks within the subgraph.
1777				$nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1778				: $nval - $rankoffset;
1779				}
1780				_add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1781				if defined $nval;
1782				}
1783				}
1784
1785				# Add the path edges to the sequence graph
1786				my $edge_ctr = 0;
1787				foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1788				# We add an edge in the graphml for every witness in $e.
1789				next unless( $use_readings{$e->[0]} \|\| $use_readings{$e->[1]} );
1790				my @edge_wits = sort $self->path_witnesses( $e );
1791				$e->[0] = $self->start->id unless $use_readings{$e->[0]};
1792				$e->[1] = $self->end->id unless $use_readings{$e->[1]};
1793				# Skip any path from start to end; that witness is not in the subgraph.
1794				next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
1795				foreach my $wit ( @edge_wits ) {
1796				my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1797				$node_hash{ $e->[0] },
1798				$node_hash{ $e->[1] } );
1799				my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
1800				$edge_el->setAttribute( 'source', $from );
1801				$edge_el->setAttribute( 'target', $to );
1802				$edge_el->setAttribute( 'id', $id );
1803
1804				# It's a witness path, so add the witness
1805				my $base = $wit;
1806				my $key = $edge_data_keys{'witness'};
1807				# Is this an ante-corr witness?
1808				my $aclabel = $self->ac_label;
1809				if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1810				# Keep the base witness
1811				$base = $1;
1812				# ...and record that this is an 'extra' reading path
1813				_add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1814				}
1815				_add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1816				}
1817				}
1818
1819				# Report the actual number of nodes and edges that went in
1820				$sgraph->setAttribute( 'parse.edges', $edge_ctr );
1821				$sgraph->setAttribute( 'parse.nodes', $node_ctr );
1822
1823				# Add the relationship graph to the XML
1824				map { delete $edge_data_keys{$_} } @path_attributes;
1825				$self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1826				$node_data_keys{'id'}, \%edge_data_keys );
1827
1828				# Save and return the thing
1829				my $result = decode_utf8( $graphml->toString(1) );
1830				return $result;
1831				}
1832
1833				sub _add_graphml_data {
1834				my( $el, $key, $value ) = @_;
1835				return unless defined $value;
1836				my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1837				$data_el->setAttribute( 'key', $key );
1838				$data_el->appendText( $value );
1839				}
1840
1841				=head2 as_csv
1842
1843				Returns a CSV alignment table representation of the collation graph, one
1844				row per witness (or witness uncorrected.)
1845
1846				=head2 as_tsv
1847
1848				Returns a tab-separated alignment table representation of the collation graph,
1849				one row per witness (or witness uncorrected.)
1850
1851				=begin testing
1852
1853				use Text::Tradition;
1854				use Text::CSV;
1855
1856				my $READINGS = 311;
1857				my $PATHS = 361;
1858				my $WITS = 13;
1859				my $WITAC = 4;
1860
1861				my $datafile = 't/data/florilegium_tei_ps.xml';
1862				my $tradition = Text::Tradition->new( 'input' => 'TEI',
1863				'name' => 'test0',
1864				'file' => $datafile,
1865				'linear' => 1 );
1866
1867				my $c = $tradition->collation;
1868				# Export the thing to CSV
1869				my $csvstr = $c->as_csv();
1870				# Count the columns
1871				my $csv = Text::CSV->new({ sep_char => ',', binary => 1 });
1872				my @lines = split(/\n/, $csvstr );
1873				ok( $csv->parse( $lines[0] ), "Successfully parsed first line of CSV" );
1874				is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1875				my @q_ac = grep { $_ eq 'Q'.$c->ac_label } $csv->fields;
1876				ok( @q_ac, "Found a layered witness" );
1877
1878				my $t2 = Text::Tradition->new( input => 'Tabular',
1879				name => 'test2',
1880				string => $csvstr,
1881				sep_char => ',' );
1882				is( scalar $t2->collation->readings, $READINGS, "Reparsed CSV collation has all readings" );
1883				is( scalar $t2->collation->paths, $PATHS, "Reparsed CSV collation has all paths" );
1884
1885				# Now do it with TSV
1886				my $tsvstr = $c->as_tsv();
1887				my $t3 = Text::Tradition->new( input => 'Tabular',
1888				name => 'test3',
1889				string => $tsvstr,
1890				sep_char => "\t" );
1891				is( scalar $t3->collation->readings, $READINGS, "Reparsed TSV collation has all readings" );
1892				is( scalar $t3->collation->paths, $PATHS, "Reparsed TSV collation has all paths" );
1893
1894				my $table = $c->alignment_table;
1895				my $noaccsv = $c->as_csv({ noac => 1 });
1896				my @noaclines = split(/\n/, $noaccsv );
1897				ok( $csv->parse( $noaclines[0] ), "Successfully parsed first line of no-ac CSV" );
1898				is( scalar( $csv->fields ), $WITS, "CSV has correct number of witness columns" );
1899				is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1900
1901				my $safecsv = $c->as_csv({ safe_ac => 1});
1902				my @safelines = split(/\n/, $safecsv );
1903				ok( $csv->parse( $safelines[0] ), "Successfully parsed first line of safe CSV" );
1904				is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1905				@q_ac = grep { $_ eq 'Q__L' } $csv->fields;
1906				ok( @q_ac, "Found a sanitized layered witness" );
1907				is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1908
1909				# Test relationship collapse
1910				$c->add_relationship( $c->readings_at_rank( 37 ), { type => 'spelling' } );
1911				$c->add_relationship( $c->readings_at_rank( 60 ), { type => 'spelling' } );
1912
1913				my $mergedtsv = $c->as_tsv({mergetypes => [ 'spelling', 'orthographic' ] });
1914				my $t4 = Text::Tradition->new( input => 'Tabular',
1915				name => 'test4',
1916				string => $mergedtsv,
1917				sep_char => "\t" );
1918				is( scalar $t4->collation->readings, $READINGS - 2, "Reparsed TSV merge collation has fewer readings" );
1919				is( scalar $t4->collation->paths, $PATHS - 4, "Reparsed TSV merge collation has fewer paths" );
1920
1921				# Test non-ASCII sigla
1922				my $t5 = Text::Tradition->new( input => 'Tabular',
1923				name => 'nonascii',
1924				file => 't/data/armexample.xlsx',
1925				excel => 'xlsx' );
1926				my $awittsv = $t5->collation->as_tsv({ noac => 1, ascii => 1 });
1927				my @awitlines = split( /\n/, $awittsv );
1928				like( $awitlines[0], qr/_A_5315622/, "Found ASCII sigil variant in TSV" );
1929
1930				=end testing
1931
1932				=cut
1933
1934				sub _tabular {
1935				my( $self, $opts ) = @_;
1936				my $table = $self->alignment_table( $opts );
1937				my $csv_options = { binary => 1, quote_null => 0 };
1938				$csv_options->{'sep_char'} = $opts->{fieldsep};
1939				if( $opts->{fieldsep} eq "\t" ) {
1940				# If it is really tab separated, nothing is an escape char.
1941				$csv_options->{'quote_char'} = undef;
1942				$csv_options->{'escape_char'} = '';
1943				}
1944				my $csv = Text::CSV->new( $csv_options );
1945				my @result;
1946
1947				# Make the header row
1948				my @witnesses = map { $_->{'witness'} } @{$table->{'alignment'}};
1949				if( $opts->{ascii} ) {
1950				# TODO think of a fix for this
1951				throw( "Cannot currently produce ASCII sigla with witness layers" )
1952				unless $opts->{noac};
1953				my @awits = map { $self->tradition->witness( $_ )->ascii_sigil } @witnesses;
1954				@witnesses = @awits;
1955				}
1956				$csv->combine( @witnesses );
1957				push( @result, $csv->string );
1958
1959				# Make the rest of the rows
1960				foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1961				my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1962				my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
1963				# Quick and dirty collapse of requested relationship types
1964				if( ref( $opts->{mergetypes} ) eq 'ARRAY' ) {
1965				# Now substitute the reading in the relevant index of @row
1966				# for its merge-related reading
1967				my %substitutes;
1968				while( @rowobjs ) {
1969				my $thisr = shift @rowobjs;
1970				next unless $thisr;
1971				next if exists $substitutes{$thisr->{t}->text};
1972				# Make sure we don't have A <-> B substitutions.
1973				$substitutes{$thisr->{t}->text} = $thisr->{t}->text;
1974				foreach my $thatr ( @rowobjs ) {
1975				next unless $thatr;
1976				next if exists $substitutes{$thatr->{t}->text};
1977				my $ttrel = $self->get_relationship( $thisr->{t}, $thatr->{t} );
1978				next unless $ttrel;
1979				next unless grep { $ttrel->type eq $_ } @{$opts->{mergetypes}};
1980				# If we have got this far then we need to merge them.
1981				$substitutes{$thatr->{t}->text} = $thisr->{t}->text;
1982				}
1983				}
1984				@row = map { $_ && exists $substitutes{$_} ? $substitutes{$_} : $_ } @row;
1985				}
1986				$csv->combine( @row );
1987				push( @result, $csv->string );
1988				}
1989				return join( "\n", @result );
1990				}
1991
1992				sub as_csv {
1993				my $self = shift;
1994				my $opts = shift \|\| {};
1995				$opts->{fieldsep} = ',';
1996				return $self->_tabular( $opts );
1997				}
1998
1999				sub as_tsv {
2000				my $self = shift;
2001				my $opts = shift \|\| {};
2002				$opts->{fieldsep} = "\t";
2003				return $self->_tabular( $opts );
2004				}
2005
2006				=head2 alignment_table
2007
2008				Return a reference to an alignment table, in a slightly enhanced CollateX
2009				format which looks like this:
2010
2011				$table = { alignment => [ { witness => "SIGIL",
2012				tokens => [ { t => "TEXT" }, ... ] },
2013				{ witness => "SIG2",
2014				tokens => [ { t => "TEXT" }, ... ] },
2015				... ],
2016				length => TEXTLEN };
2017
2018				=cut
2019
2020				sub alignment_table {
2021				my( $self, $opts ) = @_;
2022				if( $self->has_cached_table ) {
2023				return $self->cached_table
2024				unless $opts->{noac} \|\| $opts->{safe_ac};
2025				}
2026
2027				# Make sure we can do this
2028				throw( "Need a linear graph in order to make an alignment table" )
2029				unless $self->linear;
2030				$self->calculate_ranks()
2031				unless $self->_graphcalc_done && $self->end->has_rank;
2032
2033				my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
2034				my @all_pos = ( 1 .. $self->end->rank - 1 );
2035				foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
2036				# say STDERR "Making witness row(s) for " . $wit->sigil;
2037				my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
2038				my @row = _make_witness_row( \@wit_path, \@all_pos );
2039				my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
2040				$witobj->{'identifier'} = $wit->identifier if $wit->identifier;
2041				push( @{$table->{'alignment'}}, $witobj );
2042				if( $wit->is_layered && !$opts->{noac} ) {
2043				my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
2044				$wit->sigil.$self->ac_label );
2045				my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
2046				my $witlabel = $opts->{safe_ac}
2047				? $wit->sigil . '__L' : $wit->sigil.$self->ac_label;
2048				my $witacobj = { 'witness' => $witlabel,
2049				'tokens' => \@ac_row };
2050				$witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
2051				push( @{$table->{'alignment'}}, $witacobj );
2052				}
2053				}
2054				unless( $opts->{noac} \|\| $opts->{safe_ac} ) {
2055				$self->cached_table( $table );
2056				}
2057				return $table;
2058				}
2059
2060				sub _make_witness_row {
2061				my( $path, $positions ) = @_;
2062				my %char_hash;
2063				map { $char_hash{$_} = undef } @$positions;
2064				my $debug = 0;
2065				foreach my $rdg ( @$path ) {
2066				say STDERR "rank " . $rdg->rank if $debug;
2067				# say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
2068				$char_hash{$rdg->rank} = { 't' => $rdg };
2069				}
2070				my @row = map { $char_hash{$_} } @$positions;
2071				# Fill in lacuna markers for undef spots in the row
2072				my $last_el = shift @row;
2073				my @filled_row = ( $last_el );
2074				foreach my $el ( @row ) {
2075				# If we are using node reference, make the lacuna node appear many times
2076				# in the table. If not, use the lacuna tag.
2077				if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
2078				$el = $last_el;
2079				}
2080				push( @filled_row, $el );
2081				$last_el = $el;
2082				}
2083				return @filled_row;
2084				}
2085
2086
2087				=head1 NAVIGATION METHODS
2088
2089				=head2 reading_sequence( $first, $last, $sigil, $backup )
2090
2091				Returns the ordered list of readings, starting with $first and ending
2092				with $last, for the witness given in $sigil. If a $backup sigil is
2093				specified (e.g. when walking a layered witness), it will be used wherever
2094				no $sigil path exists. If there is a base text reading, that will be
2095				used wherever no path exists for $sigil or $backup.
2096
2097				=cut
2098
2099				# TODO Think about returning some lazy-eval iterator.
2100				# TODO Get rid of backup; we should know from what witness is whether we need it.
2101
2102				sub reading_sequence {
2103				my( $self, $start, $end, $witness ) = @_;
2104
2105				$witness = $self->baselabel unless $witness;
2106				my @readings = ( $start );
2107				my %seen;
2108				my $n = $start;
2109				while( $n && $n->id ne $end->id ) {
2110				if( exists( $seen{$n->id} ) ) {
2111				throw( "Detected loop for $witness at " . $n->id );
2112				}
2113				$seen{$n->id} = 1;
2114
2115				my $next = $self->next_reading( $n, $witness );
2116				unless( $next ) {
2117				throw( "Did not find any path for $witness from reading " . $n->id );
2118				}
2119				push( @readings, $next );
2120				$n = $next;
2121				}
2122				# Check that the last reading is our end reading.
2123				my $last = $readings[$#readings];
2124				throw( "Last reading found from " . $start->text .
2125				" for witness $witness is not the end!" ) # TODO do we get this far?
2126				unless $last->id eq $end->id;
2127
2128				return @readings;
2129				}
2130
2131				=head2 readings_at_rank( $rank )
2132
2133				Returns a list of readings at a given rank, taken from the alignment table.
2134
2135				=cut
2136
2137				sub readings_at_rank {
2138				my( $self, $rank, $nolacuna ) = @_;
2139				my $table = $self->alignment_table;
2140				# Table rank is real rank - 1.
2141				my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
2142				my %readings;
2143				foreach my $e ( @elements ) {
2144				next unless ref( $e ) eq 'HASH';
2145				next unless exists $e->{'t'};
2146				my $rdg = $e->{'t'};
2147				next if $nolacuna && $rdg->is_lacuna && $rdg->rank ne $rank;
2148				$readings{$e->{'t'}->id} = $e->{'t'};
2149				}
2150				return values %readings;
2151				}
2152
2153				=head2 next_reading( $reading, $sigil );
2154
2155				Returns the reading that follows the given reading along the given witness
2156				path.
2157
2158				=cut
2159
2160				sub next_reading {
2161				# Return the successor via the corresponding path.
2162				my $self = shift;
2163				my $answer = $self->_find_linked_reading( 'next', @_ );
2164				return undef unless $answer;
2165				return $self->reading( $answer );
2166				}
2167
2168				=head2 prior_reading( $reading, $sigil )
2169
2170				Returns the reading that precedes the given reading along the given witness
2171				path.
2172
2173				=cut
2174
2175				sub prior_reading {
2176				# Return the predecessor via the corresponding path.
2177				my $self = shift;
2178				my $answer = $self->_find_linked_reading( 'prior', @_ );
2179				return $self->reading( $answer );
2180				}
2181
2182				sub _find_linked_reading {
2183				my( $self, $direction, $node, $path ) = @_;
2184
2185				# Get a backup if we are dealing with a layered witness
2186				my $alt_path;
2187				my $aclabel = $self->ac_label;
2188				if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
2189				$alt_path = $1;
2190				}
2191
2192				my @linked_paths = $direction eq 'next'
2193				? $self->sequence->edges_from( $node )
2194				: $self->sequence->edges_to( $node );
2195				return undef unless scalar( @linked_paths );
2196
2197				# We have to find the linked path that contains all of the
2198				# witnesses supplied in $path.
2199				my( @path_wits, @alt_path_wits );
2200				@path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
2201				@alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
2202				my $base_le;
2203				my $alt_le;
2204				foreach my $le ( @linked_paths ) {
2205				if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
2206				$base_le = $le;
2207				}
2208				my @le_wits = sort $self->path_witnesses( $le );
2209				if( _is_within( \@path_wits, \@le_wits ) ) {
2210				# This is the right path.
2211				return $direction eq 'next' ? $le->[1] : $le->[0];
2212				} elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
2213				$alt_le = $le;
2214				}
2215				}
2216				# Got this far? Return the alternate path if it exists.
2217				return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
2218				if $alt_le;
2219
2220				# Got this far? Return the base path if it exists.
2221				return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
2222				if $base_le;
2223
2224				# Got this far? We have no appropriate path.
2225				warn "Could not find $direction node from " . $node->id
2226				. " along path $path";
2227				return undef;
2228				}
2229
2230				# Some set logic.
2231				sub _is_within {
2232				my( $set1, $set2 ) = @_;
2233				my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
2234				foreach my $el ( @$set1 ) {
2235				$ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
2236				}
2237				return $ret;
2238				}
2239
2240				# Return the string that joins together a list of witnesses for
2241				# display on a single path.
2242				sub _witnesses_of_label {
2243				my( $self, $label ) = @_;
2244				my $regex = $self->wit_list_separator;
2245				my @answer = split( /\Q$regex\E/, $label );
2246				return @answer;
2247				}
2248
2249				=head2 common_readings
2250
2251				Returns the list of common readings in the graph (i.e. those readings that are
2252				shared by all non-lacunose witnesses.)
2253
2254				=cut
2255
2256				sub common_readings {
2257				my $self = shift;
2258				my @common = grep { $_->is_common } $self->readings;
2259				return @common;
2260				}
2261
2262				=head2 path_text( $sigil [, $start, $end, $use_normal_form ] )
2263
2264				Returns the text of a witness (plus its backup, if we are using a layer) as
2265				stored in the collation. The text is returned as a string, where the
2266				individual readings are joined with spaces and the meta-readings (e.g.
2267				lacunae) are omitted. Optional specification of $start and $end allows the
2268				generation of a subset of the witness text. Optional specification of
2269				$use_normal_form produces a text based on the normal form, rather than the
2270				raw text, of the reading.
2271
2272				=cut
2273
2274				sub path_text {
2275				my( $self, $wit, $start, $end, $normal ) = @_;
2276				$start = $self->start unless $start;
2277				$end = $self->end unless $end;
2278				my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
2279				return $self->known_path_text( $normal, @path );
2280				}
2281
2282				=head2 known_path_text( $use_normal_form, @sequence )
2283
2284				Returns the text of a given sequence of readings. No attempt is made to
2285				validate the sequence in question. If $use_normal_form is set to true, the
2286				normal form of each reading in the sequence will be used to construct the
2287				text.
2288
2289				=cut
2290
2291				sub known_path_text {
2292				my( $self, $normal, @path ) = @_;
2293				my $pathtext = '';
2294				my $last;
2295				foreach my $r ( @path ) {
2296				unless ( $r->join_prior \|\| !$last \|\| $last->join_next ) {
2297				$pathtext .= ' ';
2298				}
2299				$pathtext .= $normal ? $r->normal_form : $r->text;
2300				$last = $r;
2301				}
2302				return $pathtext;
2303				}
2304
2305				=head1 INITIALIZATION METHODS
2306
2307				These are mostly for use by parsers.
2308
2309				=head2 make_witness_path( $witness )
2310
2311				Link the array of readings contained in $witness->path (and in
2312				$witness->uncorrected_path if it exists) into collation paths.
2313				Clear out the arrays when finished.
2314
2315				=head2 make_witness_paths
2316
2317				Call make_witness_path for all witnesses in the tradition.
2318
2319				=cut
2320
2321				# For use when a collation is constructed from a base text and an apparatus.
2322				# We have the sequences of readings and just need to add path edges.
2323				# When we are done, clear out the witness path attributes, as they are no
2324				# longer needed.
2325				# TODO Find a way to replace the witness path attributes with encapsulated functions?
2326
2327				sub make_witness_paths {
2328				my( $self ) = @_;
2329				foreach my $wit ( $self->tradition->witnesses ) {
2330				# say STDERR "Making path for " . $wit->sigil;
2331				$self->make_witness_path( $wit );
2332				}
2333				}
2334
2335				sub make_witness_path {
2336				my( $self, $wit ) = @_;
2337				my @chain = @{$wit->path};
2338				my $sig = $wit->sigil;
2339				# Add start and end if necessary
2340				unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
2341				push( @chain, $self->end ) unless $chain[-1] eq $self->end;
2342				foreach my $idx ( 0 .. $#chain-1 ) {
2343				$self->add_path( $chain[$idx], $chain[$idx+1], $sig );
2344				}
2345				if( $wit->is_layered ) {
2346				@chain = @{$wit->uncorrected_path};
2347				unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
2348				push( @chain, $self->end ) unless $chain[-1] eq $self->end;
2349				foreach my $idx( 0 .. $#chain-1 ) {
2350				my $source = $chain[$idx];
2351				my $target = $chain[$idx+1];
2352				$self->add_path( $source, $target, $sig.$self->ac_label )
2353				unless $self->has_path( $source, $target, $sig );
2354				}
2355				}
2356				$wit->clear_path;
2357				$wit->clear_uncorrected_path;
2358				}
2359
2360				=head2 calculate_ranks
2361
2362				Calculate the reading ranks (that is, their aligned positions relative
2363				to each other) for the graph. This can only be called on linear collations.
2364
2365				=begin testing
2366
2367				use Text::Tradition;
2368
2369				my $cxfile = 't/data/Collatex-16.xml';
2370				my $t = Text::Tradition->new(
2371				'name' => 'inline',
2372				'input' => 'CollateX',
2373				'file' => $cxfile,
2374				);
2375				my $c = $t->collation;
2376
2377				# Make an svg
2378				my $table = $c->alignment_table;
2379				ok( $c->has_cached_table, "Alignment table was cached" );
2380				is( $c->alignment_table, $table, "Cached table returned upon second call" );
2381				$c->calculate_ranks;
2382				is( $c->alignment_table, $table, "Cached table retained with no rank change" );
2383				$c->add_relationship( 'n13', 'n23', { type => 'repetition' } );
2384				is( $c->alignment_table, $table, "Alignment table unchanged after non-colo relationship add" );
2385				$c->add_relationship( 'n24', 'n23', { type => 'spelling' } );
2386				isnt( $c->alignment_table, $table, "Alignment table changed after colo relationship add" );
2387
2388				=end testing
2389
2390				=cut
2391
2392				sub calculate_ranks {
2393				my $self = shift;
2394				# Save the existing ranks, in case we need to invalidate the cached SVG.
2395				throw( "Cannot calculate ranks on a non-linear graph" )
2396				unless $self->linear;
2397				my %existing_ranks;
2398				map { $existing_ranks{$_} = $_->rank } $self->readings;
2399
2400				# Do the rankings based on the relationship equivalence graph, starting
2401				# with the start node.
2402				my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
2403
2404				# Transfer our rankings from the topological graph to the real one.
2405				foreach my $r ( $self->readings ) {
2406				if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
2407				$r->rank( $node_ranks->{$self->equivalence( $r->id )} );
2408				} else {
2409				# Die. Find the last rank we calculated.
2410				my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}\|\|-1 )
2411				<=> ( $node_ranks->{$self->equivalence( $b->id )}\|\|-1 ) }
2412				$self->readings;
2413				my $last = pop @all_defined;
2414				throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
2415				}
2416				}
2417				# Do we need to invalidate the cached data?
2418				if( $self->has_cached_table ) {
2419				foreach my $r ( $self->readings ) {
2420				next if defined( $existing_ranks{$r} )
2421				&& $existing_ranks{$r} == $r->rank;
2422				# Something has changed, so clear the cache
2423				$self->_clear_cache;
2424				# ...and recalculate the common readings.
2425				$self->calculate_common_readings();
2426				last;
2427				}
2428				}
2429				# The graph calculation information is now up to date.
2430				$self->_graphcalc_done(1);
2431				}
2432
2433				sub _clear_cache {
2434				my $self = shift;
2435				$self->wipe_table if $self->has_cached_table;
2436				}
2437
2438
2439				=head2 flatten_ranks
2440
2441				A convenience method for parsing collation data. Searches the graph for readings
2442				with the same text at the same rank, and merges any that are found.
2443
2444				=cut
2445
2446				sub flatten_ranks {
2447				my ( $self, %args ) = shift;
2448				my %unique_rank_rdg;
2449				my $changed;
2450				foreach my $p ( $self->identical_readings( %args ) ) {
2451				# say STDERR "Combining readings at same rank: @$p";
2452				$changed = 1;
2453				$self->merge_readings( @$p );
2454				# TODO see if this now makes a common point.
2455				}
2456				# If we merged readings, the ranks are still fine but the alignment
2457				# table is wrong. Wipe it.
2458				$self->wipe_table() if $changed;
2459				}
2460
2461				=head2 identical_readings
2462				=head2 identical_readings( start => $startnode, end => $endnode )
2463				=head2 identical_readings( startrank => $startrank, endrank => $endrank )
2464
2465				Goes through the graph identifying all pairs of readings that appear to be
2466				identical, and therefore able to be merged into a single reading. Returns the
2467				relevant identical pairs. Can be restricted to run over only a part of the
2468				graph, specified either by node or by rank.
2469
2470				=cut
2471
2472				sub identical_readings {
2473				my ( $self, %args ) = @_;
2474				# Find where we should start and end.
2475				my $startrank = $args{startrank} \|\| 0;
2476				if( $args{start} ) {
2477				throw( "Starting reading has no rank" ) unless $self->reading( $args{start} )
2478				&& $self->reading( $args{start} )->has_rank;
2479				$startrank = $self->reading( $args{start} )->rank;
2480				}
2481				my $endrank = $args{endrank} \|\| $self->end->rank;
2482				if( $args{end} ) {
2483				throw( "Ending reading has no rank" ) unless $self->reading( $args{end} )
2484				&& $self->reading( $args{end} )->has_rank;
2485				$endrank = $self->reading( $args{end} )->rank;
2486				}
2487
2488				# Make sure the ranks are correct.
2489				unless( $self->_graphcalc_done ) {
2490				$self->calculate_ranks;
2491				}
2492				# Go through the readings looking for duplicates.
2493				my %unique_rank_rdg;
2494				my @pairs;
2495				foreach my $rdg ( $self->readings ) {
2496				next unless $rdg->has_rank;
2497				my $rk = $rdg->rank;
2498				next if $rk > $endrank \|\| $rk < $startrank;
2499				my $key = $rk . "\|\|" . $rdg->text;
2500				if( exists $unique_rank_rdg{$key} ) {
2501				# Make sure they don't have different grammatical forms
2502				my $ur = $unique_rank_rdg{$key};
2503				if( $rdg->is_identical( $ur ) ) {
2504				push( @pairs, [ $ur, $rdg ] );
2505				}
2506				} else {
2507				$unique_rank_rdg{$key} = $rdg;
2508				}
2509				}
2510
2511				return @pairs;
2512				}
2513
2514
2515				=head2 calculate_common_readings
2516
2517				Goes through the graph identifying the readings that appear in every witness
2518				(apart from those with lacunae at that spot.) Marks them as common and returns
2519				the list.
2520
2521				=begin testing
2522
2523				use Text::Tradition;
2524
2525				my $cxfile = 't/data/Collatex-16.xml';
2526				my $t = Text::Tradition->new(
2527				'name' => 'inline',
2528				'input' => 'CollateX',
2529				'file' => $cxfile,
2530				);
2531				my $c = $t->collation;
2532
2533				my @common = $c->calculate_common_readings();
2534				is( scalar @common, 8, "Found correct number of common readings" );
2535				my @marked = sort $c->common_readings();
2536				is( scalar @common, 8, "All common readings got marked as such" );
2537				my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
2538				is_deeply( \@marked, \@expected, "Found correct list of common readings" );
2539
2540				=end testing
2541
2542				=cut
2543
2544				sub calculate_common_readings {
2545				my $self = shift;
2546				my @common;
2547				map { $_->is_common( 0 ) } $self->readings;
2548				# Implicitly calls calculate_ranks
2549				my $table = $self->alignment_table;
2550				foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
2551				my @row = map { $_->{'tokens'}->[$idx]
2552				? $_->{'tokens'}->[$idx]->{'t'} : '' }
2553				@{$table->{'alignment'}};
2554				my %hash;
2555				foreach my $r ( @row ) {
2556				if( $r ) {
2557				$hash{$r->id} = $r unless $r->is_meta;
2558				} else {
2559				$hash{'UNDEF'} = $r;
2560				}
2561				}
2562				if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
2563				my( $r ) = values %hash;
2564				$r->is_common( 1 );
2565				push( @common, $r );
2566				}
2567				}
2568				return @common;
2569				}
2570
2571				=head2 text_from_paths
2572
2573				Calculate the text array for all witnesses from the path, for later consistency
2574				checking. Only to be used if there is no non-graph-based way to know the
2575				original texts.
2576
2577				=cut
2578
2579				sub text_from_paths {
2580				my $self = shift;
2581				foreach my $wit ( $self->tradition->witnesses ) {
2582				my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
2583				my @text;
2584				foreach my $r ( @readings ) {
2585				next if $r->is_meta;
2586				push( @text, $r->text );
2587				}
2588				$wit->text( \@text );
2589				if( $wit->is_layered ) {
2590				my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
2591				$wit->sigil.$self->ac_label );
2592				my @uctext;
2593				foreach my $r ( @ucrdgs ) {
2594				next if $r->is_meta;
2595				push( @uctext, $r->text );
2596				}
2597				$wit->layertext( \@uctext );
2598				}
2599				}
2600				}
2601
2602				=head1 UTILITY FUNCTIONS
2603
2604				=head2 common_predecessor( $reading_a, $reading_b )
2605
2606				Find the last reading that occurs in sequence before both the given readings.
2607				At the very least this should be $self->start.
2608
2609				=head2 common_successor( $reading_a, $reading_b )
2610
2611				Find the first reading that occurs in sequence after both the given readings.
2612				At the very least this should be $self->end.
2613
2614				=begin testing
2615
2616				use Text::Tradition;
2617
2618				my $cxfile = 't/data/Collatex-16.xml';
2619				my $t = Text::Tradition->new(
2620				'name' => 'inline',
2621				'input' => 'CollateX',
2622				'file' => $cxfile,
2623				);
2624				my $c = $t->collation;
2625
2626				is( $c->common_predecessor( 'n24', 'n23' )->id,
2627				'n20', "Found correct common predecessor" );
2628				is( $c->common_successor( 'n24', 'n23' )->id,
2629				'__END__', "Found correct common successor" );
2630
2631				is( $c->common_predecessor( 'n19', 'n17' )->id,
2632				'n16', "Found correct common predecessor for readings on same path" );
2633				is( $c->common_successor( 'n21', 'n10' )->id,
2634				'__END__', "Found correct common successor for readings on same path" );
2635
2636				=end testing
2637
2638				=cut
2639
2640				## Return the closest reading that is a predecessor of both the given readings.
2641				sub common_predecessor {
2642				my $self = shift;
2643				my( $r1, $r2 ) = $self->_objectify_args( @_ );
2644				return $self->_common_in_path( $r1, $r2, 'predecessors' );
2645				}
2646
2647				sub common_successor {
2648				my $self = shift;
2649				my( $r1, $r2 ) = $self->_objectify_args( @_ );
2650				return $self->_common_in_path( $r1, $r2, 'successors' );
2651				}
2652
2653
2654				# TODO think about how to do this without ranks...
2655				sub _common_in_path {
2656				my( $self, $r1, $r2, $dir ) = @_;
2657				my $iter = $self->end->rank;
2658				my @candidates;
2659				my @last_r1 = ( $r1 );
2660				my @last_r2 = ( $r2 );
2661				# my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
2662				my %all_seen;
2663				# say STDERR "Finding common $dir for $r1, $r2";
2664				while( !@candidates ) {
2665				last unless $iter--; # Avoid looping infinitely
2666				# Iterate separately down the graph from r1 and r2
2667				my( @new_lc1, @new_lc2 );
2668				foreach my $lc ( @last_r1 ) {
2669				foreach my $p ( $lc->$dir ) {
2670				if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
2671				# say STDERR "Path candidate $p from $lc";
2672				push( @candidates, $p );
2673				} elsif( !$all_seen{$p->id} ) {
2674				$all_seen{$p->id} = 'r1';
2675				push( @new_lc1, $p );
2676				}
2677				}
2678				}
2679				foreach my $lc ( @last_r2 ) {
2680				foreach my $p ( $lc->$dir ) {
2681				if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
2682				# say STDERR "Path candidate $p from $lc";
2683				push( @candidates, $p );
2684				} elsif( !$all_seen{$p->id} ) {
2685				$all_seen{$p->id} = 'r2';
2686				push( @new_lc2, $p );
2687				}
2688				}
2689				}
2690				@last_r1 = @new_lc1;
2691				@last_r2 = @new_lc2;
2692				}
2693				my @answer = sort { $a->rank <=> $b->rank } @candidates;
2694				return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
2695				}
2696
2697				sub throw {
2698				Text::Tradition::Error->throw(
2699				'ident' => 'Collation error',
2700				'message' => $_[0],
2701				);
2702				}
2703
2704				no Moose;
2705				__PACKAGE__->meta->make_immutable;
2706
2707				=head1 BUGS/TODO
2708
2709				=over
2710
2711				=item * Rework XML serialization in a more modular way
2712
2713				=back
2714
2715				=head1 LICENSE
2716
2717				This package is free software and is provided "as is" without express
2718				or implied warranty. You can redistribute it and/or modify it under
2719				the same terms as Perl itself.
2720
2721				=head1 AUTHOR
2722
2723				Tara L Andrews E<lt>aurum@cpan.orgE<gt>