File Coverage

lib/Text/Tradition/Collation.pm
Criterion Covered Total %
statement 40 42 95.2
branch n/a
condition n/a
subroutine 14 14 100.0
pod n/a
total 54 56 96.4


line stmt bran cond sub pod time code
1             package Text::Tradition::Collation;
2              
3 10     10   72 use feature 'say';
  10         26  
  10         1131  
4 10     10   3670 use Encode qw( decode_utf8 );
  10         130764  
  10         868  
5 10     10   5252 use File::Temp;
  10         149958  
  10         746  
6 10     10   2659 use File::Which;
  10         8201  
  10         506  
7 10     10   5209 use Graph;
  10         866422  
  10         455  
8 10     10   7924 use IPC::Run qw( run binary );
  10         279834  
  10         662  
9 10     10   101 use JSON qw/ to_json /;
  10         31  
  10         95  
10 10     10   6353 use Text::CSV;
  10         102751  
  10         512  
11 10     10   3623 use Text::Tradition::Collation::Data;
  10         46  
  10         479  
12 10     10   4632 use Text::Tradition::Collation::Reading;
  10         113  
  10         2087  
13 10     10   5421 use Text::Tradition::Collation::RelationshipStore;
  10         47  
  10         412  
14 10     10   91 use Text::Tradition::Error;
  10         26  
  10         284  
15 10     10   59 use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
  10         22  
  10         1265  
16 10     10   15701 use XML::LibXML;
  0            
  0            
17             use XML::LibXML::XPathContext;
18             use Moose;
19              
20             has _data => (
21             isa => 'Text::Tradition::Collation::Data',
22             is => 'ro',
23             required => 1,
24             handles => [ qw(
25             sequence
26             paths
27             _set_relations
28             relations
29             _set_start
30             _set_end
31             ac_label
32             has_cached_table
33             relationships
34             relationship_types
35             related_readings
36             get_relationship
37             del_relationship
38             equivalence
39             equivalence_graph
40             readings
41             reading
42             _add_reading
43             del_reading
44             has_reading
45             wit_list_separator
46             baselabel
47             linear
48             wordsep
49             direction
50             change_direction
51             start
52             end
53             cached_table
54             _graphcalc_done
55             has_cached_svg
56             wipe_table
57             )]
58             );
59              
60             has 'tradition' => (
61             is => 'ro',
62             isa => 'Text::Tradition',
63             writer => '_set_tradition',
64             weak_ref => 1,
65             );
66              
67             =encoding utf8
68              
69             =head1 NAME
70              
71             Text::Tradition::Collation - a software model for a text collation
72              
73             =head1 SYNOPSIS
74              
75             use Text::Tradition;
76             my $t = Text::Tradition->new(
77             'name' => 'this is a text',
78             'input' => 'TEI',
79             'file' => '/path/to/tei_parallel_seg_file.xml' );
80              
81             my $c = $t->collation;
82             my @readings = $c->readings;
83             my @paths = $c->paths;
84             my @relationships = $c->relationships;
85            
86             my $svg_variant_graph = $t->collation->as_svg();
87            
88             =head1 DESCRIPTION
89              
90             Text::Tradition is a library for representation and analysis of collated
91             texts, particularly medieval ones. The Collation is the central feature of
92             a Tradition, where the text, its sequence of readings, and its relationships
93             between readings are actually kept.
94              
95             =head1 CONSTRUCTOR
96              
97             =head2 new
98              
99             The constructor. Takes a hash or hashref of the following arguments:
100              
101             =over
102              
103             =item * tradition - The Text::Tradition object to which the collation
104             belongs. Required.
105              
106             =item * linear - Whether the collation should be linear; that is, whether
107             transposed readings should be treated as two linked readings rather than one,
108             and therefore whether the collation graph is acyclic. Defaults to true.
109              
110             =item * baselabel - The default label for the path taken by a base text
111             (if any). Defaults to 'base text'.
112              
113             =item * wit_list_separator - The string to join a list of witnesses for
114             purposes of making labels in display graphs. Defaults to ', '.
115              
116             =item * ac_label - The extra label to tack onto a witness sigil when
117             representing another layer of path for the given witness - that is, when
118             a text has more than one possible reading due to scribal corrections or
119             the like. Defaults to ' (a.c.)'.
120              
121             =item * wordsep - The string used to separate words in the original text.
122             Defaults to ' '.
123              
124             =back
125              
126             =head1 ACCESSORS
127              
128             =head2 tradition
129              
130             =head2 linear
131              
132             =head2 wit_list_separator
133              
134             =head2 baselabel
135              
136             =head2 ac_label
137              
138             =head2 wordsep
139              
140             Simple accessors for collation attributes.
141              
142             =head2 start
143              
144             The meta-reading at the start of every witness path.
145              
146             =head2 end
147              
148             The meta-reading at the end of every witness path.
149              
150             =head2 readings
151              
152             Returns all Reading objects in the graph.
153              
154             =head2 reading( $id )
155              
156             Returns the Reading object corresponding to the given ID.
157              
158             =head2 add_reading( $reading_args )
159              
160             Adds a new reading object to the collation.
161             See L<Text::Tradition::Collation::Reading> for the available arguments.
162              
163             =head2 del_reading( $object_or_id )
164              
165             Removes the given reading from the collation, implicitly removing its
166             paths and relationships.
167              
168             =head2 has_reading( $id )
169              
170             Predicate to see whether a given reading ID is in the graph.
171              
172             =head2 reading_witnesses( $object_or_id )
173              
174             Returns a list of sigils whose witnesses contain the reading.
175              
176             =head2 paths
177              
178             Returns all reading paths within the document - that is, all edges in the
179             collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
180              
181             =head2 add_path( $source, $target, $sigil )
182              
183             Links the given readings in the collation in sequence, under the given witness
184             sigil. The readings may be specified by object or ID.
185              
186             =head2 del_path( $source, $target, $sigil )
187              
188             Links the given readings in the collation in sequence, under the given witness
189             sigil. The readings may be specified by object or ID.
190              
191             =head2 has_path( $source, $target );
192              
193             Returns true if the two readings are linked in sequence in any witness.
194             The readings may be specified by object or ID.
195              
196             =head2 relationships
197              
198             Returns all Relationship objects in the collation.
199              
200             =head2 add_relationship( $reading, $other_reading, $options, $changed_readings )
201              
202             Adds a new relationship of the type given in $options between the two readings,
203             which may be specified by object or ID. Returns a value of ( $status, @vectors)
204             where $status is true on success, and @vectors is a list of relationship edges
205             that were ultimately added. If an array reference is passed in as $changed_readings,
206             then any readings that were altered due to the relationship creation are added to
207             the array.
208              
209             See L<Text::Tradition::Collation::Relationship> for the available options.
210              
211             =cut
212              
213             sub BUILDARGS {
214             my ( $class, @args ) = @_;
215             my %args = @args == 1 ? %{ $args[0] } : @args;
216             # TODO determine these from the Moose::Meta object
217             my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
218             linear wordsep direction start end cached_table _graphcalc_done);
219             my %data_args;
220             for my $attr (@delegate_attrs) {
221             $data_args{$attr} = delete $args{$attr} if exists $args{$attr};
222             }
223             $args{_data} = Text::Tradition::Collation::Data->new(%data_args);
224             return \%args;
225             }
226              
227             sub BUILD {
228             my $self = shift;
229             $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
230             $self->_set_start( $self->add_reading(
231             { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
232             $self->_set_end( $self->add_reading(
233             { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
234             }
235              
236             =head2 register_relationship_type( %relationship_definition )
237              
238             Add a relationship type definition to this collation. The argument can be either a
239             hash or a hashref, defining the properties of the relationship. For relationship types
240             and their properties, see L<Text::Tradition::Collation::RelationshipType>.
241              
242             =head2 get_relationship_type( $relationship_name )
243              
244             Retrieve the RelationshipType object for the relationship with the given name.
245              
246             =cut
247              
248             sub register_relationship_type {
249             my $self = shift;
250             my %args = @_ == 1 ? %{$_[0]} : @_;
251             if( $self->relations->has_type( $args{name} ) ) {
252             throw( 'Relationship type ' . $args{name} . ' already registered' );
253             }
254             $self->relations->add_type( %args );
255             }
256              
257             sub get_relationship_type {
258             my( $self, $name ) = @_;
259             return $self->relations->has_type( $name )
260             ? $self->relations->type( $name ) : undef;
261             }
262              
263             ### Reading construct/destruct functions
264              
265             sub add_reading {
266             my( $self, $reading ) = @_;
267             unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
268             my %args = %$reading;
269             if( $args{'init'} ) {
270             # If we are initializing an empty collation, don't assume that we
271             # have set a tradition.
272             delete $args{'init'};
273             } elsif( $self->tradition->can('language') && $self->tradition->has_language
274             && !exists $args{'language'} ) {
275             $args{'language'} = $self->tradition->language;
276             }
277             $reading = Text::Tradition::Collation::Reading->new(
278             'collation' => $self,
279             %args );
280             }
281             # First check to see if a reading with this ID exists.
282             if( $self->reading( $reading->id ) ) {
283             throw( "Collation already has a reading with id " . $reading->id );
284             }
285             $self->_graphcalc_done(0);
286             $self->_add_reading( $reading->id => $reading );
287             # Once the reading has been added, put it in both graphs.
288             $self->sequence->add_vertex( $reading->id );
289             $self->relations->add_reading( $reading->id );
290             return $reading;
291             };
292              
293             around del_reading => sub {
294             my $orig = shift;
295             my $self = shift;
296             my $arg = shift;
297            
298             if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
299             $arg = $arg->id;
300             }
301             # Remove the reading from the graphs.
302             $self->_graphcalc_done(0);
303             $self->_clear_cache; # Explicitly clear caches to GC the reading
304             $self->sequence->delete_vertex( $arg );
305             $self->relations->delete_reading( $arg );
306            
307             # Carry on.
308             $self->$orig( $arg );
309             };
310              
311             =head2 merge_readings( $main, $second, $concatenate, $with_str )
312              
313             Merges the $second reading into the $main one. If $concatenate is true, then
314             the merged node will carry the text of both readings, concatenated with either
315             $with_str (if specified) or a sensible default (the empty string if the
316             appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
317              
318             The first two arguments may be either readings or reading IDs.
319              
320             =begin testing
321              
322             use Test::More::UTF8;
323             use Text::Tradition;
324             use TryCatch;
325              
326             my $cxfile = 't/data/Collatex-16.xml';
327             my $t = Text::Tradition->new(
328             'name' => 'inline',
329             'input' => 'CollateX',
330             'file' => $cxfile,
331             );
332             my $c = $t->collation;
333              
334             my $rno = scalar $c->readings;
335             # Split n21 ('unto') for testing purposes
336             my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
337             my $old_r = $c->reading( 'n21' );
338             $old_r->alter_text( 'to' );
339             $c->del_path( 'n20', 'n21', 'A' );
340             $c->add_path( 'n20', 'n21p0', 'A' );
341             $c->add_path( 'n21p0', 'n21', 'A' );
342             $c->add_relationship( 'n21', 'n22', { type => 'collated', scope => 'local' } );
343             $c->flatten_ranks();
344             ok( $c->reading( 'n21p0' ), "New reading exists" );
345             is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
346              
347             # Combine n3 and n4 ( with his )
348             $c->merge_readings( 'n3', 'n4', 1 );
349             ok( !$c->reading('n4'), "Reading n4 is gone" );
350             is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
351              
352             # Collapse n9 and n10 ( rood / root )
353             $c->merge_readings( 'n9', 'n10' );
354             ok( !$c->reading('n10'), "Reading n10 is gone" );
355             is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
356              
357             # Try to combine n21 and n21p0. This should break.
358             my $remaining = $c->reading('n21');
359             $remaining ||= $c->reading('n22'); # one of these should still exist
360             try {
361             $c->merge_readings( 'n21p0', $remaining, 1 );
362             ok( 0, "Bad reading merge changed the graph" );
363             } catch( Text::Tradition::Error $e ) {
364             like( $e->message, qr/neither concatenated nor collated/, "Expected exception from bad concatenation" );
365             } catch {
366             ok( 0, "Unexpected error on bad reading merge: $@" );
367             }
368              
369             try {
370             $c->calculate_ranks();
371             ok( 1, "Graph is still evidently whole" );
372             } catch( Text::Tradition::Error $e ) {
373             ok( 0, "Caught a rank exception: " . $e->message );
374             }
375              
376             # Test right-to-left reading merge.
377             my $rtl = Text::Tradition->new(
378             'name' => 'inline',
379             'input' => 'Tabular',
380             'sep_char' => ',',
381             'direction' => 'RL',
382             'file' => 't/data/arabic_snippet.csv'
383             );
384             my $rtlc = $rtl->collation;
385             is( $rtlc->reading('r8.1')->text, 'سبب', "Got target first reading in RTL text" );
386             my $pt = $rtlc->path_text('A');
387             my @path = $rtlc->reading_sequence( $rtlc->start, $rtlc->end, 'A' );
388             is( $rtlc->reading('r9.1')->text, 'صلاح', "Got target second reading in RTL text" );
389             $rtlc->merge_readings( 'r8.1', 'r9.1', 1 );
390             is( $rtlc->reading('r8.1')->text, 'سبب صلاح', "Got target merged reading in RTL text" );
391             is( $rtlc->path_text('A'), $pt, "Path text is still correct" );
392             is( scalar($rtlc->reading_sequence( $rtlc->start, $rtlc->end, 'A' )),
393             scalar(@path) - 1, "Path was shortened" );
394              
395             =end testing
396              
397             =cut
398              
399             sub merge_readings {
400             my $self = shift;
401              
402             # Sanity check
403             my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
404             my $mergemeta = $kept_obj->is_meta;
405             throw( "Cannot merge meta and non-meta reading" )
406             unless ( $mergemeta && $del_obj->is_meta )
407             || ( !$mergemeta && !$del_obj->is_meta );
408             if( $mergemeta ) {
409             throw( "Cannot merge with start or end node" )
410             if( $kept_obj eq $self->start || $kept_obj eq $self->end
411             || $del_obj eq $self->start || $del_obj eq $self->end );
412             throw( "Cannot combine text of meta readings" ) if $combine;
413             }
414             # We can only merge readings in a linear graph if:
415             # - they are contiguous with only one edge between them, OR
416             # - they are at equivalent ranks in the graph.
417             if( $self->linear ) {
418             my @delpred = $del_obj->predecessors;
419             my @keptsuc = $kept_obj->successors;
420             unless ( @delpred == 1 && $delpred[0] eq $kept_obj
421             && @keptsuc == 1 && $keptsuc[0] eq $del_obj ) {
422             my( $is_ok, $msg ) = $self->relations->relationship_valid(
423             $kept_obj, $del_obj, 'collated' );
424             unless( $is_ok ) {
425             throw( "Readings $kept_obj and $del_obj can be neither concatenated nor collated" );
426             }
427             }
428             }
429            
430             # We only need the IDs for adding paths to the graph, not the reading
431             # objects themselves.
432             my $kept = $kept_obj->id;
433             my $deleted = $del_obj->id;
434             $self->_graphcalc_done(0);
435            
436             # The kept reading should inherit the paths and the relationships
437             # of the deleted reading.
438             foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
439             my @vector = ( $kept );
440             push( @vector, $path->[1] ) if $path->[0] eq $deleted;
441             unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
442             next if $vector[0] eq $vector[1]; # Don't add a self loop
443             my %wits = %{$self->sequence->get_edge_attributes( @$path )};
444             $self->sequence->add_edge( @vector );
445             my $fwits = $self->sequence->get_edge_attributes( @vector );
446             @wits{keys %$fwits} = values %$fwits;
447             $self->sequence->set_edge_attributes( @vector, \%wits );
448             }
449             $self->relations->merge_readings( $kept, $deleted, $combine );
450            
451             # Do the deletion deed.
452             if( $combine ) {
453             # Combine the text of the readings
454             my $joinstr = $combine_char;
455             unless( defined $joinstr ) {
456             $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
457             $joinstr = $self->wordsep unless defined $joinstr;
458             }
459             $kept_obj->_combine( $del_obj, $joinstr );
460             }
461             $self->del_reading( $deleted );
462             }
463              
464             =head2 merge_related( @relationship_types )
465              
466             Merge all readings linked with the relationship types given. If any of the selected type(s) is not a colocation, the graph will no longer be linear. The majority/plurality reading in each case will be the one kept.
467              
468             WARNING: This operation cannot be undone.
469              
470             =cut
471              
472             =begin testing
473              
474             use Test::Warn;
475             use Text::Tradition;
476             use TryCatch;
477              
478             my $t;
479             warnings_exist {
480             $t = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
481             } [qr/Cannot set relationship on a meta reading/],
482             "Got expected relationship drop warning on parse";
483              
484             my $c = $t->collation;
485             # Force the transitive propagation of all existing relationships.
486             $c->relations->propagate_all_relationships();
487              
488             my %rdg_ids;
489             map { $rdg_ids{$_} = 1 } $c->readings;
490             $c->merge_related( 'orthographic' );
491             is( scalar( $c->readings ), keys( %rdg_ids ) - 9,
492             "Successfully collapsed orthographic variation" );
493             map { $rdg_ids{$_} = undef } qw/ r13.3 r11.4 r8.5 r8.2 r7.7 r7.5 r7.4 r7.3 r7.1 /;
494             foreach my $rid ( keys %rdg_ids ) {
495             my $exp = $rdg_ids{$rid};
496             is( !$c->reading( $rid ), !$exp, "Reading $rid correctly " .
497             ( $exp ? "retained" : "removed" ) );
498             }
499             ok( $c->linear, "Graph is still linear" );
500             try {
501             $c->calculate_ranks; # This should succeed
502             ok( 1, "Can still calculate ranks on the new graph" );
503             } catch {
504             ok( 0, "Rank calculation on merged graph failed: $@" );
505             }
506              
507             # Now add some transpositions
508             $c->add_relationship( 'r8.4', 'r10.4', { type => 'transposition' } );
509             $c->merge_related( 'transposition' );
510             is( scalar( $c->readings ), keys( %rdg_ids ) - 10,
511             "Transposed relationship is merged away" );
512             ok( !$c->reading('r8.4'), "Correct transposed reading removed" );
513             ok( !$c->linear, "Graph is no longer linear" );
514             try {
515             $c->calculate_ranks; # This should fail
516             ok( 0, "Rank calculation happened on nonlinear graph?!" );
517             } catch ( Text::Tradition::Error $e ) {
518             is( $e->message, 'Cannot calculate ranks on a non-linear graph',
519             "Rank calculation on merged graph threw an error" );
520             }
521              
522             =end testing
523              
524             =cut
525              
526             # TODO: there should be a way to display merged without affecting the underlying data!
527              
528             sub merge_related {
529             my $self = shift;
530             my %reltypehash;
531             map { $reltypehash{$_} = 1 } @_;
532            
533             # Set up the filter for finding related readings
534             my $filter = sub {
535             exists $reltypehash{$_[0]->type};
536             };
537            
538             # Go through all readings looking for related ones
539             foreach my $r ( $self->readings ) {
540             next unless $self->reading( "$r" ); # might have been deleted meanwhile
541             while( my @related = $self->related_readings( $r, $filter ) ) {
542             push( @related, $r );
543             @related = sort {
544             scalar $b->witnesses <=> scalar $a->witnesses
545             } @related;
546             my $keep = shift @related;
547             foreach my $delr ( @related ) {
548             $self->linear( 0 )
549             unless( $self->get_relationship( $keep, $delr )->colocated );
550             $self->merge_readings( $keep, $delr );
551             }
552             }
553             }
554             }
555              
556             =head2 compress_readings
557              
558             Where possible in the graph, compresses plain sequences of readings into a
559             single reading. The sequences must consist of readings with no
560             relationships to other readings, with only a single witness path between
561             them and no other witness paths from either that would skip the other. The
562             readings must also not be marked as nonsense or bad grammar.
563              
564             WARNING: This operation cannot be undone.
565              
566             =begin testing
567              
568             use Text::Tradition;
569              
570             my $t = Text::Tradition->new( input => 'CollateX', file => 't/data/Collatex-16.xml' );
571             my $c = $t->collation;
572             my $n = scalar $c->readings;
573             $c->compress_readings();
574             is( scalar $c->readings, $n - 6, "Compressing readings seems to work" );
575              
576             # Now put in a join-word and make sure the thing still works.
577             my $t2 = Text::Tradition->new( input => 'CollateX', file => 't/data/Collatex-16.xml' );
578             my $c2 = $t2->collation;
579             # Split n21 ('unto') for testing purposes
580             my $new_r = $c2->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
581             my $old_r = $c2->reading( 'n21' );
582             $old_r->alter_text( 'to' );
583             $c2->del_path( 'n20', 'n21', 'A' );
584             $c2->add_path( 'n20', 'n21p0', 'A' );
585             $c2->add_path( 'n21p0', 'n21', 'A' );
586             $c2->calculate_ranks();
587             is( scalar $c2->readings, $n + 1, "We have our extra test reading" );
588             $c2->compress_readings();
589             is( scalar $c2->readings, $n - 6, "Compressing readings also works with join_next" );
590             is( $c2->reading( 'n21p0' )->text, 'unto', "The joined word has no space" );
591              
592              
593             =end testing
594              
595             =cut
596              
597             sub compress_readings {
598             my $self = shift;
599             # Sanity check: first save the original text of each witness.
600             my %origtext;
601             foreach my $wit ( $self->tradition->witnesses ) {
602             $origtext{$wit->sigil} = $self->path_text( $wit->sigil );
603             if( $wit->is_layered ) {
604             my $acsig = $wit->sigil . $self->ac_label;
605             $origtext{$acsig} = $self->path_text( $acsig );
606             }
607             }
608            
609             # Now do the deed.
610             # Anywhere in the graph that there is a reading that joins only to a single
611             # successor, and neither of these have any relationships, just join the two
612             # readings.
613             foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
614             # Now look for readings that can be joined to their successors.
615             next unless $rdg->is_combinable;
616             my %seen;
617             while( $self->sequence->successors( $rdg ) == 1 ) {
618             my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
619             throw( "Infinite loop" ) if $seen{$next->id};
620             $seen{$next->id} = 1;
621             last if $self->sequence->predecessors( $next ) > 1;
622             last unless $next->is_combinable;
623             say "Joining readings $rdg and $next";
624             $self->merge_readings( $rdg, $next, 1 );
625             }
626             }
627            
628             # Finally, make sure we haven't screwed anything up.
629             foreach my $wit ( $self->tradition->witnesses ) {
630             my $pathtext = $self->path_text( $wit->sigil );
631             throw( "Text differs for witness " . $wit->sigil )
632             unless $pathtext eq $origtext{$wit->sigil};
633             if( $wit->is_layered ) {
634             my $acsig = $wit->sigil . $self->ac_label;
635             $pathtext = $self->path_text( $acsig );
636             throw( "Layered text differs for witness " . $wit->sigil )
637             unless $pathtext eq $origtext{$acsig};
638             }
639             }
640              
641             $self->relations->rebuild_equivalence();
642             $self->calculate_ranks();
643             }
644              
645             # Helper function for manipulating the graph.
646             sub _stringify_args {
647             my( $self, $first, $second, @args ) = @_;
648             $first = $first->id
649             if ref( $first ) eq 'Text::Tradition::Collation::Reading';
650             $second = $second->id
651             if ref( $second ) eq 'Text::Tradition::Collation::Reading';
652             return( $first, $second, @args );
653             }
654              
655             # Helper function for manipulating the graph.
656             sub _objectify_args {
657             my( $self, $first, $second, $arg ) = @_;
658             $first = $self->reading( $first )
659             unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
660             $second = $self->reading( $second )
661             unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
662             return( $first, $second, $arg );
663             }
664              
665             =head2 duplicate_reading( $reading, @witlist )
666              
667             Split the given reading into two, so that the new reading is in the path for
668             the witnesses given in @witlist. If the result is that certain non-colocated
669             relationships (e.g. transpositions) are no longer valid, these will be removed.
670             Returns the newly-created reading.
671              
672             =begin testing
673              
674             use Test::More::UTF8;
675             use Text::Tradition;
676             use TryCatch;
677              
678             my $st = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/collatecorr.xml' );
679             is( ref( $st ), 'Text::Tradition', "Got a tradition from test file" );
680             ok( $st->has_witness('Ba96'), "Tradition has the affected witness" );
681              
682             my $sc = $st->collation;
683             my $numr = 17;
684             ok( $sc->reading('n131'), "Tradition has the affected reading" );
685             is( scalar( $sc->readings ), $numr, "There are $numr readings in the graph" );
686             is( $sc->end->rank, 14, "There are fourteen ranks in the graph" );
687              
688             # Detach the erroneously collated reading
689             my( $newr, @del_rdgs ) = $sc->duplicate_reading( 'n131', 'Ba96' );
690             ok( $newr, "New reading was created" );
691             ok( $sc->reading('n131_0'), "Detached the bad collation with a new reading" );
692             is( scalar( $sc->readings ), $numr + 1, "A reading was added to the graph" );
693             is( $sc->end->rank, 10, "There are now only ten ranks in the graph" );
694             my $csucc = $sc->common_successor( 'n131', 'n131_0' );
695             is( $csucc->id, 'n136', "Found correct common successor to duped reading" );
696              
697             # Check that the bad transposition is gone
698             is( scalar @del_rdgs, 1, "Deleted reading was returned by API call" );
699             is( $sc->get_relationship( 'n130', 'n135' ), undef, "Bad transposition relationship is gone" );
700              
701             # The collation should not be fixed
702             my @pairs = $sc->identical_readings();
703             is( scalar @pairs, 0, "Not re-collated yet" );
704             # Fix the collation
705             ok( $sc->merge_readings( 'n124', 'n131_0' ), "Collated the readings correctly" );
706             @pairs = $sc->identical_readings( start => 'n124', end => $csucc->id );
707             is( scalar @pairs, 3, "Found three more identical readings" );
708             is( $sc->end->rank, 11, "The ranks shifted appropriately" );
709             $sc->flatten_ranks();
710             is( scalar( $sc->readings ), $numr - 3, "Now we are collated correctly" );
711              
712             # Check that we can't "duplicate" a reading with no wits or with all wits
713             try {
714             my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124' );
715             ok( 0, "Reading duplication without witnesses throws an error" );
716             } catch( Text::Tradition::Error $e ) {
717             like( $e->message, qr/Must specify one or more witnesses/,
718             "Reading duplication without witnesses throws the expected error" );
719             } catch {
720             ok( 0, "Reading duplication without witnesses threw the wrong error" );
721             }
722              
723             try {
724             my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124', 'Ba96', 'Mü11475' );
725             ok( 0, "Reading duplication with all witnesses throws an error" );
726             } catch( Text::Tradition::Error $e ) {
727             like( $e->message, qr/Cannot join all witnesses/,
728             "Reading duplication with all witnesses throws the expected error" );
729             } catch {
730             ok( 0, "Reading duplication with all witnesses threw the wrong error" );
731             }
732              
733             try {
734             $sc->calculate_ranks();
735             ok( 1, "Graph is still evidently whole" );
736             } catch( Text::Tradition::Error $e ) {
737             ok( 0, "Caught a rank exception: " . $e->message );
738             }
739              
740             =end testing
741              
742             =cut
743              
744             sub duplicate_reading {
745             my( $self, $r, @wits ) = @_;
746             # Check that we are not doing anything unwise.
747             throw( "Must specify one or more witnesses for the duplicated reading" )
748             unless @wits;
749             unless( ref( $r ) eq 'Text::Tradition::Collation::Reading' ) {
750             $r = $self->reading( $r );
751             }
752             throw( "Cannot duplicate a meta-reading" )
753             if $r->is_meta;
754             throw( "Cannot join all witnesses to the new reading" )
755             if scalar( @wits ) == scalar( $r->witnesses );
756              
757             # Get all the reading attributes and duplicate them.
758             my $rmeta = Text::Tradition::Collation::Reading->meta;
759             my %args;
760             foreach my $attr( $rmeta->get_all_attributes ) {
761             next if $attr->name =~ /^_/;
762             my $acc = $attr->get_read_method;
763             if( !$acc && $attr->has_applied_traits ) {
764             my $tr = $attr->applied_traits;
765             if( $tr->[0] =~ /::(Array|Hash)$/ ) {
766             my $which = $1;
767             my %methods = reverse %{$attr->handles};
768             $acc = $methods{elements};
769             $args{$attr->name} = $which eq 'Array'
770             ? [ $r->$acc ] : { $r->$acc };
771             }
772             } elsif( $acc ) {
773             my $attrval = $r->$acc;
774             if( defined $attrval ) {
775             $args{$attr->name} = $attrval;
776             }
777             }
778             }
779             # By definition the new reading will no longer be common.
780             $args{is_common} = 0;
781             # The new reading also needs its own ID.
782             $args{id} = $self->_generate_dup_id( $r->id );
783              
784             # Try to make the new reading.
785             my $newr = $self->add_reading( \%args );
786             # The old reading is also no longer common.
787             $r->is_common( 0 );
788            
789             # For each of the witnesses, dissociate from the old reading and
790             # associate with the new.
791             foreach my $wit ( @wits ) {
792             my $prior = $self->prior_reading( $r, $wit );
793             my $next = $self->next_reading( $r, $wit );
794             $self->del_path( $prior, $r, $wit );
795             $self->add_path( $prior, $newr, $wit );
796             $self->del_path( $r, $next, $wit );
797             $self->add_path( $newr, $next, $wit );
798             }
799            
800             # If the graph is ranked, we need to look for relationships that are now
801             # invalid (i.e. 'non-colocation' types that might now be colocated) and
802             # remove them. If not, we can skip it.
803             my $succ;
804             my %rrk;
805             my @deleted_relations;
806             if( $self->end->has_rank ) {
807             # Find the point where we can stop checking
808             $succ = $self->common_successor( $r, $newr );
809            
810             # Hash the existing ranks
811             foreach my $rdg ( $self->readings ) {
812             $rrk{$rdg->id} = $rdg->rank;
813             }
814             # Calculate the new ranks
815             $self->calculate_ranks();
816            
817             # Check for invalid non-colocated relationships among changed-rank readings
818             # from where the ranks start changing up to $succ
819             my $lastrank = $succ->rank;
820             foreach my $rdg ( $self->readings ) {
821             next if $rdg->rank > $lastrank;
822             next if $rdg->rank == $rrk{$rdg->id};
823             my @noncolo = $rdg->related_readings( sub { !$_[0]->colocated } );
824             next unless @noncolo;
825             foreach my $nc ( @noncolo ) {
826             unless( $self->relations->verify_or_delete( $rdg, $nc ) ) {
827             push( @deleted_relations, [ $rdg->id, $nc->id ] );
828             }
829             }
830             }
831             }
832             return ( $newr, @deleted_relations );
833             }
834              
835             sub _generate_dup_id {
836             my( $self, $rid ) = @_;
837             my $newid;
838             my $i = 0;
839             while( !$newid ) {
840             $newid = $rid."_$i";
841             if( $self->has_reading( $newid ) ) {
842             $newid = '';
843             $i++;
844             }
845             }
846             return $newid;
847             }
848              
849             ### Path logic
850              
851             sub add_path {
852             my $self = shift;
853              
854             # We only need the IDs for adding paths to the graph, not the reading
855             # objects themselves.
856             my( $source, $target, $wit ) = $self->_stringify_args( @_ );
857              
858             $self->_graphcalc_done(0);
859             # Connect the readings
860             unless( $self->sequence->has_edge( $source, $target ) ) {
861             $self->sequence->add_edge( $source, $target );
862             $self->relations->add_equivalence_edge( $source, $target );
863             }
864             # Note the witness in question
865             $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
866             }
867              
868             sub del_path {
869             my $self = shift;
870             my @args;
871             if( ref( $_[0] ) eq 'ARRAY' ) {
872             my $e = shift @_;
873             @args = ( @$e, @_ );
874             } else {
875             @args = @_;
876             }
877              
878             # We only need the IDs for removing paths from the graph, not the reading
879             # objects themselves.
880             my( $source, $target, $wit ) = $self->_stringify_args( @args );
881              
882             $self->_graphcalc_done(0);
883             if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
884             $self->sequence->delete_edge_attribute( $source, $target, $wit );
885             }
886             unless( $self->sequence->has_edge_attributes( $source, $target ) ) {
887             $self->sequence->delete_edge( $source, $target );
888             $self->relations->delete_equivalence_edge( $source, $target );
889             }
890             }
891              
892              
893             # Extra graph-alike utility
894             sub has_path {
895             my $self = shift;
896             my( $source, $target, $wit ) = $self->_stringify_args( @_ );
897             return undef unless $self->sequence->has_edge( $source, $target );
898             return $self->sequence->has_edge_attribute( $source, $target, $wit );
899             }
900              
901             =head2 clear_witness( @sigil_list )
902              
903             Clear the given witnesses out of the collation entirely, removing references
904             to them in paths, and removing readings that belong only to them. Should only
905             be called via $tradition->del_witness.
906              
907             =cut
908              
909             sub clear_witness {
910             my( $self, @sigils ) = @_;
911              
912             $self->_graphcalc_done(0);
913             # Clear the witness(es) out of the paths
914             foreach my $e ( $self->paths ) {
915             foreach my $sig ( @sigils ) {
916             $self->del_path( $e, $sig );
917             }
918             }
919            
920             # Clear out the newly unused readings
921             foreach my $r ( $self->readings ) {
922             unless( $self->reading_witnesses( $r ) ) {
923             $self->del_reading( $r );
924             }
925             }
926             }
927              
928             sub add_relationship {
929             my $self = shift;
930             my( $source, $target, $opts, $altered_readings ) = $self->_stringify_args( @_ );
931             my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
932             my $did_recalc;
933             foreach my $v ( @vectors ) {
934             my $rel = $self->get_relationship( $v );
935             next unless $rel->colocated;
936             my $r1 = $self->reading( $v->[0] );
937             my $r2 = $self->reading( $v->[1] );
938             # If it's a spelling or orthographic relationship, and one is marked
939             # as a lemma, set the normal form on the non-lemma to reflect that.
940             if( $r1->does( 'Text::Tradition::Morphology' ) ) {
941             my @changed = $r1->relationship_added( $r2, $rel );
942             if( ref( $altered_readings ) eq 'ARRAY' ) {
943             push( @$altered_readings, @changed );
944             }
945             }
946             next if $did_recalc;
947             if( $r1->has_rank && $r2->has_rank && $r1->rank ne $r2->rank ) {
948             $self->_graphcalc_done(0);
949             $self->_clear_cache;
950             $did_recalc = 1;
951             }
952             }
953             return @vectors;
954             }
955              
956             around qw/ get_relationship del_relationship / => sub {
957             my $orig = shift;
958             my $self = shift;
959             my @args = @_;
960             if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
961             @args = @{$_[0]};
962             }
963             my @stringargs = $self->_stringify_args( @args );
964             $self->$orig( @stringargs );
965             };
966              
967             =head2 reading_witnesses( $reading )
968              
969             Return a list of sigils corresponding to the witnesses in which the reading appears.
970              
971             =cut
972              
973             sub reading_witnesses {
974             my( $self, $reading ) = @_;
975             # We need only check either the incoming or the outgoing edges; I have
976             # arbitrarily chosen "incoming". Thus, special-case the start node.
977             if( $reading eq $self->start ) {
978             return map { $_->sigil } grep { $_->is_collated } $self->tradition->witnesses;
979             }
980             my %all_witnesses;
981             foreach my $e ( $self->sequence->edges_to( $reading ) ) {
982             my $wits = $self->sequence->get_edge_attributes( @$e );
983             @all_witnesses{ keys %$wits } = 1;
984             }
985             my $acstr = $self->ac_label;
986             foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
987             delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
988             }
989             return keys %all_witnesses;
990             }
991              
992             =head1 OUTPUT METHODS
993              
994             =head2 as_svg( \%options )
995              
996             Returns an SVG string that represents the graph, via as_dot and graphviz.
997             See as_dot for a list of options. Must have GraphViz (dot) installed to run.
998              
999             =begin testing
1000              
1001             use File::Which;
1002             use Text::Tradition;
1003             use XML::LibXML;
1004             use XML::LibXML::XPathContext;
1005              
1006              
1007             SKIP: {
1008             skip( 'Need Graphviz installed to test graphs', 16 )
1009             unless File::Which::which( 'dot' );
1010              
1011             my $datafile = 't/data/Collatex-16.xml';
1012              
1013             my $tradition = Text::Tradition->new(
1014             'name' => 'inline',
1015             'input' => 'CollateX',
1016             'file' => $datafile,
1017             );
1018             my $collation = $tradition->collation;
1019              
1020             # Test the svg creation
1021             my $parser = XML::LibXML->new();
1022             $parser->load_ext_dtd( 0 );
1023             my $svg = $parser->parse_string( $collation->as_svg() );
1024             is( $svg->documentElement->nodeName(), 'svg', 'Got an svg document' );
1025              
1026             # Test for the correct number of nodes in the SVG
1027             my $svg_xpc = XML::LibXML::XPathContext->new( $svg->documentElement() );
1028             $svg_xpc->registerNs( 'svg', 'http://www.w3.org/2000/svg' );
1029             my @svg_nodes = $svg_xpc->findnodes( '//svg:g[@class="node"]' );
1030             is( scalar @svg_nodes, 26, "Correct number of nodes in the graph" );
1031              
1032             # Test for the correct number of edges
1033             my @svg_edges = $svg_xpc->findnodes( '//svg:g[@class="edge"]' );
1034             is( scalar @svg_edges, 32, "Correct number of edges in the graph" );
1035              
1036             # Test svg creation for a subgraph
1037             my $part_svg = $parser->parse_string( $collation->as_svg( { from => 15 } ) ); # start, no end
1038             is( $part_svg->documentElement->nodeName(), 'svg', "Got an svg subgraph to end" );
1039             my $part_xpc = XML::LibXML::XPathContext->new( $part_svg->documentElement() );
1040             $part_xpc->registerNs( 'svg', 'http://www.w3.org/2000/svg' );
1041             @svg_nodes = $part_xpc->findnodes( '//svg:g[@class="node"]' );
1042             is( scalar( @svg_nodes ), 9,
1043             "Correct number of nodes in the subgraph" );
1044             @svg_edges = $part_xpc->findnodes( '//svg:g[@class="edge"]' );
1045             is( scalar( @svg_edges ), 10,
1046             "Correct number of edges in the subgraph" );
1047              
1048             $part_svg = $parser->parse_string( $collation->as_svg( { from => 10, to => 13 } ) ); # start, no end
1049             is( $part_svg->documentElement->nodeName(), 'svg', "Got an svg subgraph in the middle" );
1050             $part_xpc = XML::LibXML::XPathContext->new( $part_svg->documentElement() );
1051             $part_xpc->registerNs( 'svg', 'http://www.w3.org/2000/svg' );
1052             @svg_nodes = $part_xpc->findnodes( '//svg:g[@class="node"]' );
1053             is( scalar( @svg_nodes ), 9,
1054             "Correct number of nodes in the subgraph" );
1055             @svg_edges = $part_xpc->findnodes( '//svg:g[@class="edge"]' );
1056             is( scalar( @svg_edges ), 11,
1057             "Correct number of edges in the subgraph" );
1058              
1059              
1060             $part_svg = $parser->parse_string( $collation->as_svg( { to => 5 } ) ); # start, no end
1061             is( $part_svg->documentElement->nodeName(), 'svg', "Got an svg subgraph from start" );
1062             $part_xpc = XML::LibXML::XPathContext->new( $part_svg->documentElement() );
1063             $part_xpc->registerNs( 'svg', 'http://www.w3.org/2000/svg' );
1064             @svg_nodes = $part_xpc->findnodes( '//svg:g[@class="node"]' );
1065             is( scalar( @svg_nodes ), 7,
1066             "Correct number of nodes in the subgraph" );
1067             @svg_edges = $part_xpc->findnodes( '//svg:g[@class="edge"]' );
1068             is( scalar( @svg_edges ), 7,
1069             "Correct number of edges in the subgraph" );
1070              
1071             # Test a right-to-left graph
1072             my $arabic = Text::Tradition->new(
1073             input => 'Tabular',
1074             sep_char => ',',
1075             name => 'arabic',
1076             direction => 'RL',
1077             file => 't/data/arabic_snippet.csv' );
1078             my $rl_svg = $parser->parse_string( $arabic->collation->as_svg() );
1079             is( $rl_svg->documentElement->nodeName(), 'svg', "Got an svg subgraph from start" );
1080             my $rl_xpc = XML::LibXML::XPathContext->new( $rl_svg->documentElement() );
1081             $rl_xpc->registerNs( 'svg', 'http://www.w3.org/2000/svg' );
1082             my %node_cx;
1083             foreach my $node ( $rl_xpc->findnodes( '//svg:g[@class="node"]' ) ) {
1084             my $nid = $node->getAttribute('id');
1085             $node_cx{$nid} = $rl_xpc->findvalue( './svg:ellipse/@cx', $node );
1086             }
1087             my @sorted = sort { $node_cx{$a} <=> $node_cx{$b} } keys( %node_cx );
1088             is( $sorted[0], '__END__', "End node is the leftmost" );
1089             is( $sorted[$#sorted], '__START__', "Start node is the rightmost" );
1090            
1091             # Now try making it bidirectional
1092             $arabic->collation->change_direction('BI');
1093             my $bi_svg = $parser->parse_string( $arabic->collation->as_svg() );
1094             is( $bi_svg->documentElement->nodeName(), 'svg', "Got an svg subgraph from start" );
1095             my $bi_xpc = XML::LibXML::XPathContext->new( $bi_svg->documentElement() );
1096             $bi_xpc->registerNs( 'svg', 'http://www.w3.org/2000/svg' );
1097             my %node_cy;
1098             foreach my $node ( $bi_xpc->findnodes( '//svg:g[@class="node"]' ) ) {
1099             my $nid = $node->getAttribute('id');
1100             $node_cy{$nid} = $rl_xpc->findvalue( './svg:ellipse/@cy', $node );
1101             }
1102             @sorted = sort { $node_cy{$a} <=> $node_cy{$b} } keys( %node_cy );
1103             is( $sorted[0], '__START__', "Start node is the topmost" );
1104             is( $sorted[$#sorted], '__END__', "End node is the bottom-most" );
1105            
1106              
1107             } #SKIP
1108              
1109             =end testing
1110              
1111             =cut
1112              
1113             sub as_svg {
1114             my( $self, $opts ) = @_;
1115             throw( "Need GraphViz installed to output SVG" )
1116             unless File::Which::which( 'dot' );
1117             my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
1118             $self->calculate_ranks()
1119             unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
1120             my @cmd = qw/dot -Tsvg/;
1121             my( $svg, $err );
1122             my $dotfile = File::Temp->new();
1123             ## USE FOR DEBUGGING
1124             # $dotfile->unlink_on_destroy(0);
1125             binmode $dotfile, ':utf8';
1126             print $dotfile $self->as_dot( $opts );
1127             push( @cmd, $dotfile->filename );
1128             run( \@cmd, ">", binary(), \$svg );
1129             $svg = decode_utf8( $svg );
1130             return $svg;
1131             }
1132              
1133              
1134             =head2 as_dot( \%options )
1135              
1136             Returns a string that is the collation graph expressed in dot
1137             (i.e. GraphViz) format. Options include:
1138              
1139             =over 4
1140              
1141             =item * from
1142              
1143             =item * to
1144              
1145             =item * color_common
1146              
1147             =back
1148              
1149             =cut
1150              
1151             sub as_dot {
1152             my( $self, $opts ) = @_;
1153             my $startrank = $opts->{'from'} if $opts;
1154             my $endrank = $opts->{'to'} if $opts;
1155             my $color_common = $opts->{'color_common'} if $opts;
1156             my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
1157             && $self->end->rank > 100;
1158             $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
1159              
1160             # Check the arguments
1161             if( $startrank ) {
1162             return if $endrank && $startrank > $endrank;
1163             return if $startrank > $self->end->rank;
1164             }
1165             if( defined $endrank ) {
1166             return if $endrank < 0;
1167             $endrank = undef if $endrank == $self->end->rank;
1168             }
1169            
1170             my $graph_name = $self->tradition->name;
1171             $graph_name =~ s/[^\w\s]//g;
1172             $graph_name = join( '_', split( /\s+/, $graph_name ) );
1173              
1174             my %graph_attrs = (
1175             'bgcolor' => 'none',
1176             );
1177             unless( $self->direction eq 'BI' ) {
1178             $graph_attrs{rankdir} = $self->direction;
1179             }
1180             my %node_attrs = (
1181             'fontsize' => 14,
1182             'fillcolor' => 'white',
1183             'style' => 'filled',
1184             'shape' => 'ellipse'
1185             );
1186             my %edge_attrs = (
1187             'arrowhead' => 'open',
1188             'color' => '#000000',
1189             'fontcolor' => '#000000',
1190             );
1191              
1192             my $dot = sprintf( "digraph %s {\n", $graph_name );
1193             $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
1194             $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
1195              
1196             # Output substitute start/end readings if necessary
1197             if( $startrank ) {
1198             $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
1199             }
1200             if( $endrank ) {
1201             $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
1202             }
1203             if( $STRAIGHTENHACK ) {
1204             ## HACK part 1
1205             my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
1206             $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
1207             $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
1208             }
1209             my %used; # Keep track of the readings that actually appear in the graph
1210             # Sort the readings by rank if we have ranks; this speeds layout.
1211             my @all_readings = $self->end->has_rank
1212             ? sort { $a->rank <=> $b->rank } $self->readings
1213             : $self->readings;
1214             # TODO Refrain from outputting lacuna nodes - just grey out the edges.
1215             foreach my $reading ( @all_readings ) {
1216             # Only output readings within our rank range.
1217             next if $startrank && $reading->rank < $startrank;
1218             next if $endrank && $reading->rank > $endrank;
1219             $used{$reading->id} = 1;
1220             # Need not output nodes without separate labels
1221             next if $reading->id eq $reading->text;
1222             my $rattrs;
1223             my $label = $reading->text;
1224             unless( $label =~ /^[[:punct:]]+$/ ) {
1225             $label .= '-' if $reading->join_next;
1226             $label = "-$label" if $reading->join_prior;
1227             }
1228             $label =~ s/\"/\\\"/g;
1229             $rattrs->{'label'} = $label;
1230             $rattrs->{'id'} = $reading->id;
1231             $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
1232             $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
1233             }
1234            
1235             # Add the real edges.
1236             my @edges = $self->paths;
1237             my( %substart, %subend );
1238             foreach my $edge ( @edges ) {
1239             # Do we need to output this edge?
1240             if( $used{$edge->[0]} && $used{$edge->[1]} ) {
1241             my $label = $self->_path_display_label( $opts,
1242             $self->path_witnesses( $edge ) );
1243             my $variables = { %edge_attrs, 'label' => $label };
1244            
1245             # Account for the rank gap if necessary
1246             my $rank0 = $self->reading( $edge->[0] )->rank
1247             if $self->reading( $edge->[0] )->has_rank;
1248             my $rank1 = $self->reading( $edge->[1] )->rank
1249             if $self->reading( $edge->[1] )->has_rank;
1250             if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
1251             $variables->{'minlen'} = $rank1 - $rank0;
1252             }
1253            
1254             # EXPERIMENTAL: make edge width reflect no. of witnesses
1255             my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
1256             $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
1257              
1258             my $varopts = _dot_attr_string( $variables );
1259             $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1260             $edge->[0], $edge->[1], $varopts );
1261             } elsif( $used{$edge->[0]} ) {
1262             $subend{$edge->[0]} = $edge->[1];
1263             } elsif( $used{$edge->[1]} ) {
1264             $substart{$edge->[1]} = $edge->[0];
1265             }
1266             }
1267            
1268             # If we are asked to, add relationship links
1269             if( exists $opts->{show_relations} ) {
1270             my $filter = $opts->{show_relations}; # can be 'transposition' or 'all'
1271             if( $filter eq 'transposition' ) {
1272             $filter =~ qr/^transposition$/;
1273             }
1274             my %typecolors;
1275             my @types = sort( map { $_->name } $self->relations->types );
1276             if( exists $opts->{graphcolors} ) {
1277             foreach my $tdx ( 0 .. $#types ) {
1278             $typecolors{$types[$tdx]} = $opts->{graphcolors}->[$tdx];
1279             }
1280             } else {
1281             map { $typecolors{$_} = '#FFA14F' } @types;
1282             }
1283             foreach my $redge ( $self->relationships ) {
1284             if( $used{$redge->[0]} && $used{$redge->[1]} ) {
1285             my $rel = $self->get_relationship( $redge );
1286             next unless $filter eq 'all' || $rel->type =~ /$filter/;
1287             my $variables = {
1288             arrowhead => 'none',
1289             color => $typecolors{$rel->type},
1290             constraint => 'false',
1291             penwidth => '3',
1292             };
1293             unless( exists $opts->{graphcolors} ) {
1294             $variables->{label} = uc( substr( $rel->type, 0, 4 ) ),
1295             }
1296             $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1297             $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
1298             }
1299             }
1300             }
1301            
1302             # Add substitute start and end edges if necessary
1303             foreach my $node ( keys %substart ) {
1304             my $witstr = $self->_path_display_label( $opts,
1305             $self->path_witnesses( $substart{$node}, $node ) );
1306             my $variables = { %edge_attrs, 'label' => $witstr };
1307             my $nrdg = $self->reading( $node );
1308             if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
1309             # Substart is actually one lower than $startrank
1310             $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
1311             }
1312             my $varopts = _dot_attr_string( $variables );
1313             $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
1314             }
1315             foreach my $node ( keys %subend ) {
1316             my $witstr = $self->_path_display_label( $opts,
1317             $self->path_witnesses( $node, $subend{$node} ) );
1318             my $variables = { %edge_attrs, 'label' => $witstr };
1319             my $varopts = _dot_attr_string( $variables );
1320             $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
1321             }
1322             # HACK part 2
1323             if( $STRAIGHTENHACK ) {
1324             my $endlabel = $endrank ? '__SUBEND__' : '__END__';
1325             $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
1326             }
1327              
1328             $dot .= "}\n";
1329             return $dot;
1330             }
1331              
1332             sub _dot_attr_string {
1333             my( $hash ) = @_;
1334             my @attrs;
1335             foreach my $k ( sort keys %$hash ) {
1336             my $v = $hash->{$k};
1337             push( @attrs, $k.'="'.$v.'"' );
1338             }
1339             return( '[ ' . join( ', ', @attrs ) . ' ]' );
1340             }
1341              
1342             =head2 path_witnesses( $edge )
1343              
1344             Returns the list of sigils whose witnesses are associated with the given edge.
1345             The edge can be passed as either an array or an arrayref of ( $source, $target ).
1346              
1347             =cut
1348              
1349             sub path_witnesses {
1350             my( $self, @edge ) = @_;
1351             # If edge is an arrayref, cope.
1352             if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
1353             my $e = shift @edge;
1354             @edge = @$e;
1355             }
1356             my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
1357             return @wits;
1358             }
1359              
1360             # Helper function. Make a display label for the given witnesses, showing a.c.
1361             # witnesses only where the main witness is not also in the list.
1362             sub _path_display_label {
1363             my $self = shift;
1364             my $opts = shift;
1365             my %wits;
1366             map { $wits{$_} = 1 } @_;
1367              
1368             # If an a.c. wit is listed, remove it if the main wit is also listed.
1369             # Otherwise keep it for explicit listing.
1370             my $aclabel = $self->ac_label;
1371             my @disp_ac;
1372             foreach my $w ( sort keys %wits ) {
1373             if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
1374             if( exists $wits{$1} ) {
1375             delete $wits{$w};
1376             } else {
1377             push( @disp_ac, $w );
1378             }
1379             }
1380             }
1381            
1382             if( $opts->{'explicit_wits'} ) {
1383             return join( ', ', sort keys %wits );
1384             } else {
1385             # See if we are in a majority situation.
1386             my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1387             $maj = $maj > 5 ? $maj : 5;
1388             if( scalar keys %wits > $maj ) {
1389             unshift( @disp_ac, 'majority' );
1390             return join( ', ', @disp_ac );
1391             } else {
1392             return join( ', ', sort keys %wits );
1393             }
1394             }
1395             }
1396              
1397             =head2 as_adjacency_list
1398              
1399             Returns a JSON structure that represents the collation sequence graph.
1400              
1401             =begin testing
1402              
1403             use JSON qw/ from_json /;
1404             use Text::Tradition;
1405              
1406             my $t = Text::Tradition->new(
1407             'input' => 'Self',
1408             'file' => 't/data/florilegium_graphml.xml' );
1409             my $c = $t->collation;
1410            
1411             # Make a connection so we can test rank preservation
1412             $c->add_relationship( 'w91', 'w92', { type => 'grammatical' } );
1413              
1414             # Create an adjacency list of the whole thing; test the output.
1415             my $adj_whole = from_json( $c->as_adjacency_list() );
1416             is( scalar @$adj_whole, scalar $c->readings(),
1417             "Same number of nodes in graph and adjacency list" );
1418             my @adj_whole_edges;
1419             map { push( @adj_whole_edges, @{$_->{adjacent}} ) } @$adj_whole;
1420             is( scalar @adj_whole_edges, scalar $c->sequence->edges,
1421             "Same number of edges in graph and adjacency list" );
1422             # Find the reading whose rank should be preserved
1423             my( $test_rdg ) = grep { $_->{id} eq 'w89' } @$adj_whole;
1424             my( $test_edge ) = grep { $_->{id} eq 'w92' } @{$test_rdg->{adjacent}};
1425             is( $test_edge->{minlen}, 2, "Rank of test reading is preserved" );
1426              
1427             # Now create an adjacency list of just a portion. w76 to w122
1428             my $adj_part = from_json( $c->as_adjacency_list(
1429             { from => $c->reading('w76')->rank,
1430             to => $c->reading('w122')->rank }));
1431             is( scalar @$adj_part, 48, "Correct number of nodes in partial graph" );
1432             my @adj_part_edges;
1433             map { push( @adj_part_edges, @{$_->{adjacent}} ) } @$adj_part;
1434             is( scalar @adj_part_edges, 58,
1435             "Same number of edges in partial graph and adjacency list" );
1436             # Check for consistency
1437             my %part_nodes;
1438             map { $part_nodes{$_->{id}} = 1 } @$adj_part;
1439             foreach my $edge ( @adj_part_edges ) {
1440             my $testid = $edge->{id};
1441             ok( $part_nodes{$testid}, "ID $testid referenced in edge is given as node" );
1442             }
1443              
1444             =end testing
1445              
1446             =cut
1447              
1448             sub as_adjacency_list {
1449             my( $self, $opts ) = @_;
1450             # Make a structure that contains all the nodes, the nodes they point to,
1451             # and the attributes of the edges that connect them.
1452             # [ { id: 'n0', label: 'Gallia', adjacent: [
1453             # { id: 'n1', label: 'P Q' } ,
1454             # { id: 'n2', label: 'R S', minlen: 2 } ] },
1455             # { id: 'n1', label: 'est', adjacent: [ ... ] },
1456             # ... ]
1457             my $startrank = $opts->{'from'} || 0;
1458             my $endrank = $opts->{'to'} || $self->end->rank;
1459            
1460             $self->calculate_ranks()
1461             unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
1462             my $list = [];
1463             foreach my $rdg ( $self->readings ) {
1464             my @successors;
1465             my $phony = '';
1466             # Figure out what the node's successors should be.
1467             if( $rdg eq $self->start && $startrank > 0 ) {
1468             # Connect the start node with all the nodes at startrank.
1469             # Lacunas should be included only if the node really has that rank.
1470             @successors = $self->readings_at_rank( $startrank, 1 );
1471             $phony = 'start';
1472             } elsif( $rdg->rank < $startrank
1473             || $rdg->rank > $endrank && $rdg ne $self->end ) {
1474             next;
1475             } else {
1476             @successors = $rdg->successors;
1477             }
1478             # Make sure that the end node is at the end of the successors
1479             # list if it is needed.
1480             if( grep { $_ eq $self->end } @successors ) {
1481             my @ts = grep { $_ ne $self->end } @successors;
1482             @successors = ( @ts, $self->end );
1483             } elsif ( grep { $_->rank > $endrank } @successors ) {
1484             push( @successors, $self->end );
1485             }
1486            
1487             my $listitem = { id => $rdg->id, label => $rdg->text };
1488             my $adjacent = [];
1489             my @endwits;
1490             foreach my $succ ( @successors ) {
1491             my @edgewits;
1492             if( $phony eq 'start' ) {
1493             @edgewits = $succ->witnesses;
1494             } elsif( $self->sequence->has_edge( $rdg->id, $succ->id ) ) {
1495             @edgewits = $self->path_witnesses( $rdg->id, $succ->id );
1496             }
1497            
1498             if( $succ eq $self->end ) {
1499             @edgewits = @endwits;
1500             } elsif( $succ->rank > $endrank ) {
1501             # These witnesses will point to 'end' instead, not to the
1502             # actual successor.
1503             push( @endwits, @edgewits );
1504             next;
1505             }
1506             my $edgelabel = $self->_path_display_label( $opts, @edgewits );
1507             my $edgedef = { id => $succ->id, label => $edgelabel };
1508             my $rankoffset = $succ->rank - $rdg->rank;
1509             if( $rankoffset > 1 and $succ ne $self->end ) {
1510             $edgedef->{minlen} = $rankoffset;
1511             }
1512             push( @$adjacent, $edgedef );
1513             }
1514             $listitem->{adjacent} = $adjacent;
1515             push( @$list, $listitem );
1516             }
1517             return to_json( $list );
1518             }
1519              
1520             =head2 as_graphml
1521              
1522             Returns a GraphML representation of the collation. The GraphML will contain
1523             two graphs. The first expresses the attributes of the readings and the witness
1524             paths that link them; the second expresses the relationships that link the
1525             readings. This is the native transfer format for a tradition.
1526              
1527             =begin testing
1528              
1529             use Text::Tradition;
1530             use TryCatch;
1531              
1532             my $READINGS = 311;
1533             my $PATHS = 361;
1534              
1535             my $datafile = 't/data/florilegium_tei_ps.xml';
1536             my $tradition = Text::Tradition->new( 'input' => 'TEI',
1537             'name' => 'test0',
1538             'file' => $datafile,
1539             'linear' => 1 );
1540              
1541             ok( $tradition, "Got a tradition object" );
1542             is( scalar $tradition->witnesses, 13, "Found all witnesses" );
1543             ok( $tradition->collation, "Tradition has a collation" );
1544              
1545             my $c = $tradition->collation;
1546             is( scalar $c->readings, $READINGS, "Collation has all readings" );
1547             is( scalar $c->paths, $PATHS, "Collation has all paths" );
1548             is( scalar $c->relationships, 0, "Collation has all relationships" );
1549              
1550             # Add a few relationships
1551             $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
1552             $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
1553             $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition',
1554             'is_significant' => 'yes' } );
1555              
1556             # Now write it to GraphML and parse it again.
1557              
1558             my $graphml = $c->as_graphml;
1559             my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
1560             is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
1561             is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
1562             is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
1563             my $sigrel = $st->collation->get_relationship( 'w257', 'w262' );
1564             is( $sigrel->is_significant, 'yes', "Ternary attribute value was restored" );
1565              
1566             # Now add a stemma, write to GraphML, and look at the output.
1567             SKIP: {
1568             skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
1569             my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
1570             is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
1571             is( $tradition->stemmata, 1, "Tradition now has the stemma" );
1572             $graphml = $c->as_graphml;
1573             like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
1574             }
1575              
1576             =end testing
1577              
1578             =cut
1579              
1580             ## TODO MOVE this to Tradition.pm and modularize it better
1581             sub as_graphml {
1582             my( $self, $options ) = @_;
1583             $self->calculate_ranks unless $self->_graphcalc_done;
1584            
1585             my $start = $options->{'from'}
1586             ? $self->reading( $options->{'from'} ) : $self->start;
1587             my $end = $options->{'to'}
1588             ? $self->reading( $options->{'to'} ) : $self->end;
1589             if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
1590             throw( 'Start node must be before end node' );
1591             }
1592             # The readings need to be ranked for this to work.
1593             $start = $self->start unless $start->has_rank;
1594             $end = $self->end unless $end->has_rank;
1595             my $rankoffset = 0;
1596             unless( $start eq $self->start ) {
1597             $rankoffset = $start->rank - 1;
1598             }
1599             my %use_readings;
1600            
1601             # Some namespaces
1602             my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1603             my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1604             my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
1605             'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
1606              
1607             # Create the document and root node
1608             require XML::LibXML;
1609             my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1610             my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1611             $graphml->setDocumentElement( $root );
1612             $root->setNamespace( $xsi_ns, 'xsi', 0 );
1613             $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
1614            
1615             # List of attribute types to save on our objects and their corresponding
1616             # GraphML types
1617             my %save_types = (
1618             'Str' => 'string',
1619             'Int' => 'int',
1620             'Bool' => 'boolean',
1621             'ReadingID' => 'string',
1622             'RelationshipType' => 'string',
1623             'RelationshipScope' => 'string',
1624             'Ternary' => 'string',
1625             );
1626            
1627             # Add the data keys for the graph. Include an extra key 'version' for the
1628             # GraphML output version.
1629             my %graph_data_keys;
1630             my $gdi = 0;
1631             my %graph_attributes = ( 'version' => 'string' );
1632             # Graph attributes include those of Tradition and those of Collation.
1633             my %gattr_from;
1634             # TODO Use meta introspection method from duplicate_reading to do this
1635             # instead of naming custom keys.
1636             my $tmeta = $self->tradition->meta;
1637             my $cmeta = $self->meta;
1638             map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1639             map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1640             foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1641             next if $attr->name =~ /^_/;
1642             next unless $save_types{$attr->type_constraint->name};
1643             $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1644             }
1645             # Extra custom keys for complex objects that should be saved in some form.
1646             # The subroutine should return a string, or undef/empty.
1647             if( $tmeta->has_method('stemmata') ) {
1648             $graph_attributes{'stemmata'} = sub {
1649             my @stemstrs;
1650             map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1651             $self->tradition->stemmata;
1652             join( "\n", @stemstrs );
1653             };
1654             }
1655            
1656             if( $tmeta->has_method('user') ) {
1657             $graph_attributes{'user'} = sub {
1658             $self->tradition->user ? $self->tradition->user->id : undef
1659             };
1660             }
1661            
1662             foreach my $datum ( sort keys %graph_attributes ) {
1663             $graph_data_keys{$datum} = 'dg'.$gdi++;
1664             my $key = $root->addNewChild( $graphml_ns, 'key' );
1665             my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1666             : $graph_attributes{$datum};
1667             $key->setAttribute( 'attr.name', $datum );
1668             $key->setAttribute( 'attr.type', $dtype );
1669             $key->setAttribute( 'for', 'graph' );
1670             $key->setAttribute( 'id', $graph_data_keys{$datum} );
1671             }
1672              
1673             # Add the data keys for reading nodes
1674             my %reading_attributes;
1675             my $rmeta = Text::Tradition::Collation::Reading->meta;
1676             foreach my $attr( $rmeta->get_all_attributes ) {
1677             next if $attr->name =~ /^_/;
1678             next unless $save_types{$attr->type_constraint->name};
1679             $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1680             }
1681             if( $self->start->does('Text::Tradition::Morphology' ) ) {
1682             # Extra custom key for the reading morphology
1683             $reading_attributes{'lexemes'} = 'string';
1684             }
1685            
1686             my %node_data_keys;
1687             my $ndi = 0;
1688             foreach my $datum ( sort keys %reading_attributes ) {
1689             $node_data_keys{$datum} = 'dn'.$ndi++;
1690             my $key = $root->addNewChild( $graphml_ns, 'key' );
1691             $key->setAttribute( 'attr.name', $datum );
1692             $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
1693             $key->setAttribute( 'for', 'node' );
1694             $key->setAttribute( 'id', $node_data_keys{$datum} );
1695             }
1696              
1697             # Add the data keys for edges, that is, paths and relationships. Path
1698             # data does not come from a Moose class so is here manually.
1699             my $edi = 0;
1700             my %edge_data_keys;
1701             my %edge_attributes = (
1702             witness => 'string', # ID/label for a path
1703             extra => 'boolean', # Path key
1704             );
1705             my @path_attributes = keys %edge_attributes; # track our manual additions
1706             my $pmeta = Text::Tradition::Collation::Relationship->meta;
1707             foreach my $attr( $pmeta->get_all_attributes ) {
1708             next if $attr->name =~ /^_/;
1709             next unless $save_types{$attr->type_constraint->name};
1710             $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1711             }
1712             foreach my $datum ( sort keys %edge_attributes ) {
1713             $edge_data_keys{$datum} = 'de'.$edi++;
1714             my $key = $root->addNewChild( $graphml_ns, 'key' );
1715             $key->setAttribute( 'attr.name', $datum );
1716             $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
1717             $key->setAttribute( 'for', 'edge' );
1718             $key->setAttribute( 'id', $edge_data_keys{$datum} );
1719             }
1720              
1721             # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1722             my $xmlidname = $self->tradition->name;
1723             $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1724             if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1725             $xmlidname = '_'.$xmlidname;
1726             }
1727             my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1728             $sgraph->setAttribute( 'edgedefault', 'directed' );
1729             $sgraph->setAttribute( 'id', $xmlidname );
1730             $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1731             $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
1732             $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1733             $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
1734             $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
1735            
1736             # Tradition/collation attribute data
1737             foreach my $datum ( keys %graph_attributes ) {
1738             my $value;
1739             if( $datum eq 'version' ) {
1740             $value = '3.2';
1741             } elsif( ref( $graph_attributes{$datum} ) ) {
1742             my $sub = $graph_attributes{$datum};
1743             $value = &$sub();
1744             } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1745             $value = $self->tradition->$datum;
1746             } else {
1747             $value = $self->$datum;
1748             }
1749             _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
1750             }
1751              
1752             my $node_ctr = 0;
1753             my %node_hash;
1754             # Add our readings to the graph
1755             foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
1756             next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1757             ( $n->rank < $start->rank || $n->rank > $end->rank );
1758             $use_readings{$n->id} = 1;
1759             # Add to the main graph
1760             my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
1761             my $node_xmlid = 'n' . $node_ctr++;
1762             $node_hash{ $n->id } = $node_xmlid;
1763             $node_el->setAttribute( 'id', $node_xmlid );
1764             foreach my $d ( keys %reading_attributes ) {
1765             my $nval = $n->$d;
1766             # Custom serialization
1767             if( $d eq 'lexemes' ) {
1768             # If nval is a true value, we have lexemes so we need to
1769             # serialize them. Otherwise set nval to undef so that the
1770             # key is excluded from this reading.
1771             $nval = $nval ? $n->_serialize_lexemes : undef;
1772             } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1773             $nval = undef;
1774             }
1775             if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
1776             # Adjust the ranks within the subgraph.
1777             $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1778             : $nval - $rankoffset;
1779             }
1780             _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1781             if defined $nval;
1782             }
1783             }
1784              
1785             # Add the path edges to the sequence graph
1786             my $edge_ctr = 0;
1787             foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1788             # We add an edge in the graphml for every witness in $e.
1789             next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1790             my @edge_wits = sort $self->path_witnesses( $e );
1791             $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1792             $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1793             # Skip any path from start to end; that witness is not in the subgraph.
1794             next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
1795             foreach my $wit ( @edge_wits ) {
1796             my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1797             $node_hash{ $e->[0] },
1798             $node_hash{ $e->[1] } );
1799             my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
1800             $edge_el->setAttribute( 'source', $from );
1801             $edge_el->setAttribute( 'target', $to );
1802             $edge_el->setAttribute( 'id', $id );
1803            
1804             # It's a witness path, so add the witness
1805             my $base = $wit;
1806             my $key = $edge_data_keys{'witness'};
1807             # Is this an ante-corr witness?
1808             my $aclabel = $self->ac_label;
1809             if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1810             # Keep the base witness
1811             $base = $1;
1812             # ...and record that this is an 'extra' reading path
1813             _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1814             }
1815             _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1816             }
1817             }
1818            
1819             # Report the actual number of nodes and edges that went in
1820             $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1821             $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1822            
1823             # Add the relationship graph to the XML
1824             map { delete $edge_data_keys{$_} } @path_attributes;
1825             $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1826             $node_data_keys{'id'}, \%edge_data_keys );
1827              
1828             # Save and return the thing
1829             my $result = decode_utf8( $graphml->toString(1) );
1830             return $result;
1831             }
1832              
1833             sub _add_graphml_data {
1834             my( $el, $key, $value ) = @_;
1835             return unless defined $value;
1836             my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1837             $data_el->setAttribute( 'key', $key );
1838             $data_el->appendText( $value );
1839             }
1840              
1841             =head2 as_csv
1842              
1843             Returns a CSV alignment table representation of the collation graph, one
1844             row per witness (or witness uncorrected.)
1845              
1846             =head2 as_tsv
1847              
1848             Returns a tab-separated alignment table representation of the collation graph,
1849             one row per witness (or witness uncorrected.)
1850              
1851             =begin testing
1852              
1853             use Text::Tradition;
1854             use Text::CSV;
1855              
1856             my $READINGS = 311;
1857             my $PATHS = 361;
1858             my $WITS = 13;
1859             my $WITAC = 4;
1860              
1861             my $datafile = 't/data/florilegium_tei_ps.xml';
1862             my $tradition = Text::Tradition->new( 'input' => 'TEI',
1863             'name' => 'test0',
1864             'file' => $datafile,
1865             'linear' => 1 );
1866              
1867             my $c = $tradition->collation;
1868             # Export the thing to CSV
1869             my $csvstr = $c->as_csv();
1870             # Count the columns
1871             my $csv = Text::CSV->new({ sep_char => ',', binary => 1 });
1872             my @lines = split(/\n/, $csvstr );
1873             ok( $csv->parse( $lines[0] ), "Successfully parsed first line of CSV" );
1874             is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1875             my @q_ac = grep { $_ eq 'Q'.$c->ac_label } $csv->fields;
1876             ok( @q_ac, "Found a layered witness" );
1877              
1878             my $t2 = Text::Tradition->new( input => 'Tabular',
1879             name => 'test2',
1880             string => $csvstr,
1881             sep_char => ',' );
1882             is( scalar $t2->collation->readings, $READINGS, "Reparsed CSV collation has all readings" );
1883             is( scalar $t2->collation->paths, $PATHS, "Reparsed CSV collation has all paths" );
1884              
1885             # Now do it with TSV
1886             my $tsvstr = $c->as_tsv();
1887             my $t3 = Text::Tradition->new( input => 'Tabular',
1888             name => 'test3',
1889             string => $tsvstr,
1890             sep_char => "\t" );
1891             is( scalar $t3->collation->readings, $READINGS, "Reparsed TSV collation has all readings" );
1892             is( scalar $t3->collation->paths, $PATHS, "Reparsed TSV collation has all paths" );
1893              
1894             my $table = $c->alignment_table;
1895             my $noaccsv = $c->as_csv({ noac => 1 });
1896             my @noaclines = split(/\n/, $noaccsv );
1897             ok( $csv->parse( $noaclines[0] ), "Successfully parsed first line of no-ac CSV" );
1898             is( scalar( $csv->fields ), $WITS, "CSV has correct number of witness columns" );
1899             is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1900              
1901             my $safecsv = $c->as_csv({ safe_ac => 1});
1902             my @safelines = split(/\n/, $safecsv );
1903             ok( $csv->parse( $safelines[0] ), "Successfully parsed first line of safe CSV" );
1904             is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1905             @q_ac = grep { $_ eq 'Q__L' } $csv->fields;
1906             ok( @q_ac, "Found a sanitized layered witness" );
1907             is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1908              
1909             # Test relationship collapse
1910             $c->add_relationship( $c->readings_at_rank( 37 ), { type => 'spelling' } );
1911             $c->add_relationship( $c->readings_at_rank( 60 ), { type => 'spelling' } );
1912              
1913             my $mergedtsv = $c->as_tsv({mergetypes => [ 'spelling', 'orthographic' ] });
1914             my $t4 = Text::Tradition->new( input => 'Tabular',
1915             name => 'test4',
1916             string => $mergedtsv,
1917             sep_char => "\t" );
1918             is( scalar $t4->collation->readings, $READINGS - 2, "Reparsed TSV merge collation has fewer readings" );
1919             is( scalar $t4->collation->paths, $PATHS - 4, "Reparsed TSV merge collation has fewer paths" );
1920              
1921             # Test non-ASCII sigla
1922             my $t5 = Text::Tradition->new( input => 'Tabular',
1923             name => 'nonascii',
1924             file => 't/data/armexample.xlsx',
1925             excel => 'xlsx' );
1926             my $awittsv = $t5->collation->as_tsv({ noac => 1, ascii => 1 });
1927             my @awitlines = split( /\n/, $awittsv );
1928             like( $awitlines[0], qr/_A_5315622/, "Found ASCII sigil variant in TSV" );
1929              
1930             =end testing
1931              
1932             =cut
1933              
1934             sub _tabular {
1935             my( $self, $opts ) = @_;
1936             my $table = $self->alignment_table( $opts );
1937             my $csv_options = { binary => 1, quote_null => 0 };
1938             $csv_options->{'sep_char'} = $opts->{fieldsep};
1939             if( $opts->{fieldsep} eq "\t" ) {
1940             # If it is really tab separated, nothing is an escape char.
1941             $csv_options->{'quote_char'} = undef;
1942             $csv_options->{'escape_char'} = '';
1943             }
1944             my $csv = Text::CSV->new( $csv_options );
1945             my @result;
1946            
1947             # Make the header row
1948             my @witnesses = map { $_->{'witness'} } @{$table->{'alignment'}};
1949             if( $opts->{ascii} ) {
1950             # TODO think of a fix for this
1951             throw( "Cannot currently produce ASCII sigla with witness layers" )
1952             unless $opts->{noac};
1953             my @awits = map { $self->tradition->witness( $_ )->ascii_sigil } @witnesses;
1954             @witnesses = @awits;
1955             }
1956             $csv->combine( @witnesses );
1957             push( @result, $csv->string );
1958            
1959             # Make the rest of the rows
1960             foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1961             my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1962             my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
1963             # Quick and dirty collapse of requested relationship types
1964             if( ref( $opts->{mergetypes} ) eq 'ARRAY' ) {
1965             # Now substitute the reading in the relevant index of @row
1966             # for its merge-related reading
1967             my %substitutes;
1968             while( @rowobjs ) {
1969             my $thisr = shift @rowobjs;
1970             next unless $thisr;
1971             next if exists $substitutes{$thisr->{t}->text};
1972             # Make sure we don't have A <-> B substitutions.
1973             $substitutes{$thisr->{t}->text} = $thisr->{t}->text;
1974             foreach my $thatr ( @rowobjs ) {
1975             next unless $thatr;
1976             next if exists $substitutes{$thatr->{t}->text};
1977             my $ttrel = $self->get_relationship( $thisr->{t}, $thatr->{t} );
1978             next unless $ttrel;
1979             next unless grep { $ttrel->type eq $_ } @{$opts->{mergetypes}};
1980             # If we have got this far then we need to merge them.
1981             $substitutes{$thatr->{t}->text} = $thisr->{t}->text;
1982             }
1983             }
1984             @row = map { $_ && exists $substitutes{$_} ? $substitutes{$_} : $_ } @row;
1985             }
1986             $csv->combine( @row );
1987             push( @result, $csv->string );
1988             }
1989             return join( "\n", @result );
1990             }
1991              
1992             sub as_csv {
1993             my $self = shift;
1994             my $opts = shift || {};
1995             $opts->{fieldsep} = ',';
1996             return $self->_tabular( $opts );
1997             }
1998              
1999             sub as_tsv {
2000             my $self = shift;
2001             my $opts = shift || {};
2002             $opts->{fieldsep} = "\t";
2003             return $self->_tabular( $opts );
2004             }
2005              
2006             =head2 alignment_table
2007              
2008             Return a reference to an alignment table, in a slightly enhanced CollateX
2009             format which looks like this:
2010              
2011             $table = { alignment => [ { witness => "SIGIL",
2012             tokens => [ { t => "TEXT" }, ... ] },
2013             { witness => "SIG2",
2014             tokens => [ { t => "TEXT" }, ... ] },
2015             ... ],
2016             length => TEXTLEN };
2017              
2018             =cut
2019              
2020             sub alignment_table {
2021             my( $self, $opts ) = @_;
2022             if( $self->has_cached_table ) {
2023             return $self->cached_table
2024             unless $opts->{noac} || $opts->{safe_ac};
2025             }
2026            
2027             # Make sure we can do this
2028             throw( "Need a linear graph in order to make an alignment table" )
2029             unless $self->linear;
2030             $self->calculate_ranks()
2031             unless $self->_graphcalc_done && $self->end->has_rank;
2032              
2033             my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
2034             my @all_pos = ( 1 .. $self->end->rank - 1 );
2035             foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
2036             # say STDERR "Making witness row(s) for " . $wit->sigil;
2037             my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
2038             my @row = _make_witness_row( \@wit_path, \@all_pos );
2039             my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
2040             $witobj->{'identifier'} = $wit->identifier if $wit->identifier;
2041             push( @{$table->{'alignment'}}, $witobj );
2042             if( $wit->is_layered && !$opts->{noac} ) {
2043             my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
2044             $wit->sigil.$self->ac_label );
2045             my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
2046             my $witlabel = $opts->{safe_ac}
2047             ? $wit->sigil . '__L' : $wit->sigil.$self->ac_label;
2048             my $witacobj = { 'witness' => $witlabel,
2049             'tokens' => \@ac_row };
2050             $witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
2051             push( @{$table->{'alignment'}}, $witacobj );
2052             }
2053             }
2054             unless( $opts->{noac} || $opts->{safe_ac} ) {
2055             $self->cached_table( $table );
2056             }
2057             return $table;
2058             }
2059              
2060             sub _make_witness_row {
2061             my( $path, $positions ) = @_;
2062             my %char_hash;
2063             map { $char_hash{$_} = undef } @$positions;
2064             my $debug = 0;
2065             foreach my $rdg ( @$path ) {
2066             say STDERR "rank " . $rdg->rank if $debug;
2067             # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
2068             $char_hash{$rdg->rank} = { 't' => $rdg };
2069             }
2070             my @row = map { $char_hash{$_} } @$positions;
2071             # Fill in lacuna markers for undef spots in the row
2072             my $last_el = shift @row;
2073             my @filled_row = ( $last_el );
2074             foreach my $el ( @row ) {
2075             # If we are using node reference, make the lacuna node appear many times
2076             # in the table. If not, use the lacuna tag.
2077             if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
2078             $el = $last_el;
2079             }
2080             push( @filled_row, $el );
2081             $last_el = $el;
2082             }
2083             return @filled_row;
2084             }
2085              
2086              
2087             =head1 NAVIGATION METHODS
2088              
2089             =head2 reading_sequence( $first, $last, $sigil, $backup )
2090              
2091             Returns the ordered list of readings, starting with $first and ending
2092             with $last, for the witness given in $sigil. If a $backup sigil is
2093             specified (e.g. when walking a layered witness), it will be used wherever
2094             no $sigil path exists. If there is a base text reading, that will be
2095             used wherever no path exists for $sigil or $backup.
2096              
2097             =cut
2098              
2099             # TODO Think about returning some lazy-eval iterator.
2100             # TODO Get rid of backup; we should know from what witness is whether we need it.
2101              
2102             sub reading_sequence {
2103             my( $self, $start, $end, $witness ) = @_;
2104              
2105             $witness = $self->baselabel unless $witness;
2106             my @readings = ( $start );
2107             my %seen;
2108             my $n = $start;
2109             while( $n && $n->id ne $end->id ) {
2110             if( exists( $seen{$n->id} ) ) {
2111             throw( "Detected loop for $witness at " . $n->id );
2112             }
2113             $seen{$n->id} = 1;
2114            
2115             my $next = $self->next_reading( $n, $witness );
2116             unless( $next ) {
2117             throw( "Did not find any path for $witness from reading " . $n->id );
2118             }
2119             push( @readings, $next );
2120             $n = $next;
2121             }
2122             # Check that the last reading is our end reading.
2123             my $last = $readings[$#readings];
2124             throw( "Last reading found from " . $start->text .
2125             " for witness $witness is not the end!" ) # TODO do we get this far?
2126             unless $last->id eq $end->id;
2127            
2128             return @readings;
2129             }
2130              
2131             =head2 readings_at_rank( $rank )
2132              
2133             Returns a list of readings at a given rank, taken from the alignment table.
2134              
2135             =cut
2136              
2137             sub readings_at_rank {
2138             my( $self, $rank, $nolacuna ) = @_;
2139             my $table = $self->alignment_table;
2140             # Table rank is real rank - 1.
2141             my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
2142             my %readings;
2143             foreach my $e ( @elements ) {
2144             next unless ref( $e ) eq 'HASH';
2145             next unless exists $e->{'t'};
2146             my $rdg = $e->{'t'};
2147             next if $nolacuna && $rdg->is_lacuna && $rdg->rank ne $rank;
2148             $readings{$e->{'t'}->id} = $e->{'t'};
2149             }
2150             return values %readings;
2151             }
2152              
2153             =head2 next_reading( $reading, $sigil );
2154              
2155             Returns the reading that follows the given reading along the given witness
2156             path.
2157              
2158             =cut
2159              
2160             sub next_reading {
2161             # Return the successor via the corresponding path.
2162             my $self = shift;
2163             my $answer = $self->_find_linked_reading( 'next', @_ );
2164             return undef unless $answer;
2165             return $self->reading( $answer );
2166             }
2167              
2168             =head2 prior_reading( $reading, $sigil )
2169              
2170             Returns the reading that precedes the given reading along the given witness
2171             path.
2172              
2173             =cut
2174              
2175             sub prior_reading {
2176             # Return the predecessor via the corresponding path.
2177             my $self = shift;
2178             my $answer = $self->_find_linked_reading( 'prior', @_ );
2179             return $self->reading( $answer );
2180             }
2181              
2182             sub _find_linked_reading {
2183             my( $self, $direction, $node, $path ) = @_;
2184            
2185             # Get a backup if we are dealing with a layered witness
2186             my $alt_path;
2187             my $aclabel = $self->ac_label;
2188             if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
2189             $alt_path = $1;
2190             }
2191            
2192             my @linked_paths = $direction eq 'next'
2193             ? $self->sequence->edges_from( $node )
2194             : $self->sequence->edges_to( $node );
2195             return undef unless scalar( @linked_paths );
2196            
2197             # We have to find the linked path that contains all of the
2198             # witnesses supplied in $path.
2199             my( @path_wits, @alt_path_wits );
2200             @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
2201             @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
2202             my $base_le;
2203             my $alt_le;
2204             foreach my $le ( @linked_paths ) {
2205             if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
2206             $base_le = $le;
2207             }
2208             my @le_wits = sort $self->path_witnesses( $le );
2209             if( _is_within( \@path_wits, \@le_wits ) ) {
2210             # This is the right path.
2211             return $direction eq 'next' ? $le->[1] : $le->[0];
2212             } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
2213             $alt_le = $le;
2214             }
2215             }
2216             # Got this far? Return the alternate path if it exists.
2217             return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
2218             if $alt_le;
2219              
2220             # Got this far? Return the base path if it exists.
2221             return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
2222             if $base_le;
2223              
2224             # Got this far? We have no appropriate path.
2225             warn "Could not find $direction node from " . $node->id
2226             . " along path $path";
2227             return undef;
2228             }
2229              
2230             # Some set logic.
2231             sub _is_within {
2232             my( $set1, $set2 ) = @_;
2233             my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
2234             foreach my $el ( @$set1 ) {
2235             $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
2236             }
2237             return $ret;
2238             }
2239              
2240             # Return the string that joins together a list of witnesses for
2241             # display on a single path.
2242             sub _witnesses_of_label {
2243             my( $self, $label ) = @_;
2244             my $regex = $self->wit_list_separator;
2245             my @answer = split( /\Q$regex\E/, $label );
2246             return @answer;
2247             }
2248              
2249             =head2 common_readings
2250              
2251             Returns the list of common readings in the graph (i.e. those readings that are
2252             shared by all non-lacunose witnesses.)
2253              
2254             =cut
2255              
2256             sub common_readings {
2257             my $self = shift;
2258             my @common = grep { $_->is_common } $self->readings;
2259             return @common;
2260             }
2261              
2262             =head2 path_text( $sigil [, $start, $end, $use_normal_form ] )
2263              
2264             Returns the text of a witness (plus its backup, if we are using a layer) as
2265             stored in the collation. The text is returned as a string, where the
2266             individual readings are joined with spaces and the meta-readings (e.g.
2267             lacunae) are omitted. Optional specification of $start and $end allows the
2268             generation of a subset of the witness text. Optional specification of
2269             $use_normal_form produces a text based on the normal form, rather than the
2270             raw text, of the reading.
2271              
2272             =cut
2273              
2274             sub path_text {
2275             my( $self, $wit, $start, $end, $normal ) = @_;
2276             $start = $self->start unless $start;
2277             $end = $self->end unless $end;
2278             my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
2279             return $self->known_path_text( $normal, @path );
2280             }
2281              
2282             =head2 known_path_text( $use_normal_form, @sequence )
2283              
2284             Returns the text of a given sequence of readings. No attempt is made to
2285             validate the sequence in question. If $use_normal_form is set to true, the
2286             normal form of each reading in the sequence will be used to construct the
2287             text.
2288              
2289             =cut
2290              
2291             sub known_path_text {
2292             my( $self, $normal, @path ) = @_;
2293             my $pathtext = '';
2294             my $last;
2295             foreach my $r ( @path ) {
2296             unless ( $r->join_prior || !$last || $last->join_next ) {
2297             $pathtext .= ' ';
2298             }
2299             $pathtext .= $normal ? $r->normal_form : $r->text;
2300             $last = $r;
2301             }
2302             return $pathtext;
2303             }
2304              
2305             =head1 INITIALIZATION METHODS
2306              
2307             These are mostly for use by parsers.
2308              
2309             =head2 make_witness_path( $witness )
2310              
2311             Link the array of readings contained in $witness->path (and in
2312             $witness->uncorrected_path if it exists) into collation paths.
2313             Clear out the arrays when finished.
2314              
2315             =head2 make_witness_paths
2316              
2317             Call make_witness_path for all witnesses in the tradition.
2318              
2319             =cut
2320              
2321             # For use when a collation is constructed from a base text and an apparatus.
2322             # We have the sequences of readings and just need to add path edges.
2323             # When we are done, clear out the witness path attributes, as they are no
2324             # longer needed.
2325             # TODO Find a way to replace the witness path attributes with encapsulated functions?
2326              
2327             sub make_witness_paths {
2328             my( $self ) = @_;
2329             foreach my $wit ( $self->tradition->witnesses ) {
2330             # say STDERR "Making path for " . $wit->sigil;
2331             $self->make_witness_path( $wit );
2332             }
2333             }
2334              
2335             sub make_witness_path {
2336             my( $self, $wit ) = @_;
2337             my @chain = @{$wit->path};
2338             my $sig = $wit->sigil;
2339             # Add start and end if necessary
2340             unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
2341             push( @chain, $self->end ) unless $chain[-1] eq $self->end;
2342             foreach my $idx ( 0 .. $#chain-1 ) {
2343             $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
2344             }
2345             if( $wit->is_layered ) {
2346             @chain = @{$wit->uncorrected_path};
2347             unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
2348             push( @chain, $self->end ) unless $chain[-1] eq $self->end;
2349             foreach my $idx( 0 .. $#chain-1 ) {
2350             my $source = $chain[$idx];
2351             my $target = $chain[$idx+1];
2352             $self->add_path( $source, $target, $sig.$self->ac_label )
2353             unless $self->has_path( $source, $target, $sig );
2354             }
2355             }
2356             $wit->clear_path;
2357             $wit->clear_uncorrected_path;
2358             }
2359              
2360             =head2 calculate_ranks
2361              
2362             Calculate the reading ranks (that is, their aligned positions relative
2363             to each other) for the graph. This can only be called on linear collations.
2364              
2365             =begin testing
2366              
2367             use Text::Tradition;
2368              
2369             my $cxfile = 't/data/Collatex-16.xml';
2370             my $t = Text::Tradition->new(
2371             'name' => 'inline',
2372             'input' => 'CollateX',
2373             'file' => $cxfile,
2374             );
2375             my $c = $t->collation;
2376              
2377             # Make an svg
2378             my $table = $c->alignment_table;
2379             ok( $c->has_cached_table, "Alignment table was cached" );
2380             is( $c->alignment_table, $table, "Cached table returned upon second call" );
2381             $c->calculate_ranks;
2382             is( $c->alignment_table, $table, "Cached table retained with no rank change" );
2383             $c->add_relationship( 'n13', 'n23', { type => 'repetition' } );
2384             is( $c->alignment_table, $table, "Alignment table unchanged after non-colo relationship add" );
2385             $c->add_relationship( 'n24', 'n23', { type => 'spelling' } );
2386             isnt( $c->alignment_table, $table, "Alignment table changed after colo relationship add" );
2387              
2388             =end testing
2389              
2390             =cut
2391              
2392             sub calculate_ranks {
2393             my $self = shift;
2394             # Save the existing ranks, in case we need to invalidate the cached SVG.
2395             throw( "Cannot calculate ranks on a non-linear graph" )
2396             unless $self->linear;
2397             my %existing_ranks;
2398             map { $existing_ranks{$_} = $_->rank } $self->readings;
2399              
2400             # Do the rankings based on the relationship equivalence graph, starting
2401             # with the start node.
2402             my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
2403              
2404             # Transfer our rankings from the topological graph to the real one.
2405             foreach my $r ( $self->readings ) {
2406             if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
2407             $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
2408             } else {
2409             # Die. Find the last rank we calculated.
2410             my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
2411             <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
2412             $self->readings;
2413             my $last = pop @all_defined;
2414             throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
2415             }
2416             }
2417             # Do we need to invalidate the cached data?
2418             if( $self->has_cached_table ) {
2419             foreach my $r ( $self->readings ) {
2420             next if defined( $existing_ranks{$r} )
2421             && $existing_ranks{$r} == $r->rank;
2422             # Something has changed, so clear the cache
2423             $self->_clear_cache;
2424             # ...and recalculate the common readings.
2425             $self->calculate_common_readings();
2426             last;
2427             }
2428             }
2429             # The graph calculation information is now up to date.
2430             $self->_graphcalc_done(1);
2431             }
2432              
2433             sub _clear_cache {
2434             my $self = shift;
2435             $self->wipe_table if $self->has_cached_table;
2436             }
2437              
2438              
2439             =head2 flatten_ranks
2440              
2441             A convenience method for parsing collation data. Searches the graph for readings
2442             with the same text at the same rank, and merges any that are found.
2443              
2444             =cut
2445              
2446             sub flatten_ranks {
2447             my ( $self, %args ) = shift;
2448             my %unique_rank_rdg;
2449             my $changed;
2450             foreach my $p ( $self->identical_readings( %args ) ) {
2451             # say STDERR "Combining readings at same rank: @$p";
2452             $changed = 1;
2453             $self->merge_readings( @$p );
2454             # TODO see if this now makes a common point.
2455             }
2456             # If we merged readings, the ranks are still fine but the alignment
2457             # table is wrong. Wipe it.
2458             $self->wipe_table() if $changed;
2459             }
2460              
2461             =head2 identical_readings
2462             =head2 identical_readings( start => $startnode, end => $endnode )
2463             =head2 identical_readings( startrank => $startrank, endrank => $endrank )
2464              
2465             Goes through the graph identifying all pairs of readings that appear to be
2466             identical, and therefore able to be merged into a single reading. Returns the
2467             relevant identical pairs. Can be restricted to run over only a part of the
2468             graph, specified either by node or by rank.
2469              
2470             =cut
2471              
2472             sub identical_readings {
2473             my ( $self, %args ) = @_;
2474             # Find where we should start and end.
2475             my $startrank = $args{startrank} || 0;
2476             if( $args{start} ) {
2477             throw( "Starting reading has no rank" ) unless $self->reading( $args{start} )
2478             && $self->reading( $args{start} )->has_rank;
2479             $startrank = $self->reading( $args{start} )->rank;
2480             }
2481             my $endrank = $args{endrank} || $self->end->rank;
2482             if( $args{end} ) {
2483             throw( "Ending reading has no rank" ) unless $self->reading( $args{end} )
2484             && $self->reading( $args{end} )->has_rank;
2485             $endrank = $self->reading( $args{end} )->rank;
2486             }
2487            
2488             # Make sure the ranks are correct.
2489             unless( $self->_graphcalc_done ) {
2490             $self->calculate_ranks;
2491             }
2492             # Go through the readings looking for duplicates.
2493             my %unique_rank_rdg;
2494             my @pairs;
2495             foreach my $rdg ( $self->readings ) {
2496             next unless $rdg->has_rank;
2497             my $rk = $rdg->rank;
2498             next if $rk > $endrank || $rk < $startrank;
2499             my $key = $rk . "||" . $rdg->text;
2500             if( exists $unique_rank_rdg{$key} ) {
2501             # Make sure they don't have different grammatical forms
2502             my $ur = $unique_rank_rdg{$key};
2503             if( $rdg->is_identical( $ur ) ) {
2504             push( @pairs, [ $ur, $rdg ] );
2505             }
2506             } else {
2507             $unique_rank_rdg{$key} = $rdg;
2508             }
2509             }
2510            
2511             return @pairs;
2512             }
2513            
2514              
2515             =head2 calculate_common_readings
2516              
2517             Goes through the graph identifying the readings that appear in every witness
2518             (apart from those with lacunae at that spot.) Marks them as common and returns
2519             the list.
2520              
2521             =begin testing
2522              
2523             use Text::Tradition;
2524              
2525             my $cxfile = 't/data/Collatex-16.xml';
2526             my $t = Text::Tradition->new(
2527             'name' => 'inline',
2528             'input' => 'CollateX',
2529             'file' => $cxfile,
2530             );
2531             my $c = $t->collation;
2532              
2533             my @common = $c->calculate_common_readings();
2534             is( scalar @common, 8, "Found correct number of common readings" );
2535             my @marked = sort $c->common_readings();
2536             is( scalar @common, 8, "All common readings got marked as such" );
2537             my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
2538             is_deeply( \@marked, \@expected, "Found correct list of common readings" );
2539              
2540             =end testing
2541              
2542             =cut
2543              
2544             sub calculate_common_readings {
2545             my $self = shift;
2546             my @common;
2547             map { $_->is_common( 0 ) } $self->readings;
2548             # Implicitly calls calculate_ranks
2549             my $table = $self->alignment_table;
2550             foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
2551             my @row = map { $_->{'tokens'}->[$idx]
2552             ? $_->{'tokens'}->[$idx]->{'t'} : '' }
2553             @{$table->{'alignment'}};
2554             my %hash;
2555             foreach my $r ( @row ) {
2556             if( $r ) {
2557             $hash{$r->id} = $r unless $r->is_meta;
2558             } else {
2559             $hash{'UNDEF'} = $r;
2560             }
2561             }
2562             if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
2563             my( $r ) = values %hash;
2564             $r->is_common( 1 );
2565             push( @common, $r );
2566             }
2567             }
2568             return @common;
2569             }
2570              
2571             =head2 text_from_paths
2572              
2573             Calculate the text array for all witnesses from the path, for later consistency
2574             checking. Only to be used if there is no non-graph-based way to know the
2575             original texts.
2576              
2577             =cut
2578              
2579             sub text_from_paths {
2580             my $self = shift;
2581             foreach my $wit ( $self->tradition->witnesses ) {
2582             my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
2583             my @text;
2584             foreach my $r ( @readings ) {
2585             next if $r->is_meta;
2586             push( @text, $r->text );
2587             }
2588             $wit->text( \@text );
2589             if( $wit->is_layered ) {
2590             my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
2591             $wit->sigil.$self->ac_label );
2592             my @uctext;
2593             foreach my $r ( @ucrdgs ) {
2594             next if $r->is_meta;
2595             push( @uctext, $r->text );
2596             }
2597             $wit->layertext( \@uctext );
2598             }
2599             }
2600             }
2601              
2602             =head1 UTILITY FUNCTIONS
2603              
2604             =head2 common_predecessor( $reading_a, $reading_b )
2605              
2606             Find the last reading that occurs in sequence before both the given readings.
2607             At the very least this should be $self->start.
2608              
2609             =head2 common_successor( $reading_a, $reading_b )
2610              
2611             Find the first reading that occurs in sequence after both the given readings.
2612             At the very least this should be $self->end.
2613            
2614             =begin testing
2615              
2616             use Text::Tradition;
2617              
2618             my $cxfile = 't/data/Collatex-16.xml';
2619             my $t = Text::Tradition->new(
2620             'name' => 'inline',
2621             'input' => 'CollateX',
2622             'file' => $cxfile,
2623             );
2624             my $c = $t->collation;
2625              
2626             is( $c->common_predecessor( 'n24', 'n23' )->id,
2627             'n20', "Found correct common predecessor" );
2628             is( $c->common_successor( 'n24', 'n23' )->id,
2629             '__END__', "Found correct common successor" );
2630              
2631             is( $c->common_predecessor( 'n19', 'n17' )->id,
2632             'n16', "Found correct common predecessor for readings on same path" );
2633             is( $c->common_successor( 'n21', 'n10' )->id,
2634             '__END__', "Found correct common successor for readings on same path" );
2635              
2636             =end testing
2637              
2638             =cut
2639              
2640             ## Return the closest reading that is a predecessor of both the given readings.
2641             sub common_predecessor {
2642             my $self = shift;
2643             my( $r1, $r2 ) = $self->_objectify_args( @_ );
2644             return $self->_common_in_path( $r1, $r2, 'predecessors' );
2645             }
2646              
2647             sub common_successor {
2648             my $self = shift;
2649             my( $r1, $r2 ) = $self->_objectify_args( @_ );
2650             return $self->_common_in_path( $r1, $r2, 'successors' );
2651             }
2652              
2653              
2654             # TODO think about how to do this without ranks...
2655             sub _common_in_path {
2656             my( $self, $r1, $r2, $dir ) = @_;
2657             my $iter = $self->end->rank;
2658             my @candidates;
2659             my @last_r1 = ( $r1 );
2660             my @last_r2 = ( $r2 );
2661             # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
2662             my %all_seen;
2663             # say STDERR "Finding common $dir for $r1, $r2";
2664             while( !@candidates ) {
2665             last unless $iter--; # Avoid looping infinitely
2666             # Iterate separately down the graph from r1 and r2
2667             my( @new_lc1, @new_lc2 );
2668             foreach my $lc ( @last_r1 ) {
2669             foreach my $p ( $lc->$dir ) {
2670             if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
2671             # say STDERR "Path candidate $p from $lc";
2672             push( @candidates, $p );
2673             } elsif( !$all_seen{$p->id} ) {
2674             $all_seen{$p->id} = 'r1';
2675             push( @new_lc1, $p );
2676             }
2677             }
2678             }
2679             foreach my $lc ( @last_r2 ) {
2680             foreach my $p ( $lc->$dir ) {
2681             if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
2682             # say STDERR "Path candidate $p from $lc";
2683             push( @candidates, $p );
2684             } elsif( !$all_seen{$p->id} ) {
2685             $all_seen{$p->id} = 'r2';
2686             push( @new_lc2, $p );
2687             }
2688             }
2689             }
2690             @last_r1 = @new_lc1;
2691             @last_r2 = @new_lc2;
2692             }
2693             my @answer = sort { $a->rank <=> $b->rank } @candidates;
2694             return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
2695             }
2696              
2697             sub throw {
2698             Text::Tradition::Error->throw(
2699             'ident' => 'Collation error',
2700             'message' => $_[0],
2701             );
2702             }
2703              
2704             no Moose;
2705             __PACKAGE__->meta->make_immutable;
2706              
2707             =head1 BUGS/TODO
2708              
2709             =over
2710              
2711             =item * Rework XML serialization in a more modular way
2712              
2713             =back
2714              
2715             =head1 LICENSE
2716              
2717             This package is free software and is provided "as is" without express
2718             or implied warranty. You can redistribute it and/or modify it under
2719             the same terms as Perl itself.
2720              
2721             =head1 AUTHOR
2722              
2723             Tara L Andrews E<lt>aurum@cpan.orgE<gt>