File Coverage

blib/lib/Math/DyckWords.pm

Criterion	Covered	Total	%
statement	117	119	98.3
branch	31	34	91.1
condition	31	51	60.7
subroutine	16	16	100.0
pod	8	9	88.8
total	203	229	88.6

line	stmt	bran	cond	sub	pod	time	code
1							#######################################################################
2							# $Id: DyckWords.pm,v 1.2 2010/04/14 03:41:06 mmertel Exp $
3
4							=head1 NAME
5
6							Math::DyckWords - Perl module for generating Dyck words. Dyck words
7							are named after the mathematician Walther von Dyck.
8
9							=head1 SYNOPSIS
10
11							use Math::DyckWords qw( dyck_words_by_lex
12							dyck_words_by_position
13							dyck_words_by_swap
14							ranking
15							unranking
16							catalan_number );
17
18							@words = dyck_words_by_lex( 4 );
19							@words = dyck_words_by_position( 4 );
20							@words = dyck_words_by_swap( 4 );
21							$rank = ranking( '01010101' );
22							$word = unranking( 3, 2 );
23
24							=head1 DESCRIPTION
25
26							Dyck words are even numbered string of X's and Y's, or 0's and 1's,
27							or any other binary alphabet for that matter, such that no initial
28							segment has more Y's or 1's. The following are the Dyck words of
29							length 2n where n = 3:
30
31							000111
32							010011
33							010101
34							001101
35							001011
36
37							Another common use of Dyck words is in dealing with the balanced
38							parenthesis problem. Substituting the left and right parentheses
39							for the 0's an 1's listed above we have:
40
41							((()))
42							()(())
43							()()()
44							(())()
45							(()())
46
47							There is also a relationship between Dyck words and Catalan numbers.
48							Catalan numbers have many applications in combinatorics and consists
49							of a sequence of ever increasing integers following the formula:
50
51							(2n)!/(n!(n+1)!)
52
53							The first few numbers in the Catalan sequence are:
54
55							1, 1, 2, 5, 14, 132, 429, 1430, 4862, 16796, 58786, 208012
56
57							The relationship between Dyck words and the Catalan sequence can
58							be easily seen as the nth Catalan number is equal to the number of
59							permutations, or unique Dyck words of length 2n. For example,
60							the 3rd Catalan number, using a zero index, is 5. This is the same
61							number of Dyck words of length 2n where n = 3.
62
63							The algorithms in this module are based on those presented in the
64							scholarly paper "Generating and ranking of Dyck words" by Zoltan Kasa
65							available on-line at http://arxiv4.library.cornell.edu/pdf/1002.2625,
66							and the provide three different Dyck word generators - lexigraphical,
67							positional, and one that generates Dyck words by swapping characters.
68
69							=head1 EXPORT
70
71							None by default.
72
73							=cut
74
75							package Math::DyckWords;
76
77	1			1		30798	use 5.006;
	1					4
	1					44
78	1			1		7	use strict;
	1					1
	1					36
79	1			1		6	use warnings;
	1					7
	1					43
80
81	1			1		5	use Carp;
	1					2
	1					118
82	1			1		1834	use Data::Dumper;
	1					13282
	1					86
83	1			1		4816	use Math::BigInt;
	1					26829
	1					7
84	1			1		22058	use Exporter;
	1					3
	1					1896
85
86							our $VERSION = '0.03';
87							our @ISA = qw( Exporter );
88							our @EXPORT_OK = qw( dyck_words_by_lex
89							dyck_words_by_position
90							dyck_words_by_swap
91							ranking
92							unranking
93							catalan_number );
94
95							=head1 FUNCTIONS
96
97							=over
98
99							=item dyck_words_by_lex( $n )
100
101							This algorithm returns a list of all Dyck words of length 2n in ascending
102							lexicographic order, i.e. 000111, 001011, 001101, 010011, 010101
103
104							=back
105
106							=cut
107
108							my @words;
109
110							sub dyck_words_by_lex {
111	195			195	1	292	my ( $n, $X, $i, $n0, $n1 ) = @_;
112
113							# initialization - the first time called, the only argument
114							# is the length 2n of the words
115	195	100				360	if( not defined $X ) {
116	1					3	( $X, $i, $n0, $n1 ) = ( '0', 1, 1, 0 );
117	1					3	@words = ();
118							}
119
120							# Case 1: We can continue by adding 0 and 1.
121	195	100	66			689	if( $n0 < $n && $n1 < $n && $n0 > $n1 ) {
			100
122	41					132	dyck_words_by_lex( $n, $X . '0', $i++, $n0 + 1, $n1 );
123	41					120	dyck_words_by_lex( $n, $X . '1', $i++, $n0, $n1 + 1 );
124							}
125
126							# Case 2: We can continue by adding 0 only.
127	195	100	66			1107	if( ( $n0 < $n && $n1 < $n && $n0 == $n1 ) \|\|
			100
			66
			66
128							( $n0 < $n && $n1 == $n ) )
129							{
130	22					58	dyck_words_by_lex( $n, $X . '0', $i++, $n0 + 1, $n1 );
131							}
132
133							# Case 3: We can continue by adding 1 only.
134	195	100	100			674	if( $n0 == $n && $n1 < $n ) {
135	90					259	dyck_words_by_lex( $n, $X . '1', $i++, $n0, $n1 + 1 );
136							}
137
138							# Case 5: A Dyck word is obtained.
139	195	100	100			708	if( $n0 == $n && $n1 == $n ) {
140	42					72	push @words, $X;
141							}
142
143							# All Dyck words have been obtained
144	195					259	return @words;
145							}
146
147							=over
148
149							=item dyck_words_by_position( $n )
150
151							This algorithm returns a list of all Dyck words of length 2n in descending
152							lexicographic order, i.e. 010101, 010011, 001101, 001011, 000111.
153
154							=back
155
156							=cut
157
158							sub dyck_words_by_position {
159	1			1	1	382	my $n = shift;
160
161							# reset the return list
162	1					5	@words = ();
163
164							# generate the maximum Dyck word of length n - which has 1s in all
165							# even numbered positions, i.e. 2468 = 01010101
166	1					3	my @b = map { $_ * 2 } ( 1 .. $n );
	5					10
167
168							# set a flag
169	1					3	my $found = 1;
170
171	1					5	while( $found ) {
172							# save the Dyck word to the return list
173	42					633	push @words, translate_positions( @b );
174
175							# reset flag
176	42					54	$found = 0;
177
178							# reverse iterate through the length of the word
179							# setting the appropriate bits to 1's or 0's
180	42					94	for( my $i = $n - 1; $i >= 1; $i-- ) {
181	89	100				210	if( $b[ $i - 1 ] < $n + $i ) {
182	41					43	$b[ $i - 1 ] += 1;
183
184	41					88	for( my $j = $i + 1; $j <= $n - 1; $j++ ) {
185	44	100				135	$b[ $j - 1 ] = $b[ $j - 2 ] + 1 > $j * 2
186							? $b[ $j - 2 ] + 1
187							: $j * 2;
188							}
189	41					38	$found = 1;
190	41					86	last;
191							}
192							}
193							}
194	1					26	return @words;
195							}
196
197							=over
198
199							=item translate_positions( @p )
200
201							This function translates an array of integer values indicating
202							the position of 1's in the resultant Dyck word, and is called by
203							the dyck_words_by_position function.
204
205							=back
206
207							=cut
208
209							sub translate_positions( @ ) {
210	43			43	1	54	my $n = scalar @_;
211
212							# convert the list of positions to a hash for easier lookup
213	43					42	my %position;
214	43					193	@position{ @_ } = @_;
215
216	43					56	my $word;
217	43					96	for( my $i = 0; $i < $n * 2; $i++ ) {
218	430	100				1084	$word .= exists $position{ $i + 1 } ? '1' : '0';
219							}
220	43					140	return $word;
221							}
222
223							=over
224
225							=item dyck_words_by_position( $n )
226
227							This algorithm returns a list of all Dyck words of length 2n in no
228							particular order, i.e. 010101, 001101, 001011, 000111, 010011. This
229							is done by changing the first occurrence of '10' to '01'.
230
231							=back
232
233							=cut
234
235							sub dyck_words_by_swap {
236	42			42	0	388	my ( $n, $X, $k ) = @_;
237
238	42	100				84	if( not defined $X ) {
239	1					5	$X = join '', ( '01' x $n );
240	1					2	$k = 0;
241	1					5	@words = ( $X );
242							}
243
244	42					50	my $i = $k;
245
246	42					94	while( $i < $n * 2 ) {
247	83					153	my $j = index( $X, '10', $i );
248	83	100				139	if( $j > 0 ) {
249	41					204	my @Y = split //, $X;
250							# swap
251	41					125	( $Y[ $j ], $Y[ $j + 1 ] ) =
252							( $Y[ $j + 1 ], $Y[ $j ] );
253	41					89	my $Y = join '', @Y;
254	41					62	push @words, $Y;
255	41					94	dyck_words_by_swap( $n, $Y, $j - 1 );
256	41					148	$i = $j + 2;
257							}
258							else {
259	42					80	return @words;
260							}
261							}
262							}
263
264							=over
265
266							=item monotonic_path_count( $n, $i, $j )
267
268							Ranking Dyck words means to determine the position of a Dyck
269							word in a given ordered sequence of all Dyck words.
270							For ranking these words we will use the following function,
271							where f(n,i,j) represents the number of paths between (0,0)
272							and (i,j) not crossing the diagonal x = y of the grid.
273
274							=back
275
276							=cut
277
278							sub monotonic_path_count {
279	141			141	1	151	my ( $n, $i, $j ) = @_;
280
281	141	100	33			861	if( $n >= $i and $i >= 0 and $j == 0 ) {
			66
282	58					163	return 1;
283							}
284	83	100	66			381	if( $n >= $i and $i > $j and $j >= 1 ) {
			66
285	47					101	return monotonic_path_count( $n, $i - 1, $j ) +
286							monotonic_path_count( $n, $i, $j - 1 );
287							}
288	36	50	33			181	if( $n >= $i and $i >= 1 and $j == $i ) {
			33
289	36					73	return monotonic_path_count( $n, $i, $i - 1 );
290							}
291	0	0	0			0	if( $n >= $j and $j > $i and $i >= 0 ) {
			0
292	0					0	return 0;
293							}
294							}
295
296							=over
297
298							=item positions( $w )
299
300							This function converts a Dyck word string of 1's and 0's into a list
301							of positions where the 1's are located, i.e. 2468 => 01010101
302
303							=back
304
305							=cut
306
307							sub positions( $ ) {
308	1			1	1	3	my $w = shift;
309
310	1					5	my ( $i, @p ) = ( 1, () );
311	1					10	foreach my $p ( split //, $w ) {
312	10	100				24	if( $p == 1 ) {
313	5					10	push @p, $i;
314							}
315	10					16	$i++;
316							}
317	1					6	return @p;
318							}
319
320							=over
321
322							=item ranking( $w )
323
324							This function returns the rank of an individual Dyck word $w in the
325							list of all Dyck words of the same length.
326
327							=back
328
329							=cut
330
331							sub ranking( $ ) {
332	1			1	1	1037	my @b = positions( shift );
333
334	1					3	my @c = ( 2 );
335	1					13	my $n = scalar @b;
336	1					6	for( my $j = 2; $j <= $n; $j++ ) {
337	4	100				21	$c[ $j - 1 ] = $b[ $j - 2 ] + 1 > $j * 2
338							? $b[ $j - 2 ] + 1
339							: $j * 2;
340							}
341	1					2	my $nr = 1;
342	1					5	for( my $i = 1; $i <= $n - 1; $i++ ) {
343	4					209	for( my $j = $c[ $i - 1]; $j <= $b[ $i - 1] - 1; $j++ ) {
344	3					9	$nr = $nr + monotonic_path_count( $n, $n - $i, $n + $i - $j );
345							}
346							}
347	1					4	return $nr;
348							}
349
350							=over
351
352							=item unranking( $n, $r )
353
354							This function returns the rank $r Dyck word of length $n.
355
356							=back
357
358							=cut
359
360							sub unranking( $$ ) {
361	1			1	1	361	my ( $n, $nr ) = @_;
362
363							# initialize the dyck word to all '0'
364	1					6	my @b = ( '0' x ( $n * 2 ) );
365
366	1					2	$nr--;
367
368	1					6	for( my $i = 1; $i <= $n; $i++ ) {
369	5	100				18	$b[ $i ] = $b[ $i - 1 ] + 1 > $i * 2
370							? $b[ $i - 1 ] + 1
371							: $i * 2;
372
373	5					9	my $j = $n + $i - $b[ $i ];
374	5					11	my $np = monotonic_path_count( $n, $n - $i, $j );
375
376	5		66			26	while( $nr >= $np && ( $b[ $i ] < $n + $i ) ) {
377	3					4	$nr = $nr - $np;
378	3					4	$b[ $i ] = $b[ $i ] + 1;
379	3					5	$j = $j - 1;
380	3					6	$np = monotonic_path_count( $n, $n - $i, $j );
381							}
382							}
383							# discard the zeroth element of the list of positions
384	1					3	shift @b;
385
386	1					5	return translate_positions( @b );
387							}
388
389							=over
390
391							=item catalan_number( $n )
392
393							Using the formula - (2n)!/(n!(n+1)!) - this function returns the
394							corresponding number $n from the Catalan sequence.
395
396							=back
397
398							=cut
399
400							sub catalan_number( $ ) {
401	1			1	1	261	my $x = shift;
402
403	1					13	my $X = Math::BigInt->new( $x );
404	1					110	my $Y = $X->copy;
405	1					21	my $Z = $X->copy;
406
407	1					16	return $X->bmul( 2 )->bfac->bdiv(
408							$Y->bfac->bmul( $Z->badd( 1 )->bfac )
409							);
410							}
411
412							1;