File Coverage

blib/lib/LaTeX/ToUnicode.pm
Criterion Covered Total %
statement 124 179 69.2
branch 19 50 38.0
condition 0 3 0.0
subroutine 17 20 85.0
pod 2 3 66.6
total 162 255 63.5


line stmt bran cond sub pod time code
1 1     1   114541 use strict;
  1         3  
  1         49  
2 1     1   6 use warnings;
  1         2  
  1         105  
3             package LaTeX::ToUnicode;
4             BEGIN {
5 1     1   102 $LaTeX::ToUnicode::VERSION = '1.93';
6             }
7             #ABSTRACT: Convert LaTeX commands to Unicode (simplistically)
8              
9             require Exporter;
10             our @ISA = qw(Exporter);
11             our @EXPORT_OK = qw( convert debuglevel $endcw );
12              
13 1     1   8 use utf8;
  1         2  
  1         6  
14 1     1   665 use Encode;
  1         22674  
  1         121  
15 1     1   791 use LaTeX::ToUnicode::Tables;
  1         5  
  1         4442  
16              
17             # Terminating a control word (not symbol) the way TeX does: at the
18             # boundary between a letter (lookbehind) and a nonletter (lookahead),
19             # and then ignore any following whitespace.
20             our $endcw = qr/(?<=[a-zA-Z])(?=[^a-zA-Z]|$)\s*/;
21              
22             # Debugging output on or off; it's pretty random what gets output.
23             # Add more as needed. There is also more debugging output as warn
24             # statements, commented out, too voluminous to enable here.
25             my $debug = 0;
26              
27 0     0 1 0 sub debuglevel { $debug = shift; }
28             sub _debug {
29 510 50   510   1252 return unless $debug;
30             # The backtrace info is split between caller(0) and caller(1), sigh.
31             # We don't need the package name, it's included in $subr in practice.
32 0         0 my (undef,$filename,$line,undef) = caller(0);
33 0         0 my (undef,undef,undef,$subr) = caller(1);
34 0         0 warn @_, " at $filename:$line ($subr)\n";
35             }
36              
37             # The main conversion function.
38             #
39             sub convert {
40 102     102 1 237874 my ($string, %options) = @_;
41             #warn debug_hash_as_string("starting with: $string", %options);
42              
43             # First, remove leading and trailing horizontal whitespace
44             # on each line of the possibly-multiline string we're given.
45 102         570 $string =~ s/^[ \t]*//m;
46 102         642 $string =~ s/[ \t]*$//m;
47            
48             # For HTML output, must convert special characters that were in the
49             # TeX text (&<>) to their entities to avoid misparsing. We want to
50             # do this first, because conversion of the markup commands might
51             # output HTML tags like <tt>, and we don't want to convert those <>.
52             # Although <tt> works, better to keep the output HTML as
53             # human-readable as we can.
54             #
55 102 50       519 if ($options{html}) {
56 0         0 $string =~ s/([^\\]|^)&/$1&amp;/g;
57 0         0 $string =~ s/</&lt;/g;
58 0         0 $string =~ s/>/&gt;/g;
59             }
60            
61 102         207 my $user_hook = $options{hook};
62 102 50       349 if ($user_hook) {
63 0         0 _debug("before user hook: $string");
64 0         0 $string = &$user_hook($string, \%options);
65 0         0 _debug("after user hook: $string");
66             }
67            
68             # Convert general commands that take arguments, since (1) they might
69             # insert TeX commands that need to be converted, and (2) because
70             # their arguments could well contain constructs that will map to a
71             # Perl string \x{nnnn} for Unicode character nnnn; those Perl braces
72             # for the \x will confuse further parsing of the TeX.
73             #
74 102         285 $string = _convert_commands_with_arg($string);
75 102         401 _debug("after commands with arg: $string");
76            
77             # Convert markups (\texttt, etc.); they have the same brace-parsing issue.
78 102         337 $string = _convert_markups($string, \%options);
79 102         417 _debug("after markups: $string");
80            
81             # And urls, a special case of commands with arguments.
82 102         295 $string = _convert_urls($string, \%options);
83 102         308 _debug("after urls: $string");
84              
85 102         183 $string = _convert_control_words($string);
86 102         491 _debug("after control words: $string");
87              
88 102         261 $string = _convert_control_symbols($string);
89 102         371 _debug("after control symbols: $string");
90              
91 102         265 $string = _convert_accents($string);
92 102 100       315 $string = _convert_german($string) if $options{german};
93 102         250 $string = _convert_symbols($string);
94 102         302 $string = _convert_ligatures($string);
95            
96             # Let's handle ties here, after all the other conversions, since
97             # they don't fit well with any of the tables. We don't handle TeX's
98             # other special characters: $ & # ^ _.
99             #
100             # /~, or ~ at the beginning of a line, is probably part of a url or
101             # path, not a tie. Otherwise, consider it a space, since we can't
102             # distinguish true no-break spots (Donald~E. Knuth) from ties that
103             # are only relevant to a particular line width.
104             #
105 102         262 $string =~ s,([^/])~,$1 ,g;
106            
107             # Remove kerns. Clearly needs generalizing/sharpening to recognize
108             # dimens better, and plenty of other commands could use it.
109             # Here, we only handle literal dimensions ("+1.3pt"), not dimens
110             # referring to control sequences, with or without factors
111             # ("1.1\baselineskip").
112             #_debug("before kern: $string");
113 102         423 my $dimen_re = qr/[-+]?[0-9., ]+[a-z][a-z]\s*/;
114 102         541 $string =~ s!\\kern${endcw}${dimen_re}!!g;
115            
116             # What the heck, let's do \hfuzz and \vfuzz too. They come up pretty
117             # often and it's practically the same thing (just also ignore optional =).
118 102         394 $string =~ s!\\[hv]fuzz${endcw}=?\s*${dimen_re}!!g;
119              
120             # And here is \penalty. natbib outputs \penalty0 sometimes.
121             # Similar with $dimen_re, we only handle literal and decimal
122             # integers here, not things like "0 or `A.
123 102         233 my $number_re = qr/[-+]?[0-9]+\s*/;
124 102         380 $string =~ s!\\penalty${endcw}\s*${number_re}!!g;
125              
126             # After all the conversions, $string contains \x{....} constructs
127             # (Perl Unicode characters) where translations have happened. Change
128             # those to the desired output format. Thus we assume that the
129             # Unicode \x{....}'s are not themselves involved in further
130             # translations, which is, so far, true.
131             #
132 102 50       281 if (! $options{entities}) {
    0          
133             # Convert our \x strings from Tables.pm to the binary characters.
134            
135             # As an extra-special case, we want to preserve the translation of
136             # \{ and \} as 007[bd] entities even if the --entities option is
137             # not given; otherwise they'd get eliminated like all other braces.
138             # Use a temporary cs \xx to keep them marked, and don't use braces
139             # to delimit the argument since they'll get deleted.
140 102         239 $string =~ s/\\x\{(007[bd])\}/\\xx($1)/g;
141            
142             # Convert all other characters to characters.
143             # Assume exactly four hex digits, since we wrote Tables.pm that way.
144 102         585 $string =~ s/\\x\{(....)\}/ pack('U*', hex($1))/eg;
  85         879  
145              
146             } elsif ($options{entities}) {
147             # Convert the XML special characters that appeared in the input,
148             # e.g., from a TeX \&. Unless we're generating HTML output, in
149             # which case they have already been converted.
150 0 0       0 if (! $options{html}) {
151 0         0 $string =~ s/&/&amp;/g;
152 0         0 $string =~ s/</&lt;/g;
153 0         0 $string =~ s/>/&gt;/g;
154             }
155            
156             # Our values in Tables.pm are simple ASCII strings \x{....},
157             # so we can replace them with hex entities with no trouble.
158             # Fortunately TeX does not have a standard \x control sequence.
159 0         0 $string =~ s/\\x\{(....)\}/&#x$1;/g;
160            
161             # The rest of the job is about binary Unicode characters in the
162             # input. We want to transform them into entities also. As always
163             # in Perl, there's more than one way to do it, and several are
164             # described here, just for the fun of it.
165 0         0 my $ret = "";
166             #
167             # decode_utf8 is described in https://perldoc.perl.org/Encode.
168             # Without the decode_utf8, all of these methods output each byte
169             # separately; apparently $string is a byte string at this point,
170             # not a Unicode string. I don't know why that is.
171 0         0 $ret = decode_utf8($string);
172             #
173             # Transform everything that's not printable ASCII or newline into
174             # entities.
175 0         0 $ret =~ s/([^ -~\n])/ sprintf("&#x%04x;", ord($1)) /eg;
  0         0  
176             #
177             # This method leaves control characters as literal; doesn't matter
178             # for XML output, since control characters aren't allowed, but
179             # let's use the regexp method anyway.
180             #$ret = encode("ascii", decode_utf8($string), Encode::FB_XMLCREF);
181             #
182             # The nice_string function from perluniintro also works.
183             #
184             # This fails, just outputs numbers (that is, ord values):
185             # foreach my $c (unpack("U*", $ret)) {
186             #
187             # Without the decode_utf8, outputs each byte separately.
188             # With the decode_utf8, works, but the above seems cleaner.
189             #foreach my $c (split(//, $ret)) {
190             # if (ord($c) <= 31 || ord($c) >= 128) {
191             # $ret .= sprintf("&#x%04x;", ord($c));
192             # } else {
193             # $ret .= $c;
194             # }
195             #}
196             #
197 0         0 $string = $ret; # assigned from above.
198             }
199              
200 102 50       339 if ($string =~ /\\x\{/) {
201 0         0 warn "LaTeX::ToUnicode::convert: untranslated \\x remains: $string\n";
202 0         0 warn "LaTeX::ToUnicode::convert: please report as bug.\n";
203             }
204            
205             # Drop all remaining braces.
206 102         374 $string =~ s/[{}]//g;
207            
208 102 50       276 if (! $options{entities}) {
209             # With all the other braces gone, now we can convert the preserved
210             # brace entities from \{ and \} to actual braces.
211 102         185 $string =~ s/\\xx\((007[bd])\)/ pack('U*', hex($1))/eg;
  2         8  
212             }
213              
214             # Backslashes might remain. Don't remove them, as it makes for a
215             # useful way to find unhandled commands.
216              
217             # leave newlines alone, but trim spaces and tabs.
218 102         313 $string =~ s/^[ \t]+//s; # remove leading whitespace
219 102         293 $string =~ s/[ \t]+$//s; # remove trailing whitespace
220 102         190 $string =~ s/[ \t]+/ /gs; # collapse all remaining whitespace to one space
221            
222 102         988 $string;
223             }
224              
225             # Convert commands that take a single braced argument. The table
226             # defines text we're supposed to insert before and after the argument.
227             # We let future processing handle conversion of both the inserted text
228             # and the argument.
229             #
230             sub _convert_commands_with_arg {
231 102     102   242 my $string = shift;
232              
233 102         427 foreach my $cmd ( keys %LaTeX::ToUnicode::Tables::ARGUMENT_COMMANDS ) {
234 408         828 my $repl = $LaTeX::ToUnicode::Tables::ARGUMENT_COMMANDS{$cmd};
235 408         631 my $lft = $repl->[0]; # ref to two-element list
236 408         610 my $rht = $repl->[1];
237             # \cmd{foo} -> LFT foo RHT
238 408         14902 $string =~ s/\\$cmd${endcw}\{(.*?)\}/$lft$1$rht/g;
239             #warn "replaced arg $cmd, yielding $string\n";
240             }
241            
242 102         402 $string;
243             }
244              
245             # Convert url commands in STRING. This is a special case of commands
246             # with arguments: \url{u} and \href{u}{desc text}. The HTML output
247             # (generated if $OPTIONS{html} is set) is just too special to be handled
248             # in a table; further, \href is the only two-argument command we are
249             # currently handling.
250             #
251             sub _convert_urls {
252 102     102   241 my ($string,$options) = @_;
253              
254 102 50       290 if ($options->{html}) {
255             # HTML output.
256             # \url{URL} -> <a href="URL">URL</a>
257 0         0 $string =~ s,\\url$endcw\{([^}]*)\}
258             ,<a href="$1">$1</a>,gx;
259             #
260             # \href{URL}{TEXT} -> <a href="URL">TEXT</a>
261 0         0 $string =~ s,\\href$endcw\{([^}]*)\}\s*\{([^}]*)\}
262             ,<a href="$1">$2</a>,gx;
263              
264             } else {
265             # plain text output.
266             # \url{URL} -> URL
267 102         487 $string =~ s/\\url$endcw\{([^}]*)\}/$1/g;
268             #
269             # \href{URL}{TEXT} -> TEXT (URL)
270             # but, as a special case, if URL ends with TEXT, just output URL,
271             # as in:
272             # \href{https://doi.org/10/fjzzc8}{10/fjzzc8}
273             # ->
274             # https://doi.org/10/fjzzc8
275             #
276             # Yet more specialness: the TEXT might have extra braces, as in
277             # \href{https://doi.org/10/fjzzc8}{{10/fjzzc8}}
278             # left over from previous markup commands (\path) which got
279             # removed. We want to accept and ignore such extra braces,
280             # hence the \{+ ... \}+ in recognizing TEXT.
281             #
282             #warn "txt url: starting with $string\n";
283 102 50       522 if ($string =~ m/\\href$endcw\{([^}]*)\}\s*\{+([^}]*)\}+/) {
284 0         0 my $url = $1;
285 0         0 my $text = $2;
286             #warn " url: $url\n";
287             #warn " text: $text\n";
288 0 0       0 my $repl = ($url =~ m!$text$!) ? $url : "$text ($url)";
289             #warn " repl: $repl\n";
290 0         0 $string =~ s/\\href$endcw\{([^}]*)\}\s*\{+([^}]*)\}+/$repl/;
291             #warn " result: $string\n";
292             }
293             }
294            
295 102         193 $string;
296             }
297              
298             # Convert control words (not symbols), that is, a backslash and an
299             # alphabetic sequence of characters terminated by a non-alphabetic
300             # character. Following whitespace is ignored.
301             #
302             sub _convert_control_words {
303 102     102   189 my $string = shift;
304              
305 102         1074 foreach my $command ( keys %LaTeX::ToUnicode::Tables::CONTROL_WORDS ) {
306 7242         13895 my $repl = $LaTeX::ToUnicode::Tables::CONTROL_WORDS{$command};
307             # replace {\CMD}, whitespace ignored after \CMD.
308 7242         189345 $string =~ s/\{\\$command$endcw\}/$repl/g;
309            
310             # replace \CMD, preceded by not-consumed non-backslash.
311 7242         181829 $string =~ s/(?<=[^\\])\\$command$endcw/$repl/g;
312            
313             # replace \CMD at beginning of whole string, which otherwise
314             # wouldn't be matched. Two separate regexps to avoid
315             # variable-length lookbehind.
316 7242         173578 $string =~ s/^\\$command$endcw/$repl/g;
317             }
318              
319 102         941 $string;
320             }
321              
322             # Convert control symbols, other than accents. Much simpler than
323             # control words, since are self-delimiting, don't take arguments, and
324             # don't consume any following text.
325             #
326             sub _convert_control_symbols {
327 102     102   204 my $string = shift;
328              
329 102         870 foreach my $symbol ( keys %LaTeX::ToUnicode::Tables::CONTROL_SYMBOLS ) {
330 2754         4357 my $repl = $LaTeX::ToUnicode::Tables::CONTROL_SYMBOLS{$symbol};
331              
332             # because these are not alphabetic, we can quotemeta them,
333             # and we need to because "\" is one of the symbols.
334 2754         3471 my $rx = quotemeta($symbol);
335            
336             # the preceding character must not be a backslash, else "\\ "
337             # could have the "\ " seen first as a control space, leaving
338             # a spurious \ behind. Don't consume the preceding.
339             # Or it could be at the beginning of a line.
340             #
341 2754         32551 $string =~ s/(^|(?<=[^\\]))\\$rx/$repl/g;
342             #warn "after sym $symbol (\\$rx -> $repl), have: $string\n";
343             }
344              
345 102         1149 $string;
346             }
347              
348             # Convert accents.
349             #
350             sub _convert_accents {
351 102     102   168 my $string = shift;
352            
353             # first the non-alphabetic accent commands, like \".
354 102         808 my %tbl = %LaTeX::ToUnicode::Tables::ACCENT_SYMBOLS;
355 102 100       489 $string =~ s/(\{\\(.)\s*\{(\\?\w{1,2})\}\})/$tbl{$2}{$3} || $1/eg; #{\"{a}}
  29         321  
356 102 100       719 $string =~ s/(\{\\(.)\s*(\\?\w{1,2})\})/ $tbl{$2}{$3} || $1/eg; # {\"a}
  47         519  
357 102 50       449 $string =~ s/(\\(.)\s*(\\?\w{1,1}))/ $tbl{$2}{$3} || $1/eg; # \"a
  6         40  
358 102 100       407 $string =~ s/(\\(.)\s*\{(\\?\w{1,2})\})/ $tbl{$2}{$3} || $1/eg; # \"{a}
  20         125  
359            
360             # second the alphabetic commands, like \c. They have to be handled
361             # differently because \cc is not \c{c}! The only difference in the
362             # regular expressions is using $endcw instead of just \s*.
363             #
364 102         804 %tbl = %LaTeX::ToUnicode::Tables::ACCENT_LETTERS;
365 102 50       732 $string =~ s/(\{\\(.)$endcw\{(\\?\w{1,2})\}\})/$tbl{$2}{$3} || $1/eg; #{\"{a}}
  19         106  
366 102 0       508 $string =~ s/(\{\\(.)$endcw(\\?\w{1,2})\})/ $tbl{$2}{$3} || $1/eg; # {\"a}
  0         0  
367 102 0       608 $string =~ s/(\\(.)$endcw(\\?\w{1,1}))/ $tbl{$2}{$3} || $1/eg; # \"a
  0         0  
368 102 0       548 $string =~ s/(\\(.)$endcw\{(\\?\w{1,2})\})/ $tbl{$2}{$3} || $1/eg; # \"{a}
  0         0  
369            
370            
371             # The argument is just one \w character for the \"a case, not two,
372             # because otherwise we might consume a following character that is
373             # not part of the accent, e.g., a backslash (\"a\'e).
374             #
375             # Others can be two because of the \t tie-after accent. Even {\t oo} is ok.
376             #
377             # Allow whitespace after the \CMD in all cases, e.g., "\c c". Even
378             # for the control symbols, it turns out spaces are ignored there
379             # (as in \" o), unlike the usual syntax.
380             #
381             # Some non-word constituents would work, but in practice we hope
382             # everyone just uses letters.
383              
384 102         360 $string;
385             }
386              
387             # For the [n]german package.
388             sub _convert_german {
389 3     3   6 my $string = shift;
390              
391 3         43 foreach my $symbol ( keys %LaTeX::ToUnicode::Tables::GERMAN ) {
392 87         345 $string =~ s/\Q$symbol\E/$LaTeX::ToUnicode::Tables::GERMAN{$symbol}/g;
393             }
394 3         9 $string;
395             }
396              
397             # Control words that produce printed symbols (and letters in languages
398             # other than English), that is.
399             #
400             sub _convert_symbols {
401 102     102   163 my $string = shift;
402              
403 102         597 foreach my $symbol ( keys %LaTeX::ToUnicode::Tables::SYMBOLS ) {
404 2652         4691 my $repl = $LaTeX::ToUnicode::Tables::SYMBOLS{$symbol};
405             # preceded by a (non-consumed) non-backslash,
406             # usual termination for a control word.
407             # These commands don't take arguments.
408 2652         67544 $string =~ s/(?<=[^\\])\\$symbol$endcw/$repl/g;
409            
410             # or the beginning of the whole string:
411 2652         64120 $string =~ s/^\\$symbol$endcw/$repl/g;
412             }
413 102         443 $string;
414             }
415              
416             # Special character sequences, not \commands. They aren't all
417             # technically ligatures, but no matter.
418             #
419             sub _convert_ligatures {
420 102     102   273 my $string = shift;
421              
422             # have to convert these in order specified.
423 102         571 my @ligs = @LaTeX::ToUnicode::Tables::LIGATURES;
424 102         398 for (my $i = 0; $i < @ligs; $i+=2) {
425 816         1143 my $in = $ligs[$i];
426 816         1096 my $out = $ligs[$i+1];
427 816         5895 $string =~ s/\Q$in\E/$out/g;
428             }
429 102         328 $string;
430             }
431              
432             #
433             # Convert LaTeX markup commands in STRING like \textbf{...} and
434             # {\bfshape ...} and {\bf ...}.
435             #
436             # If we're aiming for plain text output, they are just cleared away (the
437             # braces are not removed).
438             #
439             # If we're generating HTML output ("html" key is set in $OPTIONS hash
440             # ref), we use the value in the hash, so that \textbf{foo} becomes
441             # <b>foo</b>. Nested markup doesn't work.
442             #
443             sub _convert_markups {
444 102     102   285 my ($string, $options) = @_;
445            
446             # HTML is different.
447 102 50       294 return _convert_markups_html($string) if $options->{html};
448            
449             # Not HTML, so here we'll "convert" to plain text by removing the
450             # markup commands.
451              
452             # we can do all the markup commands at once.
453 102         718 my $markups = join('|', keys %LaTeX::ToUnicode::Tables::MARKUPS);
454            
455             #warn "_convert_markups: markups = $markups\n";
456             #warn "_convert_markups plain text: starting with $string\n";
457             # Remove \textMARKUP{...}, leaving just the {...}
458 102         694 $string =~ s/\\text($markups)$endcw//g;
459             #warn " after \text: $string\n";
460              
461             # Similarly remove \MARKUPshape, plus remove \upshape.
462 102         440 $string =~ s/\\($markups|up)shape$endcw//g;
463             #warn " after \...shape: $string\n";
464              
465             # Remove braces and \command in: {... \MARKUP ...}
466             # where neither ... can contain braces.
467 102         525 $string =~ s/(\{[^{}]+)\\(?:$markups)$endcw([^{}]+\})/$1$2/g;
468             #warn " after ...\\markup...: $string\n";
469              
470             # Remove braces and \command in: {\MARKUP ...}
471 102         863 $string =~ s/\{\\(?:$markups)$endcw([^{}]*)\}/$1/g;
472             #warn " after {\\markup...}: $string\n";
473              
474             # Remove: {\MARKUP
475             # Although this will leave unmatched } chars behind, there's no
476             # alternative without full parsing, since the bib entry will often
477             # look like: {\em {The TeX{}book}}. Also might, in principle, be
478             # at the end of a line.
479 102         529 $string =~ s/\{\\(?:$markups)$endcw//g;
480             #warn " after {\\markup: $string\n";
481              
482             # Ultimately we remove all braces in ltx2crossrefxml SanitizeText fns,
483             # so the unmatched braces don't matter ... that code should be moved here.
484              
485 102         278 $string;
486             }
487              
488             # Convert \markup in STRING to html. We can't always figure out where to
489             # put the end tag, but we always put it somewhere. We don't even attempt
490             # to handle nested markup.
491             #
492             sub _convert_markups_html {
493 0     0     my ($string) = @_;
494            
495 0           my %MARKUPS = %LaTeX::ToUnicode::Tables::MARKUPS;
496             # have to consider each markup \command separately.
497 0           for my $markup (keys %MARKUPS) {
498 0           my $hcmd = $MARKUPS{$markup}; # some TeX commands don't translate
499 0 0         my $tag = $hcmd ? "<$hcmd>" : "";
500 0 0         my $end_tag = $hcmd ? "</$hcmd>" : "";
501            
502             # The easy one: \textMARKUP{...}
503 0           $string =~ s/\\text$markup$endcw\{(.*?)\}/$tag$1$end_tag/g;
504              
505             # {x\MARKUP(shape) y} -> x<TAG>y</TAG> (leave out braces)
506 0           $string =~ s/\{([^{}]+)\\$markup(shape)?$endcw([^{}]+)\}
507             /$1$tag$3$end_tag/gx;
508              
509             # {\MARKUP(shape) y} -> <TAG>y</TAG>. Same as previous but without
510             # the x part. Could do it in one regex but this seems clearer.
511 0           $string =~ s/\{\\$markup(shape)?$endcw([^{}]+)\}
512             /$tag$2$end_tag/gx;
513            
514             # for {\MARKUP(shape) ... with no matching brace, we don't know
515             # where to put the end tag, so seems best to do nothing.
516             }
517            
518 0           $string;
519             }
520              
521            
522             ##############################################################
523             # debug_hash_as_string($LABEL, HASH)
524             #
525             # Return LABEL followed by HASH elements, followed by a newline, as a
526             # single string. If HASH is a reference, it is followed (but no recursive
527             # dereferencing).
528             ###############################################################
529             sub debug_hash_as_string {
530 0     0 0   my ($label) = shift;
531 0 0 0       my (%hash) = (ref $_[0] && $_[0] =~ /.*HASH.*/) ? %{$_[0]} : @_;
  0            
532              
533 0           my $str = "$label: {";
534 0           my @items = ();
535 0           for my $key (sort keys %hash) {
536 0           my $val = $hash{$key};
537 0 0         $val = ".undef" if ! defined $val;
538 0           $key =~ s/\n/\\n/g;
539 0           $val =~ s/\n/\\n/g;
540 0           push (@items, "$key:$val");
541             }
542 0           $str .= join (",", @items);
543 0           $str .= "}";
544              
545 0           return "$str\n";
546             }
547              
548             1;
549              
550             __END__