File Coverage

blib/lib/Text/SmartyPants.pm
Criterion Covered Total %
statement 91 221 41.1
branch 27 118 22.8
condition 4 26 15.3
subroutine 9 17 52.9
pod 14 14 100.0
total 145 396 36.6


line stmt bran cond sub pod time code
1             package Text::SmartyPants;
2 27     27   10484 use strict;
  27         67  
  27         866  
3 27     27   140 use vars qw($VERSION);
  27         56  
  27         47348  
4             $VERSION = "1.3";
5              
6             # Configurable variables:
7             my $smartypants_attr = "1";
8              
9             # 1 => "--" for em-dashes; no en-dash support
10             # 2 => "---" for em-dashes; "--" for en-dashes
11             # 3 => "--" for em-dashes; "---" for en-dashes
12             # See docs for more configuration options.
13              
14             # Globals:
15             my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script)[\s>]!;
16              
17             =head1 Methods
18              
19             =head2 process
20              
21             Do the bulk of the conversion work.
22              
23             =cut
24              
25             sub process {
26 36 50   36 1 171 shift if ( $_[0] eq __PACKAGE__ ); # oops, called in OOP fashion.
27              
28             # Parameters:
29 36         82 my $text = shift; # text to be parsed
30              
31             # value of the smart_quotes="" attribute. Default to 'everything on'
32 36   50     195 my $attr = shift || '1';
33              
34             # Options to specify which transformations to make:
35 36         93 my ( $do_quotes, $do_backticks, $do_dashes, $do_ellipses, $do_stupefy );
36              
37             # should we translate &quot; entities into normal quotes?
38 36         79 my $convert_quot = 0;
39              
40             # Parse attributes:
41             # 0 : do nothing
42             # 1 : set all
43             # 2 : set all, using old school en- and em- dash shortcuts
44             # 3 : set all, using inverted old school en and em- dash shortcuts
45             #
46             # q : quotes
47             # b : backtick quotes (``double'' only)
48             # B : backtick quotes (``double'' and `single')
49             # d : dashes
50             # D : old school dashes
51             # i : inverted old school dashes
52             # e : ellipses
53             # w : convert &quot; entities to " for Dreamweaver users
54              
55 36 50       160 if ( $attr eq "0" ) {
    50          
    0          
    0          
    0          
56              
57             # Do nothing.
58 0         0 return $text;
59             }
60             elsif ( $attr eq "1" ) {
61              
62             # Do everything, turn all options on.
63 36         72 $do_quotes = 1;
64 36         65 $do_backticks = 1;
65 36         65 $do_dashes = 1;
66 36         61 $do_ellipses = 1;
67             }
68             elsif ( $attr eq "2" ) {
69              
70             # Do everything, turn all options on, use old school dash shorthand.
71 0         0 $do_quotes = 1;
72 0         0 $do_backticks = 1;
73 0         0 $do_dashes = 2;
74 0         0 $do_ellipses = 1;
75             }
76             elsif ( $attr eq "3" ) {
77              
78             # Do everything, turn all options on, use inverted old school dash shorthand.
79 0         0 $do_quotes = 1;
80 0         0 $do_backticks = 1;
81 0         0 $do_dashes = 3;
82 0         0 $do_ellipses = 1;
83             }
84             elsif ( $attr eq "-1" ) {
85              
86             # Special "stupefy" mode.
87 0         0 $do_stupefy = 1;
88             }
89             else {
90 0         0 my @chars = split( //, $attr );
91 0         0 foreach my $c (@chars) {
92 0 0       0 if ( $c eq "q" ) { $do_quotes = 1; }
  0 0       0  
    0          
    0          
    0          
    0          
    0          
    0          
93 0         0 elsif ( $c eq "b" ) { $do_backticks = 1; }
94 0         0 elsif ( $c eq "B" ) { $do_backticks = 2; }
95 0         0 elsif ( $c eq "d" ) { $do_dashes = 1; }
96 0         0 elsif ( $c eq "D" ) { $do_dashes = 2; }
97 0         0 elsif ( $c eq "i" ) { $do_dashes = 3; }
98 0         0 elsif ( $c eq "e" ) { $do_ellipses = 1; }
99 0         0 elsif ( $c eq "w" ) { $convert_quot = 1; }
100             else {
101              
102             # Unknown attribute option, ignore.
103             }
104             }
105             }
106              
107 36   33     217 my $tokens ||= _tokenize($text);
108 36         80 my $result = '';
109 36         79 my $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags.
110              
111 36         71 my $prev_token_last_char = ""; # This is a cheat, used to get some context
112             # for one-character tokens that consist of
113             # just a quote char. What we do is remember
114             # the last character of the previous text
115             # token, to use as context to curl single-
116             # character quote tokens correctly.
117              
118 36         101 foreach my $cur_token (@$tokens) {
119 400 100       812 if ( $cur_token->[0] eq "tag" ) {
120              
121             # Don't mess with quotes inside tags.
122 244         387 $result .= $cur_token->[1];
123 244 100       1018 if ( $cur_token->[1] =~ m/$tags_to_skip/ ) {
124 39 100 66     232 $in_pre = defined $1 && $1 eq '/' ? 0 : 1;
125             }
126             }
127             else {
128 156         253 my $t = $cur_token->[1];
129 156         274 my $last_char = substr( $t, -1 ); # Remember last char of this token before processing.
130 156 100       357 if ( !$in_pre ) {
131 92         181 $t = ProcessEscapes($t);
132              
133 92 50       199 if ($convert_quot) {
134 0         0 $t =~ s/&quot;/"/g;
135             }
136              
137 92 50       213 if ($do_dashes) {
138 92 50       250 $t = EducateDashes($t) if ( $do_dashes == 1 );
139 92 50       195 $t = EducateDashesOldSchool($t) if ( $do_dashes == 2 );
140 92 50       189 $t = EducateDashesOldSchoolInverted($t) if ( $do_dashes == 3 );
141             }
142              
143 92 50       237 $t = EducateEllipses($t) if $do_ellipses;
144              
145             # Notes: backticks need to be processed before quotes.
146 92 50       190 if ($do_backticks) {
147 92         161 $t = EducateBackticks($t);
148 92 50       200 $t = EducateSingleBackticks($t) if ( $do_backticks == 2 );
149             }
150              
151 92 50       182 if ($do_quotes) {
152 92 50       232 if ( $t eq q/'/ ) {
    50          
153              
154             # Special case: single-character ' token
155 0 0       0 if ( $prev_token_last_char =~ m/\S/ ) {
156 0         0 $t = "&#8217;";
157             }
158             else {
159 0         0 $t = "&#8216;";
160             }
161             }
162             elsif ( $t eq q/"/ ) {
163              
164             # Special case: single-character " token
165 0 0       0 if ( $prev_token_last_char =~ m/\S/ ) {
166 0         0 $t = "&#8221;";
167             }
168             else {
169 0         0 $t = "&#8220;";
170             }
171             }
172             else {
173              
174             # Normal case:
175 92         167 $t = EducateQuotes($t);
176             }
177             }
178              
179 92 50       208 $t = StupefyEntities($t) if $do_stupefy;
180             }
181 156         239 $prev_token_last_char = $last_char;
182 156         306 $result .= $t;
183             }
184             }
185              
186 36         216 return $result;
187             }
188              
189             =head2 SmartQuotes
190              
191             Quotes to entities.
192              
193             =cut
194              
195             sub SmartQuotes {
196              
197             # Parameters:
198 0     0 1 0 my $text = shift; # text to be parsed
199 0         0 my $attr = shift; # value of the smart_quotes="" attribute
200              
201 0         0 my $do_backticks; # should we educate ``backticks'' -style quotes?
202              
203 0 0       0 if ( $attr == 0 ) {
    0          
204              
205             # do nothing;
206 0         0 return $text;
207             }
208             elsif ( $attr == 2 ) {
209              
210             # smarten ``backticks'' -style quotes
211 0         0 $do_backticks = 1;
212             }
213             else {
214 0         0 $do_backticks = 0;
215             }
216              
217             # Special case to handle quotes at the very end of $text when preceded by
218             # an HTML tag. Add a space to give the quote education algorithm a bit of
219             # context, so that it can guess correctly that it's a closing quote:
220 0         0 my $add_extra_space = 0;
221 0 0       0 if ( $text =~ m/>['"]\z/ ) {
222 0         0 $add_extra_space = 1; # Remember, so we can trim the extra space later.
223 0         0 $text .= " ";
224             }
225              
226 0   0     0 my $tokens ||= _tokenize($text);
227 0         0 my $result = '';
228 0         0 my $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags
229              
230 0         0 my $prev_token_last_char = ""; # This is a cheat, used to get some context
231             # for one-character tokens that consist of
232             # just a quote char. What we do is remember
233             # the last character of the previous text
234             # token, to use as context to curl single-
235             # character quote tokens correctly.
236              
237 0         0 foreach my $cur_token (@$tokens) {
238 0 0       0 if ( $cur_token->[0] eq "tag" ) {
239              
240             # Don't mess with quotes inside tags
241 0         0 $result .= $cur_token->[1];
242 0 0       0 if ( $cur_token->[1] =~ m/$tags_to_skip/ ) {
243 0 0 0     0 $in_pre = defined $1 && $1 eq '/' ? 0 : 1;
244             }
245             }
246             else {
247 0         0 my $t = $cur_token->[1];
248 0         0 my $last_char = substr( $t, -1 ); # Remember last char of this token before processing.
249 0 0       0 if ( !$in_pre ) {
250 0         0 $t = ProcessEscapes($t);
251 0 0       0 if ($do_backticks) {
252 0         0 $t = EducateBackticks($t);
253             }
254              
255 0 0       0 if ( $t eq q/'/ ) {
    0          
256              
257             # Special case: single-character ' token
258 0 0       0 if ( $prev_token_last_char =~ m/\S/ ) {
259 0         0 $t = "&#8217;";
260             }
261             else {
262 0         0 $t = "&#8216;";
263             }
264             }
265             elsif ( $t eq q/"/ ) {
266              
267             # Special case: single-character " token
268 0 0       0 if ( $prev_token_last_char =~ m/\S/ ) {
269 0         0 $t = "&#8221;";
270             }
271             else {
272 0         0 $t = "&#8220;";
273             }
274             }
275             else {
276              
277             # Normal case:
278 0         0 $t = EducateQuotes($t);
279             }
280              
281             }
282 0         0 $prev_token_last_char = $last_char;
283 0         0 $result .= $t;
284             }
285             }
286              
287 0 0       0 if ($add_extra_space) {
288 0         0 $result =~ s/ \z//; # Trim trailing space if we added one earlier.
289             }
290 0         0 return $result;
291             }
292              
293             =head2 SmartDashes
294              
295             Call the individual dash-to-entity conversion functions.
296              
297             =cut
298              
299             sub SmartDashes {
300              
301             # Parameters:
302 0     0 1 0 my $text = shift; # text to be parsed
303 0         0 my $attr = shift; # value of the smart_dashes="" attribute
304              
305             # reference to the subroutine to use for dash education, default to EducateDashes:
306 0         0 my $dash_sub_ref = \&EducateDashes;
307              
308 0 0       0 if ( $attr == 0 ) {
    0          
    0          
309              
310             # do nothing;
311 0         0 return $text;
312             }
313             elsif ( $attr == 2 ) {
314              
315             # use old smart dash shortcuts, "--" for en, "---" for em
316 0         0 $dash_sub_ref = \&EducateDashesOldSchool;
317             }
318             elsif ( $attr == 3 ) {
319              
320             # inverse of 2, "--" for em, "---" for en
321 0         0 $dash_sub_ref = \&EducateDashesOldSchoolInverted;
322             }
323              
324 0         0 my $tokens;
325 0   0     0 $tokens ||= _tokenize($text);
326              
327 0         0 my $result = '';
328 0         0 my $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags
329 0         0 foreach my $cur_token (@$tokens) {
330 0 0       0 if ( $cur_token->[0] eq "tag" ) {
331              
332             # Don't mess with quotes inside tags
333 0         0 $result .= $cur_token->[1];
334 0 0       0 if ( $cur_token->[1] =~ m/$tags_to_skip/ ) {
335 0 0 0     0 $in_pre = defined $1 && $1 eq '/' ? 0 : 1;
336             }
337             }
338             else {
339 0         0 my $t = $cur_token->[1];
340 0 0       0 if ( !$in_pre ) {
341 0         0 $t = ProcessEscapes($t);
342 0         0 $t = $dash_sub_ref->($t);
343             }
344 0         0 $result .= $t;
345             }
346             }
347 0         0 return $result;
348             }
349              
350             =head2 SmartEllipses
351              
352             Call the ellipsis-to-entity conversion function.
353              
354             =cut
355              
356             sub SmartEllipses {
357              
358             # Parameters:
359 0     0 1 0 my $text = shift; # text to be parsed
360 0         0 my $attr = shift; # value of the smart_ellipses="" attribute
361              
362 0 0       0 if ( $attr == 0 ) {
363              
364             # do nothing;
365 0         0 return $text;
366             }
367              
368 0         0 my $tokens;
369 0   0     0 $tokens ||= _tokenize($text);
370              
371 0         0 my $result = '';
372 0         0 my $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags
373 0         0 foreach my $cur_token (@$tokens) {
374 0 0       0 if ( $cur_token->[0] eq "tag" ) {
375              
376             # Don't mess with quotes inside tags
377 0         0 $result .= $cur_token->[1];
378 0 0       0 if ( $cur_token->[1] =~ m/$tags_to_skip/ ) {
379 0 0 0     0 $in_pre = defined $1 && $1 eq '/' ? 0 : 1;
380             }
381             }
382             else {
383 0         0 my $t = $cur_token->[1];
384 0 0       0 if ( !$in_pre ) {
385 0         0 $t = ProcessEscapes($t);
386 0         0 $t = EducateEllipses($t);
387             }
388 0         0 $result .= $t;
389             }
390             }
391 0         0 return $result;
392             }
393              
394             =head2 EducateQuotes
395              
396             Parameter: String.
397              
398             Returns: The string, with "educated" curly quote HTML entities.
399              
400             Example input: "Isn't this fun?"
401             Example output: &#8220;Isn&#8217;t this fun?&#8221;
402              
403             =cut
404              
405             sub EducateQuotes {
406 92     92 1 156 local $_ = shift;
407              
408             # Tell perl not to gripe when we use $1 in substitutions,
409             # even when it's undefined. Use $^W instead of "no warnings"
410             # for compatibility with Perl 5.005:
411 92         226 local $^W = 0;
412              
413             # Make our own "punctuation" character class, because the POSIX-style
414             # [:punct:] is only available in Perl 5.6 or later:
415 92         240 my $punct_class = qr/[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]/;
416              
417             # Special case if the very first character is a quote
418             # followed by punctuation at a non-word-break. Close the quotes by brute force:
419 92         302 s/^'(?=$punct_class\B)/&#8217;/;
420 92         257 s/^"(?=$punct_class\B)/&#8221;/;
421              
422             # Special case for double sets of quotes, e.g.:
423             # <p>He said, "'Quoted' words in a larger quote."</p>
424 92         153 s/"'(?=\w)/&#8220;&#8216;/g;
425 92         139 s/'"(?=\w)/&#8216;&#8220;/g;
426              
427 92         176 my $close_class = qr![^\ \t\r\n\[\{\(]!;
428              
429             # Single closing quotes:
430 92         317 s {
431             ($close_class)?
432             '
433             (?(1)| # If $1 captured, then do nothing;
434             (?=\s | s\b) # otherwise, positive lookahead for a whitespace
435             ) # char or an 's' at a word ending position. This
436             # is a special case to handle something like:
437             # "<i>Custer</i>'s Last Stand."
438             } {$1&#8217;}xgi;
439              
440             # Single opening quotes:
441 92         146 s/'/&#8216;/g;
442              
443             # Double closing quotes:
444 92         257 s {
445             ($close_class)?
446             "
447             (?(1)|(?=\s)) # If $1 captured, then do nothing;
448             # if not, then make sure the next char is whitespace.
449             } {$1&#8221;}xg;
450              
451             # Double opening quotes:
452 92         145 s/"/&#8220;/g;
453              
454 92         257 return $_;
455             }
456              
457             =head2 EducateBackticks
458              
459             Replace double (back)ticks w/ HTML entities.
460              
461             =cut
462              
463             sub EducateBackticks {
464              
465             #
466             # Parameter: String.
467             # Returns: The string, with ``backticks'' -style double quotes
468             # translated into HTML curly quote entities.
469             #
470             # Example input: ``Isn't this fun?''
471             # Example output: &#8220;Isn't this fun?&#8221;
472             #
473              
474 92     92 1 149 local $_ = shift;
475 92         148 s/``/&#8220;/g;
476 92         150 s/''/&#8221;/g;
477 92         150 return $_;
478             }
479              
480             =head2 EducateSingleBackticks
481              
482             Replace single (back)ticks w/ HTML entities.
483              
484             =cut
485              
486             sub EducateSingleBackticks {
487              
488             #
489             # Parameter: String.
490             # Returns: The string, with `backticks' -style single quotes
491             # translated into HTML curly quote entities.
492             #
493             # Example input: `Isn't this fun?'
494             # Example output: &#8216;Isn&#8217;t this fun?&#8217;
495             #
496              
497 0     0 1 0 local $_ = shift;
498 0         0 s/`/&#8216;/g;
499 0         0 s/'/&#8217;/g;
500 0         0 return $_;
501             }
502              
503             =head2 EducateDashes
504              
505             Dashes to HTML entity
506              
507             Parameter: String.
508              
509             Returns: The string, with each instance of "--" translated to
510             an em-dash HTML entity.
511              
512             =cut
513              
514             sub EducateDashes {
515              
516 92     92 1 160 local $_ = shift;
517 92         136 s/--/&#8212;/g;
518 92         161 return $_;
519             }
520              
521             =head2 EducateDashesOldSchool
522              
523             Dashes to entities.
524              
525              
526             Parameter: String.
527              
528             Returns: The string, with each instance of "--" translated to
529             an en-dash HTML entity, and each "---" translated to
530             an em-dash HTML entity.
531              
532              
533             =cut
534              
535             sub EducateDashesOldSchool {
536              
537 0     0 1 0 local $_ = shift;
538 0         0 s/---/&#8212;/g; # em
539 0         0 s/--/&#8211;/g; # en
540 0         0 return $_;
541             }
542              
543             =head2 EducateDashesOldSchoolInverted
544              
545              
546             Parameter: String.
547              
548             Returns: The string, with each instance of "--" translated to
549             an em-dash HTML entity, and each "---" translated to
550             an en-dash HTML entity. Two reasons why: First, unlike the
551             en- and em-dash syntax supported by
552             EducateDashesOldSchool(), it's compatible with existing
553             entries written before SmartyPants 1.1, back when "--" was
554             only used for em-dashes. Second, em-dashes are more
555             common than en-dashes, and so it sort of makes sense that
556             the shortcut should be shorter to type. (Thanks to Aaron
557             Swartz for the idea.)
558              
559            
560             =cut
561              
562             sub EducateDashesOldSchoolInverted {
563              
564 0     0 1 0 local $_ = shift;
565 0         0 s/---/&#8211;/g; # en
566 0         0 s/--/&#8212;/g; # em
567 0         0 return $_;
568             }
569              
570             =head2 EducateEllipses
571              
572             Parameter: String.
573             Returns: The string, with each instance of "..." translated to
574             an ellipsis HTML entity.
575              
576             Example input: Huh...?
577             Example output: Huh&#8230;?
578              
579             =cut
580              
581             sub EducateEllipses {
582              
583 92     92 1 138 local $_ = shift;
584 92         151 s/\.\.\./&#8230;/g;
585 92         151 return $_;
586             }
587              
588             =head2 StupefyEntities
589              
590             Parameter: String.
591             Returns: The string, with each SmartyPants HTML entity translated to
592             its ASCII counterpart.
593              
594             Example input: &#8220;Hello &#8212; world.&#8221;
595             Example output: "Hello -- world."
596              
597             =cut
598              
599             sub StupefyEntities {
600              
601 0     0 1 0 local $_ = shift;
602              
603 0         0 s/&#8211;/-/g; # en-dash
604 0         0 s/&#8212;/--/g; # em-dash
605              
606 0         0 s/&#8216;/'/g; # open single quote
607 0         0 s/&#8217;/'/g; # close single quote
608              
609 0         0 s/&#8220;/"/g; # open double quote
610 0         0 s/&#8221;/"/g; # close double quote
611              
612 0         0 s/&#8230;/.../g; # ellipsis
613              
614 0         0 return $_;
615             }
616              
617             =head2 SmartyPantsVersion
618              
619             Return the version of SmartyPants.
620              
621             =cut
622              
623             sub SmartyPantsVersion {
624 0     0 1 0 return $VERSION;
625             }
626              
627             =head2 ProcessEscapes
628              
629             Parameter: String.
630             Returns: The string, after processing the following backslash
631             escape sequences. This is useful if you want to force a "dumb"
632             quote or other character to appear.
633              
634             Escape Value
635             ------ -----
636             \\ &#92;
637             \" &#34;
638             \' &#39;
639             \. &#46;
640             \- &#45;
641             \` &#96;
642              
643             =cut
644              
645             sub ProcessEscapes {
646              
647 92     92 1 154 local $_ = shift;
648              
649 92         156 s! \\\\ !&#92;!gx;
650 92         135 s! \\" !&#34;!gx;
651 92         130 s! \\' !&#39;!gx;
652 92         138 s! \\\. !&#46;!gx;
653 92         135 s! \\- !&#45;!gx;
654 92         138 s! \\` !&#96;!gx;
655              
656 92         163 return $_;
657             }
658              
659             sub _tokenize {
660              
661             #
662             # Parameter: String containing HTML markup.
663             # Returns: Reference to an array of the tokens comprising the input
664             # string. Each token is either a tag (possibly with nested
665             # tags contained therein, such as <a href="<MTFoo>">), or a
666             # run of text between tags. Each element of the array is a
667             # two-element array; the first is either 'tag' or 'text';
668             # the second is the actual value.
669             #
670             #
671             # Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin.
672             # <http://www.bradchoate.com/past/mtregex.php>
673             #
674              
675 36     36   72 my $str = shift;
676              
677 36         65 my $pos = 0;
678 36         74 my $len = length $str;
679 36         74 my @tokens;
680              
681             # pattern to match balanced nested <> pairs, up to two levels deep:
682 36         141 my $nested_angles = qr/<(?:[^<>]|<[^<>]*>)*>/;
683              
684 36         367 while ( $str =~ m/($nested_angles)/gs ) {
685 244         481 my $whole_tag = $1;
686 244         355 my $sec_start = pos $str;
687 244         354 my $tag_start = $sec_start - length $whole_tag;
688 244 100       513 if ( $pos < $tag_start ) {
689 152         378 push @tokens, [ 'text', substr( $str, $pos, $tag_start - $pos ) ];
690             }
691 244         547 push @tokens, [ 'tag', $whole_tag ];
692 244         1257 $pos = pos $str;
693             }
694 36 100       121 push @tokens, [ 'text', substr( $str, $pos, $len - $pos ) ] if $pos < $len;
695 36         165 \@tokens;
696             }
697              
698             1;
699             __END__
700              
701              
702             =pod
703              
704             =head1 Name
705              
706             Text::SmartyPants - cute little punctuation assistant
707              
708             =head1 Synopsis
709              
710             SmartyPants is a free web publishing plug-in for Movable Type, Blosxom,
711             and BBEdit that easily translates plain ASCII punctuation characters
712             into "smart" typographic punctuation HTML entities.
713              
714              
715             =head1 Description
716              
717             SmartyPants can perform the following transformations:
718              
719             =over 4
720              
721             =item *
722              
723             Straight quotes ( " and ' ) into "curly" quote HTML entities
724              
725             =item *
726              
727             Backticks-style quotes (``like this'') into "curly" quote HTML entities
728              
729             =item *
730              
731             Dashes (C<--> and C<--->) into en- and em-dash entities
732              
733             =item *
734              
735             Three consecutive dots (C<...>) into an ellipsis entity
736              
737             =back
738              
739             This means you can write, edit, and save your posts using plain old
740             ASCII straight quotes, plain dashes, and plain dots, but your published
741             posts (and final HTML output) will appear with smart quotes, em-dashes,
742             and proper ellipses.
743              
744             SmartyPants is a combination plug-in -- the same file works with Movable
745             Type, Blosxom, and BBEdit. It can also be used from a Unix-style
746             command-line. Version requirements and installation instructions for
747             each of these tools can be found in the corresponding sub-section under
748             "Installation", below.
749              
750             SmartyPants does not modify characters within C<< <pre> >>, C<< <code>
751             >>, C<< <kbd> >>, or C<< <script> >> tag blocks. Typically, these tags
752             are used to display text where smart quotes and other "smart
753             punctuation" would not be appropriate, such as source code or example
754             markup.
755              
756              
757             =head2 Backslash Escapes
758              
759             If you need to use literal straight quotes (or plain hyphens and
760             periods), SmartyPants accepts the following backslash escape sequences
761             to force non-smart punctuation. It does so by transforming the escape
762             sequence into a decimal-encoded HTML entity:
763              
764             Escape Value Character
765             ------ ----- ---------
766             \\ &#92; \
767             \" &#34; "
768             \' &#39; '
769             \. &#46; .
770             \- &#45; -
771             \` &#96; `
772              
773             This is useful, for example, when you want to use straight quotes as
774             foot and inch marks: 6'2" tall; a 17" iMac.
775              
776              
777             =head2 MT-Textile Integration
778              
779             Movable Type users should also note that SmartyPants can work in
780             conjunction with Brad Choate's MT-Textile plug-in:
781              
782             http://bradchoate.com/past/mttextile.php
783              
784             MT-Textile is a port of Dean Allen's original Textile project to Perl
785             and Movable Type. MT-Textile by itself only translates Textile markup
786             to HTML. However, if SmartyPants is also installed, MT-Textile will
787             call on SmartyPants to educate quotes, dashes, and ellipses,
788             automatically. Using SmartyPants in conjunction with MT-Textile
789             requires no modifications to your Movable Type templates.
790              
791             Textile is Dean Allen's "humane web text generator", an easy-to-write
792             and easy-to-read shorthand for writing text for the web. An online
793             Textile web application is available at Mr. Allen's site:
794              
795             http://textism.com/tools/textile/
796              
797              
798             =head1 Installation
799              
800             =head2 Movable Type
801              
802             SmartyPants works with Movable Type version 2.5 or later.
803              
804             =over 4
805              
806             =item 1.
807              
808             Copy the "SmartyPants.pl" file into your Movable Type "plugins" directory.
809             The "plugins" directory should be in the same directory as "mt.cgi"; if it
810             doesn't already exist, use your FTP program to create it. Your
811             installation should look like this:
812              
813             (mt home)/plugins/SmartyPants.pl
814              
815             =item 2.
816              
817             If you're using SmartyPants with Brad Choate's MT-Textile, you're done.
818              
819             If not, to activate SmartyPants on your weblog, you need to edit your MT
820             templates. The easiest way is to add the C<smarty_pants> attribute to
821             each MT template tag whose contents you wish to apply SmartyPants'
822             transformations. Obvious tags would include C<MTEntryTitle>,
823             C<MTEntryBody>, and C<MTEntryMore>. SmartyPants should work within any
824             MT content tag.
825              
826              
827             For example, to apply SmartyPants to your entry titles:
828              
829             <$MTEntryTitle smarty_pants="1"$>
830              
831             The value passed to C<smarty_pants> specifies the way SmartyPants works.
832             See "Options", below, for full details on all of the supported options.
833              
834             =back
835              
836              
837             =head2 Blosxom
838              
839             SmartyPants works with Blosxom version 2.0 or later.
840              
841             =over 4
842              
843             =item 1.
844              
845             Rename the "SmartyPants.pl" plug-in to "SmartyPants" (case is
846             important). Movable Type requires plug-ins to have a ".pl" extension;
847             Blosxom forbids it (at least as of this writing).
848              
849             =item 2.
850              
851             Copy the "SmartyPants" plug-in file to your Blosxom plug-ins folder. If
852             you're not sure where your Blosxom plug-ins folder is, see the Blosxom
853             documentation for information.
854              
855             =item 3.
856              
857             That's it. The entries in your weblog should now automatically have
858             SmartyPants' default transformations applied.
859              
860             =item 4.
861              
862             If you wish to configure SmartyPants' behavior, open the "SmartyPants"
863             plug-in, and edit the value of the C<$smartypants_attr> configuration
864             variable, located near the top of the script. The default value is 1;
865             see "Options", below, for the full list of supported values.
866              
867             =back
868              
869              
870             =head2 BBEdit
871              
872             SmartyPants works with BBEdit 6.1 or later on Mac OS X, and BBEdit 5.1
873             or later on Mac OS 9 or earlier (provided you have MacPerl
874             installed).
875              
876             =over 4
877              
878             =item 1.
879              
880             Copy the "SmartyPants.pl" file to the appropriate filters folder in your
881             "BBEdit Support" folder. On Mac OS X, this should be:
882              
883             BBEdit Support:Unix Support:Unix Filters:
884              
885             On Mac OS 9 or earlier, this should be:
886              
887             BBEdit Support:MacPerl Support:Perl Filters:
888              
889             See the BBEdit documentation for more details on the location of these
890             folders.
891              
892             You can rename "SmartyPants.pl" to whatever you wish.
893              
894             =item 2.
895              
896             That's it. To use SmartyPants, select some text in a BBEdit document,
897             then choose SmartyPants from the Filters sub-menu or the Filters
898             floating palette. On Mac OS 9, the Filters sub-menu is in the "Camel"
899             menu; on Mac OS X, it is in the "#!" menu.
900              
901             =item 3.
902              
903             If you wish to configure SmartyPants' behavior, open the SmartyPants
904             file and edit the value of the C<$smartypants_attr> configuration
905             variable, located near the top of the script. The default value is 1;
906             see "Options", below, for the full list of supported values.
907              
908             =back
909              
910              
911             =head1 Options
912              
913             =head2 smarty_pants
914              
915             For MT users, the C<smarty_pants> template tag attribute is where you
916             specify configuration options. For Blosxom and BBEdit users, settings
917             are specified by editing the value of the C<$smartypants_attr> variable
918             in the script itself.
919              
920             Numeric values are the easiest way to configure SmartyPants' behavior:
921              
922             =over 4
923              
924             =item B<"0">
925              
926             Suppress all transformations. (Do nothing.)
927              
928             =item B<"1">
929              
930             Performs default SmartyPants transformations: quotes (including
931             ``backticks'' -style), em-dashes, and ellipses. "--" (dash dash) is used
932             to signify an em-dash; there is no support for en-dashes.
933              
934             =item B<"2">
935              
936             Same as smarty_pants="1", except that it uses the old-school typewriter
937             shorthand for dashes: "--" (dash dash) for en-dashes, "---" (dash dash dash)
938             for em-dashes.
939              
940             =item B<"3">
941              
942             Same as smarty_pants="2", but inverts the shorthand for dashes: "--"
943             (dash dash) for em-dashes, and "---" (dash dash dash) for en-dashes.
944              
945             =item B<"-1">
946              
947             Stupefy mode. Reverses the SmartyPants transformation process, turning the
948             HTML entities produced by SmartyPants into their ASCII equivalents. E.g.
949             "&#8220;" is turned into a simple double-quote ("), "&#8212;" is turned
950             into two dashes, etc. This is useful if you are using SmartyPants from
951             Brad Choate's MT-Textile text filter, but wish to suppress smart
952             punctuation in specific MT templates, such as RSS feeds. Text filters do
953             their work before templates are processed; but you can use
954             smarty_pants="-1" to reverse the transformations in specific templates.
955              
956             =back
957              
958              
959             The following single-character attribute values can be combined to toggle
960             individual transformations from within the smarty_pants attribute. For
961             example, to educate normal quotes and em-dashes, but not ellipses or
962             ``backticks'' -style quotes:
963              
964             <$MTFoo smarty_pants="qd"$>
965              
966             =over 4
967              
968             =item B<"q">
969              
970             Educates normal quote characters: (") and (').
971              
972             =item B<"b">
973              
974             Educates ``backticks'' -style double quotes.
975              
976             =item B<"B">
977              
978             Educates ``backticks'' -style double quotes and `single' quotes.
979              
980             =item B<"d">
981              
982             Educates em-dashes.
983              
984             =item B<"D">
985              
986             Educates em-dashes and en-dashes, using old-school typewriter shorthand:
987             (dash dash) for en-dashes, (dash dash dash) for em-dashes.
988              
989             =item B<"i">
990              
991             Educates em-dashes and en-dashes, using inverted old-school typewriter
992             shorthand: (dash dash) for em-dashes, (dash dash dash) for en-dashes.
993              
994             =item B<"e">
995              
996             Educates ellipses.
997              
998             =item B<"w">
999              
1000             Translates any instance of C<&quot;> into a normal double-quote character.
1001             This should be of no interest to most people, but of particular interest
1002             to anyone who writes their posts using Dreamweaver, as Dreamweaver
1003             inexplicably uses this entity to represent a literal double-quote
1004             character. SmartyPants only educates normal quotes, not entities (because
1005             ordinarily, entities are used for the explicit purpose of representing the
1006             specific character they represent). The "w" option must be used in
1007             conjunction with one (or both) of the other quote options ("q" or "b").
1008             Thus, if you wish to apply all SmartyPants transformations (quotes, en-
1009             and em-dashes, and ellipses) and also translate C<&quot;> entities into
1010             regular quotes so SmartyPants can educate them, you should pass the
1011             following to the smarty_pants attribute:
1012              
1013             <$MTFoo smarty_pants="qDew"$>
1014              
1015             For Blosxom and BBEdit users, set:
1016              
1017             my $smartypants_attr = "qDew";
1018              
1019             =back
1020              
1021              
1022             =head2 Deprecated MT Attributes
1023              
1024             The following Movable Type attributes are supported only for
1025             compatibility with older versions of SmartyPants. They are obsoleted by
1026             the C<smarty_pants> attribute, which offers more control than these
1027             individual attributes. If you're setting up SmartyPants for the first
1028             time, you should use the C<smarty_pants> attribute instead.
1029              
1030             Blosxom and BBEdit users should simply ignore this section.
1031              
1032             =head3 smart_quotes
1033              
1034             The smart_quotes attribute accepts the following values:
1035              
1036             =over 4
1037              
1038             =item B<"0">
1039              
1040             Suppress all quote education. (Do nothing.)
1041              
1042             =item B<"1">
1043              
1044             Default behavior. Educates normal quote characters: (") and (').
1045              
1046             =item B<"2">
1047              
1048             Educate ``backticks'' -style double quotes (in addition to educating
1049             regular quotes). Transforms each instance of two consecutive backtick
1050             characters (C<``>) into an opening double-quote, and each instance of two
1051             consecutive apostrophes (C<''>) into a closing double-quote.
1052              
1053             =back
1054              
1055              
1056             =head3 smart_dashes
1057              
1058             The smart_dashes attribute accepts the following values:
1059              
1060             =over 4
1061              
1062             =item B<"0">
1063              
1064             Suppress dash education. (Do nothing.)
1065              
1066             =item B<"1">
1067              
1068             Default behavior. Transforms each instance of "--" (dash dash) into an
1069             HTML entity-encoded em-dash.
1070              
1071             =item B<"2">
1072              
1073             Educates both en- and em-dashes, using the old-school typewriter
1074             shorthand for dashes. Each instance of "--" (dash dash) is turned into
1075             an HTML entity-encoded en-dash; each instance of "---" (dash dash dash)
1076             is turned into an em-dash.
1077              
1078             =item B<"3">
1079              
1080             Same as smart_dashes="2", but inverts the shorthand, using "--" (dash
1081             dash) for em-dashes, and "---" (dash dash dash) for en-dashes. Although
1082             somewhat counterintuitive in that the longer shortcut is used for the
1083             shorter dash, this syntax is backwards compatible with SmartyPants 1.0's
1084             original syntax, which used (dash dash) for em-dashes.
1085              
1086              
1087             =back
1088              
1089              
1090             =head3 smart_ellipses
1091              
1092             The smart_ellipses attribute accepts the following values:
1093              
1094             =over 4
1095              
1096             =item B<"0">
1097              
1098             Suppress ellipsis education. (Do nothing.)
1099              
1100             =item B<"1">
1101              
1102             Default behavior. Transforms each instance of "..." (dot dot dot) into
1103             an HTML entity-encoded ellipsis. If there are four consecutive dots,
1104             SmartyPants assumes this means "ellipsis" followed by "full stop".
1105              
1106             =back
1107              
1108              
1109             =head2 Version Info Tag
1110              
1111             If you include this tag in a Movable Type template:
1112              
1113             <$MTSmartyPantsVersion$>
1114              
1115             it will be replaced with a string representing the version number of the
1116             installed version of SmartyPants, e.g. "1.2".
1117              
1118              
1119             =head1 Caveats
1120              
1121             =head2 Why You Might Not Want to Use Smart Quotes in Your Weblog
1122              
1123             For one thing, you might not care.
1124              
1125             Most normal, mentally stable individuals do not take notice of proper
1126             typographic punctuation. Many design and typography nerds, however, break
1127             out in a nasty rash when they encounter, say, a restaurant sign that uses
1128             a straight apostrophe to spell "Joe's".
1129              
1130             If you're the sort of person who just doesn't care, you might well want to
1131             continue not caring. Using straight quotes -- and sticking to the 7-bit
1132             ASCII character set in general -- is certainly a simpler way to live.
1133              
1134             Even if you I<do> care about accurate typography, you still might want to
1135             think twice before educating the quote characters in your weblog. One side
1136             effect of publishing curly quote HTML entities is that it makes your
1137             weblog a bit harder for others to quote from using copy-and-paste. What
1138             happens is that when someone copies text from your blog, the copied text
1139             contains the 8-bit curly quote characters (as well as the 8-bit characters
1140             for em-dashes and ellipses, if you use these options). These characters
1141             are not standard across different text encoding methods, which is why they
1142             need to be encoded as HTML entities.
1143              
1144             People copying text from your weblog, however, may not notice that you're
1145             using curly quotes, and they'll go ahead and paste the unencoded 8-bit
1146             characters copied from their browser into an email message or their own
1147             weblog. When pasted as raw "smart quotes", these characters are likely to
1148             get mangled beyond recognition.
1149              
1150             That said, my own opinion is that any decent text editor or email client
1151             makes it easy to stupefy smart quote characters into their 7-bit
1152             equivalents, and I don't consider it my problem if you're using an
1153             indecent text editor or email client.
1154              
1155              
1156             =head2 Algorithmic Shortcomings
1157              
1158             One situation in which quotes will get curled the wrong way is when
1159             apostrophes are used at the start of leading contractions. For example:
1160              
1161             the '80s
1162             'Twas the night before Christmas.
1163              
1164             In both cases above, SmartyPants will turn the apostrophes into opening
1165             single-quotes, when in fact they should be closing ones. I don't think
1166             this problem can be solved in the general case -- every word processor
1167             I've tried gets this wrong as well. In such cases, it's best to use the
1168             proper HTML entity for closing single-quotes (C<&#8217;>) by hand.
1169              
1170             (I should also note that my personal style is to abbreviate decades like
1171             this:
1172              
1173             the 80's
1174              
1175             so admittedly, I'm not all that interested in solving this problem.)
1176              
1177              
1178             =head1 Bugs
1179              
1180             To file bug reports or feature requests (other than topics listed in the
1181             Caveats section above) please send email to:
1182              
1183             smartypants@daringfireball.net
1184              
1185             If the bug involves quotes being curled the wrong way, please send example
1186             text to illustrate.
1187              
1188              
1189             =head1 See Also
1190              
1191             This plug-in effectively obsoletes the technique documented here:
1192              
1193             http://daringfireball.net/2002/08/movable_type_smart_quote_devilry.html
1194              
1195             However, the above instructions may still be of interest if for some
1196             reason you are still running an older version of Movable Type.
1197              
1198              
1199             =head1 Version History
1200              
1201             1.0: Wed Nov 13, 2002
1202              
1203             Initial release.
1204              
1205              
1206             1.1: Wed Feb 5, 2003
1207              
1208             + The smart_dashes template attribute now offers an option to
1209             use "--" for *en* dashes, and "---" for *em* dashes.
1210              
1211             + The default smart_dashes behavior now simply translates "--"
1212             (dash dash) into an em-dash. Previously, it would look for
1213             " -- " (space dash dash space), which was dumb, since many
1214             people do not use spaces around their em dashes.
1215              
1216             + Using the smarty_pants attribute with a value of "2" will
1217             do the same thing as smarty_pants="1", with one difference:
1218             it will use the new shortcuts for en- and em-dashes.
1219              
1220             + Closing quotes (single and double) were incorrectly curled in
1221             situations like this:
1222             "<a>foo</a>",
1223             where the comma could be just about any punctuation character.
1224             Fixed.
1225              
1226             + Added <kbd> to the list of tags in which text shouldn't be
1227             educated.
1228              
1229              
1230             1.2: Thu Feb 27, 2003
1231              
1232             + SmartyPants is now a combination plug-in, supporting both
1233             Movable Type (2.5 or later) and Blosxom (2.0 or later).
1234             It also works as a BBEdit text filter and standalone
1235             command-line Perl program. Thanks to Rael Dornfest for the
1236             initial Blosxom port (and for the excellent Blosxom plug-in
1237             API).
1238              
1239             + SmartyPants now accepts the following backslash escapes,
1240             to force non-smart punctuation. It does so by transforming
1241             the escape sequence into a decimal-encoded HTML entity:
1242              
1243             Escape Value Character
1244             ------ ----- ---------
1245             \\ &#92; \
1246             \" &#34; "
1247             \' &#39; '
1248             \. &#46; .
1249             \- &#45; -
1250             \` &#96; `
1251              
1252             Note that this could produce different results than previous
1253             versions of SmartyPants, if for some reason you have an article
1254             containing one or more of these sequences. (Thanks to Charles
1255             Wiltgen for the suggestion.)
1256              
1257             + Added a new option to support inverted en- and em-dash notation:
1258             "--" for em-dashes, "---" for en-dashes. This is compatible with
1259             SmartyPants' original "--" syntax for em-dashes, but also allows
1260             you to specify en-dashes. It can be invoked by using
1261             smart_dashes="3", smarty_pants="3", or smarty_pants="i".
1262             (Suggested by Aaron Swartz.)
1263              
1264             + Added a new option to automatically convert &quot; entities into
1265             regular double-quotes before sending text to EducateQuotes() for
1266             processing. This is mainly for the benefit of people who write
1267             posts using Dreamweaver, which substitutes this entity for any
1268             literal quote char. The one and only way to invoke this option
1269             is to use the letter shortcuts for the smarty_pants attribute;
1270             the shortcut for this option is "w" (for Dream_w_eaver).
1271             (Suggested by Jonathon Delacour.)
1272              
1273             + Added <script> to the list of tags in which SmartyPants doesn't
1274             touch the contents.
1275              
1276             + Fixed a very subtle bug that would occur if a quote was the very
1277             last character in a body of text, preceded immediately by a tag.
1278             Lacking any context, previous versions of SmartyPants would turn
1279             this into an opening quote mark. It's now correctly turned into
1280             a closing one.
1281              
1282             + Opening quotes were being curled the wrong way when the
1283             subsequent character was punctuation. E.g.: "a '.foo' file".
1284             Fixed.
1285              
1286             + New MT global template tag: <$MTSmartyPantsVersion$>
1287             Prints the version number of SmartyPants, e.g. "1.2".
1288              
1289              
1290             1.2.1: Mon Mar 10, 2003
1291              
1292             + New "stupefy mode" for smarty_pants attribute. If you set
1293              
1294             smarty_pants="-1"
1295              
1296             SmartyPants will perform reverse transformations, turning HTML
1297             entities into plain ASCII equivalents. E.g. "&#8220;" is turned
1298             into a simple double-quote ("), "&#8212;" is turned into two
1299             dashes, etc. This is useful if you are using SmartyPants from Brad
1300             Choate's MT-Textile text filter, but wish to suppress smart
1301             punctuation in specific MT templates, such as RSS feeds. Text
1302             filters do their work before templates are processed; but you can
1303             use smarty_pants="-1" to reverse the transformations in specific
1304             templates.
1305              
1306             + Replaced the POSIX-style regex character class [:punct:] with an
1307             ugly hard-coded normal character class of all punctuation; POSIX
1308             classes require Perl 5.6 or later, but SmartyPants still supports
1309             back to 5.005.
1310              
1311             + Several small changes to allow SmartyPants to work when Blosxom
1312             is running in static mode.
1313              
1314              
1315             1.2.2: Thu Mar 13, 2003
1316              
1317             + 1.2.1 contained a boneheaded addition which prevented SmartyPants
1318             from compiling under Perl 5.005. This has been remedied, and is
1319             the only change from 1.2.1.
1320              
1321              
1322             1.3: Tue May 13, 2003
1323              
1324             + Plugged the biggest hole in SmartyPants' smart quotes algorithm.
1325             Previous versions were hopelessly confused by single-character
1326             quote tokens, such as:
1327              
1328             <p>"<i>Tricky!</i>"</p>
1329              
1330             The problem was that the EducateQuotes() function works on each
1331             token separately, with no means of getting surrounding context
1332             from the previous or next tokens. The solution is to curl these
1333             single-character quote tokens as a special case, *before* calling
1334             EducateQuotes().
1335              
1336             + New single-quotes backtick mode for smarty_pants attribute.
1337             The only way to turn it on is to include "B" in the configuration
1338             string, e.g. to translate backtick quotes, dashes, and ellipses:
1339              
1340             smarty_pants="Bde"
1341              
1342             + Fixed a bug where an opening quote would get curled the wrong way
1343             if the quote started with three dots, e.g.:
1344              
1345             <p>"...meanwhile"</p>
1346              
1347             + Fixed a bug where opening quotes would get curled the wrong way
1348             if there were double sets of quotes within each other, e.g.:
1349              
1350             <p>"'Some' people."</p>
1351              
1352             + Due to popular demand, four consecutive dots (....) will now be
1353             turned into an ellipsis followed by a period. Previous versions
1354             would turn this into a period followed by an ellipsis. If you
1355             really want a period-then-ellipsis sequence, escape the first
1356             period with a backslash: \....
1357              
1358             + Removed "&" from our home-grown punctuation class, since it
1359             denotes an entity, not a literal ampersand punctuation
1360             character. This fixes a bug where SmartyPants would mis-curl
1361             the opening quote in something like this:
1362              
1363             "&#8230;whatever"
1364              
1365             + SmartyPants has always had a special case where it looks for
1366             "'s" in situations like this:
1367              
1368             <i>Custer</i>'s Last Stand
1369              
1370             This special case is now case-insensitive.
1371              
1372              
1373             =head1 Author
1374              
1375             John Gruber
1376             http://daringfireball.net
1377              
1378              
1379             =head1 Additional Credits
1380              
1381             Portions of this plug-in are based on Brad Choate's nifty MTRegex plug-in.
1382             Brad Choate also contributed a few bits of source code to this plug-in.
1383             Brad Choate is a fine hacker indeed. (http://bradchoate.com/)
1384              
1385             Jeremy Hedley (http://antipixel.com/) and Charles Wiltgen
1386             (http://playbacktime.com/) deserve mention for exemplary beta testing.
1387              
1388             Rael Dornfest (http://raelity.org/) ported SmartyPants to Blosxom.
1389              
1390              
1391             =head1 Copyright and License
1392              
1393             Copyright (c) 2003 John Gruber
1394             (http://daringfireball.net/)
1395             All rights reserved.
1396              
1397             Redistribution and use in source and binary forms, with or without
1398             modification, are permitted provided that the following conditions are met:
1399              
1400             * Redistributions of source code must retain the above copyright
1401             notice, this list of conditions and the following disclaimer.
1402              
1403             * Redistributions in binary form must reproduce the above copyright
1404             notice, this list of conditions and the following disclaimer in the
1405             documentation and/or other materials provided with the distribution.
1406              
1407             * Neither the name "SmartyPants" nor the names of its contributors may
1408             be used to endorse or promote products derived from this software
1409             without specific prior written permission.
1410              
1411             This software is provided by the copyright holders and contributors "as is"
1412             and any express or implied warranties, including, but not limited to, the
1413             implied warranties of merchantability and fitness for a particular purpose
1414             are disclaimed. In no event shall the copyright owner or contributors be
1415             liable for any direct, indirect, incidental, special, exemplary, or
1416             consequential damages (including, but not limited to, procurement of
1417             substitute goods or services; loss of use, data, or profits; or business
1418             interruption) however caused and on any theory of liability, whether in
1419             contract, strict liability, or tort (including negligence or otherwise)
1420             arising in any way out of the use of this software, even if advised of the
1421             possibility of such damage.
1422              
1423             =cut