File Coverage

blib/lib/App/Basis/ConvertText2/UtfTransform.pm
Criterion Covered Total %
statement 37 37 100.0
branch 6 6 100.0
condition 2 3 66.6
subroutine 10 10 100.0
pod 2 2 100.0
total 57 58 98.2


line stmt bran cond sub pod time code
1             # ABSTRACT: Convert ascii text into UTF8 to simulate text formatting
2              
3             =head1 NAME
4              
5             App::Basis::ConvertText2::UtfTransform
6              
7             =head1 SYNOPSIS
8              
9             use 5.10.0 ;
10             use strict ;
11             use warnings ;
12             use App::Basis::ConvertText2::UtfTransform
13              
14             my $string = "bold text
15             italic text
16             flipped upside down text and reversed
17             Some Leet speak
18             text in bubbles
19             script text
20             are you leet" ;
21              
22             say utf_transform( $string) ;
23              
24             my $smile = ":beer: is food! :) I <3 :cake: ;)" ;
25              
26             say uttf_smilies( $smile ) ;
27              
28             =head1 DESCRIPTION
29              
30             A number of popular websites (eg twitter) do not allow the use of HTML to create
31             bold/italic font effects or perform smily transformations
32              
33             However we can simulate this with some clever transformations of plain ascii text
34             into UTF8 codes which are a different font and so effectively create the same effect.
35              
36             We have transformations for flip (reverses the string and flips upside down,
37             bold, italic, bubbles and leet.
38              
39             We can transform A-Z a-z 0-9 and ? ! ,
40              
41             I have only implemented a small set of smilies, ones that I am likely to use
42              
43             =head1 Note
44              
45             You cannot embed one format within another, so you cannot have bold script, or
46             bold italic.
47              
48             =head1 See Also
49              
50             L
51              
52             =head1 Functions
53              
54             =over 4
55              
56             =cut
57              
58             package App::Basis::ConvertText2::UtfTransform;
59             $App::Basis::ConvertText2::UtfTransform::VERSION = '0.4.0';
60 1     1   29408 use 5.014;
  1         4  
  1         35  
61 1     1   5 use warnings;
  1         1  
  1         29  
62 1     1   5 use strict;
  1         14  
  1         31  
63 1     1   998 use Acme::LeetSpeak;
  1         1744  
  1         76  
64 1     1   1382 use Text::Emoticon;
  1         3616  
  1         11  
65 1     1   28 use Exporter;
  1         2  
  1         34  
66 1     1   5 use vars qw( @EXPORT @ISA);
  1         1  
  1         2154  
67              
68             @ISA = qw(Exporter);
69              
70             # this is the list of things that will get imported into the loading packages
71             # namespace
72             @EXPORT = qw(
73             utf_transform
74             utf_smilies
75             );
76              
77             # ----------------------------------------------------------------------------
78              
79             # UTF8 codes to transform normal ascii to different UTF8 codes
80             # to perform text effects that can be used on websites that allow UTF8 but
81             # do not allow HTML codes
82              
83             # ----------------------------------------------------------------------------
84              
85             my %flip = (
86             "a" => "\x{0250}",
87             "b" => "q",
88             "c" => "\x{0254}",
89             "d" => "p",
90             "e" => "\x{01DD}",
91             "f" => "\x{025F}",
92             "g" => "\x{0183}",
93             "h" => "\x{0265}",
94             "i" => "\x{0131}",
95             "j" => "\x{027E}",
96             "k" => "\x{029E}",
97             "l" => "\x{0283}",
98             "m" => "\x{026F}",
99             "n" => "u",
100             "o" => "o",
101             "p" => "d",
102             "q" => "q",
103             "r" => "\x{0279}",
104             "s" => "s",
105             "t" => "\x{0287}",
106             "u" => "n",
107             "v" => "\x{028C}",
108             "w" => "\x{028D}",
109             "x" => "x",
110             "y" => "\x{028E}",
111             "z" => "z",
112             "0" => "0",
113             "1" => "1",
114             "2" => "2",
115             "3" => "3",
116             "4" => "4",
117             "5" => "5",
118             "6" => "6",
119             "7" => "7",
120             "8" => "8",
121             "9" => "9",
122             "?" => "\x{00BF}",
123             "!" => "\x{00A1}",
124             "," => ",",
125             );
126              
127             my %bold = (
128             "A" => "\x{1D400}",
129             "B" => "\x{1D401}",
130             "C" => "\x{1D402}",
131             "D" => "\x{1D403}",
132             "E" => "\x{1D404}",
133             "F" => "\x{1D405}",
134             "G" => "\x{1D406}",
135             "H" => "\x{1D407}",
136             "I" => "\x{1D408}",
137             "J" => "\x{1D409}",
138             "K" => "\x{1D40A}",
139             "L" => "\x{1D40B}",
140             "M" => "\x{1D40C}",
141             "N" => "\x{1D40D}",
142             "O" => "\x{1D40E}",
143             "P" => "\x{1D40F}",
144             "Q" => "\x{1D410}",
145             "R" => "\x{1D411}",
146             "S" => "\x{1D412}",
147             "T" => "\x{1D413}",
148             "U" => "\x{1D414}",
149             "V" => "\x{1D415}",
150             "W" => "\x{1D416}",
151             "X" => "\x{1D417}",
152             "Y" => "\x{1D418}",
153             "Z" => "\x{1D419}",
154             "a" => "\x{1D41A}",
155             "b" => "\x{1D41B}",
156             "c" => "\x{1D41C}",
157             "d" => "\x{1D41D}",
158             "e" => "\x{1D41E}",
159             "f" => "\x{1D41F}",
160             "g" => "\x{1D420}",
161             "h" => "\x{1D421}",
162             "i" => "\x{1D422}",
163             "j" => "\x{1D423}",
164             "k" => "\x{1D424}",
165             "l" => "\x{1D425}",
166             "m" => "\x{1D426}",
167             "n" => "\x{1D427}",
168             "o" => "\x{1D428}",
169             "p" => "\x{1D429}",
170             "q" => "\x{1D42A}",
171             "r" => "\x{1D42B}",
172             "s" => "\x{1D42C}",
173             "t" => "\x{1D42D}",
174             "u" => "\x{1D42E}",
175             "v" => "\x{1D42F}",
176             "w" => "\x{1D430}",
177             "x" => "\x{1D431}",
178             "y" => "\x{1D432}",
179             "z" => "\x{1D433}",
180             "0" => "\x{1D7CE}",
181             "1" => "\x{1D7CF}",
182             "2" => "\x{1D7D0}",
183             "3" => "\x{1D7D1}",
184             "4" => "\x{1D7D2}",
185             "5" => "\x{1D7D3}",
186             "6" => "\x{1D7D4}",
187             "7" => "\x{1D7D5}",
188             "8" => "\x{1D7D6}",
189             "9" => "\x{1D7D7}",
190             "?" => "?",
191             "!" => "!",
192             "," => ",",
193             );
194              
195             my %italic = (
196             "A" => "\x{1D434}",
197             "B" => "\x{1D435}",
198             "C" => "\x{1D436}",
199             "D" => "\x{1D437}",
200             "E" => "\x{1D438}",
201             "F" => "\x{1D439}",
202             "G" => "\x{1D43A}",
203             "H" => "\x{1D43B}",
204             "I" => "\x{1D43C}",
205             "J" => "\x{1D43D}",
206             "K" => "\x{1D43E}",
207             "L" => "\x{1D43F}",
208             "M" => "\x{1D440}",
209             "N" => "\x{1D441}",
210             "O" => "\x{1D442}",
211             "P" => "\x{1D443}",
212             "Q" => "\x{1D444}",
213             "R" => "\x{1D445}",
214             "S" => "\x{1D446}",
215             "T" => "\x{1D447}",
216             "U" => "\x{1D448}",
217             "V" => "\x{1D449}",
218             "W" => "\x{1D44A}",
219             "X" => "\x{1D44B}",
220             "Y" => "\x{1D44C}",
221             "Z" => "\x{1D44D}",
222             "a" => "\x{1D622}",
223             "b" => "\x{1D623}",
224             "c" => "\x{1D624}",
225             "d" => "\x{1D625}",
226             "e" => "\x{1D626}",
227             "f" => "\x{1D627}",
228             "g" => "\x{1D628}",
229             "h" => "\x{1d629}",
230             "i" => "\x{1D62a}",
231             "j" => "\x{1D62b}",
232             "k" => "\x{1D62c}",
233             "l" => "\x{1D62d}",
234             "m" => "\x{1D62e}",
235             "n" => "\x{1D62f}",
236             "o" => "\x{1D630}",
237             "p" => "\x{1D631}",
238             "q" => "\x{1D632}",
239             "r" => "\x{1D633}",
240             "s" => "\x{1D634}",
241             "t" => "\x{1D635}",
242             "u" => "\x{1D636}",
243             "v" => "\x{1D637}",
244             "w" => "\x{1D638}",
245             "x" => "\x{1D639}",
246             "y" => "\x{1D63a}",
247             "z" => "\x{1D63b}",
248             "0" => "0",
249             "1" => "1",
250             "2" => "2",
251             "3" => "3",
252             "4" => "4",
253             "5" => "5",
254             "6" => "6",
255             "7" => "7",
256             "8" => "8",
257             "9" => "9",
258             "?" => "?",
259             "!" => "!",
260             "," => ",",
261             );
262              
263             # mathematical bold script capital and small
264             # http://www.fileformat.info/info/unicode/category/Lu/list.htm
265             # http://www.fileformat.info/info/unicode/category/Ll/list.htm
266              
267             my %script = (
268             "A" => "\x{1d4d0}",
269             "B" => "\x{1d4d1}",
270             "C" => "\x{1d4d2}",
271             "D" => "\x{1d4d3}",
272             "E" => "\x{1d4d4}",
273             "F" => "\x{1d4d5}",
274             "G" => "\x{1d4d6}",
275             "H" => "\x{1d4d7}",
276             "I" => "\x{1d4d8}",
277             "J" => "\x{1d4d9}",
278             "K" => "\x{1d4da}",
279             "L" => "\x{1d4db}",
280             "M" => "\x{1d4dc}",
281             "N" => "\x{1d4dd}",
282             "O" => "\x{1d4de}",
283             "P" => "\x{1d4df}",
284             "Q" => "\x{1d4e0}",
285             "R" => "\x{1d4e1}",
286             "S" => "\x{1d4e2}",
287             "T" => "\x{1D4e3}",
288             "U" => "\x{1D4e4}", ## special
289             "V" => "\x{1D4e5}",
290             "W" => "\x{1D4e6}",
291             "X" => "\x{1D4e7}",
292             "Y" => "\x{1D4e8}",
293             "Z" => "\x{1D4e9}",
294             "a" => "\x{1D4ea}",
295             "b" => "\x{1D4eb}",
296             "c" => "\x{1D4ec}",
297             "d" => "\x{1D4ed}",
298             "e" => "\x{1D4ee}",
299             "f" => "\x{1D4ef}",
300             "g" => "\x{1D4f0}",
301             "h" => "\x{1d4f1}",
302             "i" => "\x{1D4f2}",
303             "j" => "\x{1D4f3}",
304             "k" => "\x{1D4f4}",
305             "l" => "\x{1D4f5}",
306             "m" => "\x{1D4f6}",
307             "n" => "\x{1D4f7}",
308             "o" => "\x{1D4f8}",
309             "p" => "\x{1D4f9}",
310             "q" => "\x{1D4fa}",
311             "r" => "\x{1D4fb}",
312             "s" => "\x{1D4fc}",
313             "t" => "\x{1D4fd}",
314             "u" => "\x{1D4fe}",
315             "v" => "\x{1D4ff}",
316             "w" => "\x{1D500}",
317             "x" => "\x{1D501}",
318             "y" => "\x{1D502}",
319             "z" => "\x{1D503}",
320             "0" => "0",
321             "1" => "1",
322             "2" => "2",
323             "3" => "3",
324             "4" => "4",
325             "5" => "5",
326             "6" => "6",
327             "7" => "7",
328             "8" => "8",
329             "9" => "9",
330             "?" => "?",
331             "!" => "!",
332             "," => ",",
333             );
334              
335             my %bubbles = (
336             "A" => "\x{24B6}",
337             "B" => "\x{24B7}",
338             "C" => "\x{24B8}",
339             "D" => "\x{24B9}",
340             "E" => "\x{24BA}",
341             "F" => "\x{24BB}",
342             "G" => "\x{24BC}",
343             "H" => "\x{24BD}",
344             "I" => "\x{24BE}",
345             "J" => "\x{24BF}",
346             "K" => "\x{24C0}",
347             "L" => "\x{24C1}",
348             "M" => "\x{24C2}",
349             "N" => "\x{24C3}",
350             "O" => "\x{24C4}",
351             "P" => "\x{24C5}",
352             "Q" => "\x{24C6}",
353             "R" => "\x{24C7}",
354             "S" => "\x{24C8}",
355             "T" => "\x{24C9}",
356             "U" => "\x{24CA}",
357             "V" => "\x{24CB}",
358             "W" => "\x{24CC}",
359             "X" => "\x{24CD}",
360             "Y" => "\x{24CE}",
361             "Z" => "\x{24CF}",
362             "a" => "\x{24D0}",
363             "b" => "\x{24D1}",
364             "c" => "\x{24D2}",
365             "d" => "\x{24D3}",
366             "e" => "\x{24D4}",
367             "f" => "\x{24D5}",
368             "g" => "\x{24D6}",
369             "h" => "\x{24D7}",
370             "i" => "\x{24D8}",
371             "j" => "\x{24D9}",
372             "k" => "\x{24DA}",
373             "l" => "\x{24DB}",
374             "m" => "\x{24DC}",
375             "n" => "\x{24DD}",
376             "o" => "\x{24DE}",
377             "p" => "\x{24DF}",
378             "q" => "\x{24E0}",
379             "r" => "\x{24E1}",
380             "s" => "\x{24E2}",
381             "t" => "\x{24E3}",
382             "u" => "\x{24E4}",
383             "v" => "\x{24E5}",
384             "w" => "\x{24E6}",
385             "x" => "\x{24E7}",
386             "y" => "\x{24E8}",
387             "z" => "\x{24E9}",
388             "0" => "\x{24EA}",
389             "1" => "\x{2460}",
390             "2" => "\x{2461}",
391             "3" => "\x{2462}",
392             "4" => "\x{2463}",
393             "5" => "\x{2464}",
394             "6" => "\x{2465}",
395             "7" => "\x{2466}",
396             "8" => "\x{2467}",
397             "9" => "\x{2468}",
398             "?" => "?",
399             "!" => "!",
400             "," => ",",
401             );
402              
403             # http://www.fileformat.info/info/unicode/category/So/list.htm
404             my %smilies = (
405             '<3' => "\x{2665}", #heart
406             ':heart:' => "\x{2665}", #heart
407             ':)' => "\x{1f600}", #smile
408             ':D' => "\x{1f625}", #grin
409             '8-)' => "\x{1f60e}", #cool
410             ':P' => "\x{1f61b}", #pull tounge
411             ":'(" => "\x{1f62c}", #cry
412             ':(' => "\x{2639}", #sad
413             ";)" => "\x{1f609}", #wink
414             ":sleep:" => "\x{1f634}", #sleep
415             ":halo:" => "\x{1f607}", #halo
416             ":devil:" => "\x{1f608}", #devil
417             ":horns:" => "\x{1f608}", #devil
418             "(c)" => "\x{00a9}", # copyright
419             "(r)" => "\x{00ae}", # registered
420             "(tm)" => "\x{0099}", # trademark
421             ":email:" => "\x{2709}", # email
422             ":yes:" => "\x{2713}", # tick
423             ":no:" => "\x{2715}", # cross
424             ":beer:" => "\x{1F37A}", # beer
425             ":wine:" => "\x{1f377}", # wine
426             ":wine_glass:" => "\x{1f377}", # wine
427             ":cake:" => "\x{1f382}", # cake
428             ":star:" => "\x{2606}", # star
429             ":ok:" => "\x{1f44d}", # ok = thumbsup
430             ":yes:" => "\x{1f44d}", # yes = thumbsup
431             ":thumbsup:" => "\x{1f44d}", # thumbsdown
432             ":thumbsdown:" => "\x{1f44e}", # thumbsup
433             ":bad:" => "\x{1f44e}", # bad = thumbsdown
434             ":no:" => "\x{1f44e}", # no = thumbsdown
435             ":ghost:" => "\x{1f47b}", # ghost
436             ":skull:" => "\x{1f480}", # skull
437             ":time:" => "\x{231a}", # time, watch face
438             ":hourglass:" => "\x{231b}", # hourglass
439             );
440              
441             my $smiles = join( '|', map { quotemeta($_) } keys %smilies );
442              
443             my %code_map = (
444             f => \%flip,
445             b => \%bold,
446             i => \%italic,
447             o => \%bubbles,
448             s => \%script,
449             );
450              
451             # ----------------------------------------------------------------------------
452             # regexp replace function
453             sub _transform {
454 6     6   15 my ( $code, $string ) = @_;
455 6         6 my $transform = 1;
456              
457 6 100       17 if ( $code eq 'f' ) {
    100          
458              
459             # needs to be reversed and in lower case for flip
460 1         5 $string = reverse lc($string);
461             }
462             elsif ( $code eq 'l' ) {
463              
464             # leet
465 1         6 $string = leet($string);
466 1         331 $transform = 0;
467             }
468              
469 6 100 66     29 if ( $transform && $code_map{$code} ) {
470 5         87 $string =~ s/([A-ZA-z0-9?!,])/$code_map{$code}->{$1}/gsm;
471             }
472              
473 6         27 return $string;
474             }
475              
476             # ----------------------------------------------------------------------------
477              
478             =item utf_transform
479              
480             transform A-ZA-z0-9!?, into UTF8 forms suitable for websites that do not allow
481             HTML codes for these
482              
483             we use the following psuedo HTML elements
484              
485             flip text upside down and reversed
486             bold text
487             italic text
488             bubbles text
489             script text
490             leet text LeetSpeak
491              
492             B
493              
494             incoming string
495              
496             B
497              
498             transformed string
499              
500             =cut
501              
502             sub utf_transform {
503 1     1 1 10 my ($in) = @_;
504              
505             # transform for formatting
506 1         16 $in =~ s|<(\w)>(.*?)|_transform( $1, $2)|egsi;
  6         10  
507              
508 1         5 return $in;
509             }
510              
511             # ----------------------------------------------------------------------------
512              
513             =item utf_smilies
514              
515             transform some character strings into UTF smilies
516              
517             I have only implemented a small set of smilies, ones that I am likely to use
518              
519             | smilie | symbol |
520             |---------------------------+-------------|
521             | <3. :heart: | heart |
522             | :) | smile |
523             | :D | grin |
524             | 8-) | cool |
525             | :P | pull tongue |
526             | :( | cry |
527             | :( | sad |
528             | ;) | wink |
529             | :halo: | halo |
530             | :devil:, :horns: | devil horns |
531             | (c) | copyright |
532             | (r) | registered |
533             | (tm) | trademark |
534             | :email: | email |
535             | :yes: | tick |
536             | :no: | cross |
537             | :beer: | beer |
538             | :wine:, :wine_glass: | wine |
539             | :cake: | cake |
540             | :star: | star |
541             | :ok:, :thumbsup: | thumbsup |
542             | :bad:, :thumbsdown: | thumbsup |
543             | :ghost: | ghost |
544             | :skull: | skull |
545             | :hourglass: | hourglass |
546             | :time: | watch face |
547             | :sleep: | sleep |
548              
549             B
550              
551             incoming string
552              
553             B
554              
555             transformed string
556              
557             =cut
558              
559             sub utf_smilies {
560 1     1 1 1319 my ($in) = @_;
561              
562 1         169 $in =~ s/(?
563              
564 1         9 return $in;
565             }
566              
567             # ----------------------------------------------------------------------------
568              
569             =back
570              
571             =cut
572              
573             # ----------------------------------------------------------------------------
574             1;