| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package App::Greple::charcode; |
|
2
|
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
240846
|
use 5.024; |
|
|
1
|
|
|
|
|
4
|
|
|
4
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
52
|
|
|
5
|
1
|
|
|
1
|
|
486
|
use utf8; |
|
|
1
|
|
|
|
|
230
|
|
|
|
1
|
|
|
|
|
4
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
our $VERSION = "0.9909"; |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=encoding utf-8 |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 NAME |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
App::Greple::charcode - greple module to annotate unicode character data |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=for html |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
greple -Mcharcode [ module option -- ] [ command option ] ... |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
COMMAND OPTION |
|
24
|
|
|
|
|
|
|
--no-annotate do not print annotation |
|
25
|
|
|
|
|
|
|
--[no-]align align annotations |
|
26
|
|
|
|
|
|
|
--align-all align to the same column for all lines |
|
27
|
|
|
|
|
|
|
--align-side align to the longest line |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
PATTERNS |
|
30
|
|
|
|
|
|
|
--composite find composite character (combining character sequence) |
|
31
|
|
|
|
|
|
|
--precomposed find precomposed character |
|
32
|
|
|
|
|
|
|
--combined find both composite and precomposed characters |
|
33
|
|
|
|
|
|
|
--outstand find --combined and non-ASCII characters |
|
34
|
|
|
|
|
|
|
--dt=type specify decomposition type |
|
35
|
|
|
|
|
|
|
--surrogate find character in UTF-16 surrogate pair range |
|
36
|
|
|
|
|
|
|
--outstand find non-ASCII combining characters |
|
37
|
|
|
|
|
|
|
-p/-P prop find \p{prop} or \P{prop} characters |
|
38
|
|
|
|
|
|
|
--ansicode find ANSI terminal control sequences |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
MODULE OPTION |
|
41
|
|
|
|
|
|
|
--column[=#] display column number |
|
42
|
|
|
|
|
|
|
--visible[=#] display invisible characters in visible form |
|
43
|
|
|
|
|
|
|
--char[=#] display character itself |
|
44
|
|
|
|
|
|
|
--width[=#] display width |
|
45
|
|
|
|
|
|
|
--utf8[=#] display UTF-8 encoding |
|
46
|
|
|
|
|
|
|
--utf16[=#] display UTF-16 encoding |
|
47
|
|
|
|
|
|
|
--code[=#] display Unicode code point |
|
48
|
|
|
|
|
|
|
--name[=#] display character name |
|
49
|
|
|
|
|
|
|
--nfd[=#] display Unicode Normalization Form D |
|
50
|
|
|
|
|
|
|
--nfc[=#] display Unicode Normalization Form C |
|
51
|
|
|
|
|
|
|
--nfkd[=#] display Unicode Normalization Form KD |
|
52
|
|
|
|
|
|
|
--nfkc[=#] display Unicode Normalization Form KC |
|
53
|
|
|
|
|
|
|
--split[=#] put annotation for each character |
|
54
|
|
|
|
|
|
|
--alignto[=#] align annotation to # |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
--config KEY[=VALUE],... |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
greple -Mcc [ module option -- ] [ command option ] ... |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
-Mcc alias module for -Mcharcode |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=head1 VERSION |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
Version 0.9909 |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Greple module C<-Mcharcode> (or C<-Mcc> for short) displays |
|
70
|
|
|
|
|
|
|
information about the matched characters. It can visualize Unicode |
|
71
|
|
|
|
|
|
|
zero-width combining or hidden characters, which can be useful for |
|
72
|
|
|
|
|
|
|
examining text containing visually indistinguishable or imperceptible |
|
73
|
|
|
|
|
|
|
elements. |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
The following output, retrieved from this document for non-ASCII |
|
76
|
|
|
|
|
|
|
characters (C<\P{ASCII}>), shows that the character C<\N{VARIATION |
|
77
|
|
|
|
|
|
|
SELECTOR-15}> is included after the copyright character. The same |
|
78
|
|
|
|
|
|
|
character, presumably left over from editing, is also included after a |
|
79
|
|
|
|
|
|
|
normal ASCII C character. |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
$ greple -Mcharcode '\P{ASCII}' charcode.pm |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
┌─── 12 \x{fe0e} \N{VARIATION SELECTOR-15} |
|
84
|
|
|
|
|
|
|
│ ┌─ 14 \x{a9} \N{COPYRIGHT SIGN} |
|
85
|
|
|
|
|
|
|
│ ├─ 14 \x{fe0e} \N{VARIATION SELECTOR-15} |
|
86
|
|
|
|
|
|
|
Copyright︎ ©︎ 2025 Kazumasa Utashiro. |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
The nasal sound of the K line (カ行) in Japanese is sometimes |
|
89
|
|
|
|
|
|
|
represented by adding a semivoiced dot to the K line character, and |
|
90
|
|
|
|
|
|
|
since Unicode does not define a corresponding character, it is |
|
91
|
|
|
|
|
|
|
represented by combining the original character with a combining |
|
92
|
|
|
|
|
|
|
character. This module allows you to see how it is done. |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
┌───────── 0 \x{30ab} \N{KATAKANA LETTER KA} |
|
95
|
|
|
|
|
|
|
├───────── 0 \x{309a} \N{COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK} |
|
96
|
|
|
|
|
|
|
│ ┌─────── 2 \x{30ad} \N{KATAKANA LETTER KI} |
|
97
|
|
|
|
|
|
|
│ ├─────── 2 \x{309a} \N{COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK} |
|
98
|
|
|
|
|
|
|
│ │ ┌───── 4 \x{30af} \N{KATAKANA LETTER KU} |
|
99
|
|
|
|
|
|
|
│ │ ├───── 4 \x{309a} \N{COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK} |
|
100
|
|
|
|
|
|
|
│ │ │ ┌─── 6 \x{30b1} \N{KATAKANA LETTER KE} |
|
101
|
|
|
|
|
|
|
│ │ │ ├─── 6 \x{309a} \N{COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK} |
|
102
|
|
|
|
|
|
|
│ │ │ │ ┌─ 8 \x{30b3} \N{KATAKANA LETTER KO} |
|
103
|
|
|
|
|
|
|
│ │ │ │ ├─ 8 \x{309a} \N{COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK} |
|
104
|
|
|
|
|
|
|
カ゚キ゚ク゚ケ゚コ゚ |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=for html |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head1 COMMAND OPTIONS |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=over 7 |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=item B<--annotate>, B<--no-annotate> |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
Print annotation or not. Enabled by default, so use C<--no-annotate> |
|
117
|
|
|
|
|
|
|
to disable it. |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=item B<-->[B<no->]B<align> |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
Align annotation or not. |
|
122
|
|
|
|
|
|
|
Default true. |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=item B<--align-all> |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
Align to the same column for all lines |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=item B<--align-side> |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
Align to the longest line length, regardless of match position. |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=back |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=head1 PATTERN OPTIONS |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
If multiple patterns are given to B<greple>, it normally prints only |
|
137
|
|
|
|
|
|
|
the lines that match all of the patterns. However, for the purposes |
|
138
|
|
|
|
|
|
|
of this module, it is desirable to display lines that match any of |
|
139
|
|
|
|
|
|
|
them, so the C<--need=1> option is specified by default. |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
If multiple patterns are specified, the strings matching each pattern |
|
142
|
|
|
|
|
|
|
will be displayed in a different color. |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=over 7 |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=item B<--composite> |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
Search for composite characters (combining character sequence) |
|
149
|
|
|
|
|
|
|
composed of base and combining characters. |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=item B<--precomposed> |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
Search for precomposed characters (C<\p{Dt=Canonical}>). |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=item B<--combined> |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
Find both B<composite> and B<precomposed> characters. |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=item B<--dt>=I<type>, B<--decomposition-type>=I<type> |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
Specifies the C. It can take three values: |
|
162
|
|
|
|
|
|
|
C, C (C), or C. |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=item B<--outstand> |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
Matches outstanding characters, that is, non-ASCII combining |
|
167
|
|
|
|
|
|
|
characters. |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=item B<--surrogate> |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
Matches characters in the UTF-16 surrogate pair range (U+10000 to |
|
172
|
|
|
|
|
|
|
U+10FFFF). |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=item B<-p> I<prop>, B<-P> I<prop> |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
Short cut for C<-E '\p{prop}'> and C<-E '\P{prop}'>. |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
You will not be able to use greple's C<-p> option, but it probably |
|
179
|
|
|
|
|
|
|
won't be a problem. If you must use it, use C<--paragraph>. |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=item B<--ansicode> |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
Search ANSI terminal control sequence. Automatically disables C |
|
184
|
|
|
|
|
|
|
and C parameter and activates C. Colorized output is |
|
185
|
|
|
|
|
|
|
disabled too. |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
To be precise, it searches for CSI Control sequences defined in |
|
188
|
|
|
|
|
|
|
ECMA-48. Pattern is defined as this. |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
(?x) |
|
191
|
|
|
|
|
|
|
# see ECMA-48 5.4 Control sequences |
|
192
|
|
|
|
|
|
|
(?: \e\[ | \x9b ) # csi |
|
193
|
|
|
|
|
|
|
[\x30-\x3f]* # parameter bytes |
|
194
|
|
|
|
|
|
|
[\x20-\x2f]* # intermediate bytes |
|
195
|
|
|
|
|
|
|
[\x40-\x7e] # final byte |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=for html |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
=back |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=head1 MODULE OPTIONS and PARAMS |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
Module-specific options are specified between C<-Mcharcode> and C<-->. |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
greple -Mcharcode --config width,name=0 -- ... |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
Parameters can be set in two ways, one using the C<--config> option |
|
210
|
|
|
|
|
|
|
and the other using dedicated options. See the L</CONFIGURATION> |
|
211
|
|
|
|
|
|
|
section for more information. |
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
=over 7 |
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=item B<--config>=I |
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
Set configuration parameters. |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=item B |
|
220
|
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
=item B<--column>[=I<#>] |
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
Show column number. |
|
224
|
|
|
|
|
|
|
Default C<1>. |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=item B |
|
227
|
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
=item B<--visible>[=I<#>] |
|
229
|
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
Display invisible characters in a visible string representation. |
|
231
|
|
|
|
|
|
|
Default C<0>. |
|
232
|
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=item B |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
=item B<--char>[=I<#>] |
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
Show the character itself. |
|
238
|
|
|
|
|
|
|
Default C<0>. |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=item B |
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=item B<--width>[=I<#>] |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
Show the width. |
|
245
|
|
|
|
|
|
|
Default C<0>. |
|
246
|
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
=item B |
|
248
|
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=item B<--utf8>[=I<#>] |
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
Show the UTF-8 encoding in hex. |
|
252
|
|
|
|
|
|
|
Default C<0>. |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=item B |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=item B<--utf16>[=I<#>] |
|
257
|
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
Show the UTF-16 encoding in hex. |
|
259
|
|
|
|
|
|
|
Default C<0>. |
|
260
|
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
=item B |
|
262
|
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
=item B<--code>[=I<#>] |
|
264
|
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
Show the character code point in hex. |
|
266
|
|
|
|
|
|
|
Default C<1>. |
|
267
|
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=item B, B, B, B |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=item B<--nfd>[=I<#>], B<--nfc>[=I<#>], B<--nfkd>[=I<#>], B<--nfkc>[=I<#>] |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
Show the Unicode Normalization Form D, C, KD and KC. |
|
273
|
|
|
|
|
|
|
See L. |
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
=item B |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
=item B<--name>[=I<#>] |
|
278
|
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
Show the Unicode name of the character. |
|
280
|
|
|
|
|
|
|
Default C<1>. |
|
281
|
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
=item B |
|
283
|
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=item B<--split>[=I<#>] |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
If a pattern matching multiple characters is given, annotate each |
|
287
|
|
|
|
|
|
|
character independently. |
|
288
|
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
=item B=I |
|
290
|
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=item B<--alignto>=I |
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
Align annotation messages. Defaults to C<1>, which aligns to the |
|
294
|
|
|
|
|
|
|
rightmost column; C<0> means no align; if a value of C<2> or greater |
|
295
|
|
|
|
|
|
|
is given, it aligns to that numbered column. |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
I can be negative; if C<-1> is specified, align to the same |
|
298
|
|
|
|
|
|
|
column for all lines. If C<-2> is specified, align to the longest |
|
299
|
|
|
|
|
|
|
line length, regardless of match position. |
|
300
|
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
=back |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=head1 CONFIGURATION |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
Configuration parameters can be set in several ways. |
|
306
|
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=head2 MODULE START FUNCTION |
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
The start function of a module can be specified at the same time as |
|
310
|
|
|
|
|
|
|
the module declaration. |
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
greple -Mcharcode::config(alignto=0) |
|
313
|
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
greple -Mcharcode::config=alignto=80 |
|
315
|
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
=head2 PRIVATE MODULE OPTION |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
Module-specific options are specified between C<-Mcharcode> and C<-->. |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
greple -Mcharcode --config alignto=80 -- ... |
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
greple -Mcharcode --alignto=80 -- ... |
|
323
|
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
=head2 GENERIC MODULE OPTION |
|
325
|
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
Module-specific C<--config> option can be called by normal command |
|
327
|
|
|
|
|
|
|
line option C<--charcode::config>. |
|
328
|
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
greple -Mcharcode --charcode::config alignto=80 ... |
|
330
|
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=head1 EXAMPLES |
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
=head2 HOMOGLYPH |
|
334
|
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
greple -Mcc -P ASCII --align-side --cm=S t/homoglyph |
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=for html |
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=head2 BOX DRAWINGS |
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
perldoc -m App::ansicolumn::Border | greple -Mcc --code -- --outstand --mc=10, |
|
344
|
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
=for html |
|
346
|
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
=head2 AYNU ITAK |
|
350
|
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
greple -Mcc --outstand --split t/ainu.txt |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
=for html |
|
354
|
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
=head1 INSTALL |
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
cpanm -n App::Greple::charcode |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
L |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
L |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
L |
|
368
|
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=head1 LICENSE |
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
Copyright︎ ©︎ 2025 Kazumasa Utashiro. |
|
372
|
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
|
374
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
=head1 AUTHOR |
|
377
|
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
Kazumasa Utashiro |
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
=cut |
|
381
|
|
|
|
|
|
|
|
|
382
|
1
|
|
|
1
|
|
101
|
use Exporter qw(import); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
56
|
|
|
383
|
|
|
|
|
|
|
# Symbols that may be imported on request.  The ":alias" export tag refers
# to the very same array, so it always exports the same list.
# NOTE(review): no "sub config" is defined in the visible part of this
# file -- confirm where the exported name comes from.
our @EXPORT_OK = qw(config);
our %EXPORT_TAGS = (alias => \@EXPORT_OK);
|
385
|
|
|
|
|
|
|
|
|
386
|
1
|
|
|
1
|
|
4
|
use Encode (); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
12
|
|
|
387
|
1
|
|
|
1
|
|
499
|
use Getopt::EX::Config; |
|
|
1
|
|
|
|
|
23323
|
|
|
|
1
|
|
|
|
|
8
|
|
|
388
|
1
|
|
|
1
|
|
52
|
use Hash::Util qw(lock_keys); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
5
|
|
|
389
|
1
|
|
|
1
|
|
46
|
use Data::Dumper; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
33
|
|
|
390
|
1
|
|
|
1
|
|
448
|
use Text::ANSI::Fold::Util qw(ansi_width); |
|
|
1
|
|
|
|
|
45002
|
|
|
|
1
|
|
|
|
|
60
|
|
|
391
|
|
|
|
|
|
|
|
|
392
|
1
|
|
|
1
|
|
488
|
use App::Greple::annotate; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
297
|
|
|
393
|
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
# Module configuration object.  Each key is a parameter that can be set
# through --config or through a dedicated module option (see finalize()).
#
# The on/off defaults follow the POD above: "column", "code" and "name" are
# shown by default, everything else is off.  "visible" must default to 0 and
# "code" to 1; otherwise the documented sample output (which prints
# \x{a9} for COPYRIGHT SIGN) cannot be produced by a default run.
#
# "split" and "alignto" are references into the configuration of
# App::Greple::annotate, so setting them here writes through to that module.
our $config = Getopt::EX::Config->new(
    column   => 1,
    visible  => 0,
    char     => 0,
    width    => 0,
    utf8     => 0,
    utf16    => 0,
    nfd      => 0,
    nfc      => 0,
    nfkd     => 0,
    nfkc     => 0,
    code     => 1,
    name     => 1,
    split    => \$App::Greple::annotate::config->{split},
    alignto  => \$App::Greple::annotate::config->{alignto},
);
# Lock the key set so that a misspelled parameter name dies instead of
# silently creating a new entry.
lock_keys %{$config};
|
411
|
|
|
|
|
|
|
# Option-type suffix for each configuration key.  The '*' entry is the
# fallback used for every key that has no entry of its own.
my %type = ( '*' => ':1' );

# optspec($name)
#   Return the option specification string for configuration key $name:
#   the name followed by its type suffix from %type (or the '*' fallback,
#   or nothing if neither exists).
sub optspec {
    my $name   = $_[0];
    my $suffix = $type{$name} // $type{'*'} // '';
    return $name . $suffix;
}
|
|
|
|
0
|
|
|
|
|
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
# Named groups of configuration keys.  The "field" group is the list of
# keys that the "all" option handler in finalize() sets in one go.
# NOTE(review): the normalization keys (nfd nfc nfkd nfkc) are not part of
# this group -- confirm that is intended.
our %CONFIG_TAGS = (
    field => [ qw(column visible char width utf8 utf16 code name) ],
);
|
417
|
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
# finalize($mod, $argv)
#   Hands $argv to $config->deal_with together with one option spec per
#   configuration key, plus an extra "all" option whose handler assigns the
#   given value to every key listed in $CONFIG_TAGS{field}.
#   The two arguments are stored in the package variables $mod and $argv.
#   Returns whatever deal_with returns.
sub finalize {
    our ($mod, $argv) = @_;

    # Handler for "all": $_[1] is the option value.  A config slot may hold
    # either a plain value or a reference to a value kept elsewhere; write
    # through the reference in the latter case.
    my $set_all = sub {
        my $value = $_[1];
        for my $key (@{ $CONFIG_TAGS{field} }) {
            my $target = ref $config->{$key} ? $config->{$key}
                                             : \$config->{$key};
            $$target = $value;
        }
    };

    my @spec = map { optspec($_) } keys %$config;
    $config->deal_with($argv, @spec, 'all:1' => $set_all);
}
|
431
|
|
|
|
|
|
|
|
|
432
|
1
|
|
|
1
|
|
1296
|
use Unicode::UCD qw(charinfo); |
|
|
1
|
|
|
|
|
20738
|
|
|
|
1
|
|
|
|
|
79
|
|
|
433
|
1
|
|
|
1
|
|
8
|
use Unicode::Normalize; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
1039
|
|
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
# charname([$text])
#   Return $text (or $_ when called without arguments) with every character,
#   newline included, replaced by the result of name() for that character.
#   The input is not modified.
sub charname {
    local $_ = @_ ? $_[0] : $_;
    return s{(.)}{name($1)}sger;
}
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
# name($char)
#   Look up the first character of $char with Unicode::UCD::charinfo and
#   return its name wrapped as \N{...}; return "[noinfo]" when charinfo
#   yields nothing for that code point.
sub name {
    my ($char) = @_;
    my $info = Unicode::UCD::charinfo(ord($char));
    return "[noinfo]" unless $info;
    return "\\N{" . $info->{name} . "}";
}
|
448
|
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
# charcode([$text])
#   Return $text (or $_ when called without arguments) with every character,
#   newline included, replaced by its code() representation.
#   The input is not modified.
sub charcode {
    my $text = @_ ? $_[0] : $_;
    return $text =~ s{(.)}{code($1)}sger;
}
|
453
|
|
|
|
|
|
|
|
|
454
|
0
|
|
|
0
|
1
|
|
# utf8([$text]) / utf16([$text])
#   Return the UTF-8 / UTF-16 encoding of $text (or of $_ when called
#   without arguments) as a string of code() escapes, one per encoded byte.
sub utf8  { encode('UTF-8',  @_) }
sub utf16 { encode('UTF-16', @_) }

# encode($encoding [, $text])
#   Encode $text (default: $_) with Encode::encode using the named
#   $encoding and return every resulting byte formatted by code().
#   The input is not modified.
sub encode {
    my $encoding = shift;
    my $text = @_ ? $_[0] : $_;
    # /s is required: without it "." does not match a 0x0A byte, so such a
    # byte (a newline in UTF-8, or e.g. the low byte of U+010A in UTF-16)
    # would be left in the result as a raw newline instead of \x{0a}.
    return Encode::encode($encoding, $text) =~ s/(.)/code($1)/sger;
}
|
461
|
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
# normalize($sub [, $text])
#   Call the code reference $sub with $text (or with $_ when no text is
#   given) as its single argument and return its result.
sub normalize {
    my $form = shift;
    return $form->(@_ ? $_[0] : $_);
}
|
467
|
0
|
|
|
0
|
1
|
|
# nfd / nfc / nfkd / nfkc ([$text])
#   Apply the corresponding Unicode::Normalize function to $text (or to $_
#   when called without arguments) via normalize(), then render the result
#   with charcode().
sub nfd  { charcode(normalize(\&NFD,  @_)) }
sub nfc  { charcode(normalize(\&NFC,  @_)) }
sub nfkd { charcode(normalize(\&NFKD, @_)) }
sub nfkc { charcode(normalize(\&NFKC, @_)) }
|
471
|
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
# code($char)
#   Format the code point of the first character of $char as \x{...} in
#   lower-case hex: at least two digits for values up to 0xff, at least
#   four digits for anything larger.
sub code {
    my $ord    = ord $_[0];
    my $format = $ord > 0xff ? '\x{%04x}' : '\x{%02x}';
    return sprintf $format, $ord;
}
|
477
|
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
# Control characters that have a conventional backslash escape, mapped to
# the printable two-character form of that escape.
my %cmap = (
    "\t" => '\t',
    "\n" => '\n',
    "\r" => '\r',
    "\f" => '\f',
    "\b" => '\b',
    "\a" => '\a',
    "\e" => '\e',
);

# control([$text])
#   Return a printable form of $text (default: $_), deciding by its first
#   character:
#     - one of the characters in %cmap: that character is replaced by its
#       backslash escape and the rest of the string is kept;
#     - any other character in \x00-\x1f: replaced by \cX notation
#       (code point + 0x40), rest of the string kept;
#     - anything else: the result of code() for the string.
#   The input is not modified.
sub control {
    my $str = @_ ? $_[0] : $_;
    return $str if $str =~ s/\A([\t\n\r\f\b\a\e])/$cmap{$1}/e;
    return $str if $str =~ s/\A([\x00-\x1f])/sprintf "\\c%c", ord($1)+0x40/e;
    return code($str);
}
|
498
|
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
# Pattern for a character treated as invisible: anything that is not a
# letter, number, punctuation or symbol.  The same pattern is also stored
# in the INVISIBLE_RE environment variable.
my $invisible_re = $ENV{INVISIBLE_RE} = qr/[^\pL\pN\pP\pS]/;

# visible([$text])
#   Return $text (or $_ when called without arguments) with every character
#   matching $invisible_re replaced by its control() representation.
#   The input is not modified.
sub visible {
    my $text = @_ ? $_[0] : $_;
    return $text =~ s{($invisible_re)}{control($1)}ger;
}
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
# width([$text])
#   Return ansi_width() of $text, or of $_ when called without arguments.
sub width {
    my $text = @_ ? $_[0] : $_;
    return ansi_width($text);
}
|
510
|
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
# describe(column => $n, match => $string, ...)
#   Build the annotation text for one match.  For every field enabled in
#   $config, in the fixed order below, one formatted item is produced; the
#   items are joined with a NO-BREAK SPACE and returned as a single string.
#   While the items are generated, $_ is localized to the matched string so
#   that the field functions, which default to $_, see it.
sub describe {
    my %param  = @_;
    my $column = $param{column};
    local $_   = $param{match};

    # [ config key, sprintf format, value generator ], in output order.
    my @layout = (
        [ column  => '%3d',       sub { $column } ],
        [ visible => '%s',        \&visible       ],
        [ char    => 'char="%s"', sub { $_ }      ],
        [ width   => 'w=%d',      \&width         ],
        [ utf8    => 'utf8=%s',   \&utf8          ],
        [ utf16   => 'utf16=%s',  \&utf16         ],
        [ nfd     => 'nfd=%s',    \&nfd           ],
        [ nfc     => 'nfc=%s',    \&nfc           ],
        [ nfkd    => 'nfkd=%s',   \&nfkd          ],
        [ nfkc    => 'nfkc=%s',   \&nfkc          ],
        [ code    => 'code=%s',   \&charcode      ],
        [ name    => 'name=%s',   \&charname      ],
    );

    my @s;
    # A lexical loop variable keeps $_ bound to the matched string.
    for my $item (@layout) {
        my ($key, $format, $value) = @$item;
        next unless $config->{$key};
        push @s, sprintf $format, $value->();
    }
    join "\N{NBSP}", @s;
}
|
528
|
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
# Install describe() as the ANNOTATE hook of App::Greple::annotate.
# NOTE(review): presumably that module calls the hook with column/match
# pairs to obtain the text for each annotation -- confirm against
# App::Greple::annotate.
$App::Greple::annotate::ANNOTATE = \&describe;
|
530
|
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
1; |
|
532
|
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
__DATA__ |