File Coverage

blib/lib/UI/KeyboardLayout.pm
Criterion Covered Total %
statement 36 3926 0.9
branch 1 2700 0.0
condition 1 1827 0.0
subroutine 11 210 5.2
pod 0 173 0.0
total 49 8836 0.5


line stmt bran cond sub pod time code
1             package UI::KeyboardLayout;
2            
3             $VERSION = $VERSION = "0.71";
4            
5             binmode $DB::OUT, ':utf8' if $DB::OUT; # (older) Perls had "Wide char in Print" in debugger otherwise
6             binmode $DB::LINEINFO, ':utf8' if $DB::LINEINFO; # (older) Perls had "Wide char in Print" in debugger otherwise
7            
8 1     1   15391 use strict;
  1         2  
  1         29  
9 1     1   559 use utf8;
  1         8  
  1         4  
10 1   50 1   145 BEGIN { my $n = ($ENV{UI_KEYBOARDLAYOUT_DEBUG} || 0);
11 1 50       6 if ($n =~ /^0x/i) {
12 0         0 $n = hex $n;
13             } else {
14 1         2 $n += 0;
15             }
16 1         40 eval "sub debug() { $n }";
17             # 1 2 4 8 0x10 0x20
18 1         4 my @dbg = (qw( debug_face_layout_recipes debug_GUESS_MASSAGE debug_OPERATOR debug_import debug_stacking debug_noid ),
19             # 0x40 0x80 0x100 0x200 0x400 0x800 0x1000
20             qw(warnSORTEDLISTS printSORTEDLISTS warnSORTCOMPOSE warnDO_COMPOSE warnCACHECOMP dontCOMPOSE_CACHE warnUNRES),
21             # 0x2000 0x4000
22             qw(debug_STACKING printSkippedComposeKey),
23             '_debug_PERL_dollar1_scoping');
24 1         1 my $c = 0; # printSORTEDLISTS: Dumpvalue to STDOUT (implementation detail!)
25 1         8 my @dbg_b = map $n & (1<<$_), 0..31;
26 1         3 for (@dbg) {
27 16         585 eval "sub $_ () {$dbg_b[$c++]}";
28             }
29             }
30             sub debug_PERL_dollar1_scoping () { debug & 0x1000000 }
31            
32             my $ctrl_after = 1; # In "pairs of nonShift/Shift-columns" (1 simplifies output of BACK/ESCAPE/RETURN/CANCEL)
33             my $create_alpha_ctrl = 2;
34             my %start_SEC = (FKEYS => [96, 24, sub { my($self,$u,$v)=@_; 'F' . (1+$u-$v->[0]) }],
35             ARROWS => [128, 16,
36             sub { my($self,$u,$v)=@_;
37             (qw(HOME UP PRIOR DIVIDE LEFT CLEAR RIGHT MULTIPLY END DOWN NEXT SUBTRACT INSERT DELETE RETURN ADD))[$u-$v->[0]]}],
38             NUMPAD => [144, 16,
39             sub { my($self,$u,$v)=@_;
40             ((map { ($_ > 10 ? 'F' : "NUMPAD") . $_} 7..9,14,4..6,15,1..3,16,0), 'DECIMAL')[$u-$v->[0]]}]);
41             my $maxEntityLen = 111; # Avoid overflow of prefix char above 0fff in kbdutool (but now can channel them to smaller values)
42             my $avoid_overlong_synonims_Entity = 20; # These two are currently disabled
43            
44 84     84 0 267 sub toU($) { substr+(qq(\x{fff}).shift),1 } # Some bullshit one must do to make perl's Unicode 8-bit-aware (!)
45            
46             #use subs qw(chr lc);
47 1     1   426 use subs qw(chr lc uc ucfirst);
  1         19  
  1         3  
48            
49             #BEGIN { *CORE::GLOGAL::chr = sub ($) { toU CORE::chr shift };
50             # *CORE::GLOGAL::lc = sub ($) { CORE::lc toU shift };
51             #}
52             ### Remove ß ẞ :
53             ## my %fix = qw( ԥ Ԥ ԧ Ԧ ӏ Ӏ ɀ Ɀ ꙡ Ꙡ ꞑ Ꞑ ꞧ Ꞧ ɋ Ɋ ꞩ Ꞩ ȿ Ȿ ꞓ Ꞓ ꞥ Ꞥ ); # Perl 5.8.8 uc is wrong with palochka, 5.10 with z with swash tail
54             my %fix = qw( ԥ Ԥ ԧ Ԧ ӏ Ӏ ɀ Ɀ ꙡ Ꙡ ꞑ Ꞑ ꞧ Ꞧ ɋ Ɋ ß ẞ ꞩ Ꞩ ȿ Ȿ ꞓ Ꞓ ꞥ Ꞥ ℊ Ɡ ϳ Ϳ ); # Perl 5.8.8 uc is wrong with palochka, 5.10 with z with swash tail
55             my %unfix = reverse %fix;
56            
57 84     84   86 sub chr($) { local $^W = 0; toU CORE::chr shift } # Avoid illegal character 0xfffe etc warnings...
  84         84  
58 0 0   0     sub lc($) { my $in = shift; $unfix{$in} || CORE::lc toU $in }
  0            
59 0 0   0     sub uc($) { my $in = shift; $fix{$in} || CORE::uc toU $in }
  0            
60 0 0   0     sub ucfirst($) { my $in = shift; $fix{$in} || CORE::ucfirst toU $in }
  0            
61            
62             # We use this for printing, not for reading (so we can use //o AFTER the UCD is read)
63 1     1   184 my $rxCombining = qr/\p{NonspacingMark}/; # The initial version matches what Perl knows
  1         2  
  1         12  
64             my $rxZW = qr/\p{Line_Break: ZW}|[\xAD\x{200b}-\x{200f}\x{2060}-\x{2064}\x{fe00}-\x{fe0f}]/;
65            
66 0     0 0   sub rxCombining { $rxCombining }
67            
68             =pod
69            
70             =encoding UTF-8
71            
72             =head1 NAME
73            
74             UI::KeyboardLayout - Module for designing keyboard layouts
75            
76             =head1 SYNOPSIS
77            
78             #!/usr/bin/perl -wC31
79             use UI::KeyboardLayout;
80             use strict;
81            
82             # Download from http://www.unicode.org/Public/UNIDATA/
83             UI::KeyboardLayout::->set_NamesList("$ENV{HOME}/Downloads/NamesList.txt");
84            
85             UI::KeyboardLayout::->set__value('ComposeFiles', # CygWin too
86             ['/usr/share/X11/locale/en_US.UTF-8/Compose']);
87             # http://cgit.freedesktop.org/xorg/proto/xproto/plain/keysymdef.h
88             UI::KeyboardLayout::->set__value('KeySyms',
89             ['/usr/share/X11/include/keysymdef.h']);
90             UI::KeyboardLayout::->set__value('EntityFiles',
91             ["$ENV{HOME}/Downloads/bycodes.html"]);
92             UI::KeyboardLayout::->set__value('rfc1345Files',
93             ["$ENV{HOME}/Downloads/rfc1345.html"]);
94            
95             my $i = do {local $/; open $in, '<', 'MultiUni.kbdd' or die; <$in>};
96             # Init from in-memory copy of the configfile
97             my $k = UI::KeyboardLayout:: -> new_from_configfile($i)
98             -> fill_win_template( 1, [qw(faces CyrillicPhonetic)] );
99             print $k;
100            
101             open my $f, '<', "$ENV{HOME}/Downloads/NamesList.txt" or die;
102             my $k = UI::KeyboardLayout::->new();
103             my ($d,$c,$names,$blocks,$extraComb,$uniVersion) = $k->parse_NameList($f);
104             close $f or die;
105             $k->print_decompositions($d);
106             $k->print_compositions ($c);
107            
108             UI::KeyboardLayout::->set_NamesList("$ENV{HOME}/Downloads/NamesList.txt",
109             "$ENV{HOME}/Downloads/DerivedAge.txt");
110             my $l = UI::KeyboardLayout::->new();
111             $l->print_compositions;
112             $l->print_decompositions;
113            
114             UI::KeyboardLayout::->set_NamesList("$ENV{HOME}/Downloads/NamesList-6.1.0d8.txt",
115             "$ENV{HOME}/Downloads/DerivedAge-6.1.0d13.txt");
116             my $l = UI::KeyboardLayout::->new_from_configfile('examples/EurKey++.kbdd');
117             for my $F (qw(US CyrillicPhonetic)) {
118             # Open file, select()
119             print $l->fill_win_template(1,[qw(faces US)]);
120             $l->print_coverage(q(US));
121             print $l->fill_osx_template([qw(faces US)]);
122             }
123            
124             perl -wC31 UI-KeyboardLayout\examples\grep_nameslist.pl "\b(ALPHA|BETA|GAMMA|DELTA|EPSILON|ZETA|ETA|THETA|IOTA|KAPPA|LAMDA|MU|NU|XI|OMICRON|PI|RHO|SIGMA|TAU|UPSILON|PHI|CHI|PSI|OMEGA)\b" ~/Downloads/NamesList.txt >out-greek
125            
126             =head1 AUTHORS
127            
128             Ilya Zakharevich, ilyaz@cpan.org
129            
130             =head1 DESCRIPTION
131            
132             In this section, a "keyboard" has a certain "character repertoir" (which characters may be
133             entered using this keyboard), and a mapping associating a character in the repertoir
134             to a keypress or to several (sequential or simultaneous) keypresses. A small enough keyboard
135             may have a pretty arbitrary mapping and remain useful (witness QWERTY
136             vs Dvorak vs Colemak). However, if a keyboard has a sufficiently large repertoir,
137             there must be a strong logic ("orthogonality") in this association - otherwise
138             the most part of the repertoir will not be useful (except for people who have an
139             extraordinary memory - and are ready to invest part of it into the keyboard).
140            
141             "Character repertoir" needs of different people vary enormously; observing
142             the people around me, I get a very narrow point of view. But it is the best
143             I can do; what I observe is that many of them would use 1000-2000 characters
144             if they had a simple way to enter them; and the needs of different people do
145             not match a lot. So to be helpful to different people, a keyboard should have
146             at least 2000-3000 different characters in the repertoir. (Some ballpark
147             comparisons: L
148             has about 2800 characters; L corresponds
149             to about 3600 Unicode characters.)
150            
151             To access these characters, how much structure one needs to carry in memory? One can
152             make a (trivial) estimate from below: on Windows, the standard US keyboard allows
153             entering 100 - or 104 - characters (94 ASCII keys, SPACE, ENTER, TAB - moreover, C-ENTER,
154             BACKSPACE and C-BACKSPACE also produce characters; so do C-[, C-] and C-\
155             C-Break in most layouts!). If one needs about 30 times more, one could do
156             with 5 different ways to "mogrify" a character; if these mogrifications
157             are "orthogonal", then there are 2^5 = 32 ways of combining them, and
158             one could access 32*104 = 3328 characters.
159            
160             Of course, the characters in a "reasonable repertoir" form a very amorphous
161             mass; there is no way to introduce a structure like that which is "natural"
162             (so there is a hope for "ordinary people" to keep it in memory). So the
163             complexity of these mogrifications is not in their number, but in their
164             "nature". One may try to decrease this complexity by having very easy to
165             understand mogrifications - but then there is no hope in having 5 of them
166             - or 10, or 15, or 20.
167            
168             However, we B<know> that many people I<are> able to memorise the layout of
169             70 symbols on a keyboard. So would they be able to handle, for example, 30
170             different "natural" mogrifications? And how large a repertoir of characters
171             one would be able to access using these mogrifications?
172            
173             This module does not answer these questions directly, but it provides tools
174             for investigating them, and tools to construct the actually working keyboard
175             layouts based on these ideas. It consists of the following principal
176             components:
177            
178             =over 4
179            
180             =item Unicode table examiner
181            
182             distills relations between different Unicode characters from the Unicode tables,
183             and combines the results with user-specified "manual mogrification" rules.
184             From these automatic/manual mogrifications, it constructs orthogonal scaffolding
185             supporting Unicode characters (we call it I, but it
186             is a major generalization of the corresponding Unicode consortium's terms).
187            
188             =item Layout constructor
189            
190             allows building keyboard layouts based on the above mogrification rules, and
191             on other visual and/or logical directives. It combines the bulk-handling
192             ability of automatic rule-based approach with a flexibility provided by
193             a system of manual overrides. (The rules are read from a F<.kbdd> L<I<Keyboard
194             Description> file|/"Keyboard description files">.)
195            
196             =item System-specific software layouts
197            
198             may be created basing on the "theoretical layout" made by the layout
199             constructor — currently only on Windows (only via F route) and OS X.
200            
201             =item Report/Debugging framework
202            
203             creates human-readable descriptions of the layout, and/or debugging reports on
204             how the layout creation logic proceeded.
205            
206             =back
207            
208             The last (and, probably, the most important) component of the distribution is
209             L created using this toolset.
210            
211             =head1 Keyboard description files
212            
213             =head2 Syntax
214            
215             I could not find an appropriate existing configuration file format, so was
216             forced to invent yet-another-config-file-format. Sorry...
217            
218             Config file is for initialization of a tree implementing a hash of hashes of
219             hashes etc whose leaves are either strings or arrays of strings, and keys are
220             words. The file consists of I<"sections">; each section fills a certain hash
221             in the tree.
222            
223             Sections are separated by "section names" which are sequences of word
224             character and C</> (possibly empty) enclosed in square brackets.
225             C<[]> is a root hash, then C<[word]> is a hash referenced by key C<word> in the
226             root hash, then C<[word/another]> is a hash referenced by element of the hash
227             referenced by C<[word]> etc. Additionally, a section separator may look like
228             C<< [visual -> wordsAndSlashes] >>.
229            
230             Sections are of two types: normal and visual. A normal section
231             consists of comments (starting with C<#>) and assignments. An assignment is
232             in one of 4 forms:
233            
234             word=value
235             +word=value
236             @word=value,value,value,value
237             /word=value/value/value/value
238            
239             The first assigns a string C<value> to the key C<word> in the hash of the
240             current section. The second adds a value to an array referenced by the key
241             C<word>; the other two add several values. Trailing whitespace is stripped.
242            
243             Any string value without end-of-line characters and trailing whitespace
244             can be added this way (and values without commas or without slash can
245             be added in bulk to arrays). In particular, there may be no whitespace before
246             C<=> sign, and the whitespace after C<=> is a part of the value.
247            
248             Visual sections consist of comments, assignments, and C, which
249             is I of the section. Comments
250             after the last assignment become parts of the content. The content is
251             preserved as a whole, and assigned to the key C; trailing
252             whitespace is stripped. (This is the way to insert a value containing
253             end-of-line-characters.)
254            
255             In the context of this distribution, the intent of visual sections is to be
256             parsed by a postprocessor. So the only purpose of explicit assignments in a
257             visual section is to configure how I is parsed; after the parsing
258             is done (and the result is copied elsewhere in the tree) these values should
259             better be not used.
260            
261             =head2 Semantic of visual sections
262            
263             Two types of visual sections are supported: C and C. A content of
264             C section is just an embedded (part of) F<.klc> file. We can read deadkey
265             mappings and deadkey names from such sections. The name of the section becomes the
266             name of the mapping functions which may be used inside the C rule
267             (or in a recipe for a computed layer).
268            
269             A content of C section consists of C<#>-comment lines and "the mapping
270             lines"; every "mapping line" encodes one row in a keyboard (in one or several
271             layouts). (But the make up of rows of this keyboard may be purely imaginary;
272             it is normal to have a "keyboard" with one row of numbers 0...9.)
273             Configuration settings specify how many lines are per row, and how many layers
274             are encoded by every line, and what are the names of these layers:
275            
276             visual_rowcount # how many config lines per row of keyboard
277             visual_per_row_counts # Array of length visual_rowcount
278             visual_prefixes # Array of chars; <= visual_rowcount (miss=SPACE)
279             prefix_repeat # How many times prefix char is repeated (n/a to SPACE)
280             in_key_separator # If several layers per row, splits a key-descr
281             layer_names # Where to put the resulting keys array
282             in_key_separator2 # If one of entries is longer than 1 char, join by this
283             # (optional)
284            
285             Each line consists of a prefix (which is ignored except for sanity checking), and
286             whitespace-separated list of key descriptions. (Whitespace followed by a
287             combining character is not separating.) Each key description is split using
288             C into slots, one slot per layout. (The leading
289             C is not separating.) Each key/layout
290             description consists of one or two entries. An entry is either two dashes
291             C<--> (standing for empty), or a hex number of length >=4, or a string.
292             (A hex numbers must be separated by C<.> from neighbor word
293             characters.) A loner character which has a different uppercase is
294             auto-replicated in uppercase (more precisely, titlecase) form. Missing or empty key/layout description
295             gives two empty entries (note that the leading key/layout description cannot
296             be empty; same for "the whole key description" - use the leading C<-->.
297            
298             If one of the entries in a slot is a string of length ≥ 2, one must separate
299             the entries by C. Likewise, if a slot has only one entry,
300             and it is longer than 1 char, it must be started or terminated by C.
301            
302             To simplify BiDi keyboards, a line may optionally be prefixed with the L|http://en.wikipedia.org/wiki/Unicode_character_property#Bidirectional_writing>
303             character; if so, it may optionally be ended by spaces and the L|http://en.wikipedia.org/wiki/Unicode_character_property#Bidirectional_writing> character.
304             For compatibility with other components, layer names should not contain characters C<+()[]>.
305            
306             =head2 Inclusion of F<.klc> files
307            
308             Instead of including a F<.klc> file (or its part) verbatim in a visual
309             section, one can make a section C with
310             a key C. Filename will be included and parsed as a C
311             visual section (with name C???). (Currently only UTF-16
312             files are supported.)
313            
314             =head2 Metadata
315            
316             A metadata entry is either a string, or an array. A string behaves as
317             if it were an array with the string repeated sufficiently many times. Each
318             personality defines C which chooses the element of the arrays.
319             The entries
320            
321             COMPANYNAME LAYOUTNAME COPYR_YEARS LOCALE_NAME LOCALE_ID
322             DLLNAME SORT_ORDER_ID_ LANGUAGE_NAME
323            
324             should be defined in the personality section, or above this section in the
325             configuration tree. (Used when outputting Windows F<.klc> files and OS X
326             F<.keylayout> files.)
327            
328             OSX_ADD_VERSION OSX_LAYOUTNAME
329            
330             The first one is the ordinal of the word after which to insert the version
331             into C (OS X allows layout names longer than the limit of 64 UTF-16
332             codepoints of Windows); the second one allows a completely different name.
333            
334             Optional metadata currently consists only of C key (the protocol
335             version; hardwired now as C<1.0>) and keys C defining
336             what goes into the C section of F<.klc> file (the latter may also
337             be specified in a face's section, or its parents).
338            
339             =head2 Layer/Face/Prefix-key Recipes
340            
341             The sections C and C contain instructions how
342             to build Layers and Faces out of simpler elements. Similar recipes appear
343             as values of C entries in a face. Such a "recipe" is
344             executed with I: a base face name, a layer number, and a prefix
345             character (the latter is undefined when the recipe is a layer recipe or
346             face recipe). (The recipe is free to ignore the parameters; for example, most
347             recipes ignore the prefix character even when they are "prefix key" recipes.)
348            
349             The recipes and the visual sections are the most important components of the description
350             of a keyboard group.
351            
352             To construct layers of a face, a face recipe is executed several times with different
353             "layer number" parameter. In contrast, in simplest cases a layer recipe is executed
354             once. However, when the layer is a part of a compound ("parent") recipe, it inherits
355             the "parameters" from the parent. In particular, it may be executed several times with
356             different face name (if used in different faces), or with different layer number (if used
357             - explicitly or implicitly - in different layer slots; for example, C
358             in a face/prefix-key recipe will execute the C recipe separately for all the
359             layer numbers; or one can use C together with
360             C). Depending on the recipe, these calls may result in the same layout
361             of the resulting layers, or in different layouts.
362            
363             A recipe may be of three kinds: it is either a "first comer wins" which is a space-separated collection of
364             simpler recipes, or C, or a "mutator": C or just C.
365             All recipes must be C<()>-balanced
366             and C<[]>-balanced; so must be the C; in turn, the C is either a
367             layer name, or another recipe. A layer name must be defined either in a visual C section,
368             or be a key in the C section (so it should not have C<+()[]> characters),
369             or be the literal C.
370             When C is processed, first, the resulting layer(s) of the C recipe
371             are calculated; then the layer(s) are processed by the C (one key at a time).
372            
373             The most important C keywords are C (with argument a face name, defined either
374             via a C section, or via C) and C (with argument
375             of the form C, with layer names defined as above). Both
376             select the layer (out of a face, or out of a list) with number equal to the "layer number parameter" in the context
377             of the recipe. The C builder is similar to C, but chooses the "other"
378             layer ("cyclically the next" layer if more than 2 are present).
379            
380             The other selectors are C, C and C; they
381             operate on the base face or face associated to the base face.
382            
383             The simplest forms of C are C (note that
384             C/C/C return C when case-conversion results in no
385             change; use C/C/C if one wants them to behave
386             as Perl operators). Recall that a layer
387             is nothing more than a structure associating a pair "unshifted/shifted character" to the key number, and that
388             these characters may be undefined. These simplest mutators modify these characters
389             independently of their key numbers and shift state (with C making all of
390             them undefined). Similar user-defined simple mutators are C;
391             here C consists of pairs "FROM TO" of characters (with optional spaces between pairs);
392             characters not appearing as FROM become undefined by C.
393             (As usual, characters may be replaced by hex numbers with 4 or more hex digits;
394             separate the number from a neighboring word character by C<.> [dot].)
395            
396             All mutators must have a form C or C, with C
397             C<(),[]>-balanced. Other simple mutators are C (converts
398             control-char [those between 0x00 and 0x1f] to the corresponding [uppercase] character),
399             C (adds a constant to the [numerical code of the] input character
400             so that C becomes C), C (keeps input characters
401             which match, converts everything else to C), C
402             (similar to C, but pairs all characters in the layers based on their position),
403             C (all defined characters are converted to C).
404            
405             The mutator C is similar to , but takes the F<.klc>-style
406             visual C section as the description of the mutation. C may
407             be followed by a character as in C; if not, C is the prefix key from
408             the recipe's execution parameters.
409            
410             The simple mutator C has flavors: one can append C or C
411             to the name, and the resulting characters become prefix keys (the “C-inverted”
412             prefix followed by C behaves as non-inverted prefix followed by C).
413            
414             Some mutators pay attention not only to what the character is, but how it is
415             accessible on the given key: such are C, C,
416             C. Some other mutators also take into
417             account how the key is positioned with respect to the other keys.
418            
419             C assigns a character
420             to a particular column of the keyboard. Which keys are in which columns is
421             governed by how the corresponding
422             visual layer is formatted (shifted to the right by C array of the
423             visual layer). This visual layer is one associated to the face by the
424             C key (and the face is the parameter face of the
425             mutator). C is a comma-separated list;
426             empty positions map to the undefined character.
427            
428             C chooses a mutator based on the row of the keyboard. On the top row,
429             it is the first mutator which is chosen, etc. The list C is separated by C
430             surrounded by whitespace.
431            
432             The mutator C converts some non-prefix characters to prefix
433             characters; the conversion happens if the argument of the mutator coincides with
434             what is at the corresponding position in C, and this position contains
435             a prefix character. (Nowadays this mutator is not very handy — most of its uses
436             may be accomplished by having I prefix characters in appropriate faces.)
437            
438             The mutators C, C process their
439             argument in a special way: the characters in C which duplicated the characters
440             present (on the same key, and possibly with the same modifiers) in C are
441             ignored. The remaining characters are combined “as usual” with “the first comer wins”.
442            
443             The most important mutator is C (and its flavors). (See L mutator>.)
444            
445             Note that C is similar to a selector;
446             it is the only way to insert a
447             layer without a selector, since a bareword is interpreted as a C; C is a synonym
448             of C (repeated as many times as there are layers
449             in the parameter "base face").
450            
451            
452             The recipes in a space-separated list of recipes ("first comer wins") are
453             interpreted independently to give a collection of layers to combine; then,
454             for every key number and both shift states, one takes the leftmost recipe
455             which produces a defined character for this position, and the result is put
456             into the resulting layer.
457            
458             Keep in mind that to understand what a recipe does, one should trace
459             its description in right-to-left order: for example, C creates
460             a layout where C<:> is at position of C<.>, but on the second [=other] layer (essentially,
461             if the base layout is the standard one, it binds the character C<:> to the keypress C).
462            
463             To simplify formatting of F<.kbdd> files, a recipe may be an array reference.
464             The string may be split on spaces, or split after comma or C<|>.
465            
466             =head2 The C mutator
467            
468             The essence of C is to have several mutation rules and choose I
469             of the results of application of these rules. Grouping the rules allows
470             one a flexible way to control what I actually means. The rules may
471             be separated by comma, by C<|>, or by C<|||> (interchangeable with C<||||>).
472            
473             In the simplest case of grouping, C form a C<|>-separated list, and
474             each group consists of one rule. Then I result is one coming from
475             an earlier rule. The groups are separated by C<|>, and the rules inside the
476             group are separated by comma; if more than one rule appears in a group, a
477             different kind of competition appears (inside the group).
478            
479             The I of the generated characters is a list C
480             UNICODE_BLOCK, IN_CASE_PAIR, FROM_NON_ALTGR_POSITION>
481             with lexicographical order (the earlier element is stronger than the ones after it).
482             Here C describes whether a character is generated by
483             Unicode compositing (versus “compatibility compositing” or other
484             “artificially generated” mogrifiers); the older age wins, as well as
485             honest compositing, earlier Unicode blocks, as well as case pairs and
486             characters from non-C-positions. (Experience shows that these rules
487             have a pretty good correlation with being “more suitable for human consumption”.)
488            
489             Moreover, quality in case-pairs is equalized by assigning the strongest
490             I of two. Such pairs are always considered “tied together” when
491             they compete with other characters. (In particular, if a single character
492             with higher quality occupies one of C positions, a
493             case pair with lower quality is completely ignored; so the “other” position
494             may be taken by a single character with yet lower quality.)
495            
496             In addition, the characters which lost the competition for
497             non-C-positions are considered I on C-positions. (With
498             boosted priority compared to mutated C-characters; see above.)
499            
500             This mutator comes in several flavors: one can append to its name
501             C/C/C/C<32OK> (in this
502             order). Unless C is specified, it will not modify characters on a key
503             which produces C when used without modifiers. Unless C<32OK> is specified, it
504             will not produce Unicode characters after C<0xFFFF> (the default is to follow
505             the brain-damaged semantic of prefix keys on Windows). Unless C is
506             specified, the result is optimized by removing duplicates (per key) generated
507             by application of C. With the C modifier, the generated characters
508             are not counted as “obtained by logical rules” when statistics for the generated
509             keyboard layout are calculated.
510            
511             =head2 Linked prefixes
512            
513             On top of what is explained above, there is a way to arrange “linking” of two prefix keys;
514             this linking allows characters which cannot be fit on one (prefixed) key to
515             “migrate” to unassigned positions on the otherwise-prefixed key. (This is
516             similar to migration from non-C-position to C-position.)
517             This is achieved by using mutator rules of the following form:
518            
519             primary = +PRE-GROUPS1|||SHARED||||POST-GROUPS1
520             secondary = PRE-GROUPS2||||PRE-GROUPS1|||SHARED||||POST-GROUPS2
521            
522             Groups with digits are not shared (specific to a particular prefix); C is
523             (effectively) reverted when accessed from the secondary prefix; for the
524             secondary key, the recipes from C which were used in the primary
525             key are removed from C, and are appended to the end of C;
526             the C are skipped when finding assignments for the secondary
527             prefix.
528            
529             In the primary recipe, C<|||> and C<||||> are interchangeable with C<|>.
530             Moreover, if C is empty, the secondary recipe should be written as
531            
532             secondary = PRE-GROUPS2|||PRE-GROUPS1|||SHARED
533            
534             if C is empty, this should be written as one of
535            
536             secondary = PRE-GROUPS2|||SHARED
537             secondary = PRE-GROUPS2||||SHARED
538             secondary = PRE-GROUPS2||||SHARED||||POST-GROUPS2
539            
540             These rules are to allow macro-ization of the common parts of the primary
541             and secondary recipe. Put the common parts as a value of the key
542             C (here C<***> denotes a word), and replace them by
543             the macro C<< >> in the recipes.
544            
545             B: the primary key recipe starts with the C<+> character; it
546             forces interpretation of C<|||> and C<||||> as of ordinary C<|>.
547            
548             If not I, the top-level groups are formed by C<||||> (if present), otherwise by C<|||>.
549             The number of top-level groups should be at most 3. The second of C<||||>-groups
550             may have at most 2 C<|||>-groups; there should be no other subdivision. This way,
551             there may be up to 4 groups with different roles.
552            
553             The second of 3 toplevel C<|||>-groups, or the first of two sublevel C<|||>-groups
554             is the “skip” group. The last of two or three toplevel C<|||>-groups (or of
555             sublevel C<|||>-groups, or the 2nd toplevel C<||||>-group without subdivisions) is the
556             inverted group; the 3rd of toplevel C<||||>-groups is the “extra” group.
557            
558             “Penalize/prohibit” lists start anew in every top-level group.
559            
560             =head2 Atomic mutators rules
561            
562             As explained above, the individual RULES in C may be
563             separated by C<,> or C<|>, or C<|||> or C<||||>. Such an individual
564             rule is a combination of I combined by C<+> operators,
565             and/or preceded by C<-> prefix (with understanding that C<+-> must
566             be replaced by C<-->). The prefix C<-> means I of the
567             rule; the operator C<+> is the composition of the rules.
568            
569             B the atomic rule C<< >> converts its input character into
570             its superscript forms (if such forms exist; for example, C may
571             be converted to C<ᵃ> or C<ª>). The atomic rules C, C, C
572             behave the same as the corresponding MUTATORs. The atomic rule C
573             converts a control-character to the corresponding “uppercase” character:
574             C<^A> is converted to C, and C<^\> is converted to C<\>. (The last
575             4 rules cannot be inverted by C<->.)
576            
577             The composition is performed (as usual) from right to left. B the
578             individual rule C<< +lc+dectrl >> converts C<^A> to C<ᵃ> or C<ª>.
579            
580             In addition to rules listed above, the atomic rules may be of the
581             following types:
582            
583             =over
584            
585             =item *
586            
587             A hex number with ≥4 digits, or a character: implements the composition
588             inverting (compatibility or not) Unicode decompositions into two characters;
589             the character in the rule must be the first character of the decomposition.
590             Here “Unicode decompositions” are either deduced from Unicode decomposition
591             rules (with compatibility decompositions having lower priority), or deduced
592             basing on splitting the name of the character into parts.
593            
594             =item *
595            
596             C<< >> is an inversion of a Unicode decomposition which goes from
597             1 character to 1 character.
598            
599             =item *
600            
601             Flavors of characters C<< >> from Unicode tables come from Unicode
602             1-character to 1-character decompositions
603             marked with C<< >>. B C<< >> for a subscript form;
604             or C<< >>.
605            
606             =item *
607            
608             C<< >> rules TBC ..........................................
609            
610             =item *
611            
612             Calculated rules C<< >> are extracted by a
613             heuristic algorithm which tries to parse the Unicode name of the character.
614            
615             For the best understanding of what these rules produce, inspect
616             results of print_compositions(), print_decompositions() methods documented
617             in L<"SYNOPSIS">. The following “keywords” are processed by the algorithm:
618            
619             WITH, OVER, ABOVE, PRECEDED BY, BELOW (only with LONG DASH)
620            
621             are separators;
622            
623             COMBINING CYRILLIC LETTER, BARRED, SLANTED, APPROXIMATELY, ASYMPTOTICALLY,
624             SMALL (not near LETTER), ALMOST, SQUARED, BIG, N-ARY, LARGE, LUNATE,
625             SIDEWAYS DIAERESIZED, SIDEWAYS OPEN, INVERTED, ARCHAIC, EPIGRAPHIC,
626             SCRIPT, LONG, MATHEMATICAL, AFRICAN, INSULAR, VISIGOTHIC, MIDDLE-WELSH,
627             BROKEN, TURNED, INSULAR, SANS-SERIF, REVERSED, OPEN, CLOSED, DOTLESS, TAILLESS, FINAL
628             BAR, SYMBOL, OPERATOR, SIGN, ROTUNDA, LONGA, IN TRIANGLE, SMALL CAPITAL (as smallcaps)
629            
630             are modifiers. For an C, one scans for
631            
632             QUAD, UNDERBAR, TILDE, DIAERESIS, VANE, STILE, JOT, OVERBAR, BAR
633            
634             TBC ..........................................
635            
636             =item *
637            
638             Additionally, C are considered C variants of
639             their middle letter, as well as C of C<0>.
640            
641             =item *
642            
643             C<< >> rules are obtained by scanning the name for
644            
645             WHITE, BLACK, CIRCLED, BUT NOT
646            
647             as well as for C (as C), paleo-Latin digraphs and C
648             (as C), doubled-letters
649             (as C), C doubled-letters
650             (as C), C (possibly with C
651             or C; as C).
652            
653             =item *
654            
655             Manual prearranged rules TBC ..........................................
656            
657             =item *
658            
659             C<< >> Explicit named substitution rules TBC ..........................................
660            
661             =item *
662            
663             C<< >> Prohibits handling non-substituted input TBC ..........................................
664            
665             =item *
666            
667             C<< >> rules TBC ..........................................
668            
669             =back
670            
671             =head2 Input substitution in atomic rules
672            
673             TBC ..........................................
674            
675             =head2 The C mutator
676            
677             TBC ..............................
678            
679             =head2 Pseudo-mutators for generation of documentation
680            
681             A few mutators do not introduce any characters (in other words, they behave as
682             C) but are used for their side effects: in prefix-key recipes,
683             C introduces documentation of what the prefix key is intended
684             for. Likewise, C allows adding CSS classes to highlight
685             parts of HTML output generated by this module, the parts corresponding to selected
686             characters in a face.
687            
688             C is a comma-separated list, every triple in the
689             list being C. C is one of C/C (which
690             add formatting to the key containing one of the C) or C/C
691             (which add formatting to an individual character displayed on the key),
692             one can add a digit to C to limit to a particular layer in the face
693             (useful when a character appears several times in a face).
694             The lower-case variants select characters basing on the I of a key.
695             One can also append C<=CONTEXT> to C, then the class is added only if
696             C appears as one of the options for the HTML output generator.
697            
698             The CSS rules generated by this module support several classes directly; the
699             rest should be supported by the user-supplied rules. The classes with existing
700             support are: on keys
701            
702             to_w from_w # generate arrows between keys
703             from_nw from_ne to_nw to_ne # generate arrows between keys; will yellow-outline
704             pure # unless combined with this
705             red-bg green-bg blue-bg # tint the key as the whole (as background)
706            
707             On characters
708            
709             very-special need-learn may-guess # provide green/brown/yellow-outlines
710             special # provide blue outline (thick unless combined with
711             thinspecial # <-- this)
712            
713             =head2 Extra CSS classes for documentation
714            
715             In addition, several CSS classes are auto-generated based on Unicode
716             properties of the character. TBC ........................
717            
718             =head2 Debugging mutators
719            
720             If the bit 0x40 of the environment variable C
721             (decimal or C<0xHEX>) is set, debugging output for mutators is enabled:
722            
723             r ║ ║ ┆ ║ ṙ ṛ ┆ ║ ║ ║ ║ ⓡ ┆
724             ║ ║ ┆ ║ Ṙ Ṛ ┆ ║ ║ ║ ║ Ⓡ ┆
725             ║ ║ ặ ┆ ║ ┆ ║ ║ ║ ║ ┆
726             ║ ║ Ặ ┆ ║ ┆ ║ ║ ║ ║ ┆
727             Extracted [ …list… ] deadKey=00b0
728            
729             The output contains a line per character assigned to the keyboard key (if
730             there are 2 layers, each with lc/uc variants, there are 4 lines); empty lines are
731             omitted. The first column indicates the base character (lc of the 1st layer) of
732             the key; the separator C<║> indicates C<|>-groups in the mutator. Above, the first
733             group produces no mutations, the second group mutates only the characters in
734             the second layer, and the third group produces two mutations per a character in
735             the first layer. The 7th group is also producing mogrifications on the 1st layer.
736            
737             The next example clarifies C<┆>-separator: to the left of it are mogrifications which
738             come in case pairs, to the right are mogrifications where mogrified-lc is not
739             a case pair of mogrified-uc:
740            
741             t ║ ║ ᵵ ║ ꞇ ┆ ʇ ║ ┆ ║
742             ║ ║ ║ Ꞇ ┆ ᴛ ║ ┆ ║
743             ║ ║ ║ ┆ ║ ꝧ ┆ ║
744             ║ ║ ║ ┆ ║ Ꝧ ┆ ║
745             Extracted [ …list… ] deadKey=02dc
746            
747             In this one, C<│> separates mogrifications with different priorities (based on
748             Unicode ages, whether the atomic mutator was compatibility/synthetic one, and the
749             Unicode block).
750            
751             / ║ ║ ║ ║ ║ │ ∴ ║ ║
752             ║ ║ ║ ║ ║ │ ≘ ≗ ║ ║
753             ║ ║ ║ ║ ║ / │ ⊘ ║ ║
754             Extracted [ …list… ] deadKey=00b0
755            
756             For secondary mogrifiers, where the distinction between C<|||> and C<|>
757             matters, some of the C<║>-separators are replaced by C<┃>. Additionally,
758             there are two rounds of extraction: first the characters corresponding
759             to the primary mogrifier are TMP-extracted (from the groups PRE-GROUPS1,
760             COMMON); then what is extracted from COMMON is put back at the
761             effective end (at the end of POST-GROUPS2, or, if no such, at
762             the beginning of COMMON):
763            
764             t ║ ║ ᵵ ┃ ┃ ʇ │ │ ꞇ ┆ ║
765             ║ ║ ┃ ┃ │ ᴛ │ Ꞇ ┆ ║
766             ║ ║ ┃ ┃ │ │ ꝧ ┆ ║
767             ║ ║ ┃ ┃ │ │ Ꝧ ┆ ║
768             TMP Extracted: <…list…> from layers 0 0 | 0 0
769             t ║ ║ ᵵ ┃ ꞇ ┆ ʇ ┋ ┃ ┆ │ ┆ │ ┆ ║
770             ║ ║ ┃ Ꞇ ┆ ᴛ ┋ ┃ ┆ │ ┆ │ ┆ ║
771             ║ ║ ┃ ┆ ┋ ┃ ┆ │ ┆ │ ꝧ ┆ ║
772             ║ ║ ┃ ┆ ┋ ┃ ┆ │ ┆ │ Ꝧ ┆ ║
773             Extracted [ …list… ] deadKey=02dc
774            
775             In the second part of the debugging output, the part of common which is put
776             back is separated by C<┋>.
777            
778             When bit 0x80 is set, much more lower-level debugging info is printed. The
779             arrays at separate depth mean: group number, priority, not-cased-pair, layer
780             number, subgroup, is-uc. When bit 0x100 is set, the debugging output for
781             combining atomic mutators is enabled.
782            
783             =head2 Personalities
784            
785             A personality C is defined in the section C. (C may
786             include slashes - untested???)
787            
788             An array C gives the list of layers forming the face. (As of version
789             0.03, only 2 layers are supported.) The string C is a “fallback”
790             face: if a keypress is not defined by C, it would be taken from
791             C; additionally, it affects the C key bindings: for example,
792             if C has C where C has C<γ>, and there is a binding for
793             C, the same binding applies for C.
794             .........
795            
796             =head2 Substitutions
797            
798             In section C one defines composition rules which may be
799             used on par with composition rules extracted from I.
800             An array C is converted to a hash accessible as C<< >> from
801             a C filter of satellite face processor. An element of the array
802             must consist of two characters (the first is mapped to the second one). If
803             both characters have upper-case variants, the translation between these variants
804             is also included.
805            
806             =head2 Classification of diacritics
807            
808             The section C contains arrays each describing a class of
809             diacritic marks. Each array may contain up to 7 elements, each
810             consisting of diacritic marks in the order of similarity to the
811             "principal" mark of the array. Combining characters may be
812             preceded by horizontal space. Seven elements should contain:
813            
814             Surrogate chars; 8bit chars; Modifiers
815             Modifiers below (or above if the base char is below)
816             Vertical (or Comma-like or Doubled or Dotlike or Rotated or letter-like) Modifiers
817             Prime-like or Centered modifiers
818             Combining
819             Combining below (or above if base char is below)
820             Vertical combining and dotlike Combining
821            
822             These lists determine what a C filter of satellite face processor
823             will produce when followed by whitespace characters
824             (possibly with modifiers) C. (So, if F<.kbdd> file
825             uses C) this determines what diacritic prefix keys produce.
826            
827             =head2 Compose Key
828            
829             The scalar configuration variable C controls the ID of the prefix
830             key to access F<.Compose> composition rules. The rules are read from files
831             in the class/object variable; set this variable with
832            
833             $self->set__value('ComposeFiles', [@Files]); # Class name (instead of $self) is OK here
834            
835             The format of the files is the same as for X11’s F<.Compose> (but C are
836             not supported); only compositions starting with C<< >>, having no
837             deadkeys, and (on Windows) expanding to 1 UTF-16 codepoint are processed. (See
838             L<“systematic” parts of rules in the standard
839             F<.XCompose>|"“Systematic” parts of rules in a few .XCompose"> — see lines with postfix C.)
840            
841             Repeating this prefix twice accesses characters via their HTML/MathML entity names. The files
842             are as above (the variable name is C); the format is the same as in
843             F.
844            
845             Repeating this prefix 3 times accesses characters via their C codes;
846             the variable C contains files in the format of F.
847             It is recommended to download these files (or the later flavors)
848            
849             http://www.x.org/releases/X11R7.6/doc/libX11/Compose/en_US.UTF-8.html
850             http://www.w3.org/TR/xml-entity-names/bycodes.html
851             http://tools.ietf.org/html/rfc1345
852            
853             See L<"SYNOPSIS"> for an example. Note that this mechanism does not assign this
854             prefix key to any particular position on the keyboard layout; this should be
855             done elsewhere. Implementation detail: if some of these 3 maps cannot be created,
856             they are skipped (so less than 3 chained maps are created).
857            
858             For more control, one can make this configuration variable into an array. The
859             value C is equivalent to the array with elements
860            
861             ComposeFiles,dotcompose,warn,KEY
862             EntityFiles,entity,warn,,KEY
863             rfc1345Files,rfc1345,warn,,KEY
864            
865             Five comma-separated fields are: the variable controlling the filelist,
866             the type of files in the filelist (only the 3 listed types are supported now),
867             whether to warn when a particular flavor
868             of composition table could not be loaded, the global access prefix, the prefix
869             for access from the previous element (chained access).
870            
871             If C (etc.) has more than 1 file, bindings from earlier files
872             take precedence over bindings from the later ones. If the same sequence is
873             bound several times inside a file, a later binding takes precedence.
874            
875             =head2 Names of prefix keys
876            
877             Section C defines naming of prefix keys. If not named there (or in
878             processed F<.klc> files), the C property will be used; if none,
879             Unicode name of the character will be used.
880            
881             =head2 More than 2 layers and/or exotic modifier keys
882            
883             This is controlled by C, C, and C
884             configuration arrays. TBC..................................
885            
886             =head2 CAVEATS for German/French/BÉPO/Neo keyboards
887            
888             Non-US keycaps: the character "a" is on C<(VK_)A>, but its scancode is now different.
889             E.g., French's A is on 0x10, which is US's Q. Our table of scancodes is
890             currently hardwired. Some pictures and tables are available on
891            
892             http://bepo.fr/wiki/Pilote_Windows
893            
894             With this module, the scancode and the C-code for a position in a layout
895             are calculated via the C configuration variable; the first recognized
896             character at the given position of this layer is translated to
897             the C-code (using a hardwired table). The mapping of C-codes
898             to scancodes is currently hardwired.
899            
900             For “unusual” keys, one can use the C subsection of the face to describe
901             its scancode (the first entry in the array) and the bindings. If the scancode
902             is empty, the name of the key is translated to a scancode using the hardwired
903             tables.
904            
905             =head1 Keyboards: on ease of access (What makes an easy-to-use keyboard layout)
906            
907             The content of this section has no I relationship to the functionality
908             of this module. However, we feel that it is better that the user of this
909             module understands these concerns. Moreover, it is these concerns which
910             lead to the principles underlying the functionality of this module.
911            
912             =head2 On the needs of keyboard layout users
913            
914             Let's start with trivialities: different people have different needs
915             with respect to keyboard layouts. For a moment, ignore the question
916             of the repertoire of characters available via keyboard; then the most
917             crucial distinction corresponds to a certain scale. In the absence of
918             a better word, we use a provisional name "the required typing speed".
919            
920             One example of people on the "quick" (or "rabid"?) pole of this scale are
921             people who type a lot of text which is either "already prepared", or for
922             which the "quality of prose" is not crucial. Quite often, these people may
923             type in excess of 100 words per minute. For them, the most important
924             questions are of physical exhaustion from typing. The position
925             of most frequent letters relative to the "rest" finger position, whether
926             frequently typed together letters are on different hands (or at least
927             not on the same/adjacent fingers), the distance fingers must travel
928             when typing common words, how many keypresses are needed to reach
929             a letter/symbol which is not "on the face of the keyboard" - their
930             primary concerns are of this kind.
931            
932             On the other, "deliberate", pole these concerns cease to be crucial.
933             On this pole are people who type while they "create" the text, and
934             what takes most of their focus is this "creation" process. They may
935             "polish their prose", or the text they write may be overburdened by
936             special symbols - anyway, what they concentrate on is not the typing itself.
937            
938             For them, the details of the keyboard layout are important mostly in
939             the relation to how much they I the writer from the other
940             things the writer is focused on. The primary question is now not
941             "how easy it is to type this", but "how easy it is to I how
942             to type this". The focus transfers from the mechanics of finger movements
943             to the psycho/neuro/science of memory.
944            
945             These questions are again multifaceted: there are symbols one encounters
946             every minute; after you recall once how to access them, most probably
947             you won't need to recall them again - until you have a long interval when
948             you do not type. The situation is quite different with symbols you need
949             once per week - most probably, each time you will need to call them again
950             and again. If such rarely used symbols/letters are frequent (since I
951             of them appear), it is important to have an easy way to find how to type them;
952             on the other hand, probably there is very little need for this way to
953             be easily memorizable. And for symbols which you need once per day, one needs
954             both an easy way to find how to type them, I the way to type them should
955             better be easily memorizable.
956            
957             Now add to this the fact that for different people (so: different usage
958             scenarios) this division into "all the time/every minute/every day/every week"
959             categories is going to be different. And one should not forget the important
960             scenario of going on vacation: when you return, you need to "reboot" your
961             typing skills from the dormant state.
962            
963             =head2 On “mixing” several “allied” layouts
964            
965             On the other hand, note that the questions discussed above are more or less
966             orthogonal: if the logic of recollection requires ω to be related in some
967             way to the W-key,
968             then it does not matter where the W-key is on the keyboard - the same logic
969             is applicable to the QWERTY base layout, or BÉPO one, or Colemak, or Dvorak.
970             This module concerns itself I with the questions of "consistency" and
971             the related question of "the ease of recall"; we care only about which symbols
972             relate to which "base keys", and do not care about where the base keys sit on
973             the physical keyboard.
974            
975             B The “main island” of the keyboard contains a 4×10 rectangle
976             of keys. So if a certain collection of special keys may be easily memorized
977             as a rectangular table, it is nice to be able to map this table to the
978             physical keyboard layout. This module contains tools making this task easy.
979            
980             Now consider the question of the character repertoire: a person may need ways
981             to type "continuously" in several languages; quite often one must type
982             a “standalone” foreign word in a sentence; in addition to this, there may
983             be a need to I type "standalone" characters or symbols outside
984             the repertoire of these languages. Moreover, these languages may use different
985             scripts (such as Polish/Bulgarian/Greek/Arabic/Japanese), or may share a
986             "bulk" of their characters, and differ only in some "exceptional letters".
987             To add insult to injury, these "exceptional letters" may be rare in the language
988             (such as ÿ in French or à in Swedish) or may have a significant letter frequency
989             (such as é in French) or be somewhere in between (such as ñ in Spanish).
990            
991             And the non-language symbols do not need to be the I symbols (although
992             often they are). An English-language discussion of etymology at the coffee table
993             may lead to a need to write down a word in polytonic greek, or old norse;
994             next moment one would need to write a phonetic transcription in IPA/APA
995             symbols. A discussion of keyboard layout may involve writing down symbols
996             for non-character keys of the keyboard. A typography freak would optimize
997             a document by fine-tuned whitespaces. Almost everybody needs arrow symbols,
998             and many people would use box drawing characters if they had a simple access
999             to them.
1000            
1001             Essentially, this means that as far as it does not impact other accessibility
1002             goals, it makes sense to have unified memorizable access to as many
1003             symbols/characters as possible. (An example of impacting other aspects:
1004             MicroSoft's (and IBM's) "US International" keyboards steal characters C<`~'^">:
1005             typing them produces "unexpected results" - they are deadkeys. This
1006             significantly simplifies entering characters with accents, but makes it
1007             harder to enter non-accented characters.)
1008            
1009             =head2 The simplest rules of design of “large” keyboard layouts
1010            
1011             One of the most known principles of design of human-machine interaction
1012             is that "simple common tasks should be simple to perform, and complicated
1013             tasks should be possible to perform". I strongly disagree with this
1014             principle - IMO, it lacks a very important component: "a gradual increase
1015             in complexity". When a certain way of doing things is easy to perform, and another
1016             similar way is still "possible to perform", but on a very elevated level
1017             of complexity, this leads to a significant psychological barrier erected
1018             between these two ways. Even when switching from the first way to the other one
1019             has significant benefits, this barrier leads to self-censorship. Essentially,
1020             people will
1021             ignore the benefits even if they exceed the penalty of "the elevated level of
1022             complexity" mentioned above. And IMO self-censorship is the worst type of
1023             censorship. (There is a certain similarity between this situation and that
1024             of "self-fulfilling prophecies". "People won't want to do this, so I would not
1025             make it simpler to do" - and now people do not want to do this...)
1026            
1027             So I would add another clause to the law above: "and moderately complicated
1028             tasks should remain moderately hard to perform". What does it tell us in
1029             the situation of keyboard layout? One can separate several levels of
1030             complexity.
1031            
1032             =over 10
1033            
1034             =item Basic:
1035            
1036             There should be some "base keyboards": keyboard layouts used for continuous
1037             typing in a certain language or script. Access from one base keyboard to
1038             letters of another should be as simple as possible.
1039            
1040             =item By parts:
1041            
1042             If a symbol can be thought of as a combination of certain symbols accessible
1043             on the base keyboard, one should be able to "compose" the symbol: enter it
1044             by typing a certain "composition prefix" key then the combination (as far
1045             as the combination is unambiguously associated to one symbol).
1046            
1047             The "thoughts" above should be either obvious (as in "combining a and e should
1048             give æ") or governed by simple mnemonic rules; the rules should cover as
1049             wide a range as possible (as in "Greek/Coptic/Hebrew/Russian letters are
1050             combined as G/C/H/R and the corresponding Latin letter; the correspondence is
1051             phonetic, or, in presence of conflicts, visual").
1052            
1053             =item Quick access:
1054            
1055             As many non-basic letters as possible (of those expected to appear often)
1056             should be available via shortcuts. Same should be applicable to starting
1057             sequences of composition rules (such as "instead of typing C
1058             and C<'> one can type C).
1059            
1060             =item Smart access
1061            
1062             Certain non-basic characters may be accessible by shortcuts which are not
1063             based on composition rules. However, these shortcuts should be deducible
1064             by using simple mnemonic rules (such as "to get a vowel with `-accent,
1065             type C-key with the physical keyboard's key sitting below the vowel key").
1066            
1067             =item Superdeath:
1068            
1069             If everything else fails, the user should be able to enter a character by
1070             its Unicode number (preferably in the most frequently referenced format:
1071             hexadecimal).
1072            
1073             =back
1074            
1075             =over
1076            
1077             B This does not seem to be easily achievable, but it looks like a very nifty
1078             UI: a certain HotKey is reserved (e.g., C);
1079             when it is tapped, and a character-key is pressed (for example, B) a
1080             menu-driven interface pops up where user may navigate to different variants
1081             of B, Beta, etc - each of variants with a hotkey to reach I, and with
1082             instructions how to reach it later from the keyboard without this UI.
1083            
1084             Also: if a certain timeout passes after pressing the initial HotKey, an instruction
1085             what to do next should appear.
1086            
1087             =back
1088            
1089             =head2 The finer rules of design of “large” keyboard layouts
1090            
1091             Here are the finer points elaborating on the levels of complexity discussed above:
1092            
1093             =over 4
1094            
1095             =item 1
1096            
1097             It looks reasonable to allow "fuzzy mnemonic rules": the rules which specify
1098             several possible variants where to look for the shortcut (up to 3-4 variants).
1099             If/when one forgets the keying of the shortcut, but remembers such a rule,
1100             a short experiment with these positions allows one to reconstruct the lost
1101             memory.
1102            
1103             =item
1104            
1105             The "base keyboards" (those used for continuous typing in a certain language
1106             or script) should be identical to some "standard" widely used keyboards.
1107             These keyboards should differ from each other in position of keys used by the
1108             scripts only; the "punctuation keys" should be in the same position. If a
1109             script B has more letters than a script A, then a lot of
1110             "punctuation" on the layout A will be replaced by letters in the layout B.
1111             This missing punctuation should be made available by pressing a modifier
1112             (C? compare with L's top row).
1113            
1114             =item
1115            
1116             If more than one base keyboard is used, there must be a quick access:
1117             if one needs to enter one letter from layout B when the active layout is A, one
1118             should not be forced to switch to B, type the letter, then switch back
1119             to A. It should better be available I on a prefixed combination "C".
1120            
1121             =item
1122            
1123             One should consider what the C does when the layouts A
1124             and B are identical on a particular key (e.g., punctuation). One can go with the "Occam's
1125             razor" approach and make the C prefix into the do-nothing identity map.
1126             The alternative is to make it access some symbols useful both for
1127             script A and script B. It is a judgement call.
1128            
1129             Note that there is a gray area when layouts A and B are not identical,
1130             but a key C produces punctuation in layout A, and a letter in layout
1131             B. Then when in layout B, this punctuation is available on C,
1132             so, in principle, C would duplicate the functionality
1133             of C. Compare with "there is more than one way to do it" below;
1134             remember that OS (or misbehaving applications) may make some keypresses
1135             "unavailable". I feel that in these situations, “having duplication” is
1136             a significant advantage over “having some extra symbols available”.
1137            
1138             =item
1139            
1140             The considerations in two preceding parts are applicable also in the
1141             case when there are more “allied” layouts than A and B. Ways to make it possible
1142             are numerous: one can have several alternative C’s, B one
1143             can use a I prefix key C. With a large enough
1144             collection of layouts, a combination of both approaches may be visualized
1145             as a chain of layouts
1146            
1147             S< >… C B C
1148            
1149             here we have two quick access prefix keys, the left one C, and the right one
1150             C. Superscripts C<² ³ …> mean “pressing the prefix key several times”;
1151             the prefix keys move one left/right along the chain of layouts.
1152            
1153             =item
1154            
1155             The three preceding parts were concerned with entering one character from
1156             an “allied” layout. To address another frequent need, entering one word
1157             from an “allied” layout, yet another approach may be needed. The solution may
1158             be to use a certain combination of modifier keys. (How to choose useful
1159             combinations? See: L<"A convenient assignment of KBD* bitmaps to modifier keys">.)
1160            
1161             (Using “exotic” modifier keys may be impossible in some badly coded applications.
1162             This should not stop one from implementing this feature: sometimes one has a choice
1163             from several applications performing the same task. Moreover, since this feature
1164             is a “frill”, there is no pressing need to have it I available.)
1165            
1166             =item
1167            
1168             Paired symbols (such as ≤≥, «», ‹›, “”, ‘’) should be put on paired
1169             keyboard's keys: <> or [] or ().
1170            
1171             =item
1172            
1173             "Directional symbols" (such as arrows) should be put either on numeric keypad
1174             or on a 3×3 subgrid on the letter-part of the keyboard (such as QWE/ASD/ZXC).
1175             (Compare with [broken?] implementation in L.)
1176            
1177             =item
1178            
1179             For symbols that are naturally thought of as sitting in a table, one can
1180             create intuitive mapping of quite large tables to the keyboard. Split each
1181             key in halves by a horizontal line, think of C as sitting in the
1182             top half. Then ignoring C<`~> key and most of punctuation on the right
1183             hand side, keyboard becomes an 8×10 grid. Taking into account C
1184             modifier (either as an extra bit, or as splitting a key by a horizontal line),
1185             one can map up to 8×10×2 (or 8×20) table to a keyboard.
1186            
1187             B Think of L.
1188            
1189             =item
1190            
1191             Cheatsheets are useful. And there are people who are ready to dedicate a
1192             piece of their memory to where on a layout is a particularly useful to them
1193             symbol. So even if there is no logical position for a certain symbol, but
1194             there is an empty slot on layout, one should not hesitate in using this slot.
1195            
1196             However, this I distractive to people who do not want to dedicate
1197             their memory to "special cases". So it makes sense to have three kinds of
1198             cheatsheets for layouts: one with special cases ignored (useful for most
1199             people), one with all general cases ignored (useful for checks "is this
1200             symbol available in some place I do not know about" and for memorization),
1201             and one with all the bells and whistles.
1202            
1203             (Currently this module allows emitting HTML keyboard layouts with such
1204             information indicated by classes in markup. The details may be treated
1205             by the CSS rules.)
1206            
1207             =item
1208            
1209             "There is more than one way to do it" is not a defect, it is an asset.
1210             If it is a reasonable expectation to find a symbol X on keypress K', and
1211             the same holds for keypress K'' I they both do not conflict with other
1212             "being intuitive" goals, go with both variants. Same for 3 variants, 4
1213             - now you get my point.
1214            
1215             B The standard Russian phonetic layout has Ё on the C<^>-key; on the
1216             other hand, Ё is a variant of Е; so it makes sense to have Ё available on
1217             C as well. Same for Ъ and Ь.
1218            
1219             =item
1220            
1221             Dead keys which are "abstract" (as opposed to being related to letters
1222             engraved on physical keyboard) should better be put on modified state
1223             of "zombie" keys of the keyboard (C, C, C, C).
1224            
1225             B Making C a prefix key may lead to usability issues
1226             for people used to type CAPITALIZED PHRASES by keeping C pressed
1227             all the time. As a minimum, the symbols accessed via C
1228             should be strikingly different from those produced by C so that
1229             such problems are noted ASAP. Example: at first sight, producing
1230             C on C or C
1231             looks like a good idea. Do not do this: the visually indistinguishable
1232             C would lead to significantly hard-to-debug problems if
1233             it was unintentional.
1234            
1235             =back
1236            
1237            
1238             =head2 Explanation of keyboard layout terms used in the docs
1239            
1240             The aim of this module is to make keyboard layout design as simple as
1241             possible. It turns out that even very elaborate designs can be made
1242             quickly and the process is not very error-prone. It looks like certain
1243             venues not tried before are now made possible; at least I'm not aware of
1244             other attempts in this direction. One can make layouts which can be
1245             "explained" very concisely, while they contain thousand(s) of accessible
1246             letters.
1247            
1248             Unfortunately, being on uncharted territory, in my explanations I'm
1249             forced to use home-grown terms. So be patient with me... The terms are
1250             I, I, I and I. (One may want to compare them
1251             with what ISO 9995 does: L…. On
1252             the other hand, most parts of ISO 9995 look as remote from being ergonomic
1253             [in the sense discussed in these sections] as one may imagine!)
1254            
1255             In what follows,
1256             the words I and I are used interchangeably. A I
1257             means a physical key on a keyboard tapped (possibly together with
1258             one of modifiers C, C - or, rarely, L<[right] C|http://www.microsoft.com/resources/msdn/goglobal/keyboards/kbdcan.html>;
1259             more advanced layouts may use “extra” modifiers). The key C
1260             is often marked as such on the keycap, otherwise it is just the "right" C key; at least
1261             on Windows, for many simple layouts it can be replaced by C. What is a I?
1262             Tapping such a key does not produce any letter, but modifies what the next
1263             keypress would do (sometimes it is called a I; in C terms,
1264             it is probably a I. Sometimes, prefix keys may be “chained”; then
1265             insertion of a character happens not on the second keypress, but on the third one [or fourth/etc]).
1266            
1267             To describe which character (or a prefix) is produced by a keypress one must describe
1268             I: which prefix keys were already tapped, and which modifier keys are
1269             currently pressed. It is natural to consider the C modifier specially: let’s
1270             remove it from the context; now given a context, a keypress may produce two characters:
1271             one with C, one without. A I describes such a pair of characters (or
1272             prefixes) for every key of the keyboard.
1273            
1274             So, the plain I is the part of keyboard layout accessible by using only
1275             non-prefix keys (possibly in combination with C). Many keyboard layouts
1276             have up to 2 additional layers accessible without prefix keys: the C-layer and C-layer.
1277            
1278             On the simplest layouts, such as "US" or "Russian", there are no prefix keys or “extra”
1279             modifier keys -
1280             but this is only feasible for languages which use very few characters with
1281             diacritic marks. However, note that most layouts do not use
1282             C-layer - sometimes it is claimed that this causes problems with
1283             system/application interaction.
1284            
1285             A I consists of the layers of the layout accessible with a particular
1286             combination of prefix keys. The I consists of the plain layer
1287             and “additional prefix-less layers” of the layout;
1288             it is the part of layout accessible without switching "sticky state" and
1289             without using prefix keys. There may be up to 3 layers (Plain, C, C)
1290             per face on the standard Windows keyboard layouts. A I is a face exposed after pressing
1291             a prefix key (or a chain of prefix keys).
1292            
1293             A I is a collection of faces: the primary face, plus one face per
1294             a defined prefix-key (or a prefix chain). Finally, a I is a collection of personalities
1295             (switchable by sticky keys [like C] and/or in other system-specific ways)
1296             designed to work smoothly together. For example, in multi-script settings, there may be:
1297            
1298             =over 4
1299            
1300             =item *
1301            
1302             one personality per script (e.g., Latin/Greek/Cyrillic/Arabic);
1303            
1304             =item *
1305            
1306             every personality may have several script-specific additional (“satellite”) faces (one per a particular diacritic for Latin
1307             personality, one for regional/historic “flavors” for Cyrillic personality, one per aspiration type for Greek personality, etc);
1308            
1309             =item *
1310            
1311             every personality may also have “liaison” faces accessing the base faces of other personalities;
1312            
1313             =item *
1314            
1315             with chained prefixes, it is easy to design intuitive ways to access satellite faces of other personalities;
1316             then every personality will also contain the satellite faces of I personalities (on different prefix chains!).
1317            
1318             =item *
1319            
1320             For access to “technical symbols” (currencies/math/IPA etc), the personalities may share a certain collection
1321             of faces assigned to the same prefix keys.
1322            
1323             =back
1324            
1325             =head2 Example of keyboard layout groups
1326            
1327             Start with a I elaborate example (it is more or less a simplified variant
1328             of the L layout|http://k.ilyaz.org>. A keyboard layout group may consist of
1329             phonetically matched Latin and Cyrillic personalities, and visually matched Greek
1330             and Math personalities. Several prefix-keys may be shared by all 4 of these
1331             personalities; in addition, there would be 4 prefix-keys allowing access to primary
1332             faces of these 4 personalities from other personalities of the group. Also, there
1333             may be specialised prefix keys tuned for particular need of entering Latin script,
1334             Cyrillic script, Greek script, and Math.
1335            
1336             Suppose that there are 8 specialized-for-Latin prefix-keys (for example, name them
1337            
1338             grave/tilde/hat/breve/ring_above/macron/acute/diaeresis
1339            
1340             although in practice each one of them may do more than the name suggests).
1341             Then the Latin personality will have the following 13 faces:
1342            
1343             Primary/Latin-Primary/Cyrillic-Primary/Greek-Primary/Math-Primary
1344             grave/tilde/hat/breve/ring_above/macron/acute/diaeresis
1345            
1346             B Here Latin-Primary is the face one gets when one presses
1347             the Access-Latin prefix-key when in Latin mode; it may be convenient to define
1348             it to be the same as Primary - or maybe not. For example, if one defines it
1349             to be Greek-Primary, then this prefix-key has a convenient semantic of flipping
1350             between Latin and Greek modes for the next typed character: when in
1351             Latin, C would enter α, when in Greek, the same keypresses
1352             [now meaning "Latin-PREFIX-KEY α"] would enter "a".
1353            
1354             Assume that the only “extra” modifier used by the layout is C. Then each of
1355             these faces would consist of two layers: the plain one, and the C-
1356             one. For example, pressing C with a key on Greek face could add
1357             diaeresis to a vowel, or use a modified ("final" or "symbol") "glyph" for
1358             a consonant (as in σ/ς θ/ϑ). Or, on Latin face, C may produce æ. Or, on a
1359             Cyrillic personality, AltGr-я (ya) may produce ѣ (yat').
1360            
1361             Likewise, the Greek personality may define special prefix-keys to access polytonic
1362             Greek vowels. “Chaining” these prefix keys after the C prefix
1363             key would make it possible to enter polytonic Greek letters from non-Greek
1364             personalities without switching to the Greek personality.
1365            
1366             With such a keyboard layout group, to type one Greek word in a Cyrillic text one
1367             would switch to the Greek personality, then back to Cyrillic; but when all one
1368             needs to type now is only one Greek letter, it may be easier to use the
1369             "Greek-PREFIX-KEY letter" combination, and save switching back to the
1370             Cyrillic personality. (Of course, for this to work the letter should be
1371             on the primary face of the Greek personality.)
1372            
1373             How to make it possible to easily enter a short Greek word when in Cyrillic mode?
1374             If one uses one more “extra” modifier key (say, C), one could
1375             reserve combinations of modifiers with this key to “use” other personality. Say,
1376             C would enter Greek β, C would enter
1377             Cyrillic б, etc.
1378            
1379             =head2 “Onion rings” approach to keyboard layout groups
1380            
1381             Looks too complicated? Try to think about it in a different way: there
1382             are many faces in a keyboard layout group; break them into 3 "onion rings":
1383            
1384             =over 4
1385            
1386             =item I faces
1387            
1388             one can "switch to such a face" and type continuously using
1389             this face without pressing prefix keys. In other words, these faces
1390             can be made "active" (in an OS-dependent way).
1391            
1392             When one CORE face is active, the letters in another CORE face are still
1393             accessible by pressing one particular prefix key before each of these
1394             letters. This prefix key does not depend on which core face is
1395             currently "active".
1396            
1397             =item I faces
1398            
1399             one cannot "switch to them", however, letters
1400             in these faces are accessible by pressing one particular prefix key
1401             before this letter. This prefix key does not depend on which
1402             core face is currently "active".
1403            
1404             =item I faces
1405            
1406             one cannot "switch to them", and letters in these faces
1407             are accessible from one particular core face only. One must press a
1408             prefix key before every letter in such faces.
1409            
1410             (In presence of “chained prefixes”, the description is less direct:
1411             these faces are much easier to access from one particular CORE face.
1412             From another CORE face, one must precede this prefix key by the
1413             access-that-CORE-face prefix.)
1414            
1415             =back
1416            
1417             For example, when entering a mix of Latin/Cyrillic scripts and math,
1418             it makes sense to make the base-Latin and base-Cyrillic faces into
1419             the core; it is convenient when (several) Math faces and a Greek face
1420             can be made universally accessible. On the other hand, faces containing
1421             diacritized Latin letters and diacritized Cyrillic letters should better
1422             be made satellite; this avoids a proliferation of prefix keys which would
1423             make typing slower.
1424            
1425             Comparing to the terms of the preceding section, the CORE faces correspond
1426             to personalities. A personality I the base face from other personalities;
1427             it may also import satellite faces from other personalities.
1428            
1429             In a personality, one should make access to satellite faces, the imported
1430             CORE faces, and the universally accessible faces as simple as possible.
1431             If “other” satellite faces are imported, the access to them may be more
1432             cumbersome.
1433            
1434             =head2 Large Latin layouts: on access to diacritic marks
1435            
1436             Every prefix key has a numeric I. On Windows, there are situations
1437             when this numeric ID may be visible to the user. (This module makes every
1438             effort to make this happen as rarely as possible. However, this effort
1439             blows up the size of the layout DLL, and at some moment one may hit the
1440             L.
1441             To reduce the size of the DLL, the module makes a triage, and won’t protect the ID from leaking in some rare cases.)
1442             When such a leak happens, what the user sees is the character with this codepoint.
1443             So it makes sense to choose the ID to be the codepoint of a character “related
1444             to what the prefix key ‘does’”.
1445            
1446             The logic: if the prefix keys add some diacritic, the ID should be the
1447             I related to this diacritic: either
1448             C’s 8-bit characters with high bit set, or
1449             if none with the needed glyph, suitable non-Latin-1 "spacing modifier letters" or
1450             "spacing clones of diacritics".
1451            
1452             If followed by “special keys”, one should be able to access other related
1453             modifier letters and combining characters (see L<"Classification of diacritics">
1454             and the section C in L
1455             layout|http://search.cpan.org/~ilyaz/UI-KeyboardLayout/examples/izKeys.kbdd>);
1456             one possible convenient choice is:
1457            
1458             =over 4
1459            
1460             =item The second press of the prefix key
1461            
1462             The principal combining mark;
1463            
1464             =item SPACE
1465            
1466             The primary non-ASCII spacing modifier letter;
1467            
1468             =item SPACE-related (NBSP, or C, or C)
1469            
1470             The secondary/ternary/etc modifier letter;
1471            
1472             =item digits (possibly with C and/or C)
1473            
1474             related combining marks (with C and/or C, other categories
1475             from L<"Classification of diacritics">).
1476            
1477             =item C<'> or C<"> (possibly with C)
1478            
1479             secondary/ternary/etc combining marks (or, if these are on
1480             digits, replace by prime-shape modifier chars).
1481            
1482             =back
1483            
1484             =head2 The choice of prefix keys
1485            
1486             Some stats on prefix keys: C uses 41 prefix keys for diacritics (but 15 are fake, see below!);
1487             L uses 24|http://www.macfreek.nl/memory/Mac_Keyboard_Layout> (not counting prefix №, action=specials
1488             on L:
1489            
1490             "'@2#3%5^67*8AaCcEeGghHjJ KkMmNnQqRrsUuvwWYyZz‘’“ default=terminator
1491             №ʺʹƧƨƐɛƼƽƄƅ⁊ȢȣƏəƆɔƎǝƔɣƕǶƞȠ K’ĸƜɯŊŋƢƣƦʀſƱʊʌƿǷȜȝƷʒʻʼʽ №
1492            
1493             ); bépo uses 20, while EurKey uses 8, and L uses 5|http://www.macfreek.nl/memory/Mac_Keyboard_Layout>.
1494             On the other end of spectrum, there are 10 US keyboard keys with "calculatable" relation to Latin diacritics:
1495            
1496             `~^-'",./? --- grave/tilde/hat/macron/acute/diaeresis/cedilla/dot/stroke/hook-above
1497            
1498             To this list one may add a "calculatable" key C<$> as I;
1499             on the other hand, one should probably remove C since C should better
1500             be "set in stone" to denote C<¿>. If one adds Greek, then the calculatable positions
1501             for aspiration are on C<[ ]> (or on C<( )>). Of widely used Latin diacritics, this
1502             leaves out I (and doubled I);
1503             these diacritics should be either “mixed in” with similar "calculatable" diacritics
1504             (for example, may either create a character with cedilla, or with
1505             ogonek — depending on the character), or should be assigned on less intuitive positions.
1506            
1507             Extra prefix keys of L|http://www.pentzlin.com/info2-9995-3-V3.pdf>:
1508             I.
1509             Additionally, the following diacritics produce only 4 precomposed characters: ṲṳḀḁ, so their use as prefix characters is questionable:
1510             I
1511             (Here ↓ is a shortcut for C, same with ↑ for C, and ↗ for C; ↺ means C, and ² means C.
1512             Combined arrows expand to multiple diacritics.)
1513            
1514             (Keep in mind that this list is just a conjecture; the standard does not distinguish combining characters
1515             and prefix keys, so it is not clear which keypresses produce combining characters, and which are prefix keys.)
1516            
1517             =head2 What follows is partially deprecated
1518            
1519             Parts of the following subsections are better explained in
1520             L;
1521             some other parts duplicate
1522            
1523             =head2 On principles of intuitive design of Latin keyboard
1524            
1525             Using tricks described below, it is easy to create a convenient map of vowels
1526             with 3 diacritics `¨´ to the QWERTY keyboard. However, some common
1527             (meaning: from Latin-1–10 of ISO 8859) letters from Latin alphabet
1528             cannot be composed this way; they are B<ÆÐÞÇIJØŒß>
1529             (one may need to add B<ªº>, as well as B<¡¿> for non-alphabetical symbols). It is crucial
1530             that these letters may be entered by an intuitively clear key of the keyboard.
1531             There is an obvious ASCII letter associated to each of these (e.g., B associated to the thorn
1532             B<Þ>), and in the best world just pressing this letter with C-modifier
1533             would produce the desired symbol.
1534            
1535             Note that ª may be associated to @; then º may be mapped to the nearby 2.
1536            
1537             There is only one conflict: both B<Ø>,B<Œ> "want" to be entered as C;
1538             this is the ONLY piece of arbitrariness in the design so far. After
1539             resolving this conflict, C-keys B are assigned their meanings,
1540             and cannot carry other letters (call them the “stuck in stone keys”).
1541            
1542             (Other keys "stuck in stone" are dead keys: it is important to have the
1543             glyph etched on these keyboard's keys similar to the task they perform.)
1544            
1545             Then there are several non-alphabetical symbols accessible through ISO 8859
1546             encodings. Assigning them C- access is another important task to perform.
1547             Some of these symbols come in pairs, such as ≤≥, «», ‹›, “”, ‘’; it makes
1548             sense to assign them to paired keyboard's keys: <> or [] or ().
1549            
1550             However, this task is in conflict of interests with yet another (!) task, so
1551             let us explain the needs answered by that task first.
1552            
1553             One can always enter accented letters using dead keys; but many people desire a
1554             quicker way to access them, by just pressing AltGr-key (possibly with
1555             shift). The most primitive keyboard designs (such as IBM International
1556             or Apple’s US (Extended)
1557            
1558             http://www.borgendale.com/uls.htm
1559             http://www.macfreek.nl/memory/Mac_Keyboard_Layout
1560            
1561             ) omit this step and assign only the NECESSARY letters for AltGr- access.
1562             (Others, like Microsoft International, assign only a very small set.)
1563            
1564             This problem breaks into two tasks: choosing a repertoire of letters which
1565             will be typable this way, and mapping them to the keys of the keyboard.
1566             For example, EurKey chooses to use ´¨`-accented characters B (except
1567             for B<Ỳ>), plus B<ÅÑ>; Microsoft International does C<ÄÅÉÚÍÓÖÁÑß> only (and IBM
1568             International does
1569             none); Bepo does only B<ÉÈÀÙŸ> (but also has the Azeri B<Ə> available - which is
1570             not in ISO 8859 - and has B<Ê> on the 105th key "C<2nd \|>"),
1571             L has none
1572             (at least if one does not count uc characters without lc counterparts), same for L
1573            
1574             http://bepo.fr/wiki/Manuel
1575             http://bepo.fr/wiki/Utilisateur:Masaru # old version of .klc
1576             http://www.jlg-utilities.com/download/us_jlg.klc
1577             http://tlt.its.psu.edu/suggestions/international/accents/codemacext.html
1578             or look for "a graphic of the special characters" on
1579             http://web.archive.org/web/20080717203026/http://homepage.mac.com/thgewecke/mlingos9.html
1580            
1581             =head2 Our solution
1582            
1583             First, the answer (the alternative, illustrated description is on
1584             L):
1585            
1586             =over 10
1587            
1588             =item Rule 0:
1589            
1590             non-ASCII letters which are not accented by B<` ´ ¨ ˜ ˆ ˇ ° ¯ ⁄> are entered by
1591             C-keys "obviously associated" to them. Supported: B<ÆÐÞÇIJŒß>.
1592            
1593             =item Rule 0a:
1594            
1595             Same is applicable to B<Ê> and B<Ñ>.
1596            
1597             =item Rule 1:
1598            
1599             Vowels B accented by B<¨´`> are assigned the so called I<"natural position">:
1600             3 “alphabetic” rows of keyboard are allocated to accents (B<¨> is the top, B<´> is the middle, B<`> is
1601             the bottom row of 3 alphabetic-rows on keyboard - so B<À> is on B-row),
1602             and are on the same diagonal as the base letter. For left-hand
1603             vowels (B,B) the diagonal is in the direction of \, for right hand
1604             vowels (B,B,B,B) - in the direction of /.
1605            
1606             =item Rule 1a:
1607            
1608             If the "natural position" is occupied, the neighbor key in the
1609             direction of "the other diagonal" is chosen. (So for B,B it is
1610             the /-diagonal, and for right-hand vowels B it is the \-diag.)
1611            
1612             =item Rule 1b:
1613            
1614             This neighbor key is below unless the key is on bottom row - then it is above.
1615            
1616             Supported by rules "1": all but B<ÏËỲ>.
1617            
1618             =item Rule 2:
1619            
1620             Additionally, B<Å>,B<Ø>,B<Ì> are available on keys B,B<P>,B.
1621             B<ª> is on B<@>, and B<º> is on the nearby B<2>.
1622            
1623             =back
1624            
1625             =head2 Clarification:
1626            
1627             B<0.> If you remember only Rule 0, you still can enter all Latin-1 letters using
1628             Rule 0; all you need to remember is that most of the dead keys are at “obvious”
1629             positions: for L|http://k.ilyaz.org> it is B<`';"~^.,-/> for B<`´¨¨˜ˆ°¸¯ ̸>
1630             (B<¨> is repeated on B<;">!) and B<6> for B<ˇ> (memorizable as “opposite” of B<^> for B<ˆ>).
1631            
1632             (What the rule 0 actually says is: "You do not need to memorize me". ;-)
1633            
1634             (If you need a diacritic which is only I to one of the listed diacritics,
1635             there is a good chance that the dead key above L.)
1636            
1637             B<1.> If all you remember are rules 1,1a, you can calculate the position of the
1638             AltGr-key for AEYUIO accented by `´¨ up to a choice of 3 keys (the "natural
1639             key" and its 2 neighbors) - which are quick to try all if you forgot the
1640             precise position. If you remember rules 1,1ab, then this choice is down to
1641             2 possible candidates.
1642            
1643             Essentially, all you must remember in detail is that the "natural positions"
1644             form a B — \ on left, / on right, and in case of bad luck you
1645             should move in the direction of other diagonal one step. Then a letter is
1646             either in its "obvious position", or in one of 3 modifications of the
1647             “natural position”.
1648            
1649             Note that these rules cover I the Latin letters appearing in
1650             Latin-1..Latin-10, I we resolve the B<Œ/Ø>-conflict by putting B<Œ> to the key B (since
1651             B<Ø> may be entered using CB)!
1652            
1653             =head2 Motivations:
1654            
1655             It is important to have a logical way to quickly understand whether a letter
1656             is quickly accessible from a keyboard, and on which key. (Or, maybe, to find
1657             a small set of keys on which a letter may be present — then, if one forgets,
1658             it is possible to quickly un-forget by trying a small number of keys).
1659            
1660             In fact, the problem of choosing “the optimal” assignment (by minimizing the
1661             rules to remember) has an almost unique solution. Understanding this solution
1662             (to a problem which is essentially combinatorial optimization) may be a great help
1663             in memorizing the rules.
1664            
1665             The idea: we assign alphabetical Latin characters only to alphabetical keys
1666             on the keyboard; this frees the way to use (paired) symbol keys to enter (paired)
1667             Unicode symbols. Now observe the diagonals on the alphabetic part of the
1668             keyboard: \-diagonals (like B) and /-diagonals (like B). Each diagonal
1669             contains 3 (or less) alphabetic keys; what we want is to assign ¨-accent to the top
1670             one, ´-accent to the middle one, and `-accent to the bottom one.
1671            
1672             On the left-hand part of the keyboard, use \-diagonals, on the right-hand
1673             part use /-diagonals; now each diagonal contains EXACTLY 3 alphabetic keys.
1674             Moreover, the diagonals which contain vowels B do not intersect!
1675            
1676             If we have not decided to have keys set in stone, this would be all - we
1677             would get "completely predictable" access to B<´¨`>-accented characters B.
1678             For example, B<Ÿ> would be accessible on CB, B<Ý> on CB, B<Ỳ> on CB.
1679             Unfortunately, the diagonals contain keys C set in stone. So we need
1680             a way to "move away" from these keys. The rule is very simple: we move
1681             one step away in the direction of "other" diagonal (/-diagonal on the left
1682             half, and \-diagonal on the right half) one step down (unless we start
1683             on keys B, B where "down" is impossible and we move up to B or B).
1684            
1685             Examples: B<Ä> is on B, B<Á> "wants to be" on B (used for C<Æ>), so it is moved to
1686             C; B<Ö> wants to be on B (already used for B<Ø> or B<Œ>), and is moved away to B;
1687             B<È> wants to be on B (occupied by B<Ç>), but is moved away to B.
1688            
1689             There is no way to enter B<Ï> using this layout (unless we agree to move it
1690             to the "8*" key, which may conflict with convenience of entering typographic
1691             quotation marks). Fortunately, this letter is rare (compared even to B<Ë>
1692             which is quite frequent in Dutch). So there is no big deal that it is not
1693             available for "handy" input - remember that one can always use deadkeys.
1694            
1695             http://en.wikipedia.org/wiki/Letter_frequency#Relative_frequencies_of_letters_in_other_languages
1696            
1697             Note that the keys B<P> and B are not engaged by this layout; since B<P>
1698             is a neighbor of B, it is natural to use it to resolve the conflict
1699             between B<Ø> or B<Œ> (which both want to be set in stone on B). This leaves
1700             only the key B unengaged; but what we do not cover are two keys B<Å> and B<Ñ>
1701             which are relatively frequent in Latin-derived European languages.
1702            
1703             Note that B<Ì> is moderately frequent in Italian, but B<Ñ> is much more frequent
1704             in Spanish. Since B<Ì> and B<Ñ> want to be on the same key (which on many keyboards is taken by
1705             B<Ñ>), it makes sense to prefer B<Ñ>… Likewise, B<Ê> is much more frequent
1706             than B<Ë>; switch them.
1707            
1708             This leaves only the key B unassigned, I a very rare B<Ỳ> on B. In
1709             L|http://k.ilyaz.org>, one puts B<Å> and B<Ì> there. This completes
1710             the explanation of the rule 2.
1711            
1712             =head2 On possibilities of merging 2 diacritics on one prefix key
1713            
1714             With many diacritics, and the limited mnemonically-viable positions on
1715             the keyboard, it makes sense to merge several diacritics on the same prefix key.
1716             Possible candidates are cedilla/ogonek/comma-below (on C),
1717             dot-above/ring-above/dot-below (on C), caron/breve, circumflex/inverted-breve (on C
1718             In some cases, only one of the diacritics would be applicable to a particular character.
1719             Otherwise, one must decide which of several choices to prefer. The notes below may be
1720             useful when designing such preferences. (This module can take most of such choices
1721             automatically due to knowledge of L
1722             of characters; this age correlates well with expected frequency of use.)
1723            
1724             Another trick discussed below is implementing a rare diacritic X by applying the diacritic Y to a character
1725             with pre-composed diacritic Z.
1726            
1727             U-caron: ǔ, Ǔ which is used to indicate u in the third tone of Chinese language pinyin.
1728             But U-breve ŭ/Ŭ is used in Latin encodings.
1729             Ǧ/ǧ (G with caron) is used, but only in "exotic" or old languages (has no
1730             combined form - while G-breve ğ/Ğ is in Latin encodings.
1731             A-breve Ă: A-caron Ǎ is not in Latin-N; apparently, is used only in pinyin,
1732             zarma, Hokkien, vietnamese, IPA, transliteration of Old Latin, Bible and Cyrillic's big yus.
1733            
1734             In EurKey: only a takes breve, the rest take caron (including G but not U)
1735            
1736             Merging ° and dot-accent ˙ in Latin-N: only A and U take °, and they
1737             do not take dot-accent. In EurKey: also small w,y take ring accent; same in
1738             Bepo - but they do not take dot accent in Latin-N.
1739            
1740             Double-´ and cornu (both on a,u only) can be taken by ¨ or ˙ on letters with
1741             ¨ precombined (in Unicode ¨ is not precombined with diaeresis or dots).
1742             But one must special-case Ë and Ï and Ø (have Ê and IJ instead; IJ takes no accents,
1743             but Ê takes acute, grave, tilde and dot below...)! Æ takes acute and macron; Ø takes acute.
1744            
1745             Actually, cornu=horn is only on o,u, so using dot/ring on ö and ü is very viable...
1746            
1747             So for using AltGr-letter after deadkeys: diaeresis can take dot above, hat and wedge, diaeresis.
1748             Likewise, ` and ´ are not precombined together (but there is a combined
1749             combining mark). So one can do something else on vowels (ogonek?).
1750            
1751             Applying ´ to `-accented forms: we do not have ỳ (on AltGr-keys), so must use "the natural position"
1752             which is mixed with Ñ (takes no accents) and Ç (takes acute!!!).
1753            
1754             s, t do not precombine with `; so can use for the "alternative cedilla".
1755            
1756             Only a/u/w/y take ring, and they do not take cedilla. Can merge.
1757            
1758             Bepo's hook above; ảɓƈɗẻểƒɠɦỉƙɱỏƥʠʂɚƭủʋⱳƴỷȥ ẢƁƇƊẺỂƑƓỈƘⱮỎƤƬỦƲⱲƳỶȤ
1759            
1760             perl -wlnae "next unless /HOOK/; push @F, shift @F; print qq(@F)" NamesList.txt | sort | less
1761            
1762             Of capital letters only T and Y take different kinds of hooks... (And for T both are in Latin-Extended-B...)
1763            
1764            
1765             =head1 Useful tidbits from Unicode mailing list
1766            
1767             =for html
1768            
1769            
1770             =head2 On keyboards
1771            
1772             On MS keyboard (absolutely wrong!)
1773            
1774             http://unicode.org/mail-arch/unicode-ml/y2012-m05/0268.html
1775            
1776             Symbols for Keyboard keys:
1777            
1778             http://unicode.org/mail-arch/unicode-ml/Archives-Old/UML009/0204.html
1779             “Menu key” variations:
1780             http://unicode.org/mail-arch/unicode-ml/Archives-Old/UML009/0239.html
1781             Role of ISO/IEC 9995, switchable keycaps
1782             http://unicode.org/mail-arch/unicode-ml/Archives-Old/UML009/0576.html
1783            
1784             On the other hand, having access to text only math symbols makes it possible to implement it in computer languages, making source code easier to read.
1785            
1786             Right now, I feel there is a lack of keyboard maps. You can develop them on your own, but that is very time consuming.
1787            
1788             http://unicode.org/mail-arch/unicode-ml/y2011-m04/0117.html
1789            
1790             Fallback in “smart keyboards” interacting with Text-Service unaware applications
1791            
1792             http://unicode.org/mail-arch/unicode-ml/y2014-m03/0165.html
1793            
1794             Keyboards - agreement (5 scripts at end)
1795            
1796             ftp://ftp.cen.eu/CEN/Sectors/List/ICT/CWAs/CWA-16108-2010-MEEK.pdf
1797            
1798             Need for a keyboard, keyman examples; why "standard" keyboards are doomed
1799            
1800             http://unicode.org/mail-arch/unicode-ml/y2010-m01/0015.html
1801             http://unicode.org/mail-arch/unicode-ml/y2010-m01/0022.html
1802             http://unicode.org/mail-arch/unicode-ml/y2010-m01/0036.html
1803             http://unicode.org/mail-arch/unicode-ml/y2010-m01/0053.html
1804            
1805             =head2 History of Unicode
1806            
1807             Unicode in 1889
1808            
1809             http://www.archive.org/stream/unicodeuniversa00unkngoog#page/n3/mode/2up
1810            
1811             Structure of development of Unicode
1812            
1813             http://unicode.org/mail-arch/unicode-ml/y2006-m07/0056.html
1814             http://unicode.org/mail-arch/unicode-ml/y2005-m07/0099.html
1815             I don't have a problem with Unicode. It is what it is; it cannot
1816             possibly be all things to all people:
1817             http://unicode.org/mail-arch/unicode-ml/y2005-m07/0101.html
1818            
1819             Control characters’ names
1820            
1821             http://unicode.org/mail-arch/unicode-ml/y2014-m03/0036.html
1822            
1823             Compromises vs reality
1824            
1825             http://unicode.org/mail-arch/unicode-ml/y2010-m02/0106.html
1826             http://unicode.org/mail-arch/unicode-ml/y2010-m02/0117.html
1827            
1828             Stability of normalization
1829            
1830             http://unicode.org/mail-arch/unicode-ml/y2005-m07/0055.html
1831            
1832             Universality vs affordability
1833            
1834             http://unicode.org/mail-arch/unicode-ml/y2007-m07/0157.html
1835            
1836             Drachma
1837            
1838             http://unicode.org/mail-arch/unicode-ml/y2012-m05/0167.html
1839             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3866.pdf
1840            
1841             w-ring is a stowaway
1842            
1843             http://unicode.org/mail-arch/unicode-ml/y2012-m04/0043.html
1844            
1845             History of squared pH (and about what fits into ideographic square)
1846            
1847             http://unicode.org/mail-arch/unicode-ml/y2012-m02/0123.html
1848             http://unicode.org/mail-arch/unicode-ml/y2013-m09/0111.html
1849            
1850             Silly quotation marks: 201b, 201f
1851            
1852             http://en.wikipedia.org/wiki/Quotation_mark_glyphs
1853             http://unicode.org/mail-arch/unicode-ml/y2006-m06/0300.html
1854             http://unicode.org/mail-arch/unicode-ml/y2006-m06/0317.html
1855             http://en.wikipedia.org/wiki/Comma
1856             http://en.wikipedia.org/wiki/%CA%BBOkina
1857             http://en.wikipedia.org/wiki/Saltillo_%28linguistics%29
1858             http://unicode.org/mail-arch/unicode-ml/y2006-m06/0367.html
1859             http://unicode.org/unicode/reports/tr8/
1860             under "4.6 Apostrophe Semantics Errata"
1861            
1862             OHM: In modern usage, for new documents, this character should not be used
1863            
1864             http://unicode.org/mail-arch/unicode-ml/y2011-m08/0060.html
1865            
1866             Uppercase eszett ß ẞ
1867            
1868             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0007.html
1869             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0008.html
1870             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0142.html
1871             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0045.html
1872             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0147.html
1873             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0170.html
1874             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0196.html
1875            
1876             Should not use (roman numerals)
1877            
1878             http://unicode.org/mail-arch/unicode-ml/y2007-m11/0253.html
1879            
1880             Colors in Unicode names
1881            
1882             http://unicode.org/mail-arch/unicode-ml/y2011-m03/0100.html
1883            
1884             Xerox and interrobang
1885            
1886             http://unicode.org/mail-arch/unicode-ml/y2005-m04/0035.html
1887            
1888             Tibetan (history of encoding, relative difficulty of handling comparing to cousins)
1889            
1890             http://unicode.org/mail-arch/unicode-ml/y2013-m04/0036.html
1891             http://unicode.org/mail-arch/unicode-ml/y2013-m04/0040.html
1892            
1893             Translation of 8859 to 10646 for Latvian was MECHANICAL
1894            
1895             http://unicode.org/mail-arch/unicode-ml/y2013-m06/0057.html
1896            
1897             Hyphens:
1898            
1899             http://unicode.org/mail-arch/unicode-ml/y2009-m10/0038.html
1900            
1901             NOT and BROKEN BAR
1902            
1903             http://unicode.org/mail-arch/unicode-ml/y2007-m12/0207.html
1904             http://www.cs.tut.fi/~jkorpela/latin1/ascii-hist.html#5C
1905            
1906             Combining power of generative features - implementor's view
1907            
1908             http://unicode.org/mail-arch/unicode-ml/y2004-m09/0145.html
1909            
1910             =head2 Greek and about
1911            
1912             OXIA vs TONOS
1913            
1914             http://www.tlg.uci.edu/~opoudjis/unicode/unicode_gkbkgd.html#oxia
1915            
1916             Greek letters for non-Greek
1917            
1918             http://stephanus.tlg.uci.edu/~opoudjis/unicode/unicode_interloping.html#ipa
1919            
1920             Macron and breve in Greek dictionaries
1921            
1922             http://www.unicode.org/mail-arch/unicode-ml/y2013-m08/0011.html
1923            
1924             LAMBDA vs LAMDA
1925            
1926             http://unicode.org/mail-arch/unicode-ml/y2010-m06/0063.html
1927            
1928             COMBINING GREEK YPOGEGRAMMENI equilibristic (depends on a vowel?)
1929            
1930             http://unicode.org/mail-arch/unicode-ml/y2006-m06/0299.html
1931             http://unicode.org/mail-arch/unicode-ml/y2006-m06/0308.html
1932             http://www.tlg.uci.edu/~opoudjis/unicode/unicode_adscript.html
1933             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0046.html
1934            
1935             =head2 Latin, Cyrillic, Hebrew, etc
1936            
1937             Book Spine reading direction
1938            
1939             http://www.artlebedev.com/mandership/122/
1940            
1941             What is a "Latin" char
1942            
1943             http://unicode.org/forum/viewtopic.php?f=23&t=102
1944            
1945             Federal vs regional aspects of Latinization (a lot of flak; cp1251)
1946            
1947             http://peoples.org.ru/stenogramma.html
1948            
1949             Yiddish digraphs
1950            
1951             http://unicode.org/mail-arch/unicode-ml/y2011-m10/0121.html
1952            
1953             Cyrillic Script, Unicode status (+combining)
1954            
1955             http://scriptsource.org/cms/scripts/page.php?item_id=entry_detail&uid=ngc339csy8
1956             http://scriptsource.org/cms/scripts/page.php?item_id=entry_detail&uid=ktxptbccph
1957            
1958             The IBM 1401 Hebrew Letter Key
1959            
1960             http://www.qsm.co.il/Hebrew/HebKey.htm
1961            
1962             GOST 10859
1963            
1964             http://unicode.org/mail-arch/unicode-ml/y2009-m09/0082.html
1965             http://www.mailcom.com/besm6/ACPU-128.jpg
1966            
1967             Hebrew char input
1968            
1969             http://rishida.net/scripts/pickers/hebrew/
1970             http://rishida.net/scripts/uniview/#title
1971            
1972             Cyrillic soup
1973            
1974             http://czyborra.com/charsets/cyrillic.html
1975            
1976             How to encode Latin-in-fraktur
1977            
1978             http://unicode.org/mail-arch/unicode-ml/y2007-m01/0279.html
1979             http://unicode.org/mail-arch/unicode-ml/y2007-m01/0263.html
1980            
1981             The presentation of the existing COMBINING CEDILLA which has three major forms [ȘșȚț and Latvian Ģģ]
1982            
1983             http://unicode.org/mail-arch/unicode-ml/y2013-m06/0045.html
1984             http://unicode.org/mail-arch/unicode-ml/y2013-m06/0066.html
1985            
1986             =head2 Math and technical texts
1987            
1988             Missing: .... skew-orthogonal complement
1989            
1990             Math Almost-Text encoding
1991            
1992             http://unicode.org/notes/tn28/UTN28-PlainTextMath-v3.pdf
1993             http://unicode.org/mail-arch/unicode-ml/y2011-m10/0018.html
1994             For me 1/2/3/4 means unambiguously ((1/2)/3)/4, i.e. 1/(2*3*4)
1995            
1996             Unicode mostly encodes characters that are in use or have been
1997             encoded in other standards. While not semantically agnostic, it is
1998             much less oriented towards semantic clarifications and
1999             distinctions than many people might hope for (and this includes
2000             me, some of the time at least).
2001            
2002             Horizontal/vertical line/arrow extensions
2003            
2004             http://unicode.org/charts/PDF/U2300.pdf
2005             http://unicode.org/mail-arch/unicode-ml/y2003-m07/0513.html
2006             http://std.dkuug.dk/JTC1/SC2/WG2/docs/n2508.htm
2007            
2008             Pretty-printing text math
2009            
2010             http://code.google.com/p/sympy/wiki/PrettyPrinting
2011            
2012             Sub/Super on a terminal
2013            
2014             http://unicode.org/mail-arch/unicode-ml/y2008-m07/0028.html
2015            
2016             CR symbols
2017            
2018             http://unicode.org/mail-arch/unicode-ml/y2006-m07/0163.html
2019            
2020             Math layout
2021            
2022             http://unicode.org/mail-arch/unicode-ml/y2007-m01/0303.html
2023            
2024             Attempts of classification
2025            
2026             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n4384.pdf
2027             http://std.dkuug.dk/JTC1/SC2/WG2/
2028            
2029             Buttons Target Also=not-in-series-of-n4384
2030             square 1🞌 2⬝ 3🞍 4▪ 5◾ 6◼ 7■ s⬛ (solid=s⬛)
2031             box 1□ 2🞎 3🞏 4🞐 5🞑 6🞒 7🞓 o⬜ 1🞔 2▣ 3🞕 🞖 =white square (open=o⬜) also: ▫◽◻⌑⧈⬚⸋⊡
2032             black circle 1⋅ 2∙ 3🞄 4⦁ 5⦁ 6⚫ 7● also: ·
2033             ring 1○ 2⭘ 3🞆 4🞆 5🞇 6🞈 7🞉 1⊙ 2🞊 3⦿ 🞋 =white circle also: ⊚⌾◌⚪⚬⨀◦⦾
2034             black diamond 1🞗 2🞘 3⬩ 4🞙 5⬥ 6◆
2035             white diamond ◇ 1🞚 2◈ 3🞛 🞜 also: ⋄
2036             black lozenge 1🞝 2🞞 3⬪ 4🞟 5⬧ 6⧫
2037             white lozenge ◊ 🞠
2038             cross 1🞡 2🞢 3🞣 4🞤 5🞥 6🞦 7🞧
2039             saltire 1🞨 2🞩 3🞪 4🞫 5🞬 6🞭 7🞮 ≈ times (rotated cross)
2040             5-asterisk 1🞯 2🞰 3🞱 4🞲 5🞳 6🞴
2041             6-asterisk 1🞵 2🞶 3🞷 4🞸 5🞹 6🞺
2042             8-asterisk 1🞻 2🞼 3🞽 4🞾 5🞿
2043             centered n-gon 3⯅ 4⯀ 5⬟ 6⬣ 8⯃
2044             cent on-corner 3⯆ 4⯁ 5⯂ 6⬢ 8⯄ (also ⯇ ⯈)
2045             light star 3🟀 4🟄 5🟉 6✶ 8🟎 12🟒
2046             medium star 3🟁 4🟅 5★ 6🟋 8🟏 12🟓
2047             (heavy) star 3🟂 4🟆 5🟊 6🟌 8🟐 12✹
2048             pinwheel 3🟃 4🟇 5✯ 6🟍 8🟑 12🟔 lighter: ✵
2049            
2050             =head2 Unicode and linguists
2051            
2052             Linguists mailing lists
2053            
2054             http://unicode.org/mail-arch/unicode-ml/y2009-m06/0066.html
2055            
2056             Obsolete IPA
2057            
2058             http://unicode.org/mail-arch/unicode-ml/y2009-m01/0487.html
2059             http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3Asubhead%3D%2F%28%3Fi%29archaic%2F%3A]+&g=
2060            
2061             Teuthonista (vowel guide p11, kbd p13)
2062            
2063             http://www.sprachatlas.phil.uni-erlangen.de/materialien/Teuthonista_Handbuch.pdf
2064            
2065             Glottals
2066            
2067             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0151.html
2068             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0163.html
2069             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0202.html
2070             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0205.html
2071            
2072             =head2 Spaces, invisible characters, VS
2073            
2074             Substitute blank
2075            
2076             http://unicode.org/mail-arch/unicode-ml/y2011-m07/0101.html
2077            
2078             Representing invisible characters
2079            
2080             http://unicode.org/mail-arch/unicode-ml/y2011-m07/0094.html
2081            
2082             Ignorable glyphs
2083            
2084             http://unicode.org/mail-arch/unicode-ml/y2007-m08/0132.html
2085             http://unicode.org/mail-arch/unicode-ml/y2007-m08/0138.html
2086             http://unicode.org/mail-arch/unicode-ml/y2007-m08/0120.html
2087            
2088             HOWTO: (non)dummy VS in fonts
2089            
2090             http://unicode.org/mail-arch/unicode-ml/y2007-m08/0118.html
2091            
2092             ZWSP ZWNJ WJ SHY NON-BREAKING HYPHEN
2093            
2094             http://unicode.org/mail-arch/unicode-ml/y2007-m08/0123.html
2095             http://unicode.org/mail-arch/unicode-ml/y2007-m07/0188.html
2096             http://unicode.org/mail-arch/unicode-ml/y2007-m07/0199.html
2097             http://unicode.org/mail-arch/unicode-ml/y2007-m07/0201.html
2098             http://unicode.org/mail-arch/unicode-ml/y2007-m06/0122.html
2099             http://unicode.org/mail-arch/unicode-ml/y2007-m01/0297.html
2100            
2101             On which base to draw a "standalone" diacritics
2102            
2103             http://unicode.org/mail-arch/unicode-ml/y2007-m07/0075.html
2104            
2105             Variation sequences
2106            
2107             http://unicode.org/mail-arch/unicode-ml/y2004-m07/0246.html
2108            
2109             =head2 Typesetting
2110            
2111             Upside-down text in CSS (remove position?)
2112            
2113             http://unicode.org/mail-arch/unicode-ml/y2012-m01/0037.html
2114            
2115             Unicode to PostScript
2116            
2117             http://unicode.org/mail-arch/unicode-ml/y2009-m06/0056.html
2118             http://www.linuxfromscratch.org/blfs/view/svn/pst/enscript.html
2119             http://unicode.org/mail-arch/unicode-ml/y2009-m06/0062.html
2120            
2121             Spacing: English and French
2122            
2123             http://unicode.org/mail-arch/unicode-ml/y2006-m09/0167.html
2124             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0103.html
2125             http://unicode.org/mail-arch/unicode-ml/y2007-m08/0138.html
2126            
2127             Chicago Manual of Style
2128            
2129             http://unicode.org/mail-arch/unicode-ml/y2006-m01/0127.html
2130            
2131             Coloring parts of ligatures
2132             Implementations:
2133            
2134             http://unicode.org/mail-arch/unicode-ml/y2005-m06/0195.html
2135             http://unicode.org/mail-arch/unicode-ml/y2005-m06/0233.html
2136             http://unicode.org/mail-arch/unicode-ml/y2005-m06/0208.html
2137             GPOS
2138             http://unicode.org/mail-arch/unicode-ml/y2005-m06/0167.html
2139            
2140             Chinese typesetting
2141            
2142             http://idsgn.org/posts/the-end-of-movable-type-in-china/
2143            
2144             @fonts and non-URL URIs
2145            
2146             http://unicode.org/mail-arch/unicode-ml/y2010-m01/0156.html
2147            
2148             =head2 Looking at the future
2149            
2150             Why and how to introduce innovative characters
2151            
2152             http://unicode.org/mail-arch/unicode-ml/y2012-m01/0045.html
2153            
2154             Unicode knows the concept of a provisional property
2155            
2156             http://unicode.org/mail-arch/unicode-ml/y2011-m11/0142.html
2157             http://unicode.org/reports/tr23/
2158             http://unicode.org/mail-arch/unicode-ml/y2011-m11/0161.html
2159             If you want to make analogies, however, the ISO ballots constitute
2160             the *provisional* publication for character code points and names.
2161             that needs to be available from day one for a character to be
2162             implementable at all (such as decomp mappings, bidi class,
2163             code point, name, etc.).
2164            
2165             ZERO-WIDTH UNDEFINED DECOMPOSITION MARK
2166             - to define decomposition, prepend it
2167            
2168             Exciting new letter forms for English
2169            
2170             http://www.theonion.com/articles/alphabet-updated-with-15-exciting-new-replacement,2869/
2171            
2172             Proposing new stuff, finding new stuff proposed
2173            
2174             http://unicode.org/mail-arch/unicode-ml/y2008-m01/0238.html
2175             http://www.unicode.org/mail-arch/unicode-ml/y2013-m09/0056.html
2176            
2177             A useful set of criteria for encoding symbols is found in
2178             Annex H of this document:
2179            
2180             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3002.pdf
2181            
2182             =head2 Unsorted
2183            
2184             Summary views into CLDR
2185            
2186             http://www.unicode.org/cldr/charts//by_type/patterns.characters.html
2187             http://www.unicode.org/cldr/charts//by_type/misc.exemplarCharacters.html
2188            
2189             Pound
2190            
2191             http://unicode.org/mail-arch/unicode-ml/y2012-m05/0242.html
2192            
2193             Classification of Dings (bats etc)
2194            
2195             std.dkuug.dk/jtc1/sc2/wg2/docs/n4115.pdf
2196            
2197             Escape: 2be9 2b9b
2198             ARROW SHAFT - various
2199            
2200             Locales
2201            
2202             http://blog.kyero.com/2011/11/14/what-is-the-common-locale-data-repository/
2203             http://blog.kyero.com/2010/12/02/lost-in-translation-locales-not-languages/
2204             http://unicode.org/mail-arch/unicode-ml/y2006-m06/0203.html
2205            
2206             General
2207            
2208             http://ebixio.com/online_docs/UnicodeDemystified.pdf
2209            
2210             Diacritics in fonts
2211            
2212             http://unicode.org/mail-arch/unicode-ml/y2011-m05/0047.html
2213             http://www.user.uni-hannover.de/nhtcapri/combining-marks.html#greek
2214            
2215             Licences (GPL etc) in TV sets
2216            
2217             http://unicode.org/mail-arch/unicode-ml/y2009-m12/0092.html
2218            
2219             Similar glyphs:
2220            
2221             http://unicode.org/reports/tr39/data/confusables.txt
2222            
2223             GeoLocation by IP
2224            
2225             http://unicode.org/mail-arch/unicode-ml/y2009-m04/0197.html
2226            
2227             Per language character repertoire:
2228            
2229             http://unicode.org/mail-arch/unicode-ml/y2009-m04/0253.html
2230             http://unicode.org/mail-arch/unicode-ml/y2009-m04/0255.html
2231            
2232             Dates/numbers in Unicode
2233            
2234             http://unicode.org/mail-arch/unicode-ml/y2010-m02/0122.html
2235            
2236             Normalization FAQ
2237            
2238             http://www.macchiato.com/unicode/nfc-faq
2239            
2240             Apostrophe
2241            
2242             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0060.html
2243             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0063.html
2244             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0066.html
2245             http://unicode.org/mail-arch/unicode-ml/y2007-m07/0251.html
2246             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0309.html
2247            
2248             Apostrophe as soft sign
2249            
2250             http://unicode.org/mail-arch/unicode-ml/y2010-m08/0123.html
2251            
2252             Questionnaire at start of Unicode proposal
2253            
2254             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0087.html
2255            
2256             Rubi
2257            
2258             http://en.wikipedia.org/wiki/Ruby_character#Unicode
2259            
2260             Tamil/ISCII
2261            
2262             http://unicode.org/faq/indic.html
2263             http://unicode.org/versions/Unicode6.1.0/ch09.pdf
2264             http://www.brainsphere.co.in/keyboard/tm.pdf
2265            
2266             CGI and OpenType
2267            
2268             http://unicode.org/mail-arch/unicode-ml/y2008-m02/0097.html
2269            
2270             Numbers in scripts ;-)
2271            
2272             http://unicode.org/mail-arch/unicode-ml/y2008-m02/0120.html
2273            
2274             Indicating coverage of the font
2275            
2276             http://unicode.org/mail-arch/unicode-ml/y2008-m02/0152.html
2277             http://unicode.org/mail-arch/unicode-ml/y2008-m02/0167.html
2278            
2279             Accessing ligatures
2280            
2281             http://unicode.org/mail-arch/unicode-ml/y2007-m11/0210.html
2282            
2283             Folding characters
2284            
2285             http://unicode.org/reports/tr30/tr30-4.html
2286            
2287             Writing systems vs written languages
2288            
2289             http://unicode.org/mail-arch/unicode-ml/y2005-m07/0198.html
2290             http://unicode.org/mail-arch/unicode-ml/y2005-m07/0241.html
2291            
2292             MS Visual OpenType tables
2293            
2294             http://www.microsoft.com/typography/VOLT.mspx
2295             http://www.microsoft.com/typography
2296            
2297             "Same" character Oacute used for different "functions" in the same text
2298            
2299             http://unicode.org/mail-arch/unicode-ml/y2004-m08/0019.html
2300             etc:
2301             http://unicode.org/mail-arch/unicode-ml/y2004-m07/0227.html
2302            
2303             Diacritics
2304            
2305             http://www.sil.org/~gaultney/ProbsOfDiacDesignLowRes.pdf
2306             http://en.wikipedia.org/wiki/Sylfaen_%28typeface%29
2307             http://tiro.com/Articles/sylfaen_article.pdf
2308            
2309             Sign writing
2310            
2311             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n4342.pdf
2312            
2313             Writing digits in non-decimal
2314            
2315             http://unicode.org/mail-arch/unicode-ml/y2011-m03/0050.html
2316             Which separator is less ambiguous? Breve ˘ ? ␣ ? Inverted ␣ ?
2317            
2318             Use to identify a letter:
2319            
2320             http://unicode.org/charts/collation/
2321            
2322             Perl has problems with unpaired surrogates (whole thread)
2323            
2324             http://unicode.org/mail-arch/unicode-ml/y2010-m11/0034.html
2325            
2326             Complex fonts (e.g., Indic)
2327            
2328             http://unicode.org/mail-arch/unicode-ml/y2010-m10/0049.html
2329            
2330             Complex glyphs in Symbola (pre-6.01) font may crash older versions of Windows
2331            
2332             http://unicode.org/mail-arch/unicode-ml/y2010-m10/0082.html
2333             http://unicode.org/mail-arch/unicode-ml/y2010-m10/0084.html
2334            
2335             Windows 7 SP1 improvements
2336            
2337             http://babelstone.blogspot.de/2010/05/prototyping-tangut-imes-or-why-windows.html
2338            
2339             Middle dot is ambiguous
2340            
2341             http://unicode.org/mail-arch/unicode-ml/y2010-m09/0023.html
2342             http://unicode.org/mail-arch/unicode-ml/y2013-m03/0151.html
2343            
2344             Superscript == modifiers
2345            
2346             http://unicode.org/mail-arch/unicode-ml/y2010-m03/0133.html
2347            
2348             Translation of Unicode names
2349            
2350             http://unicode.org/mail-arch/unicode-ml/y2012-m12/0066.html
2351             http://unicode.org/mail-arch/unicode-ml/y2012-m12/0076.html
2352            
2353             Transliteration on passports (see p.IV-48), UniDEcode
2354            
2355             http://www.icao.int/publications/Documents/9303_p1_v1_cons_en.pdf
2356             http://unicode.org/mail-arch/unicode-ml/y2013-m11/0025.html
2357            
2358             =head1 Keyboard input on Windows: interaction of applications and the kernel
2359            
2360             =head2 Keyboard input on Windows, Part I: what is the kernel doing?
2361            
2362             This is not documented. We try to provide a description which is
2363             both as simple as possible, and as complete as possible. (We ignore
2364             many important parts: the handling of hot keys [or C]), IME,
2365             handling of focus switch [C etc], the synchronization of keystate
2366             between different queues, waking up the system, the keyboard filters,
2367             widening of virtual keycodes, and LED lights.)
2368            
2369             We omit Step 0, when the hardware keyboard drivers (PS/2 or USB) deliver keydown/up(/repeat???) event for scan
2370             codes of corresponding keys. (This is a complicated topic, but well-documented.)
2371            
2372             =over
2373            
2374             =item 1
2375            
2376             The scan codes are massaged (see “Low level scancode mapping” in L<"SEE ALSO">).
2377            
2378             =item 2
2379            
2380             The keyboard layout tables map the translated scancode to a virtual keycode.
2381             (This may also depend on the “modification column”; see L<"Far Eastern keyboards on Windows">.)
2382             The “internal” key state table is updated.
2383            
2384             =item 3
2385            
2386             Mythology: the modification keys (C, C, C etc) are taken into account.
2387            
2388             What actually happens: any key may act as a modification key. The keyboard layout tables
2389             map keycodes to 8-bit masks. (The customary names for lower bits of the mask are C,
2390             C, C, C; two more bits are named C and C — after
2391             OYAYUBI 親指, meaning THUMB; two more
2392             bits are unnamed.) The keycodes of the currently pressed keys (from the “internal” table) are translated to masks, and
2393             these masks are ORed together. (For the purpose of translation to C/etc [done
2394             in ToUnicode()/ToUnicodeEx()], the bit C may be set
2395             also when key C was pressed odd number of times; this is
2396             controlled by C flag in a virtual key descriptor [of the key being currently processed]
2397             of the keyboard layout tables.)
2398            
2399             The keyboard layout tables translate the ORed mask to a number called “modification column”.
2400             (These two numbers are completely hidden from applications. The only glint the
2401             applications get is in the [useless, since there is no way to map it to anything “real”] result of
2402             L.])
2403            
2404             =item 4
2405            
2406             Depending on the current “modification column”, the virtual keycode of the current key event
2407             may be massaged further. (See L<"Far Eastern keyboards on Windows">.) Numpad keycodes
2408             depend also on the state of C — provided the keyboard layout table marks them with
2409             C flag. A few other scancodes may also produce different virtual keycodes in
2410             different situations (e.g., C).
2411            
2412             When C flag is present, fake presses/releases of left C are generated
2413             on presses(repeats)/releases of right C (exception: the press is not generated if any
2414             Ctrl key is down; likewise for when left C up when right C is released). With
2415             keypad presses/releases in presence of C and C, fake releases/presses of C
2416             are generated.
2417            
2418             =item 5
2419            
2420             If needed, asynchronous key state for the current key's non-left-non-right flavor is updated.
2421             (The rest is dropped if the key is consumed by a C hook.)
2422            
2423             Asynchronous key state for the current key is updated. Numpad-by-number flags are updated.
2424             (The rest is dropped if the key is a hotkey.)
2425            
2426             The message C is posted to the application. If C [usually
2427             called the C key] is
2428             down, but C is not, the event is of C flavor (this info is duplicated in
2429             lParam. Additionally, for C tapping, the UP event is also made C — although
2430             at this moment C is not down!).
2431             (The C flag [of the scancode] is also delivered to the application.)
2432            
2433             (When a C message is posted, the key state is updated. This key state
2434             may be used by TranslateMessage() as an argument to ToUnicode(), and is returned by GetKeyState() etc.)
2435            
2436             B
2437             with TranslateMessage()/DispatchMessage() or uses some equivalent code.>
2438            
2439             =item 6
2440            
2441             Before the application dispatches C to the message handler,
2442             TranslateMessage() calls L with C (unless a popup menu
2443             is active; then C — which disables character-by-number input via
2444             numeric KeyPad) and the buffer of 16 UTF-16 code units.
2445            
2446             =item 7
2447            
2448             The UTF-16 code units obtained from ToUnicode() are posted via PostMessage(). All the code units but
2449             the last one are marked by C flag in C. If the initial message
2450             was C, the C flavor is posted; if ToUnicode() returns a
2451             deadkey, the C flavor is posted.
2452            
2453             (The bit C is set/used only for the console handler.)
2454            
2455             =back
2456            
2457             =head2 Keyboard input on Windows, Part II: The semantic of ToUnicode()
2458            
2459             L,
2460             the semantic is not. Here we fix this.
2461            
2462             =over 4
2463            
2464             =item 1
2465            
2466             If the bit 0x01 in C is not set, the key event is checked for contributing to
2467             character-by-number input via numeric KeyPad (and numpad-by-number flags are updated).
2468             If so, the character is
2469             delivered only when C is released. (This is the only case when KEYUP
2470             delivers a character.) Unless the bit 0x02 in C is set, the KEYUP
2471             events are not processed any more.
2472            
2473             =item 2
2474            
2475             The flag C is acted upon, and C is processed.
2476            
2477             =item 3
2478            
2479             The keys which are currently down are mapped to the ORed bitmap (see above).
2480            
2481             =item 4
2482            
2483             If the key event does not contribute to input-by-number via numeric keypad,
2484             and C is set, and no other bits except C, C are set:
2485             then the bit C is removed from the ORed mask.
2486            
2487             =item 5
2488            
2489             If C is active, C state is flipped in the following cases: either at most
2490             C is set in the bitmap, and C is set in the descriptor,
2491             or both C and C are set in the bitmap, and C is set in the
2492             descriptor.
2493            
2494             Now the ORed bitmap is converted to the modification column (see above).
2495            
2496             =item 6
2497            
2498             The key descriptor for the current virtual keycode is consulted (the “row” of the table).
2499             If C flag is on, C is active, and no other bits but C are set in the bitmap,
2500             the row is replaced by the next row.
2501            
2502             =item 7
2503            
2504             The entry at
2505             the row/column is extracted; if defined, it is either a string (zero or more UTF-16 code units), or a
2506             dead key ID (one UTF-16 unit). (I: the ID is taken from the next row of the table.)
2507            
2508             (If the ORed mask corresponds to a valid modification column, but the row does not
2509             define the behaviour at this column, and the bit C is set, and no other bits but C, C
2510             are set, then an autogenerated character in the range 0x00..0x1f is emitted for virtual keycodes
2511             'A'..'Z' and widened virtual keycodes 0xFF61..0xFF91 [for latter, based on the low bits of translation-to-scancode]).
2512            
2513             =item 8
2514            
2515             The resulting units are fed to the finite automaton. When the automaton is in
2516             0-state, a fed character unit is passed through, and a fed deadkey ID sets the state
2517             of the automaton to this number. In non-0 state, the IDs behave the
2518             same as numerically equal character units; the behaviour is described by the keyboard layout
2519             tables. The automaton changes the state according to the input; it may also emit a character
2520             (= 1 code unit; then it is always reset to 0 state). When “unrecognized input” arrives, the automaton
2521             emits the ID I the input, and resets to 0 state.
2522            
2523             (On KEYUP event, the changes to the state of the finite-automaton are ignored. This is only
2524             relevant if C has bit 0x02 set.)
2525            
2526             =item 9
2527            
2528             After UTF-16 units are passed through the automaton, its output is returned by ToUnicode().
2529             If the automaton is in non-0 state, the state ID becomes the output.
2530            
2531             =back
2532            
2533             B MSKLC restricts the length of the string associated to the row/column cell to
2534             be at most 4 UTF-16 code units. There are 2 restrictions for keyboard layouts created with other tools:
2535             first, the maximal number of UTF-16 codepoints in all these strings is stored in a byte, hence there
2536             may be at most 255 UTF-16 codepoints. Second, the actual slot C where the string is allocated
2537             contains two shorts, then the UTF-16 data; its length is also stored in a byte. This results in
2538             the maximal string length of 125 code units — if it is stored in one slot.
2539            
2540             However, with creative allocations, one can use more than one slot for a string storage
2541             (theoretically, one may imagine specially crafted layout where this would break the
2542             layout; in practice, such situations should not arise — even if one stores long strings in
2543             I slots good for 4-char strings).
2544            
2545             B If the application uses the standard message pump
2546             with TranslateMessage()/DispatchMessage(), the caller of ToUnicode() is TranslateMessage().
2547             In this case, ToUnicode() is called with an output buffer consisting of 16 UTF-16 code units. For
2548             such applications, the strings associated to keypresses are truncated after 16 code units.
2549            
2550             B If the string is “long” (i.e., defined via LIGATURES), when it is fed through the
2551             finite automaton, the transitions to non-0 state do not generate deadkey IDs in the output
2552             string. (The LIGATURES may contain strings of one code unit! This may lead to non-obvious
2553             behaviour! If pressing such a key after a deadkey generates a chained deadkey, this
2554             would happen without delivering C message.)
2555            
2556             B How does the kernel recognize which key sequences contribute to
2557             character-by-number input via numeric KeyPad? First, the starter keydown must happen
2558             when the ORed mask contains C, and no other bits except C
2559             and C. (E.g., one can press C, then tap C, release C
2560             [with 1,2,3 on the numeric keypad].
2561             This would deliver C, then C<1> would start character-by-number input
2562             provided C and C together have ORed mask “in between” of C
2563             and C.)
2564            
2565             After the starter keydown (NumPad: 0..9, DOT, PLUS) is recognized as such, all the keydowns
2566             should be followed by the corresponding keyup (keydowns-due-to-repeat are ignored);
2567             more precisely, between two KEYDOWN events, the KEYUP for the first of them must be present.
2568             (In other words, KEYDOWN/KEYUP events must come in the expected order, maybe with some intermixed “extra” KEYUP events.)
2569             In the decimal mode (numeric starter) only the keys with scancodes of NumPad 0..9 are allowed.
2570             In the hex mode (starter is NumPad's DOT or PLUS) also the keys with virtual codes
2571             '0'..'9' and 'A'..'F' are allowed. The sequence is terminated by releasing C
2572             (=C) key.
2573            
2574             B In most cases, the resulting number is reduced mod 256. The exceptions are: the starter key is C,
2575             or the translate-to codepage is multibyte (then a number above 255 is interpreted as big-endian combination
2576             of bytes). In multibyte codepages, numbers 0x80..0xFF
2577             are considered in C codepage (unless the translate-to codepage is Japanese, and the number’s codepoint is Katakana).
2578            
2579             B If the starter key is C or C, the number is a codepoint in the default codepage of the keyboard layout;
2580             if it is another digit, it is in the OEM codepage.
2581             Enabling hex modes (C or C) requires extra tinkering; see L<"Hex input of unicode is not enabled">.
2582            
2583             B since keyboard layouts normally map C to the mask C, and do not define
2584             a modification column for the ORed mask C<=KBDALT>, and C is B stripped for
2585             key events in input-by-number, these key events usually do not generate spurious Cs.
2586            
2587             B if the bit 0x01 of C is intended to be set, then there is a way to query
2588             the kernel “what would happen if a particular key with a particular combination of modifiers
2589             were pressed now”. (Recall that a “usual” ToUnicode() call is “destructive”: it modifies the
2590             I of the keyboard stored in the kernel. The information about whether one is in the
2591             middle of entering-by-number and/or whether one is in the middle of a deadkey sequence is
2592             erased or modified by such calls.) In general, there is no way to preserve the state of
2593             entering-by-number; however, in presence of bit 0x01, this is of no concern, so a solution
2594             exists.
2595            
2596             Using C, and setting the high bit of C gives the same result as
2597             ToUnicode() with C and no high bit in C. Moreover, this preserves the state of
2598             the deadkey-finite-automaton. This way, one gets a “I” flavor of ToUnicode().
2599            
2600             =head2 Keyboard input on Windows, Part III: Customary “special” keybindings of typical keyboards
2601            
2602             Typically, keyboards define a few keypresses which deliver “control” characters
2603             (for benefits of console applications). As shown above, even if the keyboard does not
2604             define C combinations (but does define modification column for C
2605             which is associated to C — with maybe C, C intermixed), C
2606             with C<^letter> I be delivered to the application. The same will happen for combinations
2607             with modifiers which produce only C, C, C.
2608            
2609             Additionally, the typical keyboards also define the following bindings:
2610            
2611             Ctrl-Space ——→ 0x20
2612             Esc, Ctrl-[ ——→ 0x1b
2613             Ctrl-] ——→ 0x1d
2614             Ctrl-\ ——→ 0x1c
2615             BackSpace ——→ ^H
2616             Ctrl-BackSpace ——→ 0x7f
2617             Ctrl-Break ——→ ^C
2618             Tab ——→ ^I
2619             Enter ——→ ^M
2620             Ctrl-Enter ——→ ^J
2621            
2622             In addition to this, the standard US keyboard (and keyboards built by this Perl module) define
2623             the following bindings with C modifiers:
2624            
2625             @ ——→ 0x00
2626             ^ ——→ 0x1e
2627             _ ——→ 0x1f
2628            
2629             =head2 Can an application on Windows accept keyboard events? Part I: insert only
2630            
2631             The logic described above makes the kernel deliver more or less “correct” C messages
2632             to the application. The only bindings which may be defined in the keyboard layout, but will not be
2633             seen as C are those in modification columns which involve C, and do not
2634             involve any bits except C and C. (Due to the stripping of C described
2635             above, these modification columns are never accessed — I.)
2636            
2637             Try to design an application with an entry field; the application should insert B the
2638             characters ”delivered for insertion” by the keyboard layout and the kernel. The application
2639             should not do anything else for all the other keyboard events. First, ignore
2640             the C stripping.
2641            
2642             Then the only C which are NOT supposed to insert the contents to the editable UI fields are the
2643             L described above. They are easy to recognize and ignore: just
2644             ignore all the C carrying characters in the range C<0x00..0x1f>, C<0x7f>, and ignore C<0x20>
2645             delivered when one of C keys is down. So the application which inserts all the I
2646             Cs will follow I of the keyboard as close as possible.
2647            
2648             Now return to consideration of C stripping. If the application follows the policy above,
2649             pressing C would enter C — provided C is mapped to C, as done
2650             on standard keyboards. So the application should recognize which C carrying C
2651             are actually due to stripping of C, and should not insert the delivered characters.
2652            
2653             Here comes the major flaw of the Windows’ keyboard subsystem: the kernel translates
2654             SCANCODE —→ VK_CODE —→ ORED_MASK —→ MODIFICATION_COLUMN, then operates in terms of
2655             ORed masks and modification columns. The application can access only the first two levels
2656             of this translation; one cannot query the kernel for any information about the last
2657             two numbers. (Except for the API L,
2658             but it is unclear how this API may help: it translates “in wrong direction” and covers only BMP.)
2659             Therefore, there is no bullet-proof way to recognize when C arrived
2660             due to C stripping.
2661            
2662             B of course, if only C keys are associated to non-0 ORed mask bitmaps,
2663             and they are associated to the “expected” C bits, then the
2664             application would easily recognize this situation by checking whether C is down,
2665             but C is not. (Also observe that this is exactly the situation distinguishing
2666             C from C — no surprises here!)
2667            
2668             Assuming that the application uses this method, it would correctly recognize stripped
2669             events on the “primitive” keyboards. However, on a keyboard with an extra modifier
2670             key (call it C; assume its mask involves a non-SHIFT/ALT/CTRL/KANA bit),
2671             the C combination will not be stripped by the kernel, but the application
2672             would think that it was, and would not insert the character in C message. A bug!
2673            
2674             Moreover, if “supporting only the naive mapping” were a feasible
2675             restriction, there would be no reason for the kernel to go through the extra step of “the ORed mask”.
2676             Actually, to have a keyboard which is simultaneously backward compatible, easy for users, and
2677             covering a sufficiently wide range of possible characters, one B use more or
2678             less convoluted implementations (as in L bitmaps to modifier keys>).
2679            
2680             B the fact that the kernel and the applications speak different
2681             incompatible languages makes even the primitive task discussed here impossible
2682             to code in a bullet-proof way. A heuristic workaround exists, but it will not
2683             work with all keyboards and all combinations of modifiers.
2684            
2685             B some applications (e.g., Emacs) manage to distinguish
2686             C combination of modifier keys from the combination C produced by
2687             a typical C; these applications are able to use C-modified
2688             keys as bindable accelerator keys. We address this question in the L.
2689            
2690             =head2 Can an application on Windows accept keyboard events? Part II: special key events
2691            
2692             In the preceding section, we considered the most primitive application accepting
2693             the user inserting of characters, and nothing more. “Real applications” must
2694             support also keyboard actions different from “insertion”; so those KEYDOWN events
2695             which are not related to insertion may trigger some “special actions”. To model a full-featured
2696             keyboard input, consider the following specification:
2697            
2698             As above, the application has an entry field, and should insert B the
2699             characters ”delivered for insertion” by the keyboard layout and the kernel.
2700             For all the keyboard events I, the application
2701             should write to the log file which of C modifiers were down,
2702             and the virtual keycode of the KEYDOWN event. Again, at first, we ignore
2703             the C stripping.
2704            
2705             At first, the problem looks simple: with the standard message pump, when C
2706             message is processed, the corresponding C messages are already
2707             sent to the message queue. One can PeekMessage() for these messages; if present,
2708             and not “special”, they correspond to “insertion”, so nothing should be written to the log.
2709             Otherwise, one reports this C to the log.
2710            
2711             Unfortunately, this solution is wrong. Inspect again what the kernel is delivering
2712             during the input-by-number via numeric keyboard: the KEYDOWN for decimal/hex digits
2713             B a part of the “insertion”, but it does not generate any C.
2714             Essentially, the application may see C pressed during the processing of
2715             C, but even if C is supposed to format the paragraph,
2716             this action should not be triggered (but C should be eventually inserted).
2717            
2718             B Input-by-number is getting in the way of using the standard message
2719             pump. C: one should write a clone of TranslateMessage() which delivers
2720             suitable C messages for KEYDOWN/KEYUP involved in Input-by-number. Doing
2721             this, one can also remove silliness from the Windows’ handling of Input-by-number
2722             (such as taking C for numbers above 255).
2723            
2724             B: myTranslateMessage() should:
2725            
2726             =over 4
2727            
2728             =item *
2729            
2730             when not handling input-by-number, call ToUnicode(), but use C, so that ToUnicode() does not handle input-by-number.
2731            
2732             =item *
2733            
2734             Recognize input-by-number starters by the scancode/virtual-keycode, the presence of C down, and
2735             the fact that ToUnicode() produces nothing or C<'0'..'9','.',',','+'>.
2736            
2737             =item *
2738            
2739             After the starter, allow continuation by checking the scancode/virtual-keycode and the presence of C down.
2740             Do not call ToUnicode() for continuation keydown/up events.
2741            
2742             =item *
2743            
2744             After a chain of continuations followed by KEYUP for C, one should PostMessage() for C with
2745             accumulated input.
2746            
2747             =back
2748            
2749             Combining this with the heuristical recognition of stripped C, one gets an architecture
2750             with a naive approximation to handling of C (but still miles ahead of all the applications
2751             I saw!), and bullet-proof handling of other combinations of modifiers.
2752            
2753             B this implementation of MyTranslateMessage() loses one “feature” of the original one:
2754             that input-by-number is disabled in the presence of (popup) menu. However, since I never saw
2755             this “feature” in action (and never have heard of it described anywhere), this must be of
2756             negligible price.
2757            
2758             B I the applications I checked do this logic wrong. Most of them check B for
2759             “whether the key event looks like those which should trigger special actions”, then perform
2760             these special actions (and ignore the character payload).
2761            
2762             As shown above, the reasonable way is to do this in the opposite order, and check for
2763             special actions only I it is known that the key event does not carry a character payload.
2764             The impossibility of reversing the order of these checks is due to the same reason as one discussed
2765             above: the
2766             kernel and application speaking different languages.
2767            
2768             Indeed, since the application knows nothing
2769             about ORed masks, it has no way to distinguish that, for example, C may be I to be
2770             distinct from C and C, and while the last two do not carry the character
2771             payload, the first one does. Checking I for the absence of C
2772             delegates such a discrimination to the kernel, which has enough information about the
2773             intent of the keyboard layout. (Likewise, the keyboard may define the pair of C
2774             and C to insert ᵃ. Then C alone will not carry any character payload,
2775             its combination with a deadkey may.)
2776            
2777             Why are the applications trying to grab the potential special-key messages as early
2778             as possible? I suspect that the developers are afraid that otherwise, a keyboard layout may
2779             “steal” important accelerators from the application. While this is technically possible,
2780             nowadays keyboard accelerators are rarely the I way to access features of the applications;
2781             and among hundreds of keyboard layouts I saw, all but 2 or 3 would not “steal” I from applications.
2782             (Or maybe the developers just have no clue that the correct solution is so simple?)
2783            
2784             B Among the applications I checked, the worst offender is Firefox. It follows L
2785             unfortunate advice by Mike Kaplan|http://blogs.msdn.com/b/michkap/archive/2005/01/19/355870.aspx>
2786             and tries to reconstruct the mentioned above row/columns table of the keyboard layout, then
2787             uses this (heuristically reconstructed) table as a substitute for the real thing. And
2788             due to the mismatch of languages spoken by kernel and applications, working via such an
2789             attempted reconstruction turns out to have very little relationship to the actually intended
2790             behaviour of the keyboard (the behaviour observed in less baroque applications). In particular, if
2791             a keyboard uses different modification columns for C and C=C
2792             modifiers, pressing C inputs wrong characters in Firefox.
2793            
2794             B Among notable applications which fail spectacularly is Emacs. The developers
2795             forget that for a generation, it is already XXI century; so they L
2796             ToUnicode()|http://fossies.org/linux/misc/emacs-24.3.tar.gz:a/emacs-24.3/src/w32fns.c>!
2797             (Even if ToUnicode() is available, its result is converted to the result of the
2798             corresponding ToAscii() code.)
2799            
2800             In addition to 8-bitness, Emacs also suffers from check-for-specials-first syndrome…
2801            
2802             =head2 Can an application on Windows accept keyboard events? Part III: better detection of C stripping
2803            
2804             We explained above that L
2805             handling the case when C might have been stripped by the kernel|"Can an application on Windows accept keyboard events? Part I: insert only">. The
2806             very naive heuristic algorithm described there will recognize the simplest
2807             cases, but will also have many false positives: for many combinations it will decide
2808             that C was stripped while it was not. The result will be that
2809             when the kernel reports that the character C is delivered, the
2810             application would interpret it as C, so C would not be inserted.
2811             It will not handle, for example,
2812             the C modifier combinations with L
2813             from that section|"A convenient assignment of C bitmaps to modifier keys">.
2814            
2815             Indeed, with this assignment, the only combination of modifiers for which the kernel will strip C
2816             is C (and C if one does not assign any bits to C).
2817             So C is not stripped, hence the
2818             correct C is delivered by the kernel. However, since this combination is
2819             still visible to the application as having C, and not having C,
2820             it is delivered as the C flavor.
2821            
2822             So the net result is: one designed a nice assignment of masks to the modifier
2823             keys. This assignment makes keypresses successfully navigate around the quirks
2824             of I’s calculations of the character to deliver. However, the naive
2825             algorithm used by I will force the application to ignore this
2826             correctly delivered character to insert.
2827            
2828             A very robust workaround for this problem is introduced in the
2829             L.
2830             What we discuss here is a simple heuristic to recognize the combinations involving
2831             C and an “unexpected modifier”, so that these combinations become
2832             exceptions to the rule “C flavor means ‘do not insert’”.
2833            
2834             B when C message arrives, inspect the virtual keycodes
2835             which are reported as pressed. Ignore the keycode for the current message.
2836             Ignore the keycodes for “usual modifiers” (C) which are
2837             expected to keep stripping. Ignore the keycode for the keys which may be
2838             kept “stuck down” by the keyboards (see L<"Far Eastern keyboards on Windows">).
2839             If some keycode remains, then consider it as an “extra” modifier, and ignore
2840             the fact that the message was of C flavor.
2841            
2842             So all one must do is to define one user message (for input-by-number-in-progress),
2843             code two very simple routines, MyTranslateMessage() and HasExtraModifiersHeuristical(), and perform two
2844             PeekMessage() on KEYDOWN event, and one gets a powerful almost-robust
2845             algorithm for keyboard input on Windows. (Recall that all the applications
2846             I saw provide close-to-abysmal support of keyboard input on Windows.)
2847            
2848             =head2 Can an application on Windows accept keyboard events? Part IV: application-specific modifiers
2849            
2850             Some applications handle certain keys as “extra modifiers for the purpose of
2851             application-specific accelerator keypresses”. For example, Emacs may treat
2852             the C in this way (as a C modifier for its bindable-keys
2853             framework). Usually, C does not
2854             contribute anything into the ORed mask; hence, C
2855             combination will deliver the same character as just C alone. When
2856             the application treats C as an accelerator, it must
2857             ignore the character delivered by this combination.
2858            
2859             Additionally, many keyboard layouts
2860             use the C flag (it makes the kernel fake pressing/releasing the
2861             left C key when the right C is pressed/released) with “standard”
2862             assignments of the ORed masks. On such keyboards, pressing right C (i.e.,
2863             C) delivers the same characters as pressing any C together with
2864             any C. On the other hand, an application may distinguish left-C combined
2865             with left-C from C pressed
2866             on such keyboards by inspecting which (virtual) keys are currently down. So the application
2867             may consider left-C combined with left-C
2868             as “intended to be an accelerator”; then the application would ignore the characters delivered by
2869             such a keypress.
2870            
2871             One can immediately see that such applications would inevitably enter into conflict
2872             with keyboards which B these key combinations. For example, on a keyboard
2873             which defines an ORed mask for C, pressing C
2874             I deliver a different character than pressing C. However, the
2875             application does not know this, and just ignores the character delivered by
2876             C.
2877            
2878             A similar situation arises when the keyboard defines C to
2879             deliver a different character than C. Again, the character will be ignored
2880             by the application. Since the fact that such an “unusual” keyboard is active
2881             implies user's intent, such behaviour is a bug of the application.
2882            
2883             B an application must interpret a keypress as “intended to be an accelerator”
2884             only if this keypress produces no character, or produces B character as
2885             the key without the “extra” modifier. (Correspondingly, if replacing C by
2886             C does not change the delivered character.)
2887            
2888             B to do this, the application must be able to query “what would happen
2889             if the user pressed different key combinations?”; such a query requires “non-destructive”
2890             calls of ToUnicode(). (These calls must be done I the “actual”, destructive,
2891             call of ToUnicode() corresponding to the currently pressed down modifiers.)
2892            
2893             Fortunately, with the framework described in the
2894             L stripping">,
2895             the call of ToUnicode() is performed with C being 0x01. As explained near the end of the section
2896             L<"Keyboard input on Windows, Part II: The semantic of ToUnicode()">, this call has a “non-destructive”
2897             flavor! Hence, for applications with such “enhanced” modifier keys, the logic of the
2898             L stripping">
2899             should be enhanced in the following ways:
2900            
2901             =over 4
2902            
2903             =item *
2904            
2905             Make a non-destructive call of ToUnicode(). Store the result. If no insertable character
2906             (or deadkey) is delivered, ignore the rest.
2907            
2908             =item *
2909            
2910             If both left C and left C are down (AND right C AND right C are up!)
2911             replace left C by the right C, and
2912             make another non-destructive call of ToUnicode(). If the result is identical to the first one,
2913             mark C as “special modifiers present for accelerators”.
2914            
2915             Remove left C and left C from the collection of keys which are down (argument to ToUnicode()),
2916             and continue with the previous step.
2917             (This may be generalized to other combinations of left/right C/C.)
2918            
2919             =item *
2920            
2921             For every other “special modifier” virtual key which is down,
2922             make another non-destructive call of ToUnicode() with this virtual key up.
2923             If the result is identical to the first one, mark this “special modifier” as “present for accelerators”.
2924            
2925             =item *
2926            
2927             Finally, if nothing suitable for accelerators is found, make a “usual” call of ToUnicode()
2928             (so that on future keypresses the deadkey finite automaton behaves as expected). Generate the
2929             corresponding messages.
2930            
2931             =back
2932            
2933             If no insertable character is delivered, or suitable “extra” accelerators are found, the
2934             process-the-accelerator logic should be triggered.
2935            
2936             For example, if the character Ω is delivered, and a special modifier C is down
2937             and marked as suitable as accelerator, then Ω will be ignored. The accelerator for C
2938             should be triggered. (Processing this as C may be also done. This may require an
2939             extra non-destructive call.)
2940            
2941             An alternative logic is possible: if this Ω was generated by modifiers C
2942             with the virtual key C, then the application may query what C generates standalone (for example,
2943             cyrillic ц), and trigger the accelerator for C. (This assumes that
2944             C with C generates the same Ω!)
2945            
2946             If no character is delivered, then this is a “trivial” situation, and the framework of accelerator keys
2947             should be called as if the complication considered here did not exist.
2948            
2949             B this logic handles the intended behaviour of C key as well! So, with this implementation,
2950             the application would
2951            
2952             =over 5
2953            
2954             =item *
2955            
2956             Handle C-NUMPAD input-by-number in an intuitive mostly compatible with Windows way
2957             (but not bug-for-bug compatible with the Windows' way);
2958            
2959             =item *
2960            
2961             Would recognize C modifier which does not change the delivered character as such. (So it may be processed
2962             as the menu accessor.)
2963            
2964             =item *
2965            
2966             Would recognize B the key combinations defined by the keyboard layout (and deliverable via ToUnicode());
2967            
2968             =item *
2969            
2970             Would recognize all the application-specific extra modifier keys which do not interfere with the
2971             key combinations defined by the keyboard layout.
2972            
2973             =back
2974            
2975             =head2 Far Eastern keyboards on Windows
2976            
2977             The syntax of defining these keyboards is documented in F of the toolkit.
2978             The semantic of the NLS table is undocumented. Here we fix this.
2979            
2980             The function returning the NLS table should be exported with ordinal 2.
2981             The offsets of both tables in the module should be below 0x10000.
2982             The keyboard layout should define a function with ordinal 3 or 5 returning 0, or
2983             be loaded through such a function returning non-0; the signature is
2984            
2985             BOOL ordinal5(HKL hkl, LPWSTR __OUT__ dllname , PCLIENTKEYBOARDTYPE type_if_remote_session, LPVOID dummy);
2986             BOOL ordinal3(LPWSTR __OUT__ dllname);
2987            
2988             if return is non-0, keyboard is reloaded from C.
2989            
2990             In short, these layouts have an extra table which may define the following enhancements:
2991            
2992             One 3-state (or 2-state) radio-button:
2993             on keys with VK codes DBE_ALPHANUMERIC/DBE_HIRAGANA/DBE_KATAKANA
2994             (the third state can be also toggled independently of the others).
2995             Three Toggling (like CAPSLOCK) button (pairs):
2996             toggling radio-button-like VK codes DBE_SBCSCHAR/DBE_DBCSCHAR, DBE_ROMAN/DBE_NOROMAN, DBE_CODEINPUT/DBE_NOCODEINPUT
2997             Make key produce different VK codes with different modifiers.
2998             Make a “reverse NUMPAD” translation.
2999             Manipulate a couple of bits of IME state.
3000             A few random hacks for key-deficient hardware layouts.
3001            
3002             (Via assigning ORed masks to radio-buttons, the radio-buttons and toggle-buttons above may affect the layout.
3003             Using this, it is easy to convert each toggling button to a 2-state radiobutton.
3004             The limitation is that the number of modification columns compatible with the
3005             extra table is at most 8 — counting one for C.)
3006            
3007             Every C may be associated to two tables of functions, the “normal” one, and the “alternative” one. For
3008             every modification column, each table
3009             assigns a filter id, and a parameter for the filter. (Recall that columns are associated
3010             to the ORed masks by the table in the C structure. One B define all the entries
3011             in the table — or at least the entries reachable by the
3012             modifier keys. B the limit on the number of states in the tables is 8; it is not clear what happens with the 
3013             states above this; some versions of Windows may buffer-overflow.)
3014            
3015             The input/output for the filters consists of: the C, C/C flag, the flags associated to the scancode in C<< KBDTABLES->ausVK >>
3016             (may be added to upstream), the
3017             parameter given in C structure (and an unused C read/write parameter). A filter may change these parameters,
3018             then pass the event forward, or it may ignore an event. Filters by ID:
3019            
3020             KBDNLS_NULL Ignore key (should not be called; only for unreachable slots in the tables).
3021             KBDNLS_NOEVENT Ignore key.
3022             KBDNLS_SEND_BASE_VK Pass through VK unchanged.
3023             KBDNLS_SEND_PARAM_VK Replace VK by the number specified as the parameter.
3024             KBDNLS_KANAMODE Ignore UP; on DOWN, toggle (=generate UP-or-DOWN for) DBE_KATAKANA
3025            
3026             These 3 generate UP for “other” key, then DOWN for the target (as needed!):
3027             KBDNLS_ALPHANUM Ignore UP; DBE_ALPHANUMERIC,DBE_HIRAGANA,DBE_KATAKANA → DBE_ALPHANUMERIC
3028             KBDNLS_HIRAGANA Ignore UP; DBE_ALPHANUMERIC,DBE_HIRAGANA,DBE_KATAKANA → DBE_HIRAGANA
3029             KBDNLS_KATAKANA Ignore UP; DBE_ALPHANUMERIC,DBE_HIRAGANA,DBE_KATAKANA → DBE_KATAKANA
3030            
3031             KBDNLS_SBCSDBCS Ignore UP; Toggle DBE_SBCSCHAR / DBE_DBCSCHAR
3032             KBDNLS_ROMAN Ignore UP; Toggle DBE_ROMAN / DBE_NOROMAN
3033             KBDNLS_CODEINPUT Ignore UP; Toggle DBE_CODEINPUT / DBE_NOCODEINPUT
3034             KBDNLS_HELP_OR_END Pass-through if NUMPAD flag ON (in ausVK); send-or-toggle HELP/END (see below)
3035             KBDNLS_HOME_OR_CLEAR Pass-through if NUMPAD flag ON (in ausVK); send HOME/CLEAR (see below)
3036             KBDNLS_NUMPAD If !NUMLOCK | SHIFT, replace NUMPADn/DECIMAL by no-numpad flavors
3037             KBDNLS_KANAEVENT Replace VK by the number specified as the parameter. On DOWN, see below
3038             KBDNLS_CONV_OR_NONCONV See below
3039            
3040             The startup values are C, C, C, C.
3041            
3042             Typical usages:
3043            
3044             KBDNLS_KANAMODE (VK_KANA (Special case))
3045             KBDNLS_ALPHANUM (VK_DBE_ALPHANUMERIC)
3046             KBDNLS_HIRAGANA (VK_DBE_HIRAGANA)
3047             KBDNLS_KATAKANA (VK_DBE_KATAKANA)
3048             KBDNLS_SBCSDBCS (VK_DBE_SBCSCHAR/VK_DBE_DBCSCHAR)
3049             KBDNLS_ROMAN (VK_DBE_ROMAN/VK_DBE_NOROMAN)
3050             KBDNLS_CODEINPUT (VK_DBE_CODEINPUT/VK_DBE_NOCODEINPUT)
3051             KBDNLS_HELP_OR_END (VK_HELP or VK_END) [NEC PC-9800 Only]
3052             KBDNLS_HOME_OR_CLEAR (VK_HOME or VK_CLEAR) [NEC PC-9800 Only]
3053             KBDNLS_NUMPAD (VK_xxx for Numpad) [NEC PC-9800 Only]
3054             KBDNLS_KANAEVENT (VK_KANA) [Fujitsu FMV oyayubi Only]
3055             KBDNLS_CONV_OR_NONCONV (VK_CONVERT and VK_NONCONVERT) [Fujitsu FMV oyayubi Only]
3056            
3057             Toggle (= 2-state) and 3-state radio-keys are switched by sending KEYUP for the currently
3058             “active” key, then KEYDOWN for the newly activated key. When switching 3-state, additional
3059             action happens depending on the new state:
3060            
3061             DBE_ALPHANUMERIC If IME is off, and KANA toggle is on, switch IME on in the KATAKANA mode
3062             DBE_HIRAGANA If IME is off, and KANA toggle is off, switch IME off in the ALPHANUMERIC mode
3063             DBE_KATAKANA SAME AS HIRAGANA
3064            
3065             Additionally, C of C switches IME to
3066            
3067             KANA toggle on: switch IME off in the ALPHANUMERIC mode
3068             KANA toggle off: switch IME on in the KATAKANA mode
3069            
3070             and C (on C and C) passes through, and does
3071            
3072             KANA toggle on, IME off: switch IME off in the ALPHANUMERIC mode
3073             otherwise: Do nothing
3074            
3075             (The semantic of IME being-in/switching-to OFF/ON mode is not clear (probably IME-specific).
3076             The switching happens by
3077             calling C for devices with a C
3078             and C, while putting the request into global memory — unless
3079             C flag is set on the foreground keyboard.)
3080            
3081             For C, the registry is checked at startup. For C, the registry is checked at startup, and:
3082            
3083             KANA_AWARE: flips END/HELP if KANA toggle is ON (on input, “HELP” means not-an-END)
3084             otherwise: sends END/HELP depending on what registry says.
3085            
3086             The checked values are C, C, C in the hive C.
3087            
3088             Which of two tables is chosen is controlled by the type (C/C/C) of the key's tables, and the (per key) history bit.
3089             The initial state of the bit is in C
3090             (L!).
3091             The tables of type C are ignored (the key descriptor passes all events
3092             through), the C key uses only the first table. The C key uses the first table on KEYDOWN, and
3093             uses the first or the second table on KEYUP. The choice depends on modifiers present in the preceding KEYDOWN;
3094             the bitmap C is indexed by the modification column of KEYDOWN event; the second table is
3095             used on the following KEYUP if the indexed bit is set. (The KEYREPEAT events are handled the same way as KEYUP.)
3096            
3097             The typical usage of C keys is to make the KEYUP event match B no matter what
3098             is the order of releasing the modifier keys and the main key.
3099             Having the history bit up “propagates” to KEYUP the information about which modifiers were active on KEYDOWN. This helps in ensuring
3100             consistency of some actions between the KEYDOWN event and the corresponding KEYUP event: remember that the state of modifiers
3101             on KEYUP is often different than the state on KEYDOWN: people can release modifiers in different orders:
3102            
3103             press-Shift, press-Enter, release-Shift, release-Enter ---> Shift-Enter pressed, Enter released
3104             press-Shift, press-Enter, release-Enter, release-Shift ---> Shift-Enter pressed and released
3105            
3106             If pressing C acts as if it were the C key (and only so with C!), to ensure consistency, one would need
3107             to make releasing C B also releasing C to act as if it were the C key. So one can make pressing
3108             C special (via the first table), sets the history bit on C, and make I map C
3109             and C to be special too (send C) I.
3110            
3111             B the standard key processing has its own filters too. C processing adds fake C up/down events
3112             (provided the flag C is set);
3113             C processing ignores/fakes the C/C for C (=C)
3114             (provided the flag C is set); C becomes
3115             C (same for C); C become C/C; C may become C.
3116             OEM translations (NumPad→Cursor, except C; C<00> to double-press of C<0>) come first, then locale-specific (C,
3117             C), then those defined in the tables above.
3118            
3119             B As opposed to these translations, C and C is actually handled inside the
3120             event loop, by ToUnicode().
3121            
3122             B L (and references inside!)
3123             explains fine points of using Japanese keyboards. See also: L.
3124            
3125             =head2 A convenient assignment of C bitmaps to modifier keys
3126            
3127             In this section, we omit discussion of C modifier; so every
3128             bitmap may be further combined with C to produce two different bindings.
3129             Assign ORed masks to the modifier keys as follows:X
3130            
3131             lCtrl Win lAlt rAlt Menu rCtrl
3132             CTRL|LOYA CTRL|X1 ALT|KANA CTRL|ALT|LOYA|X1 CTRL|ALT|X2 CTRL|ALT|ROYA
3133            
3134             with suitable backward-compatible mapping of ORed masks to modification columns.
3135             This assignment allows using C flag (faking presses of C when
3136             C is pressed — this greatly increases compatibility of C with brain-damaged
3137             applications), all the combinations involving at most one of C, C or
3138             C give distinct ORed masks, it
3139             avoids stripping of C on C combined with other modifiers,
3140             makes C work with all relevant combinations, while completely preserving all
3141             application-visible properties of keyboard events [except those with C
3142             modifiers; this combination is equivalent to C].
3143            
3144             Note that ignoring the C and C bits, all combinations of
3145             C are possible, which gives at least 32 C-pairs.
3146             In fact, the only combination of C which may appear with
3147             different C bits is C; hence there are 33 possible combinations
3148             of C. Indeed, C is determined by C.
3149             If one of C is present, then C is set; so assume C are not present.
3150             But then, if C B set, then both C B be present; which gives the
3151             only duplication.
3152            
3153             Leaving out 5 combinations of C, C, C [8, minus the empty one, and
3154             C, which is avoided by most applications due to its similarity to C,
3155             and C which is undistinguishable by the mask from C]
3156             to have bindable keypresses in applications, and having C as equivalent to
3157             C, this gives 27 C-pairs which may produce characters.
3158            
3159             B C being undistinguishable by the mask from C
3160             is not a big deal, since there are no standard keyboard shortcuts involving C.
3161            
3162             B Combinations of C with C L combination: multiple problems">;
3163             likewise for L with C |"C combination: many keys are not delivered to applications">.
3164            
3165             B Removing the binding for C key, only 21 useful C-pairs remain.
3166             (This is what C of L is
3167             using; out of 24 distinct combinations, C, C and C should be
3168             excluded.) B While this may look as a complete overkill, recall that characters
3169             outside BMP can be inserted on Windows I via one keypress, possibly with many
3170             modifiers. (This restriction relates only to the “classical” flavor of Windows keyboard layouts).
3171             Unicode L
3172             discourse|http://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols>. If a keyboard
3173             layout would want to support these letters, this would quickly exhaust the possible combinations
3174             of modifiers. (For 2-script layout, one could live with Latin/AltGr-Latin/Greek + 18 mathematical
3175             alphabets. But for layouts supporting more scripts, it looks like using C key is not
3176             avoidable.)
3177            
3178             B Applications may call ToUnicode() with I of modifiers:
3179             for example, they may "put" C down, but do not specify whether it is C or
3180             C. Likewise for C.
3181            
3182             To support that, one would need to define a mask for standalone C and C
3183             (i.e., C and C). Since these modifiers are present when the real “left-right-handed”
3184             keys are down, the masks should be “contained” in the masks of handed keys. B one
3185             can make the pseudo-key C to generate bit C, and the pseudo-key C to generate
3186             the bit C. Then for any combination of modifiers with unhanded C and/or C,
3187             either the corresponding combination of bits is supported by the layout (and then the
3188             application will access the corresponding modification column — which is probably not
3189             the “expected” column corresponding to some handed flavor), or the combination is not
3190             yet defined. In the latter case, one may actually decide I to resolve this: one can
3191             map this combination of modifiers to an arbitrary modification column!
3192            
3193             In particular, one can map such combination of modifiers to a certain choice of handedness
3194             of C and C. (An example of such a problematic application is L;
3195             look for “I”.)
3196            
3197             B Some applications may do a "reverse lookup" using
3198             L|https://msdn.microsoft.com/en-us/library/windows/desktop/ms646329%28v=vs.85%29.aspx>
3199             (this is B API which exposes the modifier masks). Most of these calls would not
3200             know anything about "higher bits", only S/C/A would be covered. In particular,
3201             it makes sense to add "fake" entries mapping combinations of bits 0x1/0x2/0x4 to the
3202             "corresponding" modification columns.
3203            
3204             For example, C above would produce modifier mask C;
3205             this mask would access a certain column in the table of bindings; make the
3206             mask C access the same column. Then an application making a lookup
3207             for a certain character via VkKeyScanW() would see C. Since this is
3208             the mask which is I produced by pressing C, the application
3209             would think (correctly! — but only thanks to this fake entry) that this character
3210             may be produced with C modifier.
3211            
3212             B The maximal number of “modification columns” supported by Windows is 126. A
3213             larger number would make the size of C to overflow the maximal number
3214             storable in the field C of type C = C.
3215            
3216             Given that the column 15 is ignored, this reduces the number of strings associated to
3217             a keypress (with different “modifiers”) to 125.
3218            
3219             =head1 WINDOWS GOTCHAS
3220            
3221             First of all, keyboard layouts on Windows are controlled by DLLs; the only function
3222             of these DLLs is to export a table of "actions" to perform. This table is passed
3223             to the kernel, and that's it - whatever is not supported by the format of this table
3224             cannot be implemented by native layouts. (The DLL performs no "actions" when
3225             actual keyboard events arrive.)
3226            
3227             Essentially, the logic is like that: there are primary "keypresses", and
3228             chained "keypresses" ("prefix keys" [= deadkeys] and keys pressed after them).
3229             Primary keypresses are distinguished by which physical key on keyboard is
3230             pressed, and which of "modifier keys" are also pressed at this moment (as well
3231             as the state of "latched keys" - usually C only, but may be also C). This combination
3232             determines which Unicode character is generated by the keypress, and whether
3233             this character starts a "chained sequence".
3234            
3235             On the other hand, the behaviour of chained keys is governed I by Unicode
3236             characters they generate: if there are several physical keypresses generating
3237             the same Unicode characters, these keypresses are completely interchangeable
3238             inside a chained sequence. (The only restriction is that the first keypress
3239             should be marked as "prefix key"; for example, there may be two keys producing
3240             B<-> so that one is producing a "real dash sign", and another is producing a
3241             "prefix" B<->.)
3242            
3243             The table allows: to map Cs to Cs; to associate a C to several
3244             (numbered) choices of characters to output, and mark some of these choices as prefixes
3245             (deadkeys). (These "base" choices may contain up to 4 16-bit characters (with 32-bit
3246             characters mapped to 2 16-bit surrogates); but only those with 1 16-bit character may
3247             be marked as deadkeys.) For each prefix character (not a prefix key!) one can
3248             associate a table mapping input 16-bit "base characters" to output 16-bit characters,
3249             and mark some of the output choices as prefix characters.
3250            
3251             The numbered choices above are determined by the state of "modifier keys" (such as
3252             C, C, C), but not directly. First of all, C may be
3253             associated to a certain combination of 6 "modifier bits" (called "logical" C,
3254             C, C, C, C and C, but the logical bits are not
3255             required to coincide with names of modifier keys). (Example: one can bind C
3256             to activate C and C bits.) The 64 possible combinations of modifier bits
3257             are mapped to the numbered choices above.
3258            
3259             Additionally, one can define two "separate
3260             numbered choices" in presence of CapsLock (but the only allowed modifier bit is C).
3261             Another way to determine what C is doing: one can mark that it
3262             flips the "logical C" bit (separately on no-modifiers state, C-only state,
3263             and C-only state [?!] - here "only" allows for the C bit to be C).
3264            
3265             C key is considered equivalent to C combination (if those
3266             are present, or always???), and one cannot bind C and C combinations.
3267             Additionally, binding bare C modifier on alphabetical keys (and
3268             C, C<[>, C<]>, C<\>) may confuse some applications.
3269            
3270             B there is some additional stuff allowed to be done (but only in presence
3271             of Far_East_Support installed???). FE-keyboards can define some sticky state (so
3272             may define some other "latching" keys in addition to C). However,
3273             I did not find a clear documentation yet (C in the DDK toolkit???).
3274            
3275             There is a tool to create/compile the required DLL: F of I
3276             Keyboard Layout Creator> (with a graphic frontend F). The tool does
3277             not support customization of modifier bits, and has numerous bugs concerning binding keys which
3278             usually do not generate characters. The graphic frontend does not support
3279             chained prefix keys, adds another batch of bugs, and has arbitrary limitations:
3280             refuses to work if the compiled version of keyboard is already installed;
3281             refuses to work if C is redefined in useful ways.
3282            
3283             B uninstall the keyboard, comment the definition of C,
3284             load in F and create an install package. Then uncomment the
3285             definition of C, and compile 4 architecture versions using F,
3286             moving the DLLs into suitable directories of the install package. Install
3287             the keyboard.
3288            
3289             For development cycle, one does not need to rebuild the install package
3290             while recompiling.
3291            
3292             The following sections classify GOTCHAS into 3 categories:
3293            
3294             L<"WINDOWS GOTCHAS for keyboard users">
3295            
3296             L<"WINDOWS GOTCHAS for keyboard developers using MSKLC">
3297            
3298             L<"WINDOWS GOTCHAS for keyboard developers (problems in kernel)">
3299            
3300             =head1 WINDOWS GOTCHAS for keyboard users
3301            
3302             =head2 MSKLC keyboards not working on Windows 8 without reboot
3303            
3304             The layout is shown as active, but "preview" is grayed out,
3305             and is not shown on the Win-Space list. See also:
3306            
3307             http://www.errordetails.com/125726/activate-custom-keyboard-layout-created-with-msklc-windows
3308            
3309             The workaround is to reboot. Compare with
3310            
3311             http://blogs.msdn.com/b/michkap/archive/2012/03/12/10281199.aspx
3312            
3313             =head2 Default keyboard of an application
3314            
3315             Apparently, there is no way to choose a default keyboard for a certain
3316             language. The configuration UI allows moving keyboards up and down in
3317             the list, but, apparently, this order is not related to which keyboard
3318             is selected when an application starts. (This may be fixed on Windows 8?)
3319            
3320             =head2 Hex input of unicode is not enabled
3321            
3322             One needs to explicitly tinker with the registry (see F)
3323             and then I to enable this.
3324            
3325             =head2 Standard fonts have some chars exchanged
3326            
3327             At least in Consolas and Lucida Sans Unicode φ and ϕ are exchanged.
3328             Compare with Courier and Times. (This may be due to the L
3329             Unicode's pre-v3.0 choice of representative glyphs|http://en.wikipedia.org/wiki/Phi#Computing>,
3330             or the L
3331             between French/English Apla=Didot/Porson's approaches|http://www.greekfontsociety.gr/pages/en_typefaces19th.html>.)
3332            
3333             =head2 The console font configuration
3334            
3335             According to L, it is controlled by Registry hive
3336            
3337             HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Console\TrueTypeFont
3338            
3339             The key C<0> usually gives C, and the key C<00>
3340             gives C. Adding random numbers does not work; however,
3341             if one adds one more zero (at least when adding to a sequence of zeros),
3342             one can add more fonts.
3343             You need to export this hive (e.g., use
3344            
3345             reg export "HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Console\TrueTypeFont" console-ttf.reg
3346            
3347             ), save a copy (so you can always restore if the love goes sour)
3348             then edit the resulting file.
3349            
3350             So if the maximal key with 0s is C<00>, add one extra row with an extra 0
3351             at end, and the family name of your font. The "family name" is what the Font
3352             list in C shows for I (a "stacked" icon is shown);
3353             for individual fonts the weight (Regular, Book, Bold etc) is appended. So I add a line
3354            
3355             "000"="DejaVu Sans Mono"
3356            
3357             the result is (omitting Far Eastern fonts)
3358            
3359             Windows Registry Editor Version 5.00
3360            
3361             [HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Console\TrueTypeFont]
3362             "949"="..."
3363             "0"="Lucida Console"
3364             "950"="..."
3365             "932"="..."
3366             "936"="..."
3367             "00"="Consolas"
3368             "000"="DejaVu Sans Mono"
3369            
3370             The full file is in F. After importing this
3371             file via F (or give it as parameter to F; both require administrative privileges)
3372             the font is immediately available in menu. (However, it does not work in "existing"
3373             console windows, only in newly created windows.)
3374            
3375             B<(Do not use the example file directly. First inspect the hive exported on your system,
3376             and find the number of 0s to use. Then add a new line with correct number of
3377             zeros - as a value, one can use the string above. This will I the defaults
3378             of your setup.> Keep in mind that
3379             selection-by-fontfamily is buggy: if you have more than one version of the font
3380             in different weight, it is a Russian Roulette which one of them will be taken
3381             (at least for DejaVu, which uses C as the default weight). First install
3382             the "normal" flavor of the font, then do as above (so the system has no way of picking
3383             the wrong flavor!), and only after this install the remaining
3384             flavors.
3385            
3386             B keep in mind that I distribute a good-for-console L<“merge” of two
3387             fonts|http://ilyaz.org/software/fonts/>: C; C brings
3388             in nicely shaped nicely-scalable
3389             glyphs, and C brings a scalable font with complete coverage of BMP (as of 2015, of Unicode C).
3390             (We omit Han/Hangul since it does not fit in a narrow box of a console font.)
3391             (As of 2015, it does not include U+30fb since apparently, this breaks display of
3392             "undefined" character in PUA in Windows' console.)
3393            
3394             B the string to put into C is the I of the font.
3395             The family name is what is shown in the C list of the C — but only
3396             for families with more than one font; otherwise the “metric name” of the font is appended.
3397            
3398             On Windows, it is tricky to find the family name using the default Windows' tools, without
3399             inspecting the font in a font editor. One workaround is to select the font in C
3400             application, then inspect C via:
3401            
3402             reg export HKCU\Software\Microsoft\CharMap character-map-font.reg
3403            
3404             Note: the Microsoft KB article mentioned above lists the wrong way to find the family name.
3405             What is visible in the C dialogue of the font, and in C is the
3406             I. Fortunately, quite often the full name and the family name coincide —
3407             this is what happened with C. To find the "Full name" of the font, one can look into the hive
3408            
3409             HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Fonts
3410             reg export "HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Fonts" fonts.reg
3411            
3412             For example, after installing C, I see
3413             C as a key in this hive.
3414            
3415             B for desktop icons coming from the “Public” user (“shared”
3416             icons) which start a console application, the default font is not directly editable.
3417             To reset it, one must:
3418            
3419             =over
3420            
3421             =item *
3422            
3423             copy the F<.lnk> icon file to “your” desktop directory;
3424            
3425             =item *
3426            
3427             start the application using the “new” icon;
3428            
3429             =item *
3430            
3431             change the font via “Properties” of the window's menu;
3432            
3433             =item *
3434            
3435             as administrator, copy the F<.lnk> file back to the F
3436             directory (usually in something like F). Manually refresh
3437             the desktop. Verify that the “old” icon works as expected.
3438             (Now you can remove the “new” icon created on the first step.)
3439            
3440             =back
3441            
3442             =head2 There is no way to show Unicode contents on Windows
3443            
3444             Until Firefox C, one could use FireFox to show arbitrary
3445             Unicode text (limited only by which fonts are installed on your
3446             system). If you upgraded to a newer version, there is no (AFAIK)
3447             Windows program (for general public consumption) which would visualize
3448             Unicode text. The applications are limited either (in the worst case) by
3449             the characters supported by the currently selected font, or (in the best
3450             case) they can show additional characters, but only those considered by the
3451             system as "important enough" (coming from a few of default fonts?).
3452            
3453             There is a workaround for this major problem in FireFox (present at least
3454             up to C). The problem is caused
3455             by L
3456             which blatantly saves a few seconds of load time for a tiny minority of
3457             users, the price being an inability to show Unicode I
3458             (compare with comments L<33|https://bugzilla.mozilla.org/show_bug.cgi?id=705594#c33>
3459             and L<75|https://bugzilla.mozilla.org/show_bug.cgi?id=705594#c75> on the bug report above).
3460            
3461             It is not documented, but this action is controlled by C
3462             setting C. To enable Unicode,
3463             make this setting into C (if you have it in the list as C, double-clicking it would
3464             do this — do search to determine this; otherwise you need to create a new
3465             C entry).
3466            
3467             There is an alternative/additional way to enable extra fonts; it makes
3468             sense if you know a few character-rich fonts present on your system. The (undocumented)
3469             settings C (apparently) control fallback fonts for situations
3470             when a suitable font cannot be found via more specific settings. For example, when
3471             you installed (free) L,
3472             L, L fonts on your system, you may set (these
3473             variables are not present by default; you need to create new C variables):
3474            
3475             font.name-list.sans-serif.x-unicode DejaVu Sans,Symbola,DejaVu Serif,DejaVu Sans Mono,Junicode,Unifont Smooth
3476             font.name-list.serif.x-unicode DejaVu Serif,Symbola,Junicode,DejaVu Sans,Symbola,DejaVu Sans Mono,Unifont Smooth
3477             font.name-list.cursive.x-unicode Junicode,Symbola,DejaVu Sans,DejaVu Serif,DejaVu Sans Mono,Unifont Smooth
3478             font.name-list.monospace.x-unicode DejaVu Sans Mono,DejaVu Sans,Symbola,DejaVu Serif,Junicode,Unifont Smooth
3479            
3480             And maybe also L
3481            
3482             font.name-list.fantasy.x-unicode Symbola,DejaVu Serif,Junicode,DejaVu Sans Mono,DejaVu Sans,Unifont Smooth
3483            
3484             (Above, we use the L||http://ilyaz.org/software/fonts/>
3485             as the font of last resort. Although the glyphs are very coarse, in this role
3486             it is very useful since it contains all the Unicode C characters in BMP.)
3487            
3488             B L of C
3489             contains “fake” glyphs for characters not supported by the font. Such a design error is inexcusable for a TrueType font; this gets
3490             in the way when an application tries to find the best way to show a character. Using
3491             (non-C variant of) my “C” re-build not only fixes this (and some other) problems,
3492             but also makes the font nicely scalable — the original works well only in the size 16px.
3493            
3494             If you set both: the C variables with rich enough fonts,
3495             B C,
3496             then you may have the best of both worlds: the situation when a character cannot
3497             be shown via C settings will be extremely rare, so the possibility of delay
3498             due to C is irrelevant.
3499            
3500             =head2 Firefox misinterprets keypresses
3501            
3502             =over 4
3503            
3504             =item *
3505            
3506             Multiple prefix keys are not supported.
3507            
3508             =item *
3509            
3510             C and C are recognized as a character-generating
3511             keypress (good!), but the character they produce bears little relationship
3512             to what keyboard produces. (In our examples, the character may be available
3513             only via multiple prefix keys!)
3514            
3515             =item *
3516            
3517             After a prefix key, C is not recognized as a
3518             character-generating key.
3519            
3520             =item *
3521            
3522             C is not recognized as a character-generating key.
3523            
3524             =item *
3525            
3526             C is not recognized as a character-generating key sequence (recall
3527             that C should be pressed all the time, and other keys C<+ HEXDIGITS> should be
3528             pressed+released sequentially).
3529            
3530             =item *
3531            
3532             When keyboard has an “extra” modifier key in addition to C (an
3533             analogue of C key), combining it with C or with C is interpreted
3534             by Firefox as if only C or C were pressed.
3535            
3536             =item *
3537            
3538             When keyboard generates different characters on C than on C
3539             (possible with assigning extra modifier bits to C), FireFox interprets any
3540             C as if it were C.
3541            
3542             C when C produces a character, this character is understood
3543             correctly by FF. Same for C (but again, while this works on numeric
3544             keypad, it is still buggy if C is on, or if the key is C.)
3545            
3546             =item *
3547            
3548             The keyboard may have C which produces the same characters as C, but
3549             which behaves differently when combined with other keys. FireFox ignores these
3550             differences.
3551            
3552             This is combinable with other remarks above: e.g., C is interpreted
3553             by FireFox as C.
3554            
3555             =item *
3556            
3557             In addition to this, Firefox replaces C and C modifiers by
3558             an I: Firefox pretends that I C is down. (Here
3559             C is a fake key C which Windows pretends is down when either one
3560             of C or C is down.) Since the situation when C
3561             is down, but neither C nor C are down is not possible, this
3562             may access parts of the keyboard layout not visible to other applications.
3563             (Same for C and C.)
3564            
3565             The net effect is that key combinations involving C or C keys
3566             may behave wrong in Firefox. For example, with version C<0.63> of
3567             L, C and C
3568             are ignored on character-producing keys.
3569            
3570             =item *
3571            
3572             If C produces C< — > (this is C), and
3573             C produces the “cedilla deadkey”, then pressing C
3574             acts as both: first C are inserted, then C<ç>.
3575            
3576             =item *
3577            
3578             A subtle variation of the previous failure mode: If C produces
3579             deadkey X, and C produces the deadkey Y, then combining C
3580             with C gives the expected Y*a combination. However, if combining with
3581             something more complicated (C or C), with which
3582             deadkey Y is not combinable, B the bugs strike:
3583            
3584             =over 4
3585            
3586             =item 1
3587            
3588             in the first case the deadkey behaves as X: it produces a pair of characters
3589             C; here C produces C<α>. (Keep in mind that inserting two
3590             characters is the expected behaviour outside of Firefox, but Firefox usually
3591             “eats” an undefined deadkey combination; and note that it is X, not the
3592             expected Y!).
3593            
3594             =item 2
3595            
3596             in the second case it produces only the character C<ф> generated by C. Here
3597             the behaviour is neither as outside Firefox (where it would produce C) nor as
3598             usual in Firefox (where it would eat the undefined sequence).
3599            
3600             =back
3601            
3602             =back
3603            
3604             Of these problems, C has only C one, but a very cursory inspection shows
3605             other problems: C are not recognized as character-generating keys. (And IE9 just
3606             crashes in most of these situations…)
3607            
3608             =head2 C-keypresses triggering some actions
3609            
3610             For example, newer versions of Windows have graphics driver reacting to Cs by
3611             rotating the screen. Usually, when you know which application is stealing your keypresses, one
3612             can find a way to disable or reconfigure this action.
3613            
3614             For screen rotation: Right-Click on desktop, “Graphics Options”, “Hot Keys”, disable. The way to
3615             reconfigure this is to use “Graphics Properties” instead of “Graphics Options” (but this may depend
3616             on your graphics subsystem).
3617            
3618             =head2 C-keypresses going nowhere
3619            
3620             Some C-keypresses do not result in the corresponding letter on
3621             keyboard being inserted. It looks like they are stolen by some system-wide
3622             hotkeys. See:
3623            
3624             http://www.kbdedit.com/manual/ex13_replacing_altgr_with_kana.html
3625            
3626             If these keypresses would perform some action, one might be able to deduce
3627             how to disable the hotkeys. So the real problem comes when the keypress
3628             is silently dropped.
3629            
3630             I found out one scenario how this might happen, and how to fix this particular
3631             situation. (Unfortunately, it did not fix what I see, when C [but not
3632             C] is stolen.) Installing a shortcut, one can associate a hotkey to
3633             the shortcut. Unfortunately, the UI allows (and encourages!) hotkeys of the
3634             form (which are equivalent to C) - instead
3635             of safe combinations like C or
3636             C (which — by convention — are ignored by keyboard drivers, and do not generate
3637             characters). If/when an application linked to by this shortcut is
3638             gone, the hotkey remains, but now it does nothing (no warning or dialogue comes).
3639            
3640             If the shortcut is installed in one of "standard places", one can find it.
3641             Save this to F (replace F by the suitable drive letter
3642             here and below)
3643            
3644             on error resume next
3645             set WshShell = WScript.CreateObject("WScript.Shell")
3646             Dim A
3647             Dim Ag
3648             Set Ag=Wscript.Arguments
3649             If Ag.Count > 0 then
3650             For x = 0 to Ag.Count -1
3651             A = A & Ag(x)
3652             Next
3653             End If
3654             Set FSO = CreateObject("Scripting.FileSystemObject")
3655             f=FSO.GetFile(A)
3656             set lnk = WshShell.CreateShortcut(A)
3657             If lnk.hotkey <> "" then
3658             msgbox A & vbcrlf & lnk.hotkey
3659             End If
3660            
3661             Save this to F
3662            
3663             set findhotkey=k:\findhotkey
3664             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3665             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3666             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3667             cd /d %UserProfile%\desktop
3668             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3669             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3670             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3671             cd /d %AllUsersProfile%\desktop
3672             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3673             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3674             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3675             cd /d %UserProfile%\Start Menu
3676             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3677             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3678             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3679             cd /d %AllUsersProfile%\Start Menu
3680             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3681             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3682             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3683             cd /d %APPDATA%
3684             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3685             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3686             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3687             cd /d %HOMEDRIVE%%HOMEPATH%
3688             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3689             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3690             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3691            
3692             (In most situations, only the section after the last C is important;
3693             in my configuration all the "interesting" stuff is in C<%APPDATA%>. Running
3694             this should find all shortcuts which define hot keys.)
3695            
3696             Run the cmd file. Repeat in the "All users"/"Public" directory. It should
3697             show a dialogue for every shortcut with a hotkey it finds. (But, as I said,
3698             it did not fix I problem: C works in F test window,
3699             and nowhere else I tried...)
3700            
3701             =head2 C-keypresses starting bloatware applications
3702            
3703             (Seen on IdeaPad.) Some pre-installed programs may steal C-keypresses;
3704             it may be hard to understand what is the name of the application even when
3705             the stealing results in user-visible changes.
3706            
3707             One way to deal with it is to start C in C (or
3708             C
) panel, and click on CPU column until one gets decreasing-order
3709             of CPU percentage. Then one can try to detect which process is becoming
3710             active by watching top rows when the action happens (or when one manages to
3711             get back to the desktop from the full-screen bloatware); one may need to
3712             repeat triggering this action several times in a row. After you know
3713             the name of executable, you can google to find out how to disable it, and/or
3714             whether it is safe to kill this process.
3715            
3716             B On IdeaPad, it was F (safe to kill). It was stealing
3717             C and C.
3718            
3719             B On MSI, a similar stealer was F (some claim it is used to show on-screen
3720             animation when special laptop keys are pressed; if you do not need them, it is safe
3721             to kill). It was stealing C. (But to find I one, I needed to
3722             kill all suspicious apps one by one…)
3723            
3724             =back
3725            
3726             =head1 WINDOWS GOTCHAS for keyboard developers using MSKLC
3727            
3728             =head2 Several similar F created keyboards may confuse the system
3729            
3730             Apparently, the system may get majorly confused when the C
3731             of the project gets changed without changing the DLL (=project) name.
3732            
3733             (Tested only with Win7 and the name in the DESCRIPTIONS section
3734             coinciding with the name on the KBD line - both in F<*.klc> file.)
3735            
3736             The symptoms: I know how one can get 4 different lists of keyboards:
3737            
3738             =over 4
3739            
3740             =item 1
3741            
3742             Click on the keyboard icon in the C - usually shown
3743             on the toolbar; positioned to the right of the language code EN/RU
3744             etc (keyboard icon is not shown if only one keyboard is associated
3745             to the current language).
3746            
3747             =item
3748            
3749             Go to the C settings (e.g., right-click on the
3750             Language bar, Settings, General).
3751            
3752             =item
3753            
3754             On this C page, press C button, go to the language
3755             in question.
3756            
3757             =item
3758            
3759             Check the F<.klc> files for recently installed Input Languages.
3760            
3761             =item
3762            
3763             In MS Keyboard Layout Creator, go to C
3764             list.
3765            
3766             =back
3767            
3768             It looks like the first 4 get in sync if one deletes all related keyboards,
3769             then installs the necessary subset. I do not know how to fix 5 - MSKLC
3770             continues to show the old name for this project.
3771            
3772             Another symptom: Current language indicator (like C) on the language
3773             bar disappears. (Reboot time?)
3774            
3775             Is it related to C<***\Local Settings\MuiCache\***> hive???
3776            
3777             Possible workaround: manually remove the entry in C
3778             (the last 4 digits match the codepage in the F<.klc> file).
3779            
3780             =head2 Too long description (or funny characters in description?)
3781            
3782             If the name in the C section is too long, the name shown in
3783             the list C<2> above may be empty.
3784            
3785             (Checked only on Win7 and when the name in the DESCRIPTIONS section
3786             coincides with the name on the C line - both in F<*.klc> file.
3787             Length=63 works fine, Length=64 triggers the bug.)
3788            
3789             (Fixed by shortening the name [but see
3790             L<"Several similar F created keyboards may confuse the system">
3791             above!], so maybe it was
3792             not the length but some particular character (C<+>?) which was confusing
3793             the system.) (I saw a report on F bug when description had an apostrophe
3794             character C<'>.)
3795            
3796             =head2 F ruins names of dead key when reading a F<.klc>
3797            
3798             When reading a F<.klc> file, MS Keyboard Layout Creator may ruin the names
3799             of dead keys. Symptom: open the dialogue for a dead key mapping
3800             (click the key, check that C has checkmark, click on the
3801             C<...> button near the C checkbox); then the name (the first
3802             entry field) contains some junk. (Looks like a long ASCII string
3803            
3804             U+0030 U+0030 U+0061 U+0039
3805            
3806             .)
3807            
3808             B if all one needs is to compile a F<.klc>, one can run
3809             F directly.
3810            
3811             B correct ALL these names manually in MSKLC. If the names are
3812             the Unicode name for the dead character, just click the C button
3813             near the entry field. Do this for ALL the dead keys in all the registers
3814             (including C!). If C is not made "semantically meaningful",
3815             there are 6 views of the keyboard (C
3816             AltGr, AltGr+Shift>) - check them all for grayed out keys (=deadkeys).
3817            
3818             Check for success: C, use a temporary name.
3819             Inspect near the end of the generated F<.klc> file. If OK, you can
3820             go to the Project/Build menu. (Likewise, this way lets you find which
3821             deadkey's names need to be fixed.)
3822            
3823             !!! This is time-consuming !!! Make sure that I things are OK
3824             before you do this (by C, C).
3825            
3826             BTW: It might be that this is cosmetic only. I do not know any bad
3827             effect - but I did not try to use any tool with visual feedback on
3828             the currently active sub-layout of keyboard.
3829            
3830             =head2 Double bug in F with dead characters above 0x0fff
3831            
3832             This line in F<.klc> file is treated correctly by F's builtin keyboard tester:
3833            
3834             39 SPACE 0 0020 00a0@ 0020 2009@ 200a@ // ,  , ,  ,   // SPACE, NO-BREAK SPACE, SPACE, THIN SPACE, HAIR SPACE
3835            
3836             However, via F it produces the following two bugs:
3837            
3838             static ALLOC_SECTION_LDATA MODIFIERS CharModifiers = {
3839             &aVkToBits[0],
3840             7,
3841             {
3842             // Modification# // Keys Pressed
3843             // ============= // =============
3844             0, //
3845             1, // Shift
3846             2, // Control
3847             SHFT_INVALID, // Shift + Control
3848             SHFT_INVALID, // Menu
3849             SHFT_INVALID, // Shift + Menu
3850             3, // Control + Menu
3851             4 // Shift + Control + Menu
3852             }
3853             };
3854             .....................................
3855             {VK_SPACE ,0 ,' ' ,WCH_DEAD ,' ' ,WCH_LGTR ,WCH_LGTR },
3856             {0xff ,0 ,WCH_NONE ,0x00a0 ,WCH_NONE ,WCH_NONE ,WCH_NONE },
3857             .....................................
3858             static ALLOC_SECTION_LDATA LIGATURE2 aLigature[] = {
3859             {VK_SPACE ,6 ,0x2009 ,0x2009 },
3860             {VK_SPACE ,7 ,0x200a ,0x200a },
3861            
3862             Essentially, C<2009@ 200a@> produce C (= multiple 16-bit chars)
3863             instead of deadkeys. Moreover, these ligatures are put on non-existing
3864             "modifications" 6, 7 (the maximal modification defined is 4; so the code uses
3865             the C flags instead of "modification number" in
3866             the ligatures table).
3867            
3868             =head2 F keyboards handle C, C , C and C differently than US keyboard
3869            
3870             The US keyboard produces (as the
3871             “string value”) the corresponding Control-letter when
3872             C is pressed. (In console applications,
3873             C<\x00> is not visible.) F does not reproduce this
3874             behaviour. This may break an application if
3875             it was not specifically tested with “complicated” keyboards.
3876            
3877             The only way to fix this from the “naive” keyboard
3878             layout DLL (i.e., the kind that F generates) which I found is to
3879             explicitly include C as a handled combination, and return
3880             C on such keypresses. (This is enabled in the
3881             keyboards generated by this module - not customizable in v0.12.)
3882            
3883             =head2 "There was a problem loading the file" from F
3884            
3885             Make line endings in F<.klc> DOSish.
3886            
3887             =head2 C do not work
3888            
3889             Make line endings in F<.klc> DOSish (when given as input to F -
3890             it gives no error messages, and deadkeys work [?!]).
3891            
3892             =head2 Error 2011 (ooo-us, line 33): There are not enough columns in the layout list.
3893            
3894             The maximal line end of F is exceeded (a line or two ahead). Try removing
3895             inline comments. If this helps, change the workflow to cut off long lines (250 bytes is OK).
3896            
3897             =head2 C
3898            
3899            
3900            
3901             from F. This means that the internal table of virtual keys
3902             mapped to non-C (sic!) scancodes is overloaded.
3903            
3904             Time to switch to direct generation of F<.c> file? Or you need to
3905             triage the “added” virtual keys, and decide which are less important
3906             so you can delete them from the F<.klc> file.
3907            
3908             =head2 Only the first 8 with-modifiers columns are processed by F
3909            
3910             Time to switch to direct generation of F<.c> file?
3911            
3912             =head2 Only the first digit of the which-modifier-column is output by F in C
3913            
3914             Time to switch to direct generation of F<.c> file?
3915            
3916             =head2 F produces C section with meaningless entries for prefix keys C<0x08>, C<0x0A>, C<0x0D>
3917            
3918             These entries do not stop keyboard from working. They look like C...
3919            
3920             Time to switch to direct generation of F<.c> file?
3921            
3922             =head2 It is not clear how to compile F<.C> files emitted by F
3923            
3924             This distribution includes a script F which can do this. It is
3925             inspired by
3926            
3927             http://stackoverflow.com/questions/3360746/how-can-i-compile-programmer-dvorak
3928             http://levicki.net/articles/tips/2006/09/29/HOWTO_Build_keyboard_layouts_for_Windows_x64.php
3929            
3930             It allows us to build using the cycle
3931            
3932             =over 4
3933            
3934             =item *
3935            
3936             Build skeleton F<.klc> file.
3937            
3938             =item *
3939            
3940             Convert to B using F.
3941            
3942             =item *
3943            
3944             Patch against bugs in F.
3945            
3946             =item *
3947            
3948             Patch in features not supported by F.
3949            
3950             =item *
3951            
3952             Compile and link DLLs.
3953            
3954             =back
3955            
3956             (This assumes that the installer was already built by F using a
3957             “simplified-to-nothing” F<.klc> file which does not trigger the F bugs).
3958            
3959             (See also L.)
3960            
3961             =head2 F cannot ignore column=15 of the keybinding definition table
3962            
3963             (Compare with L<"Windows ignores column=15 of the keybinding definition table">.)
3964            
3965             F requires that all the columns are associated to a modifier-bitmap.
3966             But column=15 should not be associated to any.
3967            
3968             The workaround is to associate it to the bitmap which should not be bound to any
3969             column (like C<4=KBDALT>). In the output C<.C> file, one would have 15 instead
3970             of C for the bitmap 4, but C is defined to be 15 anyway…
3971            
3972             =head2 F ignores bits above 0x20 in the modification columns descriptor
3973            
3974             Time to switch to direct generation of F<.C> files?
3975            
3976             =head2 F cannot assign more than one bitmask to a modification column
3977            
3978             Time to switch to direct generation of F<.C> files?
3979            
3980             (Quite often, one combination of modifiers should produce the same characters as
3981             another one. The format of keyboard layout tables allows them to share a
3982             modification column. The format of F<.klc> files does not allow sharing.)
3983            
3984             =head2 F forgets to emit C/6/8
3985            
3986             If the F<.klc> file has many modification columns, the emitted aVkToWcharTable
3987             contains only C/2.
3988            
3989             =head2 F confuses LIGATURES on unusual keys
3990            
3991             For example, C may be replaced by C in the LIGATURES table.
3992            
3993             Time to switch to direct generation of F<.C> files?
3994            
3995             =head2 F places C at end of the generated F<.c> file
3996            
3997             The offset of this structure should be no more than 0x10000. Thus keyboards
3998             with large tables of prefixed keys may fail to load. This may be related to
3999             the bug L<"If data in C takes too much space, keyboard is mis-installed, and “Language Bar” goes crazy">.
4000            
4001             Time to switch to direct generation of F<.C> files?
4002            
4003             =head2 Error "the required resource DATABASE is missing" from F
4004            
4005             The localized C in F<.klc> file contains a character outside of
4006             the repertoire of the codepage in question. Removing offending characters, or
4007             removing the C altogether should fix this. (But either way, the name of
4008             layout in the C of the Language Bar may become empty.) Having a
4009             different localized description has a side effect that the name of the layout
4010             shown in the Language Bar popups is localized.
4011            
4012             (The localized description is what is put into the C of the
4013             DLL file; it is this resource which is mentioned in the registry. (There
4014             will be no such resource when the localized C is missing.))
4015            
4016             (The failure of F is not reproducible after a reboot!)
4017            
4018             Apparently, this has nothing to do with the length, so the (older) conjectures
4019             below are wrong (although the F<.RC> file generated by MSKLC has the [non-localized] name
4020             truncated after 40 chars in the field C — but not in other fields):
4021            
4022             It looks like there is a buffer overflow in MSKLC, and sometimes the generated
4023             F in the install package would just exit with this error. The
4024             apparent reason is the length of the C-like fields.
4025            
4026             Workaround: it looks like the C field is not used in F.
4027             So generate an “extra dummied” F<.klc> file I (with shortened descriptions),
4028             make an install package from it, and mix the F from the “extra
4029             dummied” variant with the rest of the install package from a
4030             “less dummied” F<.klc> file.
4031            
4032             The alternative is to get rid of F completely, and ask users
4033             to run the appropriate F<.msi> file from the install package by hand
4034             (choosing based on 32-bit vs 64-bit architecture).
4035            
4036             =head2 Summary of the productive workflow with F<.klc>:
4037            
4038             If direct generation of F<.C> files is out of question, the following workflow
4039             may be used (some of these steps may be omitted depending on how complicated
4040             your F<.klc> layout is; for practical implementation, see
4041             L creation|http://cpansearch.perl.org/src/ILYAZ/UI-KeyboardLayout/examples/build-iz.pl>
4042             and L to F<.dll>
4043             processing|http://cpansearch.perl.org/src/ILYAZ/UI-KeyboardLayout/examples/build_here.cmd>):
4044            
4045             =over 4
4046            
4047             =item
4048            
4049             Make an “extra dummied” F<.klc> (short descriptions, short dummy C,
4050             C, C, C sections, no C section). Run
4051             it through GUI MSKLC (C, then C, C).
4052             Store the generated F, rename the directory.
4053            
4054             =item
4055            
4056             Make a “less dummied” F<.klc> file (as above, but with the correct description).
4057             Do as above, and mix in the F from the previous step.
4058            
4059             =item
4060            
4061             Run the “real” F<.klc> file through the F CLI. Fix errors in the
4062             generated F<.C> and F<.H> files (using scripts and patches if needed).
4063            
4064             (One may need to remove a few lines in the C section to avoid buffer overflows too.)
4065            
4066             =item
4067            
4068             Compile the fixed F<.C> files. (One may need to split them in two to
4069             decrease the offset of the static table in the DLL to the level
4070             Windows can handle: less than 64K.) Mix the generated F<.dll> files
4071             with the install package made above.
4072            
4073             =back
4074            
4075             =head1 WINDOWS GOTCHAS for application developers (problems in kernel)
4076            
4077             =head2 Many applications need to know the state of hidden flag C
4078            
4079             To decide what to do with a keypress, an application may need to know
4080             whether C is enabled in the keyboard (in other words, if
4081             C is faked when C is pressed). For example, when
4082             the kernel processes accelerators, it would not trigger C
4083             if C was pressed with C in the presence of this flag — even
4084             though C I visible as being pressed (one needs to press
4085             C).
4086            
4087             An application with configurable bindings may need to emulate this action
4088             of TranslateMessage(). One of the ways to do this may be to do (when
4089             C and C are down)
4090            
4091             =over 4
4092            
4093             =item *
4094            
4095             Set a global flag disabling processing of C in the application;
4096            
4097             =item *
4098            
4099             Call TranslateAccelerator() with an improbable virtual key (C or
4100             some such) and appropriate ad hoc translation table;
4101            
4102             =item *
4103            
4104             Check whether accelerator was recognized (if so, C is not enabled).
4105            
4106             =back
4107            
4108             Possible problems with this approach: the “improbable key” should better not
4109             trigger some system accelerator (this is why one should not use “ordinary”
4110             keys). Additionally, some system accelerators react on Windows key as a
4111             modifier; so acceleration table may specify this as a certain flag. This
4112             would imply that the algorithm above may not work when C key is
4113             down. (Not tested.)
4114            
4115             (Or maybe these C bindings are not accelerators, and are
4116             processed in a different part of keyboard input events. — Then there is
4117             little to worry about.)
4118            
4119             =head1 WINDOWS GOTCHAS for keyboard developers (problems in kernel)
4120            
4121             =head2 It is hard to understand what a keyboard really does
4122            
4123             To inspect the output of the keyboard in the console mode (may be 8-bit,
4124             depending on how Perl is compiled), one can run
4125            
4126             perl -MWin32::Console -wle 0 || cpan install Win32::Console
4127             perl -we "sub mode2s($){my $in = shift; my @o; $in & (1<<$_) and push @o, (qw(rAlt lAlt rCtrl lCtrl Shft NumL ScrL CapL Enh ? ??))[$_] for 0..10; qq(@o)} use Win32::Console; my $c = Win32::Console->new( STD_INPUT_HANDLE); my @k = qw(T down rep vkey vscan ch ctrl); for (1..20) {my @in = $c->Input; print qq($k[$_]=), ($in[$_] < 0 ? $in[$_] + 256 : $in[$_]), q(; ) for 0..$#in; print(@in ? mode2s $in[-1] : q(empty)); print qq(\n)}"
4128            
4129             This installs Win32::Console module (if needed; included with ActiveState Perl)
4130             then reports 20 following console events (press and keep C key
4131             to exit by generating a “harmless” chain of events). B the reported
4132             input character is not processed (via ToUnicode(); hence chained keys and
4133             multiple chars per key are reported only as low-level), and is reported as
4134             a signed 8-bit integer (so the report for above-8bit characters is
4135             completely meaningless).
4136            
4137             T=1; down=1; rep=1; vkey=65; vscan=30; ch=240; ctrl=9; rAlt lCtrl
4138             T=1; down=0; rep=1; vkey=65; vscan=30; ch=240; ctrl=9; rAlt lCtrl
4139            
4140             This reports single (T=1) events for keypress/keyrelease (down=1/0) of
4141             C. One can see that C generates C modifiers
4142             (this is just a transcription of C),
4143             that C is on virtual key 65 (this is C) with virtual scancode
4144             30, and that the generated character (it was C<æ>) is C<240>.
4145            
4146             The character is approximated to the current codepage. For example, this is
4147             C entering C<β = U+03b2> in codepage C:
4148            
4149             T=1; down=1; rep=1; vkey=66; vscan=48; ch=223; ctrl=0;
4150             T=1; down=0; rep=1; vkey=66; vscan=48; ch=223; ctrl=0;
4151            
4152             Note that C<223 = 0xDF>, and C. So I is substituted by
4153             I.
4154            
4155             There is also a script F in this distribution
4156             which does a little
4157             bit more than this. One can also give this script the argument C (or C,
4158             where C is the 0-based number among the listed keyboard layouts) to report
4159             ToUnicode() results, or argument C to report what is produced by reading raw
4160             characters (as opposed to events) from the console.
4161            
4162             =head2 It is not documented how to make a with-prefix-key(s) combination produce 0-length string
4163            
4164             Use C<0000@> (in F<.klc>), or DEADKEY 0 in a F<.c> file. Explanation: what a prefix key
4165             is doing is making the kernel remember a word (the state of the finite automaton), and not
4166             producing any output character. Having no prefix key corresponds to the state being 0.
4167            
4168             Hence making prefix_key=0 is the same as switching the finite automaton to the initial
4169             state, and not producing any character — and this is exactly what is requested in the question.
4170            
4171             =head2 If data in C takes too much space, keyboard is mis-installed, and “Language Bar” goes crazy
4172            
4173             Installation reports success, the keyboard appears in the list in the Language Bar's "Settings".
4174             But the keyboard is not listed in the menu of the Language Bar itself. (This is not fixed
4175             by a reboot.)
4176            
4177             Deinstalling (by F's installer) in such a case removes one (apparently, the last) of the listed keyboards for the language;
4178             at least it is removed from the menu of the Language Bar itself. However, the list in the “Settings”
4179             does not change! One can't restore the (wrongly) removed (unrelated!) layout by manipulating the latter list.
4180             (I did not try to check what will happen if only one keyboard for the language is available — is it removed
4181             for good?) I condition is fixed by a reboot: the “missing” “unrelated” layout jumps to existence.
4182            
4183             I did not find a way to restore the deleted keyboard layout (without a reboot). Experimenting with these is kinda painful:
4184             with each failure,
4185             I add one extra keyboard to the list in the “Settings”; - so the list is growing and growing! [Better
4186             add useless-to-you keyboards, since until the reboot you will never be able to install them again.]
4187            
4188             B this condition reappeared in update from v0.61 to v0.63 of B layouts. Between
4189             these versions, there was
4190             a very small increment of the size: one modification column was added, and two deadkeys were added.
4191             Removing a bunch of (useless?) dead keys descriptions fixed this again; but now I have my doubts on
4192             whether it was due to I increasing the size of C… Maybe it is due to the total
4193             size of certain segments in the DLL.
4194            
4195             (This may be related to the bug L<"F places C at end of the generated F<.c> file">.)
4196            
4197             =head2 Windows ignores column=15 of the keybinding definition table
4198            
4199             Note that 15 is C; this column number is used to indicate that
4200             this particular combination of modifiers does not produce keys. In particular,
4201             the generator must avoid this column number.
4202            
4203             Workaround: put junk into this column, and use different columns for useful modifier
4204             combinations. The mapping from modifiers to columns should not be necessarily 1-to-1.
4205             (But see L<"F cannot ignore column=15 of the keybinding definition table">.)
4206            
4207             =head2 Windows combines modifier bitmaps for C, C and C on C
4208            
4209             (At least when C is special in the keyboard,) the modifier bitmap bound to this
4210             key is actually bit-or of bitmaps above. Essentially, this prohibits assigning
4211             interesting flag combinations to C.
4212            
4213             The (very limited) workaround is to ensure that the flags one puts on C contain
4214             all the flags assigned to the above VK codes. (This does not change anything, but
4215             at least makes the assignments less confusing for human inspection.)
4216            
4217             =head2 Windows ignores C if its modifier bitmaps is not standard
4218            
4219             Adding C to C disables console sending non-modified char on keydown.
4220             Together with the previous problem, this looks like essentially prohibiting
4221             putting interesting bitmaps on the left modifier keys.
4222            
4223             Workaround: one can add C on C. It looks like the combination
4224             C is compatible with Windows' handling of C (both in console,
4225             and for accessing/highlighting the menu entries). (However, since only C
4226             is going to be stripped for handling of C, the modification column for
4227             C should duplicate the modification column for no-C-flags. Same with
4228             C added.)
4229            
4230             =head2 When C produces C, problems in Notepad
4231            
4232             Going to the Save As dialogue in Notepad loses "speciality of AltGr" (it highlights Menu);
4233             one needs to switch layouts via LAlt+LShift to restore.
4234            
4235             I do not know any workaround.
4236            
4237             =head2 Console applications cannot detect when a keypress may be interpreted as a “command”
4238            
4239             The typical logic of an (advanced) application is that it interprets certain keypresses
4240             (combinations of keys with modifiers) as “commands”. To do this in presence of user-switchable
4241             keyboards, when it is not known in compile time which key sequences generate characters,
4242             the application must be able to find at runtime which keypresses are characters-generating,
4243             and which are not. The latter keypresses are candidates to be checked whether they should trigger commands
4244             of the application.
4245            
4246             For final keypresses of a character-generating key-sequence, an application gets a notification
4247             from the ReadConsoleEvent() API call that this keypress generates a character. However, for the
4248             keypresses of the sequence which are not the last one (“dead” keys), there is no such notification.
4249            
4250             Therefore, there is no way to avoid dead keys triggering actions in an application. What is the
4251             difference with non-console applications? First of all, they get such a notification (with the
4252             standard TranslateMessage()/DispatchMessage() sequence of API calls, on WM_KEYDOWN, one can
4253             PeekMessage() for WM_SYSDEADCHAR/WM_DEADCHAR and/or WM_SYSCHAR/WM_CHAR). Second, the windowed
4254             application may call ToUnicode(Ex)() to calculate this information itself.
4255            
4256             Well, why can't a console application use the second method? First, the active keyboard layout
4257             of a console application is the default one. When user switches the keyboard layout of the console,
4258             the application gets no notification of this, and its keyboard layout does not change. This makes
4259             ToUnicode() useless. Moreover, due to
4260             security architecture, the console application cannot query the ID of the thread serving the message
4261             loop of the console, so cannot query GetKeyboardLayout() of this thread. Hence ToUnicodeEx() is
4262             useless too.
4263            
4264             (There may be a lousy workaround: run ToUnicodeEx() on B the installed keyboard layouts, and
4265             check which of them are excluded by comparing with results of ReadConsoleEvent(). Interpret
4266             contradictions as user changing the keyboard layout. Of course, on several keypresses following
4267             a change of keyboard layout one may get unexpected results. And if two similar
4268             keyboards are installed, one may also never get definite answer on which of them is currently active.)
4269            
4270             (To handle this workaround, one must have a way to call ToUnicode() in a way which does not change
4271             the internal state of the keyboard driver. Observe:
4272            
4273             =over 4
4274            
4275             =item *
4276            
4277             Such a way is not documented.
4278            
4279             =item *
4280            
4281             Watch the character reported by ReadConsoleEvent() on the C event for deadkeys. This is
4282             the character which a deadkey would produce if it is pressed twice (and is 0 if pressing it twice
4283             results in a deadkey again). The only explanation for this I can fathom is that the console's
4284             message queue thread calls such a non-disturbing-state version of ToUnicode().
4285            
4286             Why it should be “non-disturbing”? Otherwise it would reset the state “this deadkey was pressed”,
4287             and the following keypress would be interpreted as not preceded by a deadkey. And this is not
4288             what happens. (If one does it with usual ToUnicode() call, DOWN reports a deadkey, but UP reports
4289             “ignored”; to see this, run F with arguments C
4290             with a keyboard which produces ç on C. Here C is the number of the keyboard in the list
4291             of available keyboards reported by C).
4292            
4293             Well, when one I that some API calls are possible, it is just a SMP to find it out
4294             (see F). It turns out that given argument C achieves
4295             the behaviour of a console during KeyUp event. (As a side benefit, it also avoids another
4296             glitch in Windows' keyboard processing: it reports the character value in presence of C
4297             modifier — recall that ToUnicodeEx() ignores C unless C is present too. Well, I
4298             checked this so far only on KeyUp event, where console produces mysterious results.)
4299            
4300             =item *
4301            
4302             However, even without using undocumented flags, it is not hard to construct such a non-disturbing version of ToUnicode(). The only
4303             ingredient needed is a way to reset the state to “no deadkeys pressed” one. Then just store
4304             keypresses/releases from the time the last such state was found, call ToUnicode(), reset state,
4305             and call ToUnicode() again for all the stored keypresses/releases; then update the stored state
4306             appropriately.
4307            
4308             =item *
4309            
4310             But I strongly doubt that console's message loop does anything so advanced. My bet would be that
4311             it uses a non-documented call or non-documented flags. (Especially since the approach above does
4312             not handle C the same way as the console does.)
4313            
4314             =back
4315            
4316             =head2 In console, which combinations of keypresses may deliver characters?
4317            
4318             In addition to the problem outlined in the preceding section, a console application should
4319             better support input of character-by-numeric-code, and of copy-and-pasted strings. Actually,
4320             the second situation, although undocumented, is well-engineered, so let us document these two
4321             here. (These two should better be documented together, since pasting may fake input by
4322             repeated character-by-numeric-code.)
4323            
4324             Pasting happens character-by-character (more precisely, by UTF-16 code units), but C
4325             would group them together:
4326            
4327             =over 4
4328            
4329             =item *
4330            
4331             When pasting a character present in a keyboard layout with at most C modifier,
4332             a fully correct emulation of a sequence C
4333             is produced (without C if it is not needed). The character (as usual) is delivered
4334             on both C events.
4335            
4336             =item *
4337            
4338             When pasting a character present in a keyboard layout, but needing I modifiers (not
4339             only C), a partial emulation of a certain key tap is produced:
4340             C. The character (as usual) is delivered
4341             on both C events.
4342            
4343             Quirks: first, if C is needed, its press/release are not emulated, but the flags on
4344             the C events indicate presence of a C. Second (by this, the
4345             pasting may be distinguished from “real” keypress), C press/release are not emulated,
4346             but it is indicated as "present" in flags of all 4 events.
4347            
4348             =item *
4349            
4350             When pasting control-characters (available via the C-maps of the layout),
4351             the press/release of C is not emulated (but the flags indicate C downs); however,
4352             if C is needed, its press/release is emulated (and flags for I events do not
4353             have C is down).
4354            
4355             Pasting C delivers only U+000D (CR) — the typical maps have it on C and C<^M>,
4356             and C is delivered.
4357            
4358             =item *
4359            
4360             Otherwise, an emulation of C is sent, with the C delivering a character:
4361             C. The C
4362             are very unusual combinations of scancode/vkey for C<6> and C<3> on the numeric keyboard:
4363             they are delivered as if C (or C) is down, but the flags indicate that
4364             these modifiers are "not present".
4365            
4366             The “honest” C delivers U+003f, which is "C" (as above, it is delivered on release
4367             of C).
4368            
4369             =item *
4370            
4371             In general, entering characters-by-numeric-code (entering the decimal — or “KP+” then hex — while
4372             C is down) produces the resulting character when C is released. Processing this may create
4373             a significant problem for applications which interpret C as “commands” (e.g., if
4374             they interpret C as “word-left”).
4375            
4376             There may be several work-arounds. First, usually hex input is much more important than decimal,
4377             and usually, C is not bound to commands. Then the application may ignore characters
4378             delivered on C B the C was immediately followed by the press/release
4379             of C; additionally, it should disable the interpret-as-commands logic while C is down,
4380             and its press was followed by press/release of C.
4381            
4382             Second, it is not crucial to deliver Unicode characters numbered in single-digits. So one may
4383             require that commands are triggered by C only when pressed one-by-one (releasing
4384             C between them), and consider multi-digit presses as input-by-number only.
4385            
4386             Finally, Windows aborts entering character-by-numeric-code if any unexpected key press interferes.
4387             For example, C is “C”, but pressing-releasing C after pressing down C
4388             would not deliver anything. If an application follows the same logic (in reverse!) when recognizing
4389             keypressing resulting in “commands”, the users would have at least a “technical ability” to enter
4390             both commands, I enter characters-by-numeric-code.
4391            
4392             =back
4393            
4394             This is tested I in the situation when a layout has C present, and all the
4395             "with-extra-modifiers" characters are on bitmap entries with C bit marked. This is
4396             a situation discussed in the section L<"A convenient assignment of C bitmaps to modifier keys">.
4397            
4398             It is plausible that only C, C and C bits in a bitmap returned by C are
4399             acted upon (with C flag added based on C). Some popular keyboard layouts
4400             use C bit on the C key; under this assumption, the characters available via C key
4401             would be delivered with at most C modifier.
4402            
4403             All the emulated events do not have C indicated as "present" in their flags.
4404            
4405             =head2 Behaviour of C vs C
4406            
4407             When both combinations produce characters (say, X and Y), it is not clear
4408             how an application should decide whether it got C event (for menu
4409             entry starting with Y), or an C event.
4410            
4411             A partial workaround (if the semantic of the layout fits into the limited number
4412             of bits in the ORed mask): make all the keys which may be combined with
4413             C to have the C bit in the mask set; add some extra bit
4414             to C keys to be able to distinguish them. Then at least the
4415             kernel will produce the correct character on the ToUnicode() call (hence
4416             in TranslateMessage()). [A potential that an application may be confused
4417             is still large.]
4418            
4419             =head2 Customization of what C is doing is very limited
4420            
4421             (See the description of the semantic of C in L<"Keyboard input on Windows, Part II: The semantic of ToUnicode()">.)
4422            
4423             A partial workaround (if the semantic of the layout fits into the limited number
4424             of bits in the ORed mask): make all the modifier combinations (except for the
4425             base layer) to have C and C bits set; add some extra bits to
4426             C keys and C keys (apparently, only C will work with C)
4427             to be able to distinguish them. Then the C flag will affect all
4428             these combinations too.
4429            
4430             =head2 C combination: multiple problems
4431            
4432             First of all, sometimes C is ignored when used with this combination.
4433             (Fixed by reboot. When this happens, C does not work also with combinations
4434             with C and/or C). On the
4435             other hand, C works as intended. (I even got an impression that
4436             sometimes C works when C is active; cannot reproduce this,
4437             though.)
4438            
4439             I suspect this is related to the binding (usually not active) of C to switch between
4440             keyboards of a language. It may have suddenly jumped into existence (without my interaction).
4441             Simultaneously, this option disappeared from the UI to change keyboard options
4442             (L in Windows 7). It might be that
4443             press/release of C is filtered out in presence of C? (Looks
4444             like this for C now...)
4445            
4446             (I also saw what looks like C key being stuck in some rare situations — fixed
4447             by pressing it again. Do not know how to reproduce this. It is interesting to
4448             note that one of the bits in the mask of the C key is 0x80, and there is
4449             a define for this bit in F named C — but it is undocumented,
4450             and, judging by names, one might think that C would work in pair with the flag
4451             C of CAttributes>.)
4452            
4453             B Apparently, key up/down for many combinations of C are
4454             not delivered to applications.
4455             Key up/down for C<`/5/6/-/=/Z/X/C/V/M/,/./Enter/rShift> are not delivered here when used with C modifiers
4456             (at least in a console). Adding C does not change this. Same for C
4457             and C (but not for keypad ones!).
4458            
4459             Moreover, when used with C or C, this behaves as if both these
4460             keys were pressed. Same with the pair C and C (is it hardware-dependent???).
4461            
4462             (From time to time C do not work — neither with nor without C.)
4463            
4464             No workarounds are known. Although I could reproduce this on 3 physically different
4465             keyboards, this is, most probably, a design defect of hardware keyboards. Compare with
4466             L and
4467             L.
4468             Another related tidbit: apparently, L
4469             after pressing some modifier keys|http://ccm.net/forum/affich-24692-keyboard-mess-up-after-shift-key-held-too-lon?page=2>
4470            
4471             =head2 C combination: many keys are not delivered to applications
4472            
4473             Apparently, key up/down for many combinations of C are
4474             not delivered to applications.
4475             For example, C and C — neither with nor without C; same
4476             for C (at least in a console). Adding C
4477             does not change this. Same for C.
4478            
4479             No workarounds are known (except that C and C (without C)
4480             may be replaced by C and C).
4481            
4482             B in the bottom row of the keyboard, all the keys (except C) are
4483             either in the list above, or in the list for C modifiers. See also the
4484             references in the discussion of the previous problem (with C).
4485            
4486             =head2 Too long C of the layout is not shown in Language Bar Settings
4487            
4488             (the description is shown in the Language Bar itself). The examples are (behave the same)
4489            
4490             Greek-QWERTY (Pltn) Grn=⇑␣=^ˡⒶˡ-=Lat; Ripe=Ⓐʳ␣=Mnu-=Rus(Phon); Ripe²=Mnu-^ʳ-=Hbr; k.ilyaz.org
4491             US-Intl Grn=⇑␣=^ˡⒶˡ-=Grk; Ripe=Ⓐʳ␣=Mnu-=Rus(Phon); Ripe²=Mnu-^ʳ-=Hbr; k.ilyaz.org
4492            
4493             (Or maybe it is the semicolons in the names???). If this happens, one can still assign
4494             distinctive icons to the layout, and distinguish them via going to C.
4495            
4496             =head1 UNICODE TABLE GOTCHAS
4497            
4498             The position of Unicode consortium is, apparently, that the “name” of
4499             a Unicode character is “just an identifier”. In other words, its
4500             (primary) function is to identify a character uniquely: different
4501             characters should have different names, and that's it. Any other function
4502             is secondary, and “if it works, fine”; if it does not work, tough luck.
4503             If the name does not match how people use the character (and with the
4504             giant pool of defined characters, this has happened a few times), this is not
4505             a reason to abandon the name.
4506            
4507             This position makes the practice of maintaining backward compatibility
4508             easy. There is L.
4509            
4510             However, this module tries to extract a certain amount of I
4511             from the giant heap of characters defined in Unicode; the principal concept
4512             is “a mutator”. Most mutators are defined by programmatic inspection of names
4513             of characters and relations between names of different characters. (In other
4514             words, we base such mutators on names, not glyphs.) Here we
4515             sketch the irregularities uncovered during this process.
4516            
4517             APL symbols with C and C look reverted w.r.t. other
4518             C and C symbols.
4519            
4520             C, C, C, C C, C
4521             are defined with C or C at end, but (may) drop it when combined
4522             with modifiers via C. Likewise for C, C,
4523             C, C, C, C.
4524            
4525             Sometimes the opposite happens, and C appears out of the blue; compare:
4526            
4527             2A18 INTEGRAL WITH TIMES SIGN
4528             2A19 INTEGRAL WITH INTERSECTION
4529            
4530             C I a combination of C with C, but it is not marked as such
4531             in its name.
4532            
4533             Sometimes a name of diacritic (after C) acquires an C at end
4534             (see C).
4535            
4536             Oftentimes the part to the left of C is not resolvable: sometimes it
4537             is underspecified (e.g, just C), sometimes it is overspecified
4538             (e.g., in C), sometime it should be understood
4539             as a glyph-of-written-word (e.g, in C). Sometimes it just
4540             does not exist (e.g., C -
4541             there is C, but not the reversed variant).
4542             Sometimes it is a defined synonym (C).
4543            
4544             Sometimes it has something appended (C).
4545            
4546             Sometimes C is just a clarification (C).
4547            
4548             1 AND
4549             1 ANTENNA
4550             1 ARABIC MATHEMATICAL OPERATOR HAH
4551             1 ARABIC MATHEMATICAL OPERATOR MEEM
4552             1 ARABIC ROUNDED HIGH STOP
4553             1 ARABIC SMALL HIGH LIGATURE ALEF
4554             1 ARABIC SMALL HIGH LIGATURE QAF
4555             1 ARABIC SMALL HIGH LIGATURE SAD
4556             1 BACK
4557             1 BLACK SUN
4558             1 BRIDE
4559             1 BROKEN CIRCLE
4560             1 CIRCLED HORIZONTAL BAR
4561             1 CIRCLED MULTIPLICATION SIGN
4562             1 CLOSED INTERSECTION
4563             1 CLOSED LOCK
4564             1 COMBINING LEFTWARDS HARPOON
4565             1 COMBINING RIGHTWARDS HARPOON
4566             1 CONGRUENT
4567             1 COUPLE
4568             1 DIAMOND SHAPE
4569             1 END
4570             1 EQUIVALENT
4571             1 FISH CAKE
4572             1 FROWNING FACE
4573             1 GLOBE
4574             1 GRINNING CAT FACE
4575             1 HEAVY OVAL
4576             1 HELMET
4577             1 HORIZONTAL MALE
4578             1 IDENTICAL
4579             1 INFINITY NEGATED
4580             1 INTEGRAL AVERAGE
4581             1 INTERSECTION BESIDE AND JOINED
4582             1 KISSING CAT FACE
4583             1 LATIN CAPITAL LETTER REVERSED C
4584             1 LATIN CAPITAL LETTER SMALL Q
4585             1 LATIN LETTER REVERSED GLOTTAL STOP
4586             1 LATIN LETTER TWO
4587             1 LATIN SMALL CAPITAL LETTER I
4588             1 LATIN SMALL CAPITAL LETTER U
4589             1 LATIN SMALL LETTER LAMBDA
4590             1 LATIN SMALL LETTER REVERSED R
4591             1 LATIN SMALL LETTER TC DIGRAPH
4592             1 LATIN SMALL LETTER TH
4593             1 LEFT VERTICAL BAR
4594             1 LOWER RIGHT CORNER
4595             1 MEASURED RIGHT ANGLE
4596             1 MONEY
4597             1 MUSICAL SYMBOL
4598             1 NIGHT
4599             1 NOTCHED LEFT SEMICIRCLE
4600             1 ON
4601             1 OR
4602             1 PAGE
4603             1 RIGHT ANGLE VARIANT
4604             1 RIGHT DOUBLE ARROW
4605             1 RIGHT VERTICAL BAR
4606             1 RUNNING SHIRT
4607             1 SEMIDIRECT PRODUCT
4608             1 SIX POINTED STAR
4609             1 SMALL VEE
4610             1 SOON
4611             1 SQUARED UP
4612             1 SUMMATION
4613             1 SUPERSET BESIDE AND JOINED BY DASH
4614             1 TOP
4615             1 TOP ARC CLOCKWISE ARROW
4616             1 TRIPLE VERTICAL BAR
4617             1 UNION BESIDE AND JOINED
4618             1 UPPER LEFT CORNER
4619             1 VERTICAL BAR
4620             1 VERTICAL MALE
4621             1 WHITE SUN
4622             2 CLOSED MAILBOX
4623             2 CLOSED UNION
4624             2 DENTISTRY SYMBOL LIGHT VERTICAL
4625             2 DOWN-POINTING TRIANGLE
4626             2 HEART
4627             2 LEFT ARROW
4628             2 LINE INTEGRATION
4629             2 N-ARY UNION OPERATOR
4630             2 OPEN MAILBOX
4631             2 PARALLEL
4632             2 RIGHT ARROW
4633             2 SMALL CONTAINS
4634             2 SMILING CAT FACE
4635             2 TIMES
4636             2 TRIPLE HORIZONTAL BAR
4637             2 UP-POINTING TRIANGLE
4638             2 VERTICAL KANA REPEAT
4639             3 CHART
4640             3 CONTAINS
4641             3 TRIANGLE
4642             4 BANKNOTE
4643             4 DIAMOND
4644             4 PERSON
4645             5 LEFTWARDS TWO-HEADED ARROW
4646             5 RIGHTWARDS TWO-HEADED ARROW
4647             8 DOWNWARDS HARPOON
4648             8 UPWARDS HARPOON
4649             9 SMILING FACE
4650             11 CIRCLE
4651             11 FACE
4652             11 LEFTWARDS HARPOON
4653             11 RIGHTWARDS HARPOON
4654             15 SQUARE
4655            
4656             perl -wlane "next unless /^Unresolved: <(.*?)>/; $s{$1}++; END{print qq($s{$_}\t$_) for keys %s}" oxx-us2 | sort -n > oxx-us2-sorted-kw
4657            
4658             C specify fill - not combining. C is not combining, same for Cs.
4659            
4660             Only C is combining. Triangle is combining only with underbar and dot above.
4661            
4662             C means C. C - C (so do many others.)
4663             C means C; but C means C - go figure!
4664             C is not a decomposition (it is "something circled").
4665            
4666             Another way of compositing is C (but not C!) and C. See also C, C
4667             - but only C. Avoid C after these.
4668            
4669             C should replace C. C means C, same for C.
4670             C means C - actually just a bug - http://www.reddit.com/r/programming/comments/fv8ao/unicode_600_standard_published/?
4671             C means C. C means C.
4672             C means C. C means C.
4673            
4674             C means C. C looks genuinely missing...
4675            
4676             C means one of two, left or right???
4677            
4678             This better be convertible by rounding/sharpening mutators, but see
4679             C
4680            
4681             2268 LESS-THAN BUT NOT EQUAL TO; 1.1
4682             2269 GREATER-THAN BUT NOT EQUAL TO; 1.1
4683             228A SUBSET OF WITH NOT EQUAL TO; 1.1
4684             228B SUPERSET OF WITH NOT EQUAL TO; 1.1
4685             @ Relations
4686             22E4 SQUARE IMAGE OF OR NOT EQUAL TO; 1.1
4687             22E5 SQUARE ORIGINAL OF OR NOT EQUAL TO; 1.1
4688             @@ 2A00 Supplemental Mathematical Operators 2AFF
4689             @ Relational operators
4690             2A87 LESS-THAN AND SINGLE-LINE NOT EQUAL TO; 3.2
4691             x (less-than but not equal to - 2268)
4692             2A88 GREATER-THAN AND SINGLE-LINE NOT EQUAL TO; 3.2
4693             x (greater-than but not equal to - 2269)
4694             2AB1 PRECEDES ABOVE SINGLE-LINE NOT EQUAL TO; 3.2
4695             2AB2 SUCCEEDS ABOVE SINGLE-LINE NOT EQUAL TO; 3.2
4696             2AB5 PRECEDES ABOVE NOT EQUAL TO; 3.2
4697             2AB6 SUCCEEDS ABOVE NOT EQUAL TO; 3.2
4698             @ Subset and superset relations
4699             2ACB SUBSET OF ABOVE NOT EQUAL TO; 3.2
4700             2ACC SUPERSET OF ABOVE NOT EQUAL TO; 3.2
4701            
4702             Looking into v6.1 reference PDFs, 2268,2269,2ab5,2ab6,2acb,2acc have two horizontal bars,
4703             228A,228B,22e4,22e5,2a87,2a88,2ab1,2ab2 have one horizontal bar. Hence C and C
4704             are equivalent; so are C, C, C
4705             and C. (Square variants come only with one horizontal line?)
4706            
4707            
4708             Set C<$ENV{UI_KEYBOARDLAYOUT_UNRESOLVED}> to enable warnings. Then do
4709            
4710             perl -wlane "next unless /^Unresolved: <(.*?)>/; $s{$1}++; END{print qq($s{$_}\t$_) for keys %s}" oxx | sort -n > oxx-sorted-kw
4711            
4712             =head1 SEE ALSO
4713            
4714             The keyboard(s) generated with this module: L, L
4715            
4716             On diacritics:
4717            
4718             http://www.phon.ucl.ac.uk/home/wells/dia/diacritics-revised.htm#two
4719             http://en.wikipedia.org/wiki/Tonos#Unicode
4720             http://en.wikipedia.org/wiki/Early_Cyrillic_alphabet#Numerals.2C_diacritics_and_punctuation
4721             http://en.wikipedia.org/wiki/Vietnamese_alphabet#Tone_marks
4722             http://diacritics.typo.cz/
4723            
4724             http://en.wikipedia.org/wiki/User:TEB728/temp (Chars of languages)
4725             http://www.evertype.com/alphabets/index.html
4726            
4727             Accents in different Languages:
4728             http://fonty.pl/porady,12,inne_diakrytyki.htm#07
4729             http://en.wikipedia.org/wiki/Latin-derived_alphabet
4730            
4731             On typography marks
4732            
4733             http://wiki.neo-layout.org/wiki/Striche
4734             http://www.matthias-kammerer.de/SonsTypo3.htm
4735             http://en.wikipedia.org/wiki/Soft_hyphen
4736             http://en.wikipedia.org/wiki/Dash
4737             http://en.wikipedia.org/wiki/Ditto_mark
4738            
4739             On keyboard layouts:
4740            
4741             http://en.wikipedia.org/wiki/Keyboard_layout
4742             http://en.wikipedia.org/wiki/Keyboard_layout#US-International
4743             http://en.wikipedia.org/wiki/ISO/IEC_9995
4744             http://www.pentzlin.com/info2-9995-3-V3.pdf (used almost nowhere - only half of keys in Canadian multilanguage match)
4745             http://en.wikipedia.org/wiki/QWERTY#Canadian_Multilingual_Standard
4746             http://en.wikipedia.org/wiki/Unicode_input
4747             Discussion of layout changes and position of €:
4748             https://www.libreoffice.org/bugzilla/show_bug.cgi?id=5981
4749            
4750             History of QWERTY
4751             http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/publications/PreQWERTY.html
4752             http://kanji.zinbun.kyoto-u.ac.jp/db-machine/~yasuoka/QWERTY/
4753            
4754             http://msdn.microsoft.com/en-us/goglobal/bb964651
4755             http://eurkey.steffen.bruentjen.eu/layout.html
4756             http://ru.wikipedia.org/wiki/%D0%A4%D0%B0%D0%B9%D0%BB:Birman%27s_keyboard_layout.svg
4757             http://bepo.fr/wiki/Accueil
4758             http://www.unibuc.ro/e/prof/paliga_v_s/soft-reso/ (Academic for Mac)
4759             http://cgit.freedesktop.org/xkeyboard-config/tree/symbols/ru
4760             http://cgit.freedesktop.org/xkeyboard-config/tree/symbols/keypad
4761             http://www.evertype.com/celtscript/type-keys.html (Old Irish mechanical typewriters)
4762             http://eklhad.net/linux/app/halfqwerty.xkb (One-handed layout)
4763             http://www.doink.ch/an-x11-keyboard-layout-for-scholars-of-old-germanic/ (and references there)
4764             http://www.neo-layout.org/
4765             https://commons.wikimedia.org/wiki/File:Neo2_keyboard_layout.svg
4766             Images in (download of)
4767             http://www.mzuther.de/en/contents/osd-neo2
4768             Neo2 sources:
4769             http://wiki.neo-layout.org/browser/windows/kbdneo2/Quelldateien
4770             Shift keys at center, nice graphic:
4771             http://www.tinkerwithabandon.com/twa/keyboarding.html
4772             Physical keyboard:
4773             http://www.konyin.com/?page=product.Multilingual%20Keyboard%20for%20UNITED%20STATES
4774             Polytonic Greek
4775             http://www.polytoniko.org/keyb.php?newlang=en
4776             Portable keyboard layout
4777             http://www.autohotkey.com/forum/viewtopic.php?t=28447
4778             One-handed
4779             http://www.autohotkey.com/forum/topic1326.html
4780             Typing on numeric keypad
4781             http://goron.de/~johns/one-hand/#documentation
4782             On screen keyboard indicator
4783             http://www.autohotkey.com/docs/scripts/KeyboardOnScreen.htm
4784             Keyboards of ЕС-1840/1/5
4785             http://aic-crimea.narod.ru/Study/Shen/PC/1/5-4-1.htm
4786             (http://www.aic-crimea.narod.ru/Study/Shen/PC/main.htm) Руководство пользователя ПЭВМ
4787             http://fdd5-25.net/fddforum/index.php?PHPSESSID=201bd45ab972f1ab4b440dcb6c7ca18f&topic=489.30
4788             Phonetic Hebrew layout(s) (1st has many duplicates, 2nd overweighted)
4789             http://bc.tech.coop/Hebrew-ZC.html
4790             http://help.keymanweb.com/keyboards/keyboard_galaxiehebrewkm6.php
4791             Greek (Galaxy) with a convenient mapping (except for Ψ) and BibleScript
4792             http://www.tavultesoft.com/keyboarddownloads/%7B4D179548-1215-4167-8EF7-7F42B9B0C2A6%7D/manual.pdf
4793             With 2-letter input of Unicode names:
4794             http://www.jlg-utilities.com
4795             Medievalist's
4796             http://www.personal.leeds.ac.uk/~ecl6tam/
4797             Yandex visual keyboards
4798             http://habrahabr.ru/company/yandex/blog/108255/
4799             Implementation in Firefox
4800             http://mxr.mozilla.org/mozilla-central/source/widget/windows/KeyboardLayout.cpp#1085
4801             Implementation in Emacs 24.3 (ToUnicode() in fns)
4802             http://fossies.org/linux/misc/emacs-24.3.tar.gz:a/emacs-24.3/src/w32inevt.c
4803             http://fossies.org/linux/misc/emacs-24.3.tar.gz:a/emacs-24.3/src/w32fns.c
4804             http://fossies.org/linux/misc/emacs-24.3.tar.gz:a/emacs-24.3/src/w32term.c
4805             Naive implementations:
4806             http://social.msdn.microsoft.com/forums/en-US/windowssdk/thread/07afec87-68c1-4a56-bf46-a38a9c2232e9/
4807             Quality of a keyboard
4808             http://www.tavultesoft.com/keymandev/quality/whitepaper1.1.pdf
4809            
4810             Manipulating keyboards on Windows and X11
4811            
4812             http://symbolcodes.tlt.psu.edu/keyboards/winkeyvista.html (using links there: up to Win7)
4813             http://windows.microsoft.com/en-us/windows-8/change-keyboard-layout
4814             http://www.howtoforge.com/changing-language-and-keyboard-layout-on-various-linux-distributions
4815            
4816             MSKLC parser
4817            
4818             http://pastebin.com/UXc1ub4V
4819            
4820             By author of MSKLC Michael S. Kaplan (do not forget to follow links)
4821            
4822             Input on Windows:
4823             http://seit.unsw.adfa.edu.au/staff/sites/hrp/personal/Sanskrit-External/Unicode-KbdsonWindows.pdf
4824            
4825             http://blogs.msdn.com/b/michkap/archive/2006/03/26/560595.aspx
4826             http://blogs.msdn.com/b/michkap/archive/2006/04/22/581107.aspx
4827             Chaining dead keys:
4828             http://blogs.msdn.com/b/michkap/archive/2011/04/16/10154700.aspx
4829             Mapping VK to VSC etc:
4830             http://blogs.msdn.com/b/michkap/archive/2006/08/29/729476.aspx
4831             [Link] Remapping CapsLock to mean Backspace in a keyboard layout
4832             (if repeat, every second Press counts ;-)
4833             http://colemak.com/forum/viewtopic.php?id=870
4834             Scancodes from kbd.h get in the way
4835             http://blogs.msdn.com/b/michkap/archive/2006/08/30/726087.aspx
4836             What happens if you start with .klc with other VK_ mappings:
4837             http://blogs.msdn.com/b/michkap/archive/2010/11/03/10085336.aspx
4838             Keyboards with Ctrl-Shift states:
4839             http://blogs.msdn.com/b/michkap/archive/2010/10/08/10073124.aspx
4840             On assigning Ctrl-values
4841             http://blogs.msdn.com/b/michkap/archive/2008/11/04/9037027.aspx
4842             On hotkeys for switching layouts:
4843             http://blogs.msdn.com/b/michkap/archive/2008/07/16/8736898.aspx
4844             Text services
4845             http://blogs.msdn.com/b/michkap/archive/2008/06/30/8669123.aspx
4846             Low-level access in MSKLC
4847             http://levicki.net/articles/tips/2006/09/29/HOWTO_Build_keyboard_layouts_for_Windows_x64.php
4848             http://blogs.msdn.com/b/michkap/archive/2011/04/09/10151666.aspx
4849             On font linking
4850             http://blogs.msdn.com/b/michkap/archive/2006/01/22/515864.aspx
4851             Unicode in console
4852             http://blogs.msdn.com/michkap/archive/2005/12/15/504092.aspx
4853             Adding formerly "invisible" keys to the keyboard
4854             http://blogs.msdn.com/b/michkap/archive/2006/09/26/771554.aspx
4855             Redefining NumKeypad keys
4856             http://blogs.msdn.com/b/michkap/archive/2007/07/04/3690200.aspx
4857             BUT!!!
4858             http://blogs.msdn.com/b/michkap/archive/2010/04/05/9988581.aspx
4859             And backspace/return/etc
4860             http://blogs.msdn.com/b/michkap/archive/2008/10/27/9018025.aspx
4861             kbdutool.exe, run with the /S ==> .c files
4862             Doing one's own WM_DEADKEY processing
4863             http://blogs.msdn.com/b/michkap/archive/2006/09/10/748775.aspx
4864             Dead keys do not work on SG-Caps
4865             http://blogs.msdn.com/b/michkap/archive/2008/02/09/7564967.aspx
4866             Dynamic keycaps keyboard
4867             http://blogs.msdn.com/b/michkap/archive/2005/07/20/441227.aspx
4868             Backslash/yen/won confusion
4869             http://blogs.msdn.com/b/michkap/archive/2005/09/17/469941.aspx
4870             Unicode output to console
4871             http://blogs.msdn.com/b/michkap/archive/2010/10/07/10072032.aspx
4872             Install/Load/Activate an input method/layout
4873             http://blogs.msdn.com/b/michkap/archive/2007/12/01/6631463.aspx
4874             http://blogs.msdn.com/b/michkap/archive/2008/05/23/8537281.aspx
4875             Reset to a TT font from an application:
4876             http://blogs.msdn.com/b/michkap/archive/2011/09/22/10215125.aspx
4877             How to (not) treat C-A-Q
4878             http://blogs.msdn.com/b/michkap/archive/2012/04/26/10297903.aspx
4879             Treating Brazilian ABNT c1 c2 keys
4880             http://blogs.msdn.com/b/michkap/archive/2006/10/07/799605.aspx
4881             And JIS ¥|-key
4882             (compare with http://www.scs.stanford.edu/11wi-cs140/pintos/specs/kbd/scancodes-7.html
4883             http://hp.vector.co.jp/authors/VA003720/lpproj/others/kbdjpn.htm )
4884             http://blogs.msdn.com/b/michkap/archive/2006/09/26/771554.aspx
4885             Suggest a topic:
4886             http://blogs.msdn.com/b/michkap/archive/2007/07/29/4120528.aspx#7119166
4887            
4888             Installable Keyboard Layouts - Apple Developer (“.keylayout” files; modifiers not editable; cache may create problems;
4889             to enable deadkeys in X11, one may need extra work)
4890            
4891             http://developer.apple.com/technotes/tn2002/tn2056.html
4892             http://wordherd.com/keyboards/
4893             http://stackoverflow.com/questions/999681/how-to-remap-context-menu-key-in-mac-os-x
4894             http://apple.stackexchange.com/questions/21691/ukelele-generated-custom-keyboard-layouts-not-working-in-lion
4895             http://wiki.openoffice.org/wiki/X11Keymaps
4896             http://www.tenshu.net/2012/11/using-caps-lock-as-new-modifier-key-in.html
4897             http://raw.github.com/lreddie/ukelele-steps/master/USExtended.keylayout
4898             http://scripts.sil.org/cms/scripts/page.php?item_id=keylayoutmaker
4899            
4900             ANSI/ISO/ABNT/JIS/Russian Apple’s keyboards
4901            
4902             https://discussions.apple.com/thread/1508293
4903             http://www.dtp-transit.jp/apple/mac/post_1137.html
4904             http://www.dtp-transit.jp/images/apple-keyboards-US-JIS.jpg
4905             http://m10lmac.blogspot.co.il/2007/02/fixing-brazilian-keyboard-layout.html
4906             http://www2d.biglobe.ne.jp/~msyk/keyboard/layout/mac-jiskbd.html
4907             http://commons.wikimedia.org/wiki/File:KB_Russian_Apple_Macintosh.svg
4908            
4909             JIS variations (OADG109 vs A)
4910            
4911             http://ja.wikipedia.org/wiki/JIS%E3%82%AD%E3%83%BC%E3%83%9C%E3%83%BC%E3%83%89
4912            
4913             Different ways to access chars on Mac (1ˢᵗ suggests adding a Discover via plists via Keycaps≠Strings)
4914            
4915             http://apple.stackexchange.com/questions/49565/how-can-i-expand-the-number-of-special-characters-i-can-type-using-my-keyboard
4916             http://developer.apple.com/library/mac/#documentation/cocoa/conceptual/eventoverview/TextDefaultsBindings/TextDefaultsBindings.html#//apple_ref/doc/uid/20000468-CJBDEADF
4917             http://www.hcs.harvard.edu/~jrus/Site/System%20Bindings.html Default keybindings
4918             http://www.hcs.harvard.edu/~jrus/Site/Cocoa%20Text%20System.html
4919             http://hints.macworld.com/article.php?story=2005051118320432 Mystery keys on Mac
4920             http://www.snark.de/index.cgi/0007 Patching ADB drivers
4921             http://www.snark.de/mac/usbkbpatch/index_en.html Patching USB drivers (gives LCtrl vs RCtrl etc???)
4922             http://www.lorax.com/FreeStuff/TextExtras.html (has no docs???)
4923             http://stevelosh.com/blog/2012/10/a-modern-space-cadet/ Combining different approaches
4924             http://brettterpstra.com/2012/12/08/a-useful-caps-lock-key/ (simplified version of ↖)
4925             http://david.rothlis.net/keyboards/microsoft_natural_osx/ Num Lock is claimed as not working
4926            
4927             Compose on Mac requires hacks:
4928            
4929             http://apple.stackexchange.com/questions/31487/add-compose-key-to-os-x
4930            
4931             Convert Apple to MSKLC
4932            
4933             http://typophile.com/node/90606
4934            
4935             Keyboards on Mac:
4936            
4937             http://homepage.mac.com/thgewecke/mlingos9.html
4938             http://web.archive.org/web/20080717203026/http://homepage.mac.com/thgewecke/mlingos9.html
4939            
4940             Tool to produce:
4941            
4942             http://wordherd.com/keyboards/
4943             http://developer.apple.com/library/mac/#technotes/tn2056/_index.html
4944            
4945             VK_OEM_8 Kana modifier - Using instead of AltGr
4946            
4947             http://www.kbdedit.com/manual/ex13_replacing_altgr_with_kana.html
4948            
4949             Limitations of using KANA toggle
4950            
4951             http://www.kbdedit.com/manual/ex12_trilang_ser_cyr_lat_gre.html
4952            
4953             FE (Far Eastern) keyboard source code example (NEC AT is 106 with SPECIAL MULTIVK flags changed on some scancodes, OEM_7/8 producing 0x1e 0x1f, and no OEM_102):
4954            
4955             http://read.pudn.com/downloads3/sourcecode/windows/248345/win2k/private/ntos/w32/ntuser/kbd/fe_kbds/jpn/ibm02/kbdibm02.c__.htm
4956             http://read.pudn.com/downloads3/sourcecode/windows/248345/win2k/private/ntos/w32/ntuser/kbd/fe_kbds/jpn/kbdnecat/kbdnecat.c__.htm
4957             http://read.pudn.com/downloads3/sourcecode/windows/248345/win2k/private/ntos/w32/ntuser/kbd/fe_kbds/jpn/106/kbd106.c__.htm
4958            
4959             Investigation on relation between VK_ assignments, KBDEXT, KBDNUMPAD etc:
4960             http://code.google.com/p/ergo-dvorak-for-developers/source/browse/trunk/kbddvp.c
4961            
4962             PowerShell vs ISE (and how to find them [On Win7: WinKey Accessories])
4963             http://blogs.msdn.com/b/powershell/archive/2009/04/17/differences-between-the-ise-and-powershell-console.aspx
4964             http://blogs.msdn.com/b/michkap/archive/2013/01/23/10387424.aspx
4965             http://blogs.msdn.com/b/michkap/archive/2013/02/15/10393862.aspx
4966             http://blogs.msdn.com/b/michkap/archive/2013/02/19/10395086.aspx
4967             http://blogs.msdn.com/b/michkap/archive/2013/02/20/10395416.aspx
4968            
4969             Google for "Get modification number for Shift key" for code to query the kbd DLL directly ("keylogger")
4970             http://web.archive.org/web/20120106074849/http://debtnews.net/index.php/article/debtor/2008-09-08/1088.html
4971             http://code.google.com/p/keymagic/source/browse/KeyMagicDll/kbdext.cpp?name=0419d8d626&r=d85498403fd59bca9efc04b4e5bb4406d39439a0
4972            
4973             How to read Unicode in an ANSI Window:
4974             http://social.msdn.microsoft.com/Forums/en-US/windowsgeneraldevelopmentissues/thread/d455e846-d18b-4086-98de-822658bcebf0/
4975             http://blog.tavultesoft.com/2011/06/accepting-unicode-input-in-your-windows-application.html
4976            
4977             HTML consolidated entity names and discussion, MES charsets:
4978            
4979             http://www.w3.org/TR/xml-entity-names
4980             http://www.w3.org/2003/entities/2007/w3centities-f.ent
4981             http://www.cl.cam.ac.uk/~mgk25/ucs/mes-2-rationale.html
4982             http://web.archive.org/web/20000815100817/http://www.egt.ie/standards/iso10646/pdf/cwa13873.pdf
4983            
4984             Ctrl2cap
4985            
4986             http://technet.microsoft.com/en-us/sysinternals/bb897578
4987            
4988             Low level scancode mapping
4989            
4990             http://www.annoyances.org/exec/forum/winxp/r1017256194
4991             http://web.archive.org/web/20030211001441/http://www.microsoft.com/hwdev/tech/input/w2kscan-map.asp
4992             http://msdn.microsoft.com/en-us/windows/hardware/gg463447
4993             http://www.annoyances.org/exec/forum/winxp/1034644655
4994             ???
4995             http://netj.org/2004/07/windows_keymap
4996             the free remapkey.exe utility that's in Microsoft NT / 2000 resource kit.
4997            
4998             perl -wlne "BEGIN{$t = {T => q(), qw( X e0 Y e1 )}} print qq( $t->{$1}$2\t$3) if /^#define\s+([TXY])([0-9a-f]{2})\s+(?:_EQ|_NE)\((?:(?:\s*\w+\s*,){3})?\s*([^\W_]\w*)\s*(?:(?:,\s*\w+\s*){2})?\)\s*(?:\/\/.*)?$/i" kbd.h >ll2
4999             then select stuff up to the first e1 key (but DECIMAL is not there T53 is DELETE??? take from MSKLC help/using/advanced/scancodes)
5000            
5001             CapsLock as on typewriter:
5002            
5003             http://web.archive.org/web/20120717083202/http://www.annoyances.org/exec/forum/winxp/1071197341
5004            
5005             Scancodes visible on the low level:
5006            
5007             http://openbsd.7691.n7.nabble.com/Patch-Support-F13-F24-on-PC-122-terminal-keyboard-td224992.html
5008             http://www.seasip.info/Misc/1227T.html
5009            
5010             Scancodes visible on Windows (with USB)
5011            
5012             http://download.microsoft.com/download/1/6/1/161ba512-40e2-4cc9-843a-923143f3456c/translate.pdf
5013            
5014             Problems on X11:
5015            
5016             http://www.x.org/releases/X11R7.7/doc/kbproto/xkbproto.html (definition of XKB???)
5017             http://www.x.org/releases/current/doc/kbproto/xkbproto.html
5018            
5019             http://wiki.linuxquestions.org/wiki/Configuring_keyboards (current???)
5020             http://wiki.linuxquestions.org/wiki/Accented_Characters (current???)
5021             http://wiki.linuxquestions.org/wiki/Altering_or_Creating_Keyboard_Maps (current???)
5022             https://help.ubuntu.com/community/ComposeKey (documents almost 1/2 of the needed stuff)
5023             http://www.gentoo.org/doc/en/utf-8.xml (2005++ ???)
5024             http://en.gentoo-wiki.com/wiki/X.Org/Input_drivers (2009++ HAS: How to make CapsLock change layouts)
5025             http://www.freebsd.org/cgi/man.cgi?query=setxkbmap&sektion=1&manpath=X11R7.4
5026             http://people.uleth.ca/~daniel.odonnell/Blog/custom-keyboard-in-linuxx11
5027             http://shtrom.ssji.net/skb/xorg-ligatures.html (of 2008???)
5028             http://tldp.org/HOWTO/Danish-HOWTO-2.html (of 2005???)
5029             http://www.tux.org/~balsa/linux/deadkeys/index.html (of 1999???)
5030             http://www.x.org/releases/X11R7.6/doc/libX11/Compose/en_US.UTF-8.html
5031             http://cgit.freedesktop.org/xorg/proto/xproto/plain/keysymdef.h
5032            
5033             EIGHT_LEVEL FOUR_LEVEL_ALPHABETIC FOUR_LEVEL_SEMIALPHABETIC PC_SYSRQ : see
5034             http://cafbit.com/resource/mackeyboard/mackeyboard.xkb
5035            
5036             ./xkb in /etc/X11 /usr/local/X11 /usr/share/local/X11 /usr/share/X11
5037             (maybe it is more productive to try
5038             ls -d /*/*/xkb /*/*/*/xkb
5039             ?)
5040             but what dead_diaeresis means is defined here:
5041             Apparently, may be in /usr/X11R6/lib/X11/locale/en_US.UTF-8/Compose /usr/share/X11/locale/en_US.UTF-8/Compose
5042             http://wiki.maemo.org/Remapping_keyboard
5043             http://www.x.org/releases/current/doc/man/man8/mkcomposecache.8.xhtml
5044            
5045             B have XIM input method in GTK disables Control-Shift-u way of entering HEX unicode.
5046            
5047             How to contribute:
5048             http://www.freedesktop.org/wiki/Software/XKeyboardConfig/Rules
5049            
5050             B the problems with handling deadkeys via .Compose are that: .Compose is handled by
5051             applications, while keymaps by server (since they may be on different machines, things can
5052             easily get out of sync); .Compose knows nothing about the current "Keyboard group" or of
5053             the state of CapsLock etc (therefore emulating "group switch" via composing is impossible).
5054            
5055             JS code to add "insert these chars": google for editpage_specialchars_cyrilic, or
5056            
5057             http://en.wikipedia.org/wiki/User:TEB728/monobook.jsx
5058            
5059             Latin paleography
5060            
5061             http://en.wikipedia.org/wiki/Latin_alphabet
5062             http://tlt.its.psu.edu/suggestions/international/bylanguage/oenglish.html
5063             http://guindo.pntic.mec.es/~jmag0042/LATIN_PALEOGRAPHY.pdf
5064             http://www.evertype.com/standards/wynnyogh/ezhyogh.html
5065             http://www.wordorigins.org/downloads/OELetters.doc
5066             http://www.menota.uio.no/menota-entities.txt
5067             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n2957.pdf (Incomplete???)
5068             http://skaldic.arts.usyd.edu.au/db.php?table=mufi_char&if=mufi (No prioritization...)
5069            
5070             Summary tables for Cyrillic
5071            
5072             http://ru.wikipedia.org/wiki/%D0%9A%D0%B8%D1%80%D0%B8%D0%BB%D0%BB%D0%B8%D1%86%D0%B0#.D0.A1.D0.BE.D0.B2.D1.80.D0.B5.D0.BC.D0.B5.D0.BD.D0.BD.D1.8B.D0.B5_.D0.BA.D0.B8.D1.80.D0.B8.D0.BB.D0.BB.D0.B8.D1.87.D0.B5.D1.81.D0.BA.D0.B8.D0.B5_.D0.B0.D0.BB.D1.84.D0.B0.D0.B2.D0.B8.D1.82.D1.8B_.D1.81.D0.BB.D0.B0.D0.B2.D1.8F.D0.BD.D1.81.D0.BA.D0.B8.D1.85_.D1.8F.D0.B7.D1.8B.D0.BA.D0.BE.D0.B2
5073             http://ru.wikipedia.org/wiki/%D0%9F%D0%BE%D0%B7%D0%B8%D1%86%D0%B8%D0%B8_%D0%B1%D1%83%D0%BA%D0%B2_%D0%BA%D0%B8%D1%80%D0%B8%D0%BB%D0%BB%D0%B8%D1%86%D1%8B_%D0%B2_%D0%B0%D0%BB%D1%84%D0%B0%D0%B2%D0%B8%D1%82%D0%B0%D1%85
5074             http://en.wikipedia.org/wiki/List_of_Cyrillic_letters - per language tables
5075             http://en.wikipedia.org/wiki/Cyrillic_alphabets#Summary_table
5076             http://en.wiktionary.org/wiki/Appendix:Cyrillic_script
5077            
5078             Extra chars (see also the ordering table on page 8)
5079             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3194.pdf
5080            
5081             Typesetting Old and Modern Church Slavonic
5082             http://www.sanu.ac.rs/Cirilica/Prilozi/Skup.pdf
5083             http://irmologion.ru/ucsenc/ucslay8.html
5084             http://irmologion.ru/csscript/csscript.html
5085             http://cslav.org/success.htm
5086             http://irmologion.ru/developer/fontdev.html#allocating
5087            
5088             Non-dialogue of Slavists and Unicode experts
5089             http://www.sanu.ac.rs/Cirilica/Prilozi/Standard.pdf
5090             http://kodeks.uni-bamberg.de/slavling/downloads/2008-07-26_white-paper.pdf
5091            
5092             Newer: (+ combining ф)
5093             http://tug.org/pipermail/xetex/2012-May/023007.html
5094             http://www.unicode.org/alloc/Pipeline.html As below, plus N-left-hook, ДЗЖ ДЧ, L-descender, modifier-Ь/Ъ
5095             http://www.synaxis.info/azbuka/ponomar/charset/charset_1.htm
5096             http://www.synaxis.info/azbuka/ponomar/charset/charset_2.htm
5097             http://www.synaxis.info/azbuka/ponomar/roadmap/roadmap.html
5098             http://www.ponomar.net/cu_support.html
5099             http://www.ponomar.net/files/out.pdf
5100             http://www.ponomar.net/files/variants.pdf (5 VS for Mark's chapter, 2 VS for t, 1 VS for the rest)
5101            
5102             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3772.pdf typikon (+[semi]circled), ε-form
5103             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3971.pdf inverted ε-typikon
5104             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3974.pdf two variants of o/O
5105             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3998.pdf Mark's chapter
5106             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3563.pdf Reversed tse
5107            
5108             IPA
5109            
5110             http://upload.wikimedia.org/wikipedia/commons/f/f5/IPA_chart_2005_png.svg
5111             http://en.wikipedia.org/wiki/Obsolete_and_nonstandard_symbols_in_the_International_Phonetic_Alphabet
5112             http://en.wikipedia.org/wiki/Case_variants_of_IPA_letters
5113             Table with Unicode points marked:
5114             http://www.staff.uni-marburg.de/~luedersb/IPA_CHART2005-UNICODE.pdf
5115             (except for "Lateral flap" and "Epiglottal" column/row.)
5116             (Extended) IPA explained by consortium:
5117             http://unicode.org/charts/PDF/U0250.pdf
5118             IPA keyboard
5119             http://www.rejc2.co.uk/ipakeyboard/
5120            
5121             http://en.wikipedia.org/wiki/International_Phonetic_Alphabet_chart_for_English_dialects#cite_ref-r_11-0
5122            
5123            
5124             Is this discussing KBDNLS_TYPE_TOGGLE on VK_KANA???
5125            
5126             http://mychro.mydns.jp/~mychro/mt/2010/05/vk-f.html
5127            
5128             Windows: fonts substitution/fallback/replacement
5129            
5130             http://msdn.microsoft.com/en-us/goglobal/bb688134
5131            
5132             Problems on Windows:
5133            
5134             http://en.wikipedia.org/wiki/Help:Special_characters#Alt_keycodes_for_Windows_computers
5135             http://en.wikipedia.org/wiki/Template_talk:Unicode#Plane_One_fonts
5136            
5137             Console font: Lucida Console 14 is viewable, but has practically no Unicode support.
5138             Consolas (good at 16) has much better Unicode support (sometimes better sometimes worse than DejaVu)
5139             DejaVu is good at 14 (equal to a GUI font size 9 on 15in 1300px screen; 16px unifont is native at 12 here)
5140             http://cristianadam.blogspot.com/2009/11/windows-console-and-true-type-fonts.html
5141            
5142             Apparently, Windows picks up the flavor (Bold/Italic/Etc) of DejaVu at random; see
5143             http://jpsoft.com/forums/threads/strange-results-with-cp-1252.1129/
5144             - he got it in bold. I''m getting it in italic... Workaround: uninstall
5145             all flavors but one (the BOOK flavor), THEN enable it for the console... Then reinstall
5146             (preferably newer versions).
5147            
5148             Display (how WikiPedia does it):
5149            
5150             http://en.wikipedia.org/wiki/Help:Special_characters#Displaying_special_characters
5151             http://en.wikipedia.org/wiki/Template:Unicode
5152             http://en.wikipedia.org/wiki/Template:Unichar
5153             http://en.wikipedia.org/wiki/User:Ruud_Koot/Unicode_typefaces
5154             In CSS: .IPA, .Unicode { font-family: "Arial Unicode MS", "Lucida Sans Unicode"; }
5155             http://web.archive.org/web/20060913000000/http://en.wikipedia.org/wiki/Template:Unicode_fonts
5156            
5157             Inspect which font is used by Firefox:
5158            
5159             https://addons.mozilla.org/en-US/firefox/addon/fontinfo/
5160            
5161             Windows shortcuts:
5162            
5163             http://windows.microsoft.com/en-US/windows7/Keyboard-shortcuts
5164             http://www.redgage.com/blogs/pankajugale/all-keyboard-shortcuts--very-useful.html
5165             https://skydrive.live.com/?cid=2ee8d462a8f365a0&id=2EE8D462A8F365A0%21141
5166             http://windows.microsoft.com/en-us/windows-8/new-keyboard-shortcuts
5167            
5168             On meaning of Unicode math codepoints
5169            
5170             http://milde.users.sourceforge.net/LUCR/Math/unimathsymbols.pdf
5171             http://milde.users.sourceforge.net/LUCR/Math/data/unimathsymbols.txt
5172             http://www.ams.org/STIX/bnb/stix-tbl.ascii-2006-10-20
5173             http://www.ams.org/STIX/bnb/stix-tbl.layout-2006-05-15
5174             http://mirrors.ibiblio.org/CTAN/macros/latex/contrib/unicode-math/unimath-symbols.pdf
5175             http://mirrors.ibiblio.org/CTAN//biblio/biber/documentation/utf8-macro-map.html
5176             http://tex.stackexchange.com/questions/14/how-to-look-up-a-symbol-or-identify-a-math-symbol-or-character
5177             http://unicode.org/Public/math/revision-09/MathClass-9.txt
5178             http://www.w3.org/TR/MathML/
5179             http://www.w3.org/TR/xml-entity-names/
5180             http://www.w3.org/TR/xml-entity-names/bycodes.html
5181            
5182             Transliteration (via iconv [it is locale-dependent], example rules for Greek)
5183            
5184             http://sourceware.org/bugzilla/show_bug.cgi?id=12031
5185            
5186             Monospaced fonts with combining marks (!)
5187            
5188             https://bugs.freedesktop.org/show_bug.cgi?id=18614
5189             https://bugs.freedesktop.org/show_bug.cgi?id=26941
5190            
5191             Indic ISCII - any hope with it? (This is not representable...:)
5192            
5193             http://unicode.org/mail-arch/unicode-ml/y2012-m09/0053.html
5194            
5195             (Perceived) problems of Unicode (2001)
5196            
5197             http://www.ibm.com/developerworks/library/u-secret.html
5198            
5199             On a need to have input methods for unicode
5200            
5201             http://unicode.org/mail-arch/unicode-ml/y2012-m07/0226.html
5202            
5203             On info on Unicode chars
5204            
5205             http://unicode.org/mail-arch/unicode-ml/y2012-m07/0415.html
5206            
5207             Zapf dingbats encoding, and other fine points of AdobeGL:
5208            
5209             ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/ADOBE/zdingbat.txt
5210             http://web.archive.org/web/20001015040951/http://partners.adobe.com/asn/developer/typeforum/unicodegn.html
5211            
5212             Yet another (IMO, silly) way to handle '; fight: ' vs ` ´
5213            
5214             http://www.cl.cam.ac.uk/~mgk25/ucs/apostrophe.html
5215            
5216             Surrogate characters on IE
5217            
5218             HKEY_CURRENT_USER\Software\Microsoft\Internet Explorer\International\Scripts\42
5219             http://winvnkey.sourceforge.net/webhelp/surrogate_fonts.htm
5220             http://msdn.microsoft.com/en-us/library/aa918682.aspx Script IDs
5221            
5222             Quoting tchrist:
5223             I, C, and C from L if you like.>
5224            
5225             Tom's unicode scripts
5226            
5227             http://search.cpan.org/~bdfoy/Unicode-Tussle-1.03/lib/Unicode/Tussle.pm
5228            
5229             =head2 F<.XCompose>: on docs and examples
5230            
5231             Syntax of C<.XCompose> is (partially) documented in
5232            
5233             http://www.x.org/archive/current/doc/man/man5/Compose.5.xhtml
5234             http://cgit.freedesktop.org/xorg/lib/libX11/tree/man/Compose.man
5235            
5236             # Modifiers are not documented
5237             # (Shift, Alt, Lock, Ctrl with aliases Meta, Caps [Alt/Meta binds Mod1];
5238             # ! means that not mentioned supported modifiers must be off;
5239             # None means that all recognized modifiers are off.)
5240            
5241             Semantic (e.g., which of keybindings has a preference) is not documented.
5242             Experiments (see below) show that a longer binding wins; if same
5243             length, one which is loaded later wins (as far as they match exactly, both
5244             the keys, and the set of required modifiers and their states).
5245             Note that a given keypress may match several I lists of
5246             modifier; one defined earlier wins.
5247            
5248             For example, in
5249            
5250             ~Ctrl Shift : "a1"
5251             Shift ~Ctrl : "ab1"
5252             ~Meta Shift : "b1"
5253             ~Ctrl ~Meta Shift : "ba1"
5254             Shift ~Meta : "b2"
5255             Shift ~Meta ~Lock : "b3"
5256            
5257             there is no way to trigger the output C<"a1"> (since the next row captures
5258             essentially the same keypress into a longer binding). The only binding which
5259             is explicitly overwritten is one for C<"b1">. Hence pressing
5260             C would trigger the binding C<"b2">, and there is no way to trigger
5261             the bindings for C<"b3"> and C<"ba1">.
5262            
5263             # (the source of imLcPrs.c shows that the expansion of the
5264             # shorter sequence is stored too - but the presence of
5265             # ->succession means that the code to process the resulting
5266             # tree ignores the expansion).
5267            
5268             The interaction of C<.Compose> with
5269             L
5270             of passed-through C and C modifiers is not documented.
5271            
5272             Before the syntax was documented: For the best approximation,
5273             read the parser's code, e.g., google for
5274            
5275             inurl:compose.c XCompose
5276             site:cgit.freedesktop.org "XCompose"
5277             site:cgit.freedesktop.org "XCompose" filetype:c
5278             _XimParseStringFile
5279            
5280             http://cgit.freedesktop.org/xorg/lib/libX11/tree/modules/im/ximcp/imLcIm.c
5281             http://cgit.freedesktop.org/xorg/lib/libX11/tree/modules/im/ximcp/imLcPrs.c
5282             http://uim.googlecode.com/svn-history/r6111/trunk/gtk/compose.c
5283             http://uim.googlecode.com/svn/tags/uim-1.5.2/gtk/compose.c
5284            
5285             The actual use of the compiled compose table:
5286            
5287             http://cgit.freedesktop.org/xorg/lib/libX11/tree/modules/im/ximcp/imLcFlt.c
5288            
5289             Apparently, the first node (= defined last) in the tree which
5290             matches keysym and modifiers is chosen. So to override C<< >>,
5291             looks like (checked to work!) C<< ~Ctrl >> may be used...
5292             On the other hand, defining both C<< >> and (later) C<< ~Ctrl >>,
5293             one would expect that C<< >> should still trigger the
5294             expansion of C<< >> — but it does not... See also:
5295            
5296             http://cgit.freedesktop.org/xorg/lib/libX11/tree/modules/im/ximcp/imLcLkup.c
5297            
5298             The file F<.XCompose> is processed by X11 I on startup. The changes
5299             to this file should be seen immediately by all newly started clients
5300             (but GTK or QT applications may need extra config - see below)
5301             unless the directory F<~/.compose-cache> is present and has a cache
5302             file compatible with binary architecture (then until cache
5303             expires - one day after creation - changes are not seen). The
5304             name F<.XCompose> may be overridden by environment variable C<XCOMPOSEFILE>.
5305            
5306             To get (better?) examples, google for C<"multi_key" partial alpha "DOUBLE-STRUCK">.
5307            
5308             # include these first, so they may be overridden later
5309             include "%H/my-Compose/.XCompose-kragen"
5310             include "%H/my-Compose/.XCompose-ootync"
5311             include "%H/my-Compose/.XCompose-pSub"
5312            
5313             Check success: kragen: C<\ space> --> ␣; ootync: C --> ℉; pSub: C<0 0> --> ∞ ...
5314            
5315             Older versions of X11 do not understand %L %S. - but understand %H
5316            
5317             E.g. Debian Squeeze 6.0.6; according to
5318            
5319             http://packages.debian.org/search?keywords=x11-common
5320            
5321             it has C).
5322            
5323             include "/etc/X11/locale/en_US.UTF-8/Compose"
5324             include "/usr/share/X11/locale/en_US.UTF-8/Compose"
5325            
5326             Import default rules from the system Compose file:
5327             usually as above (but supported only on newer systems):
5328            
5329             include "%L"
5330            
5331             detect the success of the lines above: get C<#> by doing C ...
5332            
5333             The next file to include has been generated by
5334            
5335             perl -wlne 'next if /#\s+CIRCLED/; print if />\s+<.*>\s+<.*>\s+<.*/' /usr/share/X11/locale/en_US.UTF-8/Compose
5336             ### Std tables contain quadruple prefix for GREEK VOWELS and CIRCLED stuff
5337             ### only. But there is a lot of triple prefix...
5338             perl -wne 'next if /#\s+CIRCLED/; $s{$1}++ or print qq( $1) if />\s+<.*>\s+<.*>\s+<.*"(.*)"/' /usr/share/X11/locale/en_US.UTF-8/Compose
5339             ## – — ☭ ª º Ǖ ǖ Ǘ ǘ Ǚ ǚ Ǜ ǜ Ǟ ǟ Ǡ ǡ Ǭ ǭ Ǻ ǻ Ǿ ǿ Ȫ ȫ Ȭ ȭ Ȱ ȱ ʰ ʱ ʲ ʳ ʴ ʵ ʶ ʷ ʸ ˠ ˡ ˢ ˣ ˤ ΐ ΰ Ḉ ḉ Ḕ ḕ Ḗ ḗ Ḝ ḝ Ḯ ḯ Ḹ ḹ Ṍ ṍ Ṏ ṏ Ṑ ṑ Ṓ ṓ Ṝ ṝ Ṥ ṥ Ṧ ṧ Ṩ ṩ Ṹ ṹ Ṻ ṻ Ấ ấ Ầ ầ Ẩ ẩ Ẫ ẫ Ậ ậ Ắ ắ Ằ ằ Ẳ ẳ Ẵ ẵ Ặ ặ Ế ế Ề ề Ể ể Ễ ễ Ệ ệ Ố ố Ồ ồ Ổ ổ Ỗ ỗ Ộ ộ Ớ ớ Ờ ờ Ở ở Ỡ ỡ Ợ ợ Ứ ứ Ừ ừ Ử ử Ữ ữ Ự ự ἂ ἃ ἄ ἅ ἆ ἇ Ἂ Ἃ Ἄ Ἅ Ἆ Ἇ ἒ ἓ ἔ ἕ Ἒ Ἓ Ἔ Ἕ ἢ ἣ ἤ ἥ ἦ ἧ Ἢ Ἣ Ἤ Ἥ Ἦ Ἧ ἲ ἳ ἴ ἵ ἶ ἷ Ἲ Ἳ Ἴ Ἵ Ἶ Ἷ ὂ ὃ ὄ ὅ Ὂ Ὃ Ὄ Ὅ ὒ ὓ ὔ ὕ ὖ ὗ Ὓ Ὕ Ὗ ὢ ὣ ὤ ὥ ὦ ὧ Ὢ Ὣ Ὤ Ὥ Ὦ Ὧ ᾀ ᾁ ᾂ ᾃ ᾄ ᾅ ᾆ ᾇ ᾈ ᾉ ᾊ ᾋ ᾌ ᾍ ᾎ ᾏ ᾐ ᾑ ᾒ ᾓ ᾔ ᾕ ᾖ ᾗ ᾘ ᾙ ᾚ ᾛ ᾜ ᾝ ᾞ ᾟ ᾠ ᾡ ᾢ ᾣ ᾤ ᾥ ᾦ ᾧ ᾨ ᾩ ᾪ ᾫ ᾬ ᾭ ᾮ ᾯ ᾲ ᾴ ᾷ ῂ ῄ ῇ ῒ ῗ ῢ ῧ ῲ ῴ ῷ ⁱ ⁿ ℠ ™ שּׁ שּׂ а̏ А̏ е̏ Е̏ и̏ И̏ о̏ О̏ у̏ У̏ р̏ Р̏ 🙌
5340            
5341             The following excerpt from NEO compose tables may be good if you use
5342             keyboards which do not generate dead keys, but may generate Cyrillic keys;
5343             in other situations, edit filtering/naming on the following download
5344             command and on the C line below. (For my taste, most bindings are
5345             useless since they contain keysymbols which may be generated with NEO, but
5346             not with less intimidating keylayouts.)
5347            
5348             (Filtering may be important, since having a large file may
5349             significantly slow down client's startup (without F<~/.compose-cache>???).)
5350            
5351             # perl -wle 'foreach (qw(base cyrillic greek lang math)) {my @i=@ARGV; $i[-1] .= qq($_.module?format=txt); system @i}' wget -O - http://wiki.neo-layout.org/browser/Compose/src/ | perl -wlne 'print unless /<(U[\dA-F]{4,6}>|dead_|Greek_)/' > .XCompose-neo-no-Udigits-no-dead-no-Greek
5352             include "%H/.XCompose-neo-no-Udigits-no-dead-no-Greek"
5353             # detect the success of the line above: get ♫ by doing Compose Compose (but this binding is overwritten later!)
5354            
5355             ###################################### Neo's Math contains junk at line 312
5356            
5357             Print with something like (loading in a web browser after this):
5358            
5359             perl -l examples/filter-XCompose ~/.XCompose-neo-no-Udigits-no-dead-no-Greek > ! o-neo
5360             env LC_ALL=C sort -f o-neo | column -x -c 130 > ! /tmp/oo-neo-x
5361            
5362             =head2 “Systematic” parts of rules in a few F<.XCompose>
5363            
5364             ================== .XCompose b=bepo o=ootync k=kragen p=pSub s=std
5365             b Double-Struck b
5366             o circled ops b
5367             O big circled ops b
5368             r rotated b 8ACETUv ∞
5369            
5370             - sub p
5371             = double arrows po
5372             g greek po
5373             m math p |=Double-Struck rest haphazard...
5374             O circles p Oo
5375             S stars p Ss
5376             ^ sup p added: i -
5377             | daggers p
5378            
5379             Double mathop ok +*&|%8CNPQRZ AE
5380            
5381             # thick-black arrows o
5382             -,Num- arrows o
5383             N/N fractions o
5384             hH pointing hands o
5385             O circled ops o
5386             o degree o
5387             rR roman nums o
5388             \ UP upper modifiers o
5389             \ DN lower modifiers o
5390             { set theoretic o
5391             | arrows |-->flavors o
5392             UP / roots o
5393             LFT DN 6-quotes, bold delim o
5394             RT DN 9-quotes, bold delim o
5395             UP,DN super,sub o
5396            
5397             DOUBLE-separated-by-& op k ( )
5398             in-() circled k xx for tensor
5399             in-[] boxed, dice, play-cards k
5400             BKSP after revert k
5401             < after revert k
5402             ` after small-caps k
5403             ' after hook k
5404             , after hook below k
5405             h after phonetic k
5406            
5407             # musical k
5408             %0 ROMAN k %_0 for two-digit
5409             % roman k %_ for two-digit
5410             * stars k
5411             *. var-greek k
5412             * greek k
5413             ++, 3 triple k
5414             + double k
5415             , quotes k
5416             !, / negate k
5417             6,9 6,9-quotes k
5418             N N fractions k
5419             = double-arrows, RET k
5420             CMP x2 long names k
5421             f hand, pencils k
5422             \ combining??? k
5423             ^ super, up modifier k
5424             _ low modifiers k
5425             |B, |W chess, checkers, B&W k
5426             | double-struck k
5427             ARROWS ARROWS k
5428            
5429             ! dot below s
5430             " diaeresis s
5431             ' acute s
5432             trail < left delimiter s
5433             trail > right delimiter s
5434             trail \ slopped variant s
5435             ( ... ) circled s
5436             ( greek aspirations s
5437             ) greek aspirations s
5438             + horn s
5439             , cedilla s
5440             . dot above s
5441             - hor. bar s
5442             / diag, vert hor. bar s
5443             ; ogonek s
5444             = double hor.bar s
5445             trail = double hor.bar s
5446             ? hook above s
5447             b breve s
5448             c check above s
5449             iota iota below s
5450             trail 0338 negated s
5451             o ring above s
5452             U breve s
5453             SOME HEBREW
5454             ^ circumflex s
5455             ^ _ superscript s
5456             ^ undbr superscript s
5457             _ bar s
5458             _ subscript s
5459             underbr subscript s
5460             ` grave s
5461             ~ greek dieresis s
5462             ~ tilde s
5463             overbar bar s
5464             ´ acute s ´ is not '
5465             ¸ cedilla s ¸ is cedilla
5466            
5467             =head1 LIMITATIONS
5468            
5469             Currently only output for Windows keyboard layout drivers (via MSKLC) is available.
5470            
5471             Currently only the keyboards with US-mapping of hardware keys to "the etched
5472             symbols" are supported (think of German physical keyboards where Y/Z keycaps
5473             are swapped: Z is etched between T and U, and Y is to the left of X, or French
5474             which swaps A and Q, or French or Russian physical keyboards which have more
5475             alphabetical keys than 26).
5476            
5477             While the architecture of assembling a keyboard of small easy-to-describe
5478             pieces is (IMO) elegant and very powerful, and is proven to be useful, it
5479             still looks like a collection of independent hacks. Many of these hacks
5480             look quite similar; it would be great to find a way to unify them, and so
5481             reduce the repertoire of operations for assembly.
5482            
5483             The current documentation of the module’s functionality is not complete.
5484            
5485             The implementation of the module is crumbling under its weight. Its
5486             evolution was by bloating (even when some design features were simplified).
5487             Since initially I had very little clue to which level of abstraction and
5488             flexibility the keyboard description would evolve, bloating accumulated
5489             to incredible amounts.
5490            
5491             =head1 COPYRIGHT
5492            
5493             Copyright (c) 2011-2013 Ilya Zakharevich
5494            
5495             This library is free software; you can redistribute it and/or modify
5496             it under the same terms as Perl itself, either Perl version 5.8.0 or,
5497             at your option, any later version of Perl 5 you may have available.
5498            
5499             The distributed examples may have their own copyrights.
5500            
5501             =head1 TODO
5502            
5503             UniPolyK-MultiSymple
5504            
5505             Multiple linked faces (accessible as described in ChangeLog); designated
5506             Primary- and Secondary- switch keys (as Shift-Space and AltGr-Space now).
5507            
5508             C as a deadkey may be not a good idea: following it by a special key
5509             (such as C, or C) may insert the deadkey character???
5510             Hence the character should be highly visible... (Now the key is invisible,
5511             so this is irrelevant...)
5512            
5513             Currently linked layers must have exactly the same number of keys in VK-tables.
5514            
5515             VK tables for TAB, BACK were BS. Same (remains) for the rest of unusual keys... (See TAB-was.)
5516             But UTOOL cannot handle them anyway...
5517            
5518             Define an extra element in VK keys: linkable. Should be sorted first in the kbd map,
5519             and there should be the same number in linked lists. Non-linkable keys should not
5520             be linked together by deadkey access...
5521            
5522             Interaction of FromToFlipShift with SelectRX not intuitive. This works: Diacritic[](SelectRX[[0-9]](FlipShift(Latin)))
5523            
5524             DefinedTo cannot be put on Cyrillic 3a9 (yo to superscript disappears - due to duplication???).
5525            
5526             ... so we do it differently now, but: LinkLayer was not aggressively resolving all the occurrences of a character on a layer
5527             before we started to combine it with Diacritic_if_undef... - and Cyrillic 3a9 is not helped...
5528            
5529             via_parent() is broken - cannot replace for Diacritic_if_undef.
5530            
5531             Currently, we map epigraphic letters to capital letters - is it intuitive???
5532            
5533             dotted circle ◌ 25CC
5534            
5535             DeadKey_Map200A= FlipLayers
5536             #DeadKey_Map200A_0= Id(Russian-AltGr)
5537             #DeadKey_Map200A_1= Id(Russian)
5538             performs differently from the commented variant: it adds links to auto-filled keys...
5539            
5540             Why ¨ on THIN SPACE inserts OGONEK after making ¨ multifaceted???
5541            
5542             When splitting a name on OVER/BELOW/ABOVE, we need both sides as modifiers???
5543            
5544             Ỳ currently unreachable (appears only in Latin-8 Celtic, is not on Wikipedia)
5545            
5546             Somebody is putting an extra element at the end of arrays for layers??? - Probably SPACE...
5547            
5548             Need to treat upside-down as a pseudo-decomposition.
5549            
5550             We decompose reversed-smallcaps in one step - probably better add yet another two-steps variant...
5551            
5552             When creating a treat SYMBOL/SIGN/FINAL FORM/ISOLATED FORM/INITIAL FORM/MEDIAL FORM;
5553             note that SIGN may be stripped: LESS-THAN SIGN becomes LESS-THAN WITH DOT
5554            
5555             We do not do canonical-merging of diacritics; so one needs to specify VARIA in addition to GRAVE ACCENT.
5556            
5557             We use a smartish algorithm to assign multiple diacritics to the same deadkey. A REALLY smart algorithm
5558             would use information about when a particular precombined form was introduced in Unicode...
5559            
5560             Inspector tool for NamesList.txt:
5561            
5562             grep " WITH .* " ! | grep -E -v "(ACUTE|GRAVE|ABOVE|BELOW|TILDE|DIAERESIS|DOT|HOOK|LEG|MACRON|BREVE|CARON|STROKE|TAIL|TONOS|BAR|DOTS|ACCENT|HALF RING|VARIA|OXIA|PERISPOMENI|YPOGEGRAMMENI|PROSGEGRAMMENI|OVERLAY|(TIP|BARB|CORNER) ([A-Z]+WARDS|UP|DOWN|RIGHT|LEFT))$" | grep -E -v "((ISOLATED|MEDIAL|FINAL|INITIAL) FORM|SIGN|SYMBOL)$" |less
5563             grep " WITH " ! | grep -E -v "(ACUTE|GRAVE|ABOVE|BELOW|TILDE|DIAERESIS|CIRCUMFLEX|CEDILLA|OGONEK|DOT|HOOK|LEG|MACRON|BREVE|CARON|STROKE|TAIL|TONOS|BAR|CURL|BELT|HORN|DOTS|LOOP|ACCENT|RING|TICK|HALF RING|COMMA|FLOURISH|TITLO|UPTURN|DESCENDER|VRACHY|QUILL|BASE|ARC|CHECK|STRIKETHROUGH|NOTCH|CIRCLE|VARIA|OXIA|PSILI|DASIA|DIALYTIKA|PERISPOMENI|YPOGEGRAMMENI|PROSGEGRAMMENI|OVERLAY|(TIP|BARB|CORNER) ([A-Z]+WARDS|UP|DOWN|RIGHT|LEFT))$" | grep -E -v "((ISOLATED|MEDIAL|FINAL|INITIAL) FORM|SIGN|SYMBOL)$" |less
5564            
5565             AltGrMap should be made CapsLock aware (impossible: smart capslock works only on the first layer, so
5566             the dead char must be on the first layer). [May work for Shift-Space - but it has a bag of problems...]
5567            
5568             Alas, CapsLock'ing a composition cannot be made stepwise. Hence one must calculate it directly.
5569             (Oups, Windows CapsLock is not configurable on AltGr-layer. One may need to convert
5570             it to VK_KANA???)
5571            
5572             WarnConflicts[exceptions] and NoConflicts translation map parsing rules.
5573            
5574             Need a way to map to a different face, not a different layer.
5575            
5576             Vietnamese: to put second accent over ă, ơ (o/horn), put them over ae/oe; - including
5577             another ˘ which would "cancel the implied one", so will get o-horn itself. - Except
5578             for acute accent which should be replaced by ¨, and hook must be replaced by ˆ. (Over ae/oe
5579             there is only macron and diaeresis over ae.)
5580            
5581             Or: for the purpose of taking a second accent, AltGr-A behaves as Ă (or Â?), AltGr-O
5582             behaves as Ô (or O-horn Ơ?). Then Å and O/ behave as the other one... And ˚ puts the
5583             dot *below*, macron puts a hook. Exception: ¨ acts as ´ on the unaltered AE.
5584            
5585             While Å takes acute accent, one can always input it via putting ˚ on Á.
5586            
5587             If Ê is on the keyboard (and macron puts a hook), then the only problem is how to enter
5588             a hook alone (double circumflex is not precombined), dot below (???), and accents on u-horn ư.
5589            
5590             Mogrification rules for double accents: AE Å OE O/ Ù mogrify into hatted/horned versions; macron
5591             mogrifies into a hook; second hat modifies a hat into a horn. The only problem: one won't be
5592             able to enter double grave on U - use the OTHER combination of ¨ and `... And how to enter
5593             dot below on non-accented aue? Put ¨ on umlaut? What about Ë?
5594            
5595             To allow . or , on VK_DECIMAL: maybe make CapsLock-dependent?
5596            
5597             http://blogs.msdn.com/b/michkap/archive/2006/09/13/752377.aspx
5598            
5599             How to write this diacritic recipe: insert hacheck on AltGr-variant, but only if
5600             the breve on the base layer variant does not insert hacheck (so inserts breve)???
5601            
5602             Sorting diacritics by usefulness: we want to apply one of accents from the
5603             given list to a given key (with l layers of 2 shift states). For each accent,
5604             we have 2l possible variants for composition; assign to 2 variants differing
5605             by Shift the minimum penalty of the two. For each layer we get several possible
5606             combinations of different priority; and for each layer, we have a certain number
5607             of slots open. We can redistribute combinations from the primary layer to
5608             secondary one, but not between secondary layers.
5609            
5610             Work with slots one-by-one (so that the assignment is "monotonic" when the number
5611             of slots increases). Let m be the number of layers where slots are present.
5612             Take highest priority combinations; if the number of "extra" combinations
5613             in the primary layer is at least m, distribute the first m of them to
5614             secondary layers. If n
5615             have no their own combinations first, then other n-k layers. More precisely,
5616             if n<=k, use the first n of "free" layers; if n>k, fill all free layers, then
5617             the last n-k of non-free layers.
5618            
5619             Repeat as needed (on each step, at most one slot in each layer appears).
5620            
5621             But we do not need to separate case-differing keys! How to fix?
5622            
5623             All done, but this works only on the current face! To fix, need to pass
5624             to the translator all the face-characters present on the given key simultaneously.
5625            
5626             ===== Accent-key TAB accesses extra bindings (including NUM->numbered one)
5627             (may be problematic with some applications???
5628             -- so duplicate it on + and @ if they are not occupied
5629             -- there is nothing related to AT in Unicode)
5630            
5631             Diacritics_0218_0b56_0c34= May create such a thing...
5632             (0b56_0c34 invisible to the user).
5633            
5634             Hmm - how to combine penalized keys with reversion? It looks like
5635             the higher priority bindings would occupy the hottest slots in both
5636             direct and reverse bindings...
5637            
5638             Maybe additional forms Diacrtitics2S_* and Diacrtitics2E_* which fight
5639             for symbols of the same penalty from start and from end (with S winning
5640             on stuff exactly in the middle...). (The E-form would also strip the last |-group.)
5641            
5642             ' Shift-Space (from US face) should access the second level of Russian face.
5643             To avoid infinite cycles, face-switch keys to non-private faces should be
5644             marked in each face...
5645            
5646             "Acute makes sharper" is applicable to () too to get <>-parens...
5647            
5648             Other ways of combining: "OR EQUAL TO", "OR EQUIVALENT TO", "APL FUNCTIONAL
5649             SYMBOL QUAD", "APL FUNCTIONAL SYMBOL *** UNDERBAR", "APL FUNCTIONAL SYMBOL *** DIAERESIS".
5650            
5651             When recognizing symbols for GREEK, treat LUNATE (as NOP). Try adding HEBREW LETTER at start as well...
5652            
5653             Compare with: 8 basic accents: http://en.wikipedia.org/wiki/African_reference_alphabet (English 78)
5654            
5655             When a diacritic on a base letter expands to several variants, use them all
5656             (with penalty according to the flags).
5657            
5658             Problem: acute on acute makes double acute modifier...
5659            
5660             Penalized letters are temporarily completely ignored; need to attach them in the end...
5661             - but not 02dd which should be completely ignored...
5662            
5663             Report characters available on diacritic chains, but not accessible via such chains.
5664             Likewise for characters not accessible at all. Mark certain chains as "Hacks" so that
5665             they are not counted in these lists.
5666            
5667             Long s and "preceded by" are not handled since the table has its own (useless) compatibility decompositions.
5668            
5669             ╒╤╕
5670             ╞╪╡
5671             ╘╧╛
5672             ╓╥╖
5673             ╟╫╢
5674             ╙╨╜
5675             ╔╦╗
5676             ╠╬╣
5677             ╚╩╝
5678             ┌┬┐
5679             ├┼┤
5680             └┴┘
5681             ┎┰┒
5682             ┠╂┨
5683             ┖┸┚
5684             ┍┯┑
5685             ┝┿┥
5686             ┕┷┙
5687             ┏┳┓
5688             ┣╋┫
5689             ┗┻┛
5690             On top of a light-lines grid (3×2, 2×3, 2×2; H, V, V+H):
5691             ┲┱
5692             ╊╉
5693             ┺┹
5694             ┢╈┪
5695             ┡╇┩
5696             ╆╅
5697             ╄╇
5698             ╼†━†╾†╺†╸†╶†─†╴†╌†┄†┈† †╍†┅†┉†
5699             ╼━╾╺╸╶─╴╌┄┈ ╍┅┉
5700            
5701            
5702            
5703            
5704            
5705            
5706            
5707            
5708            
5709             ╎┆┊╏┇┋
5710            
5711             ╲ ╱
5712            
5713             ╭╮
5714             ╰╯
5715             ◤▲◥
5716             ◀■▶
5717             ◣▼◢
5718             ◜△◝
5719             ◁□▷
5720             ◟▽◞
5721             ◕◓◔
5722             ◐○◑
5723            
5724             ▗▄▖
5725             ▐█▌
5726             ▝▀▘
5727             ▛▀▜
5728             ▌ ▐
5729             ▙▄▟
5730            
5731             ░▒▓
5732            
5733            
5734             =head2 Implementation details
5735            
5736             Since the C accessor may have different effects at different moment of
5737             a face C synthesis, here is the order in which C changes:
5738            
5739             ini_layers: essentially, contains what is given in the key “layers” of the face recipe
5740             Later, a version of these layers with exportable keys marked is created as ini_layers_prefix.
5741             ini_filled_layers: adds extra (fake) keys containing control characters and created via-VK-keys
5742             (For these extended layers, the previous version can be inspected via ini_copy1.)
5743             (created when exportable keys are handled.)
5744            
5745             The next modification is done not by modifying the list of names of layers
5746             associated to the face, but by editing the corresponding layers in place.
5747             (The unmodified version of layer, one containing the exportable keys, is
5748             accessible via C.) On this step one adds the missing characters via
5749             from the face specified in the C key.
5750            
5751             =cut
5752            
5753             # '
5754             my (%Globals, $DEBUG); # %Globals: fallback storage used by set__value()/get__value() when the invocant is not a reference
5755            
5756             sub set__value ($$$) {
                 # set__value(INVOCANT, KEY, VALUE): store VALUE under KEY.
                 # When INVOCANT is a reference the value goes into INVOCANT->{KEY};
                 # otherwise it goes into the file-lexical %Globals.
5757 0     0 0   my($class, $key) = (shift, shift);
5758 0 0         (ref $class ? $class->{$key} : $Globals{$key}) = shift;
5759             }
5760             sub get__value ($$) {
                 # get__value(INVOCANT, KEY): return INVOCANT->{KEY} when INVOCANT is a
                 # reference and that entry is defined; in every other case fall back
                 # to $Globals{KEY} (which may itself be undef).
5761 0     0 0   my($class, $key) = (shift, shift);
5762 0 0 0       if (ref $class and defined(my $v = $class->{$key})) {
5763 0           $v;
5764             } else {
5765 0           $Globals{$key};
5766             }
5767             }
5768             sub set_NamesList ($$;$) {
                 # set_NamesList(INVOCANT, NAMESLIST [, AGELIST]): record both values via set__value().
                 # The third argument is optional per the prototype; when it is omitted,
                 # 'AgeList' is still assigned (to undef), overwriting any earlier value.
5769 0     0 0   my $class = shift;
5770 0           set__value($class, 'NamesList', shift);
5771 0           set__value($class, 'AgeList', shift);
5772             }
5773 0     0 0   sub get_NamesList ($) { get__value(shift, 'NamesList') } # reader for the 'NamesList' entry (object value first, %Globals as fallback)
5774 0     0 0   sub get_AgeList ($) { get__value(shift, 'AgeList') } # reader for the 'AgeList' entry (object value first, %Globals as fallback)
5775            
5776             sub new ($;$) {
                 # Constructor: new(CLASS_OR_OBJECT [, HASHREF]).
                 # Dies when more than one argument follows the invocant.
                 # The optional HASHREF is copied one level deep (nested references are shared),
                 # and the copy is blessed into the invocant's class (or into the class name given).
5777 0     0 0   my $class = shift;
5778 0 0         die "too many arguments to UI::KeyboardLayout->new" if @_ > 1;
5779 0 0         my $data = @_ ? {%{shift()}} : {};
  0            
5780 0   0       bless $data, (ref $class or $class);
5781             }
5782            
5783             sub put_deep($$$$@) {
                 # put_deep(SELF, HASH, VALUE, KEY1 [, KEY2 ...]): store VALUE at HASH->{KEY1}{KEY2}...
                 # Recurses one key at a time; intermediate levels are created as empty hashes
                 # when the existing entry is false (||=).  Returns the stored VALUE.
5784 0     0 0   my($self, $hash, $v, $k) = (shift, shift, shift, shift);
5785 0 0 0       return $self->put_deep($hash->{$k} ||= {}, $v, @_) if @_;
5786 0           $hash->{$k} = $v;
5787             }
5788            
5789             # Sections [foo/bar] [visual -> foo/bar]; directives foo=bar or @foo=bar,baz
5790             sub parse_configfile ($$) { # Trailing whitespace is ignored, whitespace about "=" is not
                 # parse_configfile(SELF, TEXT): parse the configuration text into a nested hash.
                 #  - The text is cut at section header lines "[a/b]" or "[visual -> a/b]";
                 #    anything before the first header is ignored with a warning.
                 #  - Headers and bodies are collected into the hash %f, so sections are visited
                 #    in `keys %f' order and a repeated header keeps only one body.
                 #  - Directive forms: "key=v" (scalar), "@key=a,b" (array, split on commas),
                 #    "/key=a/b" (array, split on slashes), "+key=v" (one-element array).
                 #  - An array directive repeated in one section appends to the earlier array;
                 #    other redefinitions replace the old value with a warning.
                 # Returns a hash reference; its '[keys]' entry lists the section names seen
                 # (with "visual -> " rewritten to "[unparsed]/").
                 # Dies on a line which is neither a comment nor a recognized directive.
5791 0     0 0   my ($self, $s, %v, @KEYS) = (shift, shift);
5792 0           $s =~ s/[^\S\n]+$//gm;
5793 0           $s =~ s/^\x{FEFF}//; # BOM are not stripped by Perl from UTF-8 files with -C31
5794 0           (my $pre, my %f) = split m(^\[((?:visual\s*->\s*)?[\w/]*)\]\s*$ \n?)mx, $s; # //x is needed to avoid $\
5795 0 0         warn "Part before the first section in configfile ignored: `$pre'" if length $pre;
5796 0           for my $k (keys %f) {
5797             # warn "Section `$k'";
5798 0           my($v, $V, @V) = $f{$k};
5799 0 0         if ($k =~ s{^visual\s*->\s*}{[unparsed]/}) { # Make sure that prefixes do not allow visual line to be confused with a config
                 # Everything from the first non-comment, non-directive line to the end of the
                 # section is cut out of $v and kept as a synthetic "unparsed_data" directive.
                 # NOTE(review): $1 is used without checking that the substitution matched.
5800 0           $v =~ s[(^(?!#|[/\@+]?\w+=).*)]//ms; # find non-comment non-assignment
5801 0           @V = "unparsed_data=$1";
5802             }
5803             # warn "xxx: @V";
5804 0           push @KEYS, $k;
5805 0           my @k = split m(/), $k;
5806 0 0         @k = () if "@k" eq ''; # root
5807 0           for my $l ((grep !/^#/, split(/\n/, $v)), @V) {
5808 0 0         die "unrecognized config file line: `$l' in `$s'"
5809             unless my($arr, $at, $slash, $kk, $vv) = ($l =~ m[^((?:(\@)|(/)|\+)?)(\w+)=(.*)]s);
                 # qr[(?!)] never matches, so a "+key=..." value stays a single element
5810 0 0         my $spl = $at ? qr/,/ : ( $slash ? qr[/] : qr[(?!)] );
    0          
5811 0 0         $vv = [ length $vv ? (split $spl, $vv, -1) : $vv ] if $arr; # create empty element if $vv is empty
    0          
5812 0           my $slot = $self->get_deep(\%v, @k);
5813 0 0 0       if ($slot and exists $slot->{$kk}) {
5814 0 0         if ($arr) {
5815 0 0 0       if (ref($slot->{$kk} || 0) eq 'ARRAY') {
5816 0           $vv = [@{$slot->{$kk}}, @$vv];
  0            
5817             } else {
5818 0           warn "Redefinition of non-array entry `$kk' in `$k' by array one, old value ignored"
5819             }
5820             } else {
5821 0           warn "Redefinition of entry `$kk' in `$k', old value ignored"
5822             }
5823             }
5824             # warn "Putting to the root->@k->`$kk'";
5825 0           $self->put_deep(\%v, $vv, @k, $kk);
5826             }
5827             }
5828 0           $v{'[keys]'} = \@KEYS;
5829             # warn "config parsed";
5830 0           \%v
5831             }
5832            
5833             sub process_key_chunk ($$$$$) {
                 # process_key_chunk(SELF, NAME, SKIP_FIRST, CHUNK, SEP2): expand one chunk of a
                 # visual key description into its per-Shift-state entries.
                 #  - A blank directly before a nonspacing mark is removed from CHUNK.
                 #  - CHUNK is passed through $self->stringHEX2string() (defined elsewhere;
                 #    presumably expands hex escapes - verify there).
                 #  - CHUNK is split into single characters, or on SEP2 when SEP2 is defined,
                 #    CHUNK has at least 3 characters and SEP2 matches.
                 #  - With SKIP_FIRST true, a leading "\0" entry becomes undef.
                 #  - A lone one-character entry whose ucfirst() differs gets that ucfirst()
                 #    form appended as a second entry.
                 # Returns a list in which each defined entry is wrapped as
                 # [STRING, undef, undef, "VisLr=NAME"] (the tag is just NAME when NAME is false);
                 # undefined entries are returned as they are.
5834 0     0 0   my $self = shift;
5835 0           my $name = shift;
5836 0           my $skip_first = shift;
5837 0           (my $k = shift) =~ s/\p{Blank}(?=\p{NonspacingMark})//g; # Allow combining marks to be on top of SPACE
5838 0           my $sep2 = shift;
5839 0           $k = $self->stringHEX2string($k);
5840 0           my @k = split //, $k;
5841 0 0 0       if (defined $sep2 and 3 <= @k and $k =~ /$sep2/) { # Allow separation by $sep2, but only if too long
      0        
5842 0           @k = split /$sep2/, $k;
                 # a chunk starting with the separator yields an empty first field; drop it
5843 0 0 0       shift @k if not length $k[0] and @k == 2;
5844 0 0 0       warn "Zero length expansion in the key slot <$k>\n" if not @k or grep !length, @k;
5845             }
5846 0 0 0       undef $k[0] if ($k[0] || '') eq "\0" and $skip_first;
      0        
5847 0 0 0       push @k, ucfirst $k[0] if @k == 1 and defined $k[0] and 1==length $k[0] and $k[0] ne ucfirst $k[0];
      0        
      0        
5848 0 0         $name = "VisLr=$name" if $name;
5849             # warn "Multi-char key in <<@k>>" if grep $_ && 1
5850 0 0         warn "More that 2 Shift-states in <<@k>>" if @k > 2;
5851             #warn "Sep2 in $name, $skip_first, <$k> ==> <@k>\n" if defined $sep2 and $k =~ /$sep2/;
5852 0 0         map {defined() ? [$_, undef, undef, $name] : $_} @k;
  0            
5853             # @k
5854             } # -> list of chars
5855            
5856             sub process_key ($$$$$$;$) { # $sep may appear only in a beginning of the first key chunk
                 # process_key(SELF, KEY, LIMIT, SEP, LAYER_NAMES, LAYER_OFFSET [, SEP2]):
                 # split one key description into per-layer chunks and expand each of them.
                 #  - KEY is split on the literal SEP, except for a SEP at the very start of KEY.
                 #  - Dies when more than LIMIT chunks result.
                 #  - A chunk starting with "--" followed by more text gets that "--" replaced
                 #    by "\0" and is flagged in @tr (passed on as the SKIP_FIRST argument).
                 #  - A first chunk which is exactly "--" becomes the empty string.
                 # Returns one array reference per chunk, holding the list produced by
                 # process_key_chunk() for the layer LAYER_NAMES->[LAYER_OFFSET + chunk index].
5857 0     0 0   my ($self, $k, $limit, $sep, $ln, $l_off, $sep2, @tr) = (shift, shift, shift, shift, shift, shift, shift);
5858 0           my @k = split m((?!^)\Q$sep), $k;
5859 0 0         die "Key descriptor `$k' separated by `$sep' has too many parts: expected $limit, got ", scalar @k
5860             if @k > $limit;
5861 0   0       defined $k[$_] and $k[$_] =~ s/^--(?=.)/\0/ and $tr[$_]++ for 0..$#k;
      0        
5862 0 0         $k[0] = '' if $k[0] eq '--'; # Allow a filler (multi)-chunk
5863 0 0         map [$self->process_key_chunk( $ln->[$l_off+$_], $tr[$_], (defined($k[$_]) ? $k[$_] : ''), $sep2)], 0..$#k;
5864             } # -> list of arrays of chars
5865            
5866             sub decode_kbd_layers ($@) {
                 # decode_kbd_layers(SELF, OPTION_HASHES...): decode a "visual" keyboard template
                 # (the `unparsed_data' text) into per-layer lists of keys.
                 # Each option is taken from the first of OPTION_HASHES (then SELF, when SELF is a
                 # reference) in which the key exists.  Keys of %needed are mandatory, keys of
                 # %extra are optional; only the keys of these two hashes are consulted here,
                 # their values are not read.
                 # Returns a 3-element list: a hash reference (layer name => list of key entries),
                 # an array reference with the key count of the first line of every keyboard row,
                 # and the `keyline_offsets' option.
                 # Dies on any inconsistency between the template and the options.
5867 0     0 0   my ($self, $lineN, $row, $line_in_row, $cur_layer, @out, $N, $l0) = (shift, 0, -1);
5868 0           my %needed = qw(unparsed_data x visual_rowcount 2 visual_per_row_counts [2;2] visual_prefixes * prefix_repeat 3 in_key_separator / layer_names ???);
5869 0           my %extra = (qw(keyline_offsets 1 in_key_separator2), undef);
5870 0           my $opt;
5871 0           for my $k (keys %needed, keys %extra) {
5872 0 0         my ($from) = grep exists $_->{$k}, @_, (ref $self ? $self : ());
5873 0 0 0       die "option `$k' not specified" unless $from or exists $extra{$k};
5874 0           $opt->{$k} = $from->{$k};
5875             }
5876             die "option `visual_rowcount' differs from length of `visual_per_row_counts': $opt->{visual_rowcount} vs. ",
5877 0 0         scalar @{$opt->{visual_per_row_counts}} unless $opt->{visual_rowcount} == @{$opt->{visual_per_row_counts}};
  0            
  0            
                 # lines starting with "#" are comments; trailing whitespace is eaten by the split
5878 0           my @lines = grep !/^#/, split /\s*\n/, $opt->{unparsed_data};
5879 0           my ($C, $lc, $pref) = map $opt->{$_}, qw(visual_rowcount visual_per_row_counts visual_prefixes);
5880 0 0         die "Number of uncommented rows (" . scalar @lines . ") in a visual template not divisible by the rowcount $C: `$opt->{unparsed_data}'"
5881             if @lines % $C;
                 # one prefix matcher per character of visual_prefixes, padded with whitespace matchers
5882 0 0         $pref = [map {$_ eq ' ' ? qr/\s/ : qr/\Q$_/ } split(//, $pref), (' ') x $C];
  0            
5883             # my $line_in_row = [];
5884 0           my @counts;
5885             my $sep2;
5886 0 0         $sep2 = qr/$opt->{in_key_separator2}/ if defined $opt->{in_key_separator2};
5887 0           while (@lines) {
5888             # push @out, $line_in_row = [] unless $C % $c;
                 # every $C template lines form one keyboard row; restart the per-row counters there
5889 0 0         $row++, $line_in_row = $cur_layer = 0 unless $lineN % $C;
5890 0           $lineN++;
5891 0           my $l1 = shift @lines;
5892 0           my $PREF = qr/(?:$pref->[$line_in_row]){$opt->{prefix_repeat}}/;
                 # for the whitespace matcher the repeat count is not enforced: one \s is enough
5893 0 0         $PREF = '\s' if $pref->[$line_in_row] eq qr/\s/;
5894 0 0         $l1 =~ s/\s*\x{202c}$// if $l1 =~ s/^[\x{202d}\x{202e}]//; # remove PDF if removed LRO, RLO
5895 0 0         die "line $lineN in visual layers has unexpected prefix:\n\tPREF=/$PREF/\n\tLINE=`$l1'" unless $l1 =~ s/^$PREF\s*(?<=\s)//;
                 # keys are separated by whitespace, but whitespace carrying a nonspacing mark stays inside a key
5896 0           my @k1 = split /\s+(?!\p{NonspacingMark})/, $l1;
5897 0 0         $l0 = $l1, $N = @k1 if $line_in_row == 0;
5898             # warn "Got keys: ", scalar @k1;
5899 0 0         die sprintf "number of keys in lines differ: %s vs %s in:\n\t`%s'\n\t`%s'\n\t<%s>",
5900             scalar @k1, $N, $l0, $l1, join(">\t<", @k1) unless @k1 == $N; # One can always fill by --
5901 0           for my $key (@k1) {
5902 0           my @kk = $self->process_key($key, $lc->[$line_in_row], $opt->{in_key_separator}, $opt->{layer_names}, $cur_layer, $sep2);
5903 0           push @{$out[$cur_layer + $_]}, $kk[$_] || [] # (defined $kk[$_] ? [$kk[$_],undef,undef,$opt->{layer_names}[$cur_layer + $_]] : [])
5904 0   0       for 0..($lc->[$line_in_row]-1);
5905             }
                 # this template line described $lc->[$line_in_row] layers; move past them
5906 0           $cur_layer += $lc->[$line_in_row++];
5907 0 0         push @counts, scalar @k1 if 1 == $lineN % $C;
5908             }
5909             # warn "layer[0] = ", join ', ', map "@$_", @{$out[0]};
5910 0           die "Got ", scalar @out, " layers, but ", scalar @{$opt->{layer_names}}, " layer names"
5911 0 0         unless @out == @{$opt->{layer_names}};
  0            
5912 0           my(%seen, %out);
5913 0   0       $seen{$_}++ and die "Duplicate layer name `$_'" for @{$opt->{layer_names}};
  0            
5914 0           @out{ @{$opt->{layer_names}} } = @out;
  0            
5915 0           \%out, \@counts, $opt->{keyline_offsets};
5916             }
5917            
5918             sub decode_rect_layers ($@) {
                 # decode_rect_layers(SELF, OPTION_HASHES...): decode a template in which layers are
                 # drawn as character rectangles placed side by side (and band below band).
                 # Options are looked up as in decode_kbd_layers(): first hash of OPTION_HASHES
                 # (then SELF, if a reference) in which the key exists.  `empty' is optional.
                 # NOTE(review): the value 'N/A' stored in %extra is not used as a default;
                 # without an explicit `empty' option $opt->{empty} is undef.
                 # Returns a 2-element list: a hash reference (layer name => list of [CHAR]
                 # entries, plus synthesized "NAME²" entries pairing each layer with the next),
                 # and an array reference holding the column count repeated once per row.
5919 0     0 0   my ($self, $cnt, %extra, $opt, @out) = (shift, 0, qw(empty N/A));
5920 0           my %needed = qw(unparsed_data x rect_rows_cols [4;4] rect_horizontal_counts [2;2] layer_names ??? COLgap 0 ROWgap 0);
5921 0           for my $k (keys %needed, keys %extra) {
5922 0 0         my ($from) = grep exists $_->{$k}, @_, (ref $self ? $self : ());
5923 0 0 0       die "option `$k' not specified" unless $from or exists $extra{$k};
5924 0           $opt->{$k} = $from->{$k};
5925             }
5926 0           $cnt += $_ for @{ $opt->{rect_horizontal_counts} };
  0            
5927             die "total of option `rect_horizontal_counts' differs from count of `layer_names': $cnt vs. ",
5928 0 0         scalar @{$opt->{layer_names}} unless $cnt == @{$opt->{layer_names}};
  0            
  0            
                 # from here on $cnt is the number of bands (entries of rect_horizontal_counts)
5929 0           $cnt = @{ $opt->{rect_horizontal_counts} };
  0            
                 # drop comment lines at the very start and at the very end of the data
5930 0           (my $D = $opt->{unparsed_data}) =~ s/^(#.*\n)+//;
5931 0           $D =~ s/^(#.*(\n|\z))+\z//m;
5932 0           my @lines = split /\s*\n/, $D;
                 # NOTE(review): the visual_* keys are never copied into $opt in this sub, so
                 # $C, $lc and $pref are undef; they are not used below.
5933 0           my ($C, $lc, $pref, $c0, $r0) = map $opt->{$_}, qw(visual_rowcount visual_per_row_counts visual_prefixes COLgap ROWgap);
5934             die "Number of uncommented rows (" . scalar @lines . ") in a visual rect template not matching rows(rect_rows_cols) x cnt(rect_horizontal_counts) = $opt->{rect_rows_cols}[0] x $cnt: `$opt->{unparsed_data}'"
5935 0 0         if @lines != $cnt * $opt->{rect_rows_cols}[0] + ($cnt-1)*$r0;
5936 0           my $c = 0;
5937 0           while (@lines) {
5938 0           die "Too many rect vertically: expect only ", scalar @{ $opt->{rect_horizontal_counts} }, " in `" . join("\n",'',@lines,'') . "'"
5939 0 0         if $c >= @{ $opt->{rect_horizontal_counts} };
  0            
                 # one band: rect_rows_cols[0] text lines, then up to ROWgap separator lines starting with "#"
5940 0           my @L = splice @lines, 0, $opt->{rect_rows_cols}[0];
5941 0           my ($cR, $L) = 0;
5942 0           while (++$cR <= $r0) { # Inter-row gap
5943 0 0         last unless @lines;
5944 0 0         ($L = shift @lines) =~ /^#/ or die "Line expected to be inter-row comment line No. $cR: <<<$L>>>"
5945             }
5946 0           my $l = length $L[0];
5947 0   0       $l == length or die "Lengths of lines encoding rect do not match: expect $l, got `" . join("\n",'',@L,'') . "'" for @L[1..$#L];
                 # NOTE(review): the condition below does not depend on the loop variable, so the
                 # same check is repeated for each line after the first (and skipped when the
                 # band has only one line).
5948             $l == $opt->{rect_rows_cols}[1] * $opt->{rect_horizontal_counts}[$c] + ($opt->{rect_horizontal_counts}[$c] - 1)*$c0
5949             or die "Wrong line length in rect: expect $opt->{rect_rows_cols}[1] * $opt->{rect_horizontal_counts}[$c] gaps=$c0, got $l in `"
5950 0   0       . join("\n",'',@L,'') . "'" for @L[1..$#L];
                 # peel rectangles off the left edge of the band, one layer per rectangle
5951 0           while (length $L[0]) {
5952 0           my @c;
                 # 4-argument substr removes the taken columns from each line; @c is filled line after line
5953 0           push @c, split //, substr $_, 0, $opt->{rect_rows_cols}[1], '' for @L;
5954 0   0       $_ eq $opt->{empty} and $_ = undef for @c;
5955 0           push @out, [map [$_], @c];
5956 0 0 0       next unless $c0 and length $L[0]; # Inter-col gap
5957 0           for my $i (0..$#L) {
5958 0 0         next unless (my $gap = substr $L[$i], 0, $c0, '') =~ /\S/;
5959 0           die "Inter-column gap not whitespace: line No. $i (0-based), gap No. $#out: <<<$gap>>>"
5960             }
5961             }
5962 0           $c++;
5963             }
5964 0           die "Too few vertical rect: got $c, expect ", scalar @{ $opt->{rect_horizontal_counts} }, " in `" . join("\n",'',@lines,'') . "'"
5965 0 0         if $c != @{ $opt->{rect_horizontal_counts} };
  0            
5966 0           my(%seen, %out);
5967 0   0       $seen{$_}++ and die "Duplicate layer name `$_'" for @{$opt->{layer_names}};
  0            
5968 0           @out{ @{$opt->{layer_names}} } = @out;
  0            
                 # for each layer but the last, add "NAME²" made of [char of this layer, char of the next layer]
                 # per position, unless an entry of that name already exists
5969 0           for my $i ( 0 .. ($#{ $opt->{layer_names} } - 1) ) {
  0            
5970 0           my($base,$shift) = ($out[$i], $out[$i+1]);
5971 0   0       $out{$opt->{layer_names}[$i] . '²'} ||= [ map [$base->[$_][0], $shift->[$_][0]], 0..$#$base ];
5972             }
5973 0           \%out, [($opt->{rect_rows_cols}[1]) x $opt->{rect_rows_cols}[0]];
5974             }
5975            
# Follow a chain of hash keys: get_deep($h, k1, k2, ...) gives $h->{k1}{k2}...
# With no keys, $h itself is returned.  As soon as a key does not exist the
# walk stops and nothing (undef in scalar context, the empty list in list
# context) is returned; the missing key is not created.
sub get_deep ($$@) {
    my ($self, $node, @path) = @_;
    for my $key (@path) {
        return unless exists $node->{$key};
        $node = $node->{$key};
    }
    return $node;
}
5983            
# Look up the last key of the path in the hash reached (via get_deep) by the
# rest of the path; if it does not exist there, drop the deepest remaining
# path element and retry, until the key is found or the path is exhausted
# (then nothing is returned).
#
#   $h     root hash of the search
#   $idx   optional index: when it is defined, the value was found only after
#          dropping at least one path element, and the value is a reference,
#          element $idx of the value is returned instead of the value itself
#
# With an empty path, $h is returned when defined, nothing otherwise.
sub get_deep_via_parents ($$$@) {   # quadratic algorithm
    my ($self, $h, $idx, @path) = @_;
    unless (@path) {
        return $h if defined $h;
        return;
    }
    my $key     = pop @path;
    my $climbed = 0;                # how many path elements were dropped so far
    while (1) {
        my $node = $self->get_deep($h, @path);
        if (exists $node->{$key}) {
            my $v = $node->{$key};
            # Start extraction from array only for values found higher up
            return $v->[$idx] if ref($v || 1) and $climbed and defined $idx;
            return $v;
        }
        return unless @path;        # nowhere left to look
        $climbed++;
        pop @path;
    }
}
6001            
# Decode every visual layer description listed in $h->{'[keys]'} under
# `[unparsed]/KBD...' or `[unparsed]/RECT...'.
#
# For each such key a list of option hashes is collected via get_deep() (from
# $self when it is a reference, and from $h with and without the leading path
# element); the list is handed, reversed, to decode_kbd_layers() or
# decode_rect_layers().
#
# Returns three hash references keyed by layer name: the decoded layers, the
# second value returned by the decoder, and (only when it is true) the third.
# Dies if two descriptions produce a layer with the same name.
sub fill_kbd_layers ($$) {      # We do not do deep processing here...
    my ($self, $h) = @_;
    my (%o, %c, %O);
    my @K = grep m(^\[unparsed]/(KBD|RECT)\b), @{$h->{'[keys]'}};
#   my $H = $h->{'[unparsed]'};
    for my $k (@K) {
        my @parts = split m(/), $k;
        my @h;
        if (ref $self) {
            push @h, $self->get_deep($self, @parts[1..$_]) || {} for 0..$#parts;
        }
        push @h, $self->get_deep($h, @parts[1..$_]) || {} for 0..$#parts;   # Drop [unparsed]/ prefix...
        push @h, $self->get_deep($h, @parts[0..$_]) || {} for -1..$#parts;
        my ($in, $counts, $offsets) = $k =~ m(^\[unparsed]/KBD\b)
            ? $self->decode_kbd_layers( reverse @h )
            : $self->decode_rect_layers( reverse @h );
        my @names = keys %$in;
        for my $name (@names) {
            # Name the layer which collides, not (again) the spec being processed
            die "Visual spec `$k' overwrites existing layer `$name'" if exists $o{$name};
        }
        @o{@names} = @{$in}{@names};
        @c{@names} = ($counts) x @names;
        @O{@names} = ($offsets) x @names if $offsets;
    }
    return \%o, \%c, \%O;
}
6020            
# Code point of the first character of $k as lowercase hex, zero-padded to at
# least 4 digits.  With a true $ignore, an undefined $k gives -1 instead.
sub key2hex ($$;$) {
    my ($self, $k, $ignore) = @_;
    if ($ignore and not defined $k) {
        return -1;
    }
    return sprintf '%04x', ord $k;  # if ord $k <= 0xFFFF;
#          sprintf '%06x', ord $k;
}
6027            
# Same as key2hex(), but $k may also be an array reference, in which case its
# element 0 is the character to convert.  With a true $ignore, an undefined
# $k gives -1.
sub keyORarray2hex ($$;$) {
    my ($self, $k, $ignore) = @_;
    return -1 if $ignore and not defined $k;
    my $char = ($k and ref $k) ? $k->[0] : $k;
    $self->key2hex($char, $ignore);
}
6034            
# Hex code points (lowercase, at least 4 digits each) of all the characters of
# the string $k, joined by `.'.  With a true $ignore, an undefined $k gives -1.
sub keys2hex ($$;$) {
    my ($self, $k, $ignore) = @_;
    return -1 if $ignore and not defined $k;
    my @hex = map { sprintf '%04x', ord $_ } split //, $k;  # if ord $k <= 0xFFFF;
    return join '.', @hex;
#   sprintf '%06x', ord $k;
}
6041            
# For every key of layer $layer, count the contents of its slots 0 and 1 in
# the hash $to, keyed by key2hex() of the slot (undefined slots are counted
# under the key -1).
sub coverage_hex_sub($$$) { # Unfinished!!! XXXX UNUSED
    my ($self, $layer, $to) = @_;
    for my $key (@{$self->{layers}{$layer}}) {
        for my $shift (0, 1) {
            $to->{ $self->key2hex($key->[$shift], 'undef_ok') }++;
        }
    }
}
6047            
6048             # my %MANUAL_MAP = qw( 0020 0020 00a0 00a0 2007 2007 ); # We insert entry for SPACE manually
6049             # my %MANUAL_MAP_ch = map chr hex, %MANUAL_MAP;
6050            
# Count the bindings of face $face: for every key of every layer of the face,
# slots 0 and 1 are counted in $self->{faces}{$face}{'[coverage_hex]'} (created
# if missing), keyed by keyORarray2hex() of the slot, so undefined slots end
# up under the key -1.
sub coverage_hex($$) {
    my ($self, $face) = @_;
    my $layers = $self->{faces}{$face}{layers};
    my $to = ($self->{faces}{$face}{'[coverage_hex]'} ||= {});  # or die "Panic!";  # Synthetic faces may not have this...
    my @Layers = map $self->{layers}{$_}, @$layers;
    for my $keys (@Layers) {
        for my $slot (map { @{$_}[0, 1] } @$keys) {
            $to->{ $self->keyORarray2hex($slot, 'undef_ok') }++;
        }
    }
}
6060            
# Recursively copy a data structure built from unblessed arrays and hashes.
# Non-references are returned as is; arrays and hashes are rebuilt with every
# element (for hashes: every key and value) copied in turn.  Any other kind
# of reference (code, scalar, blessed object...) is not duplicated: the very
# same reference is returned, so it stays shared between original and copy.
# (Formerly such a reference made the sub fall off its end, returning the
# empty list in list context, which silently dropped the element and shifted
# the key/value pairing of an enclosing hash.)
sub deep_copy($$) {
    my ($self, $o) = @_;
    my $type = ref $o;
    return $o unless $type;
    return [ map $self->deep_copy($_), @$o ] if $type eq 'ARRAY';   # We should not have overloaded elements
    return +{ map $self->deep_copy($_), %$o } if $type eq 'HASH';
    return $o;
}
# List version of deep_copy(): returns the deep copies of all the arguments,
# in order (in scalar context: their number).
sub DEEP_COPY($@) {
    my ($self, @originals) = @_;
    my @copies;
    push @copies, $self->deep_copy($_) for @originals;
    return @copies;
}
6071            
# Replace by undef, in place, every string among the arguments (descending
# into array references at any depth) for which $h->{string} is true.
# Undefined elements are skipped; a reference which is not an ARRAY is fatal.
sub deep_undef_by_hash($$@) {
    my $self = shift;
    my $h    = shift;
    for my $item (@_) {         # $item aliases the caller's data
        next unless defined $item;
        unless (ref $item) {
            undef $item if $h->{$item};
            next;
        }
        die "a reference not an ARRAY in deep_undef_by_hash()" unless 'ARRAY' eq ref $item;
        $self->deep_undef_by_hash($h, @$item);
    }
}
6084            
# Make symbols of the first face ($hh) accessible in the second face ($HH),
# working key by key on the two layer lists
#   [Main, AltGr-Main,...] (of $hh) and [Secondary, AltGr-Secondary,...] (of $HH).
#
#   $hh, $HH   names of the faces; they must have the same number of layers
#   $skipfix   if true, the layers of $HH are not modified, and no pristine
#              copies of them are stored in $self->{layers}{'[ini_copy]'}
#   $skipwarn  if true, suppresses the "not copied to the second face" warning
#
# Unless $skipfix:
#   * a deep copy of every layer of $HH is saved in {layers}{'[ini_copy]'};
#   * prefix keys and non-word characters (or `_') in slots 0/1 of layer 0 of
#     $hh which are hidden by a different character in layer 0 of $HH get
#     bound in layer 1 of $HH (the layer is modified in place);
#   * every slot 0/1 still undefined in $HH is inherited from $hh.
# The first time a pair of faces is processed, bindings which exist in one face
# where the other has an undefined slot are appended to the
# {need_extra_keys_to_access} lists of the faces.
#
# Dies if the numbers of layers differ, if a layer of $HH already has a saved
# copy, or if an existing layer-1 binding blocks de-obscuring a symbol.
sub pre_link_layers ($$$;$$) {  # Un-obscure non-alphanum bindings from the first face; assign in the direction $hh ---> $HH
    my ($self, $hh, $HH, $skipfix, $skipwarn) = @_;
    my ($hn, $Hn, %seen_deobsc) = map $self->{faces}{$_}{layers}, $hh, $HH;
#warn "Link $hh --> $HH;\t(@$hn) -> (@$Hn)" if "$hh $HH" =~ /00a9/i;
    die "Can't link sets of layers `$hh' `$HH' of different sizes: ", scalar @$hn, " != ", scalar @$Hn if @$hn != @$Hn;

    my $already_linked = $self->{faces}{$hh}{'[linked]'}{$HH}++;
    $self->{faces}{$HH}{'[linked]'}{$hh}++;
    for my $L (@$Hn) {
        next if $skipfix;
        die "Layer `$L' of face `$HH' is being relinked via `$HH' -> `$hh'???"
            if $self->{layers}{'[ini_copy]'}{$L};
#warn "ini_copy: `$L'";
        $self->{layers}{'[ini_copy]'}{$L} = $self->deep_copy($self->{layers}{$L});
    }
    for my $K (0..$#{$self->{layers}{$hn->[0]}}) {      # key number
        my @h = map $self->{layers}{$_}[$K], @$hn;      # arrays of [lowercase,uppercase]
        my @H = map $self->{layers}{$_}[$K], @$Hn;
        my @p = map [map {$_ and ref and $_->[2]} @$_], @h;         # Prefix
        my @c = map [map {($_ and ref) ? $_->[0] : $_} @$_], @h;    # deep copy, remove extra info
        my @C = map [map {($_ and ref) ? $_->[0] : $_} @$_], @H;
        # Find which of keys on $H[0] obscure symbol keys from $h[0]
        my @symb0 = grep {$p[0][$_] or ($c[0][$_] || '') =~ /[\W_]/} 0, 1;  # not(wordchar but not _): prefix/symbols on $h[0]
        defined $H[0][$_] or not defined $C[0][$_] or $skipwarn
            or warn "Symbol char `$c[0][$_]' not copied to the second face while the slot is empty"
                for @symb0;
        my @obsc = grep { defined $C[0][$_] and $c[0][$_] ne $C[0][$_]} @symb0; # undefined positions will be copied later
#warn "K=$K,\tobs=@obsc;\tsymb0=@symb0";
        # If @obsc == 1, put on non-shifted location; may overwrite only ?-binding if it exists
        #return unless @obsc;
        my %map;
        # Candidate slots of layer 1 of $HH: the undefined ones first
        my @free_first = ((grep {not defined $C[1][$_]} 0, 1), grep defined $C[1][$_], 0, 1);
        # When the first obscured symbol is the shifted one, the first candidate
        # is forced to be the unshifted slot.
        # NOTE(review): this spells out what the former one-liner
        #   @free_first = (1,0) if 1 == ($obsc[0] || 0) and $free_first[0] = 0 and not defined $C[1][1];   # un-Shift ONLY if needed
        # actually did: `$free_first[0] = 0' is an assignment, so the condition
        # was never true and only the assignment took effect.  If `==' was
        # meant, switching to it would change the generated layouts; confirm
        # with the author before doing so.
        $free_first[0] = 0 if 1 == ($obsc[0] || 0);
        @map{@obsc} = @free_first[0 .. $#obsc] unless $skipfix;
#       %map = map +($_, $free_first[$map{$_}]), keys %map;
        for my $k (keys %map) {
            if ($skipfix) {
                # NOTE(review): %map is filled only unless $skipfix, so this
                # branch looks unreachable; kept as it was.
                my $s = $k ? ' (shifted)' : '';
                warn "Key `$C[0][$k]'$s in layer $Hn->[0] does not match symbol $c[0][$k] in layer $hn->[0], and skipfix is requested...\n"
                    unless ref($skipwarn || '') ? $skipwarn->{$c[0][$k]} : $skipwarn;
            } elsif (defined $C[1][$map{$k}] and $p[0][$k]) {
                warn "Prefix `$c[0][$k]' in layer $hn->[0] obscured on a key with `$C[1][$map{$k}]' in layer=1: $Hn->[0]"
            } else {
                if (defined $C[1][$map{$k}]) {
                    next if $seen_deobsc{$c[0][$k]};    # See ъЪ + palochkas obscuring \| on the secondary \|-key in RussianPhonetic
                    # So far, the only "obscuring" with useful de-obscuring is when the obscuring symbol is a letter
                    die "existing secondary AltGr-binding `$C[1][$map{$k}]' blocks de-obscuring `$c[0][$k]';\n symbols to de-obscure are at positions [@symb0] in [@{$c[0]}]"
                        unless ($C[0][$k] || '.') =~ /[\W\d_]/;
                    next
                }
                $H[1][$map{$k}] = $h[0][$k];            # !!!! Modify in place
                $seen_deobsc{$c[0][$k]}++;
            }
        }
        # Inherit keys from $h
        for my $L (0..($skipfix? -1 : $#H)) {
            for my $shift (0,1) {
                next if defined $H[$L][$shift];
                $H[$L][$shift] = $h[$L][$shift];
            }
        }
        next if $already_linked;
        for my $i (0..$#$hn) {          # layer type (was 0..@$hn: one index too far)
            for my $j (0,1) {           # case
#???            ++$seen_hex[$_]{ key2hex(($_ ? $key2 : $key1)->[$i][$j], 'undef') } for 0,1;
                push @{$self->{faces}{$hh}{need_extra_keys_to_access}{$HH}}, $H[$i][$j] if defined $C[$i][$j] and not defined $h[$i][$j];
                push @{$self->{faces}{$HH}{need_extra_keys_to_access}{$hh}}, $h[$i][$j] if defined $c[$i][$j] and not defined $H[$i][$j];
            }
        }
    }
}
6160            
# Link face $HH (expected to be a satellite) to face $hh (expected to be the
# base face): run pre_link_layers(), make the backlinks of both faces, and
# build the link maps $hh -> $HH via faces_link_via_backlinks().
#
# If the base face has a `[Flip_AltGr_Key]' and the satellite has an
# `[invAltGr_Accessor]', the accessor is stored (as a prefix key) into the slot
# of the satellite matching the first position of the flip key recorded in
# {face_back}{$hh}; a previously made link map $hh -> $HH is then discarded.
# $skipfix and $skipwarn are passed on to pre_link_layers().
sub link_layers ($$$;$$) {  # Un-obscure non-alphanum bindings from the first keyboard
    my ($self, $hh, $HH, $skipfix, $skipwarn) = @_; # [Main, AltGr-Main,...], [Secondary, AltGr-Secondary,...]
    $self->pre_link_layers($hh, $HH, $skipfix, $skipwarn);
#warn "Linking with FIX: $hh, $HH" unless $skipfix;
    # We expect that $hh is base-face, and $HH is a satellite.
    $self->face_make_backlinks($HH,
                               $self->{faces}{$HH}{'[char2key_prefer_first]'},
                               $self->{faces}{$HH}{'[char2key_prefer_last]'},
                               $skipfix, 'skipwarn');
    # To insert Flip_AltGr_Key into a face, we need to know where it is on the base face, and put it into the corresponding
    # slot of the satellite face.  After face_make_backlinks(), we can find it in the base face.
    # Moreover, we must do it BEFORE calling faces_link_via_backlinks().
    my $flip = $self->{faces}{$hh}{'[Flip_AltGr_Key]'};
    if (defined $flip) {
        my $flipped = $self->{faces}{$HH}{'[invAltGr_Accessor]'};
        if (defined $flipped) {
#           warn "adding AltGr-inv for $hh, accessor=", $self->key2hex($flipped);
            $flip = $self->charhex2key($flip);
#           warn "face_back on $hh: ", join ' ', keys %{$self->{face_back}{$hh} || {}};
            my $where = $self->{face_back}{$hh}{$flip};
            if ($where) {
                my ($l, $k, $shift) = @{ $where->[0] };
#               warn "Hex face_back l=$l, k=$k, shift-$shift on $hh";
                my $names = $self->{faces}{$HH}{layers};
                my $layer = $self->{layers}{ $names->[$l] };
                my $old   = $layer->[$k][$shift];       # what the slot holds now
                my ($c, $expl, $dead) = ($old, '???');
                if ($old and ref $old) {                # unpack [char, ?, is-prefix, explanation]
                    $c    = $old->[0];
                    $dead = $old->[2];
                    $expl = $old->[3] || '???';
                }
                my $DEAD = $dead || '';
                warn "adding Flip_AltGr => <<$flipped>> to $hh\'s satellite $HH: already occuplied by <<<$c>>> (via $expl), dead=$DEAD"
                    if defined $c and ($c ne $flipped or not $dead);
                $layer->[$k][$shift] = [$flipped, undef, 1, 'Prefix for AltGr inversion'];
                delete $self->{faces}{$hh}{'Face_link_map'}{$HH};       # Reuse old copy
#               warn "Added to $HH; k=$k\[$l, $shift]";
            } else {
                warn "failed: adding AltGr-inv for $hh, flip=$flip, accessor=", $self->key2hex($flipped);
            }
        }
    }
    $self->face_make_backlinks($hh,
                               $self->{faces}{$hh}{'[char2key_prefer_first]'},
                               $self->{faces}{$hh}{'[char2key_prefer_last]'},
                               'skip');
    $self->faces_link_via_backlinks($hh, $HH);
#   $self->faces_link_via_backlinks($HH, $hh);
}
6197            
# Build $self->{face_back}{$F}: a hash mapping every character bound in face
# $F to the array of its positions, each position being [layer, key, shift].
# Prefix keys (array references) are entered under their element 0.
#
#   $prefer_first, $prefer_last   hashes (or false) keyed by character: a
#       character of $prefer_first has new positions appended to its list
#       (the list is first reversed if it is non-empty and the character is in
#       $prefer_last too); for any other character new positions are prepended
#   $skipfix    if true, an already existing {face_back}{$F} is silently kept;
#               otherwise an existing one is fatal
#   $skipwarn   if true, suppresses the warning about characters which appear
#               several times without being listed in either hash
#
# Dies also when a layer has a last index differing from that of layer 0,
# unless one of the two reaches the lowest start index found in %start_SEC.
sub face_make_backlinks($$$$;$$) {  # It is crucial to proceed layers in
                                    # parallel: otherwise the semantic of char2key_prefer_first suffers
    my ($self, $F, $prefer_first, $prefer_last, $skipfix, $skipwarn) = @_;
    $prefer_first ||= {};
    $prefer_last  ||= {};
#warn "Making backlinks for `$F'";
    my $LL = $self->{faces}{$F}{layers};
    if ($self->{face_back}{$F}) {       # reuse old copy
        die "An obsolete copy of `$F' is cashed" unless $skipfix;
        return;                         # reuse old copy
    }
    my $seen = ($self->{face_back}{$F} ||= {}); # maps char to array of possitions it appears in, each [key, shift]
    # Since prefer_first should better operate in terms of keys, not layers; so the loop in $k should be the external one
    my $last = $#{ $self->{layers}{$LL->[0]} };
    my %warn;
    for my $k (0..$last) {
        for my $Lc (0..$#$LL) {
            my $L     = $LL->[$Lc];
#           $self->layer_make_backlinks($_, $prefer_first) for @$L;
            my $layer = $self->{layers}{$L};
            my $layer_last = $#$layer;
            if ($layer_last != $last) { # Detect typos if we can (i.e., if no overflow into special ranges)
                my $fst = 1e100;        # infinity
                for my $sec (values %start_SEC) {
                    $fst = $sec->[0] if $fst > $sec->[0];
                }
                die "Layer `$L' has lastchar $layer_last, expected $last" unless $last >= $fst or $layer_last >= $fst;
            }
#####
            for my $shift (0..$#{$layer->[$k]}) {
                my $c = $layer->[$k][$shift];
                next unless defined $c;
                $c = $c->[0] if 'ARRAY' eq ref $c;      # Treat prefix keys as usual chars
                my $position = [$Lc, $k, $shift];
                if ($prefer_first->{$c}) {
#warn "Layer `$L' char `$c': prefer first";
                    @{ $seen->{$c} } = reverse @{ $seen->{$c} } if $seen->{$c} and $prefer_last->{$c};  # prefer 2nd of 3 (2nd from the end)
                    push @{ $seen->{$c} }, $position;
                } else {
                    $warn{$c}++ if @{ $seen->{$c} || [] } and not $prefer_last->{$c} and $c ne ' ';     # XXXX Special-case ' ' ????
                    unshift @{ $seen->{$c} }, $position;
                }
            }
        }
    }
    warn "The following chars appear several times in face `$F', but are not clarified\n\t (by `char2key_prefer_first', `char2key_prefer_last'):\n\t<",
        join('> <', sort keys %warn), '>' if %warn and not $skipwarn;
}
6239            
# Number of the layer following layer $N, wrapping around from $max to 0.
sub flip_layer_N ($$$) {    # Increases layer number if number of layers is >2 (good for order Plain/AltGr/S-Ctrl)
    my ($self, $N, $max) = @_;
    return $N == $max ? 0 : $N + 1;
}
6245            
# Using the backlinks {face_back}{$F1} (which must exist), build two maps from
# every character of face $F1 to what face $F2 has at the same position:
#   {faces}{$F1}{Face_link_map}{$F2}       the first defined value found at
#                                          the positions of the character;
#   {faces}{$F1}{Face_link_map_INV}{$F2}   same, but taking the layer given by
#                                          flip_layer_N() instead.
# Characters with no defined counterpart are mapped to undef.  The layers of
# $F2 are read from their `[ini_copy1]' or `[ini_copy]' versions when these are
# present, unless $no_inic is true.  Does nothing if the first map is already
# there (and true).
sub faces_link_via_backlinks($$$;$) {   # It is crucial to proceed layers in
                                        # parallel: otherwise the semantic of char2key_prefer_first suffers
    my ($self, $F1, $F2, $no_inic) = @_;
    return if $self->{faces}{$F1}{'Face_link_map'}{$F2};        # Reuse old copy
#warn "Making links for `$F1' -> `$F2'";
    my $seen = $self->{face_back}{$F1} or die "Panic!"; # maps char to array of possitions it appears in, each [layer, key, shift]
    my $LL = $self->{faces}{$F2}{layers};
#!$no_inic and $self->{layers}{'[ini_copy1]'}{$_} and warn "ini_copy1 of `$_' exists" for @$LL;
#!$no_inic and $self->{layers}{'[ini_copy]'}{$_} and warn "ini_copy of `$_' exists" for @$LL;
    my @LL = map $self->{layers}{'[ini_copy1]'}{$_} || $self->{layers}{'[ini_copy]'}{$_} || $self->{layers}{$_}, @$LL;
    @LL = map $self->{layers}{$_}, @$LL if $no_inic;
    my $maxL = $#LL;
    my (%r, %altR);
    # XXXX Must use $self->{layers}{'[ini_copy]'}{$L} for the target
    for my $c (sort keys %$seen) {
        my $arr = $seen->{$c};
        warn "Empty back-mapping array for `$c' in face `$F1'" unless @$arr;
#       if (@$arr > 1) {
#       }
        my @same_layer = map { $LL[$_->[0]][$_->[1]][$_->[2]] } @$arr;
        my ($to) = grep defined, @same_layer;
        my @next_layer = map { $LL[$self->flip_layer_N($_->[0], $maxL)][$_->[1]][$_->[2]] } @$arr;
        my ($To) = grep defined, @next_layer;
        $r{$c}    = $to;        # Keep prefix keys as array refs
        $altR{$c} = $To;        # Ditto
    }
    $self->{faces}{$F1}{'Face_link_map'}{$F2} = \%r;
    $self->{faces}{$F1}{'Face_link_map_INV'}{$F2} = \%altR;
}
6275            
# If $c consists of 4 or more hex digits, return the character with that code
# point; otherwise return $c unchanged.
sub charhex2key ($$) {
    my ($self, $c) = @_;
    my $is_hex = $c =~ /^[0-9a-f]{4,}$/i;
    return $is_hex ? chr(hex $c) : $c;
}
6281            
# Return a copy of $s in which every stand-alone run of 4 or more hex digits,
# together with a `.' directly before and/or after it, is replaced by the
# character with that code point.
sub __manyHEX($$) {     # for internal use only
    my ($self, $s) = @_;
    $s =~ s/\.?(\b[0-9a-f]{4,}\b)\.?/ chr hex $1 /ieg;
    return $s;
}
6287            
# Return a copy of $s in which every sequence of hex numbers (4 or more digits
# each, optionally separated by `.') is converted to characters by __manyHEX().
sub stringHEX2string ($$) {     # One may surround HEX by ".", but only if needed.  If not needed, "." is preserved...
    my ($self, $s) = @_;
    $s =~ s/(?:\b\.)?((?:\b[0-9a-f]{4,}\b(?:\.\b)?)+)/ $self->__manyHEX("$1") /ieg;
    return $s;
}
6293            
# The recipe stored for layer $l in $self->{layer_recipes}, passed through
# recipe2str(); nothing if there is no entry for $l.
sub layer_recipe ($$) {
    my ($self, $l) = @_;
    if (exists $self->{layer_recipes}{$l}) {
        return $self->recipe2str($self->{layer_recipes}{$l});
    }
    return;
}
6299            
6300             sub massage_faces ($) {
6301 0     0 0   my $self = shift;
6302             # warn "Massaging faces...";
6303 0           for my $f (keys %{$self->{faces}}) { # Needed for (pre_)link_layers...
  0            
6304 0 0 0       next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6305             #warn "Massaging face `$f'...";
6306 0           for my $key ( qw( Flip_AltGr_Key Diacritic_if_undef DeadChar_DefaultTranslation DeadChar_32bitTranslation extra_report_DeadChar
6307             PrefixChains ctrl_after_modcol create_alpha_ctrl keep_missing_ctrl output_layers layers_modifiers
6308             layers_mods_keys mods_keys_KBD AltGrInv_AltGr_as_Ctrl
6309             ComposeKey_Show AltGr_Invert_Show Apple_Override Apple_Duplicate Apple_HexInput
6310             ComposeKey Explicit_AltGr_Invert Auto_Diacritic_Start CapsLOCKoverride) ) {
6311 0           $self->{faces}{$f}{"[$key]"} = $self->get_deep_via_parents($self, undef, 'faces', (split m(/), $f), $key);
6312             }
6313             $self->{faces}{$f}{'[char2key_prefer_first]'}{$_}++ # Make a hash
6314 0 0         for @{ $self->{faces}{$f}{char2key_prefer_first} || [] } ;
  0            
6315             $self->{faces}{$f}{'[char2key_prefer_last]'}{$_}++ # Make a hash
6316 0 0         for @{ $self->{faces}{$f}{char2key_prefer_last} || [] } ;
  0            
6317 0 0         $self->{faces}{$f}{'[AltGrInv_AltGr_as_Ctrl]'} = 1 unless defined $self->{faces}{$f}{'[AltGrInv_AltGr_as_Ctrl]'};
6318            
6319 0           my $idx = $self->get_deep($self, 'faces', (split m(/), $f), 'MetaData_Index');
6320             # defined $self->{faces}{$f}{"[$_]"} and not ref $self->{faces}{$f}{"[$_]"}
6321             # or
6322             $self->{faces}{$f}{"[$_]"} = $self->get_deep_via_parents($self, $idx, 'faces', (split m(/), $f), $_)
6323 0           for qw(LRM_RLM ALTGR SHIFTLOCK);
6324            
6325 0           my %R = qw(ComposeKey_Show ⎄ AltGr_Invert_Show ⤨); # On Apple only
6326 0   0       defined $self->{faces}{$f}{"[$_]"} or $self->{faces}{$f}{"[$_]"} = $R{$_} for keys %R;
6327             $self->{faces}{$f}{"[ComposeKey_Show]"}[0] = '⎄' # Make a safe default
6328 0 0 0       if ref $self->{faces}{$f}{"[ComposeKey_Show]"} and not length $self->{faces}{$f}{"[ComposeKey_Show]"}[0];
6329            
6330 0           my ($compK, %compK) = $self->{faces}{$f}{'[ComposeKey]'};
6331 0 0 0       if ($compK and ref $compK) {
    0          
6332 0           for my $cK (@$compK) {
6333 0           my @kkk = split /,/, $cK;
6334 0 0 0       $compK{ $self->key2hex($self->charhex2key($kkk[3])) }++ if defined $kkk[3] and length $kkk[3];
6335             }
6336             } elsif (defined $compK) {
6337 0           $compK{ $self->key2hex($self->charhex2key($compK)) }++;
6338             }
6339 0           $self->{faces}{$f}{'[ComposeKeys]'} = \%compK;
6340            
6341 0 0         unless ($self->{faces}{$f}{layers}) {
6342 0 0         next unless $self->{face_recipes}{$f};
6343 0           $self->face_by_face_recipe($f, $f);
6344             }
6345 0 0         for my $ln ( 0..$#{$self->{faces}{$f}{layers} || []} ) {
  0            
6346 0           my $ll = my $l = $self->{faces}{$f}{layers}[$ln];
6347 0 0         next if $self->{layers}{$l}; # Else, auto-vivify
6348             #warn "Creating layer `$l' for face `$f'...";
6349 0           my @r = $self->layer_recipe($l);
6350 0 0         $ll = $r[0] if @r;
6351 0           warn "Massaging: Using layout_recipe `$ll' for layer '$l'\n" if debug_face_layout_recipes and exists $self->{layer_recipes}{$l};
6352 0           $ll = $self->make_translated_layers($ll, $f, [$ln], '0000');
6353             #warn "... Result `@$ll' --> $self->{layers}{$ll->[0]}";
6354 0 0         $self->{layers}{$l} = $self->{layers}{$ll->[0]} unless $self->{layers}{$l}; # Could autovivify in between???
6355             }
6356 0           (my ($seen, $seen_dead), $self->{faces}{$f}{'[dead_in_VK]'}) = $self->massage_VK($f);
6357 0           $self->{faces}{$f}{'[dead_in_VK_array]'} = $seen_dead;
6358 0           $self->{faces}{$f}{'[coverage_hex]'}{$self->key2hex($_)}++ for @$seen;
6359 0 0         for my $S (@{ $self->{faces}{$f}{AltGrCharSubstitutions} || []}) {
  0            
6360 0           my $s = $self->stringHEX2string($S);
6361 0           $s =~ s/\p{Blank}(?=\p{NonspacingMark})//g;
6362 0 0         die "Expect 2 chars in AltGr-char substitution rule; I see <$s> (from <$S>)" unless 2 == (my @s = split //, $s);
6363 0           push @{ $self->{faces}{$f}{'[AltSubstitutions]'}{$s[0]} }, [$s[1], 'manual'];
  0            
6364 0 0 0       push @{ $self->{faces}{$f}{'[AltSubstitutions]'}{lc $s[0]} }, [lc $s[1], 'manual']
  0            
6365             if lc $s[0] ne $s[0] and lc $s[1] ne $s[1];
6366 0 0 0       push @{ $self->{faces}{$f}{'[AltSubstitutions]'}{uc $s[0]} }, [uc $s[1], 'manual']
  0            
6367             if uc $s[0] ne $s[0] and uc $s[1] ne $s[1];
6368             }
6369 0 0         s/^\s+//, s/\s+$//, $_ = $self->stringHEX2string($_) for @{ $self->{faces}{$f}{Import_Prefix_Keys} || []};
  0            
6370 0 0         my %h = @{ $self->{faces}{$f}{Import_Prefix_Keys} || []};
  0            
6371 0 0         $self->{faces}{$f}{'[imported2key]'} = \%h if %h;
6372 0           my ($l0, $c);
6373 0 0         unless ($c = $self->{layer_counts}{$l0 = $self->{faces}{$f}{layers}[0]}) {
6374 0           $l0 = $self->get_deep_via_parents($self, undef, 'faces', (split m(/), $f), 'geometry_via_layer');
6375 0 0         $c = $self->{layer_counts}{$l0} if defined $l0;
6376             }
6377 0 0         my $o = $self->{layer_offsets}{$l0} if defined $l0;
6378 0 0         $self->{faces}{$f}{'[geometry]'} = $c if $c;
6379 0 0         $self->{faces}{$f}{'[g_offsets]'} = $o if $o;
6380             }
6381 0           for my $f (keys %{$self->{faces}}) { # Needed for face_make_backlinks: must know which keys in faces will be finally present
  0            
6382 0 0 0       next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6383 0 0         for my $F (@{ $self->{faces}{$f}{AltGrCharSubstitutionFaces} || []}) { # Now has a chance to have real layers
  0            
6384 0           for my $L (0..$#{$self->{faces}{$f}{layers}}) {
  0            
6385 0           my $from = $self->{faces}{$f}{layers}[$L];
6386 0 0         next unless my $to = $self->{faces}{$F}{layers}[$L];
6387 0           $_ = $self->{layers}{$_} for $from, $to;
6388 0           for my $k (0..$#$from) {
6389 0 0 0       next unless $from->[$k] and $to->[$k];
6390 0           for my $shift (0..1) {
6391 0 0 0       next unless defined (my $s = $from->[$k][$shift]) and defined (my $ss = $to->[$k][$shift]);
6392 0   0       $_ and ref and $_ = $_->[0] for $s, $ss;
      0        
6393 0           push @{ $self->{faces}{$f}{'[AltSubstitutions]'}{$s} }, [$ss, "F=$F"];
  0            
6394             }
6395             }
6396             }
6397             }
6398             } # ^^^ This is not used yet???
6399 0           for my $f (keys %{$self->{faces}}) { # Needed for face_make_backlinks: must know which keys in faces will be finally present
  0            
6400 0 0 0       next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6401 0 0         for my $N (0..$#{ $self->{faces}{$f}{AltGrCharSubstitutionLayers} || []}) { # Now has a chance to have real layers
  0            
6402 0           my $TO = my $to = $self->{faces}{$f}{AltGrCharSubstitutionLayers}[$N];
6403 0 0         my $from = $self->{faces}{$f}{layers}[$N] or next;
6404 0           $_ = $self->{layers}{$_} for $from, $to;
6405 0           for my $k (0..$#$from) {
6406 0 0 0       next unless $from->[$k] and $to->[$k];
6407 0           for my $shift (0..1) {
6408 0 0 0       next unless defined (my $s = $from->[$k][$shift]) and defined (my $ss = $to->[$k][$shift]);
6409 0   0       $_ and ref and $_ = $_->[0] for $s, $ss;
      0        
6410 0           push @{ $self->{faces}{$f}{'[AltSubstitutions]'}{$s} }, [$ss, "L=$TO"];
  0            
6411             }
6412             }
6413             }
6414             }
6415 0           for my $f (keys %{$self->{faces}}) { # Linking uses the number of slots in layer 0 as the limit; fill to make into max
  0            
6416 0 0 0       next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6417 0           my $L = $self->{faces}{$f}{layers};
6418 0           my @last = map $#{$self->{layers}{$_}}, @$L;
  0            
6419 0           my $last = $last[0];
6420 0   0       $last < $_ and $last = $_ for @last;
6421 0           push @{$self->{layers}{$L->[0]}}, [] for 1..($last-$last[0]);
  0            
6422             }
6423 0           for my $f (keys %{$self->{faces}}) { # Needed for face_make_backlinks: must know which keys in faces will be finally present
  0            
6424 0 0 0       next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6425 0           my $o = $self->{faces}{$f}{LinkFace};
6426 0 0         $self->pre_link_layers($o, $f) if defined $o; # May add keys to $f
6427             # warn("pre_link <$o> <$f>\n") if defined $o;
6428             }
6429 0           for my $f (keys %{$self->{faces}}) {
  0            
6430 0 0 0       next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6431 0           $self->face_make_backlinks($f, $self->{faces}{$f}{'[char2key_prefer_first]'}, $self->{faces}{$f}{'[char2key_prefer_last]'});
6432             }
6433 0           for my $f (keys %{$self->{faces}}) {
  0            
6434 0 0 0       next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6435 0           my $o = $self->{faces}{$f}{LinkFace};
6436 0 0         next unless defined $o;
6437 0           $self->faces_link_via_backlinks($f, $o);
6438 0           $self->faces_link_via_backlinks($o, $f);
6439             }
6440 0           for my $f (keys %{$self->{faces}}) {
  0            
6441 0 0 0       next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6442 0 0         if (defined( my $r = $self->{faces}{$f}{"[CapsLOCKoverride]"} )) {
6443 0           warn "Massaging CapsLock for face `$f'...\n" if debug_face_layout_recipes;
6444 0           $self->{faces}{$f}{'[CapsLOCKlayers]'} = $self->layers_by_face_recipe($r, $f, $r);
6445             }
6446 0   0       my ($DDD, $export, $vk) = map $self->{faces}{$f}{"[$_]"} ||= {}, qw(DEAD export dead_in_VK);
6447 0   0       my ($ddd) = map $self->{faces}{$f}{"[$_]"} ||= [], qw(dead);
6448 0           $self->coverage_hex($f);
6449 0           my $S = $self->{faces}{$f}{layers};
6450 0           my ($c,%s,@d) = 0;
6451 0 0         for my $D (@{$self->{faces}{$f}{layerDeadKeys} || []}) { # deprecated...
  0            
6452 0 0         $c++, next unless length $D; # or $D ~= /^\s*--+$/ ; # XXX How to put empty elements in an array???
6453 0           $D =~ s/^\s+//;
6454 0           (my $name, my @k) = split /\s+/, $D;
6455 0           @k = map $self->charhex2key($_), @k;
6456             die "name of layerDeadKeys' element in face `$f' does not match:\n\tin `$D'\n\t`$name' vs `$self->{faces}{$f}{layers}[$c]'"
6457 0 0         unless $self->{faces}{$f}{layers}[$c] =~ /^\Q$name\E(<.*>)?$/; # Name might have changed in VK processing
6458 0   0       1 < length and die "not a character as a deadkey: `$_'" for @k;
6459 0           $ddd->[$c] = {map +($_,1), @k};
6460 0   0       ($s{$_}++ or push @d, $_), $DDD->{$_} = 1 for @k;
6461 0           $c++;
6462             }
6463 0 0         for my $k (split /\p{Blank}+(?:\|{3}\p{Blank}+)?/,
6464             (defined $self->{faces}{$f}{faceDeadKeys} ? $self->{faces}{$f}{faceDeadKeys} : '')) {
6465 0 0         next unless length $k;
6466 0           $k = $self->charhex2key($k);
6467 0 0         1 < length $k and die "not a character as a deadkey: `$k'";
6468 0           $ddd->[$_]{$k} = 1 for 0..$#{ $self->{faces}{$f}{layers} }; # still used...
  0            
6469 0           $DDD->{$k} = 1;
6470 0 0         $s{$k}++ or push @d, $k;
6471             }
6472 0 0         for my $k (split /\p{Blank}+/, (defined $self->{faces}{$f}{ExportDeadKeys} ? $self->{faces}{$f}{ExportDeadKeys} : '')) {
6473 0 0         next unless length $k;
6474 0           $k = $self->charhex2key($k);
6475 0 0         1 < length $k and die "not a character as an exported deadkey: `$k'";
6476 0           $export->{$k} = 1;
6477             }
6478 0 0         if (my $LL = $self->{faces}{$f}{'[ini_layers]'}) {
6479 0           my @out;
6480 0           for my $L ( @$LL ) {
6481 0           push @out, "$L++prefix+";
6482 0           my $l = $self->{layers}{$out[-1]} = $self->deep_copy($self->{layers}{$L});
6483 0           for my $n (0 .. $#$l) {
6484 0           my $K = $l->[$n];
6485 0           for my $k (@$K) {
6486             #warn "face `$f' layer `$L' ini_layers_prefix: key `$k' marked as a deadkey" if defined $k and $DDD->{$k};
6487 0 0 0       $k = [$k] if defined $k and not ref $k; # Allow addition of doc strings
6488 0 0 0       if (defined $k and ($DDD->{$k->[0]} or $vk->{$k->[0]})) {
      0        
6489 0   0       @$k[1,2] = ($f, $k->[2] || ($export->{$k->[0]} ? 2 : 1)); # Is exportable?
6490             }
6491             }
6492             }
6493             }
6494 0           $self->{faces}{$f}{'[ini_layers_prefix]'} = \@out;
6495 0           $LL = $self->{faces}{$f}{'[ini_filled_layers]'} = [ @{ $self->{faces}{$f}{layers} } ]; # Deep copy
  0            
6496 0           my @OUT;
6497 0           for my $L ( @$LL ) {
6498 0           push @OUT, "$L++PREFIX+";
6499 0           my $l = $self->{layers}{$OUT[-1]} = $self->deep_copy($self->{layers}{$L});
6500 0           for my $n (0 .. $#$l) {
6501 0           my $K = $l->[$n];
6502 0           for my $k (@$K) {
6503             #warn "face `$f' layer `$L' layers_prefix: key `$k' marked as a deadkey" if defined $k and $DDD->{$k};
6504 0 0 0       $k = [$k] if defined $k and not ref $k; # Allow addition of doc strings
6505 0 0 0       if (defined $k and ($DDD->{$k->[0]} or $vk->{$k->[0]})) {
      0        
6506 0   0       @$k[1,2] = ($f, $k->[2] || ($export->{$k->[0]} ? 2 : 1)); # Is exportable?
6507             }
6508             }
6509             }
6510             }
6511 0           $self->{faces}{$f}{layers} = \@OUT;
6512             } else {
6513 0           warn "Face `$f' has no ini_layers";
6514             }
6515 0           $self->{faces}{$f}{'[dead_array]'} = \@d;
6516 0 0 0       for my $D (@{$self->{faces}{$f}{faceDeadKeys2} || $self->{faces}{$f}{layerDeadKeys2} || []}) { # layerDeadKeys2 obsolete
  0            
6517 0           $D =~ s/^\s+//; $D =~ s/\s+$//;
  0            
6518 0           my @k = split //, $self->stringHEX2string($D);
6519 0 0         2 != @k and die "not two characters as a chained deadkey: `@k'";
6520             #warn "dead2 for <@k>";
6521 0           $self->{faces}{$f}{'[dead2]'}{$k[0]}{$k[1]}++;
6522             # $k[1] is "untranslated"; it is not good for [DEAD]:
6523             #$self->{faces}{"$f###" . $self->key2hex($k[0])}{'[DEAD]'}{$k[1]}++;
6524             }
6525             }
6526             $self
6527 0           }
6528            
# Convert the "preferred key" lists of the configuration in place.
# Each entry of $self->{'[keys]'} is a slash-separated path; the hash that
# get_deep() finds at that path has every element of its
# char2key_prefer_first and char2key_prefer_last arrays (when present)
# replaced by the result of charhex2key() on that element.
sub massage_hash_values($) {
    my $self = shift;
    for my $path ( @{ $self->{'[keys]'} } ) {
        my $node   = $self->get_deep($self, split m(/), $path);
        my $first  = $node->{char2key_prefer_first} || [];
        my $last   = $node->{char2key_prefer_last}  || [];
        # foreach aliases the array elements, so assigning rewrites them in place
        for my $entry (@$first, @$last) {
            $entry = $self->charhex2key($entry);
        }
    }
}
6537             #use Dumpvalue;
6538            
# Print one tab-separated report line for the string $k to the currently
# selected output handle:
#   PREFIX + dot-joined key2hex() of each character, the string itself inside
#   <...> (with a leading space when it matches $rxCombining), and the
#   ' + '-joined UName() descriptions of its characters.
# $prefix is optional and defaults to the empty string.
sub print_codepoint ($$;$) {
    my ($self, $k, $prefix) = @_;
    $prefix = '' unless defined $prefix;
    my $shown = $k;
    $shown = " $k" if $k =~ /$rxCombining/;
    my @chars = split //, $k;
    my $hex   = join '.',   map { $self->key2hex($_) } @chars;
    my $names = join ' + ', map { $self->UName($_, 'verbose', 'vbell') } @chars;
    printf "%s%s\t<%s>\t%s\n", $prefix, $hex, $shown, $names;
}
6547            
# Load the Unicode helper tables needed by the reports:
#   - compositions, from the file reported by get_NamesList() (if any);
#   - age data, from the file reported by get_AgeList() (if any), unless
#     $self->{Age} is already set.
# Returns $self, so the call can be chained.
sub require_unidata_age ($) {
    my ($self) = @_;

    my $names_list = $self->get_NamesList;
    if (defined $names_list) {
        $self->load_compositions($names_list);
    }

    my $age_list = $self->get_AgeList;
    if (defined $age_list and not $self->{Age}) {
        $self->load_uniage($age_list);
    }

    return $self;
}
6557            
# Print one print_codepoint() line for every distinct character of the
# string $s, in sorted order.
# The Unicode tables are loaded first through require_unidata_age() instead
# of repeating its body here, so the loading logic lives in one place.
sub print_coverage_string ($$) {
    my ($self, $s) = @_;

    my %seen;
    $seen{$_}++ for split //, $s;

    $self->require_unidata_age;

    # NOTE(review): nothing in this sub uses Unicode::UCD directly; presumably
    # the name lookup done for print_codepoint() needs it -- confirm.
    require Unicode::UCD;

    $self->print_codepoint($_) for sort keys %seen;
}
6572            
# Print the coverage report for face $F to the currently selected output
# handle and return the value of print_table_coverage($F).
#
# The report consists of, in order: a "Generated with" banner, two summary
# lines with binding/character counts, the characters of '[coverage00+]',
# then titled sections for base multi-char strings, characters reachable
# via single and via repeated prefix keys, Compose-only characters, the
# entries of '[in_dia_chains]' and '[map2diac]' that are (not) covered, the
# per-key table, and two lists of uncovered characters in fixed code-point
# ranges ("math+latin-D").
#
# Changes against the previous version: the Unicode tables are loaded via
# require_unidata_age() instead of a copy of its body, unused locals are
# gone, and the two nearly identical range loops share one closure.
sub print_coverage ($$) {
    my ($self, $F) = @_;

    $self->require_unidata_age;

    my $file = $self->{'[file]'};
    $file = defined $file ? "file $file" : 'string descriptor';
    my $v = $self->{VERSION};
    $file .= " version $v" if defined $v;
    $file .= " Unicode tables version $self->{uniVersion}" if defined $self->{uniVersion};

    print "############# Generated with UI::KeyboardLayout v$UI::KeyboardLayout::VERSION for $file, face=$F\n#\n";

    # All per-face data is read through this reference (same hash as $self->{faces}{$F}).
    my $face = $self->{faces}{$F};

    my $c1 = @{ $face->{'[coverage1only]'} };
    my $c2 = @{ $face->{'[coverage1]'} } - $c1;
    my $more = '';      # placeholder kept for the %s slot of the second summary line

    # Number of distinct multi-character strings in [coverage0] and [coverage1]
    my @multi_c;
    for my $n (0, 1) {
        my %multi;
        $multi{$_}++ for grep { 1 < length } @{ $face->{"[coverage$n]"} };
        push @multi_c, scalar keys %multi;
    }

    # Compose-only: in [inCompose], but in neither coverage list; single chars below U+10000 only
    my %comp = %{ $face->{'[inCompose]'} || {} };
    delete $comp{$_} for @{ $face->{"[coverage0]"} }, @{ $face->{"[coverage1]"} };
    my @comp = grep {2 > length and 0x10000 > ord} sort keys %comp;

    my $n0    = @{ $face->{'[coverage0]'} };
    my $chars = $n0 + $c1 + $c2 - $multi_c[0] - $multi_c[1];
    printf "######### %i = %i + %i + %i + %i bindings [1-char + base multi-char-strings (MCS) + “extra layers” MCS + only via Compose key]\n",
        $n0 + $c1 + $c2 + @comp, $chars, $multi_c[0], $multi_c[1], scalar @comp;
    printf "######### %i = %i + %i + %i%s [direct + via single prefix keys and “extra layers” (both=%i) + via repeated prefix key] chars\n",
        $chars, $n0 - $multi_c[0], $c1 - $multi_c[1], $c2, $more,
        @{ $face->{'[coverage00+]'} } + $c1 - $multi_c[0] - $multi_c[1];

    for my $k (@{ $face->{'[coverage00+]'} }) {
        $self->print_codepoint($k);
    }
    print "############# Base multi-char strings:\n";
    for my $k (@{ $face->{'[coverage00++]'} }) {
        $self->print_codepoint($k);
    }
    print "############# Via single prefix keys:\n";
    for my $k (@{ $face->{'[coverage1only]'} }) {
        $self->print_codepoint($k) if 2 > length $k;
    }
    print "############# Multi-char via single prefix keys:\n";
    for my $k (@{ $face->{'[coverage1only]'} }) {
        $self->print_codepoint($k) if 1 < length $k;
    }

    # Entries already listed in the "single prefix" sections are not repeated below
    my $single = $face->{'[coverage1only_hash]'};
    print "############# Via repeated prefix keys:\n";
    for my $k (@{ $face->{'[coverage1]'} }) {
        next if 2 <= length $k or $single->{$k};
        $self->print_codepoint($k);
    }
    print "############# Multi-char via repeated prefix keys:\n";
    for my $k (@{ $face->{'[coverage1]'} }) {
        next if 1 >= length $k or $single->{$k};
        $self->print_codepoint($k);
    }

    print "############# Only via Compose key:\n";
    for my $k (@comp) {
        $self->print_codepoint($k, '= ');
    }

    print "############# Have lost the competition (for prefixed position), but available elsewhere:\n";
    for my $k (sort keys %{ $face->{'[in_dia_chains]'} }) {
        next unless $face->{'[coverage_hash]'}{$k} and not $face->{'[from_dia_chains]'}{$k};
        $self->print_codepoint($k, '+ ');     # May be in from_dia_chains, but be obscured later...
    }
    print "############# Have lost the competition (not counting those explicitly prohibited by \\\\):\n";
    for my $k (sort keys %{ $face->{'[in_dia_chains]'} }) {
        next if $face->{'[coverage_hash]'}{$k};
        $self->print_codepoint($k, '- ');
    }

    my @diacs     = sort keys %{ $self->{'[map2diac]'} };
    my @lost_diac = grep { !$face->{'[coverage_hash]'}{$_} } @diacs;
    my ($lost_diac, $tot_diac) = (scalar @lost_diac, scalar @diacs);
    print "############# Lost among known classified modifiers/standalone/combining ($lost_diac/$tot_diac):\n";
    for my $k (@lost_diac) {
        $self->print_codepoint($k, '?- ');
    }

    print "############# Per key list:\n";
    my $OOut = $self->print_table_coverage($F);

    # Print one "-==-" line per [start, length] range holding the characters of
    # the range which are not in [coverage_hash] (and, when $skip_chained is
    # true, not in [in_dia_chains] either).  Characters above U+A720 count only
    # if $self->{UNames} knows them.  The header shows two totals: ranges
    # starting below U+A720, and the rest.
    my $report_ranges = sub {
        my ($title, $skip_chained, @ranges) = @_;
        my ($out, @count) = ('', 0, 0);
        for my $r (@ranges) {
            my ($start, $len) = @$r;
            my $missing = join '', grep {
                (0xa720 >= ord $_ or $self->{UNames}{$_})   # before a720, the tables are filled up...
                    and !$face->{'[coverage_hash]'}{$_}
                    and !($skip_chained and $face->{'[in_dia_chains]'}{$_})
            } map chr($_), $start .. ($start + $len - 1);
            $count[ $start < 0xa720 ? 0 : 1 ] += length $missing;
            $out .= "-==-\t$missing\n";
        }
        print "############# $title ($count[0]+$count[1]):\n$out";
    };

    $report_ranges->('Not covered in the math+latin-D ranges', 0,
        [0x2200, 0x40], [0x2240, 0x40], [0x2280, 0x40], [0x22c0, 0x40],
        [0x27c0, 0x30], [0x2980, 0x40], [0x29c0, 0x40],
        [0x2a00, 0x40], [0x2a40, 0x40], [0x2a80, 0x40], [0x2ac0, 0x40],
        [0xa720, 0x80-0x20], [0xa780, 0x80]);
    $report_ranges->('Not competing, in the math+latin-D ranges', 1,
        [0x2200, 0x80], [0x2280, 0x80],
        [0x27c0, 0x30], [0x2980, 0x80],
        [0x2a00, 0x80], [0x2a80, 0x80], [0xa720, 0x100-0x20]);

    $OOut
}
6681            
# Replacement text for the characters which are special in HTML.  Each one
# must map to its entity: an identity mapping would make the
# s/([&<>])/$html_esc{$1}/g escaping step a no-op.
my %html_esc = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;');
# Extra key names for control characters, keyed by a printable
# representation (a backslash escape or ^C; the keys are literal text, not
# the control characters themselves).
# Written as an explicit list because one value contains a space: inside
# qw() that value was split into two words, which shifted every following
# key/value pair.
my %ctrl_special = (
    '\r'   => 'Enter',
    '\n'   => 'Control-Enter',
    '\b'   => 'BackSpace',
    '\x7f' => 'Control-Backspace',
    '\t'   => 'Tab',
    '\x1b' => 'Esc; Control-[',
    '\x1d' => 'Control-]',
    '\x1c' => 'Control-\\',
    '^C'   => 'Control-Break',
    '\x1e' => 'Control-^',
    '\x1f' => 'Control-_',
    '\x00' => 'Control-@',
);
# Maps special characters (zero-width, directional and space characters,
# soft hyphen) to the symbol used as their visible "convention" marker.
# The table is written as code-point-in-hex / symbol pairs.
my %alt_symb;
{
    no warnings 'qw';           # one of the symbols below is a bare comma
    my @pairs = (
        #  ZWS     ZWNJ   ZWJ    LRM    RLM    WJ=ZWNBSP Func    Times  Sep    Plus
        qw(200b ∅ 200c ‸ 200d & 200e → 200f ← 2060 ⊕ 2061 () 2062 × 2063 | 2064 +),
        #  SPC NBSP obs-N obs-M n m m/3 m/4 m/6 figure=digit punctuation thin hair Soft-hyphen
        qw(0020 ␣ 00a0 ⍽ 2000 N 2001 M 2002 n 2003 m 2004 ᵐ⁄₃ 2005 ᵐ⁄₄ 2006 ᵐ⁄₆ 2007 ᵈ 2008 , 2009 ᵐ⁄₅ 200a ᵐ⁄₈ 00ad -),
        #  LineSep ParSep LRE RLE PopDirForm LRO RLO narrowNBSP
        qw(2028 ⏎ 2029 ¶ 202a ⇒ 202b ⇐ 202c ↺ 202d ⇉ 202e ⇇ 202f ⁿ),
    );
    # Consume the list two words at a time: hex code point, then its symbol
    while (my ($hex, $symbol) = splice @pairs, 0, 2) {
        $alt_symb{ chr hex $hex } = $symbol;
    }
}
6695            
6696             # Make: span for control, soft-hyphen, white-space; include in with popup; include in span with special highlight
# char_2_html_span($base_c, $C, $c, $F, $opts, @types)
# Build the HTML fragment which shows one character of the layout table: an opening
# <span ...> (or <bdo dir=ltr ...> when $opts->{ltr} is true) carrying a class list
# and a title attribute, followed by $prefill, the massaged character and $fill.
#   $base_c - compared with the '[Flip_AltGr_Key]' character of the face to detect
#             the "AltGr-inverter" cell (class `altGrInv');
#   $C      - the table entry; when it is an ARRAY ref, element 2 is used as $prefix
#             (enables the '[prefixDocs]' lookup) and element 3 as explanation text;
#             when it is undef the cell may get class `vbell';
#   $c      - the character(s) to display;  $F - face name;
#   @types  - initial class names; more are appended below.
# Because @types slurps the rest of @_, $expl, $title and $vbell always start undefined.
# NOTE(review): several literals below ("$C", $fill, $prefill, the end of the returned
# string) look empty or truncated in this copy -- presumably markup was lost; confirm
# against the real source before changing any logic here.
6697             sub char_2_html_span ($$$$$$;@) {
6698 0     0 0   my ($self, $base_c, $C, $c, $F, $opts, @types, $expl, $title, $vbell) = @_;
6699 0           my $aInv = $self->charhex2key($self->{faces}{$F}{'[Flip_AltGr_Key]'});
6700 0 0 0       $expl = $C->[3] if 'ARRAY' eq ref $C and $C->[3];
6701 0 0         $expl =~ s/(?=\p{NonspacingMark})/ /g if $expl;
6702 0   0       my $prefix = (ref $C and $C->[2]);
6703 0           my $cc = $c;
6704 0   0       $aInv = ($base_c || 'N/A') eq $aInv;
6705 0   0       my $docs = ($prefix and $self->{faces}{$F}{'[prefixDocs]'}{$self->key2hex($cc)}); # or $pre and warn "No docs: face=`$F', c=`$cc'\n";
6706 0 0         $docs =~ s/([''&])/sprintf '&#x%02x;', ord $1/ge if defined $docs;
  0            
6707             # warn "... is_D2: ", $self->array2string([$c, $baseK[$L][$shift]]);
6708 0           $c =~ s/(?=$rxCombining)/\x{25cc}/go; # dotted circle ◌ 25CC
6709 0           $c =~ s/([&<>])/$html_esc{$1}/g;
6710 0           my $create_a_c = $self->{faces}{$F}{'[create_alpha_ctrl]'};
6711 0 0         $create_a_c = $create_alpha_ctrl unless defined $create_a_c;
6712 0   0       my $alpha_ctrl = ($create_a_c and $cc =~ /[\cA-\cZ]/);
6713 0 0 0       my $with_shift = (($create_a_c > 1 and $alpha_ctrl) ? '(Shift-)' : '');
6714 0   0       $c =~ s{([\x00-\x1F\x7F])}{ my $C = $self->control2prt("$1"); my $S = $ctrl_special{$C} || '';
  0            
  0            
6715 0 0 0       ($S and $S .= ", "), $S .= "Control-$with_shift".chr(0x40+ord $1) if $alpha_ctrl;
6716 0 0         $C = "$C" if $S; $C }ge;
  0            
6717 0   0       my $type = ($cc =~ /[^\P{Blank}\x00-\x1f]/ && 'WS'); # Blank and not control char
6718 0           my ($fill, $prefill, $zw) = ('', '');
6719 0 0 0       if ($type or $c =~ /($rxZW)$/o) {
6720 0 0         my $alt = ($alt_symb{$cc} ? qq( convention="$alt_symb{$cc}") : '');
6721 0           $fill = ""; # Soft hyphen etc
6722             }
6723 0 0         if ($type) { # Putting WS inside l makes gaps between adjacent WS blocks
6724 0           $prefill = '';
6725 0           $fill .= '';
6726             }
6727 0 0         push @types, 'no-mirror-rtl' if "\x{34f}" eq $cc; # CGJ
6728 0   0       $zw = !!$fill || $cc eq "\x{034f}";
6729 0           $vbell = !defined $C;
6730 0 0         unless (defined $title) {
6731 0   0       $title = ((ord $cc >= 0x80 or $cc eq ' ') && sprintf '%04X %s', ord $cc, $self->UName($cc, 'verbose', $vbell));
6732 0 0 0       if ($title and $docs) {
6733 0           $title = "$docs (on $title)";
6734             }
6735 0   0       $title ||= ($docs || '');
      0        
6736 0 0 0       if (defined $expl and length $expl and (1 or 0x7f <= ord $cc)) {
      0        
6737 0 0         $title .= ' ' if length $title;
6738 0           $title .= " {via $expl}";
6739             }
6740 0 0 0       $title .= ' (visual bell indicates unassigned keypress)' if $title and !$expl and $vbell;
      0        
6741 0 0         $title = 'This prefix key accesses this column with AltGr-invertion' if $aInv;
6742 0 0         $title =~ s/([''&])/sprintf '&#x%02x;', ord $1/ge if $title;
  0            
6743 0 0         $title = qq( title='$title') if $title;
6744             }
                 # At most one highlight class is chosen below; the name-based tests match
                 # against the title text assembled above.
6745 0 0 0       if ($type) { # Already covered
    0 0        
    0 0        
    0 0        
    0 0        
    0          
    0          
    0          
    0          
    0          
6746             } elsif ($zw) {
6747 0           push @types,'ZW';
6748             } elsif (not defined $C) {
6749 0           push @types,'vbell';
6750             } elsif ($title =~ /(\b(N-ARY|BIG(?!\s+YUS\b)|GREEK\s+PROSGEGRAMMENI|KORONIS|SOF\s+PASUQ|PUNCTUATION\s+(?:GERESH|GERSHAYIM)|PALOCHKA|CYRILLIC\s.*\s(DZE|JE|QA|WE|A\s+IE)|ANO\s+TELEIA|GREEK\s+QUESTION\s+MARK)|"\w+\s+(?:BIG|LARGE))\b.*\s+\[/) { # "0134 BIG GUY#"
6751 0           push @types,'nAry';
6752             } elsif ($title =~ /\b(OPERATOR|SIGN|SYMBOL|PROOF|EXISTS|FOR\s+ALL|(DIVISION|LOGICAL)\b.*)\s+\[/) {
6753 0           push @types,'operator';
6754             } elsif ($title =~ /\b(RELATION|PERPENDICULAR|PARALLEL\s*TO|DIVIDES|FRACTION\s+SLASH)\s+\[/) {
6755 0           push @types,'relation';
6756             } elsif ($title =~ /\[.*\b(IPA)\b|\bCLICK\b/) {
6757 0           push @types,'ipa';
6758             } elsif ($title =~ /\bLETTER\s+[AEUIYO]\b/ and
6759             $title =~ /\b(WITH|AND)\s+(HOOK\s+ABOVE|HORN)|(\s+(WITH|AND)\s+(CIRCUMFLEX|BREVE|ACUTE|GRAVE|TILDE|DOT\s+BELOW)\b){2}/) {
6760 0           push @types,'viet';
6761             } elsif (0 <= index(lc '⁊ǷꝥƕǶᵹ', lc $cc) or 0xa730 <= ord $cc and 0xa78b > ord $cc or 0xa7fb <= ord $cc and 0xa7ff >= ord $cc) {
6762 0           push @types,'paleo';
6763             } elsif ($title =~ /(\s+(WITH|AND)\s+((DOUBLE\s+)?\w+(\s+(BELOW|ABOVE))?)\b){2}/) {
6764 0           push @types,'doubleaccent';
6765             }
                 # An explanation mentioning Subst{ adds `withSubst' when the optional group
                 # of the pattern matched, and `isSubst' otherwise.
6766 0 0 0       push @types, ($1 ? 'withSubst' : 'isSubst') if ($expl || '') =~ /\sSubst\{(\S*\}\s+\S)?/;
    0          
6767 0 0         push @types, 'altGrInv' if $aInv;
6768 0 0         my $q = ("@types" =~ /\s/ ? "'" : '');
6769             # ($prefill, $fill) = ("$prefill", "$fill");
6770 0 0         @types = " class=$q@types$q" if @types;
6771 0 0 0       my($T,$OPT) = ($opts && $opts->{ltr} ? ('bdo', ' dir=ltr') : ('span', '')); # Just `span´ does not work in FF15
6772 0 0 0       $c = '†' if $aInv and $cc ne ($base_c || 'N/A'); #  
      0        
6773 0           "<$T$OPT@types$title>$prefill$c$fill"
6774             }
6775            
# print_table_coverage($F [, $html [, $extra_headers]])
# Print the per-key coverage table of face $F to the currently selected output handle
# and return the table rows as one string (each printed row is also appended to $OOut).
#   $html          - true: cells are produced by char_2_html_span(); false: plain text
#                    with cells separated by a TAB (or a space after a long cell);
#   $extra_headers - text interpolated into the first heredoc; defaults to ''.
# The first column group shows the base face; one more group is added per prefix key
# taken from '[dead_array]', '[dead_in_VK_array]' and '[extra_report_DeadChar]', plus
# the two-key prefixes listed in LayoutTable_add_double_prefix_keys.
# Key positions from '[start_ctrl]' up to (not including) '[end_ctrl]', and those inside
# the %start_SEC sections, are left out; rows without any defined character are not printed.
# NOTE(review): the heredoc bodies and most HTML string literals are truncated in this
# copy (markup was presumably lost), so the code below is not valid Perl as shown;
# confirm every detail against the real source.
6776             sub print_table_coverage ($$;$$) {
6777 0   0 0 0   my ($self, $F, $html, $extra_headers) = (shift, shift, shift, shift || '');
6778 0           my $f = $self->{'[file]'};
6779 0 0         $f = (defined $f) ? "file $f" : 'string descriptor';
6780 0           my $v = $self->{VERSION};
6781 0 0         $f .= " version $v" if defined $v;
6782 0 0         $f .= " Unicode tables version $self->{uniVersion}" if defined $self->{uniVersion};
6783 0 0         print <
6784            
6785             "http://www.w3.org/TR/html4/loose.dtd">
6786            
6787            
6788            
6789             $extra_headers
6832            
6833            
6834             [1] />"; $COLS ), next unless $dFace; "; $header\n \n" " if $html; # Do not make RTL chars mix up the order
6835             EOP
6836 0           my($LL, $INV, %s, @d, %access, %docs) = ($self->{faces}{$F}{layers}, $self->{faces}{$F}{'[Flip_AltGr_Key]'});
6837 0 0         $s{$self->charhex2key($INV)}++ if defined $INV; # Skip in reports '
6838 0           my @LL = map $self->{layers}{$_}, @$LL;
6839 0 0 0       $s{$_}++ or push @d, $_ for map @{ $self->{faces}{$F}{"[$_]"} || [] }, qw(dead_array dead_in_VK_array extra_report_DeadChar);
  0            
6840 0           my (@A, %isD2, @Dface, @DfaceKey, %d_seen) = [];
6841 0           my $compK = $self->{faces}{$F}{'[ComposeKeys]'};
6842             #warn 'prefix keys to report: <', join('> <', @d), '>';
                 # Resolve every prefix key to the face it selects.  A key without a
                 # '[deadkeyFace]' entry is skipped, with a warning unless it is in '[ComposeKeys]'.
6843 0           for my $ddK (@d) {
6844 0           (my $dK = $ddK) =~ s/^\s+//;
6845 0           my $c = $self->key2hex($self->charhex2key($dK));
6846 0 0         next if $d_seen{$c}++;
6847             ($compK->{$c} or warn("??? Skip non-array prefix key `$c' for face `$F', k=`$dK'")), next
6848 0 0 0       unless defined (my $FF = $self->{faces}{$F}{'[deadkeyFace]'}{$c});
6849 0           $access{$FF} = [$self->charhex2key($dK)];
6850 0           push @Dface, $FF;
6851 0           push @DfaceKey, $c;
6852 0           $docs{$FF} = $self->{faces}{$F}{'[prefixDocs]'}{$c}; # and warn "Found docs: face=`$F', c=`$c'\n";
6853 0           push @A, [$self->charhex2key($dK)];
6854             }
6855            
                 # Count the prefix keys in each `|||'-separated group of faceDeadKeys; the running
                 # totals (the last group is dropped) become `endPair' column decorations.
6856 0           my ($lastDface, $prevCol, $COLS, @colOrn, %S, @joinedPairs) = ($#Dface, -1, '', [qw(0 column1)]);
6857 0 0         for my $kk (split /\p{Blank}+\|{3}\p{Blank}+/,
6858             (defined $self->{faces}{$F}{faceDeadKeys} ? $self->{faces}{$F}{faceDeadKeys} : ''), -1) {
6859 0           my $cnt = 0;
6860 0   0       length and $cnt++ for split /\p{Blank}+/, $kk;
6861 0           push @joinedPairs, $cnt;
6862             }
6863 0           pop @joinedPairs;
6864 0           my $done = 0;
6865 0           push @colOrn, [$done += $_, 'endPair'] for @joinedPairs;
6866 0           my @skip_sections;
6867 0           for my $s (values %start_SEC) {
6868 0           $skip_sections[$_]++ for $s->[0]..($s->[0]+$s->[1]-1)
6869             }
6870            
                 # First pass ($reported true): LayoutTable_add_double_prefix_keys entries add table
                 # columns.  Second pass: faceDeadKeys2 entries only fill %access, %docs and %isD2.
6871 0           for my $reported (1, 0) {
6872 0 0         for my $DD (@{ $self->{faces}{$F}{$reported ? 'LayoutTable_add_double_prefix_keys' : 'faceDeadKeys2'} }) {
  0            
6873 0           (my $dd = $DD) =~ s/^\s+//;
6874             # XXXX BUG in PERL??? This gives 3: DB<4> x scalar (my ($x, $y) = split //, 'ab')
6875 0 0         2 == (my (@D) = split //, $self->stringHEX2string($dd)) or die "Not a double character in LayoutTable_add_double_prefix_keys for `$F': `$DD' -> `", $self->stringHEX2string($dd), "'";
6876 0 0         my $map1 = $self->{faces}{$F}{'[deadkeyFaceHexMap]'}{$self->key2hex($D[0])}
    0          
6877             or ($reported ? die "Can't find prefix key face for `$D[0]' in `$F'" : next); # inverted faces bring havoc
6878 0 0         defined (my $Dead2 = $map1->{$self->key2hex($D[1])}) or die "Can't map `$D[1]' in `$F'+prefix `$D[0]'"; # in hex already
6879 0 0         $Dead2 = $Dead2->[0] if 'ARRAY' eq ref $Dead2;
6880 0 0         defined (my $ddd = $self->{faces}{$F}{'[deadkeyFace]'}{$Dead2}) or die "Can't find prefix key face for `$D[1]' -> `$Dead2' in `$F'+prefix `$D[0]'";
6881 0 0         next if $S{"@D"}++;
6882 0 0         push(@Dface, $ddd), push @DfaceKey, $Dead2 if $reported;
6883 0   0       $access{$ddd} ||= \@D;
6884 0           $docs{$ddd} = $self->{faces}{$F}{'[prefixDocs]'}{$Dead2};
6885 0 0         push @A, \@D if $reported;
6886             # warn "set is_D2: @D";
6887 0           $isD2{$D[0]}{$D[1]}++;
6888             }
6889             }
6890 0 0         push @colOrn, [$lastDface+1, 'pre_ExtraCols'] if $#Dface != $lastDface;
6891 0           for my $orn (@colOrn) {
6892 0           my $skip = $orn->[0] - $prevCol - 1;
6893 0 0         warn("Multiple classes on columns of report unsupported: face=$F, col [@$orn]"), next if $skip < 0;
6894 0           $prevCol = $orn->[0];
6895 0 0         my $many = $skip > 1 ? " span=$skip" : '';
6896 0 0         $skip = $skip > 0 ? "\n " : '';
6897 0           $COLS .= "$skip\n
6898             }
6899 0 0         print <
6900            
6901            
6902             EOP
6903 0   0       my ($k, $first_ctrl, $post_ctrl, @last_in_row) = (-1, map $self->{faces}{$F}{"[$_]"} || 0, qw(start_ctrl end_ctrl));
6904 0 0         $last_in_row[ $k += $_ ]++ for @{ $self->{faces}{$F}{'[geometry]'} || [] };
  0            
6905             #warn 'prefix key faces to report: <', join('> <', @Dface), '>';
6906 0           my @maps = (undef, map $self->{faces}{$F}{'[deadkeyFaceHexMap]'}{$_}, @DfaceKey); # element of Dface may be false if this is non-autonamed AltGr-inverted face
6907 0 0         my $dead = $html ? "\x{2620}" : "\x{2620}";
6908 0 0         my $dead_i = $html ? "\x{2620}" : "\x{2620}";
6909 0           my $header = '';
6910 0           for my $dFace ('', @Dface) { # '' is no-dead
6911 0           my $base_t = 'Characters immediately on keys (without prefix keys); the first two are without/with Shift, two others same, but with added AltGr (excluding the special-key zone)';
6912 0           my $prefix_t = 'After tapping a prefix key, the base keys are replaced by what is in the column of the prefix key';
6913 0 0         $header .= qq( ↓Base Prefix→
6914 0           my @a = map {(my $a = $_) =~ s/^(?=$rxCombining)/\x{25cc}/o; $a } @{ $access{$dFace} };
  0            
  0            
  0            
6915 0           my $docs = $docs{$dFace};
6916 0 0         $docs =~ s/([''&])/sprintf '&#x%02x;', ord $1/ge if $docs;
  0            
6917 0 0         my $withDocs = (defined $docs ? "@a" : "@a");
6918 0           $header .= " $withDocs
6919             }
6920 0 0         print "
6921             if $html;
6922 0           my $vbell = '♪';
6923 0           my $OOut = '';
6924 0           for my $n ( 0 .. $#{ $LL[0] } ) {
  0            
6925 0           my ($out, $out_c, $prev, @KKK, $base_c) = ('', 0, '');
6926 0           my @baseK;
6927 0 0 0       next if $n >= $first_ctrl and $n < $post_ctrl or $skip_sections[$n];
      0        
6928 0           for my $dn (0..@Dface) { # 0 is no-dead
6929 0 0 0       next if $dn and not $maps[$dn];
6930 0 0         $out .= $html ? '' : ($prev =~ /\X{7}/ ? ' ' : "\t") if length $out;
    0          
    0          
6931 0 0         my $is_D2 = $isD2{ @{$A[$dn]} == 1 ? $A[$dn][0] : 'n/a' };
  0            
6932             # warn "is_D2: ", $self->array2string([$dn, $is_D2, $A[$dn], $A[$dn][0]]);
6933 0           my $o = '';
6934 0           for my $L (0..$#$LL) {
6935 0           for my $shift (0..1) {
6936 0           my $c = $LL[$L][$n][$shift];
6937 0           my ($pre, $expl, $C, $expl1, $invert_dead) = ('', '', $c);
6938 0 0         $o .= ' ', next unless defined $c;
6939 0           $out_c++;
6940 0 0 0       $pre = $dead if not $dn and 'ARRAY' eq ref $c and $c->[2];
      0        
6941 0 0         $c = $c->[0] if 'ARRAY' eq ref $c;
6942 0 0         $KKK[$L][$shift] = $c unless $dn;
6943 0           $base_c = $KKK[$L][$shift];
6944             # warn "int_struct -> dead; face `$F', KeyPos=$n, Mods=$L, shift=$shift, ch=$c\n" if $pre;
6945 0 0         if ($dn) {
6946 0           $C = $c = $maps[$dn]{$self->key2hex($c)};
6947 0 0         $c = $vbell unless defined $c;
6948 0 0 0       $invert_dead = (3 == ($c->[2] || 0) || (3 << 3) == ($c->[2] || 0)) if ref $c;
6949 0 0 0       $pre = $invert_dead ? $dead_i : $dead if 'ARRAY' eq ref $c and $c->[2];
    0          
6950 0 0         $c = $c->[0] if 'ARRAY' eq ref $c;
6951 0           $c = $self->charhex2key($c);
6952             } else {
6953             # warn "coverage0_prefix -> dead; face `$F', KeyPos=$n, Mods=$L, shift=$shift, ch=$c\n" if $self->{faces}{$F}{'[coverage0_prefix]'}{$c};
6954 0 0 0       $invert_dead = (3 == ($c->[2] || 0) || (3 << 3) == ($c->[2] || 0)) if ref $c;
6955 0 0 0       $pre = $invert_dead ? $dead_i : $dead if $pre or $self->{faces}{$F}{'[coverage0_prefix]'}{$c};
    0          
6956             }
6957 0 0         $baseK[$L][$shift] = $c unless $dn;
6958 0 0 0       $pre ||= $dead if $dn and $is_D2->{$baseK[$L][$shift]};
      0        
6959            
6960 0 0         if ($html) {
6961 0           $c = $self->char_2_html_span($base_c, $C, $c, $F, {ltr => 1}, 'l');
6962             } else {
6963 0           $c =~ s/(?=$rxCombining)/\x{25cc}/go; # dotted circle ◌ 25CC
6964 0           $c =~ s{([\x00-\x1F\x7F])}{ $self->control2prt("$1") }ge;
  0            
6965             }
6966 0           $c = "$pre$c";
6967 0           $o .= $c;
6968             }
6969             }
6970 0           $o =~ s/ +$//;
6971 0           $prev = $o;
6972 0           $out .= $o;
6973             }
6974 0 0         my $class = $last_in_row[$n] ? ' class=lastKeyInKRow' : '';
6975 0 0         $out = " $out
6976 0 0         $OOut .= "$out\n", print "$out\n" if $out_c;
6977             }
6978 0 0         my @extra = map {(my $s = $_) =~ s/^\s+//; "\n\n

$s"} @{ $self->{faces}{$F}{TableSummaryAddHTML} || [] };

  0            
  0            
  0            
6979 0           my $create_a_c = $self->{faces}{$F}{'[create_alpha_ctrl]'};
6980 0 0         $create_a_c = $create_alpha_ctrl unless defined $create_a_c;
6981 0   0       my $extra_ctrl = ($create_a_c >= 1) && '/[/]/\\';
6982 0   0       $extra_ctrl .= ($create_a_c >= 2) && '/^/_';
6983 0   0       my $more .= ($create_a_c >= 1) && ' Most of Ctrl-letters are omitted from the table; deduce them from reports for C/H/I/J/M/Z.';
6984 0 0         print <
6985            
6986            
6987            
6988             @extra

Highlights (homographs and special needs): zero-width or SOFT HYPHEN: , whitespace: , Vietnamese; other double-accent; paleo-Latin;

6989             or IPA.
6990             Or name having RELATION, PERPENDICULAR,
6991             PARALLEL, DIVIDES, FRACTION SLASH; or BIG, LARGE, N-ARY, CYRILLIC PALOCHKA/DZE/JE/QA/WE/A-IE,
6992             ANO TELEIA, KORONIS, PROSGEGRAMMENI, GREEK QUESTION MARK, SOF PASUQ, PUNCTUATION GERESH/GERSHAYIM; or OPERATOR, SIGN,
6993             SYMBOL, PROOF, EXISTS, FOR ALL, DIVISION, LOGICAL; or AltGr-inverter prefix;
6994             or via a rule involving/exposing a “BlueKey” substitution rule.
6995             (Some browsers fail to show highlights for whitespace/zero-width.)
6996            

Vertical lines separate: the column of the base face, paired

6997             prefix keys with “inverted bindings”, and explicitly selected multi-key prefixes. Horizontal lines separate key rows of
6998             the keyboard (including a fake row with the “left extra key” [one with <> or \\| - it is missing on many keyboards]
6999             and the KP_Decimal key [often marked as . Del on numeric keypad]); the last group is for semi-fake keys for
7000             Enter/C-Enter/Backspace/C-Backspace/Tab and C-Break$extra_ctrl (make sense after prefix keys) and special keys explicitly added
7001             in .kbdd files (usually SPACE).$more
7002            

Hover mouse over any appropriate place to get more information.

7003             In popups: brackets enclose Script, Range, “1st Unicode version with this character”;
7004             braces enclose “the reason why this position was assigned to this character” (VisLr means that a visual table was
7005             used; in Subst{HOW}, L=Layer and F=Face mean that a “BlueKey” substitution rule was defined
7006             via a special layer/face).
7007            
7008            
7009             EOP
7010 0           $OOut
7011             }
7012            
# coverage_face0($self, $F [, $after_import])
#
# Scans all layers of face $F and caches, on the face hash $self->{faces}{$F},
# which characters the face produces and which bindings act as prefix keys.
# A binding is either a plain string or an array ref; for array refs element 0
# is the character, element 1 a face name, element 2 a flag which is true for
# prefix keys (the value 2 marks "exportable" ones), and element 4 is set once
# the importation translation has been applied.
#
# Slots written:
#   [coverage0]         sorted non-prefix characters (this is the return value)
#   [layer0coverage0]   the same, for the first layer only
#   [coverage0_prefix]  hash of prefix-key characters
#   [coverage00]        [coverage0] entries of length < 2 and codepoint < 0x10000
#   [coverage0+]        the complement of [coverage00] inside [coverage0]
#   [coverage00+]       [coverage0] entries of length < 2
#   [coverage00++]      [coverage0] entries of length > 1
#   [coverage00hash]    [coverage00] as a hash
#   [imported]          sorted "char:face" strings of the exportable bindings
#
# The result is cached: if [coverage0] already exists it is returned unchanged.
# Unless $after_import is true, a warning is issued for any key which already
# carries translated bindings (faces named "...##Inv#HEX" are exempt).
sub coverage_face0 ($$;$) {
  my ($self, $F, $after_import) = (shift, shift, shift);
  my $H  = $self->{faces}{$F};
  my $LL = $H->{layers};
  return $H->{'[coverage0]'} if exists $H->{'[coverage0]'};	# already computed

  my (%seen, %seen_prefix, %imported);
  # Characters declared dead for the face, independently of the per-binding flag
  my $d = { %{ $H->{'[DEAD]'} || {} }, %{ $H->{'[dead_in_VK]'} || {} } };
  my $first_layer = 1;
  for my $l (@$LL) {
    my $L = $self->{layers}{$l};
    for my $k (@$L) {
      warn "Face `$F', layer `$l': coverage check is run too late: after the importation translation is performed"
        if not $after_import and $F !~ /^(.*)##Inv#([a-f0-9]{4,})$/is and grep {defined and ref and $_->[4]} @$k;
      for my $bind (grep defined, @$k) {
        my $is_ref = ref $bind;
        my $c = $is_ref ? $bind->[0] : $bind;
        if ($is_ref and $bind->[2] or $d->{$c}) {	# prefix key (flagged, or declared dead)
          $seen_prefix{$c}++;
        } else {
          $seen{$c}++;
        }
        $imported{"$bind->[0]:$bind->[1]"}++		# exportable
          if $is_ref and 2 == ($bind->[2] || 0);
      }
    }
    if ($first_layer) {					# snapshot after the first layer only
      $H->{'[layer0coverage0]'} = [sort keys %seen];
      $first_layer = 0;
    }
  }
  my @cov = sort keys %seen;
  $H->{'[coverage0_prefix]'} = \%seen_prefix;
  $H->{'[coverage0]'}     = \@cov;
  $H->{'[coverage00]'}    = [grep { 2>length and 0x10000 > ord } @cov];
  $H->{'[coverage0+]'}    = [grep {!(2>length and 0x10000 > ord)} @cov];
  $H->{'[coverage00+]'}   = [grep { 2>length } @cov];
  # This line was destroyed in the listing; restored to match the three filters above
  $H->{'[coverage00++]'}  = [grep { 1<length } @cov];
  $H->{'[imported]'}      = [sort keys %imported];
  $H->{'[coverage00hash]'} = { map { ($_, 1) } @{ $H->{'[coverage00]'} } };
  $H->{'[coverage0]'};
}
7044            
7045             # %imported is analysed: if a manual deadkey is specified, this value is used; otherwise a new value is generated and remembered
7046             # (but is not put in the keymap???)
# massage_imported($self, $f)
#
# Does nothing unless the face name $f has the form "PARENT###HEX" (HEX being
# 4 or more hex digits).  Otherwise, for every "char:face" entry in the
# [imported] list of $f, a prefix key is chosen on the PARENT face:
#   - a value recorded in [imported2key], else in [imported2key_auto];
#   - else, when the character is already in [coverage0_prefix] or
#     [auto_dead] of PARENT, a fresh one from next_auto_dead() (this needs
#     [first_auto_dead]; without it a warning is issued and the sub returns);
#   - else the character itself, which is then recorded in [auto_dead].
# The layers of the imported deadkey are copied (and stacked over
# [AdddeadkeyLayers] for the chosen key, if present), registered on PARENT
# as a new face named "PARENT#@#@#@char:face", linked, and their coverage
# is computed.  Dies on a malformed entry or an unknown source deadkey.
sub massage_imported ($$) {
  my ($self, $f) = (shift, shift);
  my ($F) = $f =~ /^(.*)###([a-f0-9]{4,})$/is
    or return;
  my $H = $self->{faces}{$F};
  my $imports = $self->{faces}{$f}{'[imported]'} || [];
  for my $i (@$imports) {
    my ($k, $face) = $i =~ /^(.):(.*)/s
      or die "Unrecognized imported: `$i'";
    my $K;
    if (exists $H->{'[imported2key]'}{$i}) {			# manual assignment
      $K = $H->{'[imported2key]'}{$i};
    } elsif (exists $H->{'[imported2key_auto]'}{$i}) {		# assigned automatically before
      $K = $H->{'[imported2key_auto]'}{$i};
    } elsif ($H->{'[coverage0_prefix]'}{$k} or $H->{'[auto_dead]'}{$k}) {	# it is already used
      # Assign a fake prefix key to imported map
      unless (defined $H->{'[first_auto_dead]'}) {
        warn("Imported prefix keys exist, but Auto_Diacritic_Start is not defined in face `$F'");
        return;
      }
      $K = $H->{'[imported2key_auto]'}{$i} = $self->next_auto_dead($H);
    } else {							# preserve the prefix key
      $K = $H->{'[imported2key_auto]'}{$i} = $k;
      $H->{'[auto_dead]'}{$k}++;
    }

    my $LL = $self->{faces}{$face}{'[deadkeyLayers]'}{$self->key2hex($k)}
      or die "Cannot import a deadkey `$k' from `$face'";
    $LL = [@$LL];						# copy of the list, so it may be overridden
    my $KK = $self->key2hex($K);
    my $over = $H->{'[AdddeadkeyLayers]'}{$KK};
    #warn "face `$F': additional bindings for deadkey $KK exist.\n";
    $LL = [$self->make_translated_layers_stack($over, $LL)] if $over;

    my $new_facename = "$F#\@#\@#\@$i";
    $H->{'[imported2key_all]'}{"$k:$face"} = $self->charhex2key($KK);
    $H->{'[deadkeyLayers]'}{$KK} = $LL;
    $self->{faces}{$new_facename}{layers} = $LL;
    $H->{'[deadkeyFace]'}{$KK} = $new_facename;
    $self->link_layers($F, $new_facename, 'skipfix', 'no-slot-warn');

    $self->coverage_face0($new_facename);
  }
}
7083            
# massage_imported2($self, $f)
#
# Acts on faces named "PARENT###HEX" whose PARENT has an [imported2key_all]
# table ("char:face" => prefix key on PARENT).  Every binding of $f which is
# an array ref with a true flag (element 2) is replaced by a copy:
#   - flag == 2 (exportable): element 0 becomes the key found in the table
#     for "char:face" and element 4 keeps the original character; a warning
#     is issued when the table has no such entry (element 0 is then undef);
#   - any other flag: the flag is shifted right by 3 bits.
# Afterwards the cached [coverage0] of $f is dropped and recomputed with the
# 'after_import' marker; the value of that call is what this sub evaluates to.
sub massage_imported2 ($$) {
  my ($self, $f) = (shift, shift);
  warn "... Importing into face=`$f'" if debug_import;
  return unless my ($F, $KKK) = ($f =~ /^(.*)###([a-f0-9]{4,})$/is);	# what about multiple prefixes???
  return unless my $HH = $self->{faces}{$F}{'[imported2key_all]'};
  my $H = $self->{faces}{$f};
  warn "Importing into face=`$F' prefix=$KKK" if debug_import;
  my $LL = $H->{layers};
  for my $l (@$LL) {
    my $L = $self->{layers}{$l};
    for my $k (@$L) {
      # grep and foreach alias the elements of @$k: assigning to $kk replaces the slot itself
      for my $kk (grep {defined and ref and $_->[2]} @$k) {
        $kk = [@$kk];			# private copy of the binding
        if (2 == $kk->[2]) {		# exportable
          my $v = (defined $kk->[4] ? $kk->[4] : $kk->[0]);
          my $j = $HH->{"$v:$kk->[1]"};
          if (not defined $j) {
            warn "Can't resolve `$v:$kk->[1]' to an imported dead key, face=`$F' prefix=$KKK; layer=$l";
          } elsif (debug_import) {
            warn "Importing `$v:$kk->[1]' as `$j', face=`$F' prefix=$KKK; layer=$l";
          }
          @$kk[0,4] = ($j, $v);
        } else {
          #warn "massage_imported2: shift $kk->[2] <<= 3 key `$kk->[0]' face `$f' layer `$l'\n" if $kk->[2] >> 3;
          $kk->[2] >>= 3;		# ByPairs makes <<= 3 !
        }
      }
    }
  }
  delete $self->{faces}{$f}{'[coverage0]'};
  $self->coverage_face0($f, 'after_import');		# recalculate
}
7117            
7118             sub massage_char_substitutions($$) { # Read $self->{Substitutions}
                 # Converts the substitution rules found in $data->{Substitutions} (a hash of arrays of
                 # strings) into entries of $self->{'[Substitutions]'}.  Each rule string is decoded with
                 # stringHEX2string(), blanks preceding a nonspacing mark are removed, and the result must
                 # consist of exactly 2 characters (source, target); otherwise the sub dies.
                 # Besides the rule itself, a lowercased and an uppercased variant are stored whenever
                 # changing the case alters both characters of the rule.
                 # Dies if $self->{Compositions} is already set.
                 # NOTE(review): in this listing the second-level hash key is the empty string, and $K is
                 # used only to fetch the array; the key may have been a "<...>" string lost when the
                 # listing was stripped of HTML - confirm against the original source.
7119 0     0 0   my($self, $data) = (shift, shift);
7120 0 0         die "Too late to load char substitutions" if $self->{Compositions};
7121 0 0         for my $K (keys %{ $data->{Substitutions} || {}}) {
  0            
7122 0           my $arr = $data->{Substitutions}{$K};
7123 0           for my $S (@$arr) {
7124 0           my $s = $self->stringHEX2string($S);
7125 0           $s =~ s/\p{Blank}(?=\p{NonspacingMark})//g;
7126 0 0         die "Expect 2 chars in substitution rule; I see <$s> (from <$S>)" unless 2 == (my @s = split //, $s);
7127 0           $self->{'[Substitutions]'}{""}{$s[0]} = [[0, $s[1]]]; # Format as in Compositions
7128 0 0 0       $self->{'[Substitutions]'}{""}{lc $s[0]} = [[0, lc $s[1]]]
7129             if lc $s[0] ne $s[0] and lc $s[1] ne $s[1];
7130 0 0 0       $self->{'[Substitutions]'}{""}{uc $s[0]} = [[0, uc $s[1]]]
7131             if uc $s[0] ne $s[0] and uc $s[1] ne $s[1];
7132             }
7133             }
7134             }
7135            
# new_from_configfile($class, $F)
#
# Reads the whole file $F through the :utf8 layer, hands the text to
# new_from_configfile_string() and returns the resulting object, with the
# file name stored in its '[file]' slot.  Dies if the file cannot be opened
# or closed.
sub new_from_configfile ($$) {
  my ($class, $F) = (shift, shift);
  open(my $in, '< :utf8', $F)
    or die "Can't open `$F' for read: $!";
  my $text;
  {
    local $/;			# slurp mode, limited to this read
    $text = <$in>;
  }
  close($in)
    or die "Can't close `$F' for read: $!";
  #warn "Got `$text'";
  my $layout = $class->new_from_configfile_string($text);
  $layout->{'[file]'} = $F;
  return $layout;
}
7146            
7147             sub new_from_configfile_string ($$) {
                 # Constructor: builds a layout object from the text $ss of a configuration file.
                 # The structure returned by parse_configfile() is completed with the layer data from
                 # fill_kbd_layers(), blessed into $class (or into the class of $class if it is an object),
                 # run through the massage_*/create_*/link_* passes below in the order written, and returned.
                 # Dies when called with anything after the string argument.
7148 0     0 0   my ($class, $ss) = (shift, shift);
7149 0 0         die "too many arguments to UI::KeyboardLayout->new_from_configfile" if @_;
7150 0           my $data = $class->parse_configfile($ss);
7151             # Dumpvalue->new()->dumpValue($data);
7152 0           my ($layers, $counts, $offsets) = $class->fill_kbd_layers($data);
                 # Merge the three returned hashes into $data by hash-slice assignment
7153 0           @{$data->{layers}}{keys %$layers} = values %$layers;
  0            
7154 0           @{$data->{layer_counts} }{keys %$counts} = values %$counts;
  0            
7155 0           @{$data->{layer_offsets}}{keys %$offsets} = values %$offsets;
  0            
7156 0   0       $data = bless $data, (ref $class or $class);
7157 0           $data->massage_hash_values;
7158 0           $data->massage_diacritics; # Read $self->{Diacritics}
7159 0           $data->massage_char_substitutions($data); # Read $self->{Substitutions}
7160 0           $data->massage_faces;
7161            
7162 0           $data->massage_deadkeys_win($data); # Process (embedded) MSKLC-style deadkey maps
7163 0           $data->scan_for_DeadKey_Maps(); # Makes a direct-access synonym, scan for DeadKey_Maps* keys
7164 0           $data->create_DeadKey_Maps();
7165 0           $data->create_composite_layers; # Needs to be after simple deadkey maps are known
7166            
                 # Three passes over the faces, each one run over all faces before the next one begins.
                 # In every face loop of this sub, entries of {faces} which are not hashes and names
                 # ending in the word "VK" are skipped.
7167 0           for my $F (keys %{ $data->{faces} }) {
  0            
7168 0 0 0       next if 'HASH' ne ref $data->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
7169 0           $data->coverage_face0($F); # creates coverage0, imported array (c0 excludes diacritics), coverage0_prefix hash
7170             }
7171 0           for my $F (keys %{ $data->{faces} }) {
  0            
7172 0 0 0       next if 'HASH' ne ref $data->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
7173 0           $data->massage_imported($F); # calc new values for imported prefix keys, augments imported maps with Add-maps
7174             }
7175 0           for my $F (keys %{ $data->{faces} }) {
  0            
7176 0 0 0       next if 'HASH' ne ref $data->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
7177 0           $data->massage_imported2($F); # changes imported prefix keys to appropriate values for the target personality
7178             }
7179 0           $data->create_prefix_chains;
7180 0           $data->create_inverted_faces;
7181 0           $data->link_composite_layers; # Needs to be after imported keys are reassigned...
7182 0           for my $F (keys %{ $data->{faces} }) { # Fine-tune inverted-AltGr faces
  0            
7183 0 0 0       next if 'HASH' ne ref $data->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
7184 0 0         next if $F =~ /#\@?#\@?(Inv)?#\@?/; # Face-on-a-deadkey
7185            
7186 0           my $D = $data->{faces}{$F}{'[deadkeyFace]'};
7187 0           my $Ex = $data->{faces}{$F}{'[AltGr_Invert_Show]'};
                 # For every deadkey of the face: store its hex map; if an AltGr-inverting key is recorded
                 # for it, also store the inverted map and derive [prefixDocs] and [Show] for that key
7188 0           for my $d (keys %$D) {
7189 0           $data->{faces}{$F}{'[deadkeyFaceHexMap]'}{$d} = $data->linked_faces_2_hex_map($F, $D->{$d});
7190 0 0         defined (my $auto_inv_AltGr = $data->{faces}{$F}{'[deadkeyInvAltGrKey]'}{$d}) or next;
7191 0           my $b1 = $data->{faces}{$F}{'[deadkeyFaceInvAltGr]'}{my $a = $data->charhex2key($auto_inv_AltGr)};
7192 0 0         $data->{faces}{$F}{'[deadkeyFaceHexMapInv]'}{$d} = $data->linked_faces_2_hex_map($F, $b1) if $b1;
                 # From here to the end of this loop body $D is the documentation string, not the deadkey hash
7193 0           my $D = $data->{faces}{$F}{'[prefixDocs]'}{$d};
7194 0 0         $data->{faces}{$F}{'[prefixDocs]'}{$data->key2hex($a)} = 'AltGr-inverted: ' . (defined $D ? $D : "[[$d]]");
7195 0           my $S = $data->{faces}{$F}{'[Show]'}{$d};
7196 0 0         $data->{faces}{$F}{'[Show]'}{$data->key2hex($a)} = (defined $S ? $S : $data->charhex2key($d)) . $Ex;
7197             }
7198            
                 # List assignment from a single value: only $flip_AltGr is set, @protect_chr starts empty
7199 0           my($flip_AltGr, @protect_chr) = $data->{faces}{$F}{'[Flip_AltGr_Key]'}; # Who put it into deadkeyFace???
7200 0 0         if (defined $flip_AltGr) {
7201 0           $flip_AltGr = $data->key2hex($data->charhex2key($flip_AltGr));
7202 0           push @protect_chr, $flip_AltGr;
7203             $data->{faces}{$F}{'[prefixDocs]'}{$flip_AltGr} = 'AltGr-inverted base face'
7204 0 0         unless defined $data->{faces}{$F}{'[prefixDocs]'}{$flip_AltGr};
7205 0 0         $data->{faces}{$F}{'[Show]'}{$flip_AltGr} = $Ex unless defined $data->{faces}{$F}{'[Show]'}{$flip_AltGr};
7206             }
                 # $expl is read as a flat list of pairs: for pair number $i the offsets -2 and -1
                 # select the elements 2*$i-2 and 2*$i-1
7207 0   0       my $expl = $data->{faces}{$F}{'[Explicit_AltGr_Invert]'} || [];
7208 0           for my $i (1..(@$expl/2)) {
7209 0           my @C = map $data->key2hex($expl->[2*$i + $_]), -2, -1;
7210 0           push @protect_chr, $C[1];
7211 0           my $D = $data->{faces}{$F}{'[prefixDocs]'}{$C[0]};
7212 0 0         $data->{faces}{$F}{'[prefixDocs]'}{$C[1]} = 'AltGr-inverted: ' . (defined $D ? $D : "[[$C[0]]]");
7213 0           my $S = $data->{faces}{$F}{'[Show]'}{$C[0]};
7214 0 0         $data->{faces}{$F}{'[Show]'}{$C[1]} = (defined $S ? $S : $data->charhex2key($C[0])) . $Ex;
7215             }
                 # NOTE(review): [auto_dead] is keyed by a code point (ord) here, while massage_imported()
                 # keys the same slot by the character itself - confirm that this is intended
7216 0           $data->{faces}{$F}{'[auto_dead]'}{ord $data->charhex2key($_)}++ for @protect_chr;
7217             # warn " Keys HexMap: ", join ', ', sort keys %{$data->{faces}{$F}{'[deadkeyFaceHexMap]'}};
7218             }
7219            
                 # First statistics pass: characters reachable via the prefixes named by prefix...=
                 # tokens in [output_layers]; then creation of the compose key where [ComposeKey] is set
7220 0           for my $F (keys %{ $data->{faces} }) { # Finally, collect the stats
  0            
7221 0 0 0       next if 'HASH' ne ref $data->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
7222 0 0         next if $F =~ /#\@?#\@?(Inv)?#\@?/; # Face-on-a-deadkey
7223 0           my %seenExtra;
7224 0 0         my @extras = ( "@{ $data->{faces}{$F}{'[output_layers]'} || [''] }" =~ /\bprefix(?:\w*)=([0-9a-fA-F]{4,6}\b|.(?![^ ]))/g );
  0            
7225 0           my %is_extra = map { ($data->charhex2key($_), 1) } @extras; # extra layers (on bizarre modifiers)
  0            
7226 0           for my $deadKEY ( sort keys %{ $data->{faces}{$F}{'[deadkeyFace]'}} ) {
  0            
7227 0           my $deadKey = $data->charhex2key($deadKEY);
7228 0 0         next unless $is_extra{$deadKey};
7229 0           my $FFF = $data->{faces}{$F}{'[deadkeyFace]'}{$deadKEY};
7230 0 0         my $cov1 = $data->{faces}{$FFF}{'[coverage0]'} # XXXX not layer0coverage0 - may slide down to layer0
7231             or warn("Deadkey `$deadKey' on face `$F' -> unmassaged face"), next;
7232             $seenExtra{$_}++
7233 0 0 0       for map {ref() ? $_->[0] : $_} grep !(ref and $_->[2]), @$cov1; # Skip 2nd level deadkeys
  0            
7234             }
7235 0           $data->{faces}{$F}{'[coverageExtra]'} = \%seenExtra;
7236            
7237 0 0         next unless my $prefix = $data->{faces}{$F}{'[ComposeKey]'};
7238 0           $data->auto_dead_can_wrap($F); # All manual deadkeys are set, so auto may be flexible
7239 0           $data->create_composekey($F, $prefix);
7240             }
7241            
                 # Second statistics pass, only for faces which have [deadkeyFace]: what is reachable
                 # through one prefix key, and which prefix keys are themselves not reached
7242 0           for my $F (keys %{ $data->{faces} }) { # Finally, collect the stats
  0            
7243 0 0 0       next if 'HASH' ne ref $data->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
7244 0 0         next if $F =~ /#\@?#\@?(Inv)?#\@?/; # Face-on-a-deadkey
7245 0           my($seen_prefix, %seen0, %seen00, %seen1, %seen1only, %seenExtra) = $data->{faces}{$F}{'[coverage0_prefix]'};
7246             # warn("Face `$F' has no [deadkeyFace]"),
7247 0 0         next unless $data->{faces}{$F}{'[deadkeyFace]'};
7248             # next;
7249 0           my (%check_later, %coverage1_prefix);
7250             # warn "...... face `$F',\tprefixes0 ", keys %$seen_prefix;
7251             # $seen_prefix = {%$seen_prefix}; # Deep copy
7252             # $seen_prefix->{$_}++ for @{ $data->{faces}{$F}{'[dead_in_VK_array]'} || [] };
7253 0 0         my @extras = ( "@{ $data->{faces}{$F}{'[output_layers]'} || [''] }" =~ /\bprefix(?:\w*)=([0-9a-fA-F]{4,6}\b|.(?![^ ]))/g );
  0            
7254 0           my %is_extra = map { ($data->charhex2key($_), 1) } @extras; # extra layers (on bizarre modifiers)
  0            
7255 0           for my $deadKEY ( sort keys %{ $data->{faces}{$F}{'[deadkeyFace]'}} ) {
  0            
7256 0 0         unless (%seen0) { # Do not calculate if $F has no deadkeys...
7257 0           $seen0{$_}++ for @{ $data->{faces}{$F}{'[coverage00]'} };
  0            
7258 0           %seen00 = %seen0;
7259             }
7260             ### XXXXX Directly linked faces may have some chars unreachable via the switch-prefixKey
7261 0           my ($deadKey, $not_in_0) = $data->charhex2key($deadKEY);
7262             # It does not make sense to not include it into the summary: 0483 on US is such...
7263 0 0         $not_in_0++, $check_later{$deadKey}++ unless $seen_prefix->{$deadKey}; # For multi-prefix maps, and extra layers
7264 0           my ($FFF, @dd2) = $data->{faces}{$F}{'[deadkeyFace]'}{$deadKEY};
7265 0 0         my $cov1 = $data->{faces}{$FFF}{$is_extra{$deadKey} ? '[coverage0]' : '[coverage00]'} # XXXX not layer0coverage0 - may slide down to layer0
    0          
7266             or warn("Deadkey `$deadKey' on face `$F' -> unmassaged face"), next;
7267             ($seen0{$_}++ or $seen1{$_}++),
7268             ($not_in_0 and not $is_extra{$deadKey}) || $seen00{$_} || $seen1only{$_}++, # Only for multi-prefix maps
7269             $is_extra{$deadKey} && $seenExtra{$_}++ # Only for extra modifiers maps
7270 0 0 0       for map {ref() ? $_->[0] : $_} grep !(ref and $_->[2]), @$cov1; # Skip 2nd level deadkeys
  0   0        
      0        
      0        
7271 0 0         if (my $d2 = $data->{faces}{$F}{'[dead2]'}{$deadKey}) {
7272 0           my $map = $data->linked_faces_2_hex_map($F, $FFF);
7273             # warn "linked map (face=$F) = ", keys %$d2;
7274 0 0 0       @dd2 = map $data->charhex2key($_), map {($_ and ref $_) ? $_->[0] : $_} map $map->{$data->key2hex($_)}, keys %$d2;
  0            
7275             # warn "sub-D2 (face=$F) = ", @dd2;
7276             }
7277             #warn "2nd level prefixes for `$deadKey': ", keys %{$data->{faces}{$FFF}{'[coverage0_prefix]'} || {}};
7278             #warn "2nd level prefixes for `$deadKey': <@dd2> ", keys %{$data->{faces}{$F}{'[dead2]'}{$deadKey} || {}};
7279 0 0         unless ($not_in_0) {
7280             # warn "sub-cov0 (face=$F) = ", keys %{ $data->{faces}{$FFF}{'[coverage0_prefix]'} || {} };
7281 0 0         $coverage1_prefix{$_}++ for keys %{ $data->{faces}{$FFF}{'[coverage0_prefix]'} || {} };
  0            
7282             # warn "sub-D2 (face=$F) = ", @dd2;
7283 0           $coverage1_prefix{$_}++ for @dd2;
7284             }
7285             # warn "...... deadkey `$deadKey' reached0 in face `$F'" unless $not_in_0;
7286             }
7287            
                 # Prefix keys found neither among the base-face prefixes nor behind a reachable prefix
7288 0   0       my @check = grep { !$coverage1_prefix{$_} and !$is_extra{$_} } keys %check_later;
  0            
7289 0 0         my @only_extra = grep { !$coverage1_prefix{$_} and $is_extra{$_} } keys %check_later;
  0            
7290 0           $data->{faces}{$F}{'[only_extra]'} = { map {($_, 1)} @only_extra };
  0            
7291            
7292 0 0         my $_s = (@check > 1 ? 's' : '');
7293 0 0         warn("Prefix key$_s <@check> not reached (without double prefix keys?) in face `$F'; later=", keys %check_later, " ; cov1=", keys %coverage1_prefix) if @check;
7294 0           $data->{faces}{$F}{'[coverage1]'} = [sort keys %seen1];
7295 0           $data->{faces}{$F}{'[coverage1only]'} = [sort keys %seen1only];
7296 0           $data->{faces}{$F}{'[coverage1only_hash]'} = \%seen1only;
7297 0           $data->{faces}{$F}{'[coverage_hash]'} = \%seen0;
                 # Replaces the [coverageExtra] stored by the previous loop for the faces which get here
7298 0           $data->{faces}{$F}{'[coverageExtra]'} = \%seenExtra;
7299             }
7300             $data
7301 0           }
7302            
# massage_deadkeys_win($self, $h)
#
# Feeds MSKLC-style deadkey descriptions into the configuration tree $h.
# Two kinds of sources are taken from the section names in $h->{'[keys]'}:
#   - sections "[unparsed]/DEADKEYS...": the text in their 'unparsed_data'
#     slot; the results go to the section path without "[unparsed]/";
#   - sections "DEADKEYS..." having a 'klc_filename' slot: the contents of
#     that file, read as UTF-16; the results go to the section itself.
# Each text is parsed by read_deadkeys_win(); for every deadkey it reports,
# the translation table is stored under 'map' and the name under 'name'
# below the target section.  A warning is issued when the target already
# has a 'map' or a 'name'.  Dies if a KLC file cannot be opened.
# Returns $self.
sub massage_deadkeys_win ($$) {
  my ($self, $h) = (shift, shift);
  my @jobs;			# [target section path, text to parse]; kept paired so they cannot get out of step

  for my $k (grep m(^\[unparsed]/DEADKEYS\b), @{$h->{'[keys]'}}) {
    # List assignment: a job is queued even if get_deep() returns an empty list
    my ($text) = $self->get_deep($h, (split m(/), $k), 'unparsed_data');
    (my $k1 = $k) =~ s(^\[unparsed]/)();
    push @jobs, [$k1, $text];
  }
  for my $k (grep m(^DEADKEYS\b), @{$h->{'[keys]'}}) {
    my $slot = $self->get_deep($h, split m(/), $k);
    next unless exists $slot->{klc_filename};
    # NOTE(review): the text before "=" in this message looks truncated in the listing - confirm against the original source
    open my $fh, '< :encoding(UTF-16)', $slot->{klc_filename}
      or die "open of =`$slot->{klc_filename}' failed: $!";
    my $in = do { local $/; <$fh> };	# slurp; $/ is restored right after the read
    close $fh;
    push @jobs, [$k, $in];
  }

  for my $job (@jobs) {
    my ($k1, $v) = @$job;
    my ($o, $d) = $self->read_deadkeys_win($v);		# Translation tables, names (the rest of input is ignored)
    my @parts = split m(/), $k1;
    my %seen = (%$o, %$d);				# union of the deadkeys known to either table
    for my $kk (keys %seen) {
      #warn "DK sec `$k1', deadkey `$kk'.  Map: ", $self->array2string( [%{$o->{$kk} || {}}] );
      my $slot = $self->get_deep($h, @parts, $kk);
      warn "Deadkey `$kk' defined for `$k1' conflicts with previous definition"
        if $slot and grep exists $slot->{$_}, qw(map name);
      $self->put_deep($h, $o->{$kk}, @parts, $kk, 'map')  if exists $o->{$kk};
      $self->put_deep($h, $d->{$kk}, @parts, $kk, 'name') if exists $d->{$kk};
    }
  }
  $self
}
7342            
7343             # http://bepo.fr/wiki/Pilote_Windows
7344             # http://www.phon.ucl.ac.uk/home/wells/dia/diacritics-revised.htm#two
7345             # http://msdn.microsoft.com/en-us/library/windows/desktop/ms646280%28v=vs.85%29.aspx
7346            
                 # %oem_keys: character => VK name for the non-alphanumeric keys.  The list below is written
                 # as "NAME character" pairs and then reversed, so the character column supplies the hash keys.
                 # A "#" appended to a character makes the hash key differ from that of an earlier row with
                 # the same character (and "#" alone is the key of the SPACE row).  Inside the word list "#"
                 # and "," are data, hence warnings of the 'qw' category are switched off for this block;
                 # no comment may be placed between the rows.
7347 1     1   6103 my %oem_keys = do {{ no warnings 'qw' ; reverse (qw(
  1         4  
  1         4962  
7348             OEM_MINUS -
7349             OEM_PLUS =
7350             OEM_4 [
7351             OEM_6 ]
7352             OEM_1 ;
7353             OEM_7 '
7354             OEM_3 `
7355             OEM_5 \
7356             OEM_COMMA ,
7357             OEM_PERIOD .
7358             OEM_2 /
7359             OEM_102 \#
7360             SPACE #
7361             DECIMAL .#
7362             DECIMAL ,#
7363             ABNT_C1 /#
7364             ABNT_C1 ¥
7365             ABNT_C1 ¦
7366             )) }}; #'# Here # marks "second occurrence" of keys...
7367             # Extra bindings: see http://www.fysh.org/~zefram/keyboard/xt_scancodes.txt (after “===”)
7368             # e005 Messenger (or Files); e007 Redo; e008 undo; e009 ApplicationLeft; e00a Paste;
7369             # e00b,e011,e012,e01f ScrollWheel-to-key-emulation
7370             # e013 Word; e014 Excel; e015 Calendar; e016 Log Off; e017 Cut; e018 Copy; e01e ApplicationRight
7371             # e03b -- e044 (Microsoft/Logitech Fkeys_without_Flock, F1...F10)
7372             # e063 Wake; e064 My Pictures [or Keypad-) ]
7373             # For type 4 of keyboard (same as types 1,3, except OEM_AX, (NON)CONVERT, ABNT_C1)
7374             # except KANA,(NON)CONVERT,; scancode of YEN,| for OEM_8 is our invention; after OEM_8 all is junk (non-scancodes???)...
                 # %scan_codes: VK name => scancode, as a string of hex digits (case kept as written below).
                 # The list is written as "code NAME" pairs and then reversed, so the names supply the hash keys.
                 # Because of the reversal, when a name occurs in several rows the row nearest to the top of
                 # the list is the one which survives (e.g. KANA => 70, TAB => 0F, OEM_MINUS => 0c).
                 # The rows are the contents of a word list: no comment may be placed between them.
7375             my %scan_codes = (reverse qw(
7376             02 1
7377             03 2
7378             04 3
7379             05 4
7380             06 5
7381             07 6
7382             08 7
7383             09 8
7384             0a 9
7385             0b 0
7386             0c OEM_MINUS
7387             0d OEM_PLUS
7388             10 Q
7389             11 W
7390             12 E
7391             13 R
7392             14 T
7393             15 Y
7394             16 U
7395             17 I
7396             18 O
7397             19 P
7398             1a OEM_4
7399             1b OEM_6
7400             1e A
7401             1f S
7402             20 D
7403             21 F
7404             22 G
7405             23 H
7406             24 J
7407             25 K
7408             26 L
7409             27 OEM_1
7410             28 OEM_7
7411             29 OEM_3
7412             2b OEM_5
7413             2c Z
7414             2d X
7415             2e C
7416             2f V
7417             30 B
7418             31 N
7419             32 M
7420             33 OEM_COMMA
7421             34 OEM_PERIOD
7422             35 OEM_2
7423             39 SPACE
7424             56 OEM_102
7425             53 DECIMAL
7426            
7427             01 ESCAPE
7428             0C OEM_MINUS
7429             0D OEM_PLUS
7430             0E BACK
7431             0F TAB
7432             1A OEM_4
7433             1B OEM_6
7434             1C RETURN
7435             1D LCONTROL
7436             27 OEM_1
7437             28 OEM_7
7438             29 OEM_3
7439             2A LSHIFT
7440             2B OEM_5
7441             33 OEM_COMMA
7442             34 OEM_PERIOD
7443             35 OEM_2
7444             36 RSHIFT
7445             37 MULTIPLY
7446             38 LMENU
7447             3A CAPITAL
7448             3B F1
7449             3C F2
7450             3D F3
7451             3E F4
7452             3F F5
7453             40 F6
7454             41 F7
7455             42 F8
7456             43 F9
7457             44 F10
7458             45 NUMLOCK
7459             46 SCROLL
7460             47 HOME
7461             48 UP
7462             49 PRIOR
7463             4A SUBTRACT
7464             4B LEFT
7465             4C CLEAR
7466             4D RIGHT
7467             4E ADD
7468             4F END
7469             50 DOWN
7470             51 NEXT
7471             52 INSERT
7472             e053 DELETE
7473             54 SNAPSHOT
7474             56 OEM_102
7475             57 F11
7476             58 F12
7477             59 CLEAR
7478             5A OEM_WSCTRL
7479             5B OEM_FINISH
7480             5C OEM_JUMP
7481             5C OEM_AX
7482             5D EREOF
7483             5E OEM_BACKTAB
7484             5F OEM_AUTO
7485             62 ZOOM
7486             63 HELP
7487             64 F13
7488             65 F14
7489             66 F15
7490             67 F16
7491             68 F17
7492             69 F18
7493             6A F19
7494             6B F20
7495             6C F21
7496             6D F22
7497             6E F23
7498             6F OEM_PA3
7499             70 KANA
7500             71 OEM_RESET
7501             73 ABNT_C1
7502             76 F24
7503             79 CONVERT
7504             7B NONCONVERT
7505             7B OEM_PA1
7506             7C TAB
7507             7E ABNT_C2
7508             7F OEM_PA2
7509             e010 MEDIA_PREV_TRACK
7510             e019 MEDIA_NEXT_TRACK
7511             e01C RETURN
7512             e01D RCONTROL
7513             e020 VOLUME_MUTE
7514             e021 LAUNCH_APP2
7515             e022 MEDIA_PLAY_PAUSE
7516             e024 MEDIA_STOP
7517             e02E VOLUME_DOWN
7518             e030 VOLUME_UP
7519             e032 BROWSER_HOME
7520             e035 DIVIDE
7521             e037 SNAPSHOT
7522             e038 RMENU
7523             e046 CANCEL
7524             e047 HOME
7525             e048 UP
7526             e049 PRIOR
7527             e04B LEFT
7528             e04D RIGHT
7529             e04F END
7530             e050 DOWN
7531             e051 NEXT
7532             e052 INSERT
7533             e053 DELETE
7534             e05B LWIN
7535             e05C RWIN
7536             e05D APPS
7537             e05E POWER
7538             e05F SLEEP
7539             e065 BROWSER_SEARCH
7540             e066 BROWSER_FAVORITES
7541             e067 BROWSER_REFRESH
7542             e068 BROWSER_STOP
7543             e069 BROWSER_FORWARD
7544             e06A BROWSER_BACK
7545             e06B LAUNCH_APP1
7546             e06C LAUNCH_MAIL
7547             e06D LAUNCH_MEDIA_SELECT
7548             e11D PAUSE
7549            
7550             7D OEM_8
7551            
7552             10 SHIFT
7553             11 CONTROL
7554             12 MENU
7555             15 KANA
7556             15 HANGUL
7557             17 JUNJA
7558             18 FINAL
7559             19 HANJA
7560             19 KANJI
7561             1C CONVERT
7562             1D NONCONVERT
7563             1E ACCEPT
7564             1F MODECHANGE
7565             29 SELECT
7566             2A PRINT
7567             2B EXECUTE
7568            
7569             60 NUMPAD0
7570             61 NUMPAD1
7571             62 NUMPAD2
7572             63 NUMPAD3
7573             64 NUMPAD4
7574             65 NUMPAD5
7575             66 NUMPAD6
7576             67 NUMPAD7
7577             68 NUMPAD8
7578             69 NUMPAD9
7579             6C SEPARATOR
7580             B4 MEDIA_LAUNCH_MAIL
7581             B5 MEDIA_LAUNCH_MEDIA_SELECT
7582             B6 MEDIA_LAUNCH_APP1
7583             B7 MEDIA_LAUNCH_APP2
7584            
7585             E5 PROCESSKEY
7586             E7 PACKET
7587             F6 ATTN
7588             F7 CRSEL
7589             F8 EXSEL
7590             FA PLAY
7591             FC NONAME
7592             FD PA1
7593             FE OEM_CLEAR
7594            
7595             )); # http://www.opensource.apple.com/source/WebCore/WebCore-1C25/platform/gdk/KeyboardCodes.h
7596             # the part after PAUSE is junk...
7597            
# Control characters bound to non-letter keys: [ ] \ space.
# %oem_control maps a VK name to the character the key must carry, immediately
# followed by the 4-digit hex code of the control character (SPACE has no
# leading character).
my %oem_control = (
  OEM_4   => '[001b',
  OEM_6   => ']001d',
  OEM_5   => '\\001c',
  SPACE   => '0020',
  OEM_102 => '\\001c',
);	# In ru layouts, only entries which match the char are present

# %do_control: the same data keyed by the character.  Splitting the SPACE
# entry yields a bogus key "0", which is replaced by an explicit entry for " ".
my %do_control;
for my $spec (values %oem_control) {
  my ($chr, $hex) = $spec =~ /^(.)(.+)/;
  $do_control{$chr} = $hex;
}
$do_control{' '} = '0020';
delete $do_control{0};
7609            
# Bindings of keys named by VK.  Each value is a list of groups, each group a
# list of characters.
my %default_bind = (
  TAB      => [["\t", "\t"]],
  ADD      => [["+", "+"]],
  SUBTRACT => [["-", "-"]],
  MULTIPLY => [["*", "*"]],
  DIVIDE   => [["/", "/"]],
  RETURN   => [["\r", "\r"], ["\n"]],
  BACK     => [["\b", "\b"], ["\x7f"]],
  ESCAPE   => [["\e", "\e"], ["\e"]],
  CANCEL   => [["\cC", "\cC"], ["\cC"]],
);
# NUMPAD0 .. NUMPAD9 carry their own digit
$default_bind{"NUMPAD$_"} = [[$_]] for 0..9;
7621            
7622             sub get_VK ($$) {
                 # Returns the VK table for face $f, or an empty hash ref when the lookup yields a false value.
                 # $f is split on '/' and the pieces are passed, between 'faces' and 'VK', as the key path to
                 # get_deep_via_parents() (defined elsewhere; presumably it falls back to parent faces -- confirm).
7623 0     0 0   my ($self, $f) = (shift, shift);
7624 0 0         $self->get_deep_via_parents($self, undef, 'faces', (split m(/), $f), 'VK') || {}
7625             # $self->{faces}{$f}{VK} || {}
7626             }
7627            
7628             sub massage_VK ($$) {
                 # Appends control-character slots and the VK bindings of face $f to per-face copies of its layers.
                 #
                 # Side effects (all under $self->{faces}{$f} unless noted):
                 #   '[non_VK]'      number of entries in the first layer before anything is appended;
                 #   '[VK_off]'      VK name => index in the layers where that key's first pair is stored;
                 #   '[ini_layers]'  copy of the original list of layer names;
                 #   '[start_ctrl0]', '[start_ctrl]', '[end_ctrl]'  offsets of the appended control-char entries;
                 #   layers          replaced by the names of the new layers, "OLDNAME<$f>";
                 #   $self->{layers}{"OLDNAME<$f>"} and $self->{layers}{'[ini_copy1]'}{"OLDNAME<$f>"} are created;
                 #   every VK entry array is rewritten in place as (scancode, normalized bindings...).
                 # Returns ([chars seen in VK bindings], \@dead, \%seen_dead); dies on an unknown scancode or
                 # an empty binding.
7629 0     0 0   my ($self, $f, %seen, %seen_dead, @dead, @ctrl) = (shift, shift);
7630 0           my $l0 = $self->{faces}{$f}{layers}[0];
7631 0           $self->{faces}{$f}{'[non_VK]'} = @{ $self->{layers}{$l0} };
  0            
7632 0           my $create_a_c = $self->{faces}{$f}{'[create_alpha_ctrl]'};
7633 0 0         $create_a_c = $create_alpha_ctrl unless defined $create_a_c;
                 # $EXTR: [unshifted, shifted] pairs of control characters to append to the first layer
7634 0 0         my $EXTR = [ ["\r","\n"], ["\b","\x7F"], ["\t","\cC"], ["\x1b","\x1d"], # Enter/C-Enter/Bsp/C-Bsp/Tab/Cancel/Esc=C-[/C-]
    0          
7635             ["\x1c", ($create_a_c ? "\cZ" : ())], ($create_a_c>1 ? (["\x1e", "\x1f"], ["\x00"]) : ())]; # C-\ C-z, C-^ C-_
7636 0 0         if ($create_a_c) {
                 # Add the chars \x01..\x1a not already listed in $EXTR, two per entry; @ctrl records the number
                 # of entries in $EXTR before and after this addition.
7637 0           my %s;
7638 0           push @ctrl, scalar @$EXTR;
7639 0           $s{$_}++ for $self->flatten_arrays($EXTR);
7640 0           my @ctrl_l = grep !$s{$_}, map chr($_), 1..26;
7641 0           push @$EXTR, [shift @ctrl_l, shift @ctrl_l] while @ctrl_l > 1;
7642 0 0         push @$EXTR, [@ctrl_l] if @ctrl_l;
7643 0           push @ctrl, scalar @$EXTR;
7644             }
                 # @extra: per layer, the list of entries to append; layers past the first get placeholders.
                 # NOTE(review): `([]) x N' repeats one and the same array reference N times, so the placeholders
                 # of a layer appear to be aliases of a single array -- confirm this is harmless downstream.
7645 0           my @extra = ( $EXTR, map [([]) x @$EXTR], 1..$#{ $self->{faces}{$f}{layers} } );
  0            
7646 0           my $VK = $self->get_VK($f);
7647 0           $self->{faces}{$f}{'[VK_off]'} = \ my %VK_off;
7648 0           for my $K (sort keys %$VK) {
7649 0           my ($v, @C) = $VK->{$K};
                 # Element 0 of a VK entry is the scancode; fill it from %scan_codes when it is empty
7650 0 0 0       $v->[0] = $scan_codes{$K} or die("Can't find the scancode for the VK key `$K'")
7651             unless length $v->[0];
7652             # warn 'Key: <', join('> <', @$v), '>';
7653 0           my $c = 0;
7654 0           $VK_off{$K} = @{ $extra[0] }; # Where in the layouts is the VK key
  0            
7655 0           for my $k (@$v[1..$#$v]) {
                 # A trailing '@' marks a dead key; '-1' means "no binding".  Binding number $c goes to layer
                 # int($c/2), with even $c starting a new [unshifted, shifted] pair.
7656 0 0         ($k, my $dead) = ($k =~ /^(.+?)(\@?)$/) or die "Empty key in VK list";
7657 0 0         $seen{$k eq '-1' ? '' : ($k = $self->charhex2key($k))}++;
7658 0 0 0       $seen_dead{$k}++ or push @dead, $k if $dead and $k ne '-1';
      0        
7659 0 0         my $kk = ($k eq '-1' ? undef : $k);
7660 0 0         push @{ $extra[int($c/2)] }, [] unless $c % 2;
  0            
7661 0 0         push @{ $extra[int($c/2)][-1] }, ($dead ? [$kk, undef, 1] : $kk); # $extra[$N] is [[$k0, $k1] ...]
  0            
7662 0 0         $kk .= $dead if defined $kk;
7663 0           push @C, $kk;
7664 0           $c++;
7665             }
7666             # warn 'Key: <', join('> <', @C), '>';
7667 0           @$v = ($v->[0], @C);
7668             }
7669 0           $self->{faces}{$f}{'[ini_layers]'} = [ @{ $self->{faces}{$f}{layers} } ]; # Deep copy
  0            
7670 0 0         if (@extra) {
7671 0           my($start_append, @Ln);
7672 0           for my $l (0 .. $#{ $self->{faces}{$f}{layers} } ) { # Assume that in every layer a few positions after end of the
  0            
7673 0           my $oLn = my $Ln = $self->{faces}{$f}{layers}[$l]; # first layer are empty
7674 0           my $L = $self->{layers}{$Ln};
7675 0 0         unless ($l) {
                 # The length of the first layer fixes where the extras start in every layer; the offsets in
                 # %VK_off, so far relative to the extras, become absolute positions here.
7676 0           $start_append = @$L;
7677 0           $self->{faces}{$f}{'[start_ctrl0]'} = $start_append;
7678 0   0       $self->{faces}{$f}{'[start_ctrl]'} = $start_append + ($ctrl[0]||0);
7679 0   0       $self->{faces}{$f}{'[end_ctrl]'} = $start_append + ($ctrl[1]||0);
7680 0           $_ += $start_append for values %VK_off;
7681             }
7682 0           my @L = map [$_->[0], $_->[1]], @$L; # Each element is []; 1-level deep copy
7683 0           my $add = $start_append + @{ $extra[$l] } - @L;
  0            
7684 0           $L[$start_append+$_] = [] for 0..$add-1; # Avoid splicing after the end of array
7685 0           splice @L, $start_append, @{ $extra[$l] }, @{ $extra[$l] };
  0            
  0            
                 # The extended copy is stored under the new name "OLDNAME<$f>"
7686 0           push @Ln, ($Ln .= "<$f>");
7687 0           $self->{layers}{$Ln} = \@L;
7688             # At this moment ini_copy should not exist yet
7689 0 0         warn "ini_copy of `$oLn' exists; --> `$Ln'" if $self->{layers}{'[ini_copy]'}{$oLn};
7690             # $self->{layers}{'[ini_copy]'}{$Ln} = $self->{layers}{'[ini_copy]'}{$oLn} if $self->{layers}{'[ini_copy]'}{$oLn};
7691             #??? Why does not this works???
7692             #warn "ini_copy1: `$Ln' --> `$oLn'";
7693 0           $self->{layers}{'[ini_copy1]'}{$Ln} = $self->deep_copy($self->{layers}{$oLn});
7694             }
7695 0           $self->{faces}{$f}{layers} = \@Ln;
7696             }
7697 0           ([keys %seen], \@dead, \%seen_dead)
7698             }
7699            
7700             sub format_key ($$$$) {
                 # Formats one key binding as a cell of the generated layout table.
                 #   $k     the character(s) bound (undef: nothing bound);
                 #   $dead  true if the binding is a dead key;
                 #   $used  hash ref; $used->{$k} is incremented for dead keys only.
                 # Returns -1 for an undefined $k; a single ASCII letter or digit literally; '%%' when $k is not
                 # exactly one character or is above U+FFFF (output_unit_KK() collects these cells into its
                 # list of ligatures); otherwise the result of key2hex($k).  Dead keys get a trailing '@'
                 # (not on the -1 and '%%' results).
7701 0     0 0   my ($self, $k, $dead, $used) = (shift, shift, shift, shift);
7702 0 0         return -1 unless defined $k;
7703 0 0         my $mod = ($dead ? '@' : '') and $used->{$k}++;
    0          
7704 0 0         return "$k$mod" if $k =~ /^[A-Z0-9]$/i;
7705 0 0 0       return '%%' if 1 != length $k or ord $k > 0xFFFF;
7706 0           $self->key2hex($k) . $mod;
7707             }
7708            
7709             sub auto_capslock($$) {
                 # Returns 1 if, in the pair $u = [unshifted, shifted], both entries are defined, differ, and the
                 # shifted one equals uc() or ucfirst() of the unshifted one (with a table override for one
                 # character where uc() was unreliable); returns 0 otherwise.
7710 0     0 0   my ($self, $u) = (shift, shift);
7711 0           my %fix = qw( ӏ Ӏ ); # Perl 5.8.8 uc is wrong
7712 0 0 0       return 0 unless defined $u->[0] and defined $u->[1] and $u->[0] ne $u->[1];
      0        
7713 0 0 0       return 1 if ($fix{$u->[0]} || uc($u->[0])) eq $u->[1];
7714 0 0 0       return 1 if ($fix{$u->[0]} || ucfirst($u->[0])) eq $u->[1];
7715 0           return 0;
7716             }
7717            
7718             my %double_scan_VK = ('56 OEM_102' => '7D OEM_8', # ISO vs JIS (right) keyboard
                 # "SCANCODE VK" => "SCANCODE VK" (output_unit0() also accepts several targets joined by '/'):
                 # when the key on the left is emitted, the one on the right is queued as a duplicate of it.
7719             # '73 ABNT_C1' => '7E ABNT_C2', # ABNT (right) = JIS (left) keyboard vs ABNT (numpad)
7720             # '53 DECIMAL' => '7E ABNT_C2', # NUMPAD-period vs ABNT (numpad) [Does not work??? DECIMAL too late?]
7721             '34 OEM_PERIOD' => '7E ABNT_C2', # period vs ABNT (numpad)
7722             '7B NONCONVERT' => '79 CONVERT'); # JIS keyboard: left of SPACE, right of SPACE
                 # VK name => character for the second control column; used by output_unit00() only when the
                 # face's create_alpha_ctrl setting is greater than 1.
7723             my %shift_control_extra = (2 => "\x00", 6 => "\x1e", OEM_MINUS => "\x1f");
7724            
7725             { my(%seen, %seen_scan, %seen_VK, @add_scan_VK, @ligatures, @decimal);
                 # State shared by the subs defined inside this block:
                 #   %seen         upper-cased base names already handed out by base_unit();
                 #   %seen_scan, %seen_VK  scancodes / VK names already emitted by output_unit_KK();
                 #   @add_scan_VK  duplicate keys queued by output_unit0() for output_added_units();
                 #   @ligatures    [VK, column, string] for cells which format_key() rendered as '%%';
                 #   @decimal      data stashed for the DECIMAL key by output_unit00().
                 # reset_units() empties all of them.
7726 0     0 0   sub reset_units ($) { @decimal = @ligatures = @add_scan_VK = %seen_scan = %seen_VK = %seen = () }
7727            
7728             sub output_unit00 ($$$$$$$;$$) {
                 # Builds the list of columns (@KK) for one key and passes it to output_unit_KK().
                 #   $face            name of the face;
                 #   $k               VK name of the key;
                 #   $U               per layer: [unshifted, shifted]; an entry may be an array (key-array);
                 #   $N               position of the key in the layers (undef when called for a VK key);
                 #   $deadkeys        dead-key info: array of arrays when $known_scancode is set, else one hash
                 #                    per layer;
                 #   $Used            per layer: hash ref updated by format_key() for dead keys;
                 #   $known_scancode  scancode, when already known (optional);
                 #   $skippable       if true, a key with nothing bound produces no output (optional).
                 # Returns an empty list when the scancode is unknown (after a warning), when the key is skipped,
                 # and for DECIMAL (its data is stashed in @decimal); otherwise the result of output_unit_KK().
7729 0     0 0   my ($self, $face, $k, $U, $N, $deadkeys, $Used, $known_scancode, $skippable) = (shift, shift, shift, shift, shift, shift, shift, shift, shift);
7730 0 0 0       my $sc = ($known_scancode or $scan_codes{$k}) or warn("Can't find the scancode for the key `$k'"), return;
7731 0           my(@cntrl, %s, $cnt); # Set Control-KEY if is [ or ] or \
7732 0 0 0       my $u = [map { defined() ? [map {($_ and ref $_) ? $_->[0] : $_} @$_] : $_ } @$U]; # deep copy with $_->[0] on a key-array
  0 0          
  0            
                 # Choose the control-column characters; each later assignment overrides the earlier ones:
                 # %do_control by the base character, %default_bind by VK name, C-letter for A..Z, and
                 # %shift_control_extra (second column only) when create_alpha_ctrl > 1.
7733 0 0 0       @cntrl = chr hex $do_control{$u->[0][0]} if $do_control{$u->[0][0] || 'N/A'}; # \ ---> ^\
7734 0 0 0       @cntrl = @{ $default_bind{$k}[1] } if !@cntrl and $default_bind{$k}[1];
  0            
7735 0           my $create_a_c = $self->{faces}{$face}{'[create_alpha_ctrl]'};
7736 0 0         $create_a_c = $create_alpha_ctrl unless defined $create_a_c;
7737 0 0 0       @cntrl = (chr(0x1F & ord $k)) x $create_a_c if $k =~ /^[A-Z]$/ and $create_a_c;
7738 0 0 0       @cntrl = (undef, $shift_control_extra{$k}) if $create_a_c > 1 and $shift_control_extra{$k};
7739 0   0       $deadkeys ||= []; # known_scancode is true when we start from VK, and $deadkeys is (arr of arrays) vs (hash per layer)
                 # @KK: one [char, layer, dead] triple per (layer, shift state), ordered layer by layer
7740             my @KK = map [$_->[2], $_->[0], # 0:layer#, 1:shift#, 2:char, 3:deadkeys(layer), 4:char_array ==> 0:char, 1:layer, 2:dead
7741 0 0         (ref $_->[4] ? $_->[4][2] : ($known_scancode ? $_->[3][$_->[1]] : $_->[3]{defined $_->[2] ? $_->[2] : 'n/a'}))],
    0          
    0          
7742             map [@$_[0,1], $u->[$_->[0]][$_->[1]], $deadkeys->[$_->[0]], $U->[$_->[0]][$_->[1]]],
7743             map +([$_, 0], [$_, 1]), 0..$#$u;
                 # %s: the characters bound in the principal layers
7744 0   0       defined and $s{$_}++ for map $_->[0], @KK;
                 # Columns whose dead flag is 3 or more have their character replaced via dead_with_inversion()
7745 0   0       ($_->[2] || 0) >= 3 and $_->[0] = $self->dead_with_inversion(!'hex', $_, $face, $self->{faces}{$face}) for @KK;
      0        
7746             #($KK[$_][2] || 0) >= 3 and warn "face=$face N=$N c=$_ <$KK[$_][0]> --> $KK[$_][2]\n" for 0..$#KK;
7747             #my @UUU = map $U->[$_->[0]][$_->[1]], map +([$_, 0], [$_, 1]), 0..$#$u;
7748             #ref $UUU[$_] and ($UUU[$_][2] || 0) >= 3 and warn "face=$face N=$N cc=$_ <$UUU[$_][0]> --> $UUU[$_][2]\n" for 0..$#UUU;
7749            
                 # $cnt: number of distinct bound characters, or (if there are none) of control characters;
                 # it is 0 only for a key with nothing to output.
7750 0   0       $cnt = keys %s || @cntrl;
7751 0 0 0       if (my $extra = $self->{faces}{$face}{'[output_layers]'} and defined $N) { # $N not supported on VK...
                 # Entries of '[output_layers]' from index (number of layers) on must look like `prefix=PREFIX'
                 # (or prefixNOTSAME=, prefixNOTSAMEcase=); each adds a pair of columns taken from the layers of
                 # the face's '[deadkeyLayers]' entry for that prefix.
7752 0           my $b = @{ $self->{faces}{$face}{layers} };
  0            
7753 0           for my $f ($b..$#$extra) {
7754             # warn "Extra layer number $f, base=$b requested while the character N=$N has " . (scalar @$u) . " layers" if $f+$b <= $#$u;
7755 0 0         (my $lll = $extra->[$f]) =~ s/^prefix(NOTSAME(case)?)?=// or die "Extra layer: expected `prefix=PREFIX', see: `$extra->[$f]'";
7756 0           my($notsame, $case) = ($1,$2);
7757 0           my $c = $self->key2hex($self->charhex2key($lll));
7758 0 0         my $L = $self->{faces}{$face}{'[deadkeyLayers]'}{$c} or die "Unknown prefix character `$c´ in extra layers";
7759 0           my @L = map $self->{layers}{$_}[$N], @$L;
7760 0           my(@CC, @pp, @OK);
                 # With NOTSAME, the first $b layers are scanned and a character not yet bound in the principal
                 # layers (%s) is preferred; otherwise only the first layer is used.
7761 0 0         for my $l (@L[0 .. ($notsame ? $b-1 : 0)]) {
7762 0           my(%s1, @was);
7763 0           for my $sh (0..$#$l) {
7764 0 0         my @C = map {defined() ? (ref() ? $self->dead_with_inversion(!'hex', $_, $face, $self->{faces}{$face}) : $_) : $_} $l->[$sh];
  0 0          
7765 0 0         my @p = map {defined() ? (ref() ? $_->[2] : 0 ) : 0 } $l->[$sh];
  0 0          
7766 0 0 0       ($CC[$sh], $pp[$sh]) = ($C[0], $p[0]) if not defined $CC[$sh] and defined $C[0];
7767 0 0 0       ($CC[$sh], $pp[$sh], $OK[$sh], $s1{$C[0]}) = ($C[0], $p[0], 1,1) if !$OK[$sh] and defined $C[0] and not $s{$C[0]};
      0        
7768             ($CC[$sh], $pp[$sh], $OK[$sh], $s1{$was[0]}) = (@was, 1,1) # use unshifted if needed
7769 0 0 0       if $sh and !$OK[$sh] and defined $C[0] and defined $was[0] and not $s{$was[0]} and not $s1{$was[0]};
      0        
      0        
      0        
      0        
7770 0 0         @was = ($C[0], $p[0]) unless $sh; # may omit `unless´
7771 0 0         $cnt++ if defined $CC[$sh];
7772             }
7773             }
7774             # Avoid read-only values (can get via $#KK) which cannot be autovivified
7775 0 0         push @KK, ([]) x (2*$f - @KK) if @KK < 2*$f; # splice can't do with a gap after the end of array
7776 0           splice @KK, 2*$f, 0, map [$CC[$_], $f-$b, $pp[$_]], 0..$#CC;
7777             }
7778             }
7779 0 0 0       return if $skippable and not $cnt;
7780 0           my $CL;
7781 0 0 0       if (my $Caps = $self->{faces}{$face}{'[CapsLOCKlayers]'} and defined $N) { # $N not supported on VK...
7782 0           $CL = [map $self->{layers}{$_}[$N], @$Caps];
7783             # warn "See CapsLock layers: <<<", join('>>> <<<', @$Caps), ">>>";
7784             }
                 # A skippable key with nothing in the first layer falls back to %default_bind
7785 0 0 0       if ($skippable and not defined $KK[0][0] and not defined $KK[1][0]) {
      0        
7786 0           for my $shft (0,1) {
7787 0 0         $KK[$shft] = [$default_bind{$k}[0][$shft], 0] if defined $default_bind{$k}[0][$shft];
7788             ### $KK[$shft] = [$decimal[$shft], 0] if $k eq 'DECIMAL' and @decimal;
7789             }
7790             }
                 # Insert the control columns at position $pre_ctrl; under the condition below @cntrl is first
                 # resized (padded with undef or truncated) to exactly $create_a_c entries.
7791 0           my $pre_ctrl = $self->{faces}{$face}{'[ctrl_after_modcol]'};
7792 0 0         $pre_ctrl = 2*$ctrl_after unless defined $pre_ctrl;
7793 0 0 0       $#cntrl = $create_a_c - 1 if $pre_ctrl < 2*@$u or $self->{faces}{$face}{'[keep_missing_ctrl]'};
7794 0 0         warn "cac=$create_a_c #cntrl=$#cntrl pre=$pre_ctrl \@u=", scalar @$u if $#cntrl < 2*$ctrl_after - 1;
7795 0           splice @KK, $pre_ctrl, 0, map [$_, 0], @cntrl;
7796 0 0         splice @KK, 15, 0, [undef, 0] if @KK >= 16; # col=15 is the fake one
7797            
7798 0 0         if ($k eq 'DECIMAL') { # may be described both via visual maps and NUMPAD
                 # Columns still undefined are filled from the previously stashed ones; the output itself is
                 # produced later, by output_added_units().
7799 0 0         my @d = @{ $decimal[1] || [] };
  0            
7800 0   0       defined $KK[$_][0] or $KK[$_] = $d[$_] for 0..$#d; # fill on the second round
7801 0           @decimal = ([$k, $u, $sc, $Used], [@KK]);
7802 0           return;
7803             }
7804             # warn "Undefined \$N ==> <<<", join '>>> <<<', map $_->[0], @KK unless defined $N; # SPACE and ABNT_C1 ???
7805 0           $self->output_unit_KK($k, $u, $sc, $Used, $CL, @KK);
7806             }
7807            
7808             sub output_unit_KK($$@) {
                 # Formats the columns @KK (triples [char, layer, dead]) of the key with VK name $k and scancode
                 # $sc, and records $sc and $k as emitted (%seen_scan, %seen_VK).
                 #   $u     per-layer [unshifted, shifted] characters; used for CapsLock detection;
                 #   $Used  per-layer hash refs passed on to format_key();
                 #   $CL    optional: per-CapsLock-layer entries for this key (only element 0 is used).
                 # Columns which format_key() renders as '%%' are appended to @ligatures.
                 # The returned list starts with ($sc, $k, $fill, ...); the callers read element 3 as the rest
                 # of the line and element 4, if true, as an array ref of cells for an extra row.
7809 0     0 0   my ($self, $k, $u, $sc, $Used, $CL, @KK) = @_;
7810 0   0       my @K = map $self->format_key($_->[0], $_->[2], $Used->[$_->[1] || 0]), @KK;
7811             #warn "keys with ligatures: <@K>" if grep $K[$_] eq '%%', 0..$#K;
7812 0           push @ligatures, map [$k, $_, $KK[$_][0]], grep $K[$_] eq '%%', 0..$#K;
7813 0           my $keys = join "\t", @K;
7814 0           my @kk = map $_->[0], @KK;
                 # $fill: an extra TAB after VK names shorter than 8 characters
7815 0 0         my $fill = ((8 <= length $k) ? '' : "\t");
                 # $expl: human-readable list of the characters (control chars shown as ^X); $expl1: their names
                 # from UName(), only when $self->{UNames} exists
7816 0 0         my $expl = join ", ", map +(defined() ? (0x20 > ord() ? '^'.chr(0x40+ord) : $_) : ' '), @kk;
    0          
7817 0 0         my $expl1 = exists $self->{UNames} ? "\t// " . join ", ", map +((defined $_) ? $self->UName($_) : ' '), @kk : '';
    0          
7818 0   0       my($CL0, $extra) = ($CL and $CL->[0]);
7819 0 0 0       undef $CL0 unless $CL0 and @$CL0 and grep defined, map { ($_ and ref $_) ? $_->[0] : $_ } @$CL0;
  0 0 0        
      0        
                 # CapsLock field: 2 (rewritten to 'SGCap' when it is the whole value) if an explicit CapsLock
                 # entry exists, else 1 if auto_capslock() holds for the first layer; bit 0x4 is set if
                 # auto_capslock() holds for the second layer.
7820 0 0         my $capslock = (defined $CL0 ? 2 : $self->auto_capslock($u->[0]));
7821 0           $capslock |= (($self->auto_capslock($u->[1])) << 2);
7822 0 0         $capslock = 'SGCap' if $capslock == 2; # Not clear if we can combine a string SGCap with 0x4 in a .klc file
7823 0 0         if ($CL0) {
                 # Cells of the extra row: the first two columns (swapped when auto_capslock() holds for the
                 # first layer), overridden by the defined entries of the CapsLock layer
7824 0           my $a_cl = $self->auto_capslock($u->[0]);
7825 0 0         my @KKK = @KK[$a_cl ? (1,0) : (0,1)];
7826 0 0 0       defined(($CL0->[$_] and ref $CL0->[$_]) ? $CL0->[$_][0] : $CL0->[$_]) and $KKK[$_] = $CL0->[$_] for 0, 1;
      0        
7827             # my @c = map { ($_ and ref $_) ? $_->[0] : $_ } @$CL0;
7828             # my @d = map { ($_ and ref $_) ? $_->[2] : {} } @$CL0; # dead
7829             # my @f = map $self->format_key($c[$_], $d[$_], ), 0 .. $#$CL0;
7830             # $extra = [@f];
7831 0   0       $extra = [map $self->format_key($_->[0], $_->[2], $Used->[$_->[1] || 0]), @KKK];
7832             }
7833 0           $seen_scan{$sc}++;
7834 0           $seen_VK{$k}++;
                 # NOTE(review): the next line looks truncated in this listing; presumably it read
                 # `<<EOP, $extra);' (a here-document ending at EOP below) -- confirm against the module source.
7835 0           ($sc, $k, $fill, <
7836             $capslock\t$keys\t// $expl$expl1
7837             EOP
7838             }
7839            
7840             sub output_unit0 ($$$$$$$;$$) {
                 # Same arguments as output_unit00(), which is called with the current @_ (`&' call without
                 # parentheses).  Returns nothing if that call returns an empty list; otherwise returns the
                 # text for the key: "SCANCODE<TAB>VK..." followed, when element 4 of the result is set, by an
                 # extra row starting with "-1<TAB>-1".
                 # If %double_scan_VK has an entry for "SCANCODE VK", each '/'-separated target is queued in
                 # @add_scan_VK together with the same fill and line tail.
7841 0 0   0 0   my(@i) = &output_unit00 or return;
7842 0   0       my @add = split '/', ($double_scan_VK{uc "$i[0] $i[1]"} || '');
7843             #warn "<<<<< Secondary key <$add> for <$i[0] $i[1]>" if $add;
7844 0           push @add_scan_VK, map [split(/ /, $_), @i[2,3]], grep $_, @add;
7845 0 0         my $add = ($i[4] ? "-1\t-1\t\t0\t" . join("\t", @{$i[4]}) . "\n" : '');
  0            
7846 0           "$i[0]\t$i[1]$i[2]\t$i[3]$add"
7847             }
7848            
7849             sub output_added_units ($) {
                 # Returns the lines for the keys which were postponed: the duplicates queued in @add_scan_VK
                 # (skipping those whose scancode or VK name has been emitted already), followed by the DECIMAL
                 # key if output_unit00() stashed one in @decimal.
7850 0     0 0   my ($self, @i, @o, @dec) = shift;
7851 0           for my $i (@add_scan_VK) {
7852 0 0 0       next if $seen_scan{$i->[0]} or $seen_VK{$i->[1]}; # Cannot duplicate either one...
7853 0           push @i, $i;
7854             }
7855 0 0         if ($decimal[0]) {
7856             # @decimal = ([$self->output_unit_KK($k, $u, $sc, $Used, @KK)], [@KK]);
                 # $decimal[0] holds the arguments, $decimal[1] the columns; no CapsLock layers are passed
7857 0           my ($k, $u, $sc, $Used) = @{$decimal[0]};
  0            
7858 0           push @dec, [$self->output_unit_KK($k, $u, $sc, $Used, undef, @{$decimal[1]})];
  0            
7859             }
                 # Same line format as in output_unit0()
7860 0           for my $i (@i, @dec) {
7861 0 0         my $add = ($i->[4] ? "-1\t-1\t\t0\t" . join("\t", @{$i->[4]}) . "\n" : '');
  0            
7862 0           push @o, "$i->[0]\t$i->[1]$i->[2]\t$i->[3]$add";
7863             }
7864             @o
7865 0           }
7866            
7867             my $enc_UTF16LE;
7868             sub to_UTF16LE_units ($) {
                 # Plain function (no $self).  Returns the argument unchanged if all its characters are at most
                 # U+FFFF; otherwise returns a string in which every character is one 16-bit unit of the UTF-16LE
                 # encoding of the argument.  Encode is loaded, and the encoding object cached in
                 # $enc_UTF16LE, on first need; dies if the encoding cannot be found.
7869 0     0 0   my $k = shift;
7870 0 0         unless ($k =~ /^[\x00-\x{FFFF}]*$/) {
7871 0 0         (require Encode), $enc_UTF16LE = Encode::find_encoding('UTF-16LE') unless $enc_UTF16LE;
7872 0 0         die "Can't arrange encoding to UTF-16LE" unless $enc_UTF16LE;
7873 0           $k = $enc_UTF16LE->encode($k);
7874             # warn join '> <', ($k =~ /(..)/sg); # Can't use decode() on surrogates...
7875             # warn join '> <', map {unpack 'v', $_} ($k =~ /(..)/sg); # Can't use decode() on surrogates...
                 # Each 2-byte group is unpacked as a little-endian 16-bit number and turned into a character
7876 0           $k = join '', map chr(unpack 'v', $_), ($k =~ /(..)/sg); # Can't use decode() on surrogates...
7877             }
7878 0           $k;
7879             }
7880            
7881             sub output_ligatures ($) {
                 # Returns one TAB-separated line per entry [VK, column, string] of @ligatures: the VK name
                 # (plus a TAB if shorter than 8 chars), the column number, four cells with the hex codes of the
                 # first four UTF-16 units of the string (empty cells if it is shorter), and, when
                 # $self->{UNames} exists, a `//' comment with the names of the characters.
                 # A repeated (VK, column) pair is skipped with a warning.
7882 0     0 0   my ($self, @o, %s) = shift;
7883 0           for my $l (@ligatures) {
7884 0 0         warn("Repeated LIGATURE $l->[0] $l->[1]"), next if $s{"$l->[0] $l->[1]"}++;
7885 0           my $k = to_UTF16LE_units $l->[2];
7886 0           my @k = ((map $self->key2hex($_), split //, $k), ('') x 4);
7887 0 0         my @expl = exists $self->{UNames} ? "// " . join " + ", map $self->UName($_), split //, $l->[2] : ();
7888 0 0         my $add = ((8 <= length $l->[0]) ? '' : "\t");
7889 0           push @o, (join "\t", "$l->[0]$add", $l->[1], @k[0..3], @expl) . "\n";
7890             }
7891             @o
7892 0           }
7893            
7894             sub base_unit ($$$$) {
                 # Chooses the base name for the key at position $u.
                 #   $basesub  array ref of layer names to look the key up in;
                 #   $u        position of the key in the layers;
                 #   $ingroup  false: derive the name from the unshifted character; true: ask %start_SEC.
                 # Returns [0, NAME, $c0] in the first case: NAME is the upper-cased first defined unshifted
                 # character among the layers ('#' for space, with '#' appended if the upper-cased character was
                 # handed out before -- see %seen); $c0 is that character if it came from a key-array, else undef.
                 # Returns [1, NAME] in the second case: NAME comes from the handler of the %start_SEC section
                 # whose range [start, start+count) contains $u, and is undef when no section matches.
7895 0     0 0   my ($self, $basesub, $u, $ingroup, $k) = (shift, shift, shift, shift);
7896 0 0         if (!$ingroup) {
7897 0           my @c = map $self->{layers}{$_}[$u][0], @$basesub;
7898 0           my($c) = grep defined, @c;
                 # No statement modifier on `my': the behaviour of `my $x = ... if COND' is undefined and may
                 # leave $c0 with a value from a previous call.
7899 0 0         my $c0 = ('ARRAY' eq ref $c ? ($c = $c->[0]) : undef);
7900 0 0         $c .= '#' if $seen{uc $c}++;
7901 0 0         $c = '#' if $c eq ' ';
7902 0           $c = uc $c;
7903 0           return [0, $c, $c0]
7904             } # Now do the VK groups
7905 0           for my $v (values %start_SEC) {
7906 0 0 0       $k = $v->[2]($self, $u, $v), last if $v->[0] <= $u and $v->[0] + $v->[1] > $u;
7907             }
7908 0           [1, $k]
7909             }
7910            
7911             sub output_unit ($$$$$$$$) {
                 # Emits the key at position $u of the layers named in @$layers: collects its entry from every
                 # layer and delegates to output_unit0() with the VK name $baseK->[$u] and $canskip as the
                 # "skippable" flag.  Returns nothing when $baseK has no defined name for this position.
7912 0     0 0   my ($self, $face, $layers, $u, $deadkeys, $Used, $canskip, $baseK, $k) = (shift, shift, shift, shift, shift, shift, shift, shift);
7913 0           my $U = [map $self->{layers}{$_}[$u], @$layers];
7914 0 0         defined ($k = $baseK->[$u]) or return;
7915 0           $self->output_unit0($face, $k, $U, $u, $deadkeys, $Used, undef, $canskip);
7916             }
7917             }
7918            
7919             sub output_layout_win ($$$$$$$) {
                 # Returns the concatenated results of output_unit() for every position 0..$#$baseK.
                 # Positions at or beyond $cnt are passed as skippable.
7920 0     0 0   my ($self, $face, $layers, $deadkeys, $Used, $cnt, $baseK) = (shift, shift, shift, shift, shift, shift, shift);
7921             # die "Count of non-VK entries mismatched: $cnt vs ", scalar @{$self->{layers}{$layers->[0]}}
7922             # unless $cnt <= scalar @{$self->{layers}{$layers->[0]}};
7923 0           map $self->output_unit($face, $layers, $_, $deadkeys, $Used, $_ >= $cnt, $baseK), 0..$#$baseK;
7924             }
7925            
7926             sub output_VK_win ($$$) {
                 # Returns the results of output_unit0() for every key of the VK table of $face (see get_VK()).
                 # Element 0 of a VK entry is passed as the known scancode; the remaining elements are the
                 # bindings, a trailing '@' marking a dead key.
7927 0     0 0   my ($self, $face, $Used, @O) = (shift, shift, shift);
7928 0           my $VK = $self->get_VK($face);
7929 0           for my $k (keys %$VK) {
7930 0           my $v = $VK->{$k};
7931             # warn 'Key: <', join('> <', @$v), '>';
                 # Split every binding into [key, dead-flag], then separate the two into @k and @dead
7932 0 0         my (@dead) = map +(/^(.+)\@$/ ? [$1, 1] : [$_]), @$v[1..$#$v];
7933 0           my (@k, @o, @oo, $x, $y) = map $_->[0], @dead;
7934 0           @dead = map $_->[1], @dead;
                 # Regroup both lists into [unshifted, shifted] pairs, one pair per layer
7935 0   0       push @o, [$x, $y] while @dead and ($x, $y) = splice @dead, 0, 2;
7936 0   0       push @oo, [$x, $y] while @k and ($x, $y) = splice @k, 0, 2;
7937 0           push @O, $self->output_unit0($face, $k, \@oo, undef, \@o, $Used, $v->[0]);
7938             }
7939             @O
7940 0           }
7941            
7942             sub read_deadkeys_win ($$) {
                 # Extracts the DEADKEY and KEYNAME_DEAD sections from the text $t of an MSKLC map file.
                 # Returns (\%o, \%d, $t):
                 #   %o  lower-cased hex code of a dead key => { hex code => hex code } (its translation table);
                 #   %d  hex code => name from KEYNAME_DEAD (each key also in lower case); the names are stored
                 #       in $self->{'[seen_knames]'} too, keyed by the character.  If there is no KEYNAME_DEAD
                 #       section, %d still holds what the DEADKEY split below put into it;
                 #   $t  the input with comments and trailing whitespace removed, and with the two sections
                 #       replaced by the placeholders DEADKEYS and KEYNAMES_DEAD.
                 # Dies on an unrecognized line inside a DEADKEY section.
7943 0     0 0   my ($self, $t, $dead, $next, @p, %o) = (shift, shift, '', '');
7944            
7945 0           $t =~ s(\s*//.*)()g; # remove comments
7946 0           $t =~ s([^\S\n]+$)()gm; # remove trailing whitespace (including \r!)
7947             # deadkey lines, empty lines, HEX HEX keymap lines
                 # $dead gets the whole run of DEADKEY sections, $next the line which follows it
7948 0 0         $t =~ s/(^(?=DEADKEY)(?:(?:(?:DEADKEY|\s*[0-9a-f]{4,})\s+[0-9a-f]{4,})?(?:\n|\Z))*)(?=(.*))/DEADKEYS\n\n/mi
7949             and ($dead, $next) = ($1, $2);
7950 0 0 0       warn "Unknown keyword follows deadkey descriptions in MSKLC map file: `$next'; dead=<$dead>"
7951             if length $next and not $next =~ /^(KEYNAME|LIGATURE|COPYRIGHT|COMPANY|LOCALENAME|LOCALEID|VERSION|SHIFTSTATE|LAYOUT|ATTRIBUTES|KEYNAME_EXT|KEYNAME_DEAD|DESCRIPTIONS|LANGUAGENAMES|ENDKBD)$/i;
7952             # $dead =~ /\S/ or warn "EMPTY DEADKEY section";
7953             #warn "got `$dead' from `$t'";
7954            
7955             # when a pattern has parens, split does not remove the leading empty fields (?!!!)
                 # The split yields (leading field, code, body, code, body, ...); the leading field is dropped
7956 0           (undef, my %d) = split /^DEADKEY\s+([0-9a-f]+)\s*\n/im, $dead;
7957 0           for my $d (keys %d) {
7958             #warn "split `$d' from `$d{$d}'";
7959 0           @p = split /\n+/, $d{$d};
7960 0           my @bad;
7961 0 0         die "unrecognized part in deadkey map for $d: `@bad'"
7962             if @bad = grep !/^\s*([0-9a-f]+)\s+([0-9a-f]+)$/i, @p;
7963 0           %{$o{lc $d}} = map /^\s*([0-9a-f]+)\s+([0-9a-f]+)/i, @p;
  0            
7964             }
7965            
7966             # empty lines, HEX "NAME" lines
7967 0 0         if ($t =~ s/^KEYNAME_DEAD\n((?:(?:\s*[0-9a-f]{4,}\s+".*")?(?:\n|\Z))*)(?=(.*))/KEYNAMES_DEAD\n\n/mi) {
    0          
7968 0           ($dead, $next) = ($1,$2);
7969 0 0 0       warn "Unknown keyword follows deadkey names descriptions in MSKLC map file: `$next'"
7970             if length $next and not $next =~ /^(DEADKEY|KEYNAME|LIGATURE|COPYRIGHT|COMPANY|LOCALENAME|LOCALEID|VERSION|SHIFTSTATE|LAYOUT|ATTRIBUTES|KEYNAME_EXT|KEYNAME_DEAD|DESCRIPTIONS|LANGUAGENAMES|ENDKBD)$/i;
7971 0 0         $dead =~ /\S/ or warn "EMPTY KEYNAME_DEAD section";
                 # From here on %d is reused for the names: hex code => name
7972 0           %d = map /^([0-9a-f]+)\s+"(.*)"\s*$/i, split /\n\s*/, $dead;
7973 0           $d{lc $_} = $d{$_} for keys %d;
7974 0   0       $self->{'[seen_knames]'} ||= {};
7975 0           @{$self->{'[seen_knames]'}}{map {chr hex $_} keys %d} = values %d; # XXXX Overwrites older values
  0            
  0            
7976             } elsif ($dead =~ /\S/) {
                 # This warning is disabled by its `if 0'
7977 0           warn "no KEYNAME_DEAD section found" if 0;
7978             }
7979 0           \%o, \%d, $t; # %o - translation tables; %d - names; $t is what is left of input
7980             }
7981            
7982             sub massage_template ($$$) {
                 # Returns the template text $t with every occurrence of a key of the hash %$r replaced by the
                 # corresponding value.  Keys are matched literally (they are quoted before being joined into
                 # the pattern) and at any position; when several keys match at the same place, the longest
                 # wins.  Warns about the keys which do not occur in $t.  With an empty %$r the text is
                 # returned unchanged (an empty alternation would match the empty string at every position).
7983 0     0 0   my ($self, $t, $r, %seen, %miss) = (shift, shift, shift);
7984 0 0         my $keys = join '|', map quotemeta($_), sort {length $b <=> length $a or $a cmp $b} keys %$r; # Prefer matching a longer key
  0            
7985 0           $t =~ s/($keys)/ # warn "Plugging in `$1'";
7986 0           $seen{$1}++, $r->{$1} /ge if length $keys; # Can't use \b: see SORT_ORDER_ID_ LOCALE_ID
7987 0   0       $seen{$_} or $miss{$_}++ for keys %$r;
7988 0 0         warn "The following parts missing in the template: ", join ' ', sort keys %miss if %miss;
7989 0           $t
7990             }
7991            
7992             # http://msdn.microsoft.com/en-us/library/dd373763
7993             # http://msdn.microsoft.com/en-us/library/dd374060
7994             my $template_win = <<'EO_TEMPLATE';
7995             KBD DLLNAME "LAYOUTNAME"
7996            
7997             COPYRIGHT "(c) COPYR_YEARS COMPANYNAME"
7998            
7999             COMPANY "COMPANYNAME"
8000            
8001             LOCALENAME "LOCALE_NAME"
8002            
8003             LOCALEID "SORT_ORDER_ID_LOCALE_ID"
8004            
8005             VERSION 1.0
8006            
8007             SHIFTSTATE
8008            
8009             BITS_TEMPLATE
8010             ATTRIBS
8011             LAYOUT ;an extra '@' at the end is a dead key
8012            
8013             //SC VK_ Cap COL_HEADERS
8014             //-- ---- ---- COL_EXPL
8015             LAYOUT_KEYS
8016             DO_LIGA
8017             DEADKEYS
8018            
8019             KEYNAME
8020            
8021             01 Esc
8022             0e Backspace
8023             0f Tab
8024             1c Enter
8025             1d Ctrl
8026             2a Shift
8027             36 "Right Shift"
8028             37 "Num *"
8029             38 Alt
8030             39 Space
8031             3a "Caps Lock"
8032             3b F1
8033             3c F2
8034             3d F3
8035             3e F4
8036             3f F5
8037             40 F6
8038             41 F7
8039             42 F8
8040             43 F9
8041             44 F10
8042             45 Pause
8043             46 "Scroll Lock"
8044             47 "Num 7"
8045             48 "Num 8"
8046             49 "Num 9"
8047             4a "Num -"
8048             4b "Num 4"
8049             4c "Num 5"
8050             4d "Num 6"
8051             4e "Num +"
8052             4f "Num 1"
8053             50 "Num 2"
8054             51 "Num 3"
8055             52 "Num 0"
8056             53 "Num Del"
8057             54 "Sys Req"
8058             57 F11
8059             58 F12
8060             5C AX
8061             70 KANA
8062             73 "ABNT C1"
8063             79 CONVERT
8064             7c F13
8065             7d F14
8066             7e F15
8067             7f F16
8068             80 F17
8069             81 F18
8070             82 F19
8071             83 F20
8072             84 F21
8073             85 F22
8074             86 F23
8075             87 F24
8076            
8077             KEYNAME_EXT
8078            
8079             1c "Num Enter"
8080             1d "Right Ctrl"
8081             35 "Num /"
8082             37 "Prnt Scrn"
8083             38 "Right Alt"
8084             45 "Num Lock"
8085             46 Break
8086             47 Home
8087             48 Up
8088             49 "Page Up"
8089             4b Left
8090             4d Right
8091             4f End
8092             50 Down
8093             51 "Page Down"
8094             52 Insert
8095             53 Delete
8096             54 <00>
8097             56 Help
8098             5b "Left Windows"
8099             5c "Right Windows"
8100             5d Application
8101            
8102             KEYNAMES_DEAD
8103            
8104             DESCRIPTIONS
8105            
8106             LOCALE_ID LAYOUTNAME
8107            
8108             LANGUAGENAMES
8109            
8110             LOCALE_ID LANGUAGE_NAME
8111            
8112             ENDKBD
8113            
8114             EO_TEMPLATE
8115             # "
8116            
8117             my $template_osx = <<'EO_TEMPLATE';
8118            
8119            
8120            
8121            
8122            
8123            
8124            
8125            
8126            
8127            
8128            
8129            
8130            
8131            
8132            
8133            
8134            
8135            
8136            
8137            
8138            
8139            
8140            
8141            
8142            
8143            
8144            
8145            
8146            
8147            
8148            
8149            
8150            
8151            
8152            
8153            
8154            
8155            
8156            
8157            
8158            
8159            
8160            
8161            
8162            
8163            
8164            
8165            
8166            
8167            
8168             OSX_KEYMAP_0_AND_COMMAND
8169            
8170            
8171            
8172             OSX_KEYMAP_SHIFT
8173            
8174            
8175            
8176             OSX_KEYMAP_CAPS
8177            
8178            
8179            
8180             OSX_KEYMAP_OPTION
8181            
8182            
8183            
8184             OSX_KEYMAP_OPTION_SHIFT
8185            
8186            
8187            
8188             OSX_KEYMAP_OPTION_CAPS
8189            
8190            
8191            
8192             OSX_KEYMAP_OPTION_COMMAND
8193            
8194            
8195            
8196             OSX_KEYMAP_CTRL
8197            
8198            
8199            
8200             OSX_KEYMAP_COMMAND
8201            
8202            
8203            
8204            
8205             OSX_ACTIONS_BASE
8206            
8207             OSX_ACTIONS
8208            
8209            
8210            
8211             OSX_TERMINATORS_BASE
8212            
8215             OSX_TERMINATORS2
8216            
8217            
8218             EO_TEMPLATE
8219             # "
8220            
8221             sub KEY2hex ($$) {
                 # Like key2hex(), but also accepts a key-array: for an array ref it returns a copy of the array
                 # with element 0 converted by key2hex() (the original array is not modified); anything else is
                 # passed to key2hex() directly.
8222 0     0 0   my ($self, $k) = (shift, shift);
8223 0 0         return $self->key2hex($k) unless 'ARRAY' eq ref $k;
8224             #warn "see a deadkey `@$k'";
8225 0           $k = [@$k]; # deeper copy
8226 0           $k->[0] = $self->key2hex($k->[0]);
8227 0           $k;
8228             }
8229            
8230             sub linked_faces_2_hex_map ($$$$) {
                 # Returns the link map from face $name to face $b as a hash ref in hex form: the keys are
                 # converted by key2hex(), the defined values by KEY2hex(), undefined values stay undef.
                 # The map is taken from the face's 'Face_link_map_INV' entry if $inv is true, else from
                 # 'Face_link_map'.
                 # Dies if there is no such map (unless $b is the face $name itself, in which case the result is
                 # empty), or if the face has no '[coverage_hex]' entry yet.
8231 0     0 0   my ($self, $name, $b, $inv) = (shift, shift, shift, shift);
8232 0           my $L = $self->{faces}{$name};
8233 0 0         my $remap = $L->{$inv ? 'Face_link_map_INV' : 'Face_link_map'}{$b};
8234 0           die "Face `$b' not linked to face `$name'; HAVE: <", join('> <', keys %{$L->{Face_link_map}}), '>'
8235 0 0 0       if $self->{faces}{$b} != $L and not $remap;
                 # $cover serves only as a check that the face has been preprocessed
8236 0 0         my $cover = $L->{'[coverage_hex]'} or die "Face $name not preprocessed";
8237             # warn "Keys of the Map `$name' -> '$b': <", join('> <', keys %$remap), '>';
8238             # $remap ||= {map +(chr hex $_, chr hex $remap->{$_}), keys %$cover}; # This one in terms of chars, not hex
8239 0           my @k = keys %$remap;
8240             # warn "Map `$name' -> '$b': <", join('> <', map +($self->key2hex($_), $self->key2hex($remap->{$_})), @k), '>';
8241 0 0         return { map +($self->key2hex($_), (defined $remap->{$_} ? $self->KEY2hex($remap->{$_}) : undef)), @k }
8242             }
8243            
8244             my $dead_descr;
8245             #my %control = split / /, "\n \\n \r \\r \t \\t \b \\b \cC \\x03 \x7f \\x7f \x1b \\x1b \x1c \\x1c \x1d \\x1d";
                 # %control: control character => printable form; the four backslash escapes below, and ^A..^Z
                 # for those of \x01..\x1a which have no entry yet.
8246             my %control = split / /, "\n \\n \r \\r \t \\t \b \\b";
8247             $control{$_->[0]} ||= $_->[1] for map [chr($_), '^'.chr(0x40+$_)], 1..26;
8248             sub control2prt ($$) {
                 # Returns $c itself unless its first character is a control character (below 0x20, or 0x7f);
                 # for those, returns the form from %control, or a `\xHH' escape when the table has no entry.
8249 0     0 0   my($self, $c) = (shift, shift);
8250 0 0 0       return $c unless ord $c < 0x20 or ord $c == 0x7f;
8251 0 0         $control{$c} or sprintf '\\x%02x', ord $c;
8252             }
8253            
8254             sub dead_with_inversion ($$$$$) {
                 # Returns the prefix key of the key-array $to = [key, ..., flags], replaced by its "inverted"
                 # counterpart when the flags ($to->[2]) are 3, or are 3 after a right shift by 3 bits.
                 #   $is_hex  true if the key is given, and is to be returned, in hex form;
                 #   $nameF   the name of the face (used in the error message only);
                 #   $H       the face; its '[deadkeyInvAltGrKey]' table (keyed by hex codes) gives the inverted key.
                 # Dies, naming the key (in hex form) and the face, if the table has no entry for the key.
8255 0     0 0   my($self, $is_hex, $to, $nameF, $H) = (shift, shift, shift, shift, shift);
8256 0   0       my $invert_dead = (3 == ($to->[2] || 0) or 3 == (($to->[2] || 0) >> 3));
8257 0           $to = $to->[0];
8258 0 0         if ($invert_dead) {
8259 0 0         $to = $self->key2hex($to) unless $is_hex;
                 # Look up into a separate variable: the error message must still be able to show the key
8260 0 0         defined (my $inv = $H->{'[deadkeyInvAltGrKey]'}{$to}) or die "Cannot invert prefix key `$to' in face `$nameF'";
8261             # warn "invert $to in face=$nameF, inv=$invertAlt0 --> $inv\n";
8262 0 0         $to = ($is_hex ? $self->key2hex($inv) : $inv);
8263             }
8264 0           $to;
8265             }
8266            
8267             sub output_deadkeys ($$$$$$;$) {
                 # Build the text of the DEADKEY section(s) for the prefix key $d (hex) of face $nameF.
                 # Arguments:
                 #   $nameF                 - face name (key into $self->{faces});
                 #   $d                     - the dead key, as hex;
                 #   $Dead2                 - optional hash; its entry for this dead key (by char) tells
                 #                            which keys are marked dead ('@') in the output;
                 #   $flip_AltGr_hex        - hex of the AltGr-flipping prefix key, if any;
                 #   $prefix_flippedMap_hex - the name emitted for the second ('Inv') map;
                 #   $OUT_Apple             - optional hash; filled as $OUT_Apple->{$n}{$d} = [$to, undef, is-dead]
                 #                            for every emitted key $n which is not a control char.
                 # Returns the list (!!$keys, $OUT, $OUT_Apple); warns if no keys were found in any map.
8268 0     0 0   my ($self, $nameF, $d, $Dead2, $flip_AltGr_hex, $prefix_flippedMap_hex, $OUT_Apple) = (shift, shift, shift, shift, shift, shift, shift);
8269 0           my $H = $self->{faces}{$nameF};
8270             # warn "emit `$nameF' d=`$d' f=$H->{'[deadkeyFace]'}{$d}";
8271             # if (my $unres = $H->{'[unresolved_imported]'}) {
8272             # warn "Can't resolve `@$unres' to an imported dead key; face=`$nameF'" unless $H->{'[unresolved_imported_warned]'}++;
8273             # }
8274             #warn "See dead2 in <$nameF> for <$d>" if $dead2;
8275 0   0       my $dead2 = ($Dead2 || {})->{$self->charhex2key($d)} || {};
                 # @sp: the SPACE row of get_VK() with a trailing '@' stripped from each entry; %sp maps each
                 # char after the leading element to its 0-based position.
8276 0 0         my(@sp, %sp) = map {(my $in = $_) =~ s/(?<=.)\@$//s; $in} @{ ($self->get_VK($nameF))->{SPACE} || [] };
  0            
  0            
  0            
8277 0           @sp = map $self->charhex2key($_), @sp;
8278 0           @sp{@sp[1..$#sp]} = (0..$#sp); # The leading elt is the scancode
8279            
8280 0           my @maps = map $H->{"[deadkeyFaceHexMap$_]"}{$d}, '', 'Inv';
8281 0 0         pop @maps unless defined $maps[-1];
8282 0           my($D, @DD) = ($d, $d, $prefix_flippedMap_hex);
8283 0           my ($OUT, $keys) = '';
8284             # There are 3 situations:
8285             # 0) process one map without AltGr-inversion; 1) Process one map which is the AltGr-inversion of the principal one;
8286             # 2) process one map with AltGr-inversion (in 1-2 the inversion may have a customization put over it).
8287             # The problem is to recognize when deadkeys in the inversion come from non-inverted one, or from customization
8288             # And, in case (1), we must consider flip_AltGr specially... (the case (2) is now treated during face preparation)
8289 0   0       my($is_invAltGr_Base_with_chain, $AMap, $default) = ($D eq ($flip_AltGr_hex || 'n/a') and $H->{'[have_AltGr_chain]'});
8290 0           $default = $self->default_char($nameF);
8291 0 0         $default = $self->key2hex($default) if defined $default;
8292 0 0 0       if ($#maps or $is_invAltGr_Base_with_chain) { # One of the maps we will process is AltGr-inverted; calculate AltGr-inversion
8293 0           $self->faces_link_via_backlinks($nameF, $nameF, 'no_ini'); # Create AltGr-invert self-mapping
8294 0           $AMap = $self->linked_faces_2_hex_map($nameF, $nameF, 1);
8295             #warn "deadkey=$D flip=$flip_AltGr_hex" if defined $default;;
8296             }
8297 0           my($docs, $map_AltGr_over, $over_dead2) = ($H->{'[prefixDocs]'}{$D}, {}, {});
8298 0 0         if ($is_invAltGr_Base_with_chain) {
8299 0 0         if (my $override_InvAltGr = $H->{'[InvAltGrFace]'}{''}) { # NOW: needed only for invAltGr
8300 0           $map_AltGr_over = $self->linked_faces_2_hex_map($nameF, $override_InvAltGr);
8301             }
8302 0 0 0       $over_dead2 = $Dead2->{$self->charhex2key($flip_AltGr_hex)} || {} if defined $flip_AltGr_hex; # used in CyrPhonetic v0.04
8303 0           $dead2 = { %{ $H->{'[DEAD]'} }, %{ $H->{'[dead_in_VK]'} } };
  0            
  0            
8304             # $docs ||= 'AltGr-inverted base face';
8305             }
8306            
8307             # warn "output map for `$D' invert=", !!$is_invAltGr_Base_with_chain, ' <',join('> <', sort keys %$dead2),'>';
                 # One DEADKEY section is emitted per map; the second map (if present) is the 'Inv' one.
8308 0           for my $invertAlt0 (0..$#maps) {
8309 0   0       my $invertAlt = $invertAlt0 || $is_invAltGr_Base_with_chain;
8310 0           my $map = $maps[$invertAlt0];
8311 0           $d = $DD[$invertAlt0];
8312 0 0         my $docs1 = (defined $docs ? sprintf("\t// %s%s", ($invertAlt0 ? 'AltGr inverted: ' : ''), $docs) : '');
    0          
8313 0           $OUT .= "DEADKEY\t$d$docs1\n\n";
8314 0           my $OUT_Apple_map = $d;
8315             # Good order: first alphanum, then punctuation, then space
8316 0           my @keys = sort keys %$map; # Sorting not OK for 6-byte keys - but can't have them on Win
8317             @keys = (grep(( lc(chr hex $_) ne uc(chr hex $_)and not $sp{chr hex $_} ), @keys),
8318             grep(((lc(chr hex $_) eq uc chr hex $_ and (chr hex $_) !~ /\p{Blank}/) and not $sp{chr hex $_}), @keys),
8319 0   0       grep((((lc(chr hex $_) eq uc chr hex $_ and (chr hex $_) =~ /\p{Blank}/) or $sp{chr hex $_}) and $_ ne '0020'), @keys),
      0        
      0        
8320             grep( $_ eq '0020', @keys)); # make SPACE last
8321 0           for my $n (@keys) { # Not OK for 6-byte keys (impossible on Win)
8322             # warn "doing $n\n";
8323 0           my ($to, $import_dead, $EXPL) = $map->{$n};
                 # An ARRAY value carries [target, ?, flags, explanation]; any non-zero flags mark the target as dead.
8324 0 0 0       if ($to and 'ARRAY' eq ref $to) {
8325 0           $EXPL = $to->[3];
8326 0 0         $EXPL =~ s/(?=\p{NonspacingMark})/ /g if $EXPL;
8327 0   0       $import_dead = (1 <= ($to->[2] || 0)); # was: exportable; now: any dead
8328 0           $to = $self->dead_with_inversion('hex', $to, $nameF, $H);
8329             }
8330 0 0 0       warn "0000: face `$nameF' d=`$d': $n --> $to" if $to and $to eq '0000';
8331 0           my $map_n = $map->{$n};
8332 0 0 0       $map_n = $map_n->[0] if $map_n and ref $map_n;
8333 0 0 0       $H->{'[32-bit]'}{chr hex $map_n}++, next if hex $n > 0xFFFF and $map_n; # Cannot be put in a map...
8334 0 0 0       if ($to and hex $to > 0xFFFF) { # Value cannot be put in a map...
8335             # warn "32-bit: n=$n map{n}=$map_n to=$to";
8336 0           $H->{'[32-bit]'}{chr hex $map_n}++;
                 # Substitute the face-wide '[DeadChar_32bitTranslation]' value; skip the entry if none is set.
8337 0 0         next unless defined ($to = $H->{'[DeadChar_32bitTranslation]'});
8338 0           $to =~ s/^\s+//; $to =~ s/\s+$//;
  0            
8339 0           $to = $self->key2hex($to);
8340             }
8341 0           my $was_to = $to;
                 # An empty target falls back to the default char of the face; with no default the entry is skipped.
8342 0 0 0       $to ||= $default or next;
8343             # Tricky: dead keys may come from the override map (which is indexed by NOT-INVERTED KEYS!); it is already merged into
8344             # the map - unless for inverted base face
8345             my ($alt_n, $use_dead2) = (($is_invAltGr_Base_with_chain and defined $map_AltGr_over->{$n})
8346             ? ($n, $over_dead2)
8347 0 0 0       : (($invertAlt ? $AMap->{$n} : $n), $dead2));
    0          
8348 0 0 0       $alt_n = $alt_n->[0] if $alt_n and ref $alt_n; # AMap may have "complex" values
8349             #warn "$D --> $d, `$n', `$alt_n', `$AMap->{$n}'; `$map_AltGr_over->{$n}' i=$invertAlt i0=$invertAlt0 d=$use_dead2->{chr hex $alt_n}";
8350             #warn "... n=`$n', alt=`$alt_n' Amap=`$AMap->{$n}'\n" if $AMap->{$n};
8351 0 0 0       my $DEAD = ( (defined $alt_n and $use_dead2->{chr hex $alt_n}) ? '@' : '' );
8352             #warn "AltGr flip: $nameF:$D: $n --> $H->{'[dead2_AltGr_chain]'}{$D}" if $n eq ($flip_AltGr_hex || 'n/a');
8353 0           my $from = $self->control2prt(chr hex $n);
8354             # This is now done inside the map:
8355 0           if (0 and (hex $n) == hex ($flip_AltGr_hex || 'ffffff') and @maps == 2 and !$invertAlt) {
8356             if (defined $was_to or $DEAD) {
8357             warn "AltGr_Flip key=", hex $n, " overwrites '$was_to', DEAD=", $DEAD||$import_dead||0, " on face=$nameF\[$d]";
8358             }
8359             ($DEAD, $to) = ('@', $DD[1]); # Join Inv to not-Inv on $flip_AltGr_hex; Do not overwrite existing binding... Warn???
8360             }
                 # A non-dead binding of a control char (below 0x20, or 0x7f) to a control char is replaced by
                 # the default char, when one is defined.
8361 0 0 0       $to = $default
      0        
      0        
      0        
      0        
      0        
8362             if !($DEAD or $import_dead)
8363             and defined $default and (0x7f == hex $to or 0x20 > hex $to) and (0x7f == hex $n or 0x20 > hex $n);
                 # A dead binding which targets the very prefix being emitted would loop immediately.
8364 0 0 0       if (($DEAD or $import_dead) and $d eq $to) {
      0        
8365 0 0 0       if (($flip_AltGr_hex or 'n/a') eq $d) { # This is what routinely happens in Flip_AltGr face
8366 0           $import_dead = $DEAD = '';
8367 0   0       $to = $H->{'[DeadChar_32bitTranslation]'} || '003f'; # ? = U+003f
8368 0           $to =~ s/^\s+//; $to =~ s/\s+$//;
  0            
8369 0           $to = $self->key2hex($to);
8370 0           $EXPL = 'removal of immediate deadkey loop';
8371             } else {
8372 0           warn "Immediate deadkey loop: face `$nameF' d=`$d': $n --> $to";
8373             }
8374             }
                 # Trailing "// name -> name" annotation; present only if $self->{UNames} exists.
8375 0 0         my $expl = exists $self->{UNames} ? "\t// " . join "\t-> ", # map $self->UName($_),
8376             # chr hex $n, chr hex $map->{$n} : '';
8377             $self->UName(chr hex $n), $self->UName(chr hex $to, 'verbose', 'vbell') : '';
8378 0 0 0       $expl .= " (via $EXPL)" if $expl and $EXPL;
8379 0           my $to1 = $self->control2prt(chr hex $to);
8380             # warn "Both import_dead and DEAD properties hold for `$from' --> '$to1' via deadkey $d face=$nameF" if $DEAD and $import_dead;
8381 0 0         $DEAD = '@' if $import_dead;
8382 0           $OUT .= sprintf "%s\t%s%s\t// %s -> %s%s\n", $n, $to, $DEAD, $from, $to1, $expl;
8383 0 0 0       $OUT_Apple->{$n}{$OUT_Apple_map} = [$to, undef, $DEAD && 1] if $OUT_Apple and 0x20 <= hex $n and 0x7f != hex $n;
      0        
      0        
8384             }
8385 0           $OUT .= "\n";
                 # Remember whether any processed map had keys at all (the count of the first non-empty one).
8386 0   0       $keys ||= @keys;
8387             }
8388 0 0         warn "DEADKEY $d for face `$nameF' empty" unless $keys;
8389 0           (!!$keys, $OUT, $OUT_Apple)
8390             }
8391            
# massage_diacritics($self)
#
# Preprocess $self->{Diacritics} (name => list of strings) into two tables:
#   $self->{'[diacritics]'}{$name} - the same list, each string split into chars;
#   $self->{'[map2diac]'}{$char}   - the name of the diacritic a char belongs to.
# Blanks are stripped from the strings of $self->{Diacritics} in place.
# The first four strings ("modifiers") are concatenated and indexed as one
# run, the remaining ones ("combining") as another; when a char is claimed
# by several diacritics, the claim at the smallest index within its run wins.
sub massage_diacritics ($) {
    my ($self) = (shift);
    my %char2dia;
    for my $dia (sort keys %{ $self->{Diacritics} }) {    # sorted: deterministic result
        my $variants = $self->{Diacritics}{$dia};
        my @v = map { s/\p{Blank}//g; $_ } @$variants;    # s/// edits the stored strings too
        my @modifiers = split //, join '', @v[0..3];
        my @combining = split //, join '', @v[4..$#v];
        for my $chars (\@modifiers, \@combining) {
            for my $pos (0..$#$chars) {
                $char2dia{ $chars->[$pos] }{$pos} = $dia;  # XXXX No check for duplicates???
            }
        }
        $self->{'[diacritics]'}{$dia} = [ map [ split // ], @v ];
    }
    for my $c (keys %char2dia) {
        # prefer the earliest possible occurence
        my ($earliest) = sort { $a <=> $b } keys %{ $char2dia{$c} };
        $self->{'[map2diac]'}{$c} = $char2dia{$c}{$earliest};
    }
}
8410            
# extract_diacritic($self, $dia, $idx, $which, $need, $skip2, @rows)
#
# Pick the $idx-th (0-based) non-ASCII char out of a candidate list built
# from @rows (array refs of chars):
#   - the first row in full;
#   - each following row, without its first two chars when $skip2 is true;
#   - when $skip2 is true, the (defined) first two chars of the following
#     rows - and of the first row as well if $skip2 is 'skip2-include0'.
# $dia and $which only appear in the error message.  Dies if the slot is
# empty and $idx is below $need; otherwise returns the slot (maybe undef).
sub extract_diacritic ($$$$$$@) {
    my ($self, $dia, $idx, $which, $need, $skip2) = splice @_, 0, 6;
    my $head = shift;
    my @rest = @_;

    my @candidates = @$head;                      # first one full
    my $from = $skip2 ? 2 : 0;
    push @candidates, map @$_[$from..$#$_], @rest; # the rest, maybe omitting the first 2
    if ($skip2) {
        my @revisit = ($skip2 eq 'skip2-include0') ? ($head, @rest) : @rest;
        push @candidates, grep defined, map @$_[0..1], @revisit;
    }

    my @v = grep { ord($_) >= 0x80 } @candidates;
    unless ($idx >= $need or defined $v[$idx]) {
        die "diacritic ` $dia ' has no $which no.$idx (0-based) assigned";
    }
    return $v[$idx];
}
8424            
# diacritic2self($self, $dia, $c, $face, $N, $space, $c_base, $c_noalt, $seen_before)
#
# Translate the key $c pressed after the diacritic prefix $dia into a variant
# of this diacritic (a spacing modifier or a combining char), chosen by the
# position of the key on the keyboard.
#
#   $dia         - the diacritic char; must be a key of $self->{'[map2diac]'}
#   $c           - the char on the key, or an array ref [char, ?, is_prefix]
#   $face        - face name
#   $N           - layer number (0 or 1 are tested for below)
#   $space       - hash: char => slot number, for chars on the SPACE key
#   $c_base      - char on the base (layer 0, unshifted) position of the key
#   $c_noalt     - char on layer 0 at the same shift state
#   $seen_before - hash of non-prefix chars already processed on this key
#
# Returns the translated char, or undef if no rule applies; an undefined $c
# is returned as is.  Dies on an unknown diacritic, or (via extract_diacritic)
# when a required slot is unassigned.
#
# Rules keyed on $c itself do not apply to prefix keys.  The "|" "\", "/" "?"
# and "_" "-" rules used to test `A or B and not $prefix', which Perl parses
# as `A or (B and not $prefix)': a prefix key on "|", "/" or "_" was then
# translated anyway.  The alternatives are now parenthesized.
sub diacritic2self ($$$$$$$$$) {
    my ($self, $dia, $c, $face, $N, $space, $c_base, $c_noalt, $seen_before)
        = (shift, shift, shift, shift, shift, shift, shift, shift, shift);
    return $c unless defined $c;
    my $prefix = (ref $c and $c->[2]);    # Ignore deadkeys (unless we act on $c_base or $c_noalt - UNIMPLEMENTED);
    $_ and 'ARRAY' eq ref $_ and $_ = $_->[0] for $c, $c_base, $c_noalt;    # Prefix keys behave as usual keys

    die "` $dia ' not a known diacritic" unless my $name = $self->{'[map2diac]'}{$dia};
    my $v = $self->{'[diacritics]'}{$name} or die "Panic!";
    my ($first) = grep 0x80 <= ord, @{$v->[0]} or die "diacritic ` $dia ' does not define any non-7bit modifier";
    return $first if $c eq ' ';

    my $spaces = keys %$space;
    # The converted flip key is not consulted below; the lookup and the
    # conversion are kept so that their side effects (and failures) stay.
    my $flip_AltGr = $self->{faces}{$face}{'[Flip_AltGr_Key]'};
    $flip_AltGr = $self->charhex2key($flip_AltGr) if defined $flip_AltGr;

    if ($c eq $dia and $prefix) {         # the prefix pressed twice: default combining char
        # This happens with caron which reaches breve as the first:
        die "diacritic ` $dia ' has no default combining char assigned" unless defined $v->[4][0];
        return $v->[4][0];
    }
    my $limits = $self->{Diacritics_Limits}{ALL} || [(0) x 7];

    if ($space->{$c}) {    # SPACE is handled above (we assume it is on index 0)...
        # ~ and ^ have only 3 spacing variants; one of them must be on ' ' - and we omit the first 2 of non-principal block...
        return $self->extract_diacritic($dia, $space->{$c}, 'spacing variant', $limits->[0], 'skip2', @$v[0..3]);
    } elsif (0 <= (my $off = index "\r\t\n\x1b\x1d\x1c\b\x7f\x1e\x1f\x00", $c)
             and not $prefix) {    # Enter, Tab, C-Enter, C-[, C-], C-\, Bspc, C-Bspc, C-^, C-_, C-@
        # ~ and ^ have only 3 spacing variants; one of them must be on ' '
        return $self->extract_diacritic($dia, $spaces + $off, 'spacing variant', $limits->[0], 'skip2', @$v[0..3]);
    } elsif (!$spaces and $c =~ /^\p{Blank}$/ and not $prefix) {    # NBSP and, (eg) Thin space 2007 -> second/third modifier
        # ~ and ^ have only 3 spacing variants; one of them must be on ' '
        my @pre = grep /^\p{Blank}$/, keys %$seen_before;    # no prefix keys in $seen_before
        push @pre, 'something' unless $seen_before->{' '};   # there is no sense to address slot number 0
        return $self->extract_diacritic($dia, scalar @pre, 'spacing variant', $limits->[0], 'skip2', @$v[0..3]);
    }

    if (($c eq "|" or $c eq "\\") and not $prefix) {
        # Skip2 would hurt, since macron+\ is defined:
        return $self->extract_diacritic($dia, ($c eq "|"), 'vertical+etc spacing variant', $limits->[2], !'skip2', @$v[2..3]);
    }
    if ($N == 1 and $c_noalt and ($c_noalt eq "|" or $c_noalt eq "\\")) {
        return $self->extract_diacritic($dia, ($c_noalt eq "|"), 'vertical+dotlike combining', $limits->[6], 'skip2', @$v[6,7,4,5]);
    }
    if (($c eq "/" or $c eq "?") and not $prefix) {
        return $self->extract_diacritic($dia, ($c eq "?"), 'prime-like+etc spacing variant', $limits->[3], 'skip2', @$v[3]);
    }
    if ($c_noalt and ($c_noalt eq "'" or $c_noalt eq '"')) {
        return $self->extract_diacritic($dia, 1 + ($c_noalt eq '"') + 2*$N, 'combining', $limits->[4], 'skip2', @$v[4..7]);    # 1 for double-prefix
    }
    if (($c eq "_" or $c eq "-") and not $prefix) {
        return $self->extract_diacritic($dia, ($c eq "_"), 'lowered+etc spacing variant', $limits->[1], 'skip2', @$v[1..3]);
    }
    if ($N == 1 and $c_noalt and ($c_noalt eq "_" or $c_noalt eq "-")) {
        return $self->extract_diacritic($dia, ($c_noalt eq "_"), 'lowered combining', $limits->[5], 'skip2', @$v[5..7,4]);
    }
    if ($N == 1 and $c_noalt and ($c_noalt eq ";" or $c_noalt eq ":")) {
        return $self->extract_diacritic($dia, ($c_noalt eq ":"), 'combining for symbols', $limits->[7], 'skip2', @$v[7,4..6]);
    }
    if ($N == 1 and defined $c_base and 0 <= (my $ind = index "`1234567890=[],.'", $c_base)) {
        return $self->extract_diacritic($dia, 2 + $ind, 'combining', $limits->[4], 'skip2-include0', @$v[4..7]);    # -1 for `, 1+2 for double-prefix and AltGr-/?
    }
    if ($N == 0 and 0 <= (my $ind = index "[{]}", $c) and not $prefix) {
        return $self->extract_diacritic($dia, 2 + $ind, 'combining for symbols', $limits->[7], 'skip2-include0', @$v[7,4..6]);
    }
    if ($N == 1 and $c_noalt and ($c_noalt eq "/" or $c_noalt eq "?")) {
        return $self->extract_diacritic($dia, 6 + ($c_noalt eq "?"), 'combining for symbols', $limits->[7], 'skip2-include0', @$v[7,4..6]);
    }
    return undef;    # exactly one value, so that callers may push() the result
}
8498            
# diacritic2self_2($self, $dia, $c, $face, $space)
#
# Apply diacritic2self() to every position of one key.  $c is the key: an
# array (one element per layer) of arrays of chars (indexed by shift state;
# an element may be an array ref [char, ?, is_prefix]).  Returns a list with
# one array ref of translations per layer.  The non-prefix chars already
# processed on this key are handed to diacritic2self() as its last argument.
sub diacritic2self_2 ($$$$$$) {    # Takes a key: array of arrays [lc,uc]
    my ($self, $dia, $c, $face, $space) = (shift, shift, shift, shift, shift);
    my $base = $c->[0][0];          # Base character
    my (%seen, @out);
    for my $layer_no (0..$#$c) {
        my $layer = $c->[$layer_no];
        my @translated;
        for my $col (0..$#$layer) {
            my $key = $layer->[$col];
            push @translated,
                $self->diacritic2self($dia, $key, $face, $layer_no, $space, $base, $c->[0][$col], \%seen);
            my $is_prefix;
            ($key, $is_prefix) = @$key[0, 2] if ref $key;
            $seen{$key}++ if defined $key and not $is_prefix;
        }
        push @out, \@translated;
    }
    return @out;
}
8514            
8515             # Combining stuff:
8516             # perl -C31 -MUnicode::UCD=charinfo -le 'sub n($) {(charinfo(ord shift) || {})->{name}} for (0x20..0x10ffff) {next unless (my $c = chr) =~ /\p{NonspacingMark}/; (my $n = n($c)) =~ /^COMBINING\b/ or next; printf qq(%04x\t%s\t%s\n), $_, $c, $n}' >cc
8517             # perl -C31 -MUnicode::UCD=charinfo -le 'sub n($) {(charinfo(ord shift) || {})->{name}} for (0x20..0x10ffff) {next unless (my $c = chr) =~ /\p{NonspacingMark}/; (my $n = n($c)) =~ /^COMBINING\b/ and next; printf qq(%04x\t%s\t%s\n), $_, $c, $n}' >cc
8518            
# cache_dialist($self, @dias)
#
# Merge the composition tables $self->{Compositions}{$dia} of the listed
# diacritics into a list of hashes: the returned element number K maps a char
# to its composition by the K-th diacritic (in the order of @dias) among
# those which have a composition for that char or for its case partner.
# Downstream, it is crucial that a case pair comes from "one conversion", so
# the members of a case pair always use the same list of diacritics.
sub cache_dialist ($@) {
    my $self = shift;
    my (%seen, %caseseen, @out);
    warn("caching dia: [@_]") if warnCACHECOMP;

    # All the chars which some of the diacritics can act upon
    for my $d (@_) {
        my $h = $self->{Compositions}{$d} or next;
        $seen{$_}++ for keys %$h;
    }

    # Move the case pairs out of %seen into %caseseen (each member of a pair
    # refers to the same array)
    for my $c (keys %seen) {
        next if $caseseen{$c};
        # uc may include a wrong guy: uc(ſ) is S, and this may break the pair s/S if ſ comes before s, and S gets a separate binding;
        # so be very conservative with which case pair we include...
        my @case = grep { $_ ne $c and $seen{$_} and lc $_ eq lc $c } lc $c, uc $c;
        next unless @case;
        push @case, $c;
        for my $member (@case) {
            $caseseen{$member} = \@case;
            delete $seen{$member};
        }
    }    # Currently (?), downstream does not distinguish case pairs from Shift-pairs...

    # To avoid pairing symbols, keep the unpaired chars in separate slots too
    for my $cases (values %caseseen, map [$_], keys %seen) {
        my @dia = grep {
            my $dia = $_;
            scalar grep $self->{Compositions}{$dia}{$_}, @$cases
        } @_;
        for my $diaN (0..$#dia) {
            for my $char (@$cases) {
                my $to = $self->{Compositions}{$dia[$diaN]}{$char} or next;
                warn("cache dia; c=`$char' of `@$cases'; dia=[$dia[$diaN]]") if warnCACHECOMP;
                $out[$diaN]{$char} = $to;
            }
        }
    }
    #warn("caching dia --> ", scalar @out);
    return @out;
}
8548            
8549             my %cached_aggregate_Compositions;
8550             sub dia2list ($$) {
8551 0     0 0   my ($self, $dia, @dia) = (shift, shift);
8552             #warn "Split dia `$dia'";
8553 0 0         if ((my ($pre, $mid, $post) = split /(\+|--)/, $dia, 2) > 1) { # $mid is not counted in that "2"
8554 0           for my $p ($self->dia2list($pre)) {
8555 0           push @dia, map "$p$mid$_", $self->dia2list($post);
8556             }
8557             # warn "Split dia to `@dia'";
8558 0           return @dia;
8559             }
8560 0 0         return $dia if $dia =~ /^!?\\/; # (De)Penalization lists
8561 0           $dia = $self->charhex2key($dia);
8562 0 0         unless ($dia =~ /^-?(\p{NonspacingMark}|<(?:font=)?[-\w!]+>|(maybe_)?[ul]c(first)?|dectrl)$/) {
8563 0 0         die "` $dia ' not a known diacritic" unless my $name = $self->{'[map2diac]'}{$dia};
8564 0 0         my $v = $self->{'[diacritics]'}{$name} or die "A spacing character <$dia> was requested to be treated as a composition one, but we do not know translation";
8565 0 0         die "Panic!" unless defined ($dia = $v->[4][0]);
8566             }
8567 0 0         if ($dia =~ /^(-)?<(reverse-)?any(1)?-(other-)?\b([-\w]+?)\b((?:-![-\w]+\b)*)>$/) {
8568 0   0       my($neg, $rev, $one, $other, $match, $rx, $except, @except)
8569             = ($1||'', $2, $3, $4, $5, "(?:(?
8570 0           my $cached;
8571 0           (my $dia_raw = $dia) =~ s/^-//;
8572 0 0         $cached = $cached_aggregate_Compositions{$dia_raw} and return map "$neg$_", @$cached;
8573            
8574 0           @except = map { s/^(?=\w)/\\b/; s/(?<=\w)$/\\b/; $_} @except;
  0            
  0            
  0            
8575 0 0         $except = join('|', @except[1..$#except]), $except = qr($except) if @except;
8576             #warn "Exceptions: $except" if @except;
8577 0           $rx =~ s/-/\\b\\W+\\b/g;
8578 0           my ($A, $B, $AA, $BB);
8579 0           my @out = keys %{$self->{Compositions}};
  0            
8580 0           @out = grep !/^Cached\d+=
8581 0 0 0       @out = grep {length > 1 ? /$rx/ : (lc $self->UName($_) || '') =~ /$rx/ } @out;
  0            
8582 0 0 0       @out = grep {length > 1 ? !/$except/ : (lc $self->UName($_) || '') !~ /$except/ } @out;
  0            
8583             # make before ; penalize those with and/over inside
8584 0           @out = sort {($A=$a) =~ s/>/\cA/g, ($B=$b) =~ s/>/\cA/g; ($AA=$a) =~ s/\w+\W*/a/g, ($BB=$b) =~ s/\w+\W*/a/g; # Number of words
  0            
  0            
8585 0 0 0       /.\b(and|over)\b./ and s/^/~/ for $A,$B; $AA cmp $BB or $A cmp $B or $a cmp $b} @out;
  0   0        
8586 0 0         @out = grep length($match) != length, @out if $other;
8587 0 0         @out = grep !/\bAND\s/, @out if $one;
8588 0 0         @out = reverse @out if $rev; # xor $reverse;
8589 0 0 0       if (!dontCOMPOSE_CACHE and @out > 1 and not $neg) { # Optional caching; will modify composition tables
8590 0           my @cached = $self->cache_dialist(@out); # but not decomposition ones, hence `not $neg'
8591 0           @out = map "Cached$_=$dia_raw", 0..$#cached;
8592 0           $self->{Compositions}{$out[$_]} = $cached[$_] for 0..$#cached;
8593 0           $cached_aggregate_Compositions{$dia} = \@out;
8594             }
8595 0 0         @out = map "-$_", @out if $neg;
8596 0           return @out;
8597             } else { # etc
8598             #warn "Dia=`$dia'";
8599 0           return $dia;
8600             }
8601             }
8602            
# flatten_arrays($self, $item): recursively expand nested array refs into a
# flat list.  Anything which is not an (unblessed) array ref is returned
# unchanged.  Warns if called with extra arguments.
sub flatten_arrays ($$) {
    my $self = shift;
    my $item = shift;
    warn "method flatten_arrays() takes one argument" if @_;
    if (ref($item || '') eq 'ARRAY') {
        return map { $self->flatten_arrays($_) } @$item;
    }
    return $item;
}
8609            
# array2string($self, $item): render a (possibly nested) array ref as text
# for diagnostics.  undef becomes "(undef)", a non-array value becomes
# "<value>", an array becomes "[ elt, elt, ... ]".  Warns if called with
# extra arguments.
sub array2string ($$) {
    my ($self, $item) = (shift, shift);
    warn "method array2string() takes one argument" if @_;
    return '(undef)' unless defined $item;
    return "<$item>" if ref($item || '') ne 'ARRAY';
    my @parts = map { $self->array2string($_) } @$item;
    return '[ ' . join(', ', @parts) . ' ]';
}
8617            
# dialist2lists($self, $Dia): parse a diacritic specification.  Groups are
# separated by "|", the diacritics inside a group by ","; each diacritic is
# expanded with dia2list().  Returns one array ref per group; empty groups
# are kept.
sub dialist2lists ($$) {
    my ($self, $Dia) = (shift, shift);
    my @groups = map {
        my $group = $_;
        [ map { $self->dia2list($_) } split /,/, $group ];    # Do not omit empty groups
    } split /\|/, $Dia, -1;
    return @groups;
}
8629            
# document_char($self, $c, $doc [, $old])
#
# Attach the explanation $doc to the char $c.  A plain char is wrapped into a
# new array ref [$c]; an array ref is modified in place.  The explanation is
# stored in slot 3; if $old is an array ref which already carries an
# explanation, the new one is chained after it.  Returns the array ref, or
# $c unchanged when $c or $doc is undefined.
sub document_char ($$$;$) {
    my ($self, $c, $doc, $old) = (shift, shift, shift, shift);
    return $c unless defined $c and defined $doc;
    if ($old and ref $old and defined $old->[3]) {
        $doc = "$old->[3] ⇒ $doc";
    }
    $c = [$c] unless ref $c;
    $c->[3] = $doc;
    return $c;
}
8638            
# document_chars_on_key($self, $c, $doc [, $old])
#
# Apply document_char() to every char of a key.  $c is an array ref of layers
# (each an array ref of chars).  Returns one new array ref per layer, holding
# the documented chars.  (Usable with all_layers.)
sub document_chars_on_key ($$$;$) {
    my ($self, $c, $doc, $old) = (shift, shift, shift, shift);
    my @documented;
    for my $layer (@$c) {
        my @row = map { $self->document_char($_, $doc, $old) } @$layer;
        push @documented, \@row;
    }
    return @documented;
}
8646            
8647             #use Dumpvalue;
# Build a case-converting translator around $convert (a sub taking a char).
# The resulting translator returns undef for an undefined argument, treats a
# prefix key (array ref) as its char, and - if $changed_only is true -
# returns undef when the conversion does not change the char.
my $case_translator = sub {
    my ($convert, $changed_only) = @_;
    return sub ($) {
        my $c = shift;
        return undef unless defined $c;
        $c = $c->[0] if 'ARRAY' eq ref $c;
        my $converted = $convert->($c);
        return undef if $changed_only and $converted eq $c;
        return $converted;
    };
};

# The parameterless translators, by name.  Every translator returns exactly
# one value (possibly undef).
my %translators = (
    Id     => sub ($) { shift },
    Empty  => sub ($) { return undef },
    # Control char --> the corresponding char in the range 0x40..0x5f
    dectrl => sub ($) {
        my $c = shift;
        return undef unless defined $c;
        $c = $c->[0] if 'ARRAY' eq ref $c;
        my $code = ord $c;
        return undef if $code >= 0x20;
        return chr(0x40 + $code);
    },
    maybe_ucfirst => $case_translator->(sub { ucfirst $_[0] }, 0),
    maybe_lc      => $case_translator->(sub { lc $_[0] }, 0),
    maybe_uc      => $case_translator->(sub { uc $_[0] }, 0),
    ucfirst       => $case_translator->(sub { ucfirst $_[0] }, 1),
    lc            => $case_translator->(sub { lc $_[0] }, 1),
    uc            => $case_translator->(sub { uc $_[0] }, 1),
);
8660             sub make_translator ($$$$$) { # translator may take some values from "environment"
8661             # (such as which deadkey is processed), so caching is tricky: if does -> $used_deadkey reflects this
8662             # The translator should return exactly one value (possibly undef) so that map TRANSLATOR, list works intuitively.
8663 0   0 0 0   my ($self, $name, $deadkey, $face, $N, $used_deadkey) = (shift, shift, shift || 0, shift, shift, ''); # $deadkey used eg for diagnostics
8664 0 0         die "Undefined recipe in a translator for face `$face', layer $N on deadkey `$deadkey'" unless defined $name;
8665 0 0         if ($name =~ /^Imported\[([\/\w]+)(?:,([\da-fA-F]{4,}))?\]$/) {
8666 0 0         my($d, @sec) = (($2 ? "$2" : undef), split m(/), "$1");
8667 0 0         $d = $deadkey, $used_deadkey ="/$deadkey" unless defined $d;
8668 0 0         my $fromKBDD = $self->get_deep($self, 'DEADKEYS', @sec, lc $d, 'map') # DEADKEYS/bepo with 00A4 ---> DEADKEYS/bepo/00a4
8669             or die "DEADKEYS section for `$d' with parts `@sec' not found";
8670             # indexed by lc hex
8671 0 0   0     return sub { my $cc=my $c=shift; return $c unless defined $c; $c = $c->[0] if 'ARRAY' eq ref $c; defined($c = $fromKBDD->{$self->key2hex($c)}) or return $c; $self->document_char(chr hex $c, $name, $cc) }, '';
  0 0          
  0 0          
  0            
  0            
  0            
8672             }
8673 0 0         die "unrecognized Imported argument: `$1'" if $name =~ /^Imported(\[.*)/s;
8674 0 0         return $translators{$name}, '' if $translators{$name};
8675 0 0         if ($name =~ /^PrefixDocs\[(.+)\]$/) {
8676 0           $self->{faces}{$face}{'[prefixDocs]'}{$deadkey} = $1;
8677 0           return $translators{Empty}, '';
8678             }
8679 0 0         if ($name =~ /^Show\[(.+)\]$/) {
8680 0           $self->{faces}{$face}{'[Show]'}{$deadkey} = $self->stringHEX2string($1);
8681 0           return $translators{Empty}, '';
8682             }
8683 0 0         if ($name =~ /^HTML_classes\[(.+)\]$/) {
8684 0 0         (my @c = split /,/, "$1") % 3 and die "HTML_classes[] for key `$deadkey' not come in triples";
8685 0   0       my $C = ( $self->{faces}{$face}{'[HTML_classes]'}{$deadkey || ''} ||= {} ); # Above, deadkey is ||= 0
      0        
8686             # warn "I create HTML_classes for face=$face, prefix=`$deadkey'";
8687 0           while (@c) {
8688 0           my ($where, $class, $chars) = splice @c, 0, 3;
8689 0           ( $chars = $self->stringHEX2string($chars) ) =~ s/\p{Blank}(?=\p{NonspacingMark})//g;
8690 0           push @{ $C->{$where}{$_} }, $class for split //, $chars;
  0            
8691             }
8692 0           return $translators{Empty}, '';
8693             }
8694 0 0         if ($name =~ /^Space(Self)?2Id(?:\[(.+)\])?$/) {
8695 0 0         my $dia = $self->charhex2key((defined $2) ? $2 : do {$used_deadkey = "/$deadkey"; $deadkey}); # XXXX `do' is needed, comma does not work
  0            
  0            
8696 0 0         my $self_OK = $1 ? $dia : 'n/a';
8697 0 0 0 0     return sub ($) { my $c = (shift() || '[none]'); $c = $c->[0] if 'ARRAY' eq ref $c; # Prefix key as usual letter
  0            
8698 0 0 0       ($c eq ' ' or $c eq $self_OK and defined $dia) ? $self->document_char($dia, $name) : undef }, $used_deadkey;
  0            
8699             }
8700 0 0         if ($name =~ /^ShiftFromTo\[(.+)\]$/) {
8701 0           my ($f,$t) = split /,/, "$1";
8702 0           $_ = hex $self->key2hex($self->charhex2key($_)) for $f, $t;
8703 0           $t -= $f; # Treat prefix keys as usual keys:
8704 0 0   0     return sub ($) { my $cc=my $c=shift; return $c unless defined $c; $c = $c->[0] if 'ARRAY' eq ref $c; $self->document_char(chr($t + ord $c), $name, $cc) }, '';
  0 0          
  0            
  0            
  0            
8705             }
8706 0 0         if ($name =~ /^SelectRX\[(.+)\]$/) {
8707 0           my ($rx) = qr/$1/; # Treat prefix keys as usual keys:
8708 0 0   0     return sub ($) { my $cc = my $c=shift; defined $c or return $c; $c = $c->[0] if 'ARRAY' eq ref $c; return undef unless $c =~ $rx; $cc }, '';
  0 0          
  0 0          
  0            
  0            
  0            
8709             }
8710 0 0         if ($name =~ /^FlipShift$/) {
8711 0 0   0     return sub ($) { my $c = shift; defined $c or return $c; map [@$_[1,0]], @$c }, '', 'all_layers';
  0            
  0            
  0            
8712             }
8713 0 0         if ($name =~ /^AssignTo\[(\w+),(\d+)\]$/) {
8714 0           my ($sec, $cnt) = ($1, $2);
8715 0 0         $cnt = 0, warn "Unrecognized section `$sec' in AssignTo" unless my $S = $start_SEC{$sec};
8716 0 0         warn("Too many keys ($cnt) put into section `$sec', max=$S->[1]"), $cnt = $S->[1] if $cnt > $S->[1];
8717 0 0   0     my $toTarget = sub { my $slot = shift; return unless $slot < $cnt; $slot + $S->[0] };
  0            
  0            
  0            
8718 0     0     return sub ($) { @{shift()} }, '', ['all_layers', $toTarget];
  0            
  0            
8719             }
8720 0 0         if ($name =~ /^FromTo(FlipShift)?\[(.+)\]$/) {
8721 0           my $flip = $1;
8722 0           my ($f,$t) = split /,/, "$2", 2;
8723             exists $self->{layers}{$_} or $_ = ($self->make_translated_layers($_, $face, [$N], $deadkey))->[0]
8724 0   0       for $f, $t; # Be conservative for caching...
8725 0           my $B = "~~~{$f>>>$t}";
8726 0           $_ = $self->{layers}{$_} for $f, $t;
8727 0           my (%h, $kk);
8728 0           for my $k (0..$#$f) {
8729 0 0 0       my @fr = map {($_ and ref) ? $_->[0] : $_} @{$f->[$k]};
  0            
  0            
8730 0 0 0       my @to = map {($_ and ref) ? $_->[0] : $_} @{$t->[$k]};
  0            
  0            
8731 0 0         if ($flip) {
8732 0 0         $h{defined($kk = $fr[$_]) ? $kk : ''} = $to[1-$_] for 0,1;
8733             } else {
8734 0 0         $h{defined($kk = $fr[$_]) ? $kk : ''} = $to[$_] for 0,1;
8735             }#
8736             } # Treat prefix keys as usual keys:
8737 0 0   0     return sub ($) { my $cc = my $c = shift; defined $c or return $c; $c = $c->[0] if 'ARRAY' eq ref $c; $self->document_char($h{$c}, $name, $cc) }, $B;
  0 0          
  0            
  0            
  0            
8738             }
8739 0 0         if ($name =~ /^InheritPrefixKeys\[(.+)\]$/) {
8740 0           my $base = $1;
8741             exists $self->{layers}{$_} or $_= ($self->make_translated_layers($_, $face, [$N], $deadkey))->[0]
8742 0   0       for $base;
8743 0           my $baseL = $self->{layers}{$base};
8744 0           my (%h);
8745 0           for my $k (0..$#$baseL) {
8746 0           for my $shift (0..1) {
8747 0 0         my $C = $baseL->[$k][$shift] or next;
8748 0 0 0       next unless ref $C and $C->[2]; # prefix
8749 0           $h{"$N $k $shift $C->[0]"} = $C;
8750             }
8751             } # Treat prefix keys as usual keys:
8752 0 0 0 0     return sub ($) { my $c = shift; defined $c or return $c; return $c if 'ARRAY' eq ref $c and $c->[2]; $h{"@_ $c"} or $c }, $base;
  0 0          
  0 0          
  0            
  0            
8753             }
8754 0 0         if ($name =~ /^ByColumns\[(.+)\]$/) {
8755 0 0         my @chars = map {length() ? $self->charhex2key($_) : undef} split /,/, "$1";
  0            
8756 0 0         my $g = $self->{faces}{$face}{'[geometry]'}
8757             or die "Face `$face' has no associated layer with geometry info; did you set geometry_via_layer?";
8758 0   0       my $o = ($self->{faces}{$face}{'[g_offsets]'} or [(0) x @$g]);
8759 0           $o = [@$o]; # deep copy
8760 0           my ($tot, %c) = 0;
8761             # warn "geometry: [@$g] [@$o]";
8762 0           for my $r (@$g) {
8763 0           my $off = shift @$o;
8764 0           $c{$tot + $_} = $_ + $off for 0..($r-1);
8765 0           $tot += $r;
8766             }
8767 0 0 0 0     return sub ($$$$) { (undef, my ($L, $k, $shift)) = @_; return undef if $L or $shift or $k >= $tot; $self->document_char($chars[$c{$k}], "ByColumn[$c{$k}]") }, '';
  0   0        
  0            
  0            
8768             }
8769 0 0         if ($name =~ /^ByRows\[(.+)\]$/) {
8770 0           s(^\s+(?!\s|///\s+))(), s((?
8771 0           my (@recipes, @subs) = split m(\s+///\s+), $recipes;
8772 0           my $LL = $#{ $self->{faces}{$face}{layers} }; # Since all_layers, we are called only for layer 0; subrecipes may need more
  0            
8773 0           for my $rec (@recipes) {
8774 0 0   0     push(@subs, sub {return undef}), next unless length $rec;
  0            
8775             #warn "recipe=`$rec'; face=`$face'; N=$N; deadkey=`$deadkey'; last_layer=$LL";
8776 0           my ($tr) = $self->make_translator_for_layers( $rec, $deadkey, $face, [0..$LL] );
8777             #warn " done";
8778 0           push @subs, $tr;
8779             }
8780 0 0         my $g = $self->{faces}{$face}{'[geometry]'}
8781             or die "Face `$face' has no associated layer with geometry info; did you set geometry_via_layer?";
8782 0           my ($tot, $row, %r) = (0, 0);
8783             # warn "geometry: [@$g] [@$o]";
8784 0           for my $r (@$g) {
8785 0           $r{$tot + $_} = $row for 0..($r-1);
8786 0           $tot += $r;
8787 0           $row++;
8788             }
8789             # return sub ($$$$) { (undef, undef, my $k) = @_; return undef if $k >= $tot; return undef if $#recipes < (my $r = $r{$k});
8790             # die "Undefined recipe: row=$row; face=`$face'; N=$N; deadkey=`$deadkey'; ARGV=(@_)" unless $subs[$r];
8791             # goto &{$subs[$r]} }, '';
8792 0 0 0 0     return sub ($$) { (undef, my $k) = @_; return [] if $k >= $tot or $#recipes < (my $r = $r{$k});
  0            
8793 0 0         die "Undefined recipe: row=$row; face=`$face'; N=$N; deadkey=`$deadkey'; ARGV=(@_)" unless $subs[$r];
8794 0           goto &{$subs[$r]} }, '', 'all_layers';
  0            
  0            
8795             }
8796 0 0         if ($name =~ /^(?:Diacritic|Mutate)(SpaceOK)?(Hack)?(2Self)?(DupsOK)?(32OK)?(?:\[(.+)\])?$/) {
8797 0           my ($spaceOK, $hack, $toSelf, $dupsOK, $w32OK) = ($1, $2, $3, $4, $5);
8798 0 0         my $Dia = ((defined $6) ? $6 : do {$used_deadkey ="/$deadkey"; $deadkey}); # XXXX `do' is needed, comma does not work
  0            
  0            
8799 0 0         if ($toSelf) {
8800 0 0         die "Mutate2Self does not make sense with SpaceOK/Hack/DupsOK/32OK" if grep $_, $hack, $spaceOK, $dupsOK, $w32OK;
8801 0           $Dia = $self->charhex2key($Dia);
8802 0 0         my(@sp, %sp) = map {(my $in = $_) =~ s/(?<=.)\@$//s; $in} @{ ($self->get_VK($face))->{SPACE} || [] };
  0            
  0            
  0            
8803 0           @sp = map $self->charhex2key($_), @sp;
8804 0           my $flip_AltGr = $self->{faces}{$face}{'[Flip_AltGr_Key]'};
8805 0 0         $flip_AltGr = $self->charhex2key($flip_AltGr) if defined $flip_AltGr;
8806 0 0         @sp = grep $flip_AltGr ne $_, @sp if defined $flip_AltGr; # It has a different function...
8807 0           @sp{@sp[1..$#sp]} = (0..$#sp); # The leading elt is the scancode
8808             # warn "SPACE on $Dia: <", join('> <', %sp), '>';
8809             return sub ($) {
8810 0     0     $self->document_chars_on_key([$self->diacritic2self_2($Dia, shift, $face, \%sp)], $name)
8811 0           }, $used_deadkey, 'all_layers';
8812             }
8813            
8814 0           my $isPrimary;
8815 0 0         $Dia =~ s/^\+// and $isPrimary++; # Wait until are expanded
8816            
8817 0           my $f = $self->get_NamesList;
8818 0 0         $self->load_compositions($f) if defined $f;
8819            
8820 0           $f = $self->get_AgeList;
8821 0 0 0       $self->load_uniage($f) if defined $f and not $self->{Age};
8822             # New processing: - = strip 1 from end; -3/ = strip 1 from the last 3
8823             #warn "Doing `$Dia'";
8824             #print "Doing `$Dia'\n";
8825             #warn "Age of <à> is <$self->{Age}{à}>";
8826 0           $Dia =~ s(){ (my $R = $1) =~ s/-/_/g;
  0            
8827 0 0         die "Named recipe `$1' unknown" unless exists $self->{faces}{$face}{"Named_DIA_Recipe__$R"};
8828             # (my $r = $self->{faces}{$face}{"Named_DIA_Recipe__$R"}) =~ s/^\s+//;
8829 0           $self->recipe2str($self->{faces}{$face}{"Named_DIA_Recipe__$R"}) }ge;
8830 0 0         $Dia =~ s/\|{3,4}/|/g if $isPrimary;
8831 0           my($skip, $limit, @groups, @groups2, @groups3) = (0);
8832 0           my($have4, @Dia) = (1, split /\|\|\|\|/, $Dia, -1);
8833 0 0         $have4 = 0, @Dia = split /\|\|\|/, $Dia, -1 if 1 == @Dia;
8834 0 0         if (1 < @Dia) {
8835 0 0         die "Too many |||- or ||||-sections in <$Dia>" if @Dia > 3;
8836 0           my @Dia2 = split /\|\|\|/, $Dia[1], -1;
8837 0 0         die "Too many |||-sections in the second ||||-section in <$Dia>" if @Dia2 > 2;
8838             # splice @Dia, 1, 1, @Dia2;
8839 0 0         @Dia2 = @Dia, shift @Dia2 unless $have4;
8840 0 0         $skip = (@Dia2 > 1 ? 1 + ($Dia2[0] =~ tr/|/|/) : 0);
8841 0 0 0       $Dia[1] .= "|$Dia[2]", pop @Dia if not $have4 and @Dia == 3;
8842             # $limit = 1 + ($Dia[-1] =~ tr/|/|/) + $skip;
8843 0           $limit = 0; # Not needed with the current logic...
8844 0           my @G = map [$self->dialist2lists($_)], @Dia; # will reverse when merging many into one cached...
8845 0           @groups = @{shift @G};
  0            
8846 0 0         @groups2 = @{shift @G} if @G;
  0            
8847 0 0         @groups3 = @{shift @G} if @G;
  0            
8848             } else {
8849 0           @groups = $self->dialist2lists($Dia);
8850             }
8851             #warn "Dia `$Dia' -> ", $self->array2string([$limit, $skip, @groups]);
8852 0           my $L = $self->{faces}{$face}{layers};
8853 0           my @L = map $self->{layers}{$_}, @$L;
8854 0   0       my $Sub = $self->{faces}{$face}{'[AltSubstitutions]'} || {};
8855             # warn "got AltSubstitutions: <",join('> <', %$Sub),'>' if $Sub;
8856             return sub {
8857 0     0     my $K = shift; # bindings of the key
8858 0 0         return ([]) x @$K unless grep defined, $self->flatten_arrays($K); # E.g, ByPairs and SelectRX produce many empty entries...
8859             #warn "Undefined base key for diacritic <$Dia>: <", join('> <', map {defined() ? $_ : '[undef]'} $self->flatten_arrays($K)), '>' unless defined $K->[0][0];
8860             #warn "Input for <$Dia>: <", join('> <', map {defined() ? $_ : '[undef]'} $self->flatten_arrays($K)), '>';
8861 0           my $base = $K->[0][0];
8862 0 0         $base = '' unless defined $base;
8863 0 0         $base = $base->[0] if ref $base;
8864 0 0 0       return ([]) x @$K if not $spaceOK and $base eq ' '; # Ignore possiblity that SPACE is a deadKey
8865 0           my $sorted = $self->sort_compositions(\@groups, $K, $Sub, $dupsOK, $w32OK);
8866 0           my ($sorted2, $sorted3, @idx_sorted3);
8867 0 0         $sorted2 = $self->sort_compositions(\@groups2, $K, $Sub, $dupsOK, $w32OK) if @groups2;
8868 0 0         $sorted3 = $self->sort_compositions(\@groups3, $K, $Sub, $dupsOK, $w32OK) if @groups3;
8869 0 0         @idx_sorted3 = @$sorted + (@groups2 ? @$sorted2 : 0) if @groups3; # used for warnings only
    0          
8870             $self->{faces}{$face}{'[in_dia_chains]'}{$_}++
8871 0 0 0       for grep defined, ($hack ? () : map {($_ and ref) ? $_->[0] : $_}
  0 0          
8872             # index as $res->[group][penalty_N][double_occ][layer][NN][shift]
8873 0 0         map {$_ ? @$_ : ()} map {$_ ? @$_ : ()} map {$_ ? @$_ : ()} map {$_ ? @$_ : ()} map {$_ ? @$_ : ()}
  0 0          
  0 0          
  0 0          
  0 0          
8874 0 0         @$sorted, @{$sorted2 || []}, @{$sorted3 || []});
  0 0          
8875             # map {($_ and ref) ? $_->[0] : $_} map @{$_||[]}, @out
8876 0           require Dumpvalue if printSORTEDLISTS;
8877 0           Dumpvalue->new()->dumpValue(["Key $base", $sorted]) if printSORTEDLISTS;
8878 0           warn $self->report_sorted_l($base, [@$sorted, @{$sorted2 || []}, @{$sorted3 || []}], [scalar @$sorted, $skip + scalar @{$sorted || []}, @idx_sorted3])
8879             if warnSORTEDLISTS;
8880 0           my $LLL = '';
8881 0 0         if ($sorted2) {
8882 0           my (@slots, @LL);
8883 0           for my $l (0..$#L) {
8884 0           push @slots, $self->shift_pop_compositions($sorted2, $l, !'from end', !'omit', $limit, $skip, my $ll = []);
8885 0           push @LL, $ll;
8886 0           print 'From Layers <', join('> <', map {defined() ? $_ : 'undef'} @$ll), ">\n" if printSORTEDLISTS;
8887 0           $LLL .= ' | ' . join(' ', map {defined() ? $_ : 'undef'} @$ll) if warnSORTEDLISTS;
8888             }
8889 0           print 'TMP Extracted ', $self->array2string($slots[0]), "\n" if printSORTEDLISTS;
8890 0           print 'TMP Extracted ', $self->array2string([@slots[1..$#slots]]), " deadKey=$deadkey\n" if printSORTEDLISTS;
8891 0   0       my $appended = $self->append_keys($sorted3 || $sorted2, \@slots, \@LL, !$sorted3 && 'prepend');
      0        
8892 0           Dumpvalue->new()->dumpValue(["Key $base; II", $sorted2]) if printSORTEDLISTS;
8893 0           if (warnSORTEDLISTS) {
8894             $LLL =~ s/^[ |]+//;
8895             $_++ for @idx_sorted3; # empty or 1 elt
8896             warn "TMP Extracted: ", $self->array2string(\@slots), " from layers $LLL\n"; # 1 is for what is prepended by append_keys()
8897             warn $self->report_sorted_l($base, [@$sorted, @$sorted2, @{$sorted3 || []}], # Where to put bold/dotted-bold separators:
8898             [scalar @$sorted, !!$appended + $skip + scalar @$sorted, @idx_sorted3], ($appended ? [1 + scalar @$sorted] : ()));
8899             }
8900             }
8901 0           my(@out, %seen);
8902 0           for my $Ln (0..$#L) {
8903 0           $out[$Ln] = $self->shift_pop_compositions($sorted, $Ln);
8904 0 0 0       $seen{$_}++ for grep defined, map {($_ and ref) ? $_->[0] : $_} @{$out[$Ln]};
  0            
  0            
8905             }
8906 0           for my $L (@out) { # $L is an array indexed by shift state
8907 0 0 0       $L = [map {(not $_ or ref $_) ? $_ : [$_,undef,undef,'Diacritic operator']} @$L];
  0            
8908             }
8909             # Insert non-yet-inserted characters from $sorted2, $sorted3
8910 0           for my $extra (['from end', $sorted2, 2], [0, $sorted3, 3]) {
8911 0 0         next unless $extra->[1];
8912 0           $self->deep_undef_by_hash(\%seen, $extra->[1]);
8913 0           for my $Ln (0..$#L) {
8914 0           my $o = $out[$Ln];
8915 0 0 0       unless (defined $o->[0] and defined $o->[1]) {
8916 0           my $o2 = $self->shift_pop_compositions($extra->[1], $Ln, $extra->[0], !'omit', !'limit', 0, undef, defined $o->[0], defined $o->[1]);
8917 0 0 0       $o2 = [map {(!defined $_ or ref) ? $_ : [$_,undef,undef,"Diacritic operator (choice $extra->[2])"]} @$o2];
  0            
8918 0   0       defined $o->[$_] or $o->[$_] = $o2->[$_] for 0,1;
8919 0 0 0       $seen{$_}++ for grep defined, map {($_ and ref) ? $_->[0] : $_} @$o;
  0            
8920             }
8921             }
8922             }
8923 0           print 'Extracted ', $self->array2string(\@out), " deadKey=$deadkey\n" if printSORTEDLISTS;
8924 0           warn 'Extracted ', $self->array2string(\@out), " deadKey=$deadkey\n" if warnSORTEDLISTS;
8925             $self->{faces}{$face}{'[from_dia_chains]'}{$_}++
8926 0 0 0       for grep defined, ($hack ? () : map {($_ and ref) ? $_->[0] : $_} map @{$_||[]}, @out);
  0 0          
  0 0          
8927             #warn "Age of <à> is <$self->{Age}{à}>";
8928             #warn "Output: <", join('> <', map {defined() ? $_ : '[undef]'} $self->flatten_arrays(\@out)), '>';
8929 0           return @out;
8930 0           }, $used_deadkey, 'all_layers';
8931             }
8932 0 0         if ($name =~ /^DefinedTo\[(.+)\]$/) {
8933 0           my $to = $self->charhex2key($1);
8934 0 0   0     return sub ($) { my $c = shift; defined $c or return $c; $self->document_char($to, 'DefinedTo', $c) }, '';
  0            
  0            
  0            
8935             }
8936 0 0         if ($name =~ /^ByPairs((Inv)?Prefix)?(Apple)?\[(.+)\]$/) {
8937 0           my ($prefix, $invert, $Apple, $in, @Pairs, %Map) = ($1, $2, $3, $4);
8938 0           $in =~ s/^\s+//;
8939 0           @Pairs = split /\s+(?!\p{NonspacingMark})/, $in;
8940 0           for my $p (@Pairs) {
8941 0           while (length $p) {
8942 0 0         die "Odd number of characters in a ByPairs map <$in>"
8943             unless $p =~ s/^((?:\p{Blank}\p{NonspacingMark}|(?:\b\.)?[0-9a-f]{4,}\b(?:\.\b)?|.){2})//i;
8944 0           (my $Pair = $1) =~ s/\p{Blank}//g;
8945             #warn "Pair = <$Pair>";
8946             # Cannot do it earlier, since HEX can introduce new blanks
8947 0           $Pair =~ s/(?<=[0-9a-f]{4})\.$//i; # Remove . which was on \b before extracting substring
8948 0           $Pair = $self->stringHEX2string($Pair);
8949             #warn " --> <$Pair>";
8950 0 0         die "Can't split ByPairs rule into a pair: I see <$Pair>" unless 2 == scalar (my @c = split //, $Pair);
8951             die qq("From" character <$c[0] duplicated in a ByPairs map <$in>)
8952 0 0         if exists $Map{$c[0]};
8953 0 0         $Map{$c[0]} = ($prefix ? [$c[1], undef, ($invert ? 3 : 1)<<3] : $c[1]); # massage_imported2 makes >> 3
    0          
8954             }
8955             }
8956 0 0         die "Empty ByPairs map <$in>" unless %Map; # Treat prefix keys as usual keys:
8957 0 0         if ($Apple) {
8958 0           $self->{faces}{$face}{'[AppleMap]'}[$N]{$_} = $Map{$_} for keys %Map;
8959 0           %Map = ();
8960             }
8961 0 0   0     return sub ($) { my $c = shift; defined $c or return $c; $c = $c->[0] if 'ARRAY' eq ref $c; $self->document_char($Map{$c}, 'explicit tuneup') }, '';
  0 0          
  0            
  0            
  0            
8962             }
8963 0           my $map = $self->get_deep($self, 'DEADKEYS', split m(/), $name);
8964 0 0         die "Can't resolve character map `$name'" unless defined $map;
8965 0 0         unless (exists $map->{map}) {{
8966 0           my($k1) = keys %$map;
  0            
8967 0 0 0       die "Character map `$name' does not contain HEX: `$k1'" if %$map and not $k1 =~ /^[0-9a-f]{4,}$/;
8968 0 0         die "Character map is a parent-type map, but no deadkey to use specified" unless defined $deadkey;
8969 0           my $Map = { map +(chr hex $_, $map->{$_}), keys %$map };
8970             die "Character map `$name' does not contain `$deadkey', contains <", (join '> <', keys %$map), ">"
8971 0 0         unless exists $Map->{chr hex $deadkey};
8972 0 0         $map = $Map->{chr hex $deadkey}, $used_deadkey = "/$deadkey" if %$Map;
8973 0 0         $map = {map => {}}, warn "Character map for `$name' empty" unless %$map;
8974             }}
8975 0 0         die "Can't resolve character map `$name' `map': <", (join '> <', %$map), ">" unless defined $map->{map};
8976 0           $map = $map->{map};
8977 0           my $Map = { map +(chr hex $_, chr hex($map->{$_})), keys %$map }; # hex form is not unique
8978             ( sub ($) { # Treat prefix keys as usual keys:
8979 0 0   0     my $c = shift; defined $c or return $c; $c = $c->[0] if 'ARRAY' eq ref $c; $self->document_char($Map->{$c}, "DEADKEYS=$name")
  0 0          
  0            
  0            
8980 0           }, $used_deadkey )
8981             }
8982            
8983             sub depth1_A_translator($$) { # takes a ref to an array of chars
                 # Lifts the per-element translator $tr (a code ref) to whole arrays:
                 # returns a closure which takes an array ref and returns a new array ref
                 # holding the results of calling $tr on every element.
                 # $tr is called in list context (inside map), so all the values it returns are kept.
8984 0     0 0   my ($self, $tr) = (shift, shift);
8985             return sub ($) {
8986 0     0     my $in = shift;
8987 0           [map $tr->($_), @$in]
8988             }
8989 0           }
8990            
8991             sub depth2_translator($$) { # takes a ref to an array of arrays of chars
                 # $tr is a ref to an array of code refs, one translator per layer.
                 # Returns a closure taking ($in, $k): $in is indexed as $in->[layer][position],
                 # $k is passed through to the translators unchanged.
                 # The closure returns a list with one array ref per layer of $in.
8992 0     0 0   my ($self, $tr) = (shift, shift);
8993             return sub ($$) {
8994 0     0     my ($in, $k, @out) = (shift, shift);
8995 0           for my $L (0..$#$in) {
8996 0           my $Tr = $tr->[$L];
8997 0 0         die "Undefined translator for layer=$L; total=", scalar @$tr unless defined $Tr;
                 # Each translator is called as (element, layer index, $k, position inside the layer)
8998 0           push @out, [map $Tr->($in->[$L][$_], $L, $k, $_), 0..$#{$in->[$L]}]
  0            
8999             }
9000             @out
9001 0           }
9002 0           }
9003            
9004             sub make_translator_for_layers ($$$$$) { # translator may take some values from "environment"
9005             # (such as which deadkey is processed), so caching is tricky: if does -> $used_deadkey reflects this
9006             # The translator should return exactly one value (possibly undef) so that map TRANSLATOR, list works intuitively.
                 # Arguments: ($self, $name, $deadkey, $face, $NN); a false $deadkey is replaced by 0,
                 # $NN is a ref to an array of layer numbers.
                 # Returns a list of 3 values: the translator, a ref to an array with one "used" string
                 # per element of @$NN, and $cvt (undef unless make_translator() supplied it).
9007 0   0 0 0   my ($self, $name, $deadkey, $face, $NN) = (shift, shift, shift || 0, shift, shift); # $deadkey used eg for diagnostics
9008 0           my ($Tr, $used, $for_layers) = $self->make_translator( $name, $deadkey, $face, $NN->[0] );
                 # The third value returned by make_translator() may be an array ref [flag, converter]; unpack it
9009 0 0         ($for_layers, my $cvt) = (ref $for_layers ? @$for_layers : $for_layers);
                 # With a true flag, the translator made for the first layer number is returned as is
9010 0 0         return $Tr, [map "$used![$_]", @$NN], $cvt if $for_layers;
                 # Otherwise make one translator per layer number, and combine them via depth2_translator()
9011 0           my @Tr = map [$self->make_translator($name, $deadkey, $face, $_)], @$NN;
9012 0           $self->depth2_translator([map $_->[0], @Tr]), [map $_->[1], @Tr], $cvt;
9013             }
9014            
9015             sub make_translated_layers_tr ($$$$$$$) { # Apply translation map
                 # Arguments: ($self, $layers, $tr, $append, $deadkey, $face, $NN);
                 # $layers is a ref to an array of names of layers (keys of $self->{layers}),
                 # $tr is a translator name (as taken by make_translator_for_layers()), $NN is a ref to an array of layer numbers.
                 # Returns the list of names of the translated layers; the layers which do not exist yet
                 # are constructed and stored in $self->{layers}.
9016 0     0 0   my ($self, $layers, $tr, $append, $deadkey, $face, $NN) = (shift, shift, shift, shift, shift, shift, shift);
9017 0           my ($Tr, $used, $cvt) = $self->make_translator_for_layers($tr, $deadkey, $face, $NN);
9018             #warn " tr=<$tr>, key=<$deadkey>, used=<$used>";
                 # The name encodes the translator, its "used" string, the source layer, and $append (followed by the layer number when $append is true)
9019 0   0       my @new_names = map "$tr$used->[$_]($layers->[$_])$append" . ($append and $NN->[$_]), 0..$#$NN;
                 # Nothing to do if every layer with such a name already exists
9020 0 0         return @new_names unless grep {not exists $self->{layers}{$_}} @new_names;
  0            
9021             # warn "Translating via `$tr' from layer [$layer]: <", join('> <', map "@$_", @{$self->{layers}{$layer}}), '>';
9022 0           my (@L, @LL) = map $self->{layers}{$_}, @$layers;
9023 0           for my $n (0..$#{$L[0]}) { # key number
  0            
9024 0           my @C = $Tr->( [ map $L[$_][$n], 0..$#L ], $n ); # rearrange one key into $X[$Layer][$shift]
9025 0 0         if ($cvt) {
                 # $cvt maps the key number to the target slot; keys for which it returns undef are skipped
9026 0   0       defined $cvt->($n) and $LL[$_][$cvt->($n)] = $C[$_] for 0..$#L;
9027             } else {
9028 0           push @{$LL[$_]}, $C[$_] for 0..$#L;
  0            
9029             }
9030             }
9031 0           $self->{layers}{$new_names[$_]} = $LL[$_] for 0..$#L;
9032             @new_names
9033 0           }
9034            
9035             sub key2string ($$) {
                 # Makes a short printable form of one key ($key is a ref to an array of its bindings):
                 # '<>' for an undefined key, '[]' for a key with no defined bindings,
                 # otherwise the bindings interpolated into "[...]".
9036 0     0 0   my ($self, $key, @o) = (shift, shift);
9037 0 0         return '<>' unless defined $key;
9038 0 0         return '[]' unless grep defined, @$key;
9039 0           for my $k (@$key) {
9040 0 0         push(@o, 'undef'), next unless defined $k;
                 # A binding which is a reference is shown by its element 0 (or '' if that is undefined)
9041 0 0         push @o, ((ref $k) ? (defined $k->[0] ? $k->[0] : '') : $k);
    0          
9042             }
9043             "[@o]"
9044 0           }
9045            
9046             sub layer2string ($$) {
                 # Makes a printable form of a layer ($layer is a ref to an array of keys): the key2string() forms
                 # joined by spaces. Trailing keys printed as '<>' or '[]' are cut off and replaced by '...'.
9047 0     0 0   my ($self, $layer, $last, $rest) = (shift, shift, -1, '');
9048 0           my @o = map $self->key2string($_), @$layer;
                 # $last becomes the index of the last key with string form longer than 2 chars (-1 if none)
9049 0   0       2 < length $o[$_] and $last = $_ for 0..$#o;
9050 0 0         $rest = '...' if $last != $#o;
9051 0           (join ' ', @o[0..$last]) . $rest
9052             }
9053            
9054             sub make_translated_layers_stack ($$@) { # Stacking
                 # Arguments after $self: array refs, each holding the same number of layer names
                 # (dies otherwise; the count is taken from the first one).
                 # For every layer number, the layers at this position are "stacked": every slot of every key
                 # gets the first defined binding among these layers (in the order of arguments).
                 # The result is stored in $self->{layers} under the name "@layers" (unless it is already there).
                 # Returns the list of names of the stacked layers.
9055 0     0 0   my ($self, @out, $ref) = (shift);
9056 0           my $c = @{$_[0]};
  0            
9057             @$_ == $c or die "Stacking: number of layers ", scalar(@$_), " != number of layers $c of the first elt"
9058 0   0       for @_;
9059 0           for my $lN (0..$c-1) { # layer Number
9060 0           my @layers = map $_->[$lN], @_;
9061 0           push @out, "@layers";
9062 0           if (debug_stacking) {
9063             warn "Stack in-layer $lN `$_': ", $self->layer2string($self->{layers}{$_}), "\n" for @layers;
9064             }
9065 0 0         next if exists $self->{layers}{"@layers"};
9066 0           my (@L, @keys) = map $self->{layers}{$_}, @layers;
9067 0           for my $lI (0..$#L) {
9068 0           my $l = $L[$lI];
9069             # warn "... Layer$lN: `$layers[$lI]'..." if debug_stacking;
9070 0           for my $k (0..$#$l) {
9071 0           for my $kk (0..$#{$l->[$k]}) {
  0            
9072 0           if (debug_STACKING and defined( my $cc = $l->[$k][$kk] )) {
9073             $cc = $cc->[0] if ref $cc;
9074             warn "...... On $k/$kk (${lI}th lN=$lN): I see `$cc': ", !defined $keys[$k][$kk], "\n" ;
9075             }
                 # Earlier layers win: a slot which is already filled is not overwritten
9076 0 0 0       $keys[$k][$kk] = $l->[$k][$kk] if defined $l->[$k][$kk] and not defined $keys[$k][$kk]; # Shallow copy
9077             }
                 # Make sure that the key is present (as an empty array) even if no binding was found
9078 0   0       $keys[$k] ||= [];
9079             }
9080             }
9081 0           $self->{layers}{"@layers"} = \@keys;
9082 0           warn "Stack out-layer $lN `@layers':\n\t", $self->layer2string(\@keys), "\n" if debug_stacking;
9083             }
9084 0           warn 'Stack out-layers:', (join "\n\t", '', @out), "\n" if debug_stacking;
9085 0           @out;
9086             }
9087            
9088             sub make_translated_layers_noid ($$$@) { # Stacking which omits bindings present in the reference layers
                 # Arguments: ($self, $whole, $refr, LIST); $refr is a ref to an array of names of the reference layers,
                 # every element of LIST is a ref to an array of layer names with the same count as @$refr (dies otherwise).
                 # Works as stacking (the first acceptable defined binding wins), but a binding is acceptable only if
                 #   - with a true $whole: its character is not bound on the same key in any of the reference layers;
                 #   - otherwise: its character differs from the one in the same slot of the matching reference layer.
                 # The results are stored in $self->{layers} (unless already there); returns the list of their names.
9089 0     0 0   my ($self, $whole, $refr, @out, $ref, @seen) = (shift, shift, shift);
9090 0           my $c = @$refr;
9091             #warn "noid: join ", scalar @_, " faces of $c layers; ref=[@$refr] first=[@{$_[0]}]";
9092             @$_ == $c or die "Stacking: number of layers ", scalar(@$_), " != number of layers $c of the reference face"
9093 0   0       for @_;
9094 0           my @R = map $self->{layers}{$_}, @$refr;
9095 0 0         if ($whole) {
                 # $seen[KEY]{CHAR} counts the occurrences of CHAR on the key KEY over all the reference layers
9096 0           my $last = $#{$R[0]};
  0            
9097 0           for my $key (0..$last) {
9098 0           for my $l (@R) {
9099 0 0         $seen[$key]{$_}++ for map {ref() ? $_->[0] : $_} grep defined, @{ $l->[$key] };
  0            
  0            
9100             #warn "$key of $last: keys=", join(',',keys %{$seen[$key]});
9101             }
9102             }
9103             }
9104 0           my $name = 'NOID([' . join('], [', map {join ' +++ ', @$_} @_) . '])';
  0            
9105 0           for my $l (0..$c-1) {
9106 0           my (@layers) = map $_->[$l], @_;
9107 0 0         if ($whole) {
9108 0           $name .= "'" # Keep names of layers distinct, but since they are all interdependent, do not construct basing on layer names
9109             } else {
9110 0           $name = "NOID[$refr->[$l]](" . (join ' +++ ', @layers) . ')'
9111             }
9112 0           push @out, $name;
9113             #warn ". Doing layer number $l, name=`$name'...";
9114 0 0         next if exists $self->{layers}{$name};
                 # $Refr takes the reference layer, @L the layers to merge (below the reference is accessed via $R[$l])
9115 0           my ($Refr, @L, @keys) = map $self->{layers}{$_}, $refr->[$l], @layers;
9116 0           for my $ll (@L) {
9117             #warn "... Another layer for $l...";
9118 0           for my $k (0..$#$ll) {
9119 0           for my $kk (0..$#{$ll->[$k]}) {
  0            
9120             #warn "...... On $k/$kk: I see `$ll->[$k][$kk]'; seen=`$seen[$k]{$ll->[$k][$kk]}'; keys=", join(',',keys %{$seen[$k]}) if defined $ll->[$k][$kk];
9121 0           my $ch = $ll->[$k][$kk];
9122 0           my $rch = $R[$l][$k][$kk];
                 # Compare by the character: for a binding which is a reference, this is its element 0
9123 0 0 0       $ch = $ch->[0] if $ch and ref $ch;
9124 0 0 0       $rch = $rch->[0] if $rch and ref $rch;
9125             $keys[$k][$kk] = $ll->[$k][$kk] # The entry is copied as is (a binding which is a reference is shared, not cloned)
9126             if defined $ch and not defined $keys[$k][$kk]
9127 0 0 0       and ($whole ? !$seen[$k]{$ch} : $ch ne ( defined $rch ? $rch : '' ));
    0 0        
    0          
9128             }
9129 0   0       $keys[$k] ||= [];
9130             }
9131             }
9132 0           $self->{layers}{$name} = \@keys;
9133             }
9134 0           warn "NOID --> <@out>\n" if debug_noid;
9135 0           @out;
9136             }
9137            
9138             sub paren_match_q ($$) {
                 # True if $s contains as many `(' as `)'; only the counts are compared, not the nesting.
                 # (tr/// with identical search and replacement lists just counts the occurrences.)
9139 0     0 0   my ($self, $s) = (shift, shift);
9140 0           ($s =~ tr/(/(/) == ($s =~ tr/)/)/)
9141             }
9142            
9143             sub brackets_match_q ($$) {
                 # True if $s contains as many `[' as `]'; only the counts are compared, not the nesting.
                 # (tr/// with identical search and replacement lists just counts the occurrences.)
9144 0     0 0   my ($self, $s) = (shift, shift);
9145 0           ($s =~ tr/[/[/) == ($s =~ tr/]/]/)
9146             }
9147            
9148             sub join_min_paren_brackets_matched ($$@) {
                 # Arguments: ($self, $join, LIST of strings). Glues consecutive strings of LIST together (with $join
                 # in between) until the glued string has matching counts of () and of [] (as checked by
                 # paren_match_q() and brackets_match_q()). Returns the list of the resulting strings.
                 # If the LIST is exhausted first, the last string is returned as it is, even if unbalanced.
9149 0     0 0   my ($self, $join, @out) = (shift, shift, shift);
9150             #warn 'joining <', join('> <', @out, @_),'>';
9151 0           while (@_) {
                 # Extend the last output string while it is unbalanced and there is more input
9152 0   0       while (@_ and not ($self->paren_match_q($out[-1]) and $self->brackets_match_q($out[-1]))) {
      0        
9153 0           $out[-1] .= $join . shift;
9154             }
                 # The last output string is balanced now (or the input is exhausted): start a new one
9155 0 0         push @out, shift if @_;
9156             }
9157             @out
9158 0           }
9159            
9160             sub face_by_face_recipe ($$$) {
                 # Arguments: ($self, $f, $base). Fills $self->{faces}{$f}{layers} using the face recipe for the face $f.
                 # Does nothing if the face $f already has layers, or has no entry in $self->{face_recipes}.
                 # Dies if numLayers is not defined for the face $base.
9161 0     0 0   my($self, $f, $base) = (shift, shift, shift);
9162 0 0         return if $self->{faces}{$f}{layers};
9163 0 0         return unless $self->{face_recipes}{$f};
9164             die "Can't determine number of layers in face `$f': face_recipe exists, but not numLayers"
9165 0 0         unless defined (my $n = $self->{faces}{$base}{numLayers});
9166 0           warn "Massaging face `$f': use face_recipes...\n" if debug_face_layout_recipes;
9167 0           $self->{faces}{$f}{layers} = [('Empty') x $n]; # Preliminary (so know the length???)
                 # The placeholder above is replaced by the layers calculated from the recipe
9168 0           $self->{faces}{$f}{layers} = $self->layers_by_face_recipe($f, $base);
9169             }
9170            
9171             sub layers_by_face_recipe ($$$;$) {
                 # Arguments: ($self, $face, $base, $r); $r is an optional recipe used only if the face $face
                 # has no true entry in $self->{face_recipes} (dies if neither is available).
                 # The recipe is passed through recipe2str(), then applied to the face $base via make_translated_layers()
                 # for all the layer numbers of $base. The result is stored as $self->{faces}{$face}{layers}, and returned.
9172 0     0 0   my ($self, $face, $base, $r) = (shift, shift, shift, shift);
9173 0 0 0       die "No face recipe for `$face' found" unless my $R = $self->{face_recipes}{$face} or defined $r;
                 # The stored recipe has precedence over the argument
9174 0 0         $r = $R if $R;
9175 0           $r = $self->recipe2str($r);
9176             #print "face recipe `$face'\n";
9177 0           my $LL = $self->{faces}{$base}{layers};
9178 0           warn "Using face_recipes for `$face', base=$base ==> `$r'\n" if debug_face_layout_recipes;
9179 0           my $L = $self->{faces}{$face}{layers} = $self->make_translated_layers($r, $base, [0..$#$LL]);
9180             #print "face recipe `$face' -> ", $self->array2string($L), "\n";
9181             # warn "Using face_recipes `$face' -> ", $self->array2string($L) if debug_face_layout_recipes;
9182 0           warn "Massaged face `$face' ->", (join "\n\t", '', @$L), "\n" if debug_face_layout_recipes;
9183             #warn "face recipe `$face' --> ", $self->array2string([map $self->{layers}{$_}, @$L]);
9184 0           $L;
9185             }
9186            
9187             sub export_layers ($$$;$) {
                 # Arguments: ($self, $face, $base, $full). Returns the first true value among:
                 #   (unless $full is true) the '[ini_layers_prefix]', then the '[ini_layers]' entry of the face $face;
                 #   the `layers' entry of the face $face;
                 #   the result of layers_by_face_recipe($face, $base).
                 # (Since ?: binds looser than ||, both '[ini_*]' entries belong to the "not $full" branch.)
9188 0     0 0   my ($self, $face, $base, $full) = (shift, shift, shift, shift);
9189             # warn "Doing FullFace on <$face>, base=<$base>\n" if $full;
9190             ($full ? undef : $self->{faces}{$face}{'[ini_layers_prefix]'} || $self->{faces}{$face}{'[ini_layers]'}) ||
9191             $self->{faces}{$face}{layers}
9192 0 0 0       || $self->layers_by_face_recipe($face, $base)
    0 0        
9193             }
9194            
9195             sub pseudo_layer ($$$$) {
                 # Arguments: ($self, $recipe, $face, $N). Resolves a pseudo-layer recipe to the name of a layer:
                 # the recipe is first translated by pseudo_layer0(); if the result is a key of $self->{layer_recipes},
                 # it is replaced by the matching layer recipe (with the leading whitespace removed).
                 # A string which names an existing layer is returned as is; otherwise it is treated as a recipe
                 # for make_translated_layers() with the layer number $N, and the first resulting name is returned.
9196 0     0 0   my ($self, $recipe, $face, $N) = (shift, shift, shift, shift);
                 # $ll keeps the original value for the lookup in layer_recipes after $l is overwritten
9197 0           my $ll = my $l = $self->pseudo_layer0($recipe, $face, $N);
9198             # warn "Pseudo-layer recipe `$recipe', face=`$face', N=$N ->\n\t$l\n" if $recipe =~ /Greek__/;
9199             #warn("layer recipe: `$l'"),
9200 0 0         ($l = $self->layer_recipe($l)) =~ s/^\s+// if exists $self->{layer_recipes}{$ll};
9201 0           warn "pseudo_layer(`$recipe'): Using layout_recipe `$l' for layer '$ll'\n" if debug_face_layout_recipes and exists $self->{layer_recipes}{$ll};
9202 0 0         return $l if $self->{layers}{$l};
9203 0           ($self->make_translated_layers($l, $face, [$N]))->[0]
9204             # die "Component `$l' of a pseudo-layer cannot be resolved"
9205             }
9206            
# Resolve one elementary face recipe to (the name of) a single layer.
#   $recipe  one of: LinkFace, Self, Layers(A+B+...), FlipLayersLinkFace, FlipLayers,
#            [Full]Face(NAME), [Full]FlipLayers(NAME), prefix[NOTSAME[case]]=KEY
#   $face    name of the face the recipe is evaluated for (a key of $self->{faces})
#   $N       requested layer number
# Returns the selected layer name; for Layers(...) this is the $N-th `+'-separated
# component as written in the recipe.  Dies on a missing LinkFace, an unknown
# prefix key, or an unrecognized recipe.
sub pseudo_layer0 ($$$$) {
  my ($self, $recipe, $face, $N) = (shift, shift, shift, shift);
  if ($recipe eq 'LinkFace') {
    my $L = $self->{faces}{$face}{LinkFace} or die "Face `$face' has no LinkFace";
    return ($self->export_layers($L, $face))->[$N];
  }
  return ($self->export_layers($face, $face))->[$N] if $recipe eq 'Self';
  if ($recipe =~ /^Layers\((.*\+.*)\)$/) {
    my @L = split /\+/, "$1";
    return $L[$N];
  }
  # Layer number to use by the "flipping" recipes
  my $N1 = $self->flip_layer_N($N, $#{ $self->{faces}{$face}{layers} });
  if ($recipe eq 'FlipLayersLinkFace') {
    my $L = $self->{faces}{$face}{LinkFace} or die "Face `$face' has no LinkFace";
    return ($self->export_layers($L, $face))->[$N1];
  }
  #warn "Doing flip/face via `$recipe', N=$N, N1=$N1, face=`$face'";
  return ($self->export_layers($face, $face))->[$N1] if $recipe eq 'FlipLayers';
  # my $gr_debug = ($recipe =~ /Greek__/);
  if (debug_PERL_dollar1_scoping) {
    return ($self->export_layers("$3", $face, !!$1))->[$2 ? $N : $N1]
      if $recipe =~ /^(Full)?(?:(Face)|FlipLayers)\((.*)\)$/;
  } else {
    my $m1;	# Apparently, in perl5.10, if replace $m1 by $1 below, $1 loses its TRUE value between match and evaluation of $1
    # ($gr_debug and warn "Pseudo-layer `$recipe', face=`$face', N=$N, N1=$N1\n"),
    return ($self->export_layers("$3", $face, !!$1))->[$m1 ? $N : $N1]
      if $recipe =~ /^(Full)?(?:(Face)|FlipLayers)\((.*)\)$/ and ($m1 = $2, 1);
  }
  if ($recipe =~ /^prefix(NOTSAME(case)?)?=(.+)$/) {	# `case´ unsupported
    # Analogue of NOID with the principal layers as reference, and layers of DeadKey as sources
    my($notsame, $case) = ($1,$2);
    my $hexPrefix = $self->key2hex($self->charhex2key($3));
    $self->ensure_DeadKey_Map($face, $hexPrefix);
    my $layers = $self->{faces}{$face}{'[deadkeyLayers]'}{$hexPrefix} or die "Unknown prefix character `$hexPrefix´ in layers-from-prefix-key";
    return $layers->[$N] if $N or not $notsame;
    # NOTSAME, layer 0: synthesize a layer (cached in $self->{layers} under $name)
    my $name = "NOTSAME[$face]$layers->[$N]";
    return $self->{layers}{$name} if $self->{layers}{$name};
    my @LL = map $self->{layers}{$_}, @$layers;		# contents of the layers of the prefix key
    my $L0 = $self->{faces}{$face}{layers};
    my @L0 = map $self->{layers}{$_}, @$L0;		# contents of the principal layers of the face
    my @OUT;
    # $face is the NAME of the face, so the key count lives in the face hash inside $self
    # (the original dereferenced the name itself, which dies under `use strict').
    for my $charN (0..$self->{faces}{$face}{'[non_VK]'}-1) {
      my (@L, %s) = map $_->[$charN], @LL;
      # Collect the characters present on this key in the principal layers; this
      # must scan the layer contents (@L0), not the list of layer names (@$L0).
      for my $layers0 (map $_->[$charN], @L0) {
        for my $sh (@$layers0) {
          $s{ref($sh) ? $sh->[0] : $sh}++ if defined $sh;
        }
      }
      my(@CC, @pp, @OK);
      for my $l (@L[0 .. (($notsame && !$N) ? @{ $self->{faces}{$face}{layers} } - 1 : 0)]) {
        my(%s1, @was, @out);
        for my $sh (0..$#$l) {		# $self->dead_with_inversion(!'hex', $_, $face, $self->{faces}{$face})
          my @C = map {defined() ? (ref() ? $_->[0] : $_) : $_} $l->[$sh];
          my @p = map {defined() ? (ref() ? $_->[2] : 0 ) : 0 } $l->[$sh];
          # Fallback: the first defined candidate, even when it duplicates a principal-layer char
          ($CC[$sh], $pp[$sh]) = ($C[0], $p[0]) if not defined $CC[$sh] and defined $C[0];
          # Preferred: a candidate not present on this key in the principal layers
          ($CC[$sh], $pp[$sh], $OK[$sh], $s1{$C[0]}) = ($C[0], $p[0], 1,1) if !$OK[$sh] and defined $C[0] and not $s{$C[0]};
          ($CC[$sh], $pp[$sh], $OK[$sh], $s1{$was[0]}) = (@was, 1,1)		# use unshifted if needed
            if $sh and !$OK[$sh] and defined $C[0] and defined $was[0] and not $s{$was[0]} and not $s1{$was[0]};
          @was = ($C[0], $p[0]) unless $sh;		# may omit `unless´
          # $cnt++ if defined $CC[$sh];
        }
      }
      push @OUT, \@CC;
    }
    $self->{layers}{$name} = \@OUT;
    return $name;
  }
  die "Unrecognized Face recipe `$recipe'"
}
9276            
9277             # my @LL = map $self->{layers}{'[ini_copy1]'}{$_} || $self->{layers}{'[ini_copy]'}{$_} || $self->{layers}{$_}, @$LL;
9278            
9279             # A stand-alone word is either LinkFace, or is interpreted as a name of
9280             # translation function applied to the current face.
9281             # A name which is an argument to a function is allowed to be a layer name
9282             # (but note that then both layers of the face will be mapped to that same
9283             # layer - unless one restricts the recipe to a particular layer 0/1 of the
9284             # face).
9285             # In particular: to specify a layer, use Id(LayerName).
9286             #use Dumpvalue;
9287             sub make_translated_layers ($$$$;$$) { # support Self/FlipLayers/LinkFace/FlipShift, stacking and maps
                 # Translate the face recipe $recipe for face $face into layers, one per layer number in @$NN.
                 #   $recipe   recipe string; may consist of several whitespace-separated parts
                 #   $face     name of the face (a key of $self->{faces})
                 #   $NN       array reference with the layer numbers to produce
                 #   $deadkey  optional; compared with the keys listed in If[Not]Prefix[...] and passed on to
                 #             make_translated_layers_tr()
                 #   $noid     optional; when true, a multi-part recipe is combined by make_translated_layers_noid()
                 #             (with a true first argument iff $noid is 'NotSameKey') instead of being stacked
                 # Returns an array reference.
9288 0     0 0   my ($self, $recipe, $face, $NN, $deadkey, $noid, $append, $ARG) = (shift, shift, shift, shift, shift, shift, '');
9289             # XXX We can't cache created layer by name, since it depends on $recipe and $N too???
9290             # return $recipe if exists $self->{layers}{$recipe};
9291             # my $FACE = $recipe . join '===', '', @$NN, '';
9292             # return $self->{faces}{$FACE}{layers} if exists $self->{faces}{$FACE}{layers};
                 # Elementary recipes are resolved layer-by-layer by pseudo_layer()
9293 0 0         return [map $self->pseudo_layer($recipe, $face, $_), @$NN]
9294             if $recipe =~ /^(prefix(?:NOTSAME(?:case)?)?=.*|(FlipLayers)?LinkFace|FlipLayers|Self|((Full)?(Face|FlipLayers)|Layers)\([^()]+\))$/;
                 # A bare FlipShift is a shorthand for FlipShift(Self)
9295 0           $recipe =~ s/^(FlipShift)$/$1(Self)/;
                 # Split on whitespace, but rejoin the pieces via join_min_paren_brackets_matched()
9296 0 0         my @parts = grep /\S/, $self->join_min_paren_brackets_matched('', split /(\s+)/, $recipe)
9297             or die "Whitespace face recipe `$recipe'?!";
9298 0 0         if (@parts > 1) {
                 # Several parts: translate each one separately (note: $noid is not passed down), then combine
9299             #warn "parts of the translation spec: <", join('> <', @parts), '>';
9300 0           my @layers = map $self->make_translated_layers($_, $face, $NN, $deadkey), @parts;
9301 0           warn "Stacking/NOID for layers `@parts'", (join "\n\t", '', map {join ' &&& ', @$_} @layers), "\n" if debug_noid or debug_stacking;
9302             #print "Stacking for `$recipe'\n" if $DEBUG;
9303             #Dumpvalue->new()->dumpValue(\@layers) if $DEBUG;
9304 0 0         return [$self->make_translated_layers_noid($noid eq 'NotSameKey', @layers)]
9305             if $noid;
9306 0           return [$self->make_translated_layers_stack(@layers)];
9307             }
                 # A single part: split it into the function name ($recipe) and its argument ($ARG)
9308 0 0         if ( $recipe =~ /\)$/ ) {
9309 0 0         if ( $recipe =~ /^[^(]*\[/ ) { # Tricky: allow () inside Func[](args)
                 # Look for the first `](' such that the text up to and including the `]' passes brackets_match_q()
9310 0           my $pos;
9311 0           while ( $recipe =~ /(?=\]\()/g ) {
9312 0 0         $pos = 1 + pos $recipe, last if $self->brackets_match_q(substr $recipe, 0, 1 + pos $recipe)
9313             }
9314 0 0         die "Can't parse `$recipe' as Func[Arg1](Arg2)" unless $pos;
9315 0           $ARG = substr $recipe, $pos + 1, length($recipe) - $pos - 2;
9316 0           $recipe = substr $recipe, 0, $pos;
9317             } else {
9318 0           my $o = $recipe;
9319 0 0         ($recipe, $ARG) = ($recipe =~ /^(.*?)\((.*)\)$/s) or warn "Can't parse recipe `$o'";
9320             }
9321             } else {
9322 0           $ARG = '';
9323             }
9324             #warn "Translation sub-spec: recipe = <$recipe>, ARG=<$ARG>";
9325 0 0         if ($recipe =~ /^If(Not)?Prefix\[(.*)\]$/s) { # No embedded \\]
                 # IfPrefix[k1,k2,...](ARG): use ARG when $deadkey is one of the listed keys (IfNotPrefix: when it is not);
                 # otherwise fall through with the recipe `Empty' applied to `Empty' layers
9326 0           my $neg = $1;
9327 0           my @prefix = map $self->charhex2key($_), split /,/, "$2";
9328 0 0 0       return $self->make_translated_layers($ARG, $face, $NN, $deadkey, $noid) if $neg xor grep $_ eq $deadkey, @prefix;
9329 0           ($recipe, $ARG) = ('Empty', [('Empty') x @$NN]);
9330             }
9331 0 0         if (length $ARG) {
9332 0 0         if (exists $self->{layers}{$ARG}) {
                 # The argument is a layer name: the same layer serves every requested layer number
9333 0           $ARG = [($ARG) x @$NN];
9334             } else {
                 # Otherwise the argument is itself a recipe (possibly the name of a layer recipe); recurse
9335 0 0         ($ARG = $self->layer_recipe($ARG)) =~ s/^\s+// if exists $self->{layer_recipes}{my $a = $ARG};
9336 0           warn "make_translated_layers: Using layout_recipe `$ARG' for layer '$a'\n" if debug_face_layout_recipes and exists $self->{layer_recipes}{$a};
                 # For NotId(...) / NotSameKey(...) the recursive call already does the combining, so its result is final
9337 0           ($noid) = ($recipe =~ /^(NotId|NotSameKey)$/);
9338 0           $ARG = $self->make_translated_layers($ARG, $face, $NN, $deadkey, $noid);
9339 0 0         return $ARG if $noid;
9340             }
9341             } else {
                 # No argument: the function is applied to the face's own layers
9342 0           $ARG = [map $self->{faces}{$face}{layers}[$_], @$NN];
9343 0           $append = "#$face#";
9344             }
9345 0           [$self->make_translated_layers_tr($ARG, $recipe, $append, $deadkey, $face, $NN)]; # Either we saw (), or $recipe is not a face recipe!
9346             }
9347            
# Post-process the translated layers $in of face $face using the recipe found
# under '[Diacritic_if_undef]' (looked up via get_deep() along the `/'-separated
# components of the face name).  Without such a recipe $in is returned as is;
# otherwise the layers made from the recipe are stacked after $in and the
# stacked result is returned as a new array reference.
sub massage_translated_layers ($$$$;$) {
  my ($self, $in, $face, $NN, $deadkey) = @_;
  #warn "Massaging `$deadkey' for `$face':$N";
  my @path = split m(/), $face;
  my $r = $self->get_deep($self, 'faces', @path, '[Diacritic_if_undef]');
  return $in unless $r;
  $r =~ s/^\s+//;
  #warn " -> end recipe `$r'";
  my $post = $self->make_translated_layers($r, $face, $NN, $deadkey);
  return [$self->make_translated_layers_stack($in, $post)];
}
9357            
# Return the '[DeadChar_DefaultTranslation]' setting of face $F (found via
# get_deep()), with leading whitespace removed and converted by charhex2key();
# returns undef when the setting is absent.
sub default_char ($$) {
  my ($self, $F) = @_;
  my $default = $self->get_deep($self, 'faces', $F, '[DeadChar_DefaultTranslation]');
  if (defined $default) {
    $default =~ s/^\s+//;
    $default = $self->charhex2key($default);
  }
  return $default;
}
9364            
# Create the AltGr-inverted face for the prefix key $KK (hex; '' stands for the
# face itself) of face $F, and return the name of the new face.
#   $chain       hash reference consulted for the next element of the AltGr-inversion chain
#   $flip_AltGr  the AltGr-flipping key, or undef
# The name of the new face is taken from '[deadkeyFaceInvAltGr]', keyed by the
# character recorded for $KK in '[deadkeyInvAltGrKey]'.
sub create_inverted_face ($$$$$) {
  my ($self, $F, $KK, $chain, $flip_AltGr) = @_;
  my $H = $self->{faces}{$F};
  my $auto_chr     = $H->{'[deadkeyInvAltGrKey]'}{$KK};
  my $new_facename = $H->{'[deadkeyFaceInvAltGr]'}{$auto_chr};
  my $LL = $H->{'[deadkeyLayers]'}{$KK};
  $LL = $H->{layers} if $KK eq '';

  # When the chain continues after $KK, map the flipping key to the next chain element
  my %Map;
  if (defined $flip_AltGr) {
    my $from = ($KK and $self->charhex2key($KK));
    %Map = ($flip_AltGr, [$chain->{$from}, undef, 1, 'AltGrInv-faces-chain'])
      if defined $chain->{$from};
  }
  $self->patch_face($LL, $new_facename, $H->{"[InvdeadkeyLayers]"}{$KK}, $KK, \%Map, $F, 'invert');

  # warn "Joining <$F>, <$new_facename>";
  $self->link_layers($F, $new_facename, 'skipfix', 'no-slot-warn');
  $H->{'[deadkeyFace]'}{$self->key2hex($flip_AltGr)} = $H->{'[deadkeyFaceInvAltGr]'}{$auto_chr}
    if $KK eq '' and defined $flip_AltGr;
  if ($H->{"[InvdeadkeyLayers]"}{$KK}) {	# There are overrides for the inverted face.  Make a map for them...
    #warn "Overriding face for inverted `$KK' in face $F; new_facename=$new_facename";
    my $override = "$new_facename\@override";
    $H->{'[InvAltGrFace]'}{$KK} = $override;
    $self->{faces}{$override}{layers} = $H->{"[InvdeadkeyLayers]"}{$KK};
    $self->link_layers($F, $override, 'skipfix', 'no-slot-warn');
  }
  return $new_facename;
}
9389            
# Bump the '[ad_can_wrap]' counter of face $F (consulted by next_auto_dead());
# returns the counter's previous value.
# Call after all the manually set prefix key are already established, so one can avoid them
sub auto_dead_can_wrap ($$) {
  my ($self, $F) = @_;
  return $self->{faces}{$F}{'[ad_can_wrap]'}++;
}
9394            
9395             sub next_auto_dead ($$) {
                 # Allocate the next automatically chosen prefix character for the face hash $H and return it
                 # (as a character, via chr).  State kept in $H:
                 #   '[first_auto_dead]'   the next candidate code point (post-incremented on every probe)
                 #   '[auto_dead]'         per-code-point counters of probed/used candidates
                 #   '[ad_can_wrap]'       whether wrapping is allowed (bumped by auto_dead_can_wrap())
                 #   '[autodead_wrapped]'  set once the search was restarted from 0xa0
9396 0     0 0   my ($self, $H, $o) = (shift, shift);
9397 0 0         unless ($H->{'[autodead_wrapped]'}) {
                 # Probe candidates; a candidate with a zero counter ends the search.  An already used candidate is
                 # skipped, except that a candidate >= 0x1000 also ends the search when wrapping is allowed
9398 0   0       1 while $H->{'[auto_dead]'}{ $o = $H->{'[first_auto_dead]'}++ }++ and ($o < 0x1000 or not $H->{'[ad_can_wrap]'}); # Bug in kbdutool
      0        
                 # Reached 0x1000 with wrapping allowed: restart the candidates from 0xa0 (done once; sets the flag)
9399 0 0 0       $H->{'[first_auto_dead]'} = 0xa0 if $o >= 0x1000 and $H->{'[ad_can_wrap]'} and not $H->{'[autodead_wrapped]'}++;
      0        
9400             }
9401 0 0         if ($H->{'[autodead_wrapped]'}) { # This does not deal with manual assignment of inverted prefixes??? Inv_AltGr???
                 # After wrapping, additionally skip the candidates having an entry in '[deadkeyFaceHexMap]'
9402 0   0       1 while $H->{'[auto_dead]'}{ $o = $H->{'[first_auto_dead]'}++ }++ or $H->{'[deadkeyFaceHexMap]'}{$self->key2hex(chr $o)};
9403             # if ($o == 0x00a3) {
9404             # warn "$o: Keys HexMap: ", join ', ', sort keys %{$H->{'[deadkeyFaceHexMap]'}};
9405             # }
9406             }
9407 0           chr $o;
9408             }
9409            
9410             sub recipe2str ($$) {
                 # Convert a recipe into one string.  The recipe (the second argument; the invocant is ignored)
                 # is either a string or an array reference of fragments; fragments are trimmed of leading and
                 # trailing whitespace and concatenated.  Leading whitespace of the result is removed.
9411 0     0 0   (undef, my $recipe) = (shift, shift);
9412 0 0         if ('ARRAY' eq ref $recipe) {
                 # Work on a copy of the list, so that the caller's array is not modified by the substitutions
9413 0           $recipe = [@$recipe]; # deep copy
9414 0           s/\s+$//, s/^\s+// for @$recipe;
                 # NOTE(review): the statement on the next line is cut off in this listing (only its first
                 # characters survived); consult the original module source before relying on this block.
9415 0           s/(?
9416 0           $recipe = join '', @$recipe;
9417             }
9418 0           $recipe =~ s/^\s+//;
9419 0           $recipe
9420             }
9421            
# Makes a direct-access synonym, scan for DeadKey_Maps* keys.
# For every section `faces/...' listed in $self->{'[keys]'} (top-level `faces'
# and sections ending in `VK' are skipped, as are faces marked PartialFace):
#  * a face with `/' in its name which has `layers' is made reachable directly
#    as $self->{faces}{NAME};
#  * the DeadKey_[Inv|Add]Map[HEX][_LAYER] keys of the section and of all its
#    parents are collected into the section's '[DeadKey__Maps]' hash, indexed by
#    {hex-prefix or ''}{'', Inv or Add}{layer number or 'All'}; an already
#    present (true) entry is kept, so the section itself wins over its parents.
sub scan_for_DeadKey_Maps ($) {
  my $self = shift;
  #Dumpvalue->new()->dumpValue($self);
  my @sections = grep m(^faces(/.*)?$), @{$self->{'[keys]'}};
  for my $section (@sections) {
    (my $F = $section) =~ s(^faces/?)();
    my @path = split m(/), $section;
    next if @path == 1 or $path[-1] eq 'VK';
    my @ancestry;			# The section's hash first, then all the parents
    my @rest = @path;
    while (@rest) {
      push @ancestry, $self->get_deep($self, @rest);
      pop @rest;
    }
    my $H = $ancestry[0];
    next if $H->{PartialFace};
    $self->{faces}{$F} = $H if $F =~ m(/) and exists $H->{layers};	# Make a direct-access copy

    for my $source (@ancestry) {
      for my $k ( keys %$source ) {
        next unless my ($inv, $key, $layers) = $k =~ m(^DeadKey_(Inv|Add)?Map([0-9a-f]{4,})?(?:_(\d+))?$)i;
        $inv ||= '';
        $key = $self->key2hex($self->charhex2key($key)) if defined $key;	# get rid of uc/lc hex problem
        my $which = (defined $layers) ? $layers : 'All';
        # XXXX The problem is that the parent may define layers in different ways (_0,_1 or no); ignore it for now...
        $H->{'[DeadKey__Maps]'}{$key || ''}{$inv}{$which} ||= $source->{$k};
      }
    }
  }
}
9452            
9453             #use Dumpvalue;
# Create (once) the layers bound to the prefix key $hexPrefix of face $F from $recipe.
#   $layers  array reference of the layer numbers to create; a false value means all layers of the face
#   $inv     '' (default) or a prefix such as `Inv': results go to "[${inv}deadkeyLayers]"
# A recipe ending in ` NoDefaultTranslation' has that suffix removed and is not
# post-processed by massage_translated_layers(); neither are recipes with a true $inv.
# Repeated calls with the same ($inv, $hexPrefix, layer list) are no-ops.
sub ensure_DeadKey_Map_by_recipe ($$$$;$$) {
  my ($self, $F, $hexPrefix, $recipe, $layers, $inv) = @_;
  $inv ||= '';
  my $H = $self->{faces}{$F};
  my $done_key = ($layers and "@$layers");
  return if $H->{"[${inv}deadkeyLayersCreated]"}{$hexPrefix}{$done_key}++;
  #Dumpvalue->new()->dumpValue($self);
  my $massage = 1;
  $massage = !1 if $recipe =~ s/\s+NoDefaultTranslation$//;
  $layers ||= [ 0 .. $#{$self->{faces}{$F}{layers}} ];
  #warn "Doing key `$hexPrefix' inv=`$inv' face=`$F', recipe=`$recipe'";
  my $new = $self->make_translated_layers($recipe, $F, $layers, $hexPrefix);
  if ($massage and not $inv) {
    $new = $self->massage_translated_layers($new, $F, $layers, $hexPrefix);
  }
  my $slot = 0;
  for my $layer_no (@$layers) {		# Create a layer according to the spec
    $H->{"[${inv}deadkeyLayers]"}{$hexPrefix}[$layer_no] = $new->[$slot++];
  }
}
9475            
# Make sure that the layers for the prefix key $hexPrefix of face $F exist, using
# the recipes recorded in the face's '[DeadKey__Maps]' under $hexPrefixWas
# (defaults to $hexPrefix).  Each recorded recipe is converted by recipe2str()
# and handed to ensure_DeadKey_Map_by_recipe(); the combination of kind `Inv'
# with prefix `0000' is filed under the empty prefix.
sub ensure_DeadKey_Map ($$$;$) {
  my ($self, $F, $hexPrefix, $hexPrefixWas) = @_;
  $hexPrefixWas = $hexPrefix unless defined $hexPrefixWas;
  my $H = $self->{faces}{$F};
  my $by_kind = $H->{'[DeadKey__Maps]'}{$hexPrefixWas};
  for my $inv (keys %$by_kind) {
    my $by_layer = $by_kind->{$inv};
    my $K = $hexPrefix;
    $K = '' if $inv and "$inv $hexPrefix" eq "Inv 0000";
    for my $which (keys %$by_layer) {
      my $recipe = $self->recipe2str($by_layer->{$which});
      my $layers = ($which eq 'All') ? '' : [$which];
      $self->ensure_DeadKey_Map_by_recipe($F, $K, $recipe, $layers, $inv);
    }
  }
}
9491            
# For every face (hash entries of $self->{faces} whose name does not end in VK)
# run ensure_DeadKey_Map() for the prefix keys with recorded '[DeadKey__Maps]'.
# An entry recorded under the empty key is a universal map: it is applied to all
# the keys of '[DEAD]' (except the '[Flip_AltGr_Key]') which have neither a map
# of their own nor an entry in '[ComposeKeys]'.
sub create_DeadKey_Maps ($) {
  my $self = shift;
  #Dumpvalue->new()->dumpValue($self);
  for my $F (keys %{ $self->{faces} }) {
    next if 'HASH' ne ref $self->{faces}{$F} or $F =~ /\bVK$/;		# "parent" taking keys for a child
    my $H = $self->{faces}{$F};
    my $flip_AltGr = $H->{'[Flip_AltGr_Key]'};
    if (defined $flip_AltGr) {
      $flip_AltGr = $self->charhex2key($flip_AltGr);
    } else {
      $flip_AltGr = 'N/A';
    }
    # Treat first the specific maps (for one deadkey) then the deadkeys which were not seen via the universal map
    for my $key (keys %{$H->{'[DeadKey__Maps]'}}) {
      my @prefixes;
      if ($key ne '') {
        @prefixes = $key;
      } else {
        my @dead = grep $_ ne $flip_AltGr, keys %{ $H->{'[DEAD]'} };
        my @hex  = map $self->key2hex($_), @dead;
        @prefixes = grep {not $H->{'[DeadKey__Maps]'}{$_} and not $H->{'[ComposeKeys]'}{$_}} @hex;
      }
      for my $prefix (@prefixes) {
        $self->ensure_DeadKey_Map($F, $prefix, $key);
      }
    }
  }
}
9511            
9512             #use Dumpvalue;
# For every face with '[deadkeyLayers]' (PartialFace faces and names ending in VK
# are skipped): fill the missing layers of each prefix key with a shared layer of
# empty keys, register the layers as the synthetic face "FACE###HEX", and record
# that name in '[deadkeyFace]'.  Also initializes '[first_auto_dead]' from
# '[Auto_Diacritic_Start]' when the latter is given.  Returns $self.
sub create_composite_layers ($) {
  my $self = shift;
  #Dumpvalue->new()->dumpValue($self);
  for my $F (keys %{ $self->{faces} }) {
    next if 'HASH' ne ref $self->{faces}{$F} or $F =~ /\bVK$/;		# "parent" taking keys for a child
    my $H = $self->{faces}{$F};
    next if $H->{PartialFace};
    my $dead_layers = $H->{'[deadkeyLayers]'}		# Are we in a no-nonsense Face-hash with defined deadkeys?
      or next;
    #warn "Face: <", join( '> <', %$H), ">";
    my $layerL = @{ $self->{layers}{ $H->{layers}[0] } };	# number of keys in the face (in the principal layer)
    my $first_auto_dead = $H->{'[Auto_Diacritic_Start]'};
    $H->{'[first_auto_dead]'} = ord $self->charhex2key($first_auto_dead) if defined $first_auto_dead;
    my $empty = "[empty$layerL]";
    for my $KK (sort keys %$dead_layers) {		# Given a deadkey: join layers into a face, and link to it
      for my $layer ( 0 .. $#{ $H->{layers} } ) {
        next if defined $dead_layers->{$KK}[$layer];
        $self->{layers}{$empty} ||= [map[], 1..$layerL];
        $dead_layers->{$KK}[$layer] = $empty;
      }
      # Join the synthetic layers (now well-formed) into a new synthetic face:
      my $new_facename = "$F###$KK";
      $self->{faces}{$new_facename}{layers} = $dead_layers->{$KK};
      $H->{'[deadkeyFace]'}{$KK} = $new_facename;
      #warn "Joining <$F>, <$new_facename>";
      # $self->link_layers($F, $new_facename, 'skipfix', 'no-slot-warn');	# Now moved to link_composite_layers
    }
  }
  return $self;
}
9541            
9542             sub create_prefix_chains ($) {
                 # Process the '[PrefixChains]' settings of every face section listed in $self->{'[keys]'}
                 # (the top-level `faces' and sections ending in `VK' are skipped).  Returns $self.
9543 0     0 0   my ($self, %h, $expl) = (shift);
9544 0           my @F = grep m(^faces(/.*)?$), @{$self->{'[keys]'}};
  0            
9545 0           for my $FF (@F) {
9546 0           (my $F = $FF) =~ s(^faces/?)();
9547 0           my(@FF, @HH) = split m(/), $FF;
9548 0 0 0       next if @FF == 1 or $FF[-1] eq 'VK';
                 # Collect the hash of the section and of its parents; only the first one is used below
9549 0           push(@HH, $self->get_deep($self, @FF)), pop @FF while @FF;
                 # %KK: prefix key => { chain-starting key => [next key, undef, is-prefix flag, 'PrefixChains'] }
9550 0           my($H, %KK) = $HH[0];
9551 0 0         for my $chain ( @{ $H->{'[PrefixChains]'} || [] } ) {
  0            
9552 0           (my $c = $chain) =~ s/^\s+//;
9553 0 0         my @prefix = map { $_ and $self->charhex2key($_) } split /,/, $c, -1; # trailing empty means all are prefixes
  0            
                 # Drop the trailing empty element (if any); otherwise remember that the last element is not a prefix
9554 0 0         length(my $trail_nonprefix = $prefix[-1]) or pop @prefix;
                 # The first element is the key which moves one along the chain
9555 0           my $start = shift @prefix;
9556 0 0         warn "PrefixChain for `$start' in font `$F' is empty" unless @prefix > 1;
9557 0           for my $Kn (1..$#prefix) {
9558 0           my($from, $to) = @prefix[$Kn-1, $Kn];
                 # The flag is false only for the last link of a chain ending in a non-prefix
9559 0   0       $KK{$from}{$start} = [$to, undef, $Kn != $#prefix || !$trail_nonprefix, 'PrefixChains'];
9560             }
9561             }
                 # Patch the face of every prefix key taking part in a chain, and make the patched face current
9562 0           for my $K (keys %KK) {
9563 0           my $KK = $self->key2hex($K);
9564 0 0         die "Key `$KK=$K' in PrefixChain for font=`$F' is not a prefix" unless my $KF = $H->{'[deadkeyFace]'}{$KK};
9565 0           my $new_facename = "$F*==>*Chain*$KK";
9566 0           my $LL = $H->{'[deadkeyLayers]'}{$KK};
9567 0           $self->patch_face($LL, $new_facename, undef, "chain-in-$KK", $KK{$K}, $F, !'invert');
9568 0           $H->{'[deadkeyFace]'}{$KK} = $new_facename;
9569 0           $H->{'[deadkeyLayers]'}{$KK} = $self->{faces}{$new_facename}{layers};
9570 0           $self->coverage_face0($new_facename, 'after import');
9571             }
9572             }
9573             $self
9574 0           }
9575            
# As create_composite_layers(), but finish: for every face section listed in
# $self->{'[keys]'} (the top-level `faces' and sections ending in `VK' are
# skipped) link each face recorded in '[deadkeyFace]' to the face itself via
# link_layers().  Returns $self.
sub link_composite_layers ($) {
  my $self = shift;
  my @sections = grep m(^faces(/.*)?$), @{$self->{'[keys]'}};
  for my $section (@sections) {
    (my $F = $section) =~ s(^faces/?)();
    my @path = split m(/), $section;
    next if @path == 1 or $path[-1] eq 'VK';
    my @HH;				# The hash of the section, then of its parents
    while (@path) {
      push @HH, $self->get_deep($self, @path);
      pop @path;
    }
    my $H = $HH[0];
    for my $linked (values %{$H->{'[deadkeyFace]'}}) {
      #warn "Joining <$F>, <$linked>";
      $self->link_layers($F, $linked, 'skipfix', 'no-slot-warn');
    }
  }
  return $self;
}
9592            
9593             sub create_inverted_faces ($) {
                 # For every face with '[deadkeyLayers]': choose the prefix characters giving access to the
                 # AltGr-inverted variants of its prefix keys, and (only for faces with '[Flip_AltGr_Key]') create
                 # the inverted faces via create_inverted_face().  Returns $self.
9594 0     0 0   my ($self) = (shift);
9595             #Dumpvalue->new()->dumpValue($self);
9596 0           for my $F (keys %{$self->{faces} }) {
  0            
9597 0 0 0       next if 'HASH' ne ref $self->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
9598 0           my $H = $self->{faces}{$F};
9599 0 0         next unless $H->{'[deadkeyLayers]'}; # Are we in a no-nonsense Face-hash with defined deadkeys?
                 # '[Explicit_AltGr_Invert]' is a flat list of pairs; it is converted (via charhex2key) to a hash
                 # prefix key => explicitly assigned accessor of its inverted face
9600 0   0       my $expl = $H->{'[Explicit_AltGr_Invert]'} || [];
9601 0 0         $expl = [], warn "Odd number of elements of Explicit_AltGr_Invert in face $F, ignore" if @$expl % 2;
9602 0           $expl = {map $self->charhex2key($_), @$expl};
9603            
9604             #warn "Face: <", join( '> <', %$H), ">";
9605 0           my $layerL = @{ $self->{layers}{ $H->{layers}[0] } }; # number of keys in the face (in the principal layer)
  0            
9606 0           for my $KK (sort keys %{$H->{'[deadkeyLayers]'}}) { # Create AltGr-inverted face if there is at least one key in the AltGr face:
  0            
9607 0           my $LL = $H->{'[deadkeyLayers]'}{$KK};
9608             # To check that a key is defined, we do not care about whether a shift-state is encoded as a string, or as an array:
                 # (only the layers after the first one are inspected; nothing is done without '[first_auto_dead]')
9609 0 0 0       next unless defined $H->{'[first_auto_dead]'} and grep defined, map $self->flatten_arrays($_), map $self->{layers}{$_}, @$LL[1..$#$LL];
9610 0 0         $H->{'[deadkeyInvAltGrKey]'}{''} = $self->next_auto_dead($H) unless exists $H->{'[deadkeyInvAltGrKey]'}{''}; # Prefix key for principal inverted face
                 # The accessor of the inverted face of $KK: the explicitly assigned one, else the next automatic one
9611             my $auto_chr = $H->{'[deadkeyInvAltGrKey]'}{$KK} =
9612 0 0         ((exists $expl->{$self->charhex2key($KK)}) ? $expl->{$self->charhex2key($KK)} : $self->next_auto_dead($H));
9613 0           $H->{'[deadkeyFaceInvAltGr]'}{$auto_chr} = "$F##Inv#$KK";
9614 0           $self->{faces}{ $H->{'[deadkeyFace]'}{$KK} }{'[invAltGr_Accessor]'} = $auto_chr;
9615             }
                 # The rest is done only for the faces with a key flipping AltGr
9616 0 0         next unless defined (my $flip_AltGr = $H->{'[Flip_AltGr_Key]'});
9617 0           $flip_AltGr = $self->charhex2key($flip_AltGr);
9618 0 0         $H->{'[deadkeyFaceInvAltGr]'}{ $H->{'[deadkeyInvAltGrKey]'}{''} } = "$F##Inv#" if exists $H->{'[deadkeyInvAltGrKey]'}{''};
                 # %chain: key of `chainAltGr' (starting with '' for the face itself) => accessor of the inverted
                 # face of the next key in the list
9619 0           my ($prev, %chain) = '';
9620 0 0         for my $k ( @{ $H->{chainAltGr} || [] }) {
  0            
9621 0           my $K = $self->charhex2key($k);
9622 0           my $KK = $self->key2hex($K);
9623             warn("Deadkey ` $K ' of face $F has no associated AltGr-inverted face"), next
9624 0 0         unless exists $H->{'[deadkeyInvAltGrKey]'}{$KK};
9625 0           $chain{$prev} = $H->{'[deadkeyInvAltGrKey]'}{$KK};
9626             #warn "chain `$prev' --> `$K' => $H->{'[deadkeyInvAltGrKey]'}{$KK}";
9627             # $H->{'[dead2_AltGr_chain]'}{(length $prev) ? $self->key2hex($prev) : ''}++;
9628 0           $prev = $K;
9629             }
9630 0 0         $H->{'[have_AltGr_chain]'} = 1 if length $prev;
9631 0           for my $KK (keys %{$H->{'[deadkeyInvAltGrKey]'}}) { # Now know which deadkeys take inversion, and via what prefix
  0            
9632 0           my $new = $self->create_inverted_face($F, $KK, \%chain, $flip_AltGr);
9633 0           $self->coverage_face0($new);
9634             }
9635             # We do not link the AltGr-inverted faces to the "parent" faces here. Currently, it should be done when
9636             # outputting a kbd description...
9637             }
9638             $self
9639 0           }
9640            
9641             #use Dumpvalue;
9642             sub patch_face ($$$$$$$;$) { # flip layers paying attention to linked AltGr-inverted faces, and overrides
                 # Store into $self->{faces}{$newname}{layers} a stack of layers built from (in this order):
                 # $prefix (if true), the layers of face $face translated via $Map (if $Map is non-empty),
                 # and the layers @$LL (flipped via flip_layer_N() when $inv is true).
                 #   $LL       array reference of layer names
                 #   $newname  name of the face to create
                 #   $prefix   optional item to put first on the stack (callers pass layers overriding the rest, or undef)
                 #   $mapId    string making the names of the translated layers unique
                 #   $Map      hash reference: character => replacement entry
                 #   $face     name of the face whose layers are translated via $Map
                 #   $inv      true to flip the layers (and to use `AltGr' in the names of the translated layers)
9643 0     0 0   my ($self, $LL, $newname, $prefix, $mapId, $Map, $face, $inv, @K) = (shift, shift, shift, shift, shift, shift, shift, shift);
9644 0 0         if (%$Map) { # Borrow from make_translated_layer_tr()
                 # The translator: undef stays undef; for an array entry its first element is looked up;
                 # the result is the entry of $Map (undef when the character is not in $Map)
9645 0 0   0     my $Tr = sub ($) { my $c = shift; defined $c or return $c; $c = $c->[0] if ref $c; my $o = $Map->{$c} ;
  0 0          
  0            
  0            
9646             #warn "Tr: `$c' --> `$o'" if defined $o;
9647             #$o
9648 0           };
9649 0           $Tr = $self->depth1_A_translator($Tr);
9650 0           my $LLL = $self->{faces}{$face}{layers};
9651 0 0         my $mod_name = ($inv ? 'AltGr' : '');
9652 0           for my $n (0..$#$LL) { # Layer number
9653 0           my $new_Name = "$face##Chain$mod_name#$n.." . $mapId;
9654             #warn "AltGr-chaining: name=$new_Name; `$chainKey' => `$nextL'";
                 # The translated layer is created only once per name
9655 0   0       $self->{layers}{$new_Name} ||= [ map $Tr->($_), @{ $self->{layers}{ $LLL->[$n] } }];
  0            
9656 0           push @K, $new_Name;
9657             }
9658             }
9659 0 0         my @prefix = $prefix ? $prefix : ();
9660 0           my @n1 = (0..$#$LL);
9661 0 0         @n1 = map $self->flip_layer_N($_, $#$LL), @n1 if $inv;
9662 0           my @invLL = @$LL[@n1];
9663 0 0         push @prefix, \@K if @K;
9664 0           $self->{faces}{$newname}{layers} = [$self->make_translated_layers_stack(@prefix, \@invLL)];
9665             }
9666            
9667             # use Dumpvalue;
my %subst_Shift = qw( -- - -S S t- t tS T );	# There is no space for 8 MODs, so we contract tS into T
# Format the bitmap $b of modifiers as a string.  Bit N stands for the N-th of
# Shift Ctrl Alt Kana Roya Loya Z t.
#   long form:   the names of the set modifiers in bit order, TAB-separated, with
#                empty fields for the unset ones and trailing TABs removed
#   $short true: one character per modifier (the first letter of its name, or `-'),
#                Shift put last, and the final t/Shift pair contracted into one char
# A column number $col of 15 always gives `Invalid'.
sub fmt_bitmap_mods ($$$;$) {
  my ($self, $bits, $col, $short) = @_;
  my @names = qw(Shift Ctrl Alt Kana Roya Loya Z t);
  my $out;
  if ($short) {
    my @order = (1..$#names, 0);	# better have Shift at end (Ctrl-Alt-Shift)...
    $out = join '', map { ($bits & (1<<$_)) ? substr($names[$_], 0, 1) : '-' } @order;
    $out =~ s/(..)$/$subst_Shift{$1}/;
  } else {
    $out = join "\t", map { ($bits & (1<<$_)) ? $names[$_] : '' } 0..$#names;
  }
  $out =~ s/\t+$//;
  $out = 'Invalid' if $col == 15;
  return $out;
}
9678            
# Compute the lists of base key names of a face, and cache them in the face hash.
#   $K  array reference: the path of the face hash inside $self, as in [qw(faces US)]
# Stores two array references in the face hash: `baseKeysWin' (key names, with the
# names of the main island translated via %oem_keys unless they are a single
# letter or digit) and `baseKeysRaw' (the names as reported by base_unit()).
# Returns `baseKeysWin' (the cached one, if present).  When a name of the main
# island is not found in %oem_keys, warns and returns nothing without caching.
sub BaseKeys ($$) {
  my($self, $K) = (shift, shift);
  my $F = $self->get_deep($self, @$K);		# Presumably a face hash, as in $K = [qw(faces US)]
  return $F->{baseKeysWin} if $F->{baseKeysWin};
  my $cnt = $F->{'[non_VK]'};
  my $b = $F->{BaseLayer};
  my $layers = $F->{layers};
  # A BaseLayer which is not the name of an existing layer is treated as a recipe
  $b = $self->make_translated_layers($b, $K->[-1], [0])->[0] if defined $b and not $self->{layers}{$b};
  my $basesub = [((defined $b) ? $b : ()), $F->{layers}[0]];
  # The last unit to process: the end of the longest base layer, or of the furthest section of %start_SEC
  my $max = -1;
  $max < $#{$self->{layers}{$_}} and $max = $#{$self->{layers}{$_}} for @$basesub;
  $max < $_->[0] + $_->[1] and $max = $_->[0] + $_->[1] for values %start_SEC;
  # warn "Basekeys: max=$max; cnt=$cnt";
  my(@o, @oo);
  for my $u (0..$max) {
    my $c = $self->base_unit($basesub, $u, $u >= $cnt);		# [0 || 1, VK]
    my($k, $kk) = ($c->[1], $c->[2]);	# uc(With prepended #), orig (or undef if not array)
    if (!$c->[0]) {			# Main island of keyboard
      # $k is overwritten by the failed lookup, so report the name kept in $c->[1]
      # (the original interpolated the array reference $c itself)
      $k = $oem_keys{$k} or warn("Can't find a key with VKEY `$c->[1]', unit=$u, lim=$cnt"), return
        unless $k =~ /^[A-Z0-9]$/;
    } else {
      my $U = [map $self->{layers}{$_}[$u], @$layers];
      # NOTE(review): the elements of @$U are already the entries for unit $u, so indexing
      # them by $u again looks unintended; left as is since it only affects the warning below.
      my $keys = grep defined, map $self->flatten_arrays($_->[$u]), @$U;
      $keys and warn "Can't find the range of keys to which unit `$u' belongs (max=$max; cnt=$cnt)" unless defined $k;
      $kk = $k;
    }
    push @o, $k;
    push @oo, $kk;
  }
  # warn "BaseKeys: @o";
  $F->{baseKeysRaw} = \@oo;
  $F->{baseKeysWin} = \@o;
}
9712            
9713            
9714             sub fill_win_template ($$$;$$) {
                 # Collect the substitution values for the face addressed by $k into %h and
                 # return the result of massage_template($template_win, \%h).
                 #   $t          not referenced in the visible body of this sub
                 #   $k          array ref: path (as taken by get_deep) to the face, e.g. [qw(faces US)]
                 #   $dummy      if true, emit a stub (one key, fixed column table; no ligatures/deadkeys)
                 #   $dummyDscr  if true, LAYOUTNAME is replaced by "KBD Layout <DLLNAME>"
9715 0     0 0   my @K = qw( COMPANYNAME LAYOUTNAME COPYR_YEARS LOCALE_NAME LOCALE_ID DLLNAME SORT_ORDER_ID_ LANGUAGE_NAME );
9716 0           my ($self, $t, $k, $dummy, $dummyDscr, %h) = (shift, shift, shift, shift, shift);
9717 0           $self->reset_units;
9718 0           my $B = $self->BaseKeys($k);
9719             # Dumpvalue->new()->dumpValue($self);
9720 0           my $idx = $self->get_deep($self, @$k, 'MetaData_Index');
9721 0           $h{$_} = $self->get_deep_via_parents($self, $idx, @$k, $_) for @K;
9722 0 0         $h{LAYOUTNAME} = "KBD Layout $h{DLLNAME}" if $dummyDscr; # error "the required resource DATABASE is missing" from setup.exe
                 # Length with every char >= U+10000 counted twice (presumably: length in UTF-16 units -- TODO confirm)
9723 0           my $LLL = length($h{LAYOUTNAME}) + grep ord >= 0x10000, split //, $h{LAYOUTNAME};
9724 0 0         warn "The DESCRIPTION of the layout [@$k] is longer than 63 chars;\n the name shown in LanguageBar/Settings may be empty"
9725             if $LLL > 63;
9726 0           $h{LAYOUTNAME} =~ s/([\\""])/\\$1/g; # C-like syntax (directly copied to resource files???)
9727             # warn "Translate: ", %h;
9728 0           my $F = $self->get_deep($self, @$k); # Presumably a face hash, as in $k = [qw(faces US)]
9729 0           $F->{'[dead-used]'} = [map {}, @{$F->{layers}}]; # Which of deadkeys are reachable on the keyboard
  0            
9730 0           my $cnt = $F->{'[non_VK]'};
                 # ATTRIBS: an "ATTRIBUTES" section naming those of the 3 flags which are set on the face
9731 0 0         if (grep $F->{"[$_]"}, qw(LRM_RLM ALTGR SHIFTLOCK)) {
9732 0           $h{ATTRIBS} = (join "\n ", "\nATTRIBUTES", grep $F->{"[$_]"}, qw(LRM_RLM ALTGR SHIFTLOCK)) . "\n" ;
9733             } else {
9734 0           $h{ATTRIBS} = ''; # default
9735             }
9736 0 0         if ($dummy) {
                 # Stub: 5 sections are emptied; LAYOUT_KEYS and BITS_TEMPLATE get fixed texts.
                 # NOTE(review): the heredoc introducers of the second statement below look cut off in this listing.
9737 0           @h{qw(DO_LIGA COL_HEADERS COL_EXPL KEYNAMES_DEAD DEADKEYS)} = ('') x 5;
9738 0           @h{qw(LAYOUT_KEYS BITS_TEMPLATE)} = (<
9739             10 Q 0 q -1 -1 // LATIN SMALL LETTER Q, ,
9740             EOT
9741             0 // Column 4 :
9742             1 // Column 5 : Shift
9743             2 // Column 6 : Ctrl
9744             3 // Column 7 : Shift Ctrl
9745             6 // Column 12 : Ctrl Alt t
9746             7 // Column 13 : Shift Ctrl Alt t
9747             EOT
9748             } else {
9749 0           $h{LAYOUT_KEYS} = join '', $self->output_layout_win($k->[-1], $F->{layers}, $F->{'[dead]'}, $F->{'[dead-used]'}, $cnt, $B);
9750 0           $h{LAYOUT_KEYS} .= join '', $self->output_VK_win($k->[-1], $F->{'[dead-used]'});
9751 0           $h{LAYOUT_KEYS} .= join '', $self->output_added_units();
9752            
9753 0           $h{DO_LIGA} = join '', $self->output_ligatures();
                 # NOTE(review): the heredoc introducer (and whatever followed it) of the next statement looks cut off in this listing.
9754 0 0         $h{DO_LIGA} = <
9755            
9756             LIGATURE
9757            
9758             // VK_ ModCol# Char0 Char1 Char2 Char3
9759             // --------- ------- ----- ----- ----- -----
9760            
9761            
9762             EOPREF
9763            
9764             ### Deadkeys??? need_extra_keys_to_access???
9765 0           my ($OUT, $OUT_NAMES) = ('', "KEYNAME_DEAD\n\n");
9766            
9767 0           my $f = $self->get_AgeList;
9768 0 0 0       $self->load_uniage($f) if defined $f and not $self->{Age};
9769            
9770 0           my($flip_AltGr_hex, %nn) = $F->{'[Flip_AltGr_Key]'};
9771 0 0         $flip_AltGr_hex = $self->key2hex($self->charhex2key($flip_AltGr_hex)) if defined $flip_AltGr_hex;
                 # One output_deadkeys() chunk per key of '[deadkeyFaceHexMap]' (those in '[only_extra]' are skipped);
                 # %nn collects the names to show, keyed the same way as @K of the loop body
9772 0           for my $deadKey ( sort keys %{ $F->{'[deadkeyFaceHexMap]'} } ) {
  0            
9773 0 0         next if $F->{'[only_extra]'}{$self->charhex2key($deadKey)};
9774 0           my $auto_inv_AltGr = $F->{'[deadkeyInvAltGrKey]'}{$deadKey};
9775 0 0         $auto_inv_AltGr = $self->key2hex($auto_inv_AltGr) if defined $auto_inv_AltGr;
9776             #warn "flipkey=$flip_AltGr_hex, dead=$deadKey" if defined $flip_AltGr_hex;
9777 0           (my $nonempty, my $MAP) = $self->output_deadkeys($k->[-1], $deadKey, $F->{'[dead2]'}, $flip_AltGr_hex, $auto_inv_AltGr);
9778 0           $OUT .= "$MAP\n";
9779 0 0         my @K = ($deadKey, ($auto_inv_AltGr ? $auto_inv_AltGr : ()));
                 # The name: the first true value among 3 lookup tables; UName() is the last resort
9780 0   0       my @N = map $self->{DEADKEYS}{$_} || $self->{'[seen_knames]'}{chr hex $_} || $F->{'[prefixDocs]'}{$_} || $self->UName($_), @K;
9781 0           s/(?=[""\\])/\\/g for @N;
9782             # if (defined $N and length $N) {
9783 0           $nn{$K[$_]} = $N[$_] for 0..$#K;
9784             # }# else { warn "DeadKey `$deadKey' for face `@$k' has no name associated" }
9785             }
9786             # Apparently, if the name table is too long, the keyboard is not activatable (installs OK on Win7_64,
9787             # is in Settings' list, but is not in the panel's list). Omit the multiple-Compose entries as a workaround...
9788 0   0       $nn{$_} =~ /\bCompose\s+(Compose\b|(?!key)\S+)/ or $OUT_NAMES .= qq($_\t"$nn{$_}"\n) for sort keys %nn;
9789             #warn "Translate: ", %h;
9790 0           $h{DEADKEYS} = $OUT;
9791 0           $h{KEYNAMES_DEAD} = $OUT_NAMES;
                 # Letter => bit of the column's bitmask (R and L share their bits with X and Y)
9792 0           my %mods = qw( S 1 C 2 A 4 K 8 X 16 Y 32 Z 64 T 128 R 16 L 32);
9793 0           $_ += 0 for values %mods; # Convert to numbers, so | works as expected
9794 0           my @cols;
9795 0 0         my %tr_mods_keys = ( @{ $F->{'[mods_keys_KBD]'} || [qw(rA CA)] } );
  0            
9796 0   0       my $mods_keys = $F->{'[layers_mods_keys]'} || ['', 'rA'];
9797 0   0       my $mods = $F->{'[layers_modifiers]'} || []; # || ['', 'CA']; # Plain, and Control-Alt
9798 0 0         $#$mods = $#$mods_keys if $#$mods < $#$mods_keys;
                 # One bitmask per entry of @$mods; an entry without non-blanks is deduced from the
                 # matching entry of @$mods_keys (split into key names, each translated via %tr_mods_keys)
9799 0           for my $MOD ( @$mods ) {
9800 0           my $mask = 0;
9801 0 0         my $mod = ((defined $MOD) ? $MOD : ''); # Copy
9802 0 0         unless ($mod =~ /\S/) {
9803 0           my @K = grep /./, split /(?<=[A-Z])(?=[rl]?[A-Z])/, $mods_keys->[scalar @cols];
9804             #warn "cols=(@cols), K=(@K)\n";
9805 0           $mod = join '', map $tr_mods_keys{$_}, @K;
9806             }
9807 0           $mask |= $mods{$_} for split //, $mod;
9808 0           push @cols, $mask;
9809             }
9810 0           @cols = map {($_, $_ | $mods{S})} @cols; # Add shift
  0            
9811            
9812 0   0       my($ctrl_f,$ctrl_F) = ($mods{C}, $tr_mods_keys{lC} || $tr_mods_keys{C} || $tr_mods_keys{rC} || 'C'); # Prefer left-Ctrl
9813             # $ctrl_f |= $mods{$_} for split //, $ctrl_F; # kbdutool complains if there is no column for 'C'
9814            
                 # Position of the Control column(s): the face's '[ctrl_after_modcol]', or 2*$ctrl_after;
                 # likewise '[create_alpha_ctrl]' falls back to $create_alpha_ctrl
9815 0           my $pre_ctrl = $self->get_deep($self, @$k, '[ctrl_after_modcol]');
9816 0 0         $pre_ctrl = 2*$ctrl_after unless defined $pre_ctrl;
9817 0           my $create_a_c = $self->get_deep($self, @$k, '[create_alpha_ctrl]');
9818 0 0         $create_a_c = $create_alpha_ctrl unless defined $create_a_c;
9819 0 0         splice @cols, $pre_ctrl, 0, $ctrl_f, ($create_a_c>1 ? $ctrl_f|$mods{S} : ()); # Control (and maybe Control-Shift)
9820 0 0         splice @cols, 15, 0, $mods{A} if @cols >= 16; # col=15 is the fake one; assigning it to Alt is the best palliative to fixing MSKLC
                 # 3 renderings of @cols: "mask[index]" headers, short descriptions, and one
                 # "mask // Column N : description" line per column (N = 4 + index)
9821 0           $h{COL_HEADERS} = join "\t", map sprintf('%-3d[%d]', $cols[$_], $_), 0..$#cols;
9822 0           $h{COL_EXPL} = join "\t", map $self->fmt_bitmap_mods($cols[$_], $_, 'short'), 0..$#cols;
9823 0           $h{BITS_TEMPLATE} = join "\n", map { "$cols[$_]\t// Column " . (4+$_) . " :\t" . $self->fmt_bitmap_mods($cols[$_], $_) } 0..$#cols;
  0            
9824             # $h{BITS_TEMPLATE} =~ s(^(?=.*\bInvalid$))(#)m; # XXX Actually, MSKLC is not ignoring the leading #
9825             }
9826 0           $self->massage_template($template_win, \%h);
9827             }
9828            
9829             sub AppleMap_i_j ($$$$$;$$$$);
9830             sub AppleMap_prefix ($$;$$$$$$);
                 # The two lines above are forward declarations (a prototype, no body); both subs
                 # are invoked as methods ($self->...) in fill_osx_template() below.
9831            
9832             # https://developer.apple.com/library/mac/technotes/tn2056/_index.html
9833             sub fill_osx_template ($$) {
                 # Collect the substitution values for the face addressed by $k into %h and
                 # return the result of massage_template($template_osx, \%h).
                 #   $k  array ref: path (as taken by get_deep) to the face
9834 0     0 0   my @K = qw( OSX_LAYOUTNAME LAYOUTNAME OSX_ID OSX_ADD_VERSION OSX_DUP_KEYS COPYR_YEARS COMPANYNAME );
9835 0           my ($self, $k, %h, %ids) = (shift, shift);
9836 0           $self->reset_units;
9837 0           my $B = $self->BaseKeys($k); # NOTE(review): $B is not referenced again in this sub
9838             # Dumpvalue->new()->dumpValue($self);
9839 0           my $idx = $self->get_deep($self, @$k, 'MetaData_Index');
9840 0           $h{$_} = $self->get_deep_via_parents($self, $idx, @$k, $_) for @K;
9841            
                 # LAYOUTNAME serves only as a fallback for OSX_LAYOUTNAME
9842 0   0       $h{OSX_LAYOUTNAME} ||= $h{LAYOUTNAME};
9843 0           delete $h{LAYOUTNAME};
9844 0 0         $h{OSX_ID} = -17 unless defined $h{OSX_ID}; # (Arbitrary) Negative number
9845 0           my $v = $self->{VERSION};
                 # OSX_ADD_VERSION chooses where "v<VERSION>" goes into the name: N > 0: after the N-th
                 # whitespace-separated word; -1: appended at the end; otherwise (e.g., 0): prepended.
                 # NOTE(review): the substitution of the `< -1' branch is cut off in this listing.
9846 0 0 0       if (defined $v and defined $h{OSX_ADD_VERSION}) {
9847 0 0         if ($h{OSX_ADD_VERSION} > 0) {
    0          
    0          
9848 0           my $c = $h{OSX_ADD_VERSION} - 1;
9849 0           $h{OSX_LAYOUTNAME} =~ s/^(\s*(\S+($|\s+)){$c}\S+)(?!\S)/$1 v$v/;
9850             } elsif ($h{OSX_ADD_VERSION} < -1) {
9851 0           my $c = -$h{OSX_ADD_VERSION} - 2;
9852 0           $h{OSX_LAYOUTNAME} =~ s/((?
9853             } elsif ($h{OSX_ADD_VERSION} == -1) {
9854 0           $h{OSX_LAYOUTNAME} =~ s/\z/ v$v/;
9855             } else {
9856 0           $h{OSX_LAYOUTNAME} =~ s/^/v$v /;
9857             }
9858             }
9859 0           delete $h{OSX_ADD_VERSION};
                 # NOTE(review): $dupk is not referenced again in this sub
9860 0           my $dupk = delete $h{OSX_DUP_KEYS};
9861 0 0         $dupk = {@$dupk} if $dupk;
9862            
9863             # OSX_CREATOR version OSX_CREATOR_VERSION on OSX_EDIT_DATE
9864 0           my $file = $self->{'[file]'};
9865 0 0         $file = (defined $file) ? "keyboard layout file $file" : 'string descriptor';
9866 0 0         $file .= " version $v" if defined $v;
9867 0 0         $file .= " Unicode tables version $self->{uniVersion}" if defined $self->{uniVersion};
9868 0           $h{OSX_CREATOR} = "UI::KeyboardLayout";
9869 0           $h{OSX_CREATOR_VERSION} = "$UI::KeyboardLayout::VERSION with $file";
                 # gmtime fields in the order year, month, day, hour, min, sec; the first two are made human-readable
9870 0           my @t = (gmtime)[5,4,3,2,1,0];
9871 0           $t[0] += 1900; $t[1]++;
  0            
9872 0           $h{OSX_EDIT_DATE} = sprintf '%d-%02d-%02d at %d:%02d:%02d GMT', @t;
9873            
9874 0           my $F = $self->get_deep($self, @$k);
9875 0           my($flip_AltGr_hex, %nn) = $F->{'[Flip_AltGr_Key]'};
9876 0 0         $flip_AltGr_hex = $self->key2hex($self->charhex2key($flip_AltGr_hex)) if defined $flip_AltGr_hex;
9877 0           my %map; # Indexed by hex (??? What about UTF-16???)
                 # Same iteration as in fill_win_template(), but the return value of output_deadkeys() is
                 # ignored, and \%map is passed as an extra argument (presumably to be filled -- TODO confirm)
9878 0           for my $deadKey ( sort keys %{ $F->{'[deadkeyFaceHexMap]'} } ) {
  0            
9879 0 0         next if $F->{'[only_extra]'}{$self->charhex2key($deadKey)};
9880 0           my $auto_inv_AltGr = $F->{'[deadkeyInvAltGrKey]'}{$deadKey};
9881 0 0         $auto_inv_AltGr = $self->key2hex($auto_inv_AltGr) if defined $auto_inv_AltGr;
9882             #warn "flipkey=$flip_AltGr_hex, dead=$deadKey" if defined $flip_AltGr_hex;
9883 0           $self->output_deadkeys($k->[-1], $deadKey, $F->{'[dead2]'}, $flip_AltGr_hex, $auto_inv_AltGr, \%map);
9884             }
9885            
                 # Template key => ";"-separated arguments ($l, $shift, $capsl and, optionally, $use_base) for AppleMap_i_j()
9886 0           my %how = qw( OSX_KEYMAP_0_AND_COMMAND 0;0;0
9887             OSX_KEYMAP_SHIFT 0;1;0
9888             OSX_KEYMAP_CAPS 0;0;1
9889             OSX_KEYMAP_OPTION 1;0;0
9890             OSX_KEYMAP_OPTION_SHIFT 1;1;0
9891             OSX_KEYMAP_OPTION_CAPS 1;0;1
9892             OSX_KEYMAP_OPTION_COMMAND 1;0;0
9893             OSX_KEYMAP_CTRL 0;0;0;-1
9894             OSX_KEYMAP_COMMAND 0;0;0;1
9895             ); # In US Extended, OPT-CMD is the same as OPT
9896             # OSX_KEYMAP_COMMAND_AS_BASE 0;0;0;0
                 # $ov starts undefined; it is autovivified into a hash by the assignments below.
                 # An entry of '[Apple_Override]' is "K,dead,out"; `out' is either the literal hex4/hex6, or goes through stringHEX2string()
9897 0   0       my($OVERR, $ov) = $F->{'[Apple_Override]'} || [];
9898 0           for my $o (@$OVERR) {
9899 0           my($K, $dead, $out) = split /,/, $o, 3;
9900 0 0         if ($out =~ /^hex[46]\z/) {
9901 0           $out = ['lit', $out]
9902             } else {
9903 0           $out = [0, $self->stringHEX2string($out)]
9904             }
9905 0           $ov->{$K} = [$out->[1], undef, $dead, $out->[0]];
9906             }
9907 0   0       my $DUP = $F->{'[Apple_Duplicate]'} || [0x6e, 10, 0x47, 10, 0x66, 49, 0x68, 49]; # Mnu => ISO, KP_Clear => ISO, L/R-SPace => Space
9908 0           $ov->{dup} = {@$DUP};
9909 0           $ov->{extra_actions} = {};
9910 0           for my $m (keys %how) {
9911 0           my($l, $shift, $capsl, $use_base) = split /;/, $how{$m};
9912 0           $h{$m} = $self->AppleMap_i_j ($k, $l, $shift, $capsl, $use_base, \%ids, \%map, $ov);
9913             }
9914             # warn "Need separate OSX_KEYMAP_COMMAND for k=$k\n" unless $h{OSX_KEYMAP_COMMAND} eq $h{OSX_KEYMAP_0_AND_COMMAND};
9915             # my $vbell = $self->get_deep_via_parents($self, undef, @$k, '[DeadChar_DefaultTranslation]');
9916             # $vbell =~ s/^\s+(?=.)//, $vbell = $self->charhex2key($vbell) if defined $vbell;
9917             # undef $vbell; # Terminators are used as visual feedback when prefix is pressed!
9918 0           my($S, %act) = $F->{'[Show]'};
                 # 4 calls to AppleMap_prefix(): for each of '' and 'term' (in this order), first with a true, then with a false 2nd argument
9919 0           @h{qw(OSX_ACTIONS_BASE OSX_ACTIONS OSX_TERMINATORS_BASE OSX_TERMINATORS2)}
9920             = map +($self->AppleMap_prefix(\%ids, 'do_initiating', $_, \%map, $S, $ov, \%act),
9921             $self->AppleMap_prefix(\%ids, !'do_initiating', $_, \%map, $S, $ov, \%act)), '', 'term';
9922            
9923 0           $self->massage_template($template_osx, \%h);
9924             }
9925            
9926             my $unused = <<'EOR';
9927             # extract compositions, add to char downgrades; -> composition, => compatibility composition
9928             perl -wlne "$k=$1, next if /^([\da-f]+)/i; undef $a; $a = qq($k -> $1) if /^\s+:\s*([0-9A-F]+(?:\s+[0-9A-F]+)*)/; $a = qq($k => $2 $1) if /^\s+#\s*((?:<.*?>\s+)?)([0-9A-F]+(?:\s+[0-9A-F]+)*)/; next unless $a; $a =~ s/\s*$/ / unless $a =~ />\s+\S.*\s\S/; print $a" NamesList.txt >compose2b-NamesList.txt
9929             # expand recursively
9930             perl -wlne "/^(.+?)\s+([-=])>\s+(.+?)\s*$/ or die; $t{$1} = $3; $h{$1}=$2; sub t($); sub t($) {my $i=shift; return $n{$i} if exists $n{$i}; return $i unless $t{$i}; $t{$i} =~ /^(\S+)(.*)/ or die $i; return t($1).$2} END{print qq($_\t:$h{$_} ), join q( ), sort split /\s+/, t($_) for sort {hex $a <=> hex $b} keys %t}" compose2b-NamesList.txt >compose3c-NamesList.txt
9931            
9932             #### perl -wlne "($k,$r)=/^(\S+)\s+:[-=]\s+(.*?)\s*$/ or die; $k{$r} = $k; $r{$k}=$r; END { for my $k (sort {hex $a <=> hex $b} keys %r) { my @r = split /\s+/, $r{$k}; for my $o (1..$#r) {my @rr = @r; splice @rr, $o, 1; my ($rr,$kk) = join q( ), @rr; print qq($k\t<= $kk ), $r[$o] if $kk = $k{$rr}} } }" compose3c-NamesList.txt >compose4-NamesList.txt
9933             perl -wlne "($k,$h,$r)=/^(\S+)\s+:([-=])\s+(.*?)\s*$/ or die; $k{$r} = $k; $r{$k}=$r; $hk{$k}=$hr{$r}= ($h eq q(=)); END { for my $k (sort {hex $a <=> hex $b} keys %r) { my $h = $hk{$k}; my @r = split /\s+/, $r{$k}; print qq($k\t:$h $r{$k}) and next if @r == 2; for my $o (1..$#r) {my @rr = @r; splice @rr, $o, 1; my ($rr,$kk) = join q( ), @rr; print qq($k\t<= $kk ), $r[$o] if $kk = $k{$rr}} } }" compose3c-NamesList.txt >compose4-NamesList.txt
9934            
9935            
9936             # Recursively decompose; :- composition, := compatibility composition
9937             perl -wlne "/^(.+?)\s+([-=])>\s+(.+?)\s*$/ or die; $t{$1} = $3; $h{$1}=$2 if $2 eq q(=); sub t($); sub t($) {my $i=shift; return $n{$i} if exists $n{$i}; return $i unless $t{$i}; $t{$i} =~ /^(\S+)(.*)/ or die $i; my @rr = t($1); return $rr[0].$2, $h{$i} || $rr[1]} END{my(@rr, $h); @rr=t($_), $h = $rr[1] || q(-), (@i = split /\s+/, $rr[0]), print qq($_\t:$h ), join q( ), $i[0], sort @i[1..$#i] for sort {hex $a <=> hex $b} keys %t}" compose2b-NamesList.txt >compose3e-NamesList.txt
9938             # Recompose parts to get "merge 2" decompositions; <- and <= if involve composition, :- and := otherwise
9939             perl -wlne "($k,$h,$r)=/^(\S+)\s+:([-=])\s+(.*?)\s*$/ or die; $k{$r} = $k; $r{$k}=$r; $hk{$k}=$hr{$r}= ($h eq q(=) ? q(=) : undef); END { for my $k (sort {hex $a <=> hex $b} keys %r) { my $h = $hk{$k} || q(-); my @r = split /\s+/, $r{$k}; print qq($k\t:$h $r{$k}) and next if @r == 2; my %s; for my $o (1..$#r) {my @rr = @r; next if $s{$rr[$o]}++; splice @rr, $o, 1; my ($rr,$kk) = join q( ), @rr; print qq($k\t<), $hk{$k} || $hr{$kk} || q(-), qq( $kk ), $r[$o] if $kk = $k{$rr}} } }" compose3e-NamesList.txt >compose4b-NamesList.txt
9940             # List of possible modifiers for each char, introduced by -->, separated by //
9941             perl -C31 -wlne "sub f($) {my $i=shift; return $i unless $i=~/^\w/; qq($i ).chr hex $i} sub ff($) {join q( ), map f($_), split /\s+/, shift} my($c,$B,$m) = /^(\S+)\s+[:<][-=]\s+(\S+)\s+(\S+)\s*$/ or die; push @{$c{$B}}, ff qq($m $c); END { for my $k (sort {hex $a <=> hex $b} keys %c) { print f($k), qq(\t--> ), join q( // ), sort @{$c{$k}} } }" compose4b-NamesList.txt >compose5d-NamesList.txt
9942             # Find what appears as modifiers:
9943             perl -F"\s+//\s+|\s+-->\s+" -wlane "s/\s+[0-9A-F]{4,}(\s\S+)?\s*$//, print for @F[1..$#F]" ! | sort -u >!-words
9944            
9945             Duplicate: 0296 <== [ 003F ] ==> <1 0295> (prefered)
9946             <ʖ> LATIN LETTER INVERTED GLOTTAL STOP
9947             <ʕ> LATIN LETTER PHARYNGEAL VOICED FRICATIVE at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
9948             Duplicate: 0384 <== [ 0020 0301 ] ==> <1 00B4> (prefered)
9949             <΄> GREEK TONOS
9950             <´> ACUTE ACCENT at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
9951             Duplicate: 1D43 <== [ 0061 ] ==> <1 00AA> (prefered)
9952             <ᵃ> MODIFIER LETTER SMALL A
9953             <ª> FEMININE ORDINAL INDICATOR at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
9954             Duplicate: 1D52 <== [ 006F ] ==> <1 00BA> (prefered)
9955             <ᵒ> MODIFIER LETTER SMALL O
9956             <º> MASCULINE ORDINAL INDICATOR at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
9957             Duplicate: 1D9F <== [ 0065 ] ==> <1 1D4C> (prefered)
9958             <ᶟ> MODIFIER LETTER SMALL REVERSED OPEN E
9959             <ᵌ> MODIFIER LETTER SMALL TURNED OPEN E at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
9960             Duplicate: 1E7A <== [ 0055 0304 0308 ] ==> <0 01D5> (prefered)
9961             <Ṻ> LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
9962             <Ǖ> LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
9963             Duplicate: 1E7B <== [ 0075 0304 0308 ] ==> <0 01D6> (prefered)
9964             <ṻ> LATIN SMALL LETTER U WITH MACRON AND DIAERESIS
9965             <ǖ> LATIN SMALL LETTER U WITH DIAERESIS AND MACRON at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
9966             Duplicate: 1FBF <== [ 0020 0313 ] ==> <1 1FBD> (prefered)
9967             <᾿> GREEK PSILI
9968             <᾽> GREEK KORONIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
9969             Duplicate: 2007 <== [ 0020 ] ==> <1 00A0> (prefered)
9970             < > FIGURE SPACE
9971             < > NO-BREAK SPACE at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
9972             Duplicate: 202F <== [ 0020 ] ==> <1 00A0> (prefered)
9973             < > NARROW NO-BREAK SPACE
9974             < > NO-BREAK SPACE at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
9975             Duplicate: 2113 <== [ 006C ] ==> <1 1D4C1> (prefered)
9976             <ℓ> SCRIPT SMALL L
9977             <퓁> MATHEMATICAL SCRIPT SMALL L at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
9978             Duplicate: 24B8 <== [ 0043 ] ==> <1 1F12B> (prefered)
9979             <Ⓒ> CIRCLED LATIN CAPITAL LETTER C
9980             <> CIRCLED ITALIC LATIN CAPITAL LETTER C at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
9981             Duplicate: 24C7 <== [ 0052 ] ==> <1 1F12C> (prefered)
9982             <Ⓡ> CIRCLED LATIN CAPITAL LETTER R
9983             <> CIRCLED ITALIC LATIN CAPITAL LETTER R at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
9984             Duplicate: 2E1E <== [ 007E ] ==> <1 2A6A> (prefered)
9985             <⸞> TILDE WITH DOT ABOVE
9986             <⩪> TILDE OPERATOR WITH DOT ABOVE at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
9987             Duplicate: 33B9 <== [ 004D 0056 ] ==> <1 1F14B> (prefered)
9988             <㎹> SQUARE MV MEGA
9989             <> SQUARED MV at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
9990             Duplicate: FC03 <== [ 064A 0649 0654 ] ==> <1 FBF9> (prefered)
9991             <ﰃ> ARABIC LIGATURE YEH WITH HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM
9992             <ﯹ> ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
9993             Duplicate: FC68 <== [ 064A 0649 0654 ] ==> <1 FBFA> (prefered)
9994             <ﱨ> ARABIC LIGATURE YEH WITH HAMZA ABOVE WITH ALEF MAKSURA FINAL FORM
9995             <ﯺ> ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH HAMZA ABOVE WITH ALEF MAKSURA FINAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
9996             Duplicate: FD55 <== [ 062A 062C 0645 ] ==> <1 FD50> (prefered)
9997             <ﵕ> ARABIC LIGATURE TEH WITH MEEM WITH JEEM INITIAL FORM
9998             <ﵐ> ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
9999             Duplicate: FD56 <== [ 062A 062D 0645 ] ==> <1 FD53> (prefered)
10000             <ﵖ> ARABIC LIGATURE TEH WITH MEEM WITH HAH INITIAL FORM
10001             <ﵓ> ARABIC LIGATURE TEH WITH HAH WITH MEEM INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10002             Duplicate: FD57 <== [ 062A 062E 0645 ] ==> <1 FD54> (prefered)
10003             <ﵗ> ARABIC LIGATURE TEH WITH MEEM WITH KHAH INITIAL FORM
10004             <ﵔ> ARABIC LIGATURE TEH WITH KHAH WITH MEEM INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10005             Duplicate: FD5D <== [ 0633 062C 062D ] ==> <1 FD5C> (prefered)
10006             <ﵝ> ARABIC LIGATURE SEEN WITH JEEM WITH HAH INITIAL FORM
10007             <ﵜ> ARABIC LIGATURE SEEN WITH HAH WITH JEEM INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10008             Duplicate: FD87 <== [ 0644 062D 0645 ] ==> <1 FD80> (prefered)
10009             <ﶇ> ARABIC LIGATURE LAM WITH MEEM WITH HAH FINAL FORM
10010             <ﶀ> ARABIC LIGATURE LAM WITH HAH WITH MEEM FINAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10011             Duplicate: FD8C <== [ 0645 062C 062D ] ==> <1 FD89> (prefered)
10012             <ﶌ> ARABIC LIGATURE MEEM WITH JEEM WITH HAH INITIAL FORM
10013             <ﶉ> ARABIC LIGATURE MEEM WITH HAH WITH JEEM INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10014             Duplicate: FD92 <== [ 0645 062C 062E ] ==> <1 FD8E> (prefered)
10015             <ﶒ> ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM
10016             <ﶎ> ARABIC LIGATURE MEEM WITH KHAH WITH JEEM INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10017             Duplicate: FDB5 <== [ 0644 062D 0645 ] ==> <1 FD88> (prefered)
10018             <ﶵ> ARABIC LIGATURE LAM WITH HAH WITH MEEM INITIAL FORM
10019             <ﶈ> ARABIC LIGATURE LAM WITH MEEM WITH HAH INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10020             Duplicate: FE34 <== [ 005F ] ==> <1 FE33> (prefered)
10021             <︴> PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
10022             <︳> PRESENTATION FORM FOR VERTICAL LOW LINE at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10023            
10024             Duplicate: 0273 <== [ 006E ] ==> <1 014B> (prefered)
10025             <ɳ> LATIN SMALL LETTER N WITH RETROFLEX HOOK
10026             <ŋ> LATIN SMALL LETTER ENG at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10027             Duplicate: 1DAF <== [ 006E ] ==> <1 1D51> (prefered)
10028             <ᶯ> MODIFIER LETTER SMALL N WITH RETROFLEX HOOK
10029             <ᵑ> MODIFIER LETTER SMALL ENG at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10030             Duplicate: 2040 <== [ 007E ] ==> <1 203F> (prefered)
10031             <⁀> CHARACTER TIE
10032             <‿> UNDERTIE at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10033             Duplicate: 207F <== [ 004E ] ==> <1 014A> (prefered)
10034             <ⁿ> SUPERSCRIPT LATIN SMALL LETTER N
10035             <Ŋ> LATIN CAPITAL LETTER ENG at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10036             Duplicate: 224B <== [ 007E ] ==> <1 2248> (prefered)
10037             <≋> TRIPLE TILDE
10038             <≈> ALMOST EQUAL TO at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10039             Duplicate: 2256 <== [ 003D ] ==> <1 224D> (prefered)
10040             <≖> RING IN EQUAL TO
10041             <≍> EQUIVALENT TO at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10042             Duplicate: 2257 <== [ 003D ] ==> <1 224D> (prefered)
10043             <≗> RING EQUAL TO
10044             <≍> EQUIVALENT TO at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10045             Duplicate: 225E <== [ 225F ] ==> <1 225C> (prefered)
10046             <≞> MEASURED BY
10047             <≜> DELTA EQUAL TO at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10048             Duplicate: 2263 <== [ 003D ] ==> <1 2261> (prefered)
10049             <≣> STRICTLY EQUIVALENT TO
10050             <≡> IDENTICAL TO at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10051             Duplicate: 2277 <== [ 003D 0338 ] ==> <1 2276> (prefered)
10052             <≷> GREATER-THAN OR LESS-THAN
10053             <≶> LESS-THAN OR GREATER-THAN at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10054             Duplicate: 2279 <== [ 003D ] ==> <1 2278> (prefered)
10055             <≹> NEITHER GREATER-THAN NOR LESS-THAN
10056             <≸> NEITHER LESS-THAN NOR GREATER-THAN at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10057             Duplicate: 2279 <== [ 003D 0338 0338 ] ==> <1 2278> (prefered)
10058             <≹> NEITHER GREATER-THAN NOR LESS-THAN
10059             <≸> NEITHER LESS-THAN NOR GREATER-THAN at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10060             Duplicate: 2982 <== [ 003A ] ==> <1 2236> (prefered)
10061             <⦂> Z NOTATION TYPE COLON
10062             <∶> RATIO at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10063             Duplicate: 2993 <== [ 0028 ] ==> <1 2985> (prefered)
10064             <⦓> LEFT ARC LESS-THAN BRACKET
10065             <⦅> LEFT WHITE PARENTHESIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10066             Duplicate: 2994 <== [ 0029 ] ==> <1 2986> (prefered)
10067             <⦔> RIGHT ARC GREATER-THAN BRACKET
10068             <⦆> RIGHT WHITE PARENTHESIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10069             Duplicate: 2995 <== [ 0029 ] ==> <1 2986> (prefered)
10070             <⦕> DOUBLE LEFT ARC GREATER-THAN BRACKET
10071             <⦆> RIGHT WHITE PARENTHESIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10072             Duplicate: 2996 <== [ 0028 ] ==> <1 2985> (prefered)
10073             <⦖> DOUBLE RIGHT ARC LESS-THAN BRACKET
10074             <⦅> LEFT WHITE PARENTHESIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10075             Duplicate: 29BC <== [ 0025 ] ==> <1 2030> (prefered)
10076             <⦼> CIRCLED ANTICLOCKWISE-ROTATED DIVISION SIGN
10077             <‰> PER MILLE SIGN at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10078             Duplicate: 2A17 <== [ 222B ] ==> <1 2A10> (prefered)
10079             <⨗> INTEGRAL WITH LEFTWARDS ARROW WITH HOOK
10080             <⨐> CIRCULATION FUNCTION at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10081             Duplicate: 2A34 <== [ 00D7 ] ==> <1 22C9> (prefered)
10082             <⨴> MULTIPLICATION SIGN IN LEFT HALF CIRCLE
10083             <⋉> LEFT NORMAL FACTOR SEMIDIRECT PRODUCT at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10084             Duplicate: 2A35 <== [ 00D7 ] ==> <1 22CA> (prefered)
10085             <⨵> MULTIPLICATION SIGN IN RIGHT HALF CIRCLE
10086             <⋊> RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10087             Duplicate: 2A36 <== [ 00D7 ] ==> <1 2A2F> (prefered)
10088             <⨶> CIRCLED MULTIPLICATION SIGN WITH CIRCUMFLEX ACCENT
10089             <⨯> VECTOR OR CROSS PRODUCT at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10090             Duplicate: 2A50 <== [ 00D7 ] ==> <1 2A33> (prefered)
10091             <⩐> CLOSED UNION WITH SERIFS AND SMASH PRODUCT
10092             <⨳> SMASH PRODUCT at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10093             Duplicate: 2ACF <== [ 25C1 ] ==> <1 2A1E> (prefered)
10094             <⫏> CLOSED SUBSET
10095             <⨞> LARGE LEFT TRIANGLE OPERATOR at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10096             Duplicate: 2AFB <== [ 2223 ] ==> <1 2AF4> (prefered)
10097             <⫻> TRIPLE SOLIDUS BINARY RELATION
10098             <⫴> TRIPLE VERTICAL BAR BINARY RELATION at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10099             Duplicate: 2AFB <== [ 007C ] ==> <1 2AF4> (prefered)
10100             <⫻> TRIPLE SOLIDUS BINARY RELATION
10101             <⫴> TRIPLE VERTICAL BAR BINARY RELATION at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10102             Duplicate: 2AFD <== [ 002F ] ==> <1 2215> (prefered)
10103             <⫽> DOUBLE SOLIDUS OPERATOR
10104             <∕> DIVISION SLASH at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10105             Duplicate: 2AFF <== [ 007C ] ==> <1 2AFE> (prefered)
10106             <⫿> N-ARY WHITE VERTICAL BAR
10107             <⫾> WHITE VERTICAL BAR at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10108             Duplicate: 3018 <== [ 0028 ] ==> <1 27EE> (prefered)
10109             <〘> LEFT WHITE TORTOISE SHELL BRACKET
10110             <⟮> MATHEMATICAL LEFT FLATTENED PARENTHESIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10111             Duplicate: 3019 <== [ 0029 ] ==> <1 27EF> (prefered)
10112             <〙> RIGHT WHITE TORTOISE SHELL BRACKET
10113             <⟯> MATHEMATICAL RIGHT FLATTENED PARENTHESIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10114             Duplicate: A760 <== [ 0059 ] ==> <1 A73C> (prefered)
10115             <Ꝡ> LATIN CAPITAL LETTER VY
10116             <Ꜽ> LATIN CAPITAL LETTER AY at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10117             Duplicate: A761 <== [ 0079 ] ==> <1 A73D> (prefered)
10118             <ꝡ> LATIN SMALL LETTER VY
10119             <ꜽ> LATIN SMALL LETTER AY at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10120             Duplicate: 1D4C1 <== [ 006C ] ==> <1 2113> (prefered)
10121             <𝓁> MATHEMATICAL SCRIPT SMALL L
10122             <ℓ> SCRIPT SMALL L at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10123             Duplicate: 1F12B <== [ 0043 ] ==> <1 24B8> (prefered)
10124             <🄫> CIRCLED ITALIC LATIN CAPITAL LETTER C
10125             <Ⓒ> CIRCLED LATIN CAPITAL LETTER C at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10126             Duplicate: 1F12C <== [ 0052 ] ==> <1 24C7> (prefered)
10127             <🄬> CIRCLED ITALIC LATIN CAPITAL LETTER R
10128             <Ⓡ> CIRCLED LATIN CAPITAL LETTER R at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10129             Duplicate: 1F14B <== [ 004D 0056 ] ==> <1 33B9> (prefered)
10130             <🅋> SQUARED MV
10131             <㎹> SQUARE MV MEGA at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10132             Duplicate: A789 <== [ 003A ] ==> <1 02F8> (prefered)
10133             <꞉> MODIFIER LETTER COLON
10134             <˸> MODIFIER LETTER RAISED COLON at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 8032, <$f> line 39278.
10135             Duplicate: 02EF <== [ 0020 0306 ] ==> <1 02EC> (prefered)
10136             <˯> 02EF MODIFIER LETTER LOW DOWN ARROWHEAD
10137             <ˬ> 02EC MODIFIER LETTER VOICING at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 8634, <$f> line 39278.
10138             Duplicate: 2B95 <== [ 2192 ] ==> <1 27A1> (prefered)
10139             <⮕> 2B95 RIGHTWARDS BLACK ARROW
10140             <➡> 27A1 BLACK RIGHTWARDS ARROW at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 9828, <$f> line 43944.
10141             Duplicate: 1F7C6 <== [ 2727 ] ==> <1 2726> (prefered)
10142             <🟆> 1F7C6 FOUR POINTED BLACK STAR
10143             <✦> 2726 BLACK FOUR POINTED STAR at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 9828, <$f> line 43944.
10144             Duplicate: 27C2 <== [ 005F ] ==> <1 221F> (prefered)
10145             <⟂> 27C2 PERPENDICULAR
10146             <∟> 221F RIGHT ANGLE at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 10537, <$f> line 43944.
10147             Duplicate: 2ADB <== [ 0028 ] ==> <1 220B> (prefered)
10148             <⫛> 2ADB TRANSVERSAL INTERSECTION
10149             <∋> 220B CONTAINS AS MEMBER at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 10537, <$f> line 43944.
10150             Duplicate: 1F5A4 <== [ 2661 ] ==> <1 2665> (prefered)
10151             <🖤> 1F5A4 BLACK HEART
10152             <♥> 2665 BLACK HEART SUIT at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 10488, <$f> line 48770.
10153             EOR
10154            
10155             my (%known_dups) = map +($_,1), # As of Unicode 9.0 (questionable: 2982 2ACF 2ADB)
                  # Set (hex code point => 1) of characters exempted from the "Duplicate:" warning:
                  # parse_NameList only warns when `not $known_dups{$c}' holds for the later character.
10156             qw(0296 0384 1D43 1D52 1D9F 1E7A 1E7B 1FBF 2007
10157             202F 2113 24B8 24C7 2E1E 33B9 FC03 FC68 FD55 FD56 FD57 FD5D FD87 FD8C
10158             FD92 FDB5 FE34 2B95 1F7C6 27C2 2ADB 1F5A4
10159             0273 1DAF 2040 207F 224B 2256 2257 225E 2263 2277 2279 2982 2993 2994 2995 2996 29BC
10160             2A17 2A34 2A35 2A36 2A50 2ACF 2AFB 2AFD 2AFF 3018 3019 A760 A761 1D4C1 1F12B 1F12C 1F14B A789 02EF);
10161            
10162             sub decompose_r($$$$); # recursive
                  # decompose_r($self, $t, $i, $cache): memoized recursive expansion of the key $i.
                  #   $t     - hash ref: key => array ref of entries [$compat, $base, @rest]
                  #   $cache - hash ref of results computed so far; filled in as a side effect
                  # Returns an array ref of expansions, each [$compat, @expand].
                  # A key with no (true) value in $t expands trivially to [[0, $i]].  Otherwise each
                  # expansion of an entry's $base is combined with that entry: the compat flags
                  # are OR-ed together and the entry's @rest is appended.
10163             sub decompose_r($$$$) { # returns array ref, elts are [$compat, @expand]
10164 0     0 0   my ($self, $t, $i, $cache, @expand) = (shift, shift, shift, shift);
10165 0 0         return $cache->{$i} if $cache->{$i};
10166 0 0         return $cache->{$i} = [[0, $i]] unless my $In = $t->{$i};
10167 0           for my $in (@$In) {
10168 0           my $compat = $in->[0];
10169             #warn "i=<$i>, compat=<$compat>, rest=<$in->[1]>";
10170 0           my $expand_in = $self->decompose_r($t, $in->[1], $cache);
                  # The recursive call may hand back a value stored in $cache, and the loop below
                  # shift()s elements off each expansion - so work on a copy
                  # (deep_copy() is defined elsewhere; presumably it returns an independent copy).
10171 0           $expand_in = $self->deep_copy($expand_in);
10172             #warn "Got: $in->[1] -> <@$expand> from $i = <@$in>";
10173 0           for my $expand (@$expand_in) {
10174 0 0 0       warn "Expansion funny: <@$expand>" if @$expand < 2 or $expand->[0] !~ /^[01]$/;
                  # $compat is declared once per $in, so a flag OR-ed in here is also kept for
                  # the remaining expansions of the same $in.
10175 0           $compat = ( shift(@$expand) | $compat);
10176 0 0         warn "!Malformed: $i -> $compat <@$expand>" if $expand->[0] =~ /^[01]$/;
10177 0           push @expand, [ $compat, @$expand, @$in[2..$#$in] ];
10178             }
10179             }
10180 0           return $cache->{$i} = \@expand;
10181             }
10182            
10183 0 0   0 0   sub fromHEX ($) { my $i = shift; $i =~ /^\w/ and hex $i}
  0            
                  # fromHEX($i) above: hex($i) when $i starts with a word character; otherwise the false
                  # value of the failed match.  It serves as the numeric key in the `sort' blocks of this file.
10184            
10185             my %operators = (DOT => ['MIDDLE DOT', 'FULL STOP'], RING => ['DEGREE SIGN'], DIAMOND => ['WHITE DIAMOND'],
                  # Keys are matched by parse_NameList against a character name with a trailing
                  # OPERATOR (in one place also SIGN) stripped; the values are names of characters
                  # which it then uses as additional bases.
10186             'DOUBLE SOLIDUS' => ['PARALLEL TO'], MINUS => ['HYPHEN-MINUS']);
10187            
10188             # THIS IS A MULTIMAP (later entry for a TARGET wins)! ■□ ◼◻ ◾◽ ◇◆◈⟐⟡⟢⟣⌺ △▲▵▴▽▼▿▾⟁⧊⧋
10189             my %uni_manual = (phonetized => [qw( 0 ə s ʃ z ʒ j ɟ v ⱱ n ɳ N ⁿ n ŋ V ɤ ! ǃ ? ʔ ¿ ʕ | ǀ f ʄ F ǂ x ʘ X ǁ
10190             g ʛ m ɰ h ɧ d ᶑ C ʗ)], # z ɮ (C ʗ is "extras")
                  # Layout of %uni_manual: each value is a flat list of (from, to) character pairs.
                  # parse_NameList turns every pair into a candidate for `to', tagged "manual-<key>",
                  # with the name of `from' as the base; it also tries uc() of both characters
                  # when both change under uc().
                  # NOTE(review): parse_NameList consumes these lists with pop(), so they are empty
                  # after its first run - confirm that it is never called twice in one process.
10191             phonetize2 => [qw( e ɘ E ɞ i ɻ I ɺ)], # Use some capitalized sources (no uc variants)...
10192             phonetize3 => [qw( a ɒ A Ɒ e ɜ E ɝ)], # Use some capitalized sources (no uc variants)...
10193             phonetize0 => [qw( e ə)],
10194             paleo => [qw( & ⁊ W Ƿ w ƿ h ƕ H Ƕ G Ȝ g ȝ )],
10195             # cut&paste from http://en.wikipedia.org/wiki/Coptic_alphabet
10196             # perl -C31 -wne "chomp; ($uc,$lc,undef,undef,$gr) = split /\t/;($ug,$lg)=split /,\s+/, $gr; print qq( $lg $lc $ug $uc)" coptic2 >coptic-tr
10197             # Fix stigma, koppa; p/P are actually 900; a/A are for AKHMIMIC KHEI (variant of KHEI on h/H);
10198             # 2e17 ⸗ double hyphen; sampi's are duplicated in both places
10199             greek2coptic => [qw(
10200             α ⲁ Α Ⲁ β ⲃ Β Ⲃ γ ⲅ Γ Ⲅ δ ⲇ Δ Ⲇ ε ⲉ Ε Ⲉ ϛ ⲋ Ϛ Ⲋ ζ ⲍ Ζ Ⲍ η ⲏ Η Ⲏ ϙ ϭ Ϙ Ϭ ϡ ⳁ Ϡ Ⳁ
10201             θ ⲑ Θ Ⲑ ι ⲓ Ι Ⲓ κ ⲕ Κ Ⲕ λ ⲗ Λ Ⲗ μ ⲙ Μ Ⲙ ν ⲛ Ν Ⲛ ξ ⲝ Ξ Ⲝ ο ⲟ Ο Ⲟ
10202             π ⲡ Π Ⲡ ρ ⲣ Ρ Ⲣ σ ⲥ Σ Ⲥ τ ⲧ Τ Ⲧ υ ⲩ Υ Ⲩ φ ⲫ Φ Ⲫ χ ⲭ Χ Ⲭ ψ ⲯ Ψ Ⲯ ω ⲱ Ω Ⲱ )],
10203             latin2extracoptic => [qw( - ⸗
10204             s ϣ S Ϣ f ϥ F Ϥ x ϧ X Ϧ h ϩ H Ϩ j ϫ J Ϫ t ϯ T Ϯ p ⳁ P Ⳁ a ⳉ A Ⳉ )],
10205             addline => [qw( 0 ∅ ∅ ⦱ + ∦ ∫ ⨏ • ⊝ / ⫽ ⫽ ⫻ ∮ ⨔ × ⨳ × ⩐ )], # ∮ ⨔ a cheat
10206             addhline => [qw( = ≣ = ≡ ≡ ≣ † ‡ + ∦ / ∠ | ∟ . ∸ ∨ ⊻ ∧ ⊼ ◁ ⩤ * ⩮
10207             ⊨ ⫢ ⊦ ⊧ ⊤ ⫧ ⊥ ⫨ ⊣ ⫤ ⊳ ⩥ ⊲ ⩤ ⋄ ⟠ ∫ ⨍ ⨍ ⨎ • ⦵ ( ∈ ) ∋
10208             ∪ ⩌ ∩ ⩍ ≃ ≅ ⨯ ⨲ )], # conflict with modifiers: qw( _ ‗ ); ( ∈ ) ∋ not very useful - but logical - with ∈∋ as bluekeys... 2 ƻ destructive
10209             addvline => [qw( ⊢ ⊩ ⊣ ⫣ ⊤ ⫪ ⊥ ⫫ □ ⎅ | ‖ ‖ ⦀ ∫ ⨒ ≢ ⩨ ⩨ ⩩ • ⦶
10210             \ ⫮ ° ⫯ . ⫰ ⫲ ⫵ ∞ ⧞ = ⧧ ⧺ ⧻ + ⧺ ∩ ⨙ ∪ ⨚ 0 ⦽ _ ⟂ _ ∟ )], # + ⫲
10211             addtilde => [qw( 0 ∝ / ∡ \ ∢ ∫ ∱ ∮ ⨑ : ∻ - ≂ ≠ ≆ ~ ≋ ~ ≈ ∼ ≈ ≃ ≊ ≈ ≋ = ≌
10212             ≐ ≏ ( ⟅ ) ⟆ ∧ ⩄ ∨ ⩅ ∩ ⩆ ∪ ⩇ )], # not on 2A**
10213             adddot => [qw( : ⫶ " ∵ ∫ ⨓ ∮ ⨕ □ ⊡ ◇ ⟐ ( ⦑ ) ⦒ ≟ ≗ ≐ ≑
10214             - ┄ — ┄ ─ ┈ ━ ┅ ═ ┉ | ┆ │ ┊ ┃ ┇ ║ ┋ )], # ⫶ is tricolon, not vert. … "; (m-)dash/bar, (b)[h/v]draw, bold/dbl
10215             adddottop => [qw( + ∔ )],
10216             addleft => [qw( = ≔ × ⨴ × ⋉ \ ⋋ + ⨭ → ⧴ ∫ ⨐ ∫ ⨗ ∮ ∳ ⊂ ⟈ ⊃ ⫐ ⊳ ⧐ ⊢ ⊩ ⊩ ⊪ ⊣ ⟞
10217             ◇ ⟢ ▽ ⧨ ≡ ⫢ • ⥀ ⋈ ⧑ ≟ ⩻ ≐ ≓ | ⩘ ≔ ⩴ ⊲ ⫷)], # × ⨴ is hidden
10218             addright => [qw( = ≕ × ⨵ × ⋊ / ⋌ + ⨮ - ∹ ∫ ⨔ ∮ ∲ ⊂ ⫏ ⊃ ⟉ ⊲ ⧏ ⊢ ⟝ ⊣ ⫣
10219             ◇ ⟣ △ ⧩ • ⥁ ⋈ ⧒ ≟ ⩼ ≐ ≒ | ⩗ ⊳ ⫸ : ⧴)], # × ⨵ is hidden
10220             sharpen => [qw( < ≺ > ≻ { ⊰ } ⊱ ( ⟨ ) ⟩ ∧ ⋏ ∨ ⋎ . ⋄ ⟨ ⧼ ⟩ ⧽ ∫ ⨘
10221             ⊤ ⩚ ⊥ ⩛ ◇ ⟡ ▽ ⧍ • ⏣ ≟ ≙ + ⧾ - ⧿)], # ⋆
10222             unsharpen => [qw( < ⊏ > ⊐ ( ⟮ ) ⟯ ∩ ⊓ ∪ ⊔ ∧ ⊓ ∨ ⊔ . ∷ ∫ ⨒ ∮ ⨖ { ⦉ } ⦊
10223             / ⧄ \ ⧅ ° ⧇ ◇ ⌺ • ⌼ ≟ ≚ ≐ ∺ ( 〘 ) 〙 )], # + ⊞ - ⊟ * ⊠ . ⊡ × ⊠, ( ⦗ ) ⦘ ( 〔 ) 〕
10224             whiten => [qw( [ ⟦ ] ⟧ ( ⟬ ) ⟭ { ⦃ } ⦄ ⊤ ⫪ ⊥ ⫫ ; ⨟ ⊢ ⊫ ⊣ ⫥ ⊔ ⩏ ⊓ ⩎ ∧ ⩓ ∨ ⩔ _ ‗ = ≣
10225             : ⦂ | ⫾ | ⫿ • ○ < ⪡ > ⪢ ⊓ ⩎ ⊔ ⩏ )], # or blacken □ ■ ◻ ◼ ◽ ◾ ◇ ◆ △ ▲ ▵ ▴ ▽ ▼ ▿ ▾
10226             quasisynon => [qw( ∈ ∊ ∋ ∍ ≠ ≶ ≠ ≷ = ≸ = ≹ ≼ ⊁ ≽ ⊀ ≺ ⋡ ≻ ⋠ < ≨ > ≩ Δ ∆
10227             ≤ ⪕ ≥ ⪖ ⊆ ⊅ ⊇ ⊄ ⊂ ⊉ ⊃ ⊈ ⊏ ⋣ ⊐ ⋢ ⊳ ⋬ ⊲ ⋭ … ⋯ / ⟋ \ ⟍
10228             ( ⦇ ) ⦈ [ ⨽ ] ⨼ ∅ ⌀
10229             ⊤ ⫟ ⊥ ⫠ ⟂ ⫛ □ ∎ ▽ ∀ ‖ ∥ ≟ ≞ ≟ ≜ ~ ‿ ~ ⁀ ■ ▬ )], # ( ⟬ ) ⟭ < ≱ > ≰ ≤ ≯ ≥ ≮ * ⋆
10230             amplify => [qw( < ≪ > ≫ ≪ ⋘ ≫ ⋙ ∩ ⋒ ∪ ⋓ ⊂ ⋐ ⊃ ⋑ ( ⟪ ) ⟫ ∼ ∿ = ≝ ∣ ∥ . ⋮
10231             ∈ ∊ ∋ ∍ - − / ∕ \ ∖ √ ∛ ∛ ∜ ∫ ∬ ∬ ∭ ∭ ⨌ ∮ ∯ ∯ ∰ : ⦂ ` ⎖
10232             : ∶ ≈ ≋ ≏ ≎ ≡ ≣ × ⨯ + ∑ Π ∏ Σ ∑ ρ ∐ ∐ ⨿ ⊥ ⟘ ⊤ ⟙ ⟂ ⫡ ; ⨾ □ ⧈ ◇ ◈
10233             ⊲ ⨞ ⊢ ⊦ △ ⟁ ∥ ⫴ ⫴ ⫼ / ⫽ ⫽ ⫻ • ● ⊔ ⩏ ⊓ ⩎ ∧ ⩕ ∨ ⩖ ▷ ⊳ ◁ ⊲
10234             ⋉ ⧔ ⋊ ⧕ ⋈ ⧓ ⪡ ⫷ ⪢ ⫸ ≟ ≛ ≐ ≎ ⊳ ⫐ ⊲ ⫏ { ❴ } ❵ × ⨶ )], # ` ⋆ ☆ ⋆ ★ ; ˆ ∧ conflicts with combining-ˆ; * ∏ stops propagation *->×->⋈, : ⦂ hidden; ∥ ⫴; × ⋈ not needed; ∰ ⨌ - ???; ≃ ≌ not useful
10235             turnaround => [qw( ∧ ∨ ∩ ∪ ∕ ∖ ⋏ ⋎ ∼ ≀ ⋯ ⋮ … ⋮ ⋰ ⋱ _ ‾
10236             8 ∞ ∆ ∇ Α ∀ Ε ∃ ∴ ∵ ≃ ≂
10237             ∈ ⫛ ∈ ∋ ∋ ⫙ ∉ ∌ ∊ ∍ ∏ ∐ ± ∓ ⊓ ⊔ ≶ ≷ ≸ ≹ ⋀ ⋁ ⋂ ⋃ ⋉ ⋊ ⋋ ⋌ ⋚ ⋛ ≤ ⋜ ≥ ⋝ ≼ ⋞ ≽ ⋟ )], # XXXX Can't do both directions
10238             superize => [qw( h ʱ ' ʹ < ˂ > ˃ ^ ˑ ( ˓ ) ˒ ⊢ ˫ 0 ᵊ * ˟ × ˟ ~ ﹋ ≈ ﹌ ─ ‾
10239             □ ⸋ . ⸳ @ ♭), '#' => '♯'], # ' Additions to !
10240             subize => [qw( < ˱ > ˲ _ ˍ ' ˏ " ˶ ˵ ˵ . ˳ ° ˳ ˘ ˯ ˘ ˬ ( ˓ ) ˒ 0 ₔ ~ ﹏ ═ ‗), ',' => '¸'], # '
10241             subize2 => [qw( < ˂ > ˃ )], # these are in older Unicode, so would override if in subize
10242             # Most of these are for I/O on very ancient systems (only ∘ and ∅ are not auto-detected on quadapl):
10243             aplbox => [qw( | ⌷ = ⌸ ÷ ⌹ ◇ ⌺ ∘ ⌻ ○ ⌼ / ⍁ \ ⍂ < ⍃ > ⍄ ← ⍇ → ⍈ ∨ ⍌ Δ ⍍ ↑ ⍐ ∧ ⍓ ∇ ⍔ ↓ ⍗ ' ⍞ : ⍠ ≠ ⍯ ? ⍰ ∅ ⎕ )], #'
10244             round => [qw( < ⊂ > ⊃ = ≖ = ≗ = ≍ ∫ ∮ ∬ ∯ ∭ ∰ ∼ ∾ - ⊸ □ ▢ ∥ ≬ ‖ ≬ • ⦁
10245             … ∴ ≡ ≋ ⊂ ⟃ ⊃ ⟄ ⊤ ⫙ ⊥ ⟒ ( ⦖ ) ⦕ ( ⦓ ) ⦔ ( ⦅ ) ⦆ ⊳ ⪧ ⊲ ⪦ ≟ ≘ ≐ ≖ . ∘
10246             [ ⟬ ] ⟭ { ⧼ } ⧽ % ⦼ % ‰ × ⦻ ⨯ ⨷ ∧ ∩ ∨ ∪ )]); # = ≈
10247            
10248             sub parse_NameList ($$) {
                  # parse_NameList($self, $f): read a Unicode names list from the filehandle $f and build
                  # (de)composition tables out of the official decompositions plus guesses made
                  # from the character names.
                  # Returns the list (\%into2, \%comp2, \%NM, \%BL, \%NS, $version):
                  #   %into2   - charhex2key(composed) => [ [flags, charhex2key(base), charhex2key(modifier)], ... ]
                  #   %comp2   - modifier => { hex of base => [ [flags, hex of composed], ... ] }, entries sorted at the end
                  #   %NM      - charhex2key(hex) => character name
                  #   %BL      - charhex2key(hex) => charhex2key() of the first hex number on the latest "@@" line
                  #   %NS      - count for chars named ...COMBINING... which do not match \p{NonSpacingMark}
                  #   $version - the text after "The Unicode Standard" on the "@@@" line
                  # where flags is (compat << 1) | calculated.
                  # (charhex2key() and key2hex() are defined elsewhere; the `chr hex' lookups in %NM below
                  #  suggest that charhex2key() maps a hex string to the character - TODO confirm.)
                  # NOTE(review): in this listing several literals below look truncated (an unterminated
                  # regex, strings compared with ''); check them against the real source before relying on them.
10249 0     0 0   my ($self, $f, $k, $kk, $name, $_c, %basic, %cached_full, %compose, $version,
10250             %into2, %ordered, %candidates, %N, %comp2, %NM, %BL, $BL, %G, %NS) = (shift, shift);
10251 0           binmode $f; # NameList.txt is in Latin-1, not unicode
                  # --- Step 1: scan the file.  %N: name => hex; %basic: hex => [ [$compat, @decomposition, ...], ... ];
                  #     %candidates: hex => [ [guessed base NAME, how it was guessed], ... ]
10252 0           while (my $s = <$f>) { # extract compositions, add to char downgrades; -> composition, => compatibility composition
10253 0 0         if ($s =~ /^\@\@\@\s+The\s+Unicode\s+Standard\s+(.*?)\s*$/i) {
10254 0           $version = $1;
10255             }
10256 0 0         if ($s =~ /^([\da-f]+)\b\s*(.*?)\s*$/i) {
10257 0           my ($K, $Name, $C, $t) = ($1, $2, $self->charhex2key("$1"));
10258 0           $N{$Name} = $K;
10259 0           $NM{$C} = $Name; # Not needed for compositions, but handy for user-visible output
10260 0           $BL{$C} = $self->charhex2key($BL); # Used for sorting
10261             # Finish processing of preceding text
                  # Inside this `if', $k, $name and $_c still describe the PREVIOUS character; they are
                  # replaced by $K, $Name, $C only at the end of the enclosing branch.
10262 0 0         if (defined $kk) { # Did not see (official) decomposition
10263             # warn("see combining: $K $C $Name"),
10264 0 0 0       $NS{$_c}++ if $name =~ /\bCOMBINING\b/ and not ($_c =~ /\p{NonSpacingMark}/);
10265 0 0         if ($name =~ /^(.*?)\s+(?:(WITH)\s+|(?=(?:OVER|ABOVE|PRECEDED\s+BY|BELOW(?=\s+LONG\s+DASH))\s+\b(?!WITH\b|AND\b)))(.*?)\s*$/) {
10266 0           push @{$candidates{$k}}, [$1, $3];
  0            
10267 0           my ($b, $with, $ext) = ($1, $2, $3);
10268 0           my @ext = split /\s+AND\s+/, $ext;
                  # "B WITH X AND Y ...": also offer each modifier alone on top of "B WITH <the others>"
10269 0 0 0       if ($with and @ext > 1) {
10270 0           for my $i (0..$#ext) {
10271 0           my @ext1 = @ext;
10272 0           splice @ext1, $i, 1;
10273 0           push @{$candidates{$k}}, ["$b WITH ". (join ' AND ', @ext1), $ext[$i]];
  0            
10274             }
10275             }
10276             }
10277 0 0         if ($name =~ /^(.*)\s+(?=OR\s)(.*?)\s*$/) { # Find the latest possible...
10278 0           push @{$candidates{$k}}, [$1, $2];
  0            
10279             }
10280 0 0         if (($t = $name) =~ s/\b(COMBINING(?=\s+CYRILLIC\s+LETTER)|BARRED|SLANTED|APPROXIMATELY|ASYMPTOTICALLY|(?
10281 0           push @{$candidates{$k}}, [$t, "calculated-$+"];
  0            
10282 0 0         $candidates{$k}[-1][1] .= '-epigraphic' if $t =~ /\bEPIGRAPHIC\b/; # will be massaged away from $t later
10283             $candidates{$k}[-1][0] =~ s/\s+SYMBOL$// and $candidates{$k}[-1][1] .= '-symbol'
10284 0 0 0       if $candidates{$k}[-1][1] =~ /\bLUNATE\b/;
10285             # warn("smallcapital $name"),
10286 0 0         $candidates{$k}[-1][1] .= '-smallcaps' if $t =~ /\bSMALL\s+CAPITAL\b/; # will be massaged away from $t later
10287             # warn "Candidates: <$candidates{$k}[0]>; <$candidates{$k}[1]>";
10288             }
10289 0 0         if (($t = $name) =~ s/\b(WHITE|BLACK|CIRCLED)\s+//) {
10290 0           push @{$candidates{$k}}, [$t, "fake-$1"];
  0            
10291             }
10292 0 0         if (($t = $name) =~ s/\bBLACK\b/WHITE/) {
10293 0           push @{$candidates{$k}}, [$t, "fake-black"];
  0            
10294             }
10295 0 0         if (($t = $name) =~ s/^(?:RAISED|MODIFIER\s+LETTER(?:\s+RAISED)?(\s+LOW)?)\s+//) {
10296 0 0         push @{$candidates{$k}}, [$t, $1 ? "fake-sub" : "fake-super"];
  0            
10297             }
10298 0 0         if (($t = $name) =~ s/\bBUT\s+NOT\b/OR/) {
10299 0           push @{$candidates{$k}}, [$t, "fake-but-not"];
  0            
10300             }
10301 0 0         if (($t = $name) =~ s/(^LATIN\b.*\b\w)UM((?:\s+ROTUNDA)?)$/$1$2/) { # Paleo-latin
10302 0           push @{$candidates{$k}}, [$t, "fake-umify"];
  0            
10303             }
10304 0 0 0       if ((0xa7 == ((hex $k)>>8)) and ($t = $name) =~ s/\b(\w|CO|VEN)(?!\1)(\w)$/$2/) { # Paleo-latin (CON/VEND + digraph)
10305 0           push @{$candidates{$k}}, [$t, "fake-paleocontraction-by-last"];
  0            
10306             }
10307 0 0         if (($t = $name) =~ s/(?:(\bMIDDLE-WELSH)\s+)?\b(\w)(?=\2$)//) {
10308 0 0         push @{$candidates{$k}}, [$t, "fake-doubleletter" . ($1 ? "-$1" : '')];
  0            
10309             }
10310 0 0         if (($t = $name) =~ s/\b(APL\s+FUNCTIONAL\s+SYMBOL)\s+\b(.*?)\b\s*\b((?:UNDERBAR|TILDE|DIAERESIS|VANE|STILE|JOT|OVERBAR|BAR)(?!$))\b\s*/$2/) {
10311             #warn "APL: $k ($name) --> <$t>; <$1> <$3>";
10312 0           push @{$candidates{$k}}, [$t, "calculated-$1-$3apl"];
  0            
10313 0           my %s = qw(UP DOWN DOWN UP); # misprint in the official name???
10314 0           $candidates{$k}[-1][0] =~ s/\b(UP|DOWN)(?=\s+TACK\b)/$s{$1}/;
10315             }
10316             # Allow QUAD at end only if $2 is not-empty
10317 0 0         if (($t = $name) =~ s/\b(APL\s+FUNCTIONAL\s+SYMBOL)\s+\b(.*?)\b\s*\b(QUAD(?:(?!$)|(?!\2))|(?:UNDERBAR|TILDE|DIAERESIS|VANE|STILE|JOT|OVERBAR|BAR)$)\b\s*/$2/) {
    0          
10318             #warn "APL: $k ($name) --> <$t>; <$1> <$3>";
10319 0           push @{$candidates{$k}}, [$t, "calculated-$1-$3apl"];
  0            
10320 0           my %s = qw(UP DOWN DOWN UP); # misprint in the official name???
10321 0           $candidates{$k}[-1][0] =~ s/\b(UP|DOWN)(?=\s+TACK\b)/$s{$1}/;
10322             } elsif (($t = $name) =~ s/\b(APL\s+FUNCTIONAL\s+SYMBOL)\s+//) {
10323             #warn "APL: $k ($name) --> <$t>; <$1> <$3>";
10324 0           push @{$candidates{$k}}, [$t, "calculated-$1"];
  0            
10325 0           my %s = qw(UP DOWN DOWN UP); # misprint in the official name???
10326 0           $candidates{$k}[-1][0] =~ s/\b(UP|DOWN)(?=\s+TACK\b)/$s{$1}/;
10327             }
10328 0 0         if (($t = $name) =~ s/\b(LETTER\s+SMALL\s+CAPITAL)/CAPITAL LETTER/) {
10329 0           push @{$candidates{$k}}, [$t, "smallcaps"];
  0            
10330             }
10331 0 0 0       if (($t = $name) =~ s/\b(LETTER\s+)E([SZN])[HG]$/$1$2/ # esh/eng/ezh
      0        
      0        
      0        
10332             # next two not triggered since this is actually decomposed:
10333             or ($t = $name) =~ s/(?<=\bLETTER\sV\s)WITH\s+RIGHT\s+HOOK$//
10334             or ($t = $name) =~ s/\bDOTLESS\s+J\s+WITH\s+STROKE$/J/
10335             or $name eq 'LATIN SMALL LETTER SCHWA' and $t = 'DIGIT ZERO') {
                  # The trailing `if 0' disables this push: the "phonetized" guess is never recorded here.
10336 0           push @{$candidates{$k}}, [$t, "phonetized"] if 0;
10337             }
10338             }
10339 0           ($k, $name, $_c) = ($K, $Name, $C);
10340 0 0         $G{$k} = $name if $name =~ /^GREEK\s/; # Indexed by hex
                  # Re-arm the name-based guessing for this character; the decomposition handling
                  # further down may undef $kk again.
10341 0           $kk = $k;
10342 0           next;
10343             }
10344 0 0         if ($s =~ /^\@\@\s+([\da-f]+)\b/i) {
10345 0 0         die unless $s =~ /^\@\@\s+([\da-f]+)\s.*\s([\da-f]+)\s*$/i;
10346 0           $BL = $1;
10347             }
10348 0           my $a; # compatibility_p, composed, decomposition string
10349 0 0         $a = [0, split /\s+/, "$1"] if $s =~ /^\s+:\s*([0-9A-F]+(?:\s+[0-9A-F]+)*)/;
10350 0 0 0       $a = [1, split /\s+/, "$2"], ($1 and push @$a, $1)
10351             if $s =~ /^\s+#\s*(?:(<.*?>)\s+)?([0-9A-F]+(?:\s+[0-9A-F]+)*)/; # Put at end
10352 0 0         next unless $a;
                  # Double braces: the inner bare block lets `last' (in the warn statement below) leave
                  # the clarification early.
10353 0 0         if ($a->[-1] eq '') {{ # Clarify
10354 0           my ($math, $type) = ('', '');
  0            
10355             # warn("Unexpected name with : <$name>"), unless $name =~ s/^MATHEMATICAL\s+// and $math = "math-";
10356 0 0 0       warn("Unexpected name with : $k <$name>"), last # In BMP, MATHEMATICAL is omited
      0        
      0        
10357             unless $name =~ /^(?:MATHEMATICAL\s+)?((?:(?:BLACK-LETTER|FRAKTUR|BOLD|ITALIC|SANS-SERIF|DOUBLE-STRUCK|MONOSPACE|SCRIPT)\b\s*?)+)(?=\s+(?:SMALL|CAPITAL|DIGIT|NABLA|PARTIAL|N-ARY|\w+\s+SYMBOL)\b)/
10358             or $name =~ /^HEBREW\s+LETTER\s+(WIDE|ALTERNATIVE)\b/
10359             or $name =~ /^(ARABIC\s+MATHEMATICAL(?:\s+(?:INITIAL|DOTLESS|STRETCHED|LOOPED|TAILED|DOUBLE-STRUCK))?)\b/
10360             or $name =~ /^(PLANCK|INFORMATION)/; # information source
10361 0 0         $type = $1 if $1;
10362 0           $type =~ s/BLACK-LETTER/FRAKTUR/; # http://en.wikipedia.org/wiki/Black-letter#Unicode
10363 0           $type =~ s/INFORMATION/Letterlike/; # http://en.wikipedia.org/wiki/Letterlike_Symbols_%28Unicode_block%29
10364 0 0         $type = '=' . join '-', map lc($_), split /\s+/, $type if $type;
10365 0           $a->[-1] = "";
10366             }}
10367 0 0         push @$a, '' unless @$a > 2;
10368 0           push @{$basic{$k}}, $a; # 1 2044 --\
  0            
10369 0 0 0       undef $kk unless $a->[-1] eq '' # Disable guesswork processing
      0        
      0        
      0        
10370             or @$a == 3 and (chr hex $a->[-2]) =~ /\W|\p{Lm}/ and $a->[-1] !~ /^[-1]) =~ /\w/;
10371             # print "@$a";
10372             }
                  # --- Step 2: candidates which do not come from the name of a single character
10373             # $candidates{'014A'} = ['LATIN CAPITAL LETTER N', 'faked-HOOK']; # Pretend on ENG...
10374             # $candidates{'014B'} = ['LATIN SMALL LETTER N', 'faked-HOOK']; # Pretend on ENG...
10375             # XXXX Better have this together with pseudo-upgrade???
10376 0           push @{$candidates{'00b5'}}, ['GREEK SMALL LETTER MU', 'faked-calculated-SYMBOL']; # Pretend on MICRO SIGN...
  0            
10377             # $candidates{'00b5'} = ['GREEK SMALL LETTER MU', 'calculated-SYMBOL']; # Pretend on MICRO SIGN...
                  # When the first element of a decomposition is named "FOO OPERATOR", offer FOO and
                  # every name in $operators{FOO} as candidate bases, keeping the rest of the decomposition.
10378 0           for my $k (keys %basic) { # hex
10379 0           for my $exp (@{$basic{$k}}) {
  0            
10380 0           my $base = $exp->[1]; # hex
10381 0           my $name = $NM{$self->charhex2key($base)};
10382 0 0 0       next if not $name and ($k =~ /^[12]?F[89A]..$/ or hex $base >= 0x4E00 and hex $base <= 0x9FCC); # ideographs; there is also 3400 region...
      0        
10383 0 0         warn "Basic: `$k' --> `@$exp', base=`$base' --> `",$self->charhex2key($base),"'" unless $name;
10384 0 0         if ((my $NN = $name) =~ s/\s+OPERATOR$//) {
10385             #warn "operator: `$k' --> <$NN>, `@$exp', base=`$base' --> `",$self->charhex2key($base),"'";
10386 0 0         push @{$candidates{$k}}, [$_, @$exp[2..$#$exp]] for $NN, @{ $operators{$NN} || []};
  0            
  0            
10387             }
10388             }
10389             }
                  # NOTE(review): the loop below pop()s the lists stored in the file-level %uni_manual,
                  # leaving them empty afterwards - a second call of this sub would add no manual candidates.
10390 0           for my $how (keys %uni_manual) { # Some stuff is easier to describe in terms of char, not names
10391 0           my $map = $uni_manual{$how};
10392 0 0         die "manual translation map for $how has an odd number of entries" if @$map % 2;
10393             # for my $from (keys %$map) {
10394 0           while (@$map) {
10395 0           my $to = pop @$map; # Give precedence to later entries
10396 0           my $from = pop @$map;
                  # $shift == 1: retry with uc() of both characters, but only if uc() changes both of them
10397 0           for my $shift (0,1) {
10398 0 0         if ($shift) {
10399 0           my ($F, $T) = (uc $from, uc $to);
10400 0 0 0       next unless $F ne $from and $T ne $to;
10401 0           ($from, $to) = ($F, $T);
10402             }
10403 0           push @{$candidates{uc $self->key2hex($to)}}, [$NM{$from}, "manual-$how"];
  0            
10404             }
10405             }
10406             }
                  # For each GREEK name whose LATIN counterpart $l exists (hex $L): add the Greek name as a
                  # 'faked-latinize' candidate for $L; when $l also parses as "LATIN SMALL|CAPITAL LETTER <word>",
                  # add the one-letter Latin name (first letter of <word>; F for PHI) as "faked-greekize..." too.
10407 0           for my $g (keys %G) {
10408 0 0         (my $l = my $name = $G{$g}) =~ s/^GREEK\b/LATIN/ or die "Panic";
10409 0 0         next unless my $L = $N{$l}; # is HEX
10410             #warn "latinize: $L\t$l";
10411 0           push @{$candidates{$L}}, [$name, 'faked-latinize'];
  0            
10412 0 0         next unless my ($lat, $first, $rest, $add) = ($l =~ /^(LATIN\s+(?:SMALL|CAPITAL)\s+LETTER\s+(\w))(\w+)(?:\s+(\S.*))?$/);
10413 0 0         $lat =~ s/P$/F/, $first = 'F' if "$first$rest" eq 'PHI';
10414 0 0         die unless my $LL = $N{$lat};
10415 0 0         $add = (defined $add ? "-$add" : ''); # None of 6.1; only iIuUaAgGdf present of 6.1
10416 0           push @{$candidates{$L}}, [$lat, "faked-greekize$add"];
  0            
10417             #warn "latinize++: $L\t$l;\t`$add'\t$lat";
10418             }
                  # --- Step 3: resolve the guessed base NAMES to known characters.
                  # %_TR maps alternative words to the ones tried instead (underscores in the list below
                  # stand for spaces); %iu_TR does the same for INTERSECTION/UNION after SQUARE.
10419 0           my %iu_TR = qw(INTERSECTION CAP UNION CUP);
10420 0           my %_TR = map { (my $in = $_) =~ s/_/ /g; $in } qw(SMALL_VEE LOGICAL_OR
  0            
  0            
10421             UNION_OPERATOR_WITH_DOT MULTISET_MULTIPLICATION
10422             UNION_OPERATOR_WITH_PLUS MULTISET_UNION
10423             DEL NABLA
10424             QUOTE APOSTROPHE
10425             SQUISH VERTICAL_LINE
10426             SLASH SOLIDUS
10427             BACKSLASH REVERSE_SOLIDUS
10428             DIVIDE DIVISION_SIGN
10429             QUESTION QUESTION_MARK
10430             UP_CARET LOGICAL_AND
10431             DOWN_CARET LOGICAL_OR
10432             JOT DEGREE_SIGN);
10433 0           my($_TR_rx) = map qr/$_/, join '|', keys %_TR;
10434 0           for my $c (keys %candidates) { # Done after all the names are known; hex of the char
10435 0           my ($CAND, $app, $t, $base, $b) = ($candidates{$c}, '');
10436 0           for my $Cand (@$CAND) { # (all keys in hex) [MAYBE_CHAR_NAME, how_obtained]
10437             #warn "candidates: $c <$Cand->[0]>, <@$Cand[1..$#$Cand]>";
10438             # An experiment shows that the FORMS are properly marked as non-canonical decompositions; so they are not needed here
10439 0 0         (my $with = my $raw = $Cand->[1]) =~ s/\s+(SIGN|SYMBOL|(?:FINAL|ISOLATED|INITIAL|MEDIAL)\s+FORM)$//
10440             and $app = " $1"; # $app is just a candidate; actually, not useful at all
                  # Try the base name with each [prepend, append] decoration; for every try, the rewrites
                  # below are applied one by one, each only while $b is still not a known name.
10441 0           for my $Mod ( (map ['', $_], $app, '', ' SIGN', ' SYMBOL', ' OF', ' AS MEMBER', ' TO'), # `SUBSET OF', `CONTAINS AS MEMBER', `PARALLEL TO'
10442             (map [$_, ''], 'WHITE ', 'WHITE UP-POINTING ', 'N-ARY '), ['WHITE ', ' SUIT'] ) {
10443 0           my ($prepend, $append) = @$Mod;
10444 0 0 0       next if $raw =~ /-SYMBOL$/ and 0 <= index($append, "SYMBOL"); #
10445 0           warn "raw=`$raw', prepend=<$prepend>, append=<$append>, base=$Cand->[0]\n" if debug_GUESS_MASSAGE;
10446 0           $t++;
10447 0           $b = "$prepend$Cand->[0]$append";
10448 0 0         $b =~ s/\bTWO-HEADED\b/TWO HEADED/ unless $N{$b};
10449 0 0         $b =~ s/\bTIMES\b/MULTIPLICATION SIGN/ unless $N{$b};
10450 0 0         $b =~ s/(?:(?<=\bLEFT)|(?<=RIGHT))(?=\s+ARROW\b)/WARDS/ unless $N{$b};
10451 0 0         $b =~ s/\bLINE\s+INTEGRATION\b/CONTOUR INTEGRAL/ unless $N{$b};
10452 0 0         $b =~ s/\bINTEGRAL\s+AVERAGE\b/INTEGRAL/ unless $N{$b};
10453 0 0         $b =~ s/\s+(?:SHAPE|OPERATOR|NEGATED)$// unless $N{$b};
10454 0 0         $b =~ s/\bCIRCLED\s+MULTIPLICATION\s+SIGN\b/CIRCLED TIMES/ unless $N{$b};
10455 0 0         $b =~ s/^(CAPITAL|SMALL)\b/LATIN $1 LETTER/ unless $N{$b}; # TURNED SMALL F
10456 0 0         $b =~ s/\b(CAPITAL\s+LETTER)\s+SMALL\b/$1/ unless $N{$b}; # Q WITH HOOK TAIL
10457 0 0         $b =~ s/\bEPIGRAPHIC\b/CAPITAL/ unless $N{$b}; # XXXX is it actually capital?
10458             $b =~ s/^LATIN\s+LETTER\s+SMALL\s+CAPITAL\b/LATIN CAPITAL LETTER/ # and warn "smallcapital -> <$b>"
10459 0 0 0       if not $N{$b} or $with=~ /smallcaps/; # XXXX is it actually capital?
10460 0 0         $b =~ s/^GREEK\s+CAPITAL\b(?!=\s+LETTER)/GREEK CAPITAL LETTER/ unless $N{$b};
10461 0 0         $b =~ s/^GREEK\b(?!\s+(?:CAPITAL|SMALL)\s+LETTER)/GREEK SMALL LETTER/ unless $N{$b};
10462 0 0         $b =~ s/^CYRILLIC\b(?!\s+(?:CAPITAL|SMALL)\s+LETTER)(?=\s+LETTER\b)/CYRILLIC SMALL/ unless $N{$b};
10463 0 0         $b =~ s/\bEQUAL(\s+TO\s+SIGN\b)?/EQUALS SIGN/ unless $N{$b};
10464 0 0         $b =~ s/\bMINUS\b/HYPHEN-MINUS/ unless $N{$b};
10465 0 0         $b =~ s/\b(SQUARE\s+)(INTERSECTION|UNION)(?:\s+OPERATOR)?\b/$1$iu_TR{$2}/ unless $N{$b};
10466 0 0         $b =~ s/(?<=WARDS)$/ ARROW/ unless $N{$b}; # APL VANE
10467             # warn "_TR: <$1> in $b; <>" if $b =~ /\b($_TR_rx)\b/ and not $_TR{$1};
10468 0 0         $b =~ s/\b($_TR_rx)\b/$_TR{$1}/ unless $N{$b};
10469             $b = "GREEK SMALL LETTER $b" and ($b =~ /\bDELTA\b/ and $b =~ s/\bSMALL\b/CAPITAL/)
10470 0 0 0       if not $N{$b} and $N{"GREEK SMALL LETTER $b"};
      0        
      0        
10471             # $b =~ s/\bDOT\b/FULL STOP/ unless $N{$b};
10472             # $b =~ s/^MICRO$/GREEK SMALL LETTER MU/ unless $N{$b};
10473            
10474 0           warn " b =`$b', prepend=<$prepend>, append=<$append>, base=$Cand->[0]\n" if debug_GUESS_MASSAGE;
10475 0 0         if (defined ($base = $N{$b})) {
                  # A name resolving to the character itself is of no use: forget it, try the next decoration
10476 0 0         undef $base, next if $base eq $c;
10477 0 0         $with = $raw if $t;
10478 0           warn "<$Cand->[0]> WITH <$Cand->[1]> resolved via SIGN/SYMBOL/.* FORM: strip=<$app> add=<$prepend/$append>\n"
10479             if debug_GUESS_MASSAGE and ($append or $app or $prepend);
10480             last
10481 0           }
10482             }
10483 0 0         if (defined $base) {
    0          
10484 0           $base = [$base];
10485             } elsif ($raw =~ /\bOPERATOR$/) {
10486 0 0         $base = [map $N{$_}, @{ $operators{$Cand->[0]} }] if exists $operators{$Cand->[0]};
  0            
10487             }
10488 0 0         (warnUNRES and warn("Unresolved: <$Cand->[0]> WITH <$Cand->[1]>")), next unless defined $base;
10489 0           my @modifiers = split /\s+AND\s+/, $with;
10490 0 0         @modifiers = map { s/\s+/-/g; /^[\da-f]{4,}$/i ? $_ : "" } @modifiers;
  0            
  0            
10491             #warn " $c --> <@$base>; <@modifiers>...\t$b <- $NM{chr hex $c}" ;
                  # Guessed decompositions are stored with compat flag 1, in front of the existing ones
10492 0           unshift @{$basic{$c}}, [1, $_, @modifiers] for @$base;
  0            
10493 0 0         if ($b =~ s/\s+(OPERATOR|SIGN)$//) { # ASTERISK (note that RING is a valid name, but has no relation to RING OPERATOR
10494 0 0         unshift @{$basic{$c}}, [1, $base, @modifiers] if defined ($base = $N{$b}); # ASTERISK
  0            
10495             #$base = '[undef]' unless defined $base;
10496             #warn("operator via <$b>, <$c> => `$base'");
10497             (debug_OPERATOR and warn "operator: `$c' ==> `$_', <@modifiers> via <$b>\n"),
10498 0 0         unshift @{$basic{$c}}, [1, $_, @modifiers] for map $N{$_}, @{ $operators{$b} || [] }; # ASTERISK
  0            
  0            
10499             }
10500             # push @{$candidates{$k}}, [$_, @$exp[2..$#$exp]] for $NN, @{ $operators{$NN} || []};
10501             # $basic{$c} = [ [1, $base, @modifiers ] ]
10502             }
10503             }
                  # --- Step 4: expand recursively, then record compose rules: %compose maps "base modifiers..."
                  #     to [ [$compat, hex of composed], ... ]; %ordered keeps the non-trivial expansions per char.
10504 0           $self->decompose_r(\%basic, $_, \%cached_full) for keys %basic; # Now %cached_full is fully expanded - has trivial expansions too
10505 0 0         for my $c (sort {fromHEX $a <=> fromHEX $b or $a cmp $b} keys %cached_full) { # order of chars in Unicode matters (all keys in hex)
  0            
10506 0           my %seen_compose;
10507 0           for my $exp (@{ $cached_full{$c} }) {
  0            
10508 0           my @exp = @$exp; # deep copy
10509 0 0         die "Expansion too short: <@exp>" if @exp < 2;
10510 0 0         next if @exp < 3; # Skip trivial decompositions
10511 0           my $compat = shift @exp;
10512 0           my @PRE = @exp;
10513 0           my $base = shift @exp;
10514 0 0         @exp = ($base, sort {fromHEX $a <=> fromHEX $b or $a cmp $b} @exp); # Any order will do; do not care about Unicode rules
  0            
10515             #warn "Malformed: [@exp]" if "@exp" =~ /^
10516 0 0         next if $seen_compose{"$compat; @exp"}++; # E.g., WHITE may be added in several ways...
10517 0 0         push @{$ordered{$c}}, [$compat, @exp > 3 ? @exp : @PRE]; # with 2 modifiers order does not matter for the algo below, but we catch U"¯ vs U¯".
  0            
                  # Warn when an earlier character already owns this expansion, unless $c is listed in %known_dups
10518 0           warn qq(Duplicate: $c <== [ @exp ] ==> <@{$compose{"@exp"}[0]}> (prefered)\n\t<), chr hex $c,
10519             qq(>\t$c\t$NM{chr hex $c}\n\t<), chr hex $compose{"@exp"}[0][1], qq(>\t$compose{"@exp"}[0][1]\t$NM{chr hex $compose{"@exp"}[0][1]})
10520 0 0 0       if $compose{"@exp"} and "@exp" !~ /<(font|pseudo-upgrade)>/ and $c ne $compose{"@exp"}[0][1] and not $known_dups{$c};
      0        
      0        
10521             #warn "Compose rule: `@exp' ==> $compat, `$c'";
10522 0           push @{$compose{"@exp"}}, [$compat, $c];
  0            
10523             }
10524             } # compose mapping done
                  # --- Step 5: split each expansion into (partial composition, one more element): element $off
                  #     is removed, and whatever the remaining elements compose to (or the lone base) becomes
                  #     the left-hand part.
10525 0 0         for my $c (sort {fromHEX $a <=> fromHEX $b or $a cmp $b} keys %ordered) { # all nontrivial! Order of chars in Unicode matters...
  0            
10526 0           my(%seen_compose, %seen_contract) = ();
10527 0           for my $v (@{ $ordered{$c} }) { ## When (FOO and FOO OPERATOR) + tilde are both remapped to X: X+operator == X
  0            
10528 0           my %seen;
10529 0           for my $off (reverse(2..$#$v)) {
10530             # next if $seen{$v->[$off]}++; # chain of compat, or 2A76 -> ?2A75 003D < = = = >
10531 0           my @r = @$v; # deep copy
10532 0           splice @r, $off, 1;
10533 0           my $compat = shift @r;
10534             #warn "comp: $compat, $c; $off [@$v] -> $v->[$off] + [@r]";
10535 0 0         next if $seen_compose{"$compat; $v->[$off]; @r"}++;
10536             # next unless my $contracted = $compose{"@r"}; # This omits trivial compositions
10537 0 0         my $contracted = [@{$compose{"@r"} || []}]; # Deep copy
  0            
10538             # warn "Panic $c" if @$contracted and @r == 1;
10539 0 0         push @$contracted, [0, @r] if @r == 1; # Not in %compose
10540             # QUAD-INT: may be INT INT INT INT, may be INT amp INT INT etc; may lead to same compositions...
10541             #warn "contraction: $_->[0]; $compat; $c; $v->[$off]; $_->[1]" for @$contracted;
10542 0 0         @$contracted = grep {$_->[1] ne $c and not $seen_contract{"$_->[0]; $compat; $v->[$off]; $_->[1]"}++} @$contracted;
  0            
10543             #warn " contraction: $_->[0]; $compat; $c; $v->[$off]; $_->[1]" for @$contracted;
10544 0           for my $contr (@$contracted) { # May be empty: Eg, fractions decompose into 2 3 and cannot be composed in 2 steps
                  # "calculated" is set when the partial composition is itself flagged, or when the element
                  # split off was not the last one of the expansion
10545 0   0       my $calculated = $contr->[0] || $off != $#$v;
10546 0           push @{ $into2{$self->charhex2key($c)} }, [(($compat | $contr->[0])<<1)|$calculated, $self->charhex2key($contr->[1]), $self->charhex2key($v->[$off])]; # each: compat, char, combine
  0            
10547 0           push @{ $comp2{$v->[$off]}{$contr->[1]} }, [ (($compat | $contr->[0])<<1)|$calculated, $c]; # each: compat, char
  0            
10548             }
10549             }
10550             }
10551             } # (de)compose-into-2 mapping done
10552 0           for my $h2 (values %comp2) { # Massage into the natural order - prefer canonical (de)compositions
                  # `values' aliases the hash entries, so assigning to $h below replaces the stored array ref
10553 0           for my $h (values %$h2) { # RValues!!! [compat, charHEX] each
10554             # my @a = sort { "@$a" cmp "@$b" } @$h;
10555 0 0         my @a = sort { $a->[0] <=> $b->[0] or $self->charhex2key($a->[1]) cmp $self->charhex2key($b->[1]) } @$h;
  0            
10556 0           $h = \@a;
10557             }
10558             }
10559 0           \%into2, \%comp2, \%NM, \%BL, \%NS, $version
10560             }
10561            
# Print the decomposition table, one line per character:
#   CHAR <TAB> -> <TAB> entry,<TAB>entry...
# Each entry is "= A B" when its first field is false and "? A B" otherwise.
#   $self - the layout object
#   $dec  - optional table (hash: char => list of [flag, A, B]); when omitted,
#           the NamesList (if get_NamesList() finds one) is loaded and
#           $self->{Decompositions} is printed.
sub print_decompositions($;$) {
  my ($self, @rest) = @_;
  my $table;
  if (@rest) {
    $table = $rest[0];
  } else {
    my $names_file = $self->get_NamesList;
    $self->load_compositions($names_file) if defined $names_file;
    $table = $self->{Decompositions};
  }
  foreach my $char (sort keys %$table) {
    my $entries = $table->{$char};
    my @shown = map { my $mark = $_->[0] ? '? ' : '= '; $mark . "@$_[1,2]" } @$entries;
    print "$char\t->\t", join(",\t", @shown), "\n";
  }
}
10573            
# Print a hex-keyed composition table (composing char => base char => list of
# [flag, result]).  Keys are ordered by fromHEX() value, ties broken by string
# comparison.  Each result is prefixed with "?" when its flag is true and "="
# otherwise.  When called with the object alone, control is transferred to
# print_compositions_ch().
sub print_compositions($$) {
  goto &print_compositions_ch if @_ == 1;
  my ($self, $comp) = @_;
  my $by_codepoint = sub { fromHEX $a <=> fromHEX $b or $a cmp $b };
  foreach my $combining (sort $by_codepoint keys %$comp) {	# composing char
    print "$combining\n";
    foreach my $base (sort $by_codepoint keys %{$comp->{$combining}}) {	# base char
      my $arr = $comp->{$combining}{$base};
      my @shown = map { my $mark = $_->[0] ? '?' : '='; $mark . $_->[1] } @$arr;
      print "\t$base\t->\t", join(",\t\t", @shown), "\n";
    }
  }
}
10586            
# Print a character-keyed composition table (composing char => base char =>
# list of [flag, result]); keys are sorted as plain strings.  Each result is
# prefixed with "? " when its flag is true and "= " otherwise.
#   $comp - optional table; when omitted, the NamesList (if get_NamesList()
#           finds one) is loaded and $self->{Compositions} is printed.
sub print_compositions_ch($$) {
  my ($self, @rest) = @_;
  my $table;
  if (@rest) {
    $table = $rest[0];
  } else {
    my $names_file = $self->get_NamesList;
    $self->load_compositions($names_file) if defined $names_file;
    $table = $self->{Compositions};
  }
  foreach my $combining (sort keys %$table) {	# composing char
    print "$combining\n";
    foreach my $base (sort keys %{$table->{$combining}}) {	# base char
      my $arr = $table->{$combining}{$base};
      my @shown = map { my $mark = $_->[0] ? '? ' : '= '; $mark . $_->[1] } @$arr;
      print "\t$base\t->\t", join(",\t\t", @shown), "\n";
    }
  }
}
10601            
# Load the (de)composition tables and character names from a NamesList file.
#   $fn - name of the file to read (handed, opened, to parse_NameList()).
# Fills $self->{Decompositions}, {UNames}, {UBlock}, {exComb}, {uniVersion} and
# {Compositions}; the latter starts from a copy of $self->{'[Substitutions]'}
# and is keyed by characters (via charhex2key()) instead of hex strings.
# Also extends the file-level $rxCombining with the keys of {exComb}.
# Does nothing if $self->{Compositions} is already set.  Returns $self.
# Dies (with the OS error) if the file cannot be opened or closed.
sub load_compositions($$) {
  my ($self, $fn) = (shift, shift);
  return $self if $self->{Compositions};
  my %comp = %{ $self->{'[Substitutions]'} || {} };
  open my $f, '<', $fn or die "Can't open `$fn' for read: $!";
  # Keep the parsed table in its own variable, so that the file name remains
  # available for the error message of close() below.
  my $parsed;
  ($self->{Decompositions}, $parsed, $self->{UNames}, $self->{UBlock}, $self->{exComb}, $self->{uniVersion}) = $self->parse_NameList($f);
  close $f or die "Can't close `$fn' for read: $!";
#warn "(De)Compositions and UNames loaded";
  # Having hex as index is tricky: is it 4-digits or more?  Is it in uppercase?
  for my $combining (sort {fromHEX $a <=> fromHEX $b or $a cmp $b} keys %$parsed) {	# composing char
    for my $base (sort {fromHEX $a <=> fromHEX $b or $a cmp $b} keys %{$parsed->{$combining}}) {	# base char
      my $arr = $parsed->{$combining}{$base};
      my @out = map [$self->charhex2key($_->[0]), $self->charhex2key($_->[1])], @$arr;
      $comp{$self->charhex2key($combining)}{$self->charhex2key($base)} = \@out;
    }
  }
  $self->{Compositions} = \%comp;
  my $comb = join '', keys %{$self->{exComb}};
  # \Q...\E: a key which happens to be special inside [...] must stay literal
  $rxCombining = qr/\p{nonSpacingMark}|[\Q$comb\E]/ if length $comb;
  $self
}
10623            
# Read the whole file $fn (a DerivedAge-style list), parse it with
# parse_derivedAge() and store the result in $self->{Age}.
# Dies if the file cannot be opened or closed.  Returns $self.
sub load_uniage($$) {
  my ($self, $fn) = @_;
  # get_AgeList
  open(my $in, '<', $fn) or die "Can't open `$fn' for read: $!";
  local $/;			# slurp mode
  my $text = <$in>;
  close($in) or die "Can't close `$fn' for read: $!";
  $self->{Age} = $self->parse_derivedAge($text);
  return $self;
}
10634            
# Load the NamesList data via load_compositions(); if one more argument is
# given, also load the age list from it via load_uniage().
# Returns $self when only the first file is given, otherwise whatever
# load_uniage() returns.
sub load_unidata($$) {
  my ($self, $names_list, @more) = @_;
  $self->load_compositions($names_list);
  return @more ? $self->load_uniage($more[0]) : $self;
}
10641            
my(%charinfo, %UName_v);		# Unicode::UCD::charinfo extremely slow
# Return a printable name for a character.
#   $c       - the character (anything charhex2key() accepts)
#   $verbose - if true, append " [script; block; age]" when known, and cache
#              the result in %UName_v
#   $vbell   - if true, U+266A gets the fixed annotation "Com/MiscSym1.1";
#              without it, the verbose name of U+266A is never cached
# If $self->{UNames} exists and $verbose is false, the name is taken from
# $self->{UNames} only, falling back to "[char]" (or "[hex]" for surrogates,
# controls and \x7f-\xAF).  Otherwise Unicode::UCD::charinfo() is consulted
# (cached in %charinfo), with "<char>" as the last resort.
sub UName($$$;$) {
  my ($self, $c, $verbose, $vbell) = (shift, shift, shift, shift);
  my $app = '';
  $c = $self->charhex2key($c);
  my $is_bell = (0x266a == ord $c);		# EIGHT NOTE: we use as "visual bell"
  my $cacheable = ($verbose and ($vbell or not $is_bell));
  return $UName_v{$c} if $cacheable and exists $UName_v{$c};
  # Fetch the tables once: a nested lookup such as $self->{UNames}{$c} would
  # create $self->{UNames} as a side effect, and then the test below would
  # send all the later non-verbose calls to the "[char]" fallback.
  my ($names, $ages) = ($self->{UNames}, $self->{Age});
  if (not exists $self->{UNames} or $verbose) {
    require Unicode::UCD;
    my $i = ($charinfo{$c} ||= Unicode::UCD::charinfo(ord $c) || {});
    my $A = $ages && $ages->{$c};
    my $n = ($names && $names->{$c}) || ($i->{name}) || "<$c>";
    if ($verbose and (%$i or $A)) {
      my $scr = join '; ', grep defined, $i->{script}, $i->{block}, $A;
      $scr = "Com/MiscSym1.1" if $vbell and $is_bell;
      $app = " [$scr]" if length $scr;
    }
    return($UName_v{$c} = "$n$app") if $cacheable;
    return "$n$app"
  }
  ($names && $names->{$c}) || ($c =~ /[\x{d800}-\x{dfff}\x00-\x1f\x7f-\xAF]/ ? '['.$self->key2hex($c).']' : "[$c]")
}
10664            
# Parse the text of a DerivedAge-style list.
#   $s - the whole contents of the file; each data line is
#        "XXXX ; M.N ..." or "XXXX..YYYY ; M.N ..." (hex code points, then the
#        version); empty lines and lines starting with "#" are ignored.
# Returns a hash ref: character => version string (e.g. "5.1", "10.0").
# Dies on a line of any other shape.
# NOTE(review): sort_compositions() embeds these versions into strings compared
# with `gt'; as strings, "10.0" sorts before "5.1" - confirm this is acceptable.
sub parse_derivedAge ($$) {
  my ($self, $s, %C) = (shift, shift);
  for my $l (split /\n/, $s) {
    next if $l =~ /^\s*(#|$)/;
    # \d+: the major version has two digits starting with Unicode 10.0
    die "Unexpected line in DerivedAge: `$l'"
      unless $l =~ /^([0-9a-f]{4,})(?:\.\.([0-9a-f]{4,}))?\s*;\s*(\d+\.\d+)\b/i;
    # Copy the captures: do not rely on $1..$3 surviving inside the loop
    my ($first, $last, $age) = ($1, (defined $2 ? $2 : $1), $3);
    $C{chr $_} = $age for hex($first) .. hex($last);
  }
  \%C;
}
10675            
10676             # use Dumpvalue;
10677             # my $first_time_dump;
# Apply the first applicable of the mappings @$maps to the key $base.
#   $maps    - array ref of names of mappings (order matters)
#   $base    - the key (a char, or an array ref whose first element is the char)
#   $undo    - if true, decompose instead: return the base part of the first
#              decomposition of $base (per $self->{Decompositions}) whose
#              combining part is one of @$maps
#   $unAltGr - optional hash ref, key => its binding without AltGr
# Returns the resulting character, or undef if nothing applies.
sub get_compositions ($$$$;$) {		# Now only the undo-branch is used...
  my ($self, $maps, $base, $undo, $unAltGr) = @_;
  return undef unless defined $base;
  $base = $base->[0] if 'ARRAY' eq ref $base;	# Treat prefix keys as usual keys
  if (warnDO_COMPOSE) {
    warn "doing <$base> <@$maps>: undo=$undo C=", $self->key2hex($base), ", maps=", join ' ', map $self->key2hex($_), @$maps;
  }
  if ($undo) {
    my $dec = $self->{Decompositions}{$base};
    return undef unless $dec;
    # order in @$maps matters; so does one in Decompositions - but less so
    # Hence the external loop should be over @$maps
    for my $M (@$maps) {
      my @hits = grep $M eq $_->[2], @$dec;
      next unless @hits;	# Take the first mapping which allows such decomposition
      warn "Decomposing <$base> <$M>: multiple answers: <", (join '> <', map "@$_", @hits), ">" unless @hits == 1;
      warn "done undo <$base> <@$maps>: -> ", $self->array2string(\@hits) if warnDO_COMPOSE;
      return $hits[0][1];
    }
    return undef;
  }
  my $plain = $unAltGr ? $unAltGr->{$base} : undef;
  if ($plain) {
    my(@seen, %seen);
    for my $comp (@$maps) {
      my $a1 = $self->{Compositions}{$comp}{$plain};
      push @seen, $a1 if $a1 and not $seen{$a1->[0][1]}++;
      my $a2 = $self->{Compositions}{$comp}{$base};
      next unless defined $a2 or @seen == 2;
      warn "Conflict between the second binding `$a1->[0][1]' for `$plain' and AltGr-binding `$a2->[0][1]' (on `$base')"
        if $a2 and $a1 and @seen == 2 and $a1->[0][1] ne $a2->[0][1];
      return ((@seen == 2 and $a1) or $a2)->[0][1];
    }
  }
  my @found = grep defined, map $self->compound_composition($_,$base), @$maps;
  return undef unless @found;
  my $r = $found[0];
  warn "Composing <$base> <@$maps>: multiple answers: <", (join '> <', map "@$_", @$r), ">" unless @$r == 1 or $base eq ' ';
  return $r->[0][1];
}
10718            
# Apply a compound mapping to one key.
#   $M - mappings joined by "+" (a "-" directly before another "-" also
#        separates); applied right-to-left.  A step "-NAME" undoes the
#        composition NAME via get_compositions(); "lc", "uc", "ucfirst" change
#        case (dropped if the char is unchanged or the result is not one char);
#        "dectrl" maps a char below 0x20 to the char 0x40 above it; any other
#        step is looked up in $self->{Compositions}.
#   $C - the key: a char, or an array ref [char, ?, ?, doc]
# Returns undef if $C is undefined or if some step gives no result; otherwise
# an array ref of [flag, char, doc] triples.
sub compound_composition ($$$) {
  my ($self, $M, $C) = @_;
  return undef unless defined $C;
  my $from_array = ('ARRAY' eq ref $C);
  my $doc1 = ($from_array and defined $C->[3]) ? $C->[3] : '';	# may be used via
  my $doc = length $doc1 ? "$doc1 ⇒ " : '';
  $C = $C->[0] if $from_array;
  warn "composing `$M' with base <$C>" if warnDO_COMPOSE;
  my %recase = (lc => sub { lc $_[0] }, uc => sub { uc $_[0] }, ucfirst => sub { ucfirst $_[0] });
  my $cur = [[0, $C, $doc1]];	# Emulate element of return of Compositions ("one translation, explicit")
  for my $step (reverse split /\+|-(?=-)/, $M) {
    my @found;
    if ($step =~ /^(?:-|(?:[ul]c(?:first)?|dectrl)$)/) {
      if ($step =~ s/^-//) {
        for my $item (@$cur) {
          my $undone = $self->get_compositions([$step], $item->[1], 'undo');
          push @found, [[0, $undone]] if defined $undone;
        }
      } elsif (my $op = $recase{$step}) {
        for my $item (@$cur) {
          my $changed = $op->($item->[1]);
          push @found, [[0, $changed]] unless $item->[1] eq $changed or 1 != length $changed;
        }
      } elsif ($step eq 'dectrl') {
        for my $item (@$cur) {
          push @found, [[0, chr(0x40 + ord $item->[1])]] unless 0x20 <= ord($item->[1]);
        }
      } else {
        die "Panic"
      }
    } else {
#warn "compose `$step' with bases <", join('> <', map $_->[1], @$cur), '>';
      @found = map $self->{Compositions}{$step}{$_->[1]}, @$cur;
    }
    my @flat = map @$_, grep defined, @found;
    return undef unless @flat;
    $cur = [map [$_->[0], $_->[1], "$doc$M"], @flat];
  }
  return $cur;
}
10754            
# As compound_composition(), but for a list of keys.
#   $M   - the compound mapping
#   $CC  - array ref of [char, doc] pairs (undefined elements are skipped)
#   $ini - optional array ref; if its element 3 is defined, the doc of every
#          key is wrapped as "$ini->[3] ⇒ Subst{doc}"
# Returns undef if $CC is false or no key gives a result; otherwise an array
# ref with the concatenated results of compound_composition().
sub compound_composition_many ($$$$) {		# As above, but takes an array of [char, docs]
  my ($self, $M, $CC, $ini) = @_;
  return undef unless $CC;
  my ($prefix, $suffix) = ('', '');
  if ($ini and ref $ini and defined $ini->[3]) {
    $prefix = "$ini->[3] ⇒ Subst{";
    $suffix = $prefix && '}';
  }
  my @res;
  for my $pair (@$CC) {
    next unless defined $pair;
    my $in = $self->compound_composition($M, [$pair->[0], undef, undef, "$prefix$pair->[1]$suffix"]);
    next unless defined $in;
    push @res, @$in;
  }
  return @res ? \@res : undef;
}
10769            
10770             # Design goals: we assign several diacritics to a prefix key (possibly with
10771             # AltGr on the "Base key" and/or other "multiplexers" in between). We want:
10772             # *) a lc/uc paired result to sit on Shift-paired keypresses;
10773             # *) avoid duplication among multiplexers (a secondary goal);
10774             # *) allow some diacritics in the list to be preferred ("groups" below);
10775             # *) when there is a choice, prefer non-bizarre (read: with smaller Unicode
10776             # "Age" version) binding to be non-multiplexed.
10777             # We allow something which was not on AltGr to acquire AltGr when it gets a
10778             # diacritic.
10779            
10780             # It MAY happen that an earlier binding has empty slots,
10781             # but a later binding exists (to preserve lc/uc pairing, and shift-state)
10782            
10783             ### XXXX Unclear: how to catenate something in front of such a map...
10784             # we do $composition->[0][1], which means we ignore additional compositions! And we ignore HOW, instead of putting it into penalty
10785            
10786             sub sort_compositions ($$$$$;$) {
                  # Compose the keys of all layers with groups of mappings and sort the results by "penalty".
                  #   $m      - array ref of groups; each group is an array ref of mapping specs.  A spec starting with
                  #             `\' (optionally preceded by `!' and/or followed by one more `\') is not a mapping: its
                  #             chars are recorded in %penalize/%Penalize (or, with `!', in %ok/%OK); a trailing `[...]'
                  #             restricts such a spec to the case when $char (below) is among the listed chars
                  #   $C      - array ref of layers; each layer is a list of keys (a key may be an array ref, then its
                  #             element 0 is the char)
                  #   $Sub    - hash ref: char => list handed to compound_composition_many() (extra compositions)
                  #   $dupsOK - if false, a result char which was already produced is dropped
                  #   $w32OK  - if false, result chars at or above 0x10000 are dropped
                  # Returns an array ref indexed as described in the comment after the closing brace.
10787 0     0 0   my ($self, $m, $C, $Sub, $dupsOK, $w32OK, @res, %seen, %Penalize, %penalize, %OK, %ok, @C) = (shift, shift, shift, shift, shift, shift);
10788 0           warn "compounding ", $self->array2string($C) if warnSORTCOMPOSE;
                  # @C: the same layers as in $C, but with every key reduced to its char
10789 0           for my $c (@$C) {
10790 0 0 0       push @C, [map {($_ and 'ARRAY' eq ref $_) ? $_->[0] : $_} @$c]
  0            
10791             }
10792 0           my $char = $C[0][0];
10793 0 0         $char = 'N/A' unless defined $char;
10794 0           for my $MM (@$m) { # |-groups
10795 0           my(%byPenalty, @byLayers);
10796 0           for my $M (@$MM) { # diacritic in a group; may flatten each layer, but do not flatten separately each shift state: need to pair uc/lc
10797 0 0         if ((my $P = $M) =~ s/^(!)?\\(\\)?//) {
10798 0           my($neg, $strong) = ($1, $2);
10799             # warn "Penalize: <$P>"; # Actually, it is not enough to penalize; one should better put it in a different group...
10800 0 0         if ($P =~ s/\[(.*)\]$//) {
10801             #$P = $self->stringHEX2string($P);
10802 0           my $match;
10803 0   0       $char eq $_ and $match++ for split //, $self->stringHEX2string("$1");
10804 0 0         next unless $match;
10805             }
10806             #$P = $self->stringHEX2string($P);
10807 0 0         if ($neg) {
10808 0 0         $strong ? $OK{$_}++ : $ok{$_}++ for split //, $P;
10809             } else {
10810 0 0         $strong ? $Penalize{$_}++ : $penalize{$_}++ for split //, $P;
10811             }
10812             next
10813 0           }
10814 0           for my $L (0..$#C) { # Layer number; indexes a shift-pair
10815             # my @res2 = map {defined($_) ? $self->{Compositions}{$M}{$_} : undef } @{ $C[$L] };
10816 0           my @Res2 = map $self->compound_composition($M, $_), @{ $C->[$L] }; # elt: [$synth, $char]
  0            
10817 0           my @working_with = grep defined, @{ $C[$L] }; # ., KP_Decimal gives [. undef]
  0            
10818 0           warn "compound `$M' of [@working_with] -> ", $self->array2string(\@Res2) if warnSORTCOMPOSE;
10819 0           (my $MMM = $M) =~ s/(^|\+)$//; # Hack: the rule always fails if present, empty always succeeds
10820             my @Res3 = map $self->compound_composition_many($MMM, (defined() ? $Sub->{($_ and ref) ? $_->[0] : $_} : $_), $_),
10821 0 0 0       @{ $C->[$L] };
  0 0          
10822 0           warn "compound+ `$M' of [@working_with] -> ", $self->array2string(\@Res3) if warnSORTCOMPOSE;
                  # Merge per shift state: the results via $Sub go after the direct ones
10823 0           for my $shift (0..$#Res3) {
10824 0 0         if (defined $Res2[$shift]) {
10825 0 0         push @{ $Res2[$shift]}, @{$Res3[$shift]} if $Res3[$shift]
  0            
  0            
10826             } else {
10827 0           $Res2[$shift] = $Res3[$shift]
10828             }
10829             }
10830             # defined $Res2[$_] ? ($Res3[$_] and push @{$Res2[$_]}, @{$Res2[$_]}) : ($Res2[$_] = $Res3[$_]) for 0..$#Res3;
10831 0           @Res2 = $self->DEEP_COPY(@Res2);
10832 0           my ($ok, @ini_compat);
                  # Every pass of the loop below takes the first remaining translation of each shift state
10833 0           do {{ # Run over found translations
10834 0 0         my @res2 = map {defined() ? $_->[0] : undef} @Res2; # process next unprocessed translations
  0            
  0            
10835 0   0       defined and (shift(@$_), (@$_ or undef $_)) for @Res2; # remove what is being processed
      0        
10836 0           $ok = grep $_, @res2;
10837 0 0 0       @res2 = map {(not defined() or (!$dupsOK and $seen{$_->[1]}++)) ? undef : $_} @res2; # remove duplicates
  0            
10838 0 0         my @compat = map {defined() ? $_->[0] : undef} @res2;
  0            
10839 0 0         my @_from_ = map {defined() ? $_->[2] : undef} @res2;
  0            
                  # NOTE(review): the next statement is cut off in this listing; its full text is not visible here
10840 0   0       defined and s/((?
10841 0 0         @res2 = map {defined() ? $_->[1] : undef} @res2;
  0            
10842 0 0 0       @res2 = map {0x10000 > ord($_ || 0) ? $_ : undef} @res2 unless $w32OK; # remove those needing surrogates
  0 0          
10843 0   0       defined $ini_compat[$_] or $ini_compat[$_] = $compat[$_] for 0..$#compat;
10844 0 0         my @extra_penalty = map {!!$compat[$_] and $ini_compat[$_] < $compat[$_]} 0..$#compat;
  0            
10845 0 0         next unless my $cnt = grep defined, @res2;
10846 0           my($penalty, $p) = [('zzz') x @res2]; # above any "5.1", "undef" ("unassigned"???)
10847             # Take into account the "compatibility", but give it lower precedence than the layer:
10848             # for no-compatibility: do not store the level;
                  # The penalty is a string (compared with `gt'): OK-flag, age, ok-flag, "#extra#", block
10849             defined $res2[$_] and $penalty->[$_] gt ( $p = ($OK{$res2[$_]} ? '+' : '-') . ($self->{Age}{$res2[$_]} || 'undef') .
10850             ($ok{$res2[$_]} ? '+' : '-') . "#$extra_penalty[$_]#" . ($self->{UBlock}{$res2[$_]} || '') )
10851 0 0 0       and $penalty->[$_] = $p for 0..$#res2;
    0 0        
      0        
      0        
10852 0   0       my $have1 = not (defined $res2[0] and defined $res2[1]); # Prefer those with both entries
10853             # Break a non-lc/uc paired translations into separate groups
10854 0   0       my $double_occupancy = ($cnt == 2 and $res2[0] ne $res2[1] and lc $res2[0] eq lc $res2[1]); # Case fold
10855 0           warn " seeing random-double, penalties <$penalty->[0]>, <$penalty->[1]>\n" if warnSORTCOMPOSE;
10856 0 0 0       next if $double_occupancy and grep {defined and $Penalize{$_}} @res2;
  0 0          
10857 0 0 0       if ($double_occupancy and grep {defined and $penalize{$_}} @res2) {
  0 0          
10858 0   0       defined $res2[$_] and $penalty->[$_] = "zzz$penalty->[$_]" for 0..$#res2;
10859             } else {
10860 0   0       defined and $Penalize{$_} and $cnt--, $have1=1, undef $_ for @res2;
      0        
10861 0   0       defined $res2[$_] and $penalize{$res2[$_]} and $penalty->[$_] = "zzz$penalty->[$_]" for 0..$#res2;
      0        
10862             }
10863 0 0         next unless $cnt;
10864 0 0 0       if (not $double_occupancy and $cnt == 2 and (1 or $penalty->[0] ne $penalty->[1])) { # Break (penalty here is not a good idea???)
      0        
10865 0           warn " breaking random-double, penalties <$penalty->[0]>, <$penalty->[1]>\n" if warnSORTCOMPOSE;
10866 0           push @{ $byPenalty{"$penalty->[0]1"}[0][$L] }, [ [$res2[0],undef,undef,$_from_[0]]];
  0            
10867 0           push @{ $byPenalty{"$penalty->[1]1"}[0][$L] }, [undef, [$res2[1],undef,undef,$_from_[1]]];
  0            
10868 0           next; # Now: $double_occupancy or $cnt == 1 or $penalty->[0] eq $penalty->[1]
10869             }
10870 0 0         $p = (defined $res2[0] ? $penalty->[0] : 'zzz'); # may have been undef()ed due to Penalty...
10871 0 0 0       $p = $penalty->[1] if @$penalty > 1 and defined $res2[1] and $p gt $penalty->[1];
      0        
10872 0           push @{ $byPenalty{"$p$have1"}[$double_occupancy][$L] },
10873             # [map {defined $res2[$_] ? $res2[$_] : undef} 0..$#res2];
10874 0 0         [map {defined $res2[$_] ? [$res2[$_],undef,undef,$_from_[$_]] : undef} 0..$#res2];
  0            
10875             }} while $ok;
10876 0           warn " --> combined of [@working_with] -> ", $self->array2string([\@res, %byPenalty]) if warnSORTCOMPOSE;
10877             }
10878             } # sorted bindings, per Layer
10879 0           push @res, [ @byPenalty{ sort keys %byPenalty } ]; # each elt is an array ref indexed by layer number; elt of this is [lc uc]
10880             }
10881             #warn 'Compositions: ', $self->array2string(\@res);
10882             \@res
10883 0           } # index as $res->[group][penalty_N][double_occ][layer][NN][shift]
10884            
# Pad, in place, every string of the trailing list with spaces on the right
# up to a common width: the length of the longest string plus $extra.
# The strings are modified through the aliases in @_.
sub equalize_lengths ($$@) {
  my $self  = shift;
  my $extra = shift || 0;
  my $width = 0;
  foreach my $str (@_) {
    my $len = length $str;
    $width = $len if $width <= $len;
  }
  $width += $extra;
  foreach my $str (@_) {		# $str aliases the caller's variable
    my $missing = $width - length $str;
    $str .= ' ' x $missing if $missing > 0;
  }
}
10891            
# Format the structure produced by sort_compositions() as text: one row per
# (layer, shift state), preceded by the label $k and terminated by a newline.
# Columns are separated by `║' between groups (`┃' for the group numbers
# listed in @$bold, `┋' for those in @$bold1), by `│' between penalty levels,
# and by `┆' between the double-occupancy halves.  Trailing rows holding only
# separators and whitespace are dropped.
sub report_sorted_l ($$$;$$) {	# 6 levels: |-group, priority, double-occupancy, layer, count, shift
  my ($self, $title, $sorted, $bold, $bold1) = @_;
  $title = $title->[0] if 'ARRAY' eq ref($title || 0);
  $title = '' unless defined $title;
  $title = "<$title>" if $title !~ /[^┃┋║│┆\s]/;
  my %group_sep;
  for my $g (@{$bold1 || []}) { $group_sep{$g} = '┋' }
  for my $g (@{$bold || []})  { $group_sep{$g} = '┃' }	# takes precedence
  my @rows = ($title, '');	# Up to 100 layers - an overkill, of course???  One extra level to store separators...
  for my $group (0..$#$sorted) {		# Top level
    $self->equalize_lengths(0, @rows);
    my $sep = $group_sep{$group} || '║';
    $_ .= " $sep" for @rows;
    my $prio_seen = 0;
    for my $prio (@{ $sorted->[$group] }) {
      if ($prio_seen++) {
        $self->equalize_lengths(0, @rows);
        $_ .= ' │' for @rows;
      }
      my $double_seen = 0;
      for my $double (reverse @$prio) {
        if ($double_seen++) {
          $self->equalize_lengths(0, @rows);
          $_ .= ' ┆' for @rows;
        }
        for my $layer (0..$#$double) {
          for my $set (@{$double->[$layer]}) {
            for my $shift (0,1) {
              my $key = $set->[$shift];
              next unless defined $key;
              $key = $key->[0] if ref $key;
              $key = " $key" if $key =~ /$rxCombining/;
              my $row = 2*$layer + $shift;
              # Keep last layer pristine for correct separators...
              push @rows, ($rows[-1]) x ($row - $#rows + 1) if $row >= $#rows;
              $rows[$row] .= " $key";
            }
          }
        }
      }
    }
  }
  pop @rows while @rows and $rows[-1] !~ /[^┃┋║│┆\s]/;
  return join "\n", @rows, '';
}
10935            
10936             sub append_keys ($$$$;$) { # $KK is [[lc,uc], ...]; modifies $C in place
                  # Add the bindings of $KK to $C as one more group with a single penalty level.
                  #   $C       - array ref of groups (extended in place)
                  #   $KK      - array ref indexed by layer; each element is an [lc, uc] pair (entries may be array refs,
                  #              then element 0 is the char); layers with no defined entry are skipped
                  #   $LL      - optional array ref, $LL->[layer][shift]; a 0 there (for a layer other than 0) sends
                  #              the entry to the slot of layer 0
                  #   $prepend - if true, the new group is put in front of @$C instead of at the end
                  # Returns 1 if a group was added; returns empty (and leaves $C alone) if $KK had nothing to add.
10937 0     0 0   my ($self, $C, $KK, $LL, $prepend, @KKK, $cnt) = (shift, shift, shift, shift, shift);
10938 0           for my $L (0..$#$KK) { # $LL contains info about from which layer the given binding was stolen
10939 0           my $k = $KK->[$L];
10940 0 0 0       next unless defined $k and (defined $k->[0] or defined $k->[1]);
      0        
10941 0           $cnt++;
10942 0 0         my @kk = map {$_ and ref $_ ? $_->[0] : $_} @$k;
  0 0          
                  # $paired: both entries present, distinct, and the first is the lowercase of the second;
                  # it is used as the first index of @KKK
10943 0   0       my $paired = (@$k == 2 and defined $k->[0] and defined $k->[1] and $kk[0] ne $kk[1] and $kk[0] eq lc $kk[1]);
10944 0 0 0       my @need_special = map { $LL and $L and defined $k->[$_] and defined $LL->[$L][$_] and 0 == $LL->[$L][$_]} 0..$#$k;
  0   0        
      0        
10945 0 0         if (my $special = grep $_, @need_special) { # count
                  # All the defined entries are special: store the whole pair in slot 0 and go to the next layer
10946 0 0         ($prepend ? push(@{ $KKK[$paired][0] }, $k) : unshift(@{ $KKK[$paired][0] }, $k)),
  0 0          
  0            
10947             next if $special == grep defined, @$k;
                  # Otherwise split the pair: the special entries go to slot 0, the rest stays with layer $L
10948 0           $paired = 0;
10949 0 0         my $to_level0 = [map { $need_special[$_] ? $k->[$_] : undef} 0..$#$k];
  0            
10950 0 0         $k = [map {!$need_special[$_] ? $k->[$_] : undef} 0..$#$k];
  0            
10951 0 0         $prepend ? push @{ $KKK[$paired][0] }, $to_level0 : unshift @{ $KKK[$paired][0] }, $to_level0;
  0            
  0            
10952             }
10953 0 0         $prepend ? push @{ $KKK[$paired][$L] }, $k : unshift @{ $KKK[$paired][$L] }, $k; # 0: layer has only one slot
  0            
  0            
10954             }
10955             #print "cnt=$cnt\n";
10956 0 0         return unless $cnt;
10957 0 0         push @$C, [[@KKK]] unless $prepend; # one group of one level of penalty
10958 0 0         unshift @$C, [[@KKK]] if $prepend; # one group of one level of penalty
10959 0           1
10960             }
10961            
10962             sub shift_pop_compositions ($$$;$$$$) { # Limit is how many groups to process
10963 0   0 0 0   my($self, $C, $L, $backwards, $omit, $limit, $ignore_groups, $store_level, $skip_lc, $skip_uc)
      0        
      0        
10964             = (shift, shift, shift, shift, shift || 0, shift || 1e100, shift || 0, shift, shift, shift);
                  # Extract (removing it from $C in place) the next binding for layer $L.
                  #   $C             - array ref of groups, each indexed as [penalty_N][double_occ][layer][NN][shift]
                  #   $L             - layer number; for a non-0 layer, the sets of layer 0 are inspected first
                  #   $backwards     - if true, scan the groups and the sets from the end
                  #   $omit          - number of groups to skip at the start of the scan
                  #   $limit         - maximal number of groups to process (default: 1e100)
                  #   $ignore_groups - bindings found in the first so many processed groups are removed, but not reported
                  #   $store_level   - optional array ref; filled with the layer number(s) the result was taken from
                  #   $skip_lc, $skip_uc - if true, the unshifted (shifted) entry is not looked for
                  # Returns an array ref [unshifted, shifted]; it may be partially filled or empty.
10965 0           my($do_lc, $do_uc) = (!$skip_lc, !$skip_uc);
10966 0   0       my($both, $first, $out_lc, $out_uc, @out, @out_levels, $have_out, $groupN) = ($do_lc and $do_uc);
10967 0 0         my @G = $backwards ? reverse @$C : @$C;
10968 0           for my $group (@G[$omit..$#G]) {
10969 0 0         last if --$limit < 0;
10970 0           $groupN++;
10971 0           for my $penalty_group (@$group) { # each $penalty_group is indexed by double_occupancy and layer
10972             # each layer in sorted; if $both, we prefer to extract a paired translation; so it is enough to check the first elt on each layer
10973 0           my $group_both = $both;
10974 0 0         if ($both) {
10975 0 0 0       $group_both = 0 unless $penalty_group->[1] and @{ $penalty_group->[1][$L] || [] } or @{ $penalty_group->[1][0] || [] };
  0 0 0        
  0 0          
10976             } # if $group_both == 0, and $both: double-group is empty, so we can look only in single/unrelated one.
10977             # if $both = $group_both == 0: may not look in double group, so can look only in single/unrelated one
10978             # if $both = $group_both == 1: must look in double-group only.
                  # Each $Set is [layer number, the list of [unshifted, shifted] pairs on this layer]
10979 0 0         for my $Set (($L ? [0, $penalty_group->[$group_both][0]] : ()), [$L, $penalty_group->[$group_both][$L]]) {
10980 0           my $set = $Set->[1];
10981 0 0 0       next unless $set and @$set; # @$set consists of [unshifted, shifted] pairs
10982 0 0         if ($group_both) { # we know we meet a double element at start of the group
10983 0 0         my $OUT = $backwards ? pop @$set : shift @$set; # we know we meet a double element at start of the group
10984 0 0         return [] if $groupN <= $ignore_groups;
10985 0 0         @$store_level = ($Set->[0]) x 2 if $store_level;
10986 0           return $OUT;
10987             }
10988             ## or ($both and defined $elt->[0] and defined $elt->[1]);
                  # $spliced counts the elements already removed in a forward scan, to correct the later indices
10989 0           my $spliced = 0;
10990 0 0         for my $eltA ($backwards ? map($#$set - $_, 0..$#$set) : 0..$#$set) {
10991 0           my $elt = $eltA - $spliced;
10992 0   0       my $lc_ok = ($do_lc and defined $set->[$elt][0]);
10993 0   0       my $uc_ok = ($do_uc and defined $set->[$elt][1]);
10994 0 0 0       next if not ($lc_ok or $uc_ok);
10995 0   0       my $have_both = (defined $set->[$elt][0] and defined $set->[$elt][1]);
10996 0   0       my $found_both = ($lc_ok and $uc_ok); # If defined $have_out, cannot have $found_both; moreover $have_out ne $uc_ok
10997 0 0 0       die "Panic!" if defined $have_out and ($found_both or $have_out eq $uc_ok);
      0        
10998             # next if not $found_both and defined $have_out and $have_out eq $uc_ok;
10999 0 0         my $can_splice = $have_both ? $both : 1;
11000 0 0         my $can_return = $both ? $have_both : 1;
11001 0           my $OUT = my $out = $set->[$elt]; # Can't return yet: @out may contain a part of info...
                  # $uc_ok doubles as the index of the found entry: false (0) = unshifted, 1 = shifted
11002 0 0 0       unless ($groupN <= $ignore_groups or defined $have_out and $have_out eq $uc_ok) { # In case !$do_return or $have_out
      0        
11003 0           $out[$uc_ok] = $out->[$uc_ok]; # In case !$do_return or $have_out
11004 0           $out_levels[$uc_ok] = $Set->[0];
11005             }
11006             #warn 'Doing <', join('> <', map {defined() ? $_ : 'undef'} @{ $set->[$elt] }), "> L=$L; splice=$can_splice; return=$can_return; lc=$lc_ok uc=$uc_ok";
11007 0 0         if ($can_splice) { # Now: $both and not $have_both; must edit in place
11008 0           splice @$set, $elt, 1;
11009 0 0         $spliced++ unless $backwards;
11010             } else { # Must edit in place
11011 0           $OUT = [@$out]; # Deep copy
11012 0           undef $out->[$uc_ok]; # only one matched...
11013             }
11014 0 0         $OUT = [] if $groupN <= $ignore_groups;
11015 0 0         if ($can_return) {
11016 0 0         if ($found_both) {
11017 0 0         @$store_level = map {$_ and $Set->[0]} @$OUT if $store_level;
  0 0          
11018 0           return $OUT;
11019             } else {
11020 0 0         @$store_level = @out_levels if $store_level;
11021 0           return \@out;
11022             }
11023             # return($found_both ? $OUT : \@out);
11024             } # Now: had $both and !$had_both; must condinue
                  # One half is found: from now on look only for the other half
11025 0           $have_out = $uc_ok;
11026 0           $both = 0; # $group_both is already FALSE
11027 0 0         ($lc_ok ? $do_lc : $do_uc) = 0;
11028             #warn "lc/uc: $do_lc/$do_uc";
11029             }
11030             }
11031             }
11032             }
                  # The scan is finished without an early return: report what was collected (possibly nothing)
11033 0 0         @$store_level = @out_levels if $store_level;
11034             return \@out
11035 0           }
11036            
11037             my ($rebuild_fake, $rebuild_style) = ("\n\t\t\t/* To be auto-generated */\n", <<'EOR');
11038            
11039             .klayout span, .klayout-wrapper .over-shift {
11040             font-size: 29pt ;
11041             font-weight: bolder;
11042             text-wrap: none;
11043             white-space: nowrap;
11044             }
11045             .klayout kbd, .asSpan { display: inline-block; }
11046             .asSpan2 { display: inline-table; }
11047            
11048             /* Not used; allows /-diagonals to be highlighted with nth-last-of-type() */
11049             .klayout kbd.hidden-align { display: none; }
11050            
11051             kbd span.lc, kbd span.uc { display: inline; }
11052            
11053             /* Hide lc only if in .uc or hovering over -uc and not inside; similarly for uc */
11054             /* States: .klayout-wrapper:not(:hover) | .klayout.uclc:hover NORMAL = UCLC
11055             .klayout-uc:hover .klayout:not(:hover) UC
11056             .klayout-wrapper:hover .klayout-uc:not(:hover) LC */
11057             .klayout.lc kbd span.uc, .klayout.uc kbd span.lc,
11058             .klayout-uc:hover:not(:active) .klayout:not(.lc):not(:hover) kbd span.lc,
11059             .klayout-uc:hover:active .klayout:not(.uc):not(:hover) kbd span.uc,
11060             .klayout-wrapper:hover:not(:active) .klayout-uc:not(:hover) .klayout:not(.uc) kbd span.uc,
11061             .klayout-wrapper:hover:active .klayout-uc:not(:hover) .klayout:not(.lc) kbd span.lc { display: none; }
11062            
11063             /* These should be active unless hovering over wrapper, and not internal .klayout */
11064             .klayout.uclc:hover kbd span.uc, .klayout.uclc:hover kbd span.lc,
11065             .klayout.uclc.force kbd span.uc, .klayout.uclc.force kbd span.lc,
11066             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc:not(.do-alt) kbd span.uc,
11067             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc:not(.do-alt) kbd span.lc {
11068             font-size: 70%;
11069             }
11070             .klayout.uclc:hover kbd span.uc, .klayout.uclc:hover kbd span.lc,
11071             .klayout.uclc:not(.in-wrapper) kbd span.uc, .klayout.uclc:not(.in-wrapper) kbd span.lc,
11072             .klayout.uclc.force kbd span.uc, .klayout.uclc.force kbd span.lc,
11073             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc.do-alt kbd span.uc,
11074             .klayout.uclc.do-alt:hover kbd span.uc,
11075             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc.do-alt kbd span.lc,
11076             .klayout.uclc.do-alt:hover kbd span.lc,
11077             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc:not(.do-alt) kbd span.uc,
11078             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc:not(.do-alt) kbd span.lc {
11079             position: absolute;
11080             z-index: 10;
11081             border: 1px dotted green;
11082             line-height: 0.8em; /* decreasing this moves up; should be changed with padding-bottom */
11083             }
11084             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc kbd span.uc,
11085             .klayout-wrapper .klayout-uc .klayout.uclc:hover kbd span.uc,
11086             .klayout.uclc kbd span.uc {
11087             right: 0.2em;
11088             top: -0.05em;
11089             padding-bottom: 0.15em; /* Less makes _ not fit inside border... */
11090             }
11091             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc kbd span.lc,
11092             .klayout-wrapper .klayout-uc .klayout.uclc:hover kbd span.lc,
11093             .klayout.uclc kbd span.lc {
11094             left: 0.2em;
11095             bottom: 0em;
11096             }
11097             /* Same for left/right placement */
11098             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc kbd span.uc.on-left,
11099             .klayout-wrapper .klayout-uc .klayout.uclc:hover kbd span.uc.on-left,
11100             .klayout.uclc:not(.in-wrapper) kbd span.uc.uc.on-left { /* repeat is needed to protect against :not(.base) about 25lines below */
11101             left: 0.35em;
11102             right: auto;
11103             }
11104             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc kbd span.lc.on-left,
11105             .klayout-wrapper .klayout-uc .klayout.uclc:hover kbd span.lc.on-left,
11106             .klayout.uclc:not(.in-wrapper) kbd span.lc.lc.on-left {
11107             left: 0.0em;
11108             }
11109             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc kbd span.uc.on-right,
11110             .klayout-wrapper .klayout-uc .klayout.uclc:hover kbd span.uc.on-right,
11111             .klayout.uclc:not(.in-wrapper) kbd span.uc.uc.on-right {
11112             right: 0.0em;
11113             }
11114             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc kbd span.lc.on-right,
11115             .klayout-wrapper .klayout-uc .klayout.uclc:hover kbd span.lc.on-right,
11116             .klayout.uclc:not(.in-wrapper) kbd span.lc.lc.on-right {
11117             left: auto;
11118             right: 0.35em;
11119             }
11120             .klayout kbd span:not(.base):not(.base-uc):not(.base-lc).on-right
11121             { left: auto; right: 0.0em; position: absolute; }
11122             .klayout kbd span:not(.base):not(.base-uc):not(.base-lc).on-left
11123             { left: 0.0em; right: auto; position: absolute; }
11124             .klayout kbd .on-right:not(.prefix), .on-right-ex { color: firebrick; }
11125             .klayout kbd .on-right:not(.prefix).vbell { color: Coral; }
11126             .klayout kbd .on-left { z-index: 10; }
11127             .klayout kbd .on-right { z-index: 9; }
11128            
11129             .klayout-wrapper:hover .klayout.uclc:not(:hover) kbd.shift {outline: 6px dotted green;}
11130            
11131             kbd span, kbd div { vertical-align: bottom; } /* no effect ???!!! */
11132            
11133             kbd {
11134             color: #444;
11135             /* line-height: 1.6em; */
11136             width: 1.4em; /* +0.24em border +0.08em margin; total 1.72em */
11137            
11138             /* +0.3em border; */
11139             min-height: 0.83em; /* These two should be changed together to get uc letters centered... */
11140             line-height: 0.75em; /* Increasing by the same amount works fine??? */
11141             /* One also needs to change the vertical offsets of arrows from_*, and System-key icon */
11142            
11143             text-align: center;
11144             cursor: pointer;
11145             padding: 0.0em 0.0em 0.0em 0.0em;
11146             margin: 0.04em;
11147             white-space: nowrap;
11148             vertical-align: top;
11149             position: relative;
11150            
11151             background-color: #FFFFFF;
11152            
11153             background-image: -moz-linear-gradient(left, rgba(0,0,0,0.2), rgba(64,64,64,0.2), rgba(64,64,64,0.2), rgba(128,128,128,0.2));
11154             background-image: -webkit-gradient(linear, left top, right top, color-stop(0%,rgba(0,0,0,0.2)), color-stop(33%,rgba(64,64,64,0.2)), color-stop(66%,rgba(64,64,64,0.2)), color-stop(100%,rgba(128,128,128,0.2)));
11155             background-image: -webkit-linear-gradient(left, rgba(0,0,0,0.2) 0%, rgba(64,64,64,0.2) 33%, rgba(64,64,64,0.2) 66%, rgba(128,128,128,0.2) 100%);
11156             background-image: -o-linear-gradient(left, rgba(0,0,0,0.2) 0%, rgba(64,64,64,0.2) 33%, rgba(64,64,64,0.2) 66%, rgba(128,128,128,0.2) 100%);
11157             background-image: -ms-linear-gradient(left, rgba(0,0,0,0.2) 0%, rgba(64,64,64,0.2) 33%, rgba(64,64,64,0.2) 66%, rgba(128,128,128,0.2) 100%);
11158             background-image: linear-gradient(0deg, rgba(0,0,0,0.2) 0%, rgba(64,64,64,0.2) 33%, rgba(64,64,64,0.2) 66%, rgba(128,128,128,0.2) 100%);
11159             filter: progid:DXImageTransform.Microsoft.gradient( startColorstr='#dddddd', endColorstr='#e5e5e5',GradientType=1 );
11160            
11161             border-top: solid 0.1em #CCC;
11162             border-right: solid 0.12em #AAA;
11163             border-bottom: solid 0.2em #999;
11164             border-left: solid 0.12em #BBB;
11165             -webkit-border-radius: 0.22em;
11166             -moz-border-radius: 0.22em;
11167             border-radius: 0.22em;
11168             z-index: 0;
11169            
11170             -webkit-box-shadow:
11171             0.03em 0.1em 0.1em 0.06em #888,
11172             0.05em 0.1em 0.06em 0.06em #aaa;
11173             -moz-box-shadow:
11174             0.03em 0.1em 0.1em 0.06em #888,
11175             0.05em 0.1em 0.06em 0.06em #aaa;
11176             box-shadow:
11177             0.03em 0.1em 0.1em 0.00em #888 ,
11178             0.05em 0.1em 0.06em 0.0em #aaa ;
11179             }
11180            
11181             kbd:hover, .klayout-wrapper:hover .klayout:not(:hover) kbd.shift {
11182             color: #222;
11183             background-image: -moz-linear-gradient(left, rgba(128,128,128,0.2), rgba(192,192,192,0.2), rgba(192,192,192,0.2), rgba(255,255,255,0.2));
11184             background-image: -webkit-gradient(linear, left top, right top, color-stop(0%,rgba(128,128,128,0.2)), color-stop(33%,rgba(192,192,192,0.2)), color-stop(66%,rgba(192,192,192,0.2)), color-stop(100%,rgba(255,255,255,0.2)));
11185             background-image: -webkit-linear-gradient(left, rgba(128,128,128,0.2) 0%, rgba(192,192,192,0.2) 33%, rgba(192,192,192,0.2) 66%, rgba(255,255,255,0.2) 100%);
11186             background-image: -o-linear-gradient(left, rgba(128,128,128,0.2) 0%, rgba(192,192,192,0.2) 33%, rgba(192,192,192,0.2) 66%, rgba(255,255,255,0.2) 100%);
11187             background-image: -ms-linear-gradient(left, rgba(128,128,128,0.2) 0%, rgba(192,192,192,0.2) 33%, rgba(192,192,192,0.2) 66%, rgba(255,255,255,0.2) 100%);
11188             background-image: linear-gradient(0deg, rgba(128,128,128,0.2) 0%, rgba(192,192,192,0.2) 33%, rgba(192,192,192,0.2) 66%, rgba(255,255,255,0.2) 100%);
11189             filter: progid:DXImageTransform.Microsoft.gradient( startColorstr='#e5e5e5', endColorstr='#ffffff',GradientType=1 );
11190             }
11191             kbd:active, kbd.selected, .klayout-uc:hover:not(:active) .klayout:not(:hover) kbd.shift, .klayout-wrapper:active .klayout-uc:not(:hover) kbd.shift {
11192             margin-top: 0.14em; /* This variant is with "solid" buttons, the commented one is with "rubber" ones */
11193             border-top: solid 0.10em #CCC;
11194             border-right: solid 0.12em #9a9a9a; /* Make right/bottom a tiny way darker */
11195             border-bottom: solid 0.1em #8a8a8a;
11196             border-left: solid 0.12em #BBB;
11197             /* margin-top: 0.11em;
11198             border-top: solid 0.13em #999;
11199             border-right: solid 0.12em #BBB;
11200             border-bottom: solid 0.1em #CCC;
11201             border-left: solid 0.12em #AAA; */
11202             padding: 0.0em 0.0em 0.0em 0.0em;
11203            
11204             -webkit-box-shadow:
11205             0.05em 0.03em 0.1em 0.1em #aaa;
11206             -moz-box-shadow:
11207             0.05em 0.03em 0.1em 0.1em #aaa;
11208             box-shadow:
11209             0.05em 0.03em 0.1em 0em #aaa;
11210            
11211             }
11212             kbd img {
11213             padding-left: 0.25em;
11214             vertical-align: middle;
11215             height: 22px; width: 22px;
11216             opacity: 0.8;
11217             }
11218             kbd:hover img {
11219             opacity: 1;
11220             }
11221             kbd span.shrink {
11222             font-size: 85%;
11223             }
11224             .klayout.do-altgr kbd span.shrink.altgr {
11225             font-size: 72%;
11226             }
11227             kbd .small {
11228             font-size: 62%;
11229             }
11230             kbd .vsmall {
11231             font-size: 39%;
11232             }
11233            
11234             kbd .base, kbd .base-lc, kbd .base-uc {
11235             -webkit-touch-callout: none;
11236             -webkit-user-select: none;
11237             -khtml-user-select: none;
11238             -moz-user-select: none;
11239             -ms-user-select: none;
11240             -o-user-select: none;
11241             user-select: none;
11242             }
11243            
11244             /* Special rules for do-alt-display. Without alt2, places the base on left and right;
11245             with alt2, places base on the left (unless base-right is present) */
11246            
11247             /* .klayout.do-alt.uclc kbd span.lc, .klayout.do-alt.uclc kbd span.uc { */
11248             .klayout.do-alt.uclc:not(.in-wrapper) kbd span.uc, .klayout.do-alt.uclc:not(.in-wrapper) kbd span.lc,
11249             .klayout.do-alt.uclc:hover kbd span.uc, .klayout.do-alt.uclc:hover kbd span.lc,
11250             .klayout.do-alt.uclc.force kbd span.uc, .klayout.do-alt.uclc.force kbd span.lc,
11251             .klayout-wrapper:not(:hover) .klayout-uc .klayout.do-alt.uclc kbd span.uc,
11252             .klayout-wrapper:not(:hover) .klayout-uc .klayout.do-alt.uclc kbd span.lc {
11253             font-size: 85%;
11254             }
11255            
11256             .klayout.do-alt.sz125 kbd span.uc, .klayout.do-alt.sz125 kbd span.lc, /* exclude below: too specific otherwise */
11257             .klayout.do-alt.sz125 kbd span:not(.lc):not(.uc):not(.base):not(.base-uc):not(.base-lc):not(.shrink):not(.small):not(.vsmall) {
11258             font-size: 125%;
11259             line-height: 0.98em; /* decreasing this moves up; should be changed with padding-bottom */
11260             /* padding-bottom: 0.1em; */ /* Less makes _ not fit inside border... */
11261             }
11262             .klayout.do-alt.sz120 kbd span.uc, .klayout.do-alt.sz120 kbd span.lc, /* exclude below: too specific otherwise */
11263             .klayout.do-alt.sz120 kbd span:not(.lc):not(.uc):not(.base):not(.base-uc):not(.base-lc):not(.shrink):not(.small):not(.vsmall) {
11264             font-size: 120%;
11265             line-height: 1.02em; /* decreasing this moves up; should be changed with padding-bottom */
11266             /* padding-bottom: 0.1em; */ /* Less makes _ not fit inside border... */
11267             }
11268             .klayout.do-alt kbd span.uc, .klayout.do-alt kbd span.lc, /* exclude below: too specific otherwise */
11269             .klayout.do-alt.sz115 kbd span.uc, .klayout.do-alt.sz115 kbd span.lc,
11270             .klayout.do-alt kbd span:not(.lc):not(.uc):not(.base):not(.base-uc):not(.base-lc):not(.shrink):not(.small):not(.vsmall),
11271             .klayout.do-alt.sz115 kbd span:not(.lc):not(.uc):not(.base):not(.base-uc):not(.base-lc):not(.shrink):not(.small):not(.vsmall) {
11272             font-size: 115%;
11273             line-height: 1.05em; /* decreasing this moves up; should be changed with padding-bottom */
11274             /* padding-bottom: 0.1em; */ /* Less makes _ not fit inside border... */
11275             }
11276             .klayout.do-alt.sz110 kbd span.uc, .klayout.do-alt.sz110 kbd span.lc, /* exclude below: too specific otherwise */
11277             .klayout.do-alt.sz110 kbd span:not(.lc):not(.uc):not(.base):not(.base-uc):not(.base-lc):not(.shrink):not(.small):not(.vsmall) {
11278             font-size: 110%;
11279             line-height: 1.12em; /* decreasing this moves up; should be changed with padding-bottom */
11280             /* padding-bottom: 0.1em; */ /* Less makes _ not fit inside border... */
11281             }
11282             .klayout.do-alt.sz100 kbd span.uc, .klayout.do-alt.sz100 kbd span.lc, /* exclude below: too specific otherwise */
11283             .klayout.do-alt.sz100 kbd span:not(.lc):not(.uc):not(.base):not(.base-uc):not(.base-lc):not(.shrink):not(.small):not(.vsmall) {
11284             line-height: 1.2em; /* decreasing this moves up; should be changed with padding-bottom */
11285             /* padding-bottom: 0.1em; */ /* Less makes _ not fit inside border... */
11286             }
11287            
11288             .klayout.do-alt kbd span.base-lc, .klayout.do-alt kbd span.base-uc {
11289             font-size: 90%;
11290             }
11291             .klayout.do-alt.alt2 kbd span.base-lc, .klayout.do-alt.alt2 kbd span.base-uc {
11292             font-size: 80%;
11293             }
11294            
11295             .klayout.do-alt kbd span.base-uc {
11296             right: 15%;
11297             top: 35%; /* Combine rel-parent and rel-us offsets : */
11298             }
11299             .klayout.do-alt kbd span.base-lc {
11300             left: 15%;
11301             bottom: 25%; /* Combine rel-parent and rel-us offsets : */
11302             }
11303             .klayout.do-alt.alt2 kbd span.base-uc {
11304             left: 35%;
11305             top: 30%; /* Combine rel-parent and rel-us offsets : */
11306             }
11307             .klayout.do-alt.alt2 kbd span.base-lc {
11308             left: 15%;
11309             bottom: 25%; /* Combine rel-parent and rel-us offsets : */
11310             }
11311             .klayout.do-alt.alt2.base-right kbd span.base-uc {
11312             right: 15%;
11313             left: auto; /* Combine rel-parent and rel-us offsets : */
11314             }
11315             .klayout.do-alt.alt2.base-right kbd span.base-lc {
11316             right: 35%;
11317             left: auto; /* Combine rel-parent and rel-us offsets : */
11318             }
11319             .klayout.do-alt.alt2.base-center kbd span.base-uc {
11320             left: 60%; /* Combine rel-parent and rel-us offsets : */
11321             }
11322             .klayout.do-alt.alt2.base-center kbd span.base-lc {
11323             left: 40%; /* Combine rel-parent and rel-us offsets : */
11324             }
11325            
11326             .klayout.do-alt kbd span.base {
11327             font-size: 120%;
11328             left: 25%;
11329             top: 65%; /* Combine rel-parent and rel-us offsets : */
11330             }
11331             .klayout.do-alt.large-base.large-base kbd span.base { /* Make .large-base override .alt2 */
11332             font-size: 200%;
11333             left: 50%;
11334             top: 50%; /* Combine rel-parent and rel-us offsets : */
11335             }
11336             .klayout.do-alt.alt2 kbd span.base {
11337             font-size: 110%;
11338             left: 25%;
11339             top: 75%; /* Combine rel-parent and rel-us offsets : */
11340             }
11341             .klayout.do-alt.alt2.base-right kbd span.base {
11342             right: 25%;
11343             left: auto; /* Combine rel-parent and rel-us offsets : */
11344             }
11345             .klayout.do-alt.alt2.base-center kbd span.base {
11346             left: 50%; /* Combine rel-parent and rel-us offsets : */
11347             }
11348             .klayout.do-alt kbd span.base, .klayout.do-alt kbd span.base-lc, .klayout.do-alt kbd span.base-uc {
11349             position: absolute;
11350             z-index: -1;
11351            
11352             opacity: 0.25;
11353             filter: alpha(opacity=25); /* IE6-IE8 */
11354            
11355             color: blue;
11356             line-height: 1em; /* Tight-fitting box */
11357             height: 1em;
11358             width: 1em;
11359             margin: -0.5em -0.5em -0.5em -0.5em; /* -0.5em is the geometric center */
11360             }
11361             .klayout.do-alt kbd {
11362             min-height: 1.2em; /* Should be changed together to get uc letters centered... */
11363             line-height: 1.2em; /* Increasing by the same amount works fine??? */
11364             }
11365             .klayout.do-altgr span.altgr {outline: 9px dotted green;}
11366            
11367             kbd.with_x-NONONO:before {
11368             position: absolute;
11369             z-index: -10;
11370            
11371             opacity: 0.25;
11372             filter: alpha(opacity=25); /* IE6-IE8 */
11373            
11374             content: "✖";
11375             color: red;
11376             font-size: 120%;
11377            
11378             line-height: 1em; /* Tight-fitting box */
11379             height: 1em;
11380             width: 1em;
11381            
11382             top: 50%; /* Combine rel-parent and rel-us offsets : */
11383             left: 50%;
11384             margin: -0.43em 0 0 -0.5em; /* -0.5em is the geometric center; but it is not in the center of ✖...*/
11385             }
11386             kbd.from_sw:after, kbd.from_ne:after, kbd.from_nw:after, kbd.to_ne:after, kbd.to_nw:before, kbd.to_w:after, kbd.from_w:after {
11387             position: absolute;
11388             z-index: 1;
11389             font-size: 80%;
11390             color: red;
11391             text-shadow: 1px 1px #ffff88, -1px -1px #ffff88, -1px 1px #ffff88, 1px -1px #ffff88;
11392             text-shadow: 1px 1px rgba(255,255,0,0.3), -1px -1px rgba(255,255,0,0.3), -1px 1px rgba(255,255,0,0.3), 1px -1px rgba(255,255,0,0.3);
11393             }
11394             kbd.from_sw.grn:after, kbd.from_ne.grn:after, kbd.from_nw.grn:after, kbd.to_ne.grn:after, kbd.to_nw.grn:before, kbd.to_w.grn:after, kbd.from_w.grn:after {
11395             color: green;
11396             }
11397             kbd.from_sw.blu:after, kbd.from_ne.blu:after, kbd.from_nw.blu:after, kbd.to_ne.blu:after, kbd.to_nw.blu:before, kbd.to_w.blu:after, kbd.from_w.blu:after {
11398             color: blue;
11399             }
11400             kbd.from_sw.ylw:after, kbd.from_ne.ylw:after, kbd.from_nw.ylw:after, kbd.to_ne.ylw:after, kbd.to_nw.ylw:before, kbd.to_w.ylw:after, kbd.from_w.ylw:after {
11401             color: #FFB400;
11402             }
11403             kbd.from_sw:not(.pure), kbd.xfrom_sw, kbd.from_ne:not(.pure), kbd.from_nw:not(.pure), kbd.to_ne:not(.pure), kbd.to_nw:not(.pure) {
11404             text-shadow: 1px 1px yellow, -1px -1px yellow, -1px 1px yellow, 1px -1px yellow;
11405             }
11406             kbd.from_sw:after {
11407             left: -0.0em;
11408             bottom: -0.65em;
11409             }
11410             kbd.from_sw:after, kbd.to_ne:after {
11411             content: "⇗";
11412             }
11413             kbd.from_se:after, kbd.to_nw:before {
11414             content: "⇖";
11415             }
11416             kbd.from_ne:after, kbd.from_nw:after {
11417             top: -0.55em;
11418             }
11419             kbd.to_ne:after, kbd.to_nw:before { top: -0.85em;}
11420             kbd.to_nw:before { left: 0.01em;}
11421             kbd.from_ne:after { content: "⇙"; }
11422             kbd.from_ne:after, kbd.to_ne:after { right: -0.0em; }
11423             kbd.from_nw:after { content: "⇘"; left: -0.0em; }
11424             kbd.to_w:after, kbd.from_w:after {
11425             top: 45%;
11426             left: -0.7em;
11427             }
11428             kbd.to_w.high:after, kbd.from_w.high:after {
11429             top: -15%;
11430             left: -0.5em;
11431             }
11432             kbd.to_w:after { content: "⇐"; }
11433             kbd.from_w:after { content: "⇒"; }
11434            
11435             /* Compensate for higher keys */
11436             .klayout.do-alt kbd.from_sw:after {
11437             bottom: -0.90em;
11438             }
11439             .klayout.do-alt kbd.from_ne:after, .klayout.do-alt kbd.from_nw:after {
11440             top: -0.85em;
11441             }
11442            
11443             span.prefix {
11444             color: yellow;
11445             text-shadow: 1px 1px black, -1px -1px black, -1px 1px black, 1px -1px black;
11446             }
11447             span.prefix.prefix2 {
11448             text-shadow: 1px 1px black, -1px -1px black, -1px 1px black, 1px -1px black,
11449             3px 0px firebrick, -3px 0px firebrick, 0px 3px firebrick, 0px -3px firebrick;
11450             }
11451             span.very-special {
11452             text-shadow: 1px 1px lime, -1px -1px lime, -1px 1px lime, 1px -1px lime;
11453             }
11454             span.special {
11455             text-shadow: 2px 2px dodgerblue, -2px -2px dodgerblue, -2px 2px dodgerblue, 2px -2px dodgerblue;
11456             }
11457             .thinspecial span.special {
11458             text-shadow: 1px 1px dodgerblue, -1px -1px dodgerblue, -1px 1px dodgerblue, 1px -1px dodgerblue;
11459             }
11460             span.not-surr:not(.prefix) {
11461             text-shadow: 2px 2px white, -2px -2px white, -2px 2px white, 2px -2px white;
11462             }
11463             span.need-learn {
11464             text-shadow: 1px 1px coral, -1px -1px coral, -1px 1px coral, 1px -1px coral;
11465             }
11466             span.need-learn.on-right {
11467             text-shadow: 1px 1px black, -1px -1px black, -1px 1px black, 1px -1px black,
11468             2px 2px coral, -2px -2px coral, -2px 2px coral, 2px -2px coral;
11469             }
11470             span.may-guess {
11471             text-shadow: 1px 1px yellow, -1px -1px yellow, -1px 1px yellow, 1px -1px yellow;
11472             }
11473            
11474             kbd.win_logo.ubuntu:before {
11475             content: url(http://linux.bihlman.com/wp-content/plugins/wp-useragent/img/24/os/ubuntu-2.png);
11476             }
11477             kbd.win_logo:before {
11478             position: absolute;
11479             z-index: -10;
11480            
11481             content: url(40px-computer_glenn_rolla_01.svg.med.png);
11482             height: 100%;
11483             width: 100%;
11484            
11485             top: 0%; /* Combine rel-parent and rel-us offsets : */
11486             left: 0%;
11487             /* margin: -0.5em -0.5em -0.5em -0.5em; */ /* -0.5em is the geometric center */
11488             }
11489             .do-alt kbd.win_logo:before { /* How to vcenter automatically??? */
11490             top: 20%;
11491             }
11492            
11493             /* Mark vowel's diagonals (for layout of diacritics) */
11494             .ddiag .arow > kbd:nth-of-type(2), .ddiag .arow > kbd:nth-last-of-type(7),
11495             .diag .arow > kbd:nth-of-type(2), .diag .arow > kbd:nth-of-type(7),
11496             .diag .drow > kbd:nth-of-type(2), .diag .drow > kbd:nth-of-type(7),
11497             .diag .arow > kbd:nth-of-type(10), .diag .drow > kbd:nth-of-type(10), kbd.red-bg
11498             { background-color: #ffcccc; }
11499             .ddiag .arow > kbd:nth-last-of-type(6), .ddiag .arow > kbd:nth-of-type(4),
11500             .diag .arow > kbd:nth-of-type(8), .diag .arow > kbd:nth-of-type(3),
11501             .diag .drow > kbd:nth-of-type(8), .diag .drow > kbd:nth-of-type(3), kbd.green-bg
11502             { background-color: #ccffcc; }
11503             .ddiag .arow > kbd:nth-last-of-type(8), .ddiag .arow > kbd:nth-last-of-type(5),
11504             .diag .arow > kbd:nth-of-type(9), .diag .arow > kbd:nth-of-type(4),
11505             .diag .drow > kbd:nth-of-type(9), .diag .drow > kbd:nth-of-type(4), kbd.blue-bg
11506             { background-color: #ccccff; }
11507            
11508             /* Mark non-vowel's diagonals (for layout of diacritics) */
11509             .hide45end .arow > kbd:nth-of-type(5), .hide45end .arow > kbd:nth-of-type(6),
11510             .hide45end .arow > kbd:nth-of-type(11),
11511             .hide45end .drow > kbd:nth-of-type(5), .hide45end .drow > kbd:nth-of-type(6),
11512             .hide45end .drow > kbd:nth-of-type(11), kbd.semi-hidden
11513             { opacity: 0.45; }
11514            
11515             span.vbell { color: SandyBrown; }
11516             span.three-cases { outline: 3px dotted yellow; }
11517             span.three-cases-long { outline: 3px dotted MediumSpringGreen; }
11518            
11519             span.withSubst { outline: 1px dotted blue; outline-offset: -1px; }
11520             span.isSubst { outline: 1px solid blue; outline-offset: -1px; }
11521            
11522             .use-operator span.operator { background-color: rgb(255,192,203) /*pink*/; }
11523             span.relation { background-color: rgb(255,160,122) /*lightsalmon*/; }
11524             span.ipa { background-color: rgb(173,255,47) /*greenyellow*/; }
11525             span.nAry { background-color: rgb(144,238,144) /*lightgreen*/; }
11526             span.paleo { background-color: rgb(240,230,140) /*Khaki*/; }
11527             .use-viet span.viet { background-color: rgb(220,220,220) /*Gainsboro*/; }
11528             div:not(.no-doubleaccent) span.doubleaccent { background-color: rgb(255,228,196) /*Bisque*/; }
11529             span.ZW { background-color: rgb(220,20,60) /*crimson*/; }
11530             span.WS { background-color: rgb(128,0,0) /*maroon*/; }
11531            
11532             .use-operator span.operator { background-color: rgba(255,192,203,0.5) /*pink*/; }
11533             span.relation { background-color: rgba(255,160,122,0.5) /*lightsalmon*/; }
11534             span.ipa { background-color: rgba(173,255,47,0.5) /*greenyellow*/; }
11535             span.nAry { background-color: rgba(144,238,144,0.5) /*lightgreen*/; }
11536             span.paleo { background-color: rgba(240,230,140,0.5) /*Khaki*/; }
11537             .use-viet span.viet { background-color: rgba(220,220,220,0.5) /*Gainsboro*/; }
11538             div:not(.no-doubleaccent) span.doubleaccent { background-color: rgba(255,228,196,0.5) /*Bisque*/; }
11539             span.ZW { background-color: rgba(220,20,60,0.5) /*crimson*/; }
11540             span.WS { background-color: rgba(128,0,0,0.5) /*maroon*/; }
11541            
11542             span.lFILL[convention]:before { content: attr(convention);
11543             color: white;
11544             font-size: 50%; }
11545            
11546             span.lFILL:not([convention]) { margin: 0ex 0.35ex; }
11547             span.l-NONONO { margin: 0ex 0.06ex; }
11548             span.yyy { padding: 0px !important; }
11549            
11550             div.rtl-hover:hover div:not(:hover) kbd span:not(.no-mirror-rtl):not(.base):not(.base-uc):not(.base-lc) { direction: rtl; }
11551            
11552             div.zero { position: relative;}
11553             div.zero div.over-shift { position: absolute; height: 1.13em; z-order: 999;}
11554             /* div.zero div.over-shift { outline: 3px dotted yellow;} */
11555             .do-alt + div.zero div.over-shift { height: 1.5em; }
11556             div.zero.l div.over-shift { left: 0.04pt; width: 4.24em;}
11557             div.zero.r div.over-shift { left: 21.12em; width: 3.56em;} /* (1.72em - 0.04em) × 10 + 4.24em + 0.08 */
11558             div.zero.tp div.over-shift { top: 7.8em;}
11559             .over-shift-outline div.zero.btm div.over-shift { outline: 3px dotted blue;}
11560             div.zero.btm div.over-shift { bottom: 1.13em;}
11561             .do-alt + div.zero.btm div.over-shift { bottom: 1.5em;}
11562             /* div.zero:hover { outline: 6px dotted yellow;} */
11563            
11564             EOR
11565            
11566             sub apply_filter_div ($$;$) {
                  # Rewrites, inside the text $txt, every section whose opening tag carries a
                  # kbd_rebuild="..." attribute, and returns the modified text.
                  #   $self - object providing html_keyboard_diagram()
                  #   $txt  - text to filter (a private copy is edited; the caller's string is untouched)
                  #   $opt  - optional hash ref; defaults to {} when omitted or false
                  # For each match the captured opening part ($1) and closing part ($3) are kept;
                  # what lies between them is replaced either by the placeholder $rebuild_fake
                  # (when $opt->{fake} is true) or by $self->html_keyboard_diagram($2, $opt),
                  # where $2 is the value of the kbd_rebuild attribute.
11567 0   0 0 0   my($self, $txt, $opt) = (shift, shift, shift || {});
                  # Flags: /m (^ matches at line starts), /s (. matches newlines), /g (all sections),
                  # /e (the replacement part is evaluated as Perl code).
                  # NOTE(review): the pattern below looks damaged in this listing (it begins with "]*"
                  # and its parentheses do not balance) -- verify against the module source.
11568 0           $txt =~ s(^(]*\skbd_rebuild="([^""]*?)"[^'">]*>).*?^(
11569 0 0         ( $1 . ($opt->{fake} ? $rebuild_fake : $self->html_keyboard_diagram("$2", $opt)) . $3 )msge;
11570 0           $txt;
11571             }
11572             sub apply_filter_style ($$;$) {
                  # Replaces whatever lies between the marker comments
                  #   /* START auto-generated style */  ...  /* END auto-generated style */
                  # in $txt and returns the modified text.  Both markers are kept.
                  #   $self - invocant (not used in the body)
                  #   $txt  - text to filter (a private copy is edited)
                  #   $opt  - optional hash ref; defaults to {} when omitted or false
                  # The replacement is the placeholder $rebuild_fake when $opt->{fake} is true,
                  # otherwise the stylesheet $rebuild_style.
11573 0   0 0 0   my($self, $txt, $opt) = (shift, shift, shift || {});
                  # The START marker must be first on its line apart from leading whitespace (^ under /m);
                  # whitespace inside the markers is flexible.  .*? is non-greedy and, under /s, spans
                  # newlines, so each START pairs with the nearest following END; /g handles every pair.
11574 0           $txt =~ s(^(\s*/\*\s*START\s+auto-generated\s+style\s*\*/).*?(/\*\s*END\s+auto-generated\s+style\s*\*/))
11575 0 0         ( $1 . ($opt->{fake} ? $rebuild_fake : $rebuild_style) . $2 )msge;
11576 0           $txt;
11577             }
11578            
# File-scoped list of six fixed multi-line text fragments.  As shown here they hold the
# labels of the non-character keys: Backspace, Tab, CapsLock, Enter, Shift, Ctrl, Alt, AltGr, Menu.
# NOTE(review): presumably these are the constant pieces emitted between the generated key rows
# of the HTML keyboard diagram, and any markup they contained appears to be missing from this
# listing -- confirm against the module source and the place where the array is used.
11579             my @HTML_KBD_FIXED = ('
11580            
11581             ',
11582             'Backspace
11583            
11584            
Tab',
11585             '
11586            
11587            
CapsLock',
11588             'Enter
11589            
11590            
Shift',
11591             'Shift
11592            
11593            
CtrlAlt',
11594             'AltGrMenuCtrl
11595            
11596             ');
11597            
# classes_by_chars($self, $h_classes, $opt, $layer, $lc0, $uc0, $lc, $uc, $k_base, $k)
#   Collects the class names registered in $h_classes for one key.
#     $h_classes    hash:  selector => { char => [class, ...] }
#     $opt          array ref of option names
#     $layer        layer id appended to the selector (the layer-less selector is always tried too)
#     $lc0, $uc0    background characters, looked up under selectors starting with $k_base
#     $lc,  $uc     foreground characters, looked up under selectors starting with $k
#   Selectors tried: "$prefix", "$prefix$layer", and each of these followed by "=$option".
#   Undefined characters are skipped.  Returns the distinct class names (unordered).
#
#   The lookups are plain rvalue accesses, so -- unlike iterating directly over
#   @{ $h_classes->{$sel}{$char} } -- no empty entries get autovivified into $h_classes.
sub classes_by_chars ($$$$$$$$$$) {
  my ($self, $h_classes, $opt, $layer, $lc0, $uc0, $lc, $uc, $k_base, $k) = @_;
  my %cl;
  my $collect = sub {
    my ($selector, $char) = @_;
    my $by_char = $h_classes->{$selector} or return;
    $cl{$_}++ for @{ $by_char->{$char} || [] };
  };
  for my $L ('', $layer) {
    # first the background pair (k-selectors), then the foreground pair (K-selectors)
    for my $group ([$k_base, $lc0, $uc0], [$k, $lc, $uc]) {
      my ($prefix, @chars) = @$group;
      for my $c (grep defined, @chars) {
        $collect->("$prefix$L", $c);
        $collect->("$prefix$L=$_", $c) for @$opt;
      }
    }
  }
  keys %cl;
}
11617            
# apply_kmap($self, $kmap, $c)
#   Translates a key through the map $kmap (hash keyed by $self->key2hex() of the character;
#   values are hex strings or array refs whose element 0 is a hex string).
#     - no $kmap: $c is returned untouched (even when it is an array ref);
#     - $c may be an array ref, in which case its element 0 is the character looked up;
#     - no entry in $kmap: undef is returned;
#     - scalar entry: the character with that hex code is returned;
#     - array entry: a one-level copy is returned, with element 0 converted from hex to a character.
sub apply_kmap($$$) {
  my ($self, $kmap, $c) = @_;
  return $c unless $kmap;
  my $char   = ref $c ? $c->[0] : $c;
  my $mapped = $kmap->{ $self->key2hex($char) };
  return $mapped unless defined $mapped;        # unmapped: undef
  return chr hex $mapped unless ref $mapped;
  my @copy = @$mapped;                          # copy, so the map entry itself stays in hex
  $copy[0] = chr hex $copy[0];
  return \@copy;
}
11628            
# do_keys($self, $opt, @entries): each entry is an array ref
#   [lc, uc, face, kmap, layerN, h_classes, name, classes...]
# and describes what one layer puts on this key.  Returns the concatenated a_pair() output for all
# entries.
# NOTE(review): $q and $cl are computed but do not appear in the returned expression, and the
# join contains empty strings; the markup that used them was presumably stripped from this
# listing -- confirm against the module source.
11629             sub do_keys ($$$@) { # calculate classes related to the “whole key”, and emit the “content” of the key
11630 0     0 0   my ($self, $opt, $base, $out, $lc0, $uc0, %c_classes) = (shift, shift, 1, '');
11631 0           for my $in (@_) {
11632 0           my ($lc, $uc, $f, $kmap, $layerN, $h_classes, $name, @classes) = @$in;
                  # Translate both characters through the entry's kmap (if any), then reduce array-ref results to their character
11633 0   0       $kmap and $_ = $self->apply_kmap($kmap, $_) for ($lc, $uc);
11634 0   0       ref and $_ = $_->[0] for $lc, $uc;
                  # The first entry supplies the “background” pair ($lc0, $uc0) used with every later entry
11635 0 0         ($lc0, $uc0) = ($lc, $uc), $base = 0 if $base;
11636             # k/K for key-based-on-(background/foreground) char; k=opt/K=opt likewise
11637 0           $c_classes{$_}++ for $self->classes_by_chars($h_classes, $opt, $layerN, $lc0, $uc0, $lc, $uc, 'k', 'K');
11638             }
11639 0           my @extra = sort keys %c_classes;
                  # Quote the class attribute value only when it contains whitespace (i.e. more than one class)
11640 0 0         my $q = ("@extra" =~ /\s/ ? '"' : '');
11641 0 0         my $cl = @extra ? " class=$q@extra$q" : '';
11642             # push @extra, 'from_se' if $k[0][0] =~ /---/i; # lc, uc, $h_classes, name, classes:
                  # One a_pair() per entry; elements 7.. of the entry are passed on as the extra classes
11643 0           join '', $out, "", (map $self->a_pair($opt, $lc0, $uc0, $self->apply_kmap($_->[3], $_->[0]),
11644             $self->apply_kmap($_->[3], $_->[1]),
11645             $_->[2], $_->[4], $_->[5], $_->[6], [@$_[7..$#$_]]), @_), ''
11646             }
11647            
# h($text): returns a copy of $text in which each of the characters & < > is replaced by its
# entry in %html_esc.
sub h($) {
  my $text = shift;
  $text =~ s/([&<>])/$html_esc{$1}/g;
  return $text;
}
# tags_by_rx($string, TAG => REGEX, ...)
#   Returns, in argument order, the TAGs whose REGEX matches $string.
#   Dies when the TAG/REGEX list is not made of complete pairs.
sub tags_by_rx {
  my $subject = shift;
  die "Need odd number of arguments" if @_ % 2;
  my @matched;
  while (my ($tag, $rx) = splice @_, 0, 2) {
    push @matched, $tag if $subject =~ $rx;
  }
  return @matched;
}
11658            
# a_pair($self, $opts, $lc0, $uc0, $LC, $UC, $F, $layerN, $h_classes, $name [, $extra])
#   Emits the content one layer contributes to a key.
#     $opts        array ref of option names (also turned into the lookup hash $opt below)
#     $lc0, $uc0   background characters of the key (used only for class lookup)
#     $LC, $UC     unshifted/shifted binding: a character, undef, or an array ref whose element 0
#                  is the character and whose element 2, when true, marks a prefix key
#     $F, $layerN, $h_classes   passed through to char_2_html_span() / classes_by_chars()
#     $name        optional name; the value 'base' selects plain h()-escaped output
#     $extra       optional array ref of extra class names (copied, the caller's array is untouched)
#   Returns one piece of output when the shifted character is the uppercase counterpart of the
#   unshifted one (or when both are prefix keys with the same character), otherwise the
#   concatenation of two pieces (unshifted, then shifted).
# NOTE(review): several literals ("" . h(...) . "") look like remnants of markup stripped from
# this listing, and $q/$cl/$e/$opt/@e_lc/@e_uc are not used in what remains -- confirm against
# the module source before relying on the exact output format.
11659             sub a_pair ($$$$$$$$$$;@) {
11660 0   0 0 0   my($self, $opts, $lc0, $uc0, $LC, $UC, $F, $layerN, $h_classes, $name, $extra) =
11661             (shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift || []);
11662             # warn "See lc prefix $LC->[0] " if ref $LC and $LC->[2];
                  # $lc1/$uc1: the bare characters (element 0 of an array-ref binding); may be undef
11663 0 0 0       my ($lc1, $uc1) = map {(defined and ref()) ? $_->[0] : $_} $LC, $UC;
  0            
11664            
11665 0           $extra = [@$extra];
11666 0           my $e = @$extra;
11667            
                  # $lc/$uc: as above, but with '♪' standing in for an undefined binding
11668 0 0         my ($lc, $uc) = map {defined() ? $_ : '♪'} $lc1, $uc1;
  0            
11669             # return join '', map {defined() ? $_ : ''} $lc, $uc;
11670            
11671 0           my $opt = { map {($_, 1)} @$opts };
  0            
11672 0   0       my $base = (($name || '') eq 'base');
                  # $prefix2: both bindings are prefix keys (element 2 true) and show the same character
11673 0   0       my $prefix2 = (ref($LC) and ref($UC) and $LC->[2] and $UC->[2] && $uc eq $lc);
                  # Single-piece branch: $uc/$lc form a case pair ('ß' excluded) and are both defined or both undefined
11674 0 0 0       if ($prefix2 or ($uc eq ucfirst $lc and $lc eq lc $uc and $lc ne 'ß' and defined($lc1) == defined($uc1))) {
      0        
      0        
      0        
11675 0 0         if ($uc ne $lc) {
11676 0   0       ref and $_->[2] and die "Do not expect a character `$_->[0]' to be a deadkey..." for $LC, $UC;
      0        
11677             }
                  # @pref_i: per binding, true when its element 2 equals 3 or (3 << 3)
11678 0 0 0       my @pref_i = map { ref $_ and (3 == ($_->[2] || 0) or (3 << 3) == ($_->[2] || 0)) } $LC, $UC;
  0   0        
      0        
11679 0 0 0       $prefix2 and $pref_i[1] and not $pref_i[0] and unshift @$extra, 'prefix2';
      0        
11680 0 0 0       $LC and ref $LC and $LC->[2] and unshift @$extra, 'prefix';
      0        
11681 0           push @$extra, $self->classes_by_chars($h_classes, $opts, $layerN, $lc0, undef, $lc1, undef, 'c', 'C');
11682             # unshift @$extra, tags_by_rx $lc, 'need-learn' => ($opt->{cyr} ? qr/N-A/i : qr/[ϝϙϲͻϿϾͲ℧ϗ]N-A/i);
11683             # push @$extra, 'vbell' unless defined $lc1;
                  # Characters whose uc() differs from ucfirst() get a 'three-cases' class ('-long' when uc() is more than one character)
11684 0 0 0       push @$extra, (1 < length uc $lc1 ? 'three-cases-long' : 'three-cases')
    0          
11685             if defined $lc1 and uc $lc1 ne ucfirst $lc1;
11686 0 0         push @$extra, $name if $name;
11687 0 0         my $q = ("@$extra" =~ /\s/ ? '"' : '');
11688 0           @$extra = sort @$extra;
11689 0 0         my $cl = @$extra ? " class=$q@$extra$q" : '';
11690 0 0         $base ? "" . h($uc) . "" : $self->char_2_html_span(undef, $UC, $uc, $F, {}, @$extra)
11691             # "" . $out . "";
11692             } else {
11693 0           my (@e_lc, @e_uc);
                  # One record per case: [shown char, own classes, 'lc'/'uc', binding, background char, bare char]
11694 0           my @do = ([$lc, [], 'lc', $LC, $lc0, $lc1], [$uc, [], 'uc', $UC, $uc0, $uc1]);
11695             # warn "See lc prefix $LC->[0] " if ref $LC and $LC->[2];
11696 0   0       $_->[3] and ref $_->[3] and $_->[3][2] and push @{$_->[1]}, 'prefix' for @do;
  0   0        
      0        
11697 0   0       $_->[3] and ref $_->[3] and (3 == ($_->[3][2] || 0) or (3 << 3) == ($_->[3][2] || 0)) and push @{$_->[1]}, 'prefix2' for @do;
  0   0        
      0        
      0        
11698 0           push @{$_->[1]}, $self->classes_by_chars($h_classes, $opts, $layerN, $_->[4], undef, $_->[5], undef, 'c', 'C'),
11699             tags_by_rx $_->[0], 'not-surr' => qr/[„‚“‘”’«‹»›‐–—―‒‑‵‶‷′″‴⁗〃´]/i # white
11700 0           for @do;
                  # An undefined binding is tagged 'vbell'
11701 0           push @{$_->[1]}, 'vbell' for grep !defined $_->[5], @do;
  0            
11702             join '', map {
11703 0 0         push @{$_->[1]}, ($name ? "$name-$_->[2]" : $_->[2]);
  0            
  0            
11704 0           my $ee = [sort @$extra, @{$_->[1]}];
  0            
11705 0 0         my $q = ("@$ee" =~ /\s/) ? '"' : '';
11706 0 0         my $o = ($base ? "" . h($_->[0]) . ""
11707             : $self->char_2_html_span(undef, $_->[3], $_->[0], $F, {}, @$ee));
11708             # "[2]$q>$o";
11709             } @do;
11710             }
11711             }
11712            
# $kbdrow counts the calls of keys2html_diagram() made with the oneRow option; it drives the
# indentation of successive one-row diagrams.
#
# keys2html_diagram($self, $opts, $cnt, $new_row, @layers)
#     $opts      array ref of options; "name=value" entries become name => value, others name => 1
#     $cnt       number of keys available
#     $new_row   hash ref; a true entry for a key ordinal means that key starts a new row
#     @layers    array refs [layer, rest...]; for every key, elements 0 and 1 of layer->[key]
#                followed by rest are handed to do_keys()
#   Options read here: oneRow, startKey, cntKeys.  Returns the assembled string.
# NOTE(review): "$off" and the empty string appended for oneRow look like remnants of markup
# stripped from this listing -- confirm against the module source.
11713             my $kbdrow = 0;
11714             sub keys2html_diagram ($$$$@) {
11715 0     0 0   my ($self, $opts, $cnt, $new_row) = (shift, shift, shift, shift);
11716 0 0         my %opts = map { /^\w+=/ ? split /=/, $_, 2 : ($_, 1)} @$opts;
  0            
                  # oneRow: the number of earlier oneRow calls, modulo 3, selects 0, 2 or 4 leading NBSPs
11717 0   0       my $off = (($opts{oneRow} && $kbdrow++) || 0) % 3;
11718 0           $off = "\xA0" x (2*$off);
11719 0 0         my @fixed = ($opts{oneRow} ? ("$off") : @HTML_KBD_FIXED);
11720 0           my $out = shift @fixed;
11721             # $cnt = $#{$layers_info->[0]} if $cnt > $#{$layers_info->[0]};
                  # Select cntKeys (default: $cnt) key ordinals starting at startKey, wrapping around past the last key
11722 0           my @keys = (0..($cnt-1));
11723 0   0       my $start = ($opts{startKey} || 0) % $cnt;
11724 0   0       my $CNT = $opts{cntKeys} || $cnt;
11725 0           @keys = (@keys) x ( 1 + int( ($start+$CNT-1)/$cnt ) );
11726 0           @keys = @keys[$start .. ($start + $CNT - 1)];
11727             KEY:
11728 0           for my $kn (@keys) { # Ordinal of keyboard's key
                  # A key that starts a new row is preceded by the next fixed piece (if any is left)
11729 0 0 0       $out .= (shift(@fixed) || '') if $new_row->{$kn};
11730 0           my ($symb, @keys, $last) = 0;
11731 0           for my $KK (@_) { # Layers
11732 0           my($layer, @rest) = @$KK; # rest = face, kmap, layerN, class_hash, name, classes
11733 0           push @keys, [@{$layer->[$kn]}[0,1], @rest];
  0            
11734             }
11735 0           $out .= $self->do_keys($opts, @keys);
11736             }
11737 0           $out .= join '', @fixed;
11738 0 0         $out .= "" if $opts{oneRow};
11739 0           $out
11740             }
11741            
# html_keyboard_diagram($self, $OPT, $global_opt)
#   Builds a keyboard diagram from the whitespace-separated specification $OPT, which must hold
#     - exactly one "/opt=o1,o2,..." item (the comma-separated options), and
#     - layer items of the form  CLASSES+NAME=FACE,PREFIX,WHICH  (see the examples next to the
#       pattern below): CLASSES is a comma-separated list, FACE may be a key of
#       $self->{face_shortcuts}, PREFIX is empty or names a prefix key of the face, WHICH is a
#       layer number or '-'.
#   Row lengths come from the '[geometry]' entry of the first face given with a WHICH other
#   than '-'.  Dies on malformed items, unknown faces or prefix keys, or missing geometry.
#   Returns the string produced by keys2html_diagram(), wrapped in $pre/$post when one of the
#   rtl-hover / rtl-hover-Trivia options is present.
# NOTE(review): $global_opt and $is_layer are not used in the code visible here; the multi-line
# strings assigned to ($pre, $post) look like remnants of stripped markup -- confirm against
# the module source.
11742             sub html_keyboard_diagram ($$$) {
11743 0     0 0   my($self, $OPT, $global_opt, @opt, @layers, $face0, $is_layer) = (shift, shift, shift);
11744 0           my %tr = qw(l 0 c 1 h 2);
11745 0           for my $arg (split /\s+/, $OPT) {
11746 0 0         push(@opt, $arg), next if $arg =~ s(^/opt=)(); # BELOW: `base' becomes NAME, `on-right' becomes CLASSES
11747 0 0         die "unrecognized `rebuild' option: `$arg'" # +=l,0,0 +base=l,0,0 +=l,0,1 +=l,ƒ,0 on-right+=c,0,1
11748             unless my($classes, $name, $f, $prefix, $which) = ( $arg =~ m{^((?:[-\w]+(?:,[-\w]+)*)?)\+([-\w]*)=(\w+),([\da-f]{4}|[^\x20-\x7e][^,]*|[02]?),(\d+|-)$}i );
11749 0 0         $f = $self->{face_shortcuts}{$f} if exists $self->{face_shortcuts}{$f};
11750 0 0 0       $face0 ||= $f unless $which eq '-';
                  # Drop a dotted circle placed before a nonspacing mark, then normalize the prefix via charhex2key()
11751 0           $prefix =~ s/◌(?=\p{NonspacingMark})//g;
11752 0           $prefix = $self->charhex2key($prefix);
                  # `and' binds looser than the comma: with WHICH eq '-', $which becomes 0 and $L is [$f]
                  # (so $f is then used as a key of $self->{layers}); otherwise $L is false here
11753 0   0       my $L = ($which eq '-' and $which = 0, [$f]);
11754 0 0 0       warn "unknown layer $L->[0]" if $L and not $self->{layers}{$L->[$which]};
11755             die "html_keyboard_diagram(): unknown face `$f'"
11756 0 0 0       unless $L ||= ($self->{faces}{$f}{layers} or $self->export_layers($f, $f));
      0        
                  # $kmap stays false for an empty prefix; a non-empty prefix without a map is fatal
11757 0 0 0       my $kmap = $self->{faces}{$f}{'[deadkeyFaceHexMap]'}{$self->key2hex($prefix)}
11758             or not length $prefix or die "output_html_keyboard_diagram(): Unknown prefix key `$prefix' for face $f";
11759             # create_composite_layers() translates 0000 key to ''
11760             # warn "I see HTML_classes for face=$f, prefix=`$prefix'" if $self->{faces}{$f}{'[HTML_classes]'}{length $prefix ? $self->key2hex($prefix) : ''};
11761 0   0       my $h_classes = $self->{faces}{$f}{'[HTML_classes]'}{length $prefix ? $self->key2hex($prefix) : ''} || {};
11762 0           push(@layers, [$self->{layers}{$L->[$which]}, $f, $kmap, $which, $h_classes, $name, split /,/, $classes]);
11763             }
11764 0 0         die "there must be exactly one /opt= argument in <<$OPT>>" unless @opt == 1;
11765 0           my $opt = [split /,/, $opt[0], -1];
11766 0 0         my ($cnt, @g, %new_row) = (0, @{ $self->{faces}{$face0}{'[geometry]'} || [] }); # keep only 1 from the last row
  0            
11767 0 0         @g or die "Face `$face0' has no associated layer with geometry info; did you set geometry_via_layer?";
                  # Running sums of the row lengths (last row excluded) give the ordinals of the keys starting a new row
11768 0           pop @g;
11769 0           $new_row{ $cnt += $_ }++ for @g;
11770 0           my ($pre, $post) = ('', '');
11771 0 0         ($pre, $post) = ("\n
", "
\nHover mouse here to see how characters look in RTL context.\n")
11772             if grep /^rtl-hover(-Trivia)?$/, @$opt;
11773 0 0         $post .= " Trivia: note mirroring of <{[()]}>." if grep /^rtl-hover-Trivia$/, @$opt;
11774 0           $pre . $self->keys2html_diagram($opt, $cnt+1, \%new_row, @layers) . $post;
11775             }
11776            
11777            
11778             # wget -O - http://cgit.freedesktop.org/xorg/proto/xproto/plain/keysymdef.h | perl -C31 -wlne 'next unless /\bXK_(\w+)\s+0x00([a-fA-F\d]+)/; print chr hex $2, qq(\t$1)' > ! oooo1
11779             # wget -O - http://cgit.freedesktop.org/xorg/proto/xproto/plain/keysymdef.h | perl -C31 -wlne "next unless /\bXK_(\w+)\s+0x([a-fA-F\d]+)\s+\/\*(?:\(?|\s+)U\+([a-fA-F\d]+)/; print chr hex $3, qq(\t$1)" > oooo3
11780            
11781             # See XK_ARMENIAN for an alternative way to encode Unicode to XK_: 0x1000587 /* U+0587
11782             my(%KeySyms,%deadSyms,%invKeySyms);
# load_KeySyms($self)
#   Fills the file-level tables %KeySyms (keysym name => character), %invKeySyms (character =>
#   keysym name) and %deadSyms (names of XK_dead_* symbols) from the files listed under the
#   'KeySyms' configuration value.  Does nothing when %KeySyms is already populated or when no
#   file list is configured.  A file that cannot be opened is reported with warn() and skipped.
#
#   Only definitions annotated with a "U+XXXX" comment are recorded.  A parenthesized
#   annotation "(U+XXXX" is recorded in %KeySyms but not in %invKeySyms.  Lines marked
#   obsolete/alias are expected to repeat an already seen code; anything else unexpected is
#   reported with warn().
sub load_KeySyms($) {
  return if %KeySyms;
  my $self  = shift;
  my $files = $self->get__value('KeySyms') or return;
  my %name_of_code;                      # numeric code (hex text) => first non-duplicate name
  FILE:
  for my $file (@$files) {
    my $fh;
    unless (open $fh, '<', $file) {
      warn("Cannot open $file: $!");
      next FILE;
    }
    LINE:
    while (defined(my $line = <$fh>)) {
      chomp $line;
      $deadSyms{$1}++ if $line =~ /\bXK_dead_(\w+)\s+0x([a-fA-F\d]+)\b/;
      my $is_dup = ( $line =~ m[\bXK_(\w+)\s+0x([a-fA-F\d]+)\s+/\*.*\b(obsolete|alias)\b] );
      my ($sym, $code, $paren, $ucode)
        = ( $line =~ m[\bXK_(\w+)\s+0x([a-fA-F\d]+)\s+/\*\s*(\()?U\+([a-fA-F\d]+)] )
        or next LINE;
      warn "not yet defined: <$line>" if $is_dup and not $name_of_code{$code};
      warn "sym re-defined: <$line>" if $KeySyms{$sym};
      # warn "macro re-defined: <$line>\n" if $name_of_code{$code} and not $is_dup;	# several offenders
      my $char = chr hex $ucode;
      $KeySyms{$sym}       = $char;
      $invKeySyms{$char}   = $sym unless $paren;
      $name_of_code{$code} = $sym unless $is_dup;
    }
  }
}
11804            
# @enc_dotcompose is a flat list of (short notation, X keysym name) pairs; it is filled in by the
# block that follows and later reversed to seed %dec_dotcompose (keysym name => short notation).
11805             # These preloaded symbols are enough to cover single-UTF-16 bindings in .Compose (except circled katakana/hangul)
11806             my @enc_dotcompose; # Have many-to-1, inverting hash would lose info; Do not distinguish Left/leftarrow etc.
11807 1     1   34896 { no warnings 'qw';
  1         2  
  1         2306  
11808             @enc_dotcompose = (qw#
11809             ` grave
11810             ' apostrophe
11811             " quotedbl
11812             ~ asciitilde
11813             ! exclam
11814             ? question
11815             @ at
11816             #, # `
11817             qw!
11818             # numbersign
11819             $ dollar
11820             % percent
11821             ^ asciicircum
11822             & ampersand
11823             * asterisk
11824             ( parenleft
11825             ) parenright
11826             [ bracketleft
11827             ] bracketright
11828             { braceleft
11829             } braceright
11830             - minus
11831             + plus
11832             = equal
11833             _ underscore
11834             < less
11835             > greater
11836             \ backslash
11837             / slash
11838             | bar
11839             , comma
11840             . period
11841             : colon
11842             ; semicolon
11843             _bar underbar
11844            
11845            
11846             ¡ exclamdown
11847             ¢ cent
11848             £ sterling
11849             ¤ currency
11850             ¥ yen
11851             ¦ brokenbar
11852             § section
11853             ¨ diaeresis
11854             © copyright
11855             ª ordfeminine
11856             « guillemotleft
11857             ¬ notsign
11858             ­ hyphen
11859             ® registered
11860             ¯ macron
11861             ° degree
11862             ± plusminus
11863             ² twosuperior
11864             ³ threesuperior
11865             ´ acute
11866             µ mu
11867             ¶ paragraph
11868             · periodcentered
11869             ¸ cedilla
11870             ¹ onesuperior
11871             º masculine
11872             » guillemotright
11873             ¼ onequarter
11874             ½ onehalf
11875             ¾ threequarters
11876             ¿ questiondown
11877             À Agrave
11878             Á Aacute
11879             Â Acircumflex
11880             Ã Atilde
11881             Ä Adiaeresis
11882             Å Aring
11883             Æ AE
11884             Ç Ccedilla
11885             È Egrave
11886             É Eacute
11887             Ê Ecircumflex
11888             Ë Ediaeresis
11889             Ì Igrave
11890             Í Iacute
11891             Î Icircumflex
11892             Ï Idiaeresis
11893             Ð ETH
11894             Ð Eth
11895             Ñ Ntilde
11896             Ò Ograve
11897             Ó Oacute
11898             Ô Ocircumflex
11899             Õ Otilde
11900             Ö Odiaeresis
11901             × multiply
11902             Ø Oslash
11903             Ø Ooblique
11904             Ù Ugrave
11905             Ú Uacute
11906             Û Ucircumflex
11907             Ü Udiaeresis
11908             Ý Yacute
11909             Þ THORN
11910             Þ Thorn
11911             ß ssharp
11912             à agrave
11913             á aacute
11914             â acircumflex
11915             ã atilde
11916             ä adiaeresis
11917             å aring
11918             æ ae
11919             ç ccedilla
11920             è egrave
11921             é eacute
11922             ê ecircumflex
11923             ë ediaeresis
11924             ì igrave
11925             í iacute
11926             î icircumflex
11927             ï idiaeresis
11928             ð eth
11929             ñ ntilde
11930             ò ograve
11931             ó oacute
11932             ô ocircumflex
11933             õ otilde
11934             ö odiaeresis
11935             ÷ division
11936             ø oslash
11937             ø ooblique
11938             ù ugrave
11939             ú uacute
11940             û ucircumflex
11941             ü udiaeresis
11942             ý yacute
11943             þ thorn
11944             ÿ ydiaeresis
11945            
11946             Cyr_ђ Serbian_dje
11947             ѓ Macedonia_gje
11948             є Ukrainian_ie
11949             Cyr_ѕ Macedonia_dse
11950             Cyr_і Ukrainian_i
11951             Cyr_ї Ukrainian_yi
11952             Cyr_ћ Serbian_tshe
11953             Cyr_ќ Macedonia_kje
11954             ґ Ukrainian_ghe_with_upturn
11955             Cyr_ў Byelorussian_shortu
11956             № numerosign
11957             Cyr_Ђ Serbian_DJE
11958             Ѓ Macedonia_GJE
11959             Є Ukrainian_IE
11960             Cyr_Ѕ Macedonia_DSE
11961             Cyr_І Ukrainian_I
11962             Cyr_Ї Ukrainian_YI
11963             Cyr_Ћ Serbian_TSHE
11964             Cyr_Ќ Macedonia_KJE
11965             Ґ Ukrainian_GHE_WITH_UPTURN
11966             Cyr_Ў Byelorussian_SHORTU
11967            
11968             ’sq rightsinglequotemark
11969             ‘sq leftsinglequotemark
11970             • enfilledcircbullet
11971             ♀ femalesymbol
11972             ♂ malesymbol
11973             NBSP nobreakspace
11974             … ellipsis
11975             ∩# intersection
11976             ∫ integral
11977             ≤ lessthanequal
11978             ≥ greaterthanequal
11979            
11980             d` dead_grave
11981             d' dead_acute
11982             d^ dead_circumflex
11983             d~ dead_tilde
11984             d¯ dead_macron
11985             dd# dead_breve----
11986             d^. dead_abovedot
11987             d" dead_diaeresis
11988             d^° dead_abovering
11989             d'' dead_doubleacute
11990             d^v dead_caron
11991             d, dead_cedilla
11992             dd# dead_ogonek---
11993             d_ι dead_iota
11994             d_voiced dead_voiced_sound
11995             d_½voiced dead_semivoiced_sound
11996             d. dead_belowdot
11997             dd# dead_hook---
11998             dd# dead_horn---
11999             d/ dead_stroke
12000             d^, dead_abovecomma
12001             dd# dead_abovereversedcomma---
12002             d`` dead_doublegrave
12003             d``# dead_double_grave
12004             d_° dead_belowring
12005             d__ dead_belowmacron
12006             dd# dead_belowcircumflex---
12007             d_~ dead_belowtilde
12008             dd# dead_belowbreve---
12009             d_" dead_belowdiaeresis
12010             d_invbrev dead_invertedbreve
12011             d_inv_brev dead_inverted_breve
12012             d_, dead_belowcomma
12013             dd# dead_currency
12014            
12015             d^( dead_dasia
12016             d^) dead_psili
12017            
12018             Ś Sacute
12019             Š Scaron
12020             Ş Scedilla
12021             Ť Tcaron
12022             Ź Zacute
12023             Ž Zcaron
12024             Ż Zabovedot
12025             ą aogonek
12026             ˛ ogonek
12027             ł lstroke
12028             ľ lcaron
12029             ś sacute
12030             ˇ caron
12031             š scaron
12032             ş scedilla
12033             ť tcaron
12034             ź zacute
12035             ˝ doubleacute
12036             ž zcaron
12037             ż zabovedot
12038             Ŕ Racute
12039             Ă Abreve
12040             Ĺ Lacute
12041             Ć Cacute
12042             Č Ccaron
12043             Ę Eogonek
12044             Ě Ecaron
12045             Ď Dcaron
12046             Đ Dstroke
12047             Ń Nacute
12048             Ň Ncaron
12049             Ő Odoubleacute
12050             Ř Rcaron
12051             Ů Uring
12052             Ű Udoubleacute
12053             Ţ Tcedilla
12054             ŕ racute
12055             ă abreve
12056             ĺ lacute
12057             ć cacute
12058             č ccaron
12059             ę eogonek
12060             ě ecaron
12061             ď dcaron
12062             đ dstroke
12063             ń nacute
12064             ň ncaron
12065             ő odoubleacute
12066             ř rcaron
12067             ů uring
12068             ű udoubleacute
12069             ţ tcedilla
12070             ˙ abovedot
12071            
12072             Ŗ Rcedilla
12073             Ĩ Itilde
12074             Ļ Lcedilla
12075             Ē Emacron
12076             Ģ Gcedilla
12077             Ŧ Tslash
12078             ŗ rcedilla
12079             ĩ itilde
12080             ļ lcedilla
12081             ē emacron
12082             ģ gcedilla
12083             ŧ tslash
12084             Ŋ ENG
12085             ŋ eng
12086             Ā Amacron
12087             Į Iogonek
12088             Ė Eabovedot
12089             Ī Imacron
12090             Ņ Ncedilla
12091             Ō Omacron
12092             Ķ Kcedilla
12093             Ų Uogonek
12094             Ũ Utilde
12095             Ū Umacron
12096             ā amacron
12097             į iogonek
12098             ė eabovedot
12099             ī imacron
12100             ņ ncedilla
12101             ō omacron
12102             ķ kcedilla
12103             ų uogonek
12104             ũ utilde
12105             ū umacron
12106            
12107             Ơ Ohorn
12108             ơ ohorn
12109             Ư Uhorn
12110             ư uhorn
12111            
12112             < leftcaret
12113             > rightcaret
12114             ∨ downcaret
12115             ∧ upcaret
12116             ¯ overbar
12117             ⊤ downtack
12118             ∩ upshoe
12119             ⌊ downstile
12120             _ underbar
12121             ∘ jot
12122             ⎕ quad
12123             ⊥ uptack
12124             ○ circle
12125             ⌈ upstile
12126             ∪ downshoe
12127             ⊃ rightshoe
12128             ⊂ leftshoe
12129             ⊣ lefttack
12130             ⊢ righttack
12131            
12132             ≤ lessthanequal
12133             ≠ notequal
12134             ≥ greaterthanequal
12135             ∫ integral
12136             ∴ therefore
12137             ∝ variation
12138             ∞ infinity
12139             ∇ nabla
12140             ∼ approximate
12141             ≃ similarequal
12142             ⇔ ifonlyif
12143             ⇒ implies
12144             ≡ identical
12145             √ radical
12146             ⊂ includedin
12147             ⊃ includes
12148             ∩ intersection
12149             ∪ union
12150             ∧ logicaland
12151             ∨ logicalor
12152             ∂ partialderivative
12153             ƒ function
12154             ← leftarrow
12155             ↑ uparrow
12156             → rightarrow
12157             ↓ downarrow
12158             ◆ soliddiamond
12159             ▒ checkerboard
12160            
12161             CP Multi_key
12162            
12163             +# KP_Add
12164             -# KP_Subtract
12165             *# KP_Multiply
12166             /# KP_Divide
12167             .# KP_Decimal
12168             =# KP_Equal
12169             SPC# KP_Space
12170            
12171             ← Left → Right ↑ Up ↓ Down
12172             !, map {("$_#", "KP_$_")} 0..9);
12173             } # `
12174            
# %dec_dotcompose maps an X keysym name to the short notation used when printing .Compose rules.
# It is seeded by reversing the (short notation, keysym name) pair list; since the list is
# reversed before the hash is built, a keysym name listed several times keeps the notation of
# its FIRST listing.
12175             my %dec_dotcompose = reverse @enc_dotcompose;
12176             # perl -C31 -wne "/^(.)\tCyrillic_(\w+)/ and print qq($2 $1 )" oooo3 >oooo-cyr
12177             # perl -C31 -wne "/^(.)\thebrew_(\w+)/ and print qq($2 $1 )" oooo3 >oooo-heb
                  # Name suffix => character tables; below they are entered as Cyrillic_NAME, hebrew_NAME and Greek_NAME
12178             my %cyr = qw( GHE_bar Ғ ghe_bar ғ ZHE_descender Җ zhe_descender җ KA_descender Қ ka_descender қ KA_vertstroke Ҝ ka_vertstroke ҝ
12179             EN_descender Ң en_descender ң U_straight Ү u_straight ү U_straight_bar Ұ u_straight_bar ұ HA_descender Ҳ
12180             ha_descender ҳ CHE_descender Ҷ che_descender ҷ CHE_vertstroke Ҹ che_vertstroke ҹ SHHA Һ shha һ SCHWA Ә schwa ә
12181             I_macron Ӣ i_macron ӣ O_bar Ө o_bar ө U_macron Ӯ u_macron ӯ io ё je ј lje љ nje њ dzhe џ IO Ё JE Ј LJE Љ NJE Њ
12182             DZHE Џ yu ю a а be б tse ц de д ie е ef ф ghe г ha х i и shorti й ka к el л em м en н o о pe п ya я er р es с te т
12183             u у zhe ж ve в softsign ь yeru ы ze з sha ш e э shcha щ che ч hardsign ъ YU Ю A А BE Б TSE Ц DE Д IE Е EF Ф GHE Г
12184             HA Х I И SHORTI Й KA К EL Л EM М EN Н O О PE П YA Я ER Р ES С TE Т U У ZHE Ж VE В SOFTSIGN Ь YERU Ы ZE З SHA Ш E Э
12185             SHCHA Щ CHE Ч HARDSIGN Ъ );
12186             my %heb = qw( doublelowline ‗ aleph א bet ב gimel ג dalet ד he ה waw ו zain ז chet ח tet ט yod י finalkaph ך kaph כ lamed ל
12187             finalmem ם mem מ finalnun ן nun נ samech ס ayin ע finalpe ף pe פ finalzade ץ zade צ qoph ק resh ר shin ש taw ת
12188             beth ב gimmel ג daleth ד samekh ס zayin ז het ח teth ט zadi צ kuf ק taf ת );
12189             my %grk = qw( ALPHAaccent Ά EPSILONaccent Έ ETAaccent Ή IOTAaccent Ί IOTAdieresis Ϊ OMICRONaccent Ό UPSILONaccent Ύ
12190             UPSILONdieresis Ϋ OMEGAaccent Ώ accentdieresis ΅ horizbar ― alphaaccent ά epsilonaccent έ etaaccent ή iotaaccent ί
12191             iotadieresis ϊ iotaaccentdieresis ΐ omicronaccent ό upsilonaccent ύ upsilondieresis ϋ upsilonaccentdieresis ΰ
12192             omegaaccent ώ ALPHA Α BETA Β GAMMA Γ DELTA Δ EPSILON Ε ZETA Ζ ETA Η THETA Θ IOTA Ι KAPPA Κ LAMDA Λ LAMBDA Λ MU Μ
12193             NU Ν XI Ξ OMICRON Ο PI Π RHO Ρ SIGMA Σ TAU Τ UPSILON Υ PHI Φ CHI Χ PSI Ψ OMEGA Ω alpha α beta β gamma γ delta δ
12194             epsilon ε zeta ζ eta η theta θ iota ι kappa κ lamda λ lambda λ mu μ nu ν xi ξ omicron ο pi π rho ρ sigma σ
12195             finalsmallsigma ς tau τ upsilon υ phi φ chi χ psi ψ omega ω );
                  # The stored notation is the script tag (Cyr_, heb_, Gr_) followed by the character itself
12196             $dec_dotcompose{"Cyrillic_$_"} = "Cyr_$cyr{$_}" for keys %cyr;
12197             $dec_dotcompose{"hebrew_$_"} = "heb_$heb{$_}" for keys %heb;
12198             $dec_dotcompose{"Greek_$_"} = "Gr_$grk{$_}" for keys %grk;
12199            
# shorten_dotcompose($self, $name [, $decode_U])
#   Abbreviates a keysym name while keeping it readable and unambiguous (for concise printouts):
#     - a script word starting with Cyr/Ukr/Gr/heb/Ar and followed by "_" is cut to that stem
#       (Cyrillic_a => Cyr_a);
#     - "dead_" becomes "d_";
#     - "Gr_...dieresis" and "d_diaeresis" lose their trailing "esis";
#     - with a true $decode_U, a name of the form Uxxxx (4 to 6 hex digits) becomes "uni_"
#       followed by that character.
#   $self is not used.  (The Cyr_/Gr_/uni_ prefixes may be massaged out later by the callers.)
sub shorten_dotcompose ($$;$) {
  my (undef, $name, $decode_U) = @_;
  $name =~ s/\b(Cyr|Ukr|Gr|heb|Ar)[a-z]+(?=_)/$1/;
  $name =~ s/\b(dead)(?=_)/d/;
  $name =~ s/\b(Gr_\w+dier|d_diaer)esis/$1/;
  if ($decode_U) {
    $name =~ s/^U([a-fA-F\d]{4,6})$/ 'uni_' . chr hex $1 /e;
  }
  return $name;
}
12208            
# dec_dotcompose($self, $line [, $dec_U])
#   Parses one .Compose rule of the form   <sym1> <sym2> ... : "result" ...
#   Returns (decoded sym1, decoded sym2, ..., result), where every symbol is translated via
#   %KeySyms, else via %dec_dotcompose, else via shorten_dotcompose($sym, $dec_U).
#   On a line it cannot parse it warns and returns nothing.
sub dec_dotcompose ($$;$) {
  my ($self, $in, $dec_U) = @_;
  my ($lhs, $rhs) = split /:/, $in, 2;
  unless ($rhs) {
    warn("Can't parse <<$in>>");
    return;
  }
  my @syms = ($lhs =~ /<(\w+)>/g);
  unless (@syms) {
    warn("Unknown format of IN in <<$in>>");
    return;
  }
  unless ($rhs =~ /"(.+?)"/) {
    warn("Unknown format of OUT in <<$in>>");
    return;
  }
  my $result = $1;
  my @decoded;
  for my $sym (@syms) {
    if (exists $KeySyms{$sym}) {
      push @decoded, $KeySyms{$sym};
    } elsif (exists $dec_dotcompose{$sym}) {
      push @decoded, $dec_dotcompose{$sym};
    } else {
      push @decoded, $self->shorten_dotcompose($sym, $dec_U);
    }
  }
  return (@decoded, $result);
}
12220            
12221             # Stats: about 250 in: egrep "CP.*d_|d_.*CP" o-std
# process_dotcompose($self, $fh, $sub [, $dec_U])
#   Reads .Compose-style rules from the filehandle $fh.  Comment lines, "include" lines and
#   blank lines are skipped, as are lines dec_dotcompose() cannot decode.  For every other
#   line the callback is invoked as
#       $sub->($self, $result, @input_keys)
#   $dec_U is handed through to dec_dotcompose().
#
#   $_ is localized: `while (<$fh>)' assigns every line to the global $_ and leaves it
#   undefined at end of file, which would otherwise clobber the caller's $_.  The callback
#   still sees the current line in $_, as before.
sub process_dotcompose ($$$;$) {
  my($self, $fh, $sub, $dec_U) = (shift, shift, shift, shift);
  local $_;
  while (<$fh>) {
    next if /^\s*(#|include\b)/;
    next unless /\S/;
    next unless my @in = $self->dec_dotcompose($_, $dec_U);
    $sub->($self, $in[-1], @in[0..$#in-1]);		# the result goes first, then the input keys
  }
}
12231            
# filter_dotcompose($self [, $fh]): reads .Compose-style rules from $fh (default: the ARGV
# handle) via process_dotcompose() and prints, one rule per line, the decoded input keys
# followed by the result.
# NOTE(review): the comment on the print line speaks of two spaces before $out while this
# listing shows one -- whitespace was probably collapsed by the report extraction; confirm
# against the module source.
12232             sub filter_dotcompose ($;$) {
12233 0   0 0 0   my ($self, $fh) = (shift, shift || \*ARGV);
12234             $self->process_dotcompose($fh, sub ($$@) {
                  # Callback arguments: object, result, then the input keys (which stay in @_ after the two shifts)
12235 0     0     my($self, $out) = (shift, shift);
12236 0           print "@_ $out\n"; # Two spaces to allow for combining marks
12237 0           });
12238             }
12239            
# put_val_deep($self, $h, $term, $val, @keys)
#   Stores $val in the nested hash $h under the key path @keys (at least two keys are
#   required; fewer dies with "No key(s)").  Intermediate hashes are created as needed.
#     - An intermediate position already holding a non-hash value is converted to
#       { $term => old value } so that the path can continue through it; without a true
#       $term this is fatal.
#     - A final position already holding a hash gets $val stored under its $term key;
#       a final position holding a plain value (or nothing) is overwritten: later rule wins.
#     - A final position holding a reference that is not a hash, or holding a hash while
#       $term is false, is fatal.
#   $self is not used.
#
#   Fix: the final check used to test `ref $h' (the containing hash, always a HASH at that
#   point) instead of `ref $h->{$k}', so a non-hash reference escaped the intended
#   diagnostic and failed with a bare "Not a HASH reference".
sub put_val_deep ($$$$@) {
  my($self, $h, $term, $val, $k) = (shift, shift, shift, shift, shift);
  die "No key(s) in put_val_deep()" unless @_;
  while (@_) {				# descend through all keys but the last
    my $parent = $h;
    $h->{$k} = {} unless defined $h->{$k};
    $h = $h->{$k};
    if ('HASH' ne ref $h) {		# a shorter rule ends here: keep it under the terminator key
      die "Encountered non-HASH in put_val_deep(): <$k>" unless $term;
      $h = $parent->{$k} = { $term => $h };
    }
    $k = shift;
  }
  if (exists $h->{$k} and ref $h->{$k}) {	# longer rules already pass through this position
    die "Encountered non-HASH in put_val_deep(): <$k>"
      unless $term and 'HASH' eq ref $h->{$k};
    $h->{$k}{$term} = $val;
  } else {
    $h->{$k} = $val;			# later rule wins
  }
}
12266            
# compose_array_2_hash($self, $a, $h, $opt)
#   Converts the rule list $a -- entries [$out, $term, @in] -- into the nested hash $h: the
#   hex form of $out is stored under the path formed by the hex forms of @in, using $term as
#   the terminator key (see put_val_deep()).  When $term is a reference, it is additionally
#   stored in $opt under the same path.
sub compose_array_2_hash ($$$$) {
  my ($self, $a, $h, $opt) = @_;
  foreach my $rule (@$a) {
    my ($out, $term, @in) = @$rule;
    # NOTE(review): $Term is computed but never read; the calls below pass $term itself.
    # Possibly $Term was meant to be the terminator key -- confirm before changing.
    my $Term = ref($term) ? $term->{term} : $term;
    my $out_hex = $self->key2hex($out);
    my @path    = map { $self->key2hex($_) } @in;
    $self->put_val_deep($h, $term, $out_hex, @path);
    if (ref $term) {
      my @opt_path = map { $self->key2hex($_) } @in;
      $self->put_val_deep($opt, $term, $term, @opt_path);
    }
  }
}
12276            
# compose_line_2_array($self, $a, $out, $massage, $term, @in)
#   Appends the rule [$out, $term, @in] to the array @$a unless the rule is filtered out.
#   With a true $massage the (copied) input keys are first normalized and the rule is kept
#   only when its first key is 'CP'; that key is then dropped.
#   A rule is skipped when $out is not exactly one character below 0x10000, when any input key
#   is not, or when $out equals one of the input keys; with printSkippedComposeKey enabled the
#   skipped rule is reported via warn().
12277             sub compose_line_2_array ($$$$$@) {
12278 0     0 0   my($self, $a, $out, $massage, $term, @in) = (@_);
12279 0 0         if ($massage) {
                  # Strip a uni_/Gr_/Cyr_/heb_ tag when exactly one character outside \x00-\x7e follows it; 'space' becomes ' '
12280 0           s/^(uni|Gr|Cyr|heb)_(?![\x00-\x7e])(?=.$)//, s/^space$/ / for @in; # copy
12281             #warn "compose: @in $out";
12282 0 0         return unless $in[0] eq 'CP';
12283 0           shift @in;
12284             }
12285             # Filter warnings via: egrep -v " d[^ ]|#" 00b | egrep -- "^---CP:" >00b2
                  # The statement below is `(optional warn), return if CONDITION'; CONDITION spans the next three lines
12286             (printSkippedComposeKey and warn("---CP: @in $out")), # The last make sense only in the context of keysymbol operations???
12287             return if 1 != length $out or 0x10000 <= ord $out
12288 0 0 0       or grep {1 != length or 0x10000 <= ord} @in or grep $out eq $_, @in; # Allow for one char only
  0 0 0        
      0        
12289             #warn "CP: @in $out";
12290 0           push @$a, [$out, $term, @in];
12291             }
12292            
12293             sub compose_2_array ($$$$@) {
                  # Read compose definitions from the open filehandle $fh with the parser selected
                  # by $method, appending accepted rules to @$a through compose_line_2_array().
                  #   'dotcompose' - parsing is delegated to process_dotcompose(); every callback
                  #                  invocation is forwarded with massaging enabled and a true $term
                  #   'entity'     - a matching line yields a character (from a hex code) and a list
                  #                  of names; each accepted name is split into single characters
                  #   'rfc1345'    - lines of the form "MNEMONIC HEX4 ..."; code points starting with
                  #                  "e0" and one-character mnemonics are skipped, and one extra
                  #                  rule (E u --> €) is appended at the end
                  # Any other $method is fatal.
                  # NOTE(review): two regular expressions in this sub are cut off in this listing
                  # (the `next unless $line =~ /^\s*` line and the second operand of the `..` range);
                  # their missing parts cannot be documented from here.
12294 0     0 0   my($self, $method, $fh, $a) = (shift, shift, shift, shift);
12295            
12296 0 0         if ($method eq 'dotcompose') {
    0          
    0          
12297             $self->process_dotcompose($fh, sub ($$@) {
12298 0     0     my($self, $out) = (shift, shift);
12299 0           $self->compose_line_2_array($a, $out, 'massage', !!'terminate', @_);
12300 0           }, 'decode U');
12301             } elsif ($method eq 'entity') {
                  # NOTE(review): in the loop below (a) after the first split only $in[0] is split
                  # again, so names following the first comma appear to be discarded; (b) `$a` in
                  # the sort block resolves to the lexical $a declared at the top of this sub (the
                  # output array), not to sort's package variable $a.  Confirm both are intended.
12302 0           while (my $line = <$fh>) {
12303 0 0         next unless $line =~ /^\s*
12304 0           my($out, @in) = (chr hex "$1", split /\s*,\s*/, "$2");
12305 0           $in[0] =~ s/\s+$//;
12306 0           @in = split /\s*,\s*/, $in[0];
12307 0           @in = sort {length($a) <=> length($b)} @in;
  0            
12308 0           for my $in (@in) { # Avoid entries more than 2x longer than the shortest possible
12309 0 0 0       next if length($in) > $avoid_overlong_synonims_Entity*length $in[0] or length($in) > $maxEntityLen;
12310 0           my @IN = split //, $in;
12311 0           $self->compose_line_2_array($a, $out, !'massage', $self->key2hex(' '), @IN);
12312             }
12313             }
12314             } elsif ($method eq 'rfc1345') { # http://tools.ietf.org/html/rfc1345
                  # %cvt decodes the three HTML entities which may occur inside a mnemonic.
12315 0           my %cvt = qw(gt > lt < amp &);
12316 0           while (my $line = <$fh>) {
12317 0 0         next unless ($line =~ /^\s+SP\s+0020\s+SPACE\s*$/) .. ($line =~ /^
12318 0 0         next unless $line =~ /^\s+(\S+)\s+([a-fA-F\d]{4})\s/;
12319 0           my($out, $in) = (chr hex "$2", "$1");
12320 0 0         next if "$2" =~ /^e0/i; # Skip private parts
12321 0           $in =~ s/&([lg]t|amp);/$cvt{$1}/g;
12322 0 0         next if 1 == length $in;
12323 0           my @IN = split //, $in;
12324 0           $self->compose_line_2_array($a, $out, !'massage', $self->key2hex(' '), @IN);
12325             }
12326 0           $self->compose_line_2_array($a, '€', !'massage', $self->key2hex(' '), 'E', 'u'); # http://en.wikipedia.org/wiki/Unicode_input#Character_mnemonics
12327             } else {
12328 0           die "Unknown compose parser: $method";
12329             }
12330             }
12331            
12332             sub composefile_2_array ($$$$@) {
                  # Open the file $fn for reading through the :encoding(utf8) layer, feed it to
                  # compose_2_array() with parser $method and target array $a, then close it.
                  # Dies when the file cannot be opened or closed.
12333 0     0 0   my($self, $method, $fn, $a) = (shift, shift, shift, shift);
12334 0 0         open my $fh, '< :encoding(utf8)', $fn or die "Can't open `$fn' for read: $!";
12335 0           $self->compose_2_array($method, $fh, $a);
12336 0 0         close $fh or die "Can't close `$fn' for read: $!";
12337             }
12338            
12339             sub merge_hash_to ($$$) { # We do NOT do deep copy
                  # Recursively merge the nested hash $from into $to, in place.  Values are copied
                  # by reference (sub-hashes of $from end up shared with $to).
                  #   - a true non-hash value already present in $to is kept;
                  #   - a key missing from $to is copied over;
                  #   - otherwise the two values are merged recursively.
                  # NOTE(review): the recursive call is made without checking that $from->{$k} is
                  # a hash.  If $to->{$k} is a hash while $from->{$k} is a plain value, `keys %$from`
                  # in the nested call would dereference a non-reference -- confirm callers never
                  # produce this combination.
12340 0     0 0   my($self, $from, $to) = (shift, shift, shift);
12341 0           for my $k (keys %$from) { # ignore if the existing value is not hash
12342 0 0 0       next if 'HASH' ne ref($to->{$k} || {}); # existing non-hash (terminator) wins over a terminator or a longer binding
12343 0 0         $to->{$k} = $from->{$k}, next unless exists $to->{$k}; # existing hash wins over new terminator.
12344 0           $self->merge_hash_to($from->{$k}, $to->{$k});
12345             }
12346             }
12347            
12348             sub create_composeArray ($$$) {
                  # Parse every file listed under the configuration key $key with parser $method.
                  # get__value($key) is expected to give an array ref of file names; when it gives
                  # a false value the sub returns early with that (empty/false) result.
                  # Returns a reference to an array holding one rule array per file, in file order.
12349 0     0 0   my ($self, $key, $method) = (shift, shift, shift);
12350 0 0         my $names = $self->get__value($key) or return;
12351 0           my @A;
12352 0           for my $fn (@$names) {
12353 0           $self->composefile_2_array($method, $fn, my $a = []);
12354 0           push @A, $a;
12355             # $self->compose_array_2_hash($a, my $h = {});
12356             # $self->merge_hash_to($h, $H);
12357             # warn "CP< ", join ', ', keys %$h;
12358             }
12359             # warn "CP= ", join ', ', keys %$H;
12360 0           \@A;
12361             }
12362            
12363             sub compose_Array_2_hash ($$) {
                  # Convert an array of per-file rule arrays ($A) into one merged lookup hash.
                  # Each per-file array is turned into its own hash by compose_array_2_hash() and
                  # then merged with merge_hash_to(); since that merge keeps existing entries,
                  # earlier files take precedence.
                  # Returns the merged hash $H.
                  # NOTE(review): $OPT is accumulated in the same way but is neither returned nor
                  # stored anywhere visible here -- confirm whether it should be.
12364 0     0 0   my ($self, $A) = (shift, shift);
12365 0           my($H, $OPT) = ({}, {}); # indexed by HEX
12366 0           for my $a (@$A) {
12367 0           $self->compose_array_2_hash($a, my $h = {}, my $opt = {});
12368 0           $self->merge_hash_to($h, $H);
12369 0           $self->merge_hash_to($opt, $OPT);
12370             # warn "CP< ", join ', ', keys %$h;
12371             }
12372             # warn "CP= ", join ', ', keys %$H;
12373 0           $H;
12374             }
12375            
12376             sub composehash_2_prefix ($$$$$$$$) {
                  # Register the compose hash $h as the dead-key map of prefix $prefix in face $F,
                  # recursing into sub-hashes.
                  #   $n             - array ref of name parts describing the path so far (joined with
                  #                    spaces into '[prefixDocs]')
                  #   $prefixCompose - the compose prefix; a path element equal to its character is
                  #                    named 'Compose' / shown as '⎄'
                  #   $show          - display string accumulated so far
                  #   $comp_show     - display string of the compose key itself (used to collapse
                  #                    repetitions into a superscript count)
                  # For every key of $h other than the bookkeeping keys [Prefix], [GPrefix],
                  # [Prefix_Show], [GPrefix_Show] and [Added]:
                  #   - a plain value is stored as [$v] and counted in the face's '[inCompose]';
                  #   - a sub-hash gets a prefix $p (its '[Prefix]' entry, or the next automatic
                  #     dead key), is processed recursively, and is stored as [$p, undef, 1].
                  #     %seen makes keys which point to the same sub-hash share one such entry.
                  # The resulting map is saved in '[deadkeyFaceHexMap]'{$prefix} of the face.
                  # NOTE(review): in the list assignment to (%orig, %map, %seen) the first hash
                  # absorbs the whole list; %map and %seen start empty, and %orig is not read again
                  # in this sub.
12377 0     0 0   my($self, $F, $prefix, $h, $n, $prefixCompose, $show, $comp_show) = (shift, shift, shift, shift, shift, shift, shift, shift);
12378 0   0       my($H, $added) = ($self->{faces}{$F}, $h->{'[Added]'} || {});
12379 0 0         my(%orig, %map, %seen) = map { ( $_, exists($added->{$_}) ? $added->{$_} : $_ ) } keys %$h;
  0            
12380 0 0 0       for my $c (sort {($added->{$a} || '') cmp ($added->{$b} || '') or $a cmp $b} keys %$h) { # order affects the order of auto-prefixes
  0   0        
12381 0 0         next if $c =~ /^\[(G?Prefix(_Show)?|Added)\]$/;
12382 0           my $v = $h->{$c};
12383 0 0 0       if (ref $v and $seen{"$v"}) {
    0          
12384 0           $v = $seen{"$v"};
12385             } elsif (ref $v) {
12386 0   0       my $p = $v->{'[Prefix]'} || $self->key2hex($self->next_auto_dead($H));
12387 0           my $cc = $c; # Name should not reflect linking
12388             # warn(" [@$n] $cc => $added->{$c}"),
12389 0 0         $cc = $added->{$c} if exists $added->{$c};
12390 0           my $name_append = my $name_show = chr hex $cc;
12391 0 0         $name_append = 'Compose' if $name_append eq $self->charhex2key($prefixCompose);
12392 0 0         $name_show = '⎄' if $name_show eq $self->charhex2key($prefixCompose);
12393 0 0         $name_append = $self->key2hex($name_append) if $name_append =~ /\s/;
12394             # $name_show = $self->key2hex($name_show) if $name_show =~ /\s/ and $name_show ne ' ';
                  # This `my $c` is a scratch counter for the substitution below; it hides the
                  # loop variable $c until the end of this elsif-block only.
12395 0           my $c;
12396 0           ($name_show = "$show$name_show")
12397 0           =~ s[^((⎄[₁₂₃₄₅₆₇₈₉]?|\Q$comp_show\E){2,})][ $2 . (($c = length($1)/length($2)) =~ tr/0-9/⁰¹²³⁴⁵⁶⁷⁸⁹/, $c) ]e;
12398 0 0         $name_show = $v->{'[Prefix_Show]'} if defined $v->{'[Prefix_Show]'};
12399 0           $self->composehash_2_prefix($F, $p, $v, my $nn = [@$n, $name_append], $prefixCompose, $name_show, $comp_show);
12400 0           $self->{faces}{$F}{'[prefixDocs]'}{$p} = "@$nn";
12401 0           $self->{faces}{$F}{'[Show]'}{$p} = $name_show;
12402 0           $v = $seen{"$v"} = [$p, undef, 1];
12403             } else {
12404 0           $H->{'[inCompose]'}{$self->charhex2key($v)}++;
12405 0           $v = [$v];
12406             }
12407 0           $map{$c} = $v;
12408             }
12409 0           $H->{'[deadkeyFaceHexMap]'}{$prefix} = \%map;
12410             }
12411            
12412             sub composehash_add_linked ($$$$) {
                  # Add to the compose hash $hexH (and, recursively, to its sub-hashes) extra keys
                  # for characters linked through $charH, so that a linked character leads to the
                  # same value as the original key.
                  #   $hexH      - compose hash keyed by hex strings; modified in place
                  #   $charH     - map character --> linked key (or array ref whose element 0 is used)
                  #   $prefCharH - map used for the reverse check (array-ref values: element 0 is used)
                  # For a key $h with a link target $to not yet present in $hexH, the value of $h
                  # is also stored under $to: immediately when the reverse map leads back to $h
                  # ($now), otherwise in %$delay, which is merged at the end with existing keys
                  # taking precedence.  The added keys are recorded as '[Added]' => {$to => $h}.
                  # $delay and %add are fresh for every call (including recursive ones).
12413 0     0 0   my($self, $hexH, $charH, $prefCharH, $delay, %add) = (shift, shift, shift, shift, {});
12414 0           for my $h (keys %$hexH) {
12415 0 0         $self->composehash_add_linked($hexH->{$h}, $charH, $prefCharH) if ref $hexH->{$h};
12416 0 0         next unless defined (my $to = $charH->{my $c = chr hex $h});
12417 0 0         $to = $to->[0] if ref $to;
12418 0           my $toC = $self->charhex2key($to);
12419 0           my $back = $prefCharH->{$toC};
12420 0 0         $back = $back->[0] if ref $back;
12421 0           my $now = $h eq $self->key2hex($back);
12422 0 0         next if exists $hexH->{$to = $self->key2hex($to)};
12423             # warn " ... link $c to $toC (now=$now, back = $prefCharH->{$toC}) @{$prefCharH->{$toC}||[]})";
12424             # warn " ... link $c to $toC (now=$now, back = $back)";
12425 0           $add{$to} = $h;
12426 0 0         ($now ? $hexH : $delay)->{$to} = $hexH->{$h};
12427             }
12428 0 0         $hexH->{'[Added]'} = \%add if %add;
12429             # warn " ... almost done";
12430 0 0         %$hexH = (%$delay, %$hexH) if keys %$delay;
12431             }
12432            
12433             sub create_composekey ($$$) {
                  # Build the Compose-key prefix maps of face $F.
                  #   $prefix - either an array ref of comma-separated specifications (each split
                  #             into a record; fields 3 and 4 are normalized through
                  #             charhex2key()/key2hex()), or a single prefix key, in which case
                  #             three default records (ComposeFiles, EntityFiles, rfc1345Files)
                  #             are used.
                  # Record fields as used below: [0] name of the file-list variable, [1] parser
                  # type, [2] true = warn when no rules could be loaded, [3] access prefix,
                  # [4] key under which this hash is chained inside the previous hash.
                  # Steps:
                  #  1. per-type rule arrays are loaded once and cached in $self->{'[ComposeArrays]'}
                  #     (together with '[ComposeShowIdx]', the indices of the records which loaded);
                  #  2. for this face only rules whose every input is present in '[coverage00hash]'
                  #     or '[coverageExtra]' are kept;
                  #  3. each type is converted to a hash, deep-copied and, if the face is linked,
                  #     extended by composehash_add_linked();
                  #  4. a hash is either chained into the previous one (prefix stored as '[Prefix]')
                  #     or gets its own access prefix (stored as '[GPrefix]');
                  #  5. composehash_2_prefix() is run for every hash which has '[GPrefix]'.
                  # NOTE(review): $I and @idx near the end are assigned but not used in this sub.
12434 0     0 0   my($self, $F, $prefix, @PREFIX) = (shift, shift, shift);
12435 0           my $linkedF = $self->{faces}{$F}{LinkFace};
12436 0   0       my $linked = $linkedF && $self->{faces}{$linkedF}{Face_link_map}{$F};
12437 0 0 0       $linked &&= {map {ref($_ || 0) ? $_->[0] : $_} %$linked};
  0   0        
12438 0   0       my $rlinked = $linked && $self->{faces}{$F}{Face_link_map}{$linkedF};
12439             # $linked ||= {};
12440             # warn " Compose: $F: F linked to $linked->{F}" if $linked and $linked->{F};
12441             # $F eq 'Latin' and
12442             # warn " Compose: $F: ", join ', ', sort keys %{$self->{faces}{$linkedF}{Face_link_map}{$F}}
12443             # if $self->{faces}{$linkedF}{Face_link_map}{$F};
12444 0 0 0       if ($prefix and ref $prefix) {
12445 0           @PREFIX = map { my @a = split /,/;
  0            
12446 0   0       defined $a[$_] and length $a[$_] and $a[$_] = $self->key2hex($self->charhex2key($a[$_])) for 3,4;
      0        
12447 0           [@a]} @$prefix;
12448             } else {
12449 0           $prefix = $self->key2hex($self->charhex2key($prefix));
12450 0           @PREFIX = ( ['ComposeFiles', 'dotcompose', 'warn', $prefix, ''],
12451             ['EntityFiles', 'entity', 'warn', '', $prefix],
12452             ['rfc1345Files', 'rfc1345', 'warn', '', $prefix]);
12453             }
12454 0           $self->load_KeySyms;
12455 0           my $p0 = my $first_prefix = $PREFIX[0][3]; # use for first found map
12456 0           my @Hashes;
12457 0 0         my @Arrays = @{ $self->{'[ComposeArrays]'} || [] };
  0            
12458 0 0         unless (@Arrays) { # Shared between faces
12459 0           my @Show;
12460 0           for my $i (0..$#PREFIX) { # FileList, type, OK_to_miss, prefix, prefix-in-last ... prefix-in-pre-last ...
12461 0           my $pref = $PREFIX[$i];
12462 0           my $arr;
12463 0 0 0       unless ($arr = $self->create_composeArray($pref->[0], $pref->[1]) and @$arr) {
12464 0 0         warn "Compose list of type $pref->[1] could not be created from FileList variable $pref->[0]" if $pref->[2];
12465 0           next;
12466             }
12467 0           push @Arrays, [$arr, $pref];
12468 0           push @Show, $i;
12469             }
12470 0           $self->{'[ComposeArrays]'} = \@Arrays;
12471 0           $self->{'[ComposeShowIdx]'} = \@Show;
12472             }
12473 0           my($v, $vv) = map $self->{faces}{$F}{$_}, qw( [coverage00hash] [coverageExtra] );
12474             # warn "Filter hashes $F ", scalar keys %$v, ' ', scalar keys %$vv, ' ', scalar @{$self->{faces}{$F}{'[coverage00]'}};
12475 0           for my $A (@Arrays) { # one per type
12476 0           my($arr, $pref) = @$A;
12477 0           my @NN;
12478 0           for my $a (@$arr) { # $a one per input file
12479 0           my @N;
12480 0           for my $l (@$a) {
12481 0           my($out, $term, @in) = @$l;
12482 0 0 0       next if grep {not ($v->{$_} or $vv->{$_})} @in;
  0            
12483             # my $c;
12484             # warn "in=<@in>, k=$c, 00=", !!$v->{$c}, " Extra=", !!$vv->{$c} if ($c) = grep {ord() <= 0x30ff and ord >= 0x30f0} @in;
12485 0           push @N, $l;
12486             }
12487 0           push @NN, \@N;
12488             }
12489             # warn "Compose face=$F: keys <@$arr> @$pref";
12490             # warn "Compose face=$F: keys ", join ' ', map scalar @$_, @$arr;
12491 0           push @Hashes, [$self->compose_Array_2_hash(\@NN), $pref];
12492             }
12493 0           my @hashes;
12494 0           my $Comp_show = $self->{faces}{$F}{'[ComposeKey_Show]'};
12495 0           my $IDX = $self->{'[ComposeShowIdx]'};
12496 0           for my $i (0..$#Hashes) { # Now process separately for every personality --- NOT YET
12497 0           my $H = $Hashes[$i];
12498 0           my($chained, $hash, $pref) = ('G', @$H); # Global
12499 0           $hash = $self->deep_copy($hash);
12500 0 0         $self->composehash_add_linked($hash, $linked, $rlinked) if $linked;
12501 0           my $pref0 = $pref->[3];
12502 0           my $prefix_repeat;
12503 0 0 0       if (@hashes and defined $pref->[4] and length $pref->[4]) {
    0 0        
12504 0           die "Chain-ComposeKey $pref->[4] already bound in the previous ComposeHash, keys = ", join ', ', keys %{$hashes[-1]{$pref->[4]}}
12505 0 0         if $hashes[-1]{$pref->[4]};
12506 0           $hashes[-1]{$pref->[4]} = $hash; # Bind to double/etc press
12507 0           $chained = '';
12508             } elsif ($first_prefix) { # The previous type could be not found; use the first defined accessor
12509 0           $pref0 = $first_prefix;
12510 0           undef $first_prefix;
12511             } else {
12512 0           warn "Hanging ComposeHash (no access prefix key) for ", join('///', @$pref);
12513             }
12514 0           push @hashes, $hash;
12515 0 0         $hash->{"[${chained}Prefix]"} = $pref0 if length $pref0;
12516 0 0 0       $hash->{"[Prefix_Show]"} = $Comp_show->[$IDX->[$i]] if ref $Comp_show and length $Comp_show->[$IDX->[$i]];
12517             }
12518 0 0         return unless @hashes;
12519 0           my @idx = split //, '₁₂₃₄₅₆₇₈₉';
                  # $c counts the non-chained hashes processed so far; it is appended (as "[N]" in
                  # the docs, as subscript digits in the display string) starting with the second.
12520 0           my $c = 0;
12521 0           for my $i ( 0..$#hashes ) {
12522 0           my $h = $hashes[$i];
12523 0           my $I = $IDX->[$i];
12524 0 0         next unless my $p = $h->{'[GPrefix]'}; # Not chained (chained are processed as subhashes by composehash_2_prefix()
12525 0 0         my $post = ($c ? "[$c]" : '');
12526 0           my $comp_show = $h->{'[Prefix_Show]'};
12527 0 0         unless (defined $comp_show) {
12528 0           my $c1;
12529 0 0         my $spost = ($c ? (($c1 = $c) =~ tr/0-9/₀₁₂₃₄₅₆₇₈₉/, $c1) : '');
12530 0 0         if (ref $Comp_show) { # Elt0 has a sane default
12531 0           $comp_show = "$Comp_show->[0]$spost";
12532             } else {
12533 0           $comp_show = "$Comp_show$spost";
12534             }
12535             }
12536 0           $self->{faces}{$F}{'[Show]'}{$p} = $comp_show;
12537             # push @Show, (ref $comp_show ? $comp_show->[$i] : $comp_show);
12538 0           $self->composehash_2_prefix($F, $p, $h, ["Compose$post"], $p0, $comp_show, $comp_show);
12539 0           $self->{faces}{$F}{'[prefixDocs]'}{$p} = "Compose$post key";
12540 0           ++$c;
12541             }
12542             }
12543            
12544             my(@AppleSym, %AppleSym);
12545             sub _AppleMap () { # http://forums.macrumors.com/archive/index.php/t-780577.html
12546             # https://github.com/tekezo/Karabiner/blob/version_10.7.0/src/bridge/generator/keycode/data/KeyCode.data
12547             # It has a definition of 0x34; moreover, it also defines some keys above 0x80 (including ≤ 0x80 on some German keyboard???)
                  # Build the base table of Apple virtual key codes.
                  # Returns a list indexed by key code: the character produced by the key, or undef
                  # for codes marked special ('·') and not overridden by the named-key table.
                  # Side effects on the file-scoped tables:
                  #   $AppleSym[code]  = name of a named key
                  #   $AppleSym{name}  = code of a named key
                  #   $AppleSym{char}  = code of the first key producing char
                  #   $AppleSym{"#char"} = code of a later key producing the same char
                  # In the named-key table an optional third column gives the character: two hex
                  # digits, a literal punctuation character, SPACE, or '+' (mapped to "\x10").
                  # NOTE(review): the heredoc operator which opens the named-key table is cut off
                  # in this listing (`split /\n/, <`); the table is assumed to run up to the second
                  # EOF line.
                  # NOTE(review): in the line-matching regex only the first alternative, `^\s*(#)`,
                  # is anchored; the second may match anywhere in the line -- confirm intended.
12548 0     0     chomp(my $lst = <<'EOF'); # 0..50; 65..92; 93..95 ↱KEYPAD; · = special ↱JIS (≥93=0x5d)
12549             asdfhgzxcv§bqweryt123465=97-80]ou[ip·lj'k;\,/nm.· `··············.·*·+·····/··-··=01234567·89¥_,
12550             EOF
12551             # ' # KEYPAD above starts on 65=0x41
12552 0           my @lst = split //, $lst;
12553 0           my $last = $#lst;
12554             # in addition to US Extended, we defined 64, 73 (BR), 102, 104 (hex 40 49 66 68) and 93-95 from JIS
12555 0           my @kVK_ = split /\n/, <
12556             24 Return 0d
12557             30 Tab 09
12558             ####31 Space
12559             33 Delete 08
12560             34 Enter_PowerBook 03 # Same as KeypadEnter
12561             35 Escape 1b
12562             37 Command
12563             38 Shift
12564             39 CapsLock
12565             3A Option
12566             3B Control
12567             3C RightShift
12568             3D RightOption
12569             3E RightControl
12570             3F Function
12571             40 F17 +
12572             42 ????????????? 1d # Same as RightArrow
12573             46 ?????????????? 1c # Same as LeftArrow
12574             47 ANSI_KeypadClear 1b # ??? Same as Escape
12575             48 VolumeUp 1f # ??? Same as DownArrow
12576             49 VolumeDown + # C1 of ABNT: /
12577             4A Mute
12578             ###4B ANSI_KeypadDivide /
12579             4C ANSI_KeypadEnter 03
12580             4D ??????? 1e # Same as UpArrow
12581             4F F18 +
12582             50 F19 +
12583             5A F20
12584             60 F5 +
12585             61 F6 +
12586             62 F7 +
12587             63 F3 +
12588             64 F8 +
12589             65 F9 +
12590             67 F11 +
12591             69 F13 +
12592             6A F16 +
12593             6B F14 +
12594             6D F10 +
12595             6E __PC__Menu +
12596             6F F12 +
12597             71 F15 +
12598             72 Help 05
12599             73 Home 01
12600             74 PageUp 0b
12601             75 ForwardDelete 7f
12602             76 F4 +
12603             77 End 04
12604             78 F2 +
12605             79 PageDown 0c
12606             7A F1 +
12607             7B LeftArrow 1c
12608             7C RightArrow 1d
12609             7D DownArrow 1f
12610             7E UpArrow 1e
12611             # ISO keyboards only
12612             ####0A ISO_Section §
12613             # JIS keyboards only
12614             ####5D JIS_Yen ¥
12615             ####5E JIS_Underscore _
12616             ####5F JIS_KeypadComma ,
12617             66 JIS_Eisu SPACE # Left of space (On CapsLock on Windows; compare http://commons.wikimedia.org/wiki/File:MacBookProJISKeyboard-1.jpg with http://en.wikipedia.org/wiki/Keyboard_layout#Japanese)
12618             68 JIS_Kana SPACE # Right of space (as on Windows, but without intervening key)
12619             # Defined in US Extended:
12620             6C ?????? +
12621             70 ?????? +
12622             # ?????
12623             ###BRIGHTNESS_DOWN 0x91
12624             ###BRIGHTNESS_UP 0x90
12625             ###DASHBOARD 0x82
12626             ###EXPOSE_ALL 0xa0
12627             ###LAUNCHPAD 0x83
12628             ###MISSION_CONTROL 0xa0
12629             #
12630             ###GERMAN_PC_LESS_THAN 0x80
12631             ###PC_POWER 0x7f
12632             EOF
12633 0           my %seen;
                  # Pass 1: index the character string; '·' marks a code without a character.
12634 0           for my $i (0..$#lst) {
12635 0 0         if ($lst[$i] eq '·') {
12636 0           undef $lst[$i];
12637             } else {
12638 0   0       my $pref = (defined $AppleSym{$lst[$i]} and '#');
12639 0           $AppleSym{"$pref$lst[$i]"} = $i;
12640             }
12641             }
12642             # $AppleSym{'#'} = $AppleSym{' '}; # Space is in a table as #
12643 0           my %map = ('+' => "\x10", 'SPACE' => ' ');
                  # Pass 2: named keys; lines starting with '#' are skipped, others warn if malformed.
12644 0           for my $kVK (@kVK_) {
12645 0 0         warn ("unexpected OSX scan: <<$kVK>>"), next unless $kVK =~ /^\s*(#)|([A-F\d]{2})\s+(\?+|\w+)\s*(.*)/i;
12646 0 0         next if $1;
12647 0           my($hex, $name, $rest, $comment) = ($2, $3, $4);
12648 0           $AppleSym[hex $hex] = $name;
12649 0           $AppleSym{$name} = hex $hex;
12650 0 0         if(length $rest) {
12651 0 0         warn ("unexpected OSX scan expansion in $hex/$name: <<$rest>>"), next
12652             unless ( my($HEX,$lit,$sp), $comment) = ( $rest =~ /^(?:(?:([A-F\d]{2})|([^\w\s+])|(SPACE|\+))\s*)?(?:#\s*(.*))?$/i );
12653 0 0         if ($sp) {
    0          
12654 0 0         $rest = $map{$sp} or warn "Bad map in OSX basemap"
12655             } elsif ($HEX) {
12656 0           $rest = chr hex $HEX;
12657             } else {
12658 0           $rest = $lit;
12659             }
12660 0           my $idx = hex $hex;
12661 0 0 0       $idx > $last or not defined $lst[$idx] or warn "Non-special <<$lst[$idx]>> when overriding offset=$idx=hex($hex) in OSX basemap";
12662 0           $lst[$idx] = $rest;
12663             }
12664             }
12665             @lst
12666 0           }
12667            
12668             my @AppleMap;
12669            
12670             # Extra keys on Windows side: INSERT, and duplication-by-NumLock of the keypad.
12671             # Extra keys on Apple side: CLEAR on the KP, and KP-Equal.
12672            
12673             # Current solution: merge win-KP_Clear with apple-KP_Clear (1st in the center, 2nd in the upper-left corner!)
12674             # merge INSERT with KP=
12675            
12676             # How to work with NumLock-modifications? There are 3 states: NumLock-, Base-, Shift.
12677            
12678             # Not in Apple maps:
12679             # F21-F24 HOME UP PRIOR DIVIDE LEFT CLEAR RIGHT MULTIPLY END DOWN NEXT SUBTRACT INSERT DELETE RETURN ADD NUMPAD0-NUMPAD9
                  # %Apple_recode: key name as used by this module --> key of %AppleSym (an Apple
                  # key name, a character, or "#char" for the second key producing that character).
                  # The 'qw' warnings are disabled because the word list contains '#'.
12680             my %Apple_recode;
12681 1     1   2255 { no warnings 'qw';
  1         2  
  1         3461  
12682             %Apple_recode = (qw(
12683             DIVIDE #/ MULTIPLY * SUBTRACT #- ADD + DECIMAL #.
12684             RETURN ANSI_KeypadEnter DELETE ForwardDelete #\ § OEM_102 §
12685             PRIOR PageUp CLEAR ANSI_KeypadClear NEXT PageDown INSERT #=
12686             ABNT_C1 VolumeDown APP __PC__Menu
12687             ), SPACE => ' ', map +("NUMPAD$_", "#$_"), 0..9);
12688             }
                  # %Apple_skip: keys (F21..F24) which AppleMap_Base() leaves unmapped without
                  # listing them in its "Not in Apple maps" warning.
12689             my %Apple_skip = map +($_, 1), (map "F$_", 21..24); #, (map "NUMPAD$_", 0..9);
12690             # ==> HOME UP PRIOR LEFT CLEAR RIGHT END DOWN NEXT INSERT DELETE RETURN
12691             # ==> PRIOR CLEAR NEXT INSERT
12692            
12693             sub AppleMap_Base ($$) {
                  # Compute, and cache in the face hash, the correspondence between layout
                  # positions and Apple virtual key codes.
                  #   $K - array ref: the path given to get_deep($self, @$K) to reach the face hash
                  # Returns $F->{Apple2layout} (Apple code --> position).  On the first call it is
                  # computed together with $F->{layout2Apple} (position --> Apple code).
                  # A position is matched to an Apple code by trying, in order: the BaseKeys() name,
                  # its %Apple_recode translation, the baseKeysRaw value, its %Apple_recode
                  # translation, then the capitalized name with and without an 'Arrow' suffix.
                  # Unmatched keys not listed in %Apple_skip are reported in one warning.
                  # NOTE(review): the two capitalized-name branches store the key NAME in $A[$i],
                  # while every other branch stores the numeric code $AppleSym{...}; $A[$i] is later
                  # used as an array index ($AA[$A[$i]]).  Looks like $AppleSym{...} was intended --
                  # confirm.
                  # NOTE(review): within this listing @AppleSym only receives key names (from
                  # _AppleMap()), so the /^#(.)$/ test in the "secondary" loop may never match --
                  # verify against the rest of the file.
12694 0     0 0   my($self, $K) = (shift, shift);
12695 0           my $F = $self->get_deep($self, @$K); # Presumably a face hash, as in $K = [qw(faces US)]
12696 0 0         return $F->{Apple2layout} if $F->{Apple2layout};
12697 0 0         @AppleMap = _AppleMap unless @AppleMap;
12698 0 0         warn 'AppleMap too long' if $#AppleMap >= 127;
12699 0           $self->reset_units;
12700 0           my $BB = $self->BaseKeys($K); # VK per position (except via-VK keys)
12701 0           my $B = $F->{baseKeysRaw}; # chars on key (if the first occurence???) OR VK
12702 0           my(@o, @A, @AA); # A: kbdd --> Apple; AA: Apple --> kbdd
12703 0           $_ = [@$_] for $B, $BB; # 1-level deep copy
12704 0           my $o = $F->{'[VK_off]'};
12705 0           for my $b ($B, $BB) { # Explicitly add via-VK keys
12706 0           for my $vk (keys %$o) {
12707 0 0         warn "[@$K]: $vk defined on \@$o->{$vk} as $b->[$o->{$vk}]" if defined $b->[$o->{$vk}];
12708 0 0         $b->[$o->{$vk}] = $vk unless defined $b->[$o->{$vk}];
12709             # warn "[@$K]: $vk \@ $o->{$vk}"; # SPACE @ 116 (on izKeys)
12710             }
12711             }
12712             # warn "[[@$K]] @$B\n\t@$BB\n";
12713             # warn "\t", !(grep $_ eq ' ', @$B), "\t", !(grep $_ eq ' ', @$BB), "\n";
12714 0           for my $i (0..$#$B) { # Primary mappings
12715 0           my $k = $B->[$i];
12716 0           my $kk = $BB->[$i];
12717 0 0         next unless defined $k;
                  # `|| 123` below substitutes a key which is presumably absent from %AppleSym when
                  # there is no recoding, so that the exists-test fails without an undef warning.
12718 0 0         $A[$i] = $AppleSym{$kk}, next if exists $AppleSym{$kk};
12719 0 0 0       $A[$i] = $AppleSym{$Apple_recode{$kk}}, next if exists $AppleSym{$Apple_recode{$kk} || 123};
12720 0 0         $A[$i] = $AppleSym{$k}, next if exists $AppleSym{$k};
12721 0 0 0       $A[$i] = $AppleSym{$Apple_recode{$k}}, next if exists $AppleSym{$Apple_recode{$k} || 123};
12722 0 0         $A[$i] = "\u\L$k" . 'Arrow', next if exists $AppleSym{"\u\L$k" . 'Arrow'};
12723 0 0         $A[$i] = "\u\L$k", next if exists $AppleSym{"\u\L$k"};
12724 0 0         next if $Apple_skip{$k};
12725 0           push @o, $k;
12726             }
12727 0           for my $i (0..126) { # Primary backwards mappings
12728 0 0         next unless defined $A[$i];
12729 0 0         warn "Duplicate backward Apple mapping: old=$AA[$A[$i]] --> $A[$i] <-- $i=new" if defined $AA[$A[$i]];
12730 0           $AA[$A[$i]] = $i;
12731             }
12732 0           for my $i (0..126) { # Secondary backwards mappings
12733 0 0 0       next if defined $AA[$i] or ($AppleSym[$i] || '') !~ /^#(.)$/ or not defined $AA[$AppleSym{$1}];
      0        
      0        
12734 0           $AA[$i] = $AA[$AppleSym{$1}]
12735             }
12736 0 0         warn "Not in Apple maps: @o" if @o;
12737 0           $F->{layout2Apple} = \@A;
12738 0           $F->{Apple2layout} = \@AA;
12739             }
12740            
12741             # fake is needed (apparently, the compiler does not allocate the named states smartly???)
                  # Flat lists of (state-group name, number of states) pairs; the names have the
                  # form "<k>of4" / "<k>of6".  The three variants differ only in the counts used
                  # for 5of6 and 6of6.  Presumably these lists are what alloc_slots() consumes
                  # (it reads its list two elements at a time) -- verify at the call site.
12742             my @state_cnt = qw( 4of4 4096 3of4 256 2of4 16 1of4 0 0of4 0
12743             1of6 0 2of6 2 3of6 16 4of6 256 0of6 0
12744             );
12745             my @state_cnt_a = (@state_cnt, qw(
12746             5of6 4 6of6 64
12747             )); # At end, so may be skipped via merge_states_6_and_4
12748             my @state_cnt_b = (@state_cnt, qw(
12749             5of6 64 6of6 64
12750             ));
                  # Plan C: the 5of6 count is 16 times the group size $in_group_4of6_plan_c, which
                  # output_hex_input() also uses as its $in_group.
12751             my $in_group_4of6_plan_c = 2;
12752             my @state_cnt_c = (@state_cnt, '5of6' => 16 * $in_group_4of6_plan_c, '6of6' => 64);
12753             my $use_plan_b; # unimplemented
12754             my $use_plan_c = 1; # untested
12755            
12756             sub alloc_slots ($$) {
                  # Allocate consecutive numeric ranges to named groups.  Plain function (no $self).
                  #   $tot - first free number
                  #   $a   - array ref holding a flat list of (name, count) pairs
                  # Returns a hash ref: name --> [first, last].  A group with count 0 gets the
                  # empty range [first, first-1].
                  # The copy below is a shallow one (the list is flat); it keeps splice() from
                  # emptying the caller's array.
12757 0     0 0   my($tot, $a, %start) = (shift, shift);
12758 0           my @a = @$a; # deep copy
12759 0           while (@a) {
12760 0           my($how, $c) = splice @a, 0, 2;
12761 0           $start{$how} = [$tot, $tot+$c-1];
12762 0           $tot += $c;
12763             }
12764 0           \%start;
12765             }
12766            
12767             sub output_state_range ($$$$$$) { # Apparently, only ranges up to 256 states are supported.
                  # Produce the output text for the state range $from..$to and return it.
                  #   $mult - step applied to $next / $out per state (used here to advance them by
                  #           256*$mult for each chunk of 256 states)
                  #   $next - number of the next state for the first state of the range, or undef
                  #   $out  - code point (ord) of the output for the first state, or undef
                  # Ranges longer than 256 states are emitted as consecutive chunks of 256 by
                  # recursive calls; the remainder is emitted by the final heredoc.
                  # XML_format() is called on the character for $out; presumably it escapes its
                  # argument in place -- TODO confirm.
                  # NOTE(review): the heredoc near the end (`$o .= <` up to EOS) has lost its
                  # content in this listing, so the exact text emitted is not visible here.
12768 0     0 0   my($self, $from, $to, $mult, $next, $out, $o) = (shift, shift, shift, shift, shift, shift, ''); # $out is the ord(OUTPUT)
12769 0 0         $o .= "\t\t\t\n" if $to - $from > 255;
12770 0           while ($to - $from > 255) {
12771 0           $o .= $self->output_state_range($from, $from+255, $mult, $next, $out);
12772 0           $from += 256;
12773 0 0         $out += 256*$mult if defined $out;
12774 0 0         $next += 256*$mult if defined $next;
12775             }
12776 0 0         XML_format($out = chr $out) if defined $out;
12777 0           my @out;
12778 0 0         push @out, qq(next="$next") if defined $next;
12779 0 0         push @out, qq(output="$out") if defined $out ;
12780 0           $o .= <
12781            
12782             EOS
12783 0           $o
12784             }
12785            
                  # File-scoped switches read by output_hex_input() and output_hex_term().
12786             my $merge_states_6_and_4 = 1;
12787             my $do_hex5 = 0; # Won’t install with this… (Even with $merge_states_6_and_4)
12788            
12789             sub output_hex_input ($$$) { # only 4-hex-digits input supported now. First state in $states{'1of4'}[0].
                  # Produce, and return as a string, the output text describing what happens when
                  # the hex digit $HEX is typed in each of the hex-input states.
                  #   $states - hash ref: group name ("<k>of4", "<k>of6") --> [first, last] state
                  #             number (the shape returned by alloc_slots())
                  #   $HEX    - the key; when it contains no hex digit, one of two fixed texts is
                  #             returned, chosen by $do_hex5
                  # The behavior is steered by the file-scoped $do_hex5, $merge_states_6_and_4,
                  # $use_plan_c and $in_group_4of6_plan_c.
                  # NOTE(review): almost every heredoc in this sub (`<` ... EOS) has lost its
                  # content in this listing, and the conditions which selected between them are
                  # gone too; only the surrounding arithmetic can be documented from here.
12790 0     0 0   my($self, $states, $HEX, $o) = (shift, shift, shift, '');
12791 0 0         unless ($HEX =~ /[0-9a-f]/i) {
12792 0 0         return $do_hex5 ? <
12793            
12794            
12795            
12796             EOS
12797            
12798            
12799             EOS
12800             }
                  # @O: per step, [next-state, output code point]; for the 4-digit input the next
                  # state after step k is the start of group (k+1)of4 plus the digit value, and the
                  # last step has no next state, only an output.
12801 0           my $i = hex $HEX;
12802 0           my @O = map { [$states->{($_+1).'of4'}[0] + $i] } 0..3;
  0            
12803 0           $O[4] = [undef, $i];
12804             # $O[4] = qq(output="$HEX;");
12805             # $O[4] = qq(next="5000");
12806 0           $o .= <
12807            
12808            
12809             EOS
12810             #
12811             #
12812             #
12813             #
12814             $o .= <output_state_range($states->{"${_}of4"}[0], $states->{"${_}of4"}[1], 16, $O[$_][0], $O[$_][1])
12815            
12816             EOS
12817 0           for 2..4; # ($HEX eq '9' ? 4 : 3); # 2..4; bisect installation problems here
12818            
12819             # return $o unless 15 >= hex $HEX; # debugging only
                  # From here on: the 6-digit input; @O is rebuilt for the "<k>of6" groups.
12820            
12821 0           @O = map { [$states->{($_+1).'of6'}[0] + $i] } 0..5;
  0            
12822 0           $O[2][0]--; # We start with U+01..., not U+00....
12823 0           $O[6] = [undef, 0xDC00 + $i];
12824 0 0         $o .= $do_hex5 ? <
12825            
12826             EOS
12827            
12828             EOS
12829             # $states->{"2of6"}[0] is U+0xxxxx=hex5 hex5 and hex6 differs only in treatment of 0, and of 1 0
12830             # $states->{"2of6"}[1] is U+1xxxxx hex5: 1 0 —→ U+010xxx
12831             # $states->{"3of6"}[0] is U+01xxxx hex6: 1 0 —→ U+10xxxx
12832             # $states->{"3of6"}[1] is U+10xxxx hex5: 0 —→ hex4, 1 —→ U+01xxxx, rest X —→ U+0Xxxx
12833             # hex6: 0 —→ hex5, 1 —→ U+1xxxxx, rest X —→ U+0Xxxx
12834 0 0         $o .= <
12835            
12836             EOS
12837             # What follows is a complete mess, since with $do_hex5 the resulting layout won’t install
12838 0 0 0       $o .= <
12839            
12840            
12841            
12842             EOS
12843 0 0 0       $o .= <
12844            
12845             EOS
12846 0 0 0       $o .= <
12847            
12848             EOS
12849 0 0         $o .= <
12850            
12851            
12852            
12853            
12854             EOS
12855 0 0         $o .= <
12856            
12857            
12858             EOS
12859 0 0 0       $o .= <
12860            
12861             EOS
12862             $o .= <output_state_range($states->{"${_}of6"}[0], $states->{"${_}of6"}[1], 16, $O[$_][0], $O[$_][1])
12863            
12864             EOS
12865 0           for 3; # ($HEX eq '9' ? 4 : 3); # 2..4; bisect installation problems here
12866             # VARIANT (A): for every one of 256 states, individually emit a surrogate (with multiplier 4), and set the next state (in B..B+3)
12867             # VARIANT (C): for every $in_group of 256 states, emit its surrogate (with multiplier 4).
12868             # This creates a spread of "next states" of size M-3, with M = 4*$in_group.
12869             # Create next state in ranges (B .. B+M-3) (B+M .. B+2M-3) (B+2M .. B+3M-3) (B+3M .. B+4M-3)
12870             # depending on ($i & 3). [Later, we should process every range with multiplier=0.]
12871 0 0 0       my $next_base = ($merge_states_6_and_4 and not $use_plan_c) ? $states->{"3of4"}[0] + 0xDC : $states->{"5of6"}[0];
12872 0 0         my $in_group = $use_plan_c ? $in_group_4of6_plan_c : 1;
12873 0 0         my $spread_next = $use_plan_c ? 4*$in_group_4of6_plan_c - 3 : 1;
12874 0 0         $o .= $use_plan_c ? <
12875            
12879             EOS
12880            
12881             EOS
12882 0           for my $j (0 .. ((0x100/$in_group)-1)) {
12883 0           my($J, $n, $O) = ($states->{"4of6"}[0] + $j*$in_group, $next_base + ($i & 0x3)*$spread_next, 0xD800 + 4*$j*$in_group + ($i>>2));
12884 0           XML_format($O = chr $O);
12885 0 0         if ($use_plan_c) {
12886 0           my $T = $J + $in_group_4of6_plan_c -1;
12887 0           $o .= <
12888            
12889             EOS
12890             } else {
12891 0           $o .= <
12892            
12893             EOS
12894             # ($HEX eq '9' ? 4 : 3); # 2..4; bisect installation problems here
12895             }
12896             }
12897 0 0         if ($use_plan_c) {
12898 0 0         my $doc = $merge_states_6_and_4 ? '; redirect to low surrogates' : '';
12899 0           $o .= <
12900            
12903             EOS
12904 0           for my $k (1 .. $in_group_4of6_plan_c) {
12905             # for my $j (0 .. 3) {
12906 0           my $n = $next_base + ($k-1)*4;
12907 0           my $T = $n + 3;
12908 0 0         my $next = ($merge_states_6_and_4 ? $states->{"4of4"}[0] + 0xDC0 + $i: $O[5][0]);
12909 0           $o .= <
12910            
12911             EOS
12912             }
12913             }
12914            
12915 0 0         unless ($merge_states_6_and_4) {
12916             $o .= $self->output_state_range($states->{"${_}of6"}[0], $states->{"${_}of6"}[1], 16, $O[$_][0], $O[$_][1])
12917 0 0         for ($use_plan_c ? 6 : 5) .. 6; # ($HEX eq '9' ? 4 : 3); # 2..4; bisect installation problems here
12918             }
12919             $o
12920 0           }
12921            
12922             sub output_hex_term ($$) { # only 4-hex-digits input supported now. First state in $states{'1of4'}[0].
                  # Builds and returns one string ($o) assembled from heredoc templates.
                  # NOTE(review): the template bodies are not visible in this listing, so the exact text emitted
                  # is not documented here.
                  #   $self   - invocant (not used by the statements visible here)
                  #   $states - hash ref; the entries 2of4, 3of4, 4of4 each hold an array ref whose element [0]
                  #             is used as a base state number
12923 0     0 0   my($self, $states) = (shift, shift);
12924 0           my $o = <
12925            
12926            
12927             EOS
12928 0           my @hd = (0..9, 'A'..'F');
                  # For $n = 1..3 and every $n-digit hex value $i: $I is the base of the ($n+1)of4 entry plus $i,
                  # and $hex is $i printed as exactly $n uppercase hex digits.
                  # Presumably $I is the state reached after those $n digits were typed - TODO confirm.
12929 0           for my $n (1 .. 3) {
12930 0           for my $i (0 .. ((16**$n)-1)) {
12931 0           my $N = $n + 1;
12932 0           my $I = $states->{"${N}of4"}[0] + $i;
12933 0           my $hex = sprintf "%0${n}X", $i;
12934 0           $o .= <
12935            
12936             EOS
12937             }
12938             }
                  # One of two alternative templates is appended, selected by the file-level flag $do_hex5.
12939 0 0         $o .= $do_hex5 ? <
12940            
12941            
12942            
12943             EOS
12944            
12945            
12946             EOS
12947            
12948 0           return $o; # the rest creates problems: see iz-Latin-hex6-vis3a.keylayout
12949            
                  # Everything below is unreachable because the return above is unconditional.
                  # The disabled loop uses the 3of6 and 4of6 bases; its $hex values run 01..10 (for $n = 2)
                  # and 010..10F (for $n = 3).
12950 0           $o .= <
12951            
12952            
12953             EOS
12954 0           for my $n (2 .. 3) {
12955 0           for my $i (0 .. ((16**($n-1))-1)) {
12956 0           my $N = $n + 1;
12957 0           my $I = $states->{"${N}of6"}[0] + $i;
12958 0           my $hex = sprintf "%0${n}X", $i + 16**($n-2);
12959 0           $o .= <
12960            
12961             EOS
12962             }
12963             }
12964             $o
12965 0           }
12966            
12967             my $junkHEX = <
12968             After +0yz or +10z (16*16 states); instead of 4434 should put 4434 + 0..3
12969            
12970            
12971             WRONG!!! Need different multipliers for next and for output; so need 256 individual declarations
12972             Instead: use multiplier="4" (so that the output char is correct; next state takes 4K values, out of which we
12973             need only last two bits (manually inserted via next="" above); so we need 1K declarations for per-ultimate???
12974            
12975             So: maybe have 16 declarations for "After +0yz or +10z"; this way, next state takes 64 values, of which
12976             we may make account for by 16 declarations. (32 total per 22 chars 0-9a-fA-F.)
12977            
12978             Or: maybe have 16 declarations for "After +0yz or +10z"; each creates a range of 64 possible "next" states;
12979             but we create 4 groups of such states. So we may make account for by 4 declarations. (20 total per 22 chars 0-9a-fA-F.)
12980             EOJ
                  # NOTE(review): $junkHEX holds free-form design notes about state multipliers; no statement in
                  # the remainder of the file (from here to __END__) reads it.
12981            
                  # Older, single-substitution version of XML_format, kept for reference only:
12982             #sub XML_format ($) { $_[0] =~ s/([&""''\x00-\x1f\x7f-\x9f\s<>]|$rxCombining|$rxZW)/ sprintf '&#x%04X;', ord $1 /ego;
12983             # # Avoid "Malformed UTF-8 character (fatal)" by not putting in a REx
12984             # $_[0] =~ s/(.)/ sprintf '&#x%04X;', ord $1 /ego if length $_[0] eq 1 and 0xd000 <= ord $_[0] and 0xdfff >= ord $_[0]}
12985             sub XML_format ($) {
                  # Rewrites its argument IN PLACE (through the @_ alias) so that problematic characters become
                  # numeric character references of the form &#xHHHH; (at least 4 uppercase hex digits).
                  # The value of the final assignment (the escaped string) is also what the sub returns.
                  # The string is handled one character at a time; the commented-out older version preceding this sub
                  # explains that code points in the surrogate area must not be fed to the regex engine.
12986 0     0 0   my @c = split //, $_[0];
12987 0           for my $c (@c) {
                  # NOTE(review): the range tested starts at 0xD000, which is wider than the surrogate block
                  # (0xD800..0xDFFF); every character in 0xD000..0xDFFF is escaped unconditionally.
12988 0 0 0       if (0xd000 <= ord $c and 0xdfff >= ord $c) {
12989 0           $c = sprintf '&#x%04X;', ord $c;
12990             } else {
                  # Escape the ampersand, both quote characters, C0 and C1 controls, DEL, whitespace, angle
                  # brackets, and anything matched by the file-level patterns $rxCombining or $rxZW.
12991 0           $c =~ s/([&""''\x00-\x1f\x7f-\x9f\s<>]|$rxCombining|$rxZW)/ sprintf '&#x%04X;', ord $1 /ego;
  0            
12992             }
12993             }
12994 0           $_[0] = join '', @c;
12995             }
12996             sub XML_format_UTF_16 ($) {
                  # Replaces the argument IN PLACE by the result of to_UTF16LE_units (defined elsewhere in this
                  # file), then escapes that result in place with XML_format.
                  # Presumably the helper splits characters above 0xFFFF into surrogate code units - TODO confirm.
12997 0     0 0   $_[0] = to_UTF16LE_units $_[0];
12998 0           XML_format $_[0];
12999             }
13000            
13001             my %OEM2ctrl = (qw( OEM_102 0 OEM_MINUS), "\x1f", OEM_4 => "\x1b", OEM_5 => "\x1c", OEM_6 => "\x1d",
13002             CLEAR => "\x1b"); # [, \, ]
                  # %OEM2ctrl: key name => character used by the Control branch of AppleMap_i_j
                  # (negative $use_base). OEM_102 maps to the digit 0, OEM_MINUS to 0x1f, CLEAR to 0x1b.
13003             my %OEM2cmd = (qw( OEM_102 § OEM_MINUS - ));
                  # %OEM2cmd: key name => character used by the positive $use_base branch of AppleMap_i_j.
13004             sub AppleMap_i_j ($$$$$;$$$$) { # http://forums.macrumors.com/archive/index.php/t-780577.html
                  # Walks the Apple key codes 0..127 and appends text for each of them to $o, which is returned.
                  # NOTE(review): the heredoc templates are not visible in this listing, so the emitted text
                  # is not documented here.
                  #   $K        - array ref: path passed to get_deep, AppleMap_Base and BaseKeys
                  #   $l        - index into the list of layers of the face found via $K
                  #   $sh       - index into a key entry of the layer (presumably the Shift state)
                  #   $caps     - when true, a character which is not a prefix or state is uppercased
                  #   $use_base - false: take characters from the layer; positive: take them from BaseKeys
                  #               (lowercased); negative: Control mode (characters become control characters)
                  #   $dd       - hash ref updated in place (see the assignments to $dd below); default {}
                  #   $map      - hash ref looked up by keys2hex of the character; default {}
                  #   $override - hash ref; reads the key dup and keys of the form $l-$sh-$caps-vk=$i and
                  #               $l-$sh--vk=$i, increments entries under extra_actions; default {dup => {}}
                  # When $dd or $override are omitted, the updates go into fresh hashes the caller never sees.
13005 0   0 0 0   my($self, $K, $l, $sh, $caps, $use_base, $dd, $map, $override) =
      0        
      0        
13006             (shift, shift, shift, shift, shift, shift, shift || {}, shift || {}, shift || {dup => {}});
                  # Only the top-level array is copied; that is enough to keep the dup reassignments below
                  # from modifying the array returned by AppleMap_Base.
13007 0           my $A2l = [ @{ $self->AppleMap_Base($K) } ]; # Deep copy
  0            
13008 0           my $dup = $override->{dup};
                  # Each dup entry makes Apple key code $from use the same layout offset as key code $dup->{$from}.
13009 0           for my $from (keys %$dup) {
13010 0           $A2l->[$from] = $A2l->[$dup->{$from}];
13011             }
13012 0           my $F = $self->get_deep($self, @$K); # Presumably a face hash, as in $K = [qw(faces US)]
13013 0           my $L = [map $self->{layers}{$_}, @{$F->{layers}}];
  0            
13014 0           $L = $L->[$l];
13015 0   0       my $B = $use_base && $self->BaseKeys($K); # Partially implemented: use BaseKeys instead of the real $F (VK_ code)
                  # For positive $use_base, single word characters are lowercased; other entries are kept as is.
13016 0 0 0       $B = [map {defined() && /^\w$/ ? lc $_ : $_} @$B] if ($use_base || 0) > 0;
  0 0 0        
                  # @AppleMap is a file-level cache filled once from _AppleMap; it is the fallback table below.
13017 0 0         @AppleMap = _AppleMap unless @AppleMap;
13018 0 0         warn 'AppleMap too long' if $#AppleMap >= 127;
13019 0           my $o = '';
13020 0           for my $i (0..127) {
                  # $d starts as 0; later it takes element [2] of an array-ref binding
                  # (positive: prefix key, negative: extra action, 0: plain character).
13021 0           my($I, $d, $c, $force_o) = ($A2l->[$i], 0); # offset inside the layout array
                  # An explicit override wins (only when the layer is used); the caps-specific key is tried first.
13022 0 0 0       $c = $override->{"$l-$sh-$caps-vk=$i"} || $override->{"$l-$sh--vk=$i"} unless $use_base; # $caps is 0 or 1
13023             # $force_o++ if defined $use_base and $use_base eq '0';
13024 0 0 0       $c = $use_base ? $B->[$I] : $L->[$I][$sh] if not defined $c and defined $I;
    0          
13025 0 0 0       if (($use_base || 0) < 0) { # Control
    0          
                  # Control mode: $force_o later prevents the lookup in $map. A..Z become the control
                  # characters 0x01..0x1A; digits and - = . * / + are kept; anything else goes via %OEM2ctrl.
13026 0           $force_o++;
13027 0 0         if (!defined $c) { # ignore
    0          
    0          
13028             } elsif ($c =~ /^[A-Z]$/) {
13029 0           $c = chr( 1 + ord($c) - ord 'A');
13030             } elsif ($c !~ /^[-0-9=.*\/+]$/) {
13031 0           $c = $OEM2ctrl{$c}; # mostly undef
13032             }
13033             } elsif ($use_base) {
                  # Positive $use_base: translate via %OEM2cmd, else take the first character of the
                  # file-level %oem_control entry; a name known to neither table is dropped (undef).
13034 0           my $tr;
13035 0 0         if (!defined $c) { # ignore
    0          
    0          
13036             } elsif (defined($tr = $OEM2cmd{$c})) {
13037 0           $c = $tr;
13038             } elsif (defined($tr = $oem_control{$c})) {
13039 0           $tr =~ s/(?<=.).*//;
13040 0           $c = $tr;
13041             } else {
13042 0           undef $c;
13043             }
13044             }
13045 0 0         $c = $AppleMap[$i] unless defined $c; # Fallback to US (apparently, there is no unbound "ASCII" keys in maps???); dbg to "\xffff" #
13046            
13047 0 0         $o .= <
13048            
13049             EOK
                  # A binding may be an array ref: element [0] is the character, element [2] the kind flag.
13050 0 0 0       $d = $c->[2] || 0 if ref $c;
13051 0 0         $c = $c->[0] if ref $c;
13052             # On windows, CapsLock flips the case; on Mac, it upcases
13053             # ($c) = grep {$_ ne $c} uc $c, ucfirst lc $c, lc $c if !$d and $caps and (lc $c ne uc $c or lc $c ne ucfirst lc $c);
13054 0 0 0       $c = uc $c if !$d and $caps;
13055 0 0         $dd->{$c}[1]++ if $d > 0; # 0 for normal char, 1 for base prefix; not for hex4/hex6
13056 0 0         $override->{extra_actions}{$c}++ if $d < 0;
                  # $M is the entry of $map for this character; it is only looked up outside Control mode
                  # and when the binding is not an extra action.
13057 0   0       my $M = (!$force_o and $d >= 0 and $map->{$self->keys2hex($c)});
13058 0 0         my $pr = $M ? 'a_' : '';
13059 0 0 0       $dd->{$c}[0] = $c if $M or $d > 0; # 0 for normal char, 1 for base prefix
                  # $how is action when the key is a prefix or extra action or has a $map entry, else output.
                  # $pref is built from a_ (when $M is true) followed by pr_ (prefix key), _ (plain
                  # character) or nothing (extra action).
13060 0 0 0       my($how, $pref) = ($d || $M) ? ('action', ($M ? 'a_' : '') . ($d > 0 ? 'pr_' : (!$d && '_'))) : ('output', '');
    0 0        
    0          
                  # Direct output is converted by XML_format_UTF_16 before escaping; action names are only escaped.
13061 0 0         ($how eq 'output') ? XML_format_UTF_16 $c : XML_format $c;
13062 0           $o .= <
13063            
13064             EOK
13065             }
13066             $o
13067 0           }
13068            
13069             my $hex_states;
                  # $hex_states is a file-scoped lexical: AppleMap_prefix assigns it (result of alloc_slots) and
                  # it is passed to output_hex_input here and to output_hex_term in AppleMap_prefix.
13070             sub AppleMap_prefix_map ($$$$$;$$) {
                  # Builds and returns the text ($o) for one key $kk: an opening template, one line of default
                  # behaviour, one line per entry of $M, optionally the hex-input part, and a closing template.
                  # NOTE(review): the heredoc templates are not visible in this listing.
                  #   $kk       - the key (unescaped); $k below is its XML-escaped copy
                  #   $pref     - true if $kk is a prefix key (selects pr_ versus _ and next= versus output=)
                  #   $M        - hash ref: hex code of a state => binding (array ref); default {}
                  #   $v        - array ref; element [0] is what $kk produces by default
                  #   $doHEX    - true if hex input should be appended
                  #   $override - hash ref with keys of the form +STATE+KEY; default {}
13071 0   0 0 0   my($o, $self, $kk, $pref, $M, $v, $doHEX, $override) = ('', shift, shift, shift, shift || {}, shift, shift, shift || {});
      0        
13072 0           XML_format (my $k = $kk);
                  # NOTE(review): $M can never be false here (it defaults to a hash ref), so $pr is always a_.
13073 0 0         my $pr = $M ? 'a_' : '';
13074 0 0         my $prefix = $pref ? 'pr_' : '_';
13075 0           $o .= <
13076            
13077             EOK
13078             # A character and a prefix key with the same ordinal differ only in this:
13079 0           XML_format (my $oo = $v->[0]);
13080 0 0         my $todo = $pref ? qq(next="st_$oo") : qq(output="$oo");
13081 0           $o .= <
13082            
13083             EOK
13084 0 0         for my $st (sort keys %{$M || {}}) {
  0            
13085 0           my $v0 = $M->{$st};
                  # $st arrives as a hex code: $st0 is the corresponding character, $st its XML-escaped form.
13086 0           XML_format ($st = my $st0 = chr hex $st);
13087 0           my $KK = $self->key2hex($kk);
13088 0           my $ST0 = $self->key2hex($st0);
                  # This $v shadows the parameter $v inside the loop body. An override is searched with the
                  # state and the key each given either literally or via key2hex; $v0 is the fallback.
13089             my $v = $override->{"+$st0+$kk"} || $override->{"+$ST0+$kk"}
13090 0   0       || $override->{"+$st0+$KK"} || $override->{"+$ST0+$KK"} || $v0;
                  # Element [2] of the binding: positive = go to another state, negative = element [0] is a
                  # literal state name, 0 = plain output. $T starts undefined (one-element list assignment).
13091 0   0       my($d, $T) = $v->[2] || 0;
13092 0 0         $T = chr hex $v->[0] if $d >= 0;
13093 0 0         if ($d > 0) {
    0          
13094 0           XML_format $T;
13095 0           $T = qq(next="st_$T");
13096             } elsif ($d < 0) { # Literal state
13097 0           $T = qq(next="$v->[0]");
13098             } else {
13099 0           XML_format_UTF_16 $T;
13100 0           $T = qq(output="$T");
13101             }
13102 0           $o .= <
13103            
13104             EOK
13105             }
                  # Outside the loop $v is the parameter again. Hex input is added only when the default
                  # output is a single character out of: hex digit, u, U, space, minus, underscore, plus, equals.
13106 0 0 0       $o .= $self->output_hex_input($hex_states, $v->[0]) if $doHEX and $v->[0] =~ /^[-u\x20_+=0-9a-f]\z/i;
13107 0           $o .= <
13108            
13109             EOK
13110 0           $o;
13111             }
13112            
13113             sub AppleMap_prefix ($$;$$$$$$) { # http://forums.macrumors.com/archive/index.php/t-780577.html
                  # Builds and returns text ($o) for the keys recorded in $dd. What is emitted depends on two flags:
                  #   $do_initing - process only entries of $dd whose element [1] is true (otherwise only those
                  #                 where it is false); with $do_initing and without $term the extra actions
                  #                 are appended too
                  #   $term       - emit one short line per entry instead of a full map; without $do_initing
                  #                 the entries come from $act, and output_hex_term is appended if hex input is on
                  # Other arguments:
                  #   $dd       - hash ref: key => array ref ([0] the key, [1] a count of uses as a prefix)
                  #   $map      - hash ref indexed by the hex code of the prefix key; default {}
                  #   $show     - hash ref: hex code of a key => text to show for its terminator
                  #   $override - hash ref; keys starting with + are bindings, extra_actions is read
                  #   $act      - hash ref filled in on the first call (while it is still empty); it is
                  #               dereferenced unconditionally, so the caller must supply it
                  # NOTE(review): the heredoc templates are not visible in this listing.
13114 0   0 0 0   my($self, $dd, $do_initing, $term, $map, $show, $override, $act) = (shift, shift, shift, shift, shift || {}, shift, shift, shift);
13115 0           my $o = '';
13116            
                  # Copy of the extra_actions hash, so the increments below do not change $override.
13117 0 0         my %e = %{ $override->{extra_actions} || {}}; # Deep copy
  0            
                  # hex4 implies hex6, and also hex5 when the file-level flag $do_hex5 is set.
13118 0 0 0       ($do_hex5 and $e{hex5}++), $e{hex6}++ if $e{hex4};
13119 0           my @o = @$override{grep /^\+/, keys %$override}; # honest bindings, not extra_actions/etc
13120 0           @o = map chr hex $_->[0], grep $_->[2] > 0, @o; # dead keys
13121 0 0         unless (%$act) { # Treat states created by the actions only
                  # Collect every distinct state name: extra actions, dead keys from overrides, prefix keys
                  # of $dd, and the targets of bindings in $map which have a true element [2].
13122 0           my %states;
13123 0           $states{$_}++ for keys(%e), @o, grep $dd->{$_}[1], keys %$dd;
13124 0           for my $v (values %$map) { # hash indexed by the prefix key
13125 0           for my $out (values %$v) {
13126 0 0         next if not $out->[2];
13127 0           my $k = $self->charhex2key($out->[0]);
13128 0           $states{$k}++;
13129 0           my $v;
13130 0 0 0       $act->{$k} = [$k] unless $v = $dd->{$k} and $v->[1]; # Skip if terminator was already created; do not create fake values
13131             }
13132             }
                  # The number of states found (plus 10) is passed to alloc_slots together with the counts for
                  # the plan in use; the result is kept in the file-scoped $hex_states.
13133 0           my $states = 10 + keys(%states); # Was 4100; 10: "just in case"
13134 0 0         $hex_states = alloc_slots( $states, $use_plan_c ? \@state_cnt_c : ($use_plan_b ? \@state_cnt_b : \@state_cnt_a));
    0          
13135             }
13136            
13137 0 0 0       if ($term and not $do_initing) { # Treat states created by the actions only
13138 0           $dd = $act; # A terminator MUST be created for every state
13139             }
13140            
                  # Number of the flags hex4, hex5, hex6 which are set; used only as a boolean.
13141 0           my $doHEX = grep $e{"hex$_"}, 4,5,6;
13142 0           for my $kk (sort keys %$dd) {
13143 0           my $v = $dd->{$kk};
13144 0           XML_format (my $k = $kk);
                  # Keep only the entries whose prefix status (element [1]) agrees with $do_initing.
13145 0 0         next if !!$do_initing != !!$v->[1];
13146            
13147 0 0         if ($term) {
                  # Text to show: the entry of $show if defined, else the key itself; a leading combining
                  # character gets a space put in front of it.
13148 0           my $Show = $show->{$self->key2hex($kk)};
13149 0 0         $Show = $kk unless defined $Show;
13150 0           $Show =~ s/^(?=$rxCombining)/ /;
13151 0           XML_format $Show;
13152 0           $o .= qq(\t\n);
13153 0           next;
13154             }
13155            
13156 0           my $M = $map->{$self->keys2hex($kk)};
13157 0           $o .= $self->AppleMap_prefix_map($kk, $do_initing, $M, $v, $doHEX, $override);
13158             }
                  # Extra actions are emitted only in the initing, non-terminator pass; the action hex4 gets
                  # an additional part ($add) whose form depends on $do_hex5.
13159 0 0 0       for my $a ( ($do_initing and not $term) ? sort keys %e : () ) {
13160 0   0       my $add = ($a =~ /^hex4\z/ and ($do_hex5 ? <
13161            
13162            
13163             EOS
13164            
13165             EOS
13166 0           $o .= <
13167            
13168            
13169             $add
13170             EOS
13171             }
13172 0 0 0       $o .= $self->output_hex_term($hex_states) if $term and $doHEX and not $do_initing; # Do only once, at the end
      0        
13173 0           $o
13174             }
13175            
13176             1;
13177            
13178             __END__