| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Encode::Guess::Educated; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 7 |  |  | 7 |  | 344232 | use utf8; | 
|  | 7 |  |  |  |  | 74 |  | 
|  | 7 |  |  |  |  | 37 |  | 
| 4 | 7 |  |  | 7 |  | 272 | use v5.10; | 
|  | 7 |  |  |  |  | 24 |  | 
|  | 7 |  |  |  |  | 292 |  | 
| 5 | 7 |  |  | 7 |  | 36 | use strict; | 
|  | 7 |  |  |  |  | 39 |  | 
|  | 7 |  |  |  |  | 206 |  | 
| 6 | 7 |  |  | 7 |  | 36 | use warnings; | 
|  | 7 |  |  |  |  | 12 |  | 
|  | 7 |  |  |  |  | 307 |  | 
| 7 | 7 |  |  | 7 |  | 32 | use warnings FATAL => "utf8"; | 
|  | 7 |  |  |  |  | 19 |  | 
|  | 7 |  |  |  |  | 447 |  | 
| 8 | 7 |  |  | 7 |  | 6699 | use charnames qw(:full); | 
|  | 7 |  |  |  |  | 278844 |  | 
|  | 7 |  |  |  |  | 51 |  | 
| 9 |  |  |  |  |  |  |  | 
| 10 | 7 |  |  | 7 |  | 1607 | use Carp 	 qw(carp croak cluck confess); | 
|  | 7 |  |  |  |  | 15 |  | 
|  | 7 |  |  |  |  | 598 |  | 
| 11 | 7 |  |  | 7 |  | 1006 | use Encode 	 qw(:fallback_all find_encoding encode decode); | 
|  | 7 |  |  |  |  | 21140 |  | 
|  | 7 |  |  |  |  | 2171 |  | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | # intentionally suppress import of guess_encoding() | 
| 14 | 7 |  |  | 7 |  | 7199 | use Encode::Guess qw(); | 
|  | 7 |  |  |  |  | 39514 |  | 
|  | 7 |  |  |  |  | 265 |  | 
| 15 |  |  |  |  |  |  |  | 
| 16 | 7 |  |  | 7 |  | 128 | use List::Util 	 qw(sum max); | 
|  | 7 |  |  |  |  | 14 |  | 
|  | 7 |  |  |  |  | 1091 |  | 
| 17 | 7 |  |  | 7 |  | 128 | use Scalar::Util qw(refaddr reftype blessed looks_like_number); | 
|  | 7 |  |  |  |  | 14 |  | 
|  | 7 |  |  |  |  | 874 |  | 
| 18 |  |  |  |  |  |  |  | 
| 19 | 7 |  |  | 7 |  | 9262 | use autouse "Unicode::UCD" 	=> qw(charinfo); | 
|  | 7 |  |  |  |  | 5995 |  | 
|  | 7 |  |  |  |  | 48 |  | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | ######################################################################## | 
| 22 |  |  |  |  |  |  |  | 
# Forward declarations for subs used in this module before their bodies
# appear.  They are grouped by prototype: none, (_) -- a single scalar
# argument that defaults to $_ -- and () -- no arguments at all.

sub debug;
sub pull_examples;
sub strnum_sort;
sub uniq;

sub known_encoding(_);
sub str2nummistr(_);
sub uniquote(_);

sub debugging();
sub whoami();
sub whowasi();
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | ######################################################################## | 
| 37 |  |  |  |  |  |  |  | 
# Turn on autoflush for the currently selected output handle.
$| = 1;

our $VERSION = 0.03;

# Encoding names handed out by get_suspects() when it is invoked as a
# class method; new() installs them on every fresh object.
my @default_suspects = qw(
    iso-8859-1 iso-8859-15 iso-8859-2 iso-8859-5
    cp1252 cp1250 cp1251
    MacRoman
);

# Class-level training data; get_training_data() returns a copy of this
# hash when it is invoked as a class method.
my %default_training_data;
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  | ######################################################################## | 
| 60 |  |  |  |  |  |  | ######################################################################## | 
| 61 |  |  |  |  |  |  | ######################################################################## | 
| 62 |  |  |  |  |  |  | ## OO API FOLLOWS | 
| 63 |  |  |  |  |  |  | ######################################################################## | 
| 64 |  |  |  |  |  |  | ######################################################################## | 
| 65 |  |  |  |  |  |  | ######################################################################## | 
| 66 |  |  |  |  |  |  |  | 
# Die with a full stack backtrace (Carp::confess).  The message is
# "INTERNAL ERROR: " followed by the arguments interpolated as a list.
sub panic {
    my $complaint = "INTERNAL ERROR: @_";
    confess($complaint);
}
| 70 |  |  |  |  |  |  |  | 
# Panic unless the sub that called this validator was itself invoked in
# list context.  caller(1) describes that sub's frame, and element 5 of
# the list it returns is the frame's wantarray value.
sub _validate_list_context() {
    my $calling_context = (caller(1))[5];
    $calling_context or panic("wanted to be called in list context");
}
| 80 |  |  |  |  |  |  |  | 
# Panic unless the argument count actually received equals the count
# expected.  The ($$) prototype evaluates both arguments in scalar
# context, so an array passed first contributes its element count.
sub _validate_argc($$) {
    my $received = shift;
    my $expected = shift;
    $received == $expected
        or panic("have $received arguments but wanted $expected");
}
| 85 |  |  |  |  |  |  |  | 
# Panic unless at least the expected number of arguments was received.
# As with _validate_argc, the ($$) prototype turns an array passed first
# into its element count.
sub _validate_argc_min($$) {
    my $received = shift;
    my $minimum  = shift;
    $received >= $minimum
        or panic("have $received arguments but wanted $minimum or more");
}
| 90 |  |  |  |  |  |  |  | 
# Panic unless the first argument is a blessed reference, i.e. the
# enclosing method was invoked on an object rather than on a class name.
# Callers pass their whole @_; only the invocant is inspected.
sub _validate_object_invocant {
    my($self) = @_;
    blessed($self) or panic("object method call invoked as class method");
}
| 95 |  |  |  |  |  |  |  | 
# Panic if the first argument is a blessed reference, i.e. the enclosing
# class method was invoked on an object instead of on a class name.
# Callers pass their whole @_; only the invocant is inspected.
sub _validate_class_invocant {
    my($class) = @_;
    !blessed($class) or panic("class method call invoked as object method");
}
| 100 |  |  |  |  |  |  |  | 
# Panic unless the method that called this validator was itself called
# from code compiled in this package.  caller(1) in scalar context yields
# the package name of that outer caller, so this must be invoked directly
# from the method being protected.
sub _validate_private_method() {
    my $calling_package = caller(1);
    $calling_package eq __PACKAGE__ or panic("don't call private methods");
}
| 104 |  |  |  |  |  |  |  | 
# Panic if the single scalar argument is undef.
sub _validate_defined($) {
    my $value = shift;
    defined($value) ? 1 : panic("expected defined argument");
}
| 109 |  |  |  |  |  |  |  | 
# Panic if the single scalar argument is a reference of any kind.
sub _validate_nonref($) {
    my $candidate = shift;
    ref($candidate) ? panic("expected nonreference argument") : 1;
}
| 114 |  |  |  |  |  |  |  | 
# Panic unless the argument is a defined, non-reference value for which
# known_encoding() returns a true value.
sub _validate_known_encoding($) {
    my $name = shift;
    _validate_defined($name);
    _validate_nonref($name);
    known_encoding($name) or panic("unknown encoding $name");
}
| 121 |  |  |  |  |  |  |  | 
# Panic unless Scalar::Util::looks_like_number accepts the argument.
sub _validate_numeric($) {
    my $value = shift;
    looks_like_number($value)
        or panic("$value doesn't look like a number");
}
| 126 |  |  |  |  |  |  |  | 
# Panic if Scalar::Util::looks_like_number accepts the argument; the
# caller wants something that is not a number.
sub _validate_nonnumeric($) {
    my($n) = @_;
    !looks_like_number($n) or panic("$n looks like a number");
}
| 131 |  |  |  |  |  |  |  | 
# Panic unless the argument is a non-reference consisting solely of one or
# more ASCII digits, so zero is accepted.
sub _validate_nonnegative_integer($) {
    my($int) = @_;
    _validate_nonref($int);
    $int =~ /^[0-9]+\z/ or panic("expected nonnegative integer, not $int");
}
| 137 |  |  |  |  |  |  |  | 
# Panic unless the argument is a non-reference made of ASCII digits whose
# first digit is not zero.
sub _validate_positive_integer($) {
    my $candidate = shift;
    _validate_nonref($candidate);
    $candidate =~ /^[1-9][0-9]*\z/
        or panic("expected positive integer, not $candidate");
}
| 143 |  |  |  |  |  |  |  | 
# Panic unless $n looks like a number and lies within the closed interval
# [$low, $high].
sub _validate_numeric_range($$$) {
    my($n, $low, $high) = @_;
    _validate_numeric($n);
    ($n >= $low && $n <= $high) ? 1 : panic("expected $low <= $n <= $high");
}
| 149 |  |  |  |  |  |  |  | 
# Panic unless $arg is a reference whose underlying type, as reported by
# Scalar::Util::reftype, equals $type (e.g. "HASH" or "ARRAY").
#
# reftype() returns undef for a non-reference; it is defaulted to the
# empty string so that such an argument panics without first emitting an
# "uninitialized value" warning from the eq comparison.
sub _validate_reftype($$) {
    my($type, $arg) = @_;
    my $actual = reftype($arg) // "";
    $actual eq $type or panic("expected reftype of $type");
}
| 154 |  |  |  |  |  |  |  | 
# Panic unless the argument is a defined, non-reference string containing
# at least one character.
sub _validate_strlen($) {
    my $text = shift;
    _validate_defined($text);
    _validate_nonref($text);
    length($text) > 0 or panic("expected lengthier string");
}
| 161 |  |  |  |  |  |  |  | 
# Panic if the string holds any character whose code point is above 0xFF.
sub _validate_no_wide_characters($) {
    my $text = shift;
    $text =~ /[^\x00-\xFF]/ ? panic("unexpected wide characters") : 1;
}
| 166 |  |  |  |  |  |  |  | 
# Panic unless the string contains at least one non-ASCII character.
sub _validate_has_nonascii($) {
    my $text = shift;
    $text =~ /\P{ASCII}/ or panic("expected non-ASCII in string");
}
| 171 |  |  |  |  |  |  |  | 
# Panic unless $path exists, is a regular file, and has nonzero size.
# The second and third checks reuse the stat buffer filled by the -e test
# (the special "_" filehandle) instead of hitting the filesystem again.
sub _validate_is_plainfile($) {
    my $file = shift;
    (-e $file) or panic("can't stat $file: $!");
    (-f _)     or panic("$file isn't a regular file");
    (-s _)     or panic("$file is empty");
}
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  | ######################################################################## | 
| 180 |  |  |  |  |  |  | ######################################################################## | 
| 181 |  |  |  |  |  |  | ######################################################################## | 
| 182 |  |  |  |  |  |  |  | 
| 183 |  |  |  |  |  |  |  | 
# Class constructor.  Must be invoked on a class name with no further
# arguments.  The new object starts with empty SUSPECTS and BYTES arrays
# and a REPORT hash whose fields are all undef; it is then given the
# class-level training data and suspect list through the public setters.
sub new :method {

    _validate_class_invocant(@_);
    _validate_argc(scalar(@_), 1);

    my $class = $_[0];

    my %report = map { $_ => undef } qw(
        GUESSED_ENCODING
        TOTAL_HIGH_BYTES
        DISTINCT_HIGH_BYTES
        DATA_LENGTH_IN_BYTES
        SAMPLE
        ERROR
        AS_STRING
    );

    my $self = bless {
        TRAINING_DATA => undef,
        SUSPECTS      => [ ],
        BYTES         => [ ],
        REPORT        => \%report,
    }, $class;

    # Training data goes in first, then the suspects, as in the original.
    $self->set_training_data( $class->get_training_data() );
    $self->set_suspects( $class->get_suspects() );

    return $self;
}
| 214 |  |  |  |  |  |  |  | 
# Private object method: reset every field currently present in the
# object's REPORT hash to undef, keeping the keys themselves.
sub _clear_report {

    _validate_argc(scalar(@_), 1);
    _validate_object_invocant(@_);
    _validate_private_method();

    my($self) = @_;

    # values() yields aliases to the stored elements, so assigning through
    # the loop variable updates the hash in place.
    for my $slot (values %{ $self->{REPORT} }) {
        $slot = undef;
    }

}
| 228 |  |  |  |  |  |  |  | 
# Set the package variable $DEBUG to 1 or 0 according to the truth of the
# single argument.  The invocant is accepted but not otherwise used.
sub enable_debugging {
    _validate_argc(scalar(@_), 2);
    my(undef, $flag) = @_;
    our $DEBUG;
    return $DEBUG = ($flag ? 1 : 0);
}
| 234 |  |  |  |  |  |  |  | 
# Object method: return the GUESSED_ENCODING field of the report.
sub get_guessed_encoding :method {

    _validate_argc(scalar(@_), 1);
    _validate_object_invocant(@_);

    my $self = shift;
    return $self->{REPORT}->{GUESSED_ENCODING};
}
| 243 |  |  |  |  |  |  |  | 
# Object method: return the DISTINCT_HIGH_BYTES field of the report.
sub get_report_distinct_high_bytes :method {

    _validate_argc(scalar(@_), 1);
    _validate_object_invocant(@_);

    my($self) = @_;
    return $self->{REPORT}->{DISTINCT_HIGH_BYTES};
}
| 252 |  |  |  |  |  |  |  | 
# Object method: return the DATA_LENGTH_IN_BYTES field of the report.
sub get_report_data_length :method {

    _validate_argc(scalar(@_), 1);
    _validate_object_invocant(@_);

    my($self) = @_;
    return $self->{REPORT}->{DATA_LENGTH_IN_BYTES};
}
| 261 |  |  |  |  |  |  |  | 
# Object method: return the TOTAL_HIGH_BYTES field of the report.
sub get_report_total_high_bytes {

    _validate_argc(scalar(@_), 1);
    _validate_object_invocant(@_);

    my($self) = @_;
    return $self->{REPORT}->{TOTAL_HIGH_BYTES};
}
| 270 |  |  |  |  |  |  |  | 
# Object method: return the ERROR field of the report.
sub get_errmsg {

    _validate_argc(scalar(@_), 1);
    _validate_object_invocant(@_);

    my($self) = @_;
    return $self->{REPORT}->{ERROR};

}
| 280 |  |  |  |  |  |  |  | 
# Object method: return the SAMPLE field of the report.
sub get_report_sample {

    _validate_argc(scalar(@_), 1);
    _validate_object_invocant(@_);

    my($self) = @_;
    return $self->{REPORT}->{SAMPLE};
}
| 289 |  |  |  |  |  |  |  | 
| 290 |  |  |  |  |  |  |  | 
# Private object method: record $encoding, which must pass
# _validate_known_encoding, as the report's GUESSED_ENCODING.
sub _set_guessed_encoding :method {

    _validate_argc(scalar(@_), 2);
    _validate_object_invocant(@_);
    _validate_private_method();

    my $self     = $_[0];
    my $enc_name = $_[1];

    _validate_known_encoding($enc_name);

    $self->{REPORT}->{GUESSED_ENCODING} = $enc_name;
}
| 303 |  |  |  |  |  |  |  | 
# Private object method: store a nonnegative integer byte count in the
# report's DATA_LENGTH_IN_BYTES field.
sub _set_report_data_length :method {

    _validate_argc(scalar(@_), 2);
    _validate_object_invocant(@_);
    _validate_private_method();

    my $self   = $_[0];
    my $length = $_[1];

    _validate_nonnegative_integer($length);

    $self->{REPORT}->{DATA_LENGTH_IN_BYTES} = $length;
}
| 316 |  |  |  |  |  |  |  | 
# Private object method: store a nonnegative integer in the report's
# DISTINCT_HIGH_BYTES field.
sub _set_report_distinct_high_bytes {

    _validate_argc(scalar(@_), 2);
    _validate_object_invocant(@_);
    _validate_private_method();

    my $self  = $_[0];
    my $count = $_[1];

    _validate_nonnegative_integer($count);

    $self->{REPORT}->{DISTINCT_HIGH_BYTES} = $count;

}
| 330 |  |  |  |  |  |  |  | 
# Private object method: store a nonnegative integer in the report's
# TOTAL_HIGH_BYTES field.
sub _set_report_total_high_bytes {

    _validate_argc(scalar(@_), 2);
    _validate_object_invocant(@_);
    _validate_private_method();

    my $self  = $_[0];
    my $count = $_[1];

    _validate_nonnegative_integer($count);

    $self->{REPORT}->{TOTAL_HIGH_BYTES} = $count;

}
| 344 |  |  |  |  |  |  |  | 
# Private object method: store a non-empty message string in the report's
# ERROR field.
sub _set_errmsg {
    _validate_argc(scalar(@_), 2);
    _validate_object_invocant(@_);
    _validate_private_method();

    my $self = $_[0];
    my $text = $_[1];

    _validate_strlen($text);

    $self->{REPORT}->{ERROR} = $text;
}
| 356 |  |  |  |  |  |  |  | 
# Private object method: store $sample in the report's SAMPLE field.  The
# sample must be a non-empty string containing at least one non-ASCII
# character.
sub _set_report_sample {
    _validate_argc(scalar(@_), 2);
    _validate_object_invocant(@_);
    _validate_private_method();

    my $self    = $_[0];
    my $excerpt = $_[1];

    _validate_strlen($excerpt);
    _validate_has_nonascii($excerpt);

    $self->{REPORT}->{SAMPLE} = $excerpt;
}
| 369 |  |  |  |  |  |  |  | 
# Object method: return the object's BYTES array reference itself, not a
# copy.
sub get_byte_table :method {
    _validate_argc(scalar(@_), 1);
    _validate_object_invocant(@_);

    my $self = shift;
    return $self->{BYTES};
}
| 377 |  |  |  |  |  |  |  | 
# Private object method: replace the object's BYTES table with a fresh,
# empty array.
sub _reset_byte_table :method {
    _validate_argc(scalar(@_), 1);
    _validate_object_invocant(@_);
    _validate_private_method();

    my $self = shift;
    $self->{BYTES} = [ ];
}
| 386 |  |  |  |  |  |  |  | 
# Object method: install $scores as the object's training data and reset
# the byte table.
#
# $scores must be a hash reference (a blessed hash is accepted) whose keys
# are nonnegative integers and whose values are positive integers; any
# violation panics.  Croaks if the hash is empty and carps if it holds 90
# or fewer entries.  The hash is stored by reference, not copied.
sub set_training_data :method {
    _validate_argc(scalar(@_), 2);
    _validate_object_invocant(@_);

    my($self, $scores) = @_;

    _validate_reftype(HASH => $scores);

    # Walk keys() rather than each(): each() resumes from wherever the
    # hash's iterator currently sits, so a half-consumed iterator left by
    # the caller would make us validate and count only part of the hash,
    # and a panic mid-loop would strand the iterator for later users.
    my @keys = keys %$scores;
    for my $k (@keys) {
        _validate_nonnegative_integer($k);
        _validate_positive_integer($scores->{$k});
    }
    my $keycount = @keys;

    $keycount >   0
        || croak("no training data");

    # e.g., Latin1 has 96 high-byte code points + 32 from the C1 control set
    $keycount >  90
        || carp("not much training data");

    $self->{TRAINING_DATA} = $scores;
    $self->_reset_byte_table();   # new training set invalidates old cache
}
| 412 |  |  |  |  |  |  |  | 
# Return training data as a hash reference.  Invoked on an object, this is
# the object's own TRAINING_DATA reference; invoked on a class, it is a
# fresh shallow copy of the class-level defaults.
sub get_training_data :method {
    _validate_argc(scalar(@_), 1);

    my $invocant = shift;

    return $invocant->{TRAINING_DATA} if blessed($invocant);

    # yes, this is supposed to be a copy
    my %copy = %default_training_data;
    return \%copy;

}
| 426 |  |  |  |  |  |  |  | 
# Return the suspect encodings: the object's own list when invoked on an
# object, the class-level defaults when invoked on a class.  List context
# receives the names as a list; any other context receives a reference to
# a new array holding them.
sub get_suspects :method {
    _validate_argc(scalar(@_), 1);

    my $invocant = shift;

    my @names = blessed($invocant)
              ? @{ $invocant->{SUSPECTS} }
              : @default_suspects;

    return @names if wantarray;
    return \@names;

}
| 443 |  |  |  |  |  |  |  | 
# Object method: replace the object's suspect list with the one or more
# encoding names given.  Each name must pass _validate_known_encoding, and
# the result of known_encoding() for it is passed to $self->_encache
# before the list is stored.
sub set_suspects :method {

    _validate_argc_min(scalar(@_), 2);
    _validate_object_invocant(@_);

    my $self     = shift;
    my @suspects = @_;

    foreach my $name (@suspects) {
        _validate_known_encoding($name);
        $self->_encache(known_encoding($name));
    }

    $self->{SUSPECTS} = \@suspects;
}
| 456 |  |  |  |  |  |  |  | 
# Object method: put the one or more encoding names given at the front of
# the object's existing suspect list.  Each name must pass
# _validate_known_encoding, and the result of known_encoding() for it is
# passed to $self->_encache first.
sub add_suspects :method {

    _validate_argc_min(scalar(@_), 2);
    _validate_object_invocant(@_);

    my $self   = shift;
    my @extras = @_;

    foreach my $name (@extras) {
        _validate_known_encoding($name);
        $self->_encache(known_encoding($name));
    }

    unshift @{ $self->{SUSPECTS} }, @extras;
}
| 469 |  |  |  |  |  |  |  | 
##########
# USAGE:
#   $weight = $self->_get_byte_weight($encoding, $byte)
#
# Private lookup of the weight stored for a high byte (128..255) under the
# given encoding.  It exists mainly to keep the sanity checks in one place.
# Croaks if the byte has no table entry, or if that entry has no slot for
# the encoding.  The stored weight may be undef.
#
sub _get_byte_weight :method {

    _validate_argc(scalar(@_), 3);
    _validate_private_method();
    _validate_object_invocant(@_);

    my($self, $encoding, $byte) = @_;

    _validate_known_encoding($encoding);
    _validate_numeric_range($byte, 128, 255);

    # The table is indexed by the low seven bits of the byte.
    my $slot  = $byte & 127;
    my $table = $self->get_byte_table;
    my $entry = $table->[$slot];

    croak("missing table for byte 128+$slot") unless $entry;
    croak("missing encoding entry for $encoding at byte 128+$slot")
        unless exists $entry->{$encoding};

    return $entry->{$encoding};
}
| 498 |  |  |  |  |  |  |  | 
##########
# USAGE: $self->_set_byte_weight($encoding, $byte, $weight)
# (private method call from within another object method)
#
# Store $weight for a high byte (128..255) under the given encoding.  A
# defined weight must lie in [0, 1]; an undefined weight is logged as
# "impossible" and stored as undef.  The method exists mainly so that
# every assignment is logged through debug().
#

sub _set_byte_weight :method {

    _validate_argc(scalar(@_), 4);
    _validate_private_method();
    _validate_object_invocant(@_);

    my($self, $encoding, $byte, $weight) = @_;

    _validate_known_encoding($encoding);
    _validate_numeric_range($byte, 128, 255);

    if (!defined $weight) {
        debug("enc $encoding %02X => impossible", $byte);
    }
    else {
        _validate_numeric_range($weight, 0.0, 1.0);
        debug("enc $encoding %02X => %e", $byte, $weight);
    }

    my $table = $self->get_byte_table;
    croak("bad byte table") unless $table && reftype($table) eq "ARRAY";

    my $slot = $byte & 127;  # we only care about high bytes

    # An existing slot is reported but still overwritten below.
    if (exists $table->[$slot]{$encoding}) {
        carp(sprintf("byte %02X already has a slot allocated to it", $slot | 128));
    }
    $table->[$slot]{$encoding} = $weight;

}
| 536 |  |  |  |  |  |  |  | 
# Object method: read the whole of $filename as raw bytes and return the
# result of guess_data_encoding() on its contents.  The path must name a
# nonempty regular file (otherwise panics); croaks if the file cannot be
# opened, switched to binary mode, read, or closed.
sub guess_file_encoding :method {

    _validate_argc(scalar(@_), 2);
    _validate_object_invocant(@_);

    my($self, $filename) = @_;

    _validate_is_plainfile($filename);

    open(my $fh, "<", $filename) or croak("can't open < $filename: $!");
    binmode($fh)                 or croak("can't binmode $filename: $!");

    my $contents;
    {
        local $/;   # "slurp" mode (read till eof), confined to this block
        $contents = <$fh>;
    }
    close($fh)                   or croak("can't close $filename: $!");

    defined($contents) or croak("bad read from $filename: $!");
    length($contents)  or croak("empty read from nonempty $filename");

    debug("guessing encoding of $filename");

    return $self->guess_data_encoding($contents);
}
| 562 |  |  |  |  |  |  |  | 
| 563 |  |  |  |  |  |  | sub guess_data_encoding :method { | 
| 564 |  |  |  |  |  |  |  | 
| 565 | 170 |  |  | 170 | 0 | 22999 | _validate_argc(@_ => 2); | 
| 566 | 170 |  |  |  |  | 509 | _validate_object_invocant(@_); | 
| 567 |  |  |  |  |  |  |  | 
| 568 | 170 |  |  |  |  | 389 | my($self, $data) = @_; | 
| 569 |  |  |  |  |  |  |  | 
| 570 | 170 |  |  |  |  | 411 | _validate_strlen($data); | 
| 571 | 170 |  |  |  |  | 477 | _validate_no_wide_characters($data); | 
| 572 |  |  |  |  |  |  |  | 
| 573 | 170 |  |  |  |  | 822 | $self->_clear_report(); | 
| 574 |  |  |  |  |  |  |  | 
| 575 | 170 |  |  |  |  | 959 | $self->_set_report_data_length(length($data)); | 
| 576 |  |  |  |  |  |  |  | 
| 577 |  |  |  |  |  |  | # Faster to check for a single non-ASCII | 
| 578 |  |  |  |  |  |  | # than to validate whole thing is ASCII only. | 
| 579 | 170 | 50 |  |  |  | 3379 | unless ($data =~ /\P{ASCII}/) { | 
| 580 | 0 |  |  |  |  | 0 | $self->_set_guessed_encoding("ascii"); | 
| 581 | 0 |  |  |  |  | 0 | $self->_set_report_string("input contains only ascii"); | 
| 582 | 0 |  |  |  |  | 0 | return "ascii"; | 
| 583 |  |  |  |  |  |  | } | 
| 584 |  |  |  |  |  |  |  | 
| 585 | 170 | 50 |  |  |  | 947 | if (my $decoder = Encode::Guess::guess_encoding($data)) { | 
| 586 | 170 | 100 |  |  |  | 287080 | if (ref $decoder) { | 
| 587 | 28 |  |  |  |  | 150 | my $enc = $decoder->name; | 
| 588 | 28 |  |  |  |  | 172 | $self->_set_guessed_encoding($enc); | 
| 589 | 28 |  |  |  |  | 49 | my $reason = "guess from Encode::Guess::guess_encoding"; | 
| 590 | 28 |  |  |  |  | 101 | $self->_set_report_string($reason); | 
| 591 |  |  |  |  |  |  |  | 
| 592 | 28 |  |  |  |  | 105 | return $enc; | 
| 593 |  |  |  |  |  |  | } else { | 
| 594 | 142 |  |  |  |  | 827 | debug("Encode::Guess::guess_encoding failed with: %s", $decoder); | 
| 595 |  |  |  |  |  |  | } | 
| 596 |  |  |  |  |  |  | } | 
| 597 |  |  |  |  |  |  |  | 
| 598 | 142 |  |  |  |  | 8438 | my @encodings = $self->get_suspects(); | 
| 599 |  |  |  |  |  |  |  | 
| 600 | 142 |  |  |  |  | 501 | @encodings = map { known_encoding } @encodings; | 
|  | 1278 |  |  |  |  | 2409 |  | 
| 601 |  |  |  |  |  |  |  | 
| 602 | 142 |  |  |  |  | 417 | my %scores = (); | 
| 603 | 142 |  |  |  |  | 404 | my %impossible = (); | 
| 604 |  |  |  |  |  |  |  | 
| 605 | 142 |  |  |  |  | 313 | for (@encodings) { $scores{$_} = 0 } | 
|  | 1278 |  |  |  |  | 2681 |  | 
| 606 |  |  |  |  |  |  |  | 
| 607 | 142 |  |  |  |  | 415 | for ($data) { | 
| 608 | 142 |  |  |  |  | 1418 | while (/([\x80-\xFF])/g) { | 
| 609 | 15321 |  |  |  |  | 48269 | my $byte_ord = ord $1; | 
| 610 | 15321 |  |  |  |  | 26861 | debug("Checking byte table for %02X\n", $byte_ord); | 
| 611 | 15321 |  |  |  |  | 25394 | for my $enc (@encodings) { | 
| 612 | 137889 |  |  |  |  | 308528 | my $worth = $self->_get_byte_weight($enc, $byte_ord); | 
| 613 | 137889 | 100 |  |  |  | 248658 | if (defined $worth) { | 
| 614 | 118000 |  |  |  |  | 258575 | debug("   %-12s %e += %e", $enc, $scores{$enc}, $worth); | 
| 615 | 118000 |  |  |  |  | 356181 | $scores{$enc} += $worth; | 
| 616 |  |  |  |  |  |  | } else { | 
| 617 | 19889 |  |  |  |  | 35007 | debug("   %-12s cannot have byte %02X\n", $enc, $byte_ord); | 
| 618 | 19889 |  |  |  |  | 41420 | $impossible{$enc}++; | 
| 619 |  |  |  |  |  |  | } | 
| 620 |  |  |  |  |  |  | } | 
| 621 |  |  |  |  |  |  | } | 
| 622 |  |  |  |  |  |  | } | 
| 623 | 142 |  |  |  |  | 709 | for my $bogus_enc (keys %impossible) { | 
| 624 | 469 |  |  |  |  | 945 | delete $scores{$bogus_enc}; | 
| 625 |  |  |  |  |  |  | } | 
| 626 |  |  |  |  |  |  |  | 
| 627 | 142 |  |  |  |  | 822 | $self->_set_report_scores(\%scores); | 
| 628 |  |  |  |  |  |  |  | 
| 629 | 142 |  |  |  |  | 675 | $self->_set_examples_from_data($data); | 
| 630 |  |  |  |  |  |  |  | 
| 631 | 142 | 50 |  |  |  | 882 | panic "no high bytes" unless $self->get_report_total_high_bytes(); | 
| 632 |  |  |  |  |  |  |  | 
| 633 | 142 |  |  |  |  | 759 | $self->_evaluate_scores(); | 
| 634 |  |  |  |  |  |  |  | 
| 635 | 142 |  |  |  |  | 681 | return $self->get_guessed_encoding; | 
| 636 |  |  |  |  |  |  |  | 
| 637 |  |  |  |  |  |  | } | 
| 638 |  |  |  |  |  |  |  | 
| 639 |  |  |  |  |  |  | sub _get_report_string { | 
| 640 |  |  |  |  |  |  |  | 
| 641 | 5 |  |  | 5 |  | 20 | _validate_argc(@_ => 1); | 
| 642 | 5 |  |  |  |  | 19 | _validate_private_method(); | 
| 643 | 5 |  |  |  |  | 14 | _validate_object_invocant(@_); | 
| 644 |  |  |  |  |  |  |  | 
| 645 | 5 |  |  |  |  | 10 | my $self = shift(); | 
| 646 | 5 |  |  |  |  | 34 | return $self->{REPORT}{AS_STRING}; | 
| 647 |  |  |  |  |  |  | } | 
| 648 |  |  |  |  |  |  |  | 
| 649 |  |  |  |  |  |  | sub _set_report_string { | 
| 650 |  |  |  |  |  |  |  | 
| 651 | 170 |  |  | 170 |  | 635 | _validate_argc(@_ => 2); | 
| 652 | 170 |  |  |  |  | 399 | _validate_private_method(); | 
| 653 | 170 |  |  |  |  | 368 | _validate_object_invocant(@_); | 
| 654 |  |  |  |  |  |  |  | 
| 655 | 170 |  |  |  |  | 619 | my($self, $report) = @_; | 
| 656 |  |  |  |  |  |  |  | 
| 657 | 170 |  |  |  |  | 354 | _validate_strlen($report); | 
| 658 | 170 |  |  |  |  | 563 | _validate_nonnumeric($report); | 
| 659 |  |  |  |  |  |  |  | 
| 660 | 170 |  |  |  |  | 644 | $self->{REPORT}{AS_STRING} = $report; | 
| 661 |  |  |  |  |  |  | } | 
| 662 |  |  |  |  |  |  |  | 
| 663 |  |  |  |  |  |  |  | 
| 664 |  |  |  |  |  |  | sub get_short_report { | 
| 665 |  |  |  |  |  |  |  | 
| 666 | 0 |  |  | 0 | 0 | 0 | _validate_argc(@_ => 1); | 
| 667 | 0 |  |  |  |  | 0 | _validate_object_invocant(@_); | 
| 668 |  |  |  |  |  |  |  | 
| 669 | 0 |  |  |  |  | 0 | my($self) = @_; | 
| 670 |  |  |  |  |  |  |  | 
| 671 | 0 |  |  |  |  | 0 | my $reason = $self->_get_report_string(); | 
| 672 |  |  |  |  |  |  |  | 
| 673 |  |  |  |  |  |  | # this only makes sense for our own report, not the one from E:G::guess_encoding() | 
| 674 | 0 |  |  |  |  | 0 | for ($reason) { | 
| 675 | 0 |  |  |  |  | 0 | s/\A.*bytes=\d+\n//; | 
| 676 | 0 |  |  |  |  | 0 | s/\n\h+(\S+\h+)? => ".*"$//gm; | 
| 677 |  |  |  |  |  |  | } | 
| 678 | 0 |  |  |  |  | 0 | return $reason; | 
| 679 |  |  |  |  |  |  | } | 
| 680 |  |  |  |  |  |  |  | 
| 681 |  |  |  |  |  |  |  | 
| 682 |  |  |  |  |  |  | sub get_long_report { | 
| 683 | 5 |  |  | 5 | 0 | 22610 | _validate_argc(@_ => 1); | 
| 684 | 5 |  |  |  |  | 20 | _validate_object_invocant(@_); | 
| 685 |  |  |  |  |  |  |  | 
| 686 | 5 |  |  |  |  | 14 | my($self) = @_; | 
| 687 | 5 |  |  |  |  | 100 | my $reason = $self->_get_report_string(); | 
| 688 | 5 |  |  |  |  | 22 | return $reason; | 
| 689 |  |  |  |  |  |  | } | 
| 690 |  |  |  |  |  |  |  | 
| 691 |  |  |  |  |  |  | sub _set_report_scores { | 
| 692 | 142 |  |  | 142 |  | 474 | _validate_argc(@_ => 2); | 
| 693 | 142 |  |  |  |  | 327 | _validate_private_method(); | 
| 694 | 142 |  |  |  |  | 367 | _validate_object_invocant(@_); | 
| 695 |  |  |  |  |  |  |  | 
| 696 | 142 |  |  |  |  | 286 | my($self,$scores) = @_; | 
| 697 | 142 |  |  |  |  | 480 | _validate_reftype(HASH => $scores); | 
| 698 |  |  |  |  |  |  |  | 
| 699 | 142 |  |  |  |  | 471 | $self->{REPORT}{SCORES} = $scores; | 
| 700 |  |  |  |  |  |  | } | 
| 701 |  |  |  |  |  |  |  | 
| 702 |  |  |  |  |  |  | sub get_report_scores { | 
| 703 | 284 |  |  | 284 | 0 | 811 | _validate_argc(@_ => 1); | 
| 704 | 284 |  |  |  |  | 683 | _validate_object_invocant(@_); | 
| 705 |  |  |  |  |  |  |  | 
| 706 | 284 |  |  |  |  | 537 | my($self) = @_; | 
| 707 | 284 |  |  |  |  | 1010 | return $self->{REPORT}{SCORES}; | 
| 708 |  |  |  |  |  |  | } | 
| 709 |  |  |  |  |  |  |  | 
| 710 |  |  |  |  |  |  | sub _pick_winner :method { | 
| 711 |  |  |  |  |  |  |  | 
| 712 | 142 |  |  | 142 |  | 412 | _validate_argc(@_ => 1); | 
| 713 | 142 |  |  |  |  | 353 | _validate_private_method(); | 
| 714 | 142 |  |  |  |  | 303 | _validate_object_invocant(@_); | 
| 715 |  |  |  |  |  |  |  | 
| 716 | 142 |  |  |  |  | 290 | my($self) = @_; | 
| 717 |  |  |  |  |  |  |  | 
| 718 | 142 |  |  |  |  | 438 | my $scores_ref = $self->get_report_scores(); | 
| 719 | 142 |  |  |  |  | 438 | my $samples    = $self->get_report_sample(); | 
| 720 |  |  |  |  |  |  |  | 
| 721 | 142 |  |  |  |  | 371 | _validate_reftype(HASH => $scores_ref); | 
| 722 |  |  |  |  |  |  |  | 
| 723 | 142 |  |  |  |  | 281 | _validate_strlen($samples); | 
| 724 | 142 |  |  |  |  | 337 | _validate_no_wide_characters($samples); | 
| 725 | 142 |  |  |  |  | 371 | _validate_has_nonascii($samples); | 
| 726 |  |  |  |  |  |  |  | 
| 727 | 142 |  |  |  |  | 527 | my @values = uniq values %$scores_ref; | 
| 728 | 142 |  |  |  |  | 17187 | my $high_score = max @values; | 
| 729 | 142 |  |  |  |  | 483 | my @candidates = grep { $scores_ref->{$_} == $high_score } keys %$scores_ref; | 
|  | 677 |  |  |  |  | 1563 |  | 
| 730 | 142 |  |  |  |  | 476 | @candidates = $self->_sort_encodings_by_priority(@candidates); | 
| 731 | 142 |  |  |  |  | 333 | my @converts = uniq map { decode($_, $samples, Encode::FB_XMLCREF | Encode::LEAVE_SRC) } @candidates; | 
|  | 172 |  |  |  |  | 1525 |  | 
| 732 | 142 | 50 |  |  |  | 392 | if (@converts == 1) { | 
| 733 | 142 |  |  |  |  | 384 | my $winner = $candidates[0]; | 
| 734 | 142 |  |  |  |  | 454 | $self->_set_guessed_encoding($winner); | 
| 735 | 142 |  |  |  |  | 609 | return $winner; | 
| 736 |  |  |  |  |  |  | } else { | 
| 737 | 0 |  |  |  |  | 0 | $self->_set_errmsg("tied scores have different Unicode conversions"); | 
| 738 | 0 |  |  |  |  | 0 | return undef; | 
| 739 |  |  |  |  |  |  | } | 
| 740 |  |  |  |  |  |  |  | 
| 741 |  |  |  |  |  |  | } | 
| 742 |  |  |  |  |  |  |  | 
| 743 |  |  |  |  |  |  | sub _evaluate_scores :method { | 
| 744 |  |  |  |  |  |  |  | 
| 745 | 142 |  |  | 142 |  | 520 | _validate_argc(@_ => 1); | 
| 746 | 142 |  |  |  |  | 361 | _validate_private_method(); | 
| 747 | 142 |  |  |  |  | 353 | _validate_object_invocant(@_); | 
| 748 |  |  |  |  |  |  |  | 
| 749 | 142 |  |  |  |  | 317 | my($self) = @_; | 
| 750 |  |  |  |  |  |  |  | 
| 751 | 142 |  |  |  |  | 715 | my $scores_ref = $self->get_report_scores(); | 
| 752 | 142 |  |  |  |  | 562 | _validate_reftype(HASH => $scores_ref); | 
| 753 |  |  |  |  |  |  |  | 
| 754 | 142 |  |  |  |  | 701 | my $samples = $self->get_report_sample(); | 
| 755 | 142 |  |  |  |  | 316 | _validate_strlen($samples); | 
| 756 | 142 |  |  |  |  | 436 | _validate_no_wide_characters($samples); | 
| 757 | 142 |  |  |  |  | 393 | _validate_has_nonascii($samples); | 
| 758 |  |  |  |  |  |  |  | 
| 759 | 142 |  |  |  |  | 1323 | my @values = uniq values %$scores_ref; | 
| 760 | 142 |  |  |  |  | 813 | my $sum_of_all_scores = sum @values; | 
| 761 | 142 |  |  |  |  | 549 | my $high_score = max @values; | 
| 762 |  |  |  |  |  |  |  | 
| 763 | 142 |  |  |  |  | 827 | my $explanation = sprintf("total bytes=%d, high bytes=%d, distinct high bytes=%d\n", | 
| 764 |  |  |  |  |  |  | $self->get_report_data_length, | 
| 765 |  |  |  |  |  |  | $self->get_report_total_high_bytes, | 
| 766 |  |  |  |  |  |  | $self->get_report_distinct_high_bytes, | 
| 767 |  |  |  |  |  |  | ); | 
| 768 |  |  |  |  |  |  |  | 
| 769 | 142 |  |  |  |  | 1083 | for my $score (sort {$b <=> $a} @values) { | 
|  | 515 |  |  |  |  | 1100 |  | 
| 770 |  |  |  |  |  |  |  | 
| 771 | 470 | 50 |  |  |  | 1169 | next if $score == 0; | 
| 772 | 470 | 100 |  |  |  | 1171 | my $winner = $score == $high_score ? "*" : " "; | 
| 773 |  |  |  |  |  |  |  | 
| 774 | 470 |  |  |  |  | 1009 | my $normalized_score = 100 * ($score / $sum_of_all_scores); | 
| 775 | 470 |  |  |  |  | 5672 | $explanation .= sprintf "  $winner%9.6f %+f", $normalized_score, log($score); | 
| 776 |  |  |  |  |  |  |  | 
| 777 | 470 |  |  |  |  | 1792 | my @candidates = grep { $scores_ref->{$_} == $score } keys %$scores_ref; | 
|  | 2525 |  |  |  |  | 5619 |  | 
| 778 | 470 |  |  |  |  | 1984 | @candidates = $self->_sort_encodings_by_priority(@candidates); | 
| 779 | 470 |  |  |  |  | 2199 | $explanation .= sprintf " %s\n", join(", " => @candidates); | 
| 780 |  |  |  |  |  |  |  | 
| 781 | 470 |  |  |  |  | 883 | my @converts = uniq map { decode($_, $samples, Encode::FB_XMLCREF | Encode::LEAVE_SRC) } @candidates; | 
|  | 677 |  |  |  |  | 7736 |  | 
| 782 | 470 | 50 |  |  |  | 1155 | if (@converts == 1) { | 
| 783 |  |  |  |  |  |  | # next if $converts[0] =~ /[\x80-\x9F]/; | 
| 784 | 470 |  |  |  |  | 1567 | $self->_set_guessed_encoding($candidates[0]); | 
| 785 | 470 |  |  |  |  | 2013 | $explanation .= sprintf " %-12s => \"%s\"\n", "", $converts[0]; | 
| 786 | 470 |  |  |  |  | 1569 | $explanation .= sprintf " %-12s => \"%s\"\n", "", uniquote($converts[0]); | 
| 787 |  |  |  |  |  |  | } else { | 
| 788 | 0 |  |  |  |  | 0 | for my $enc (@candidates) { | 
| 789 | 0 |  |  |  |  | 0 | my $as_utf8 = decode($enc, $samples, Encode::FB_XMLCREF | Encode::LEAVE_SRC); | 
| 790 | 0 | 0 |  |  |  | 0 | next if $as_utf8 =~ /[\x80-\x9F]/; | 
| 791 | 0 |  |  |  |  | 0 | $explanation .= sprintf " %-12s => \"%s\"\n", $enc, $as_utf8; | 
| 792 | 0 |  |  |  |  | 0 | $explanation .= sprintf " %-12s => \"%s\"\n", $enc, uniquote($as_utf8); | 
| 793 |  |  |  |  |  |  | } | 
| 794 |  |  |  |  |  |  | } | 
| 795 |  |  |  |  |  |  |  | 
| 796 |  |  |  |  |  |  | } | 
| 797 |  |  |  |  |  |  |  | 
| 798 | 142 |  |  |  |  | 754 | $self->_set_report_string($explanation); | 
| 799 |  |  |  |  |  |  |  | 
| 800 | 142 |  |  |  |  | 601 | $self->_pick_winner(); | 
| 801 |  |  |  |  |  |  |  | 
| 802 |  |  |  |  |  |  | } | 
| 803 |  |  |  |  |  |  |  | 
| 804 |  |  |  |  |  |  | sub _encache :method { | 
| 805 |  |  |  |  |  |  |  | 
| 806 | 1220 |  |  | 1220 |  | 2683 | _validate_argc(@_ => 2); | 
| 807 | 1220 |  |  |  |  | 2233 | _validate_private_method(); | 
| 808 | 1220 |  |  |  |  | 2493 | _validate_object_invocant(@_); | 
| 809 |  |  |  |  |  |  |  | 
| 810 | 1220 |  |  |  |  | 2119 | my($self, $enc) = @_; | 
| 811 |  |  |  |  |  |  |  | 
| 812 | 1220 |  |  |  |  | 2332 | _validate_known_encoding($enc); | 
| 813 |  |  |  |  |  |  |  | 
| 814 | 1220 |  |  |  |  | 2218 | our @B; local *B = $self->get_byte_table; | 
|  | 1220 |  |  |  |  | 2693 |  | 
| 815 |  |  |  |  |  |  |  | 
| 816 | 1220 | 100 |  |  |  | 5356 | return if exists $B[0]{$enc}; | 
| 817 |  |  |  |  |  |  |  | 
| 818 | 52 |  |  |  |  | 205 | debug("encaching weights for $enc"); | 
| 819 |  |  |  |  |  |  |  | 
| 820 | 52 |  |  |  |  | 400 | my $td = $self->get_training_data(); | 
| 821 | 52 |  |  |  |  | 6787 | my $total_training_data = sum values %$td; | 
| 822 |  |  |  |  |  |  |  | 
| 823 |  |  |  |  |  |  | BYTE: | 
| 824 | 52 |  |  |  |  | 149 | for my $byte_ord ( 0x80 .. 0xFF ) { | 
| 825 | 6656 |  |  |  |  | 8543 | my $unichr = eval { decode($enc, chr($byte_ord), FB_CROAK) }; | 
|  | 6656 |  |  |  |  | 26322 |  | 
| 826 |  |  |  |  |  |  |  | 
| 827 | 6656 | 100 | 66 |  |  | 182672 | if ($@ || $unichr eq "\N{REPLACEMENT CHARACTER}") { | 
| 828 | 86 |  |  |  |  | 270 | debug("byte %02X has no Unicode mapping in $enc", $byte_ord); | 
| 829 |  |  |  |  |  |  | # so intentionally leave this byte slot value at undef | 
| 830 | 86 |  |  |  |  | 245 | $self->_set_byte_weight($enc, $byte_ord, undef); | 
| 831 | 86 |  |  |  |  | 236 | next BYTE; | 
| 832 |  |  |  |  |  |  | } | 
| 833 |  |  |  |  |  |  |  | 
| 834 | 6570 | 50 |  |  |  | 14207 | die if ord($unichr) == 0xFFFD; | 
| 835 |  |  |  |  |  |  |  | 
| 836 | 6570 |  |  |  |  | 17050 | my $count = $td->{ord $unichr}; | 
| 837 |  |  |  |  |  |  |  | 
| 838 |  |  |  |  |  |  | # several different strategies for missing training data | 
| 839 | 6570 | 100 |  |  |  | 14755 | if (!defined $count) { | 
| 840 |  |  |  |  |  |  |  | 
| 841 | 1412 |  | 100 |  |  | 4788 | debug("$enc byte %02X => U+%04X (%s) missing from training set\n", | 
| 842 |  |  |  |  |  |  | $byte_ord, ord($unichr), | 
| 843 |  |  |  |  |  |  | charnames::viacode(ord($unichr)) || ""); | 
| 844 |  |  |  |  |  |  |  | 
| 845 |  |  |  |  |  |  | # if in C1 control set, very unlikely to be correct | 
| 846 | 1412 | 100 |  |  |  | 4981 | if ($unichr =~ /[\x80-\x9F]/) { | 
| 847 | 840 |  |  |  |  | 2258 | debug("enc $enc %02X => U+%02X from C1 control set at undef", | 
| 848 |  |  |  |  |  |  | $byte_ord, ord $unichr); | 
| 849 | 840 |  |  |  |  | 1995 | $self->_set_byte_weight($enc, $byte_ord, undef); | 
| 850 | 840 |  |  |  |  | 1656 | next BYTE; | 
| 851 |  |  |  |  |  |  | } | 
| 852 |  |  |  |  |  |  |  | 
| 853 |  |  |  |  |  |  | # disqualify unless a private use character or from target script set | 
| 854 | 572 | 100 | 66 |  |  | 4049 | if ($unichr !~ /\p{Private_Use}/ 		&&  # *very* occasionally used | 
|  |  |  | 66 |  |  |  |  | 
| 855 |  |  |  |  |  |  | $unichr !~ /[\p{Common}\p{Inherited}]/  &&  # eg: digits, punct, diacritics | 
| 856 |  |  |  |  |  |  | $unichr !~ /[\p{Latin}\p{Greek}]/           # regular letters | 
| 857 |  |  |  |  |  |  | ) | 
| 858 |  |  |  |  |  |  | { | 
| 859 |  |  |  |  |  |  |  | 
| 860 | 480 |  | 50 |  |  | 1780 | debug("$enc byte %02X => U+%04X (%s) outside target script set", | 
| 861 |  |  |  |  |  |  | $byte_ord, ord($unichr), | 
| 862 |  |  |  |  |  |  | charnames::viacode(ord($unichr)) || ""); | 
| 863 | 480 |  |  |  |  | 1790 | $self->_set_byte_weight($enc, $byte_ord, undef); | 
| 864 | 480 |  |  |  |  | 990 | next BYTE; | 
| 865 |  |  |  |  |  |  | } | 
| 866 |  |  |  |  |  |  |  | 
| 867 |  |  |  |  |  |  | # otherwise set the count to a neutral 0; | 
| 868 |  |  |  |  |  |  | # could (should?) do add-one smoothing here, | 
| 869 |  |  |  |  |  |  | # or even figure out some negative weight | 
| 870 | 92 |  |  |  |  | 140 | $count = 0; | 
| 871 |  |  |  |  |  |  | } | 
| 872 |  |  |  |  |  |  |  | 
| 873 | 5250 |  |  |  |  | 6707 | my $weight = $count / $total_training_data; | 
| 874 |  |  |  |  |  |  |  | 
| 875 | 5250 |  |  |  |  | 14525 | debug("$enc 0x%02X => U+%04X %8d / %8d = %e\n", | 
| 876 |  |  |  |  |  |  | $byte_ord, ord($unichr), | 
| 877 |  |  |  |  |  |  | $count, $total_training_data, $weight); | 
| 878 |  |  |  |  |  |  |  | 
| 879 |  |  |  |  |  |  | #$B[$byte_ord & 127]{$enc} = $weight; | 
| 880 |  |  |  |  |  |  |  | 
| 881 | 5250 |  |  |  |  | 14568 | $self->_set_byte_weight($enc, $byte_ord, $weight); | 
| 882 |  |  |  |  |  |  |  | 
| 883 |  |  |  |  |  |  | } | 
| 884 |  |  |  |  |  |  |  | 
| 885 |  |  |  |  |  |  | } | 
| 886 |  |  |  |  |  |  |  | 
| 887 |  |  |  |  |  |  | sub dump_byte_table :method { | 
| 888 |  |  |  |  |  |  |  | 
| 889 | 0 |  |  | 0 | 0 | 0 | _validate_argc(@_ => 1); | 
| 890 | 0 |  |  |  |  | 0 | _validate_object_invocant(@_); | 
| 891 |  |  |  |  |  |  |  | 
| 892 | 0 |  |  |  |  | 0 | my($self) = @_; | 
| 893 |  |  |  |  |  |  |  | 
| 894 | 0 |  |  |  |  | 0 | say "---DUMPING BYTE TABLE---"; | 
| 895 | 0 |  |  |  |  | 0 | my $bt = $self->get_byte_table; | 
| 896 | 0 |  |  |  |  | 0 | for my $byte_ord ( 0x80 .. 0xFF ) { | 
| 897 | 0 |  |  |  |  | 0 | printf "byte 0x%02X => {\n", $byte_ord; | 
| 898 | 0 |  |  |  |  | 0 | my $href = $bt->[$byte_ord & 127]; | 
| 899 | 0 | 0 | 0 |  |  | 0 | for my $enc (sort { | 
|  | 0 |  | 0 |  |  | 0 |  | 
|  |  |  | 0 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
| 900 |  |  |  |  |  |  |  | 
| 901 |  |  |  |  |  |  | ($href->{$b} || 0) <=> ($href->{$a} || 0) | 
| 902 |  |  |  |  |  |  | || | 
| 903 |  |  |  |  |  |  | defined($href->{$b}) <=> defined($href->{$a}) | 
| 904 |  |  |  |  |  |  |  | 
| 905 |  |  |  |  |  |  | || | 
| 906 |  |  |  |  |  |  |  | 
| 907 |  |  |  |  |  |  | lc(str2nummistr($a)) cmp lc(str2nummistr($b)) | 
| 908 |  |  |  |  |  |  | || | 
| 909 |  |  |  |  |  |  | str2nummistr($a)  cmp    str2nummistr($b) | 
| 910 |  |  |  |  |  |  |  | 
| 911 |  |  |  |  |  |  | || | 
| 912 |  |  |  |  |  |  |  | 
| 913 |  |  |  |  |  |  | $a   cmp  $b | 
| 914 |  |  |  |  |  |  |  | 
| 915 |  |  |  |  |  |  | } keys %$href) | 
| 916 |  |  |  |  |  |  | { | 
| 917 | 0 |  |  |  |  | 0 | printf "  %-12s => ", $enc; | 
| 918 | 0 | 0 |  |  |  | 0 | if (defined $href->{$enc}) { | 
| 919 | 0 |  |  |  |  | 0 | printf "%e ", $href->{$enc}; | 
| 920 | 0 |  |  |  |  | 0 | my $unichr = decode($enc, chr($byte_ord), FB_CROAK); | 
| 921 | 0 |  |  |  |  | 0 | printf "U+%04X ", ord($unichr); | 
| 922 | 0 |  | 0 |  |  | 0 | my $name = charnames::viacode(ord $unichr) | 
| 923 |  |  |  |  |  |  | || sprintf "unnamed character U+%04X", ord $unichr; | 
| 924 | 0 |  |  |  |  | 0 | say $name; | 
| 925 |  |  |  |  |  |  | } else { | 
| 926 | 0 |  |  |  |  | 0 | say "undef"; | 
| 927 |  |  |  |  |  |  | } | 
| 928 |  |  |  |  |  |  | } | 
| 929 | 0 |  |  |  |  | 0 | print "}\n"; | 
| 930 |  |  |  |  |  |  | } | 
| 931 |  |  |  |  |  |  |  | 
| 932 | 0 |  |  |  |  | 0 | say "---END BYTE TABLE---"; | 
| 933 |  |  |  |  |  |  | } | 
| 934 |  |  |  |  |  |  |  | 
| 935 |  |  |  |  |  |  | sub dump_training_data :method { | 
| 936 |  |  |  |  |  |  |  | 
| 937 | 0 |  |  | 0 | 0 | 0 | _validate_argc(@_ => 1); | 
| 938 | 0 |  |  |  |  | 0 | _validate_object_invocant(@_); | 
| 939 |  |  |  |  |  |  |  | 
| 940 | 0 |  |  |  |  | 0 | my($self) = @_; | 
| 941 |  |  |  |  |  |  |  | 
| 942 | 0 |  |  |  |  | 0 | require Unicode::Collate; | 
| 943 |  |  |  |  |  |  |  | 
| 944 | 0 |  |  |  |  | 0 | state $collator = Unicode::Collate->new( | 
| 945 |  |  |  |  |  |  | variable => "non-ignorable", | 
| 946 |  |  |  |  |  |  | ); | 
| 947 |  |  |  |  |  |  |  | 
| 948 | 0 |  |  |  |  | 0 | say "---DUMPING TRAINING DATA---"; | 
| 949 |  |  |  |  |  |  |  | 
| 950 | 0 |  |  |  |  | 0 | my $td = $self->get_training_data; | 
| 951 | 0 |  |  |  |  | 0 | printf "Number of keys: %d\n", scalar keys   %$td; | 
| 952 | 0 |  |  |  |  | 0 | printf "Sum of values:  %d\n", sum    values  %$td; | 
| 953 |  |  |  |  |  |  |  | 
| 954 | 0 |  |  |  |  | 0 | my(@keycodes, @keychars); | 
| 955 | 0 |  |  |  |  | 0 | @keycodes = keys %$td; | 
| 956 | 0 |  |  |  |  | 0 | @keychars = map { chr } @keycodes; | 
|  | 0 |  |  |  |  | 0 |  | 
| 957 | 0 |  |  |  |  | 0 | @keychars = $collator->sort(@keychars); | 
| 958 | 0 |  |  |  |  | 0 | @keycodes = map { ord } @keychars; | 
|  | 0 |  |  |  |  | 0 |  | 
| 959 |  |  |  |  |  |  |  | 
| 960 | 0 |  |  |  |  | 0 | my $max_value = max values %$td; | 
| 961 | 0 |  |  |  |  | 0 | my $max_width = length($max_value); | 
| 962 |  |  |  |  |  |  |  | 
| 963 | 0 |  |  |  |  | 0 | my $sum_values = sum values %$td; | 
| 964 |  |  |  |  |  |  |  | 
| 965 | 0 |  |  |  |  | 0 | for my $codepoint (@keycodes) { | 
| 966 | 0 |  |  |  |  | 0 | local $_ = chr($codepoint); | 
| 967 |  |  |  |  |  |  |  | 
| 968 |  |  |  |  |  |  | ##printf "%#06X => %*d, ", $codepoint, $max_width, $td->{$codepoint}; | 
| 969 | 0 |  |  |  |  | 0 | printf "0x%06X => %25.20e, ", $codepoint, $td->{$codepoint} / $sum_values; | 
| 970 | 0 |  |  |  |  | 0 | print "  # "; | 
| 971 |  |  |  |  |  |  |  | 
| 972 |  |  |  |  |  |  |  | 
| 973 | 0 | 0 |  |  |  | 0 | if (/[\pC\pZ]/) { | 
| 974 | 0 |  |  |  |  | 0 | print "<-> "; | 
| 975 |  |  |  |  |  |  | } else { | 
| 976 | 0 | 0 |  |  |  | 0 | print "\N{DOTTED CIRCLE}" if /\p{BC=NSM}/; | 
| 977 |  |  |  |  |  |  | #print "\N{LEFT-TO-RIGHT OVERRIDE}" if /[\p{BC=R}\p{BC=AL}\p{BC=AN}]/; | 
| 978 | 0 |  |  |  |  | 0 | print " $_ "; | 
| 979 | 0 | 0 |  |  |  | 0 | print " " unless /[\p{EA=F}\p{EA=W}]/; | 
| 980 |  |  |  |  |  |  | } | 
| 981 |  |  |  |  |  |  |  | 
| 982 | 0 |  |  |  |  | 0 | my $name   = ""; | 
| 983 | 0 |  |  |  |  | 0 | my $script = "unassigned_script"; | 
| 984 | 0 |  |  |  |  | 0 | my $cat    = "Cn"; | 
| 985 |  |  |  |  |  |  |  | 
| 986 | 0 | 0 |  |  |  | 0 | if (my $ci = charinfo($codepoint)) { | 
| 987 | 0 |  | 0 |  |  | 0 | $name   = $$ci{name}     || ""; | 
| 988 | 0 |  | 0 |  |  | 0 | $script = $$ci{script}   || "unassigned"; | 
| 989 | 0 |  | 0 |  |  | 0 | $cat    = $$ci{category} || "XX"; | 
| 990 |  |  |  |  |  |  | } | 
| 991 |  |  |  |  |  |  |  | 
| 992 | 0 |  |  |  |  | 0 | print "gc=$cat "; | 
| 993 | 0 |  |  |  |  | 0 | printf "  sc=%-10s ", $script; | 
| 994 | 0 |  |  |  |  | 0 | say $name; | 
| 995 |  |  |  |  |  |  |  | 
| 996 |  |  |  |  |  |  | } | 
| 997 |  |  |  |  |  |  |  | 
| 998 | 0 |  |  |  |  | 0 | say "---END TRAINING DATA DUMP---"; | 
| 999 |  |  |  |  |  |  |  | 
| 1000 |  |  |  |  |  |  |  | 
| 1001 |  |  |  |  |  |  | } | 
| 1002 |  |  |  |  |  |  |  | 
| 1003 |  |  |  |  |  |  | sub _sort_encodings_by_priority { | 
| 1004 |  |  |  |  |  |  |  | 
| 1005 | 612 |  |  | 612 |  | 1741 | _validate_argc_min(@_ => 2); | 
| 1006 | 612 |  |  |  |  | 1117 | _validate_private_method(); | 
| 1007 | 612 |  |  |  |  | 1208 | _validate_object_invocant(@_); | 
| 1008 | 612 |  |  |  |  | 1277 | _validate_list_context(); | 
| 1009 |  |  |  |  |  |  |  | 
| 1010 | 612 |  |  |  |  | 1225 | my($self, @enc) = @_; | 
| 1011 |  |  |  |  |  |  |  | 
| 1012 | 612 |  |  |  |  | 776 | my %seen; | 
| 1013 |  |  |  |  |  |  |  | 
| 1014 |  |  |  |  |  |  | my %rank; | 
| 1015 | 612 |  |  |  |  | 1015 | my $priority = -1; | 
| 1016 | 612 |  |  |  |  | 2521 | my @choices = map { known_encoding($_) } $self->get_suspects; | 
|  | 5508 |  |  |  |  | 10876 |  | 
| 1017 | 612 |  |  |  |  | 1871 | for my $name (@choices) { | 
| 1018 | 5508 | 100 |  |  |  | 16640 | $rank{$name} = $priority-- unless $seen{$name}++; | 
| 1019 |  |  |  |  |  |  | } | 
| 1020 |  |  |  |  |  |  |  | 
| 1021 | 849 |  |  |  |  | 2296 | my @sorted_encs = | 
| 1022 | 400 | 50 |  |  |  | 1059 | map { $_->[0] } | 
| 1023 |  |  |  |  |  |  | sort { | 
| 1024 | 849 |  | 50 |  |  | 2031 | $b->[1] <=> $a->[1] | 
| 1025 |  |  |  |  |  |  | || | 
| 1026 |  |  |  |  |  |  | $a->[0] cmp $b->[0] | 
| 1027 |  |  |  |  |  |  | } | 
| 1028 | 612 |  |  |  |  | 1324 | map { [ $_ => $rank{ known_encoding($_) } || -1e6 ]  } | 
| 1029 |  |  |  |  |  |  | @enc; | 
| 1030 |  |  |  |  |  |  |  | 
| 1031 | 612 |  |  |  |  | 4118 | return @sorted_encs; | 
| 1032 |  |  |  |  |  |  | } | 
| 1033 |  |  |  |  |  |  |  | 
| 1034 |  |  |  |  |  |  | ######################################################################## | 
| 1035 |  |  |  |  |  |  | # utility functions | 
| 1036 |  |  |  |  |  |  | ######################################################################## | 
| 1037 |  |  |  |  |  |  |  | 
| 1038 |  |  |  |  |  |  | sub _set_examples_from_data { | 
| 1039 |  |  |  |  |  |  |  | 
| 1040 | 142 |  |  | 142 |  | 378 | _validate_argc(@_ => 2); | 
| 1041 | 142 |  |  |  |  | 307 | _validate_private_method(); | 
| 1042 | 142 |  |  |  |  | 362 | _validate_object_invocant(@_); | 
| 1043 |  |  |  |  |  |  |  | 
| 1044 | 142 |  |  |  |  | 326 | my($self, $data) = @_; | 
| 1045 |  |  |  |  |  |  |  | 
| 1046 | 142 |  |  |  |  | 355 | my @hits = (); | 
| 1047 |  |  |  |  |  |  |  | 
| 1048 | 142 |  |  |  |  | 243 | my(%seen_string, %seen_char); | 
| 1049 |  |  |  |  |  |  |  | 
| 1050 | 142 |  |  |  |  | 36905 | my @chars = $data =~ /[\x80-\xFF]/g; | 
| 1051 | 142 |  |  |  |  | 1237 | $self->_set_report_total_high_bytes(scalar @chars); | 
| 1052 |  |  |  |  |  |  |  | 
| 1053 | 142 |  |  |  |  | 741 | my @uniq_chars = uniq(@chars); | 
| 1054 | 142 |  |  |  |  | 896 | $self->_set_report_distinct_high_bytes(scalar @uniq_chars); | 
| 1055 |  |  |  |  |  |  |  | 
| 1056 | 142 |  |  |  |  | 152891 | while ($data =~ m{ (?<string> | 
| 1057 |  |  |  |  |  |  | (?: \S+ \h* | \S* ) | 
| 1058 |  |  |  |  |  |  | (?<char> \P{ASCII} ) | 
| 1059 |  |  |  |  |  |  | (?: \h* \S+ | \S* ) | 
| 1060 |  |  |  |  |  |  | ) | 
| 1061 |  |  |  |  |  |  | }gx | 
| 1062 |  |  |  |  |  |  | ) | 
| 1063 |  |  |  |  |  |  | { | 
| 1064 | 7 | 100 | 100 | 7 |  | 255178 | if (!      $seen_string{ $+{string} }++ | 
|  | 7 |  |  |  |  | 4044 |  | 
|  | 7 |  |  |  |  | 2949 |  | 
|  | 10820 |  |  |  |  | 15783166 |  | 
| 1065 |  |  |  |  |  |  | &&  ++$seen_char{   $+{char}   } < 2) | 
| 1066 |  |  |  |  |  |  | { | 
| 1067 | 1118 |  |  |  |  | 5549 | my $str = $+{string}; | 
| 1068 | 1118 |  |  |  |  | 6998156 | push @hits, $str; | 
| 1069 |  |  |  |  |  |  | } | 
| 1070 |  |  |  |  |  |  | } | 
| 1071 |  |  |  |  |  |  |  | 
| 1072 | 142 |  |  |  |  | 1814 | my $example_string = join(" ", @hits); | 
| 1073 | 142 | 50 |  |  |  | 1973 | $self->_set_report_sample($example_string) if @hits; | 
| 1074 |  |  |  |  |  |  | } | 
| 1075 |  |  |  |  |  |  |  | 
| 1076 |  |  |  |  |  |  | sub known_encoding(_) { | 
| 1077 |  |  |  |  |  |  |  | 
| 1078 | 156480 |  |  | 156480 | 0 | 292894 | _validate_argc(@_ => 1); | 
| 1079 | 156480 |  |  |  |  | 260275 | _validate_private_method(); | 
| 1080 |  |  |  |  |  |  |  | 
| 1081 | 156480 |  |  |  |  | 199311 | my($enc) = @_; | 
| 1082 |  |  |  |  |  |  |  | 
| 1083 | 156480 |  |  |  |  | 306190 | _validate_strlen($enc); | 
| 1084 | 156480 |  |  |  |  | 249539 | _validate_nonref($enc); | 
| 1085 |  |  |  |  |  |  |  | 
| 1086 | 156480 | 50 |  |  |  | 376792 | if (my $enc_obj = Encode::find_encoding($enc)) { | 
| 1087 | 156480 |  | 33 |  |  | 2449284 | return $enc_obj->name || $enc; | 
| 1088 |  |  |  |  |  |  | } else { | 
| 1089 | 0 |  |  |  |  | 0 | return undef; | 
| 1090 |  |  |  |  |  |  | } | 
| 1091 |  |  |  |  |  |  |  | 
| 1092 |  |  |  |  |  |  | } | 
| 1093 |  |  |  |  |  |  |  | 
| 1094 |  |  |  |  |  |  | # convert string with embedded decimal numbers into something | 
| 1095 |  |  |  |  |  |  | # that can be sorted by code point; that is, pad things like | 
| 1096 |  |  |  |  |  |  | # 23 into 0000023.  Also works on signed numbers and on floating | 
| 1097 |  |  |  |  |  |  | # point numbers. | 
| 1098 |  |  |  |  |  |  | # | 
| 1099 |  |  |  |  |  |  | sub str2nummistr(_) { | 
| 1100 |  |  |  |  |  |  |  | 
| 1101 | 0 |  |  | 0 | 0 | 0 | _validate_argc(@_ => 1); | 
| 1102 | 0 |  |  |  |  | 0 | _validate_private_method(); | 
| 1103 |  |  |  |  |  |  |  | 
| 1104 | 0 |  |  |  |  | 0 | my($old) = @_; | 
| 1105 |  |  |  |  |  |  |  | 
| 1106 | 0 |  |  |  |  | 0 | _validate_strlen($old); | 
| 1107 | 0 |  |  |  |  | 0 | _validate_nonref($old); | 
| 1108 |  |  |  |  |  |  |  | 
| 1109 | 0 |  |  |  |  | 0 | state $cache = { }; | 
| 1110 | 0 | 0 |  |  |  | 0 | return $$cache{$old} if defined $$cache{$old}; | 
| 1111 | 0 |  |  |  |  | 0 | my $new = $old; | 
| 1112 |  |  |  |  |  |  |  | 
| 1113 | 0 |  |  |  |  | 0 | $new =~ s{ ( | 
| 1114 |  |  |  |  |  |  | # allow a plus or minus | 
| 1115 |  |  |  |  |  |  | # let them use any kind of dash but em and en | 
| 1116 |  |  |  |  |  |  | (?: | 
| 1117 |  |  |  |  |  |  | (?! [\N{EM DASH}\N{EN DASH}]  ) | 
| 1118 |  |  |  |  |  |  | [\N{PLUS SIGN}\N{PLUS-MINUS SIGN}\N{MINUS-OR-PLUS SIGN}\p{Dash}] | 
| 1119 |  |  |  |  |  |  | ) | 
| 1120 |  |  |  |  |  |  | (?: \b \d{1,3} (?: , \d{3} )+ \b | 
| 1121 |  |  |  |  |  |  | | \d+ | 
| 1122 |  |  |  |  |  |  | ) | 
| 1123 |  |  |  |  |  |  | ) | 
| 1124 |  |  |  |  |  |  | (?: \. (\d+) )? | 
| 1125 |  |  |  |  |  |  | }{ | 
| 1126 | 0 |  |  |  |  | 0 | my ($left, $right) = ($1, $2); | 
| 1127 | 0 |  |  |  |  | 0 | $left =~ s/[\N{COMMA}\N{PLUS SIGN}\N{PLUS-MINUS SIGN}\N{MINUS-OR-PLUS SIGN}]//g; | 
| 1128 | 0 |  |  |  |  | 0 | $left =~ s/\p{Pd}/-/g; | 
| 1129 | 0 |  |  |  |  | 0 | my $result; | 
| 1130 | 0 | 0 |  |  |  | 0 | if (length $right) { | 
| 1131 | 0 |  |  |  |  | 0 | $result = sprintf(" 000%+012d.%s ", $left, $right); | 
| 1132 |  |  |  |  |  |  |  | 
| 1133 |  |  |  |  |  |  | } else { | 
| 1134 | 0 |  |  |  |  | 0 | $result = sprintf(" 000%+012d ", $left); | 
| 1135 |  |  |  |  |  |  | } | 
| 1136 |  |  |  |  |  |  |  | 
| 1137 |  |  |  |  |  |  | # terrible hack to get signed numbers to sort right | 
| 1138 | 0 |  |  |  |  | 0 | $result =~ tr[\-+][\N{CYRILLIC CAPITAL LETTER SCHWA}\N{CYRILLIC CAPITAL LETTER BE}]; | 
| 1139 |  |  |  |  |  |  |  | 
| 1140 | 0 |  |  |  |  | 0 | $result; | 
| 1141 |  |  |  |  |  |  | }xge; | 
| 1142 |  |  |  |  |  |  |  | 
| 1143 | 0 |  |  |  |  | 0 | $$cache{$old} = $new; | 
| 1144 |  |  |  |  |  |  |  | 
| 1145 | 0 |  |  |  |  | 0 | return $new; | 
| 1146 |  |  |  |  |  |  |  | 
| 1147 |  |  |  |  |  |  | } | 
| 1148 |  |  |  |  |  |  |  | 
| 1149 |  |  |  |  |  |  | sub strnum_sort { | 
| 1150 |  |  |  |  |  |  |  | 
| 1151 | 0 |  |  | 0 | 0 | 0 | _validate_argc_min(@_ => 1); | 
| 1152 | 0 |  |  |  |  | 0 | _validate_private_method(); | 
| 1153 | 0 |  |  |  |  | 0 | _validate_list_context(); | 
| 1154 |  |  |  |  |  |  |  | 
| 1155 | 0 |  |  |  |  | 0 | return  map  { $_->[0] } | 
|  | 0 |  |  |  |  | 0 |  | 
| 1156 | 0 |  |  |  |  | 0 | sort { $a->[1] cmp $b->[1] } | 
| 1157 | 0 |  |  |  |  | 0 | map  { [ $_ => lc str2nummistr($_) ] } | 
| 1158 |  |  |  |  |  |  | @_ | 
| 1159 |  |  |  |  |  |  | ; | 
| 1160 |  |  |  |  |  |  |  | 
| 1161 |  |  |  |  |  |  | } | 
| 1162 |  |  |  |  |  |  |  | 
| 1163 |  |  |  |  |  |  | sub uniq { | 
| 1164 | 1038 |  |  | 1038 | 0 | 17728 | _validate_private_method(); | 
| 1165 | 1038 |  |  |  |  | 2219 | _validate_list_context(); | 
| 1166 |  |  |  |  |  |  |  | 
| 1167 | 1038 |  |  |  |  | 1499 | my %seen; | 
| 1168 |  |  |  |  |  |  | my @retlist; | 
| 1169 | 1038 |  |  |  |  | 2402 | for (@_) { | 
| 1170 | 17524 | 100 |  |  |  | 48284 | push @retlist, $_ unless $seen{$_}++; | 
| 1171 |  |  |  |  |  |  | } | 
| 1172 | 1038 |  |  |  |  | 4809 | return @retlist; | 
| 1173 |  |  |  |  |  |  | } | 
| 1174 |  |  |  |  |  |  |  | 
# Return a copy of the argument (or of $_ when called without one, per
# the "_" prototype) in which every non-ASCII character is replaced by
# a \N{...} escape.  The escape carries the character's name as
# reported by charnames::viacode(), or "U+XXXX" (at least four hex
# digits) when viacode() yields nothing.
#
# _validate_argc() and _validate_nonref() are defined elsewhere in this
# file; presumably they check the argument count and reject references
# -- confirm against their definitions.
sub uniquote(_) {
    _validate_argc(@_ => 1);

    my($str) = @_;
    _validate_nonref($str);

    # Build the \N{...} escape for a single character.
    my $escape_for = sub {
        my $codepoint = ord $_[0];
        my $label     = charnames::viacode($codepoint)
                     || sprintf("U+%04X", $codepoint);
        return sprintf("\\N{%s}", $label);
    };

    $str =~ s{ ( \P{ASCII} ) }{ $escape_for->($1) }xge;

    return $str;
}
| 1189 |  |  |  |  |  |  |  | 
# Return the current value of the package variable $DEBUG.
#
# _validate_private_method() is defined elsewhere in this file;
# presumably it rejects calls from outside the package -- confirm.
sub debugging() {
    _validate_private_method();

    our $DEBUG;    # lexical alias to the package global
    return $DEBUG;
}
| 1194 |  |  |  |  |  |  |  | 
# Return the fully qualified name of the subroutine that called
# whoami(): element 3 of caller() for the frame one level up.
# The list slice is kept so that a missing frame yields an empty list
# in list context.
sub whoami() {
    return (caller(1))[3];
}
# Return the fully qualified name of the subroutine two frames up:
# the caller of whichever sub called whowasi().
# The list slice is kept so that a missing frame yields an empty list
# in list context.
sub whowasi() {
    return (caller(2))[3];
}
| 1197 |  |  |  |  |  |  |  | 
# Emit a printf-style debugging message on STDOUT, but only when
# debugging() is true.
#
# Arguments: a printf format string followed by its arguments.  The
# message is prefixed with "DEBUG(<sub>): ", where <sub> comes from
# whowasi(), i.e. the name of the subroutine that called debug().
# A newline is appended when the format does not already end in one.
#
# _validate_private_method() is defined elsewhere in this file;
# presumably it rejects calls from outside the package -- confirm.
sub debug {
    _validate_private_method();
    return unless debugging();

    my($fmt, @args) = @_;
    my $subname = whowasi();

    # $fmt is interpolated on purpose: it is itself a printf format.
    printf STDOUT "DEBUG(%s): $fmt", $subname, @args;

    # Name STDOUT explicitly: a bare print goes to the currently
    # select()ed handle, which could send the newline to a different
    # stream than the message it terminates.
    print  STDOUT "\n" unless $fmt =~ /\n\z/;
}
| 1206 |  |  |  |  |  |  |  | 
| 1207 |  |  |  |  |  |  | ######################################################################## | 
| 1208 |  |  |  |  |  |  | ######################################################################## | 
| 1209 |  |  |  |  |  |  | ######################################################################## | 
| 1210 |  |  |  |  |  |  |  | 
| 1211 |  |  |  |  |  |  | # Class initializers | 
| 1212 |  |  |  |  |  |  |  | 
| 1213 |  |  |  |  |  |  | UNITCHECK { | 
| 1214 |  |  |  |  |  |  |  | 
| 1215 |  |  |  |  |  |  | #################################### | 
| 1216 |  |  |  |  |  |  | # Incidence of non-ASCII code points in PubMed Open Access as of December 2010. | 
| 1217 |  |  |  |  |  |  | # | 
| 1218 |  |  |  |  |  |  | # Table is UCA sorted and formatted using the dump_training_data | 
| 1219 |  |  |  |  |  |  | # object method, because sorting on anything else is trivial, so the | 
| 1220 |  |  |  |  |  |  | # hard one is the default. | 
| 1221 |  |  |  |  |  |  | #################################### | 
| 1222 |  |  |  |  |  |  |  | 
| 1223 |  |  |  |  |  |  | my %oed2_training = ( | 
| 1224 |  |  |  |  |  |  | 0x000314 =>     241,   # ◌ ̔  gc=Mn   sc=Inherited  COMBINING REVERSED COMMA ABOVE | 
| 1225 |  |  |  |  |  |  | 0x000301 =>     325,   # ◌ ́  gc=Mn   sc=Inherited  COMBINING ACUTE ACCENT | 
| 1226 |  |  |  |  |  |  | 0x000300 =>       2,   # ◌ ̀  gc=Mn   sc=Inherited  COMBINING GRAVE ACCENT | 
| 1227 |  |  |  |  |  |  | 0x000306 =>    2214,   # ◌ ̆  gc=Mn   sc=Inherited  COMBINING BREVE | 
| 1228 |  |  |  |  |  |  | 0x000302 =>     201,   # ◌ ̂  gc=Mn   sc=Inherited  COMBINING CIRCUMFLEX ACCENT | 
| 1229 |  |  |  |  |  |  | 0x00030C =>       5,   # ◌ ̌  gc=Mn   sc=Inherited  COMBINING CARON | 
| 1230 |  |  |  |  |  |  | 0x000308 =>       5,   # ◌ ̈  gc=Mn   sc=Inherited  COMBINING DIAERESIS | 
| 1231 |  |  |  |  |  |  | 0x000303 =>     106,   # ◌ ̃  gc=Mn   sc=Inherited  COMBINING TILDE | 
| 1232 |  |  |  |  |  |  | 0x000307 =>      28,   # ◌ ̇  gc=Mn   sc=Inherited  COMBINING DOT ABOVE | 
| 1233 |  |  |  |  |  |  | 0x000327 =>     710,   # ◌ ̧  gc=Mn   sc=Inherited  COMBINING CEDILLA | 
| 1234 |  |  |  |  |  |  | 0x000304 =>     129,   # ◌ ̄  gc=Mn   sc=Inherited  COMBINING MACRON | 
| 1235 |  |  |  |  |  |  | 0x000320 =>     133,   # ◌ ̠  gc=Mn   sc=Inherited  COMBINING MINUS SIGN BELOW | 
| 1236 |  |  |  |  |  |  | 0x000336 =>     267,   # ◌ ̶  gc=Mn   sc=Inherited  COMBINING LONG STROKE OVERLAY | 
| 1237 |  |  |  |  |  |  | 0x000323 =>       6,   # ◌ ̣  gc=Mn   sc=Inherited  COMBINING DOT BELOW | 
| 1238 |  |  |  |  |  |  | 0x00032D =>      15,   # ◌ ̭  gc=Mn   sc=Inherited  COMBINING CIRCUMFLEX ACCENT BELOW | 
| 1239 |  |  |  |  |  |  | 0x000345 =>       9,   # ◌ ͅ  gc=Mn   sc=Inherited  COMBINING GREEK YPOGEGRAMMENI | 
| 1240 |  |  |  |  |  |  | 0x000651 =>       2,   # ◌ ّ  gc=Mn   sc=Inherited  ARABIC SHADDA | 
| 1241 |  |  |  |  |  |  | 0x0020E9 =>       2,   # ◌ ⃩  gc=Mn   sc=Inherited  COMBINING WIDE BRIDGE ABOVE | 
| 1242 |  |  |  |  |  |  | 0x0000B4 =>      48,   #  ´  gc=Sk   sc=Common     ACUTE ACCENT | 
| 1243 |  |  |  |  |  |  | 0x0000AF =>       5,   #  ¯  gc=Sk   sc=Common     MACRON | 
| 1244 |  |  |  |  |  |  | 0x0002D8 =>       4,   #  ˘  gc=Sk   sc=Common     BREVE | 
| 1245 |  |  |  |  |  |  | 0x0000A8 =>       6,   #  ¨  gc=Sk   sc=Common     DIAERESIS | 
| 1246 |  |  |  |  |  |  | 0x0000B8 =>       1,   #  ¸  gc=Sk   sc=Common     CEDILLA | 
| 1247 |  |  |  |  |  |  | 0x002010 => 1205194,   #  ‐  gc=Pd   sc=Common     HYPHEN | 
| 1248 |  |  |  |  |  |  | 0x002013 =>  163112,   #  –  gc=Pd   sc=Common     EN DASH | 
| 1249 |  |  |  |  |  |  | 0x002014 =>     430,   #  —  gc=Pd   sc=Common     EM DASH | 
| 1250 |  |  |  |  |  |  | 0x0000B7 =>  143383,   #  ·  gc=Po   sc=Common     MIDDLE DOT | 
| 1251 |  |  |  |  |  |  | 0x002018 =>  228766,   #  ‘  gc=Pi   sc=Common     LEFT SINGLE QUOTATION MARK | 
| 1252 |  |  |  |  |  |  | 0x002019 =>  737362,   #  ’  gc=Pf   sc=Common     RIGHT SINGLE QUOTATION MARK | 
| 1253 |  |  |  |  |  |  | 0x002039 =>      11,   #  ‹  gc=Pi   sc=Common     SINGLE LEFT-POINTING ANGLE QUOTATION MARK | 
| 1254 |  |  |  |  |  |  | 0x00203A =>      12,   #  ›  gc=Pf   sc=Common     SINGLE RIGHT-POINTING ANGLE QUOTATION MARK | 
| 1255 |  |  |  |  |  |  | 0x00FF08 =>      13,   #  ( gc=Ps   sc=Common     FULLWIDTH LEFT PARENTHESIS | 
| 1256 |  |  |  |  |  |  | 0x00FF09 =>      13,   #  ) gc=Pe   sc=Common     FULLWIDTH RIGHT PARENTHESIS | 
| 1257 |  |  |  |  |  |  | 0x00FF3B =>       4,   #  [ gc=Ps   sc=Common     FULLWIDTH LEFT SQUARE BRACKET | 
| 1258 |  |  |  |  |  |  | 0x00FF3D =>       4,   #  ] gc=Pe   sc=Common     FULLWIDTH RIGHT SQUARE BRACKET | 
| 1259 |  |  |  |  |  |  | 0x00FF5B =>     102,   #  { gc=Ps   sc=Common     FULLWIDTH LEFT CURLY BRACKET | 
| 1260 |  |  |  |  |  |  | 0x00FF5D =>     101,   #  } gc=Pe   sc=Common     FULLWIDTH RIGHT CURLY BRACKET | 
| 1261 |  |  |  |  |  |  | 0x0000A7 =>   42343,   #  §  gc=So   sc=Common     SECTION SIGN | 
| 1262 |  |  |  |  |  |  | 0x0000B6 =>     235,   #  ¶  gc=So   sc=Common     PILCROW SIGN | 
| 1263 |  |  |  |  |  |  | 0x00204B =>   13003,   #  ⁋  gc=Po   sc=Common     REVERSED PILCROW SIGN | 
| 1264 |  |  |  |  |  |  | 0x0000A9 =>       4,   #  ©  gc=So   sc=Common     COPYRIGHT SIGN | 
| 1265 |  |  |  |  |  |  | 0x00FF0F =>       1,   #  / gc=Po   sc=Common     FULLWIDTH SOLIDUS | 
| 1266 |  |  |  |  |  |  | 0x002030 =>      16,   #  ‰  gc=Po   sc=Common     PER MILLE SIGN | 
| 1267 |  |  |  |  |  |  | 0x002020 =>    8882,   #  †  gc=Po   sc=Common     DAGGER | 
| 1268 |  |  |  |  |  |  | 0x002021 =>       9,   #  ‡  gc=Po   sc=Common     DOUBLE DAGGER | 
| 1269 |  |  |  |  |  |  | 0x002032 =>     967,   #  ′  gc=Po   sc=Common     PRIME | 
| 1270 |  |  |  |  |  |  | 0x002033 =>     362,   #  ″  gc=Po   sc=Common     DOUBLE PRIME | 
| 1271 |  |  |  |  |  |  | 0x002034 =>      24,   #  ‴  gc=Po   sc=Common     TRIPLE PRIME | 
| 1272 |  |  |  |  |  |  | 0x002038 =>       2,   #  ‸  gc=Po   sc=Common     CARET | 
| 1273 |  |  |  |  |  |  | 0x0002C8 =>    2550,   #  ˈ  gc=Lm   sc=Common     MODIFIER LETTER VERTICAL LINE | 
| 1274 |  |  |  |  |  |  | 0x0002CC =>     183,   #  ˌ  gc=Lm   sc=Common     MODIFIER LETTER LOW VERTICAL LINE | 
| 1275 |  |  |  |  |  |  | 0x0002DE =>      91,   #  ˞  gc=Sk   sc=Common     MODIFIER LETTER RHOTIC HOOK | 
| 1276 |  |  |  |  |  |  | 0x0000B0 =>    2165,   #  °  gc=So   sc=Common     DEGREE SIGN | 
| 1277 |  |  |  |  |  |  | 0x00211E =>      12,   #  ℞  gc=So   sc=Common     PRESCRIPTION TAKE | 
| 1278 |  |  |  |  |  |  | 0x002190 =>       3,   #  ←  gc=Sm   sc=Common     LEFTWARDS ARROW | 
| 1279 |  |  |  |  |  |  | 0x002192 =>     168,   #  →  gc=Sm   sc=Common     RIGHTWARDS ARROW | 
| 1280 |  |  |  |  |  |  | 0x0021CC =>       7,   #  ⇌  gc=So   sc=Common     RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON | 
| 1281 |  |  |  |  |  |  | 0x002207 =>      74,   #  ∇  gc=Sm   sc=Common     NABLA | 
| 1282 |  |  |  |  |  |  | 0x00220B =>      42,   #  ∋  gc=Sm   sc=Common     CONTAINS AS MEMBER | 
| 1283 |  |  |  |  |  |  | 0x0000B1 =>     159,   #  ±  gc=Sm   sc=Common     PLUS-MINUS SIGN | 
| 1284 |  |  |  |  |  |  | 0x0000D7 =>     635,   #  ×  gc=Sm   sc=Common     MULTIPLICATION SIGN | 
| 1285 |  |  |  |  |  |  | 0x002260 =>      23,   #  ≠  gc=Sm   sc=Common     NOT EQUAL TO | 
| 1286 |  |  |  |  |  |  | 0x002212 =>    1416,   #  −  gc=Sm   sc=Common     MINUS SIGN | 
| 1287 |  |  |  |  |  |  | 0x002215 =>      19,   #  ∕  gc=Sm   sc=Common     DIVISION SLASH | 
| 1288 |  |  |  |  |  |  | 0x00221A =>     128,   #  √  gc=Sm   sc=Common     SQUARE ROOT | 
| 1289 |  |  |  |  |  |  | 0x00221E =>      53,   #  ∞  gc=Sm   sc=Common     INFINITY | 
| 1290 |  |  |  |  |  |  | 0x002225 =>     145,   #  ∥  gc=Sm   sc=Common     PARALLEL TO | 
| 1291 |  |  |  |  |  |  | 0x002227 =>      18,   #  ∧  gc=Sm   sc=Common     LOGICAL AND | 
| 1292 |  |  |  |  |  |  | 0x002228 =>      19,   #  ∨  gc=Sm   sc=Common     LOGICAL OR | 
| 1293 |  |  |  |  |  |  | 0x002229 =>       5,   #  ∩  gc=Sm   sc=Common     INTERSECTION | 
| 1294 |  |  |  |  |  |  | 0x00222A =>      12,   #  ∪  gc=Sm   sc=Common     UNION | 
| 1295 |  |  |  |  |  |  | 0x00222B =>      57,   #  ∫  gc=Sm   sc=Common     INTEGRAL | 
| 1296 |  |  |  |  |  |  | 0x002234 =>       8,   #  ∴  gc=Sm   sc=Common     THEREFORE | 
| 1297 |  |  |  |  |  |  | 0x00223C =>      93,   #  ∼  gc=Sm   sc=Common     TILDE OPERATOR | 
| 1298 |  |  |  |  |  |  | 0x002261 =>      48,   #  ≡  gc=Sm   sc=Common     IDENTICAL TO | 
| 1299 |  |  |  |  |  |  | 0x002263 =>       8,   #  ≣  gc=Sm   sc=Common     STRICTLY EQUIVALENT TO | 
| 1300 |  |  |  |  |  |  | 0x002265 =>       9,   #  ≥  gc=Sm   sc=Common     GREATER-THAN OR EQUAL TO | 
| 1301 |  |  |  |  |  |  | 0x002266 =>      28,   #  ≦  gc=Sm   sc=Common     LESS-THAN OVER EQUAL TO | 
| 1302 |  |  |  |  |  |  | 0x002267 =>       6,   #  ≧  gc=Sm   sc=Common     GREATER-THAN OVER EQUAL TO | 
| 1303 |  |  |  |  |  |  | 0x002282 =>      19,   #  ⊂  gc=Sm   sc=Common     SUBSET OF | 
| 1304 |  |  |  |  |  |  | 0x0022EE =>      11,   #  ⋮  gc=Sm   sc=Common     VERTICAL ELLIPSIS | 
| 1305 |  |  |  |  |  |  | 0x0022F0 =>       1,   #  ⋰  gc=Sm   sc=Common     UP RIGHT DIAGONAL ELLIPSIS | 
| 1306 |  |  |  |  |  |  | 0x0025B3 =>      34,   #  △  gc=So   sc=Common     WHITE UP-POINTING TRIANGLE | 
| 1307 |  |  |  |  |  |  | 0x00261B =>      36,   #  ☛  gc=So   sc=Common     BLACK RIGHT POINTING INDEX | 
| 1308 |  |  |  |  |  |  | 0x002625 =>       3,   #  ☥  gc=So   sc=Common     ANKH | 
| 1309 |  |  |  |  |  |  | 0x002627 =>       2,   #  ☧  gc=So   sc=Common     CHI RHO | 
| 1310 |  |  |  |  |  |  | 0x00263F =>      10,   #  ☿  gc=So   sc=Common     MERCURY | 
| 1311 |  |  |  |  |  |  | 0x002640 =>      25,   #  ♀  gc=So   sc=Common     FEMALE SIGN | 
| 1312 |  |  |  |  |  |  | 0x002642 =>      26,   #  ♂  gc=So   sc=Common     MALE SIGN | 
| 1313 |  |  |  |  |  |  | 0x002649 =>      12,   #  ♉  gc=So   sc=Common     TAURUS | 
| 1314 |  |  |  |  |  |  | 0x002652 =>       8,   #  ♒  gc=So   sc=Common     AQUARIUS | 
| 1315 |  |  |  |  |  |  | 0x002A7D =>      31,   #  ⩽  gc=Sm   sc=Common     LESS-THAN OR SLANTED EQUAL TO | 
| 1316 |  |  |  |  |  |  | 0x002A7E =>      15,   #  ⩾  gc=Sm   sc=Common     GREATER-THAN OR SLANTED EQUAL TO | 
| 1317 |  |  |  |  |  |  | 0x00266D =>      69,   #  ♭  gc=So   sc=Common     MUSIC FLAT SIGN | 
| 1318 |  |  |  |  |  |  | 0x00266F =>      21,   #  ♯  gc=Sm   sc=Common     MUSIC SHARP SIGN | 
| 1319 |  |  |  |  |  |  | 0x001D134 =>       1,   #  𝄴  gc=So   sc=Common     MUSICAL SYMBOL COMMON TIME | 
| 1320 |  |  |  |  |  |  | 0x001D135 =>       2,   #  𝄵  gc=So   sc=Common     MUSICAL SYMBOL CUT TIME | 
| 1321 |  |  |  |  |  |  | 0x00FFFD =>   10349,   #  �  gc=So   sc=Common     REPLACEMENT CHARACTER | 
| 1322 |  |  |  |  |  |  | 0x0002D0 =>      56,   #  ː  gc=Lm   sc=Common     MODIFIER LETTER TRIANGULAR COLON | 
| 1323 |  |  |  |  |  |  | 0x0000A2 =>      25,   #  ¢  gc=Sc   sc=Common     CENT SIGN | 
| 1324 |  |  |  |  |  |  | 0x0000A3 =>    2775,   #  £  gc=Sc   sc=Common     POUND SIGN | 
| 1325 |  |  |  |  |  |  | 0x0000E1 =>    5526,   #  á  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH ACUTE | 
| 1326 |  |  |  |  |  |  | 0x0000C1 =>       7,   #  Á  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH ACUTE | 
| 1327 |  |  |  |  |  |  | 0x0000E0 =>    2498,   #  à  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH GRAVE | 
| 1328 |  |  |  |  |  |  | 0x0000C0 =>      19,   #  À  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH GRAVE | 
| 1329 |  |  |  |  |  |  | 0x000103 =>     367,   #  ă  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH BREVE | 
| 1330 |  |  |  |  |  |  | 0x000102 =>       3,   #  Ă  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH BREVE | 
| 1331 |  |  |  |  |  |  | 0x0000E2 =>    3171,   #  â  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH CIRCUMFLEX | 
| 1332 |  |  |  |  |  |  | 0x0000C2 =>     507,   #  Â  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH CIRCUMFLEX | 
| 1333 |  |  |  |  |  |  | 0x0001CE =>      19,   #  ǎ  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH CARON | 
| 1334 |  |  |  |  |  |  | 0x0000E5 =>     391,   #  å  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH RING ABOVE | 
| 1335 |  |  |  |  |  |  | 0x0000C5 =>     123,   #  Å  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH RING ABOVE | 
| 1336 |  |  |  |  |  |  | 0x0000E4 =>    2792,   #  ä  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH DIAERESIS | 
| 1337 |  |  |  |  |  |  | 0x0000C4 =>       9,   #  Ä  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH DIAERESIS | 
| 1338 |  |  |  |  |  |  | 0x0000E3 =>     265,   #  ã  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH TILDE | 
| 1339 |  |  |  |  |  |  | 0x000227 =>      25,   #  ȧ  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH DOT ABOVE | 
| 1340 |  |  |  |  |  |  | 0x000226 =>       2,   #  Ȧ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH DOT ABOVE | 
| 1341 |  |  |  |  |  |  | 0x000101 =>   35015,   #  ā  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH MACRON | 
| 1342 |  |  |  |  |  |  | 0x000100 =>      10,   #  Ā  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH MACRON | 
| 1343 |  |  |  |  |  |  | 0x0000E6 =>   81225,   #  æ  gc=Ll   sc=Latin      LATIN SMALL LETTER AE | 
| 1344 |  |  |  |  |  |  | 0x0000C6 =>   17648,   #  Æ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER AE | 
| 1345 |  |  |  |  |  |  | 0x0001FD =>    2180,   #  ǽ  gc=Ll   sc=Latin      LATIN SMALL LETTER AE WITH ACUTE | 
| 1346 |  |  |  |  |  |  | 0x0001FC =>       1,   #  Ǽ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER AE WITH ACUTE | 
| 1347 |  |  |  |  |  |  | 0x0001E3 =>     107,   #  ǣ  gc=Ll   sc=Latin      LATIN SMALL LETTER AE WITH MACRON | 
| 1348 |  |  |  |  |  |  | 0x0001E2 =>       1,   #  Ǣ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER AE WITH MACRON | 
| 1349 |  |  |  |  |  |  | 0x000251 =>    6291,   #  ɑ  gc=Ll   sc=Latin      LATIN SMALL LETTER ALPHA | 
| 1350 |  |  |  |  |  |  | 0x000252 =>      54,   #  ɒ  gc=Ll   sc=Latin      LATIN SMALL LETTER TURNED ALPHA | 
| 1351 |  |  |  |  |  |  | 0x001D4B7 =>       1,   #  𝒷  gc=Ll   sc=Common     MATHEMATICAL SCRIPT SMALL B | 
| 1352 |  |  |  |  |  |  | 0x00212C =>       5,   #  ℬ  gc=Lu   sc=Common     SCRIPT CAPITAL B | 
| 1353 |  |  |  |  |  |  | 0x001E03 =>       1,   #  ḃ  gc=Ll   sc=Latin      LATIN SMALL LETTER B WITH DOT ABOVE | 
| 1354 |  |  |  |  |  |  | 0x000180 =>     436,   #  ƀ  gc=Ll   sc=Latin      LATIN SMALL LETTER B WITH STROKE | 
| 1355 |  |  |  |  |  |  | 0x000107 =>      55,   #  ć  gc=Ll   sc=Latin      LATIN SMALL LETTER C WITH ACUTE | 
| 1356 |  |  |  |  |  |  | 0x000109 =>       2,   #  ĉ  gc=Ll   sc=Latin      LATIN SMALL LETTER C WITH CIRCUMFLEX | 
| 1357 |  |  |  |  |  |  | 0x00010D =>     123,   #  č  gc=Ll   sc=Latin      LATIN SMALL LETTER C WITH CARON | 
| 1358 |  |  |  |  |  |  | 0x00010C =>      17,   #  Č  gc=Lu   sc=Latin      LATIN CAPITAL LETTER C WITH CARON | 
| 1359 |  |  |  |  |  |  | 0x00010B =>       7,   #  ċ  gc=Ll   sc=Latin      LATIN SMALL LETTER C WITH DOT ABOVE | 
| 1360 |  |  |  |  |  |  | 0x0000E7 =>    1356,   #  ç  gc=Ll   sc=Latin      LATIN SMALL LETTER C WITH CEDILLA | 
| 1361 |  |  |  |  |  |  | 0x0000C7 =>      21,   #  Ç  gc=Lu   sc=Latin      LATIN CAPITAL LETTER C WITH CEDILLA | 
| 1362 |  |  |  |  |  |  | 0x001E0B =>       2,   #  ḋ  gc=Ll   sc=Latin      LATIN SMALL LETTER D WITH DOT ABOVE | 
| 1363 |  |  |  |  |  |  | 0x001E11 =>      26,   #  ḑ  gc=Ll   sc=Latin      LATIN SMALL LETTER D WITH CEDILLA | 
| 1364 |  |  |  |  |  |  | 0x001E0D =>     142,   #  ḍ  gc=Ll   sc=Latin      LATIN SMALL LETTER D WITH DOT BELOW | 
| 1365 |  |  |  |  |  |  | 0x001E0C =>       5,   #  Ḍ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER D WITH DOT BELOW | 
| 1366 |  |  |  |  |  |  | 0x000110 =>    2501,   #  Đ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER D WITH STROKE | 
| 1367 |  |  |  |  |  |  | 0x0000F0 =>   39272,   #  ð  gc=Ll   sc=Latin      LATIN SMALL LETTER ETH | 
| 1368 |  |  |  |  |  |  | 0x0000E9 =>   32359,   #  é  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH ACUTE | 
| 1369 |  |  |  |  |  |  | 0x0000C9 =>     193,   #  É  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH ACUTE | 
| 1370 |  |  |  |  |  |  | 0x0000E8 =>    4603,   #  è  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH GRAVE | 
| 1371 |  |  |  |  |  |  | 0x000115 =>    6957,   #  ĕ  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH BREVE | 
| 1372 |  |  |  |  |  |  | 0x0000EA =>    2654,   #  ê  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH CIRCUMFLEX | 
| 1373 |  |  |  |  |  |  | 0x0000CA =>       5,   #  Ê  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH CIRCUMFLEX | 
| 1374 |  |  |  |  |  |  | 0x00011B =>      53,   #  ě  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH CARON | 
| 1375 |  |  |  |  |  |  | 0x0000EB =>    1811,   #  ë  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH DIAERESIS | 
| 1376 |  |  |  |  |  |  | 0x001EBD =>      10,   #  ẽ  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH TILDE | 
| 1377 |  |  |  |  |  |  | 0x000117 =>     137,   #  ė  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH DOT ABOVE | 
| 1378 |  |  |  |  |  |  | 0x000229 =>    1163,   #  ȩ  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH CEDILLA | 
| 1379 |  |  |  |  |  |  | 0x000228 =>       1,   #  Ȩ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH CEDILLA | 
| 1380 |  |  |  |  |  |  | 0x000113 =>   11859,   #  ē  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH MACRON | 
| 1381 |  |  |  |  |  |  | 0x000112 =>       1,   #  Ē  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH MACRON | 
| 1382 |  |  |  |  |  |  | 0x001E17 =>       7,   #  ḗ  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH MACRON AND ACUTE | 
| 1383 |  |  |  |  |  |  | 0x001EB9 =>       7,   #  ẹ  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH DOT BELOW | 
| 1384 |  |  |  |  |  |  | 0x001EB8 =>       1,   #  Ẹ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH DOT BELOW | 
| 1385 |  |  |  |  |  |  | 0x000259 =>     377,   #  ə  gc=Ll   sc=Latin      LATIN SMALL LETTER SCHWA | 
| 1386 |  |  |  |  |  |  | 0x00025B =>     257,   #  ɛ  gc=Ll   sc=Latin      LATIN SMALL LETTER OPEN E | 
| 1387 |  |  |  |  |  |  | 0x00025A =>      26,   #  ɚ  gc=Ll   sc=Latin      LATIN SMALL LETTER SCHWA WITH HOOK | 
| 1388 |  |  |  |  |  |  | 0x00025C =>       6,   #  ɜ  gc=Ll   sc=Latin      LATIN SMALL LETTER REVERSED OPEN E | 
| 1389 |  |  |  |  |  |  | 0x001D50A =>      13,   #  𝔊  gc=Lu   sc=Common     MATHEMATICAL FRAKTUR CAPITAL G | 
| 1390 |  |  |  |  |  |  | 0x0001F5 =>       5,   #  ǵ  gc=Ll   sc=Latin      LATIN SMALL LETTER G WITH ACUTE | 
| 1391 |  |  |  |  |  |  | 0x00011F =>       5,   #  ğ  gc=Ll   sc=Latin      LATIN SMALL LETTER G WITH BREVE | 
| 1392 |  |  |  |  |  |  | 0x00011E =>       1,   #  Ğ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER G WITH BREVE | 
| 1393 |  |  |  |  |  |  | 0x00011D =>       2,   #  ĝ  gc=Ll   sc=Latin      LATIN SMALL LETTER G WITH CIRCUMFLEX | 
| 1394 |  |  |  |  |  |  | 0x0001E7 =>       8,   #  ǧ  gc=Ll   sc=Latin      LATIN SMALL LETTER G WITH CARON | 
| 1395 |  |  |  |  |  |  | 0x000121 =>      10,   #  ġ  gc=Ll   sc=Latin      LATIN SMALL LETTER G WITH DOT ABOVE | 
| 1396 |  |  |  |  |  |  | 0x001E21 =>       2,   #  ḡ  gc=Ll   sc=Latin      LATIN SMALL LETTER G WITH MACRON | 
| 1397 |  |  |  |  |  |  | 0x000263 =>    1887,   #  ɣ  gc=Ll   sc=Latin      LATIN SMALL LETTER GAMMA | 
| 1398 |  |  |  |  |  |  | 0x00210E =>      10,   #  ℎ  gc=Ll   sc=Common     PLANCK CONSTANT | 
| 1399 |  |  |  |  |  |  | 0x001D4BD =>       1,   #  𝒽  gc=Ll   sc=Common     MATHEMATICAL SCRIPT SMALL H | 
| 1400 |  |  |  |  |  |  | 0x00210C =>       3,   #  ℌ  gc=Lu   sc=Common     BLACK-LETTER CAPITAL H | 
| 1401 |  |  |  |  |  |  | 0x001D573 =>       4,   #  𝕳  gc=Lu   sc=Common     MATHEMATICAL BOLD FRAKTUR CAPITAL H | 
| 1402 |  |  |  |  |  |  | 0x001E23 =>       1,   #  ḣ  gc=Ll   sc=Latin      LATIN SMALL LETTER H WITH DOT ABOVE | 
| 1403 |  |  |  |  |  |  | 0x001E22 =>       1,   #  Ḣ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER H WITH DOT ABOVE | 
| 1404 |  |  |  |  |  |  | 0x001E25 =>     265,   #  ḥ  gc=Ll   sc=Latin      LATIN SMALL LETTER H WITH DOT BELOW | 
| 1405 |  |  |  |  |  |  | 0x001E24 =>      33,   #  Ḥ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER H WITH DOT BELOW | 
| 1406 |  |  |  |  |  |  | 0x001E2A =>       3,   #  Ḫ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER H WITH BREVE BELOW | 
| 1407 |  |  |  |  |  |  | 0x000127 =>      26,   #  ħ  gc=Ll   sc=Latin      LATIN SMALL LETTER H WITH STROKE | 
| 1408 |  |  |  |  |  |  | 0x002111 =>       2,   #  ℑ  gc=Lu   sc=Common     BLACK-LETTER CAPITAL I | 
| 1409 |  |  |  |  |  |  | 0x0000ED =>    3847,   #  í  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH ACUTE | 
| 1410 |  |  |  |  |  |  | 0x0000CD =>       5,   #  Í  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH ACUTE | 
| 1411 |  |  |  |  |  |  | 0x0000EC =>      78,   #  ì  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH GRAVE | 
| 1412 |  |  |  |  |  |  | 0x00012D =>     350,   #  ĭ  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH BREVE | 
| 1413 |  |  |  |  |  |  | 0x00012C =>       3,   #  Ĭ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH BREVE | 
| 1414 |  |  |  |  |  |  | 0x0000EE =>    2216,   #  î  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH CIRCUMFLEX | 
| 1415 |  |  |  |  |  |  | 0x0000CE =>       3,   #  Î  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH CIRCUMFLEX | 
| 1416 |  |  |  |  |  |  | 0x0001D0 =>       6,   #  ǐ  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH CARON | 
| 1417 |  |  |  |  |  |  | 0x0000EF =>    1161,   #  ï  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH DIAERESIS | 
| 1418 |  |  |  |  |  |  | 0x000129 =>       2,   #  ĩ  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH TILDE | 
| 1419 |  |  |  |  |  |  | 0x00012B =>   13635,   #  ī  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH MACRON | 
| 1420 |  |  |  |  |  |  | 0x00012A =>      10,   #  Ī  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH MACRON | 
| 1421 |  |  |  |  |  |  | 0x00026A =>      36,   #  ɪ  gc=Ll   sc=Latin      LATIN LETTER SMALL CAPITAL I | 
| 1422 |  |  |  |  |  |  | 0x000269 =>       1,   #  ɩ  gc=Ll   sc=Latin      LATIN SMALL LETTER IOTA | 
| 1423 |  |  |  |  |  |  | 0x000196 =>       2,   #  Ɩ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER IOTA | 
| 1424 |  |  |  |  |  |  | 0x0001F0 =>       1,   #  ǰ  gc=Ll   sc=Latin      LATIN SMALL LETTER J WITH CARON | 
| 1425 |  |  |  |  |  |  | 0x001E33 =>      59,   #  ḳ  gc=Ll   sc=Latin      LATIN SMALL LETTER K WITH DOT BELOW | 
| 1426 |  |  |  |  |  |  | 0x001E32 =>       9,   #  Ḳ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER K WITH DOT BELOW | 
| 1427 |  |  |  |  |  |  | 0x002113 =>       1,   #  ℓ  gc=Ll   sc=Common     SCRIPT SMALL L | 
| 1428 |  |  |  |  |  |  | 0x002112 =>       1,   #  ℒ  gc=Lu   sc=Common     SCRIPT CAPITAL L | 
| 1429 |  |  |  |  |  |  | 0x001E37 =>      30,   #  ḷ  gc=Ll   sc=Latin      LATIN SMALL LETTER L WITH DOT BELOW | 
| 1430 |  |  |  |  |  |  | 0x001E3D =>       7,   #  ḽ  gc=Ll   sc=Latin      LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW | 
| 1431 |  |  |  |  |  |  | 0x000141 =>      19,   #  Ł  gc=Lu   sc=Latin      LATIN CAPITAL LETTER L WITH STROKE | 
| 1432 |  |  |  |  |  |  | 0x00029F =>       1,   #  ʟ  gc=Ll   sc=Latin      LATIN LETTER SMALL CAPITAL L | 
| 1433 |  |  |  |  |  |  | 0x00028E =>      10,   #  ʎ  gc=Ll   sc=Latin      LATIN SMALL LETTER TURNED Y | 
| 1434 |  |  |  |  |  |  | 0x001E41 =>      16,   #  ṁ  gc=Ll   sc=Latin      LATIN SMALL LETTER M WITH DOT ABOVE | 
| 1435 |  |  |  |  |  |  | 0x001E43 =>      32,   #  ṃ  gc=Ll   sc=Latin      LATIN SMALL LETTER M WITH DOT BELOW | 
| 1436 |  |  |  |  |  |  | 0x000144 =>      34,   #  ń  gc=Ll   sc=Latin      LATIN SMALL LETTER N WITH ACUTE | 
| 1437 |  |  |  |  |  |  | 0x000148 =>       4,   #  ň  gc=Ll   sc=Latin      LATIN SMALL LETTER N WITH CARON | 
| 1438 |  |  |  |  |  |  | 0x0000F1 =>     731,   #  ñ  gc=Ll   sc=Latin      LATIN SMALL LETTER N WITH TILDE | 
| 1439 |  |  |  |  |  |  | 0x0000D1 =>       1,   #  Ñ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER N WITH TILDE | 
| 1440 |  |  |  |  |  |  | 0x001E45 =>     114,   #  ṅ  gc=Ll   sc=Latin      LATIN SMALL LETTER N WITH DOT ABOVE | 
| 1441 |  |  |  |  |  |  | 0x001E44 =>       2,   #  Ṅ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER N WITH DOT ABOVE | 
| 1442 |  |  |  |  |  |  | 0x001E47 =>     167,   #  ṇ  gc=Ll   sc=Latin      LATIN SMALL LETTER N WITH DOT BELOW | 
| 1443 |  |  |  |  |  |  | 0x001E4B =>      16,   #  ṋ  gc=Ll   sc=Latin      LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW | 
| 1444 |  |  |  |  |  |  | 0x000272 =>       6,   #  ɲ  gc=Ll   sc=Latin      LATIN SMALL LETTER N WITH LEFT HOOK | 
| 1445 |  |  |  |  |  |  | 0x00014B =>     257,   #  ŋ  gc=Ll   sc=Latin      LATIN SMALL LETTER ENG | 
| 1446 |  |  |  |  |  |  | 0x0000F3 =>    3029,   #  ó  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH ACUTE | 
| 1447 |  |  |  |  |  |  | 0x0000D3 =>      16,   #  Ó  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH ACUTE | 
| 1448 |  |  |  |  |  |  | 0x0000F2 =>     101,   #  ò  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH GRAVE | 
| 1449 |  |  |  |  |  |  | 0x00014F =>     142,   #  ŏ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH BREVE | 
| 1450 |  |  |  |  |  |  | 0x00014E =>       1,   #  Ŏ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH BREVE | 
| 1451 |  |  |  |  |  |  | 0x0000F4 =>    3101,   #  ô  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH CIRCUMFLEX | 
| 1452 |  |  |  |  |  |  | 0x0000D4 =>       1,   #  Ô  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH CIRCUMFLEX | 
| 1453 |  |  |  |  |  |  | 0x0001D2 =>       9,   #  ǒ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH CARON | 
| 1454 |  |  |  |  |  |  | 0x0000F6 =>    5712,   #  ö  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH DIAERESIS | 
| 1455 |  |  |  |  |  |  | 0x0000D6 =>      26,   #  Ö  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH DIAERESIS | 
| 1456 |  |  |  |  |  |  | 0x00022B =>       7,   #  ȫ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH DIAERESIS AND MACRON | 
| 1457 |  |  |  |  |  |  | 0x0000F5 =>      22,   #  õ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH TILDE | 
| 1458 |  |  |  |  |  |  | 0x00022F =>      13,   #  ȯ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH DOT ABOVE | 
| 1459 |  |  |  |  |  |  | 0x0000F8 =>     476,   #  ø  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH STROKE | 
| 1460 |  |  |  |  |  |  | 0x0000D8 =>       5,   #  Ø  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH STROKE | 
| 1461 |  |  |  |  |  |  | 0x0001FF =>      10,   #  ǿ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH STROKE AND ACUTE | 
| 1462 |  |  |  |  |  |  | 0x00014D =>   12489,   #  ō  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH MACRON | 
| 1463 |  |  |  |  |  |  | 0x00014C =>      47,   #  Ō  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH MACRON | 
| 1464 |  |  |  |  |  |  | 0x001E53 =>       5,   #  ṓ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH MACRON AND ACUTE | 
| 1465 |  |  |  |  |  |  | 0x001ECD =>       2,   #  ọ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH DOT BELOW | 
| 1466 |  |  |  |  |  |  | 0x000153 =>    8146,   #  œ  gc=Ll   sc=Latin      LATIN SMALL LIGATURE OE | 
| 1467 |  |  |  |  |  |  | 0x000152 =>     643,   #  Œ  gc=Lu   sc=Latin      LATIN CAPITAL LIGATURE OE | 
| 1468 |  |  |  |  |  |  | 0x000254 =>      76,   #  ɔ  gc=Ll   sc=Latin      LATIN SMALL LETTER OPEN O | 
| 1469 |  |  |  |  |  |  | 0x002119 =>      35,   #  ℙ  gc=Lu   sc=Common     DOUBLE-STRUCK CAPITAL P | 
| 1470 |  |  |  |  |  |  | 0x001E57 =>       4,   #  ṗ  gc=Ll   sc=Latin      LATIN SMALL LETTER P WITH DOT ABOVE | 
| 1471 |  |  |  |  |  |  | 0x000278 =>     239,   #  ɸ  gc=Ll   sc=Latin      LATIN SMALL LETTER PHI | 
| 1472 |  |  |  |  |  |  | 0x00211C =>       7,   #  ℜ  gc=Lu   sc=Common     BLACK-LETTER CAPITAL R | 
| 1473 |  |  |  |  |  |  | 0x0024C7 =>       8,   #  Ⓡ  gc=So   sc=Common     CIRCLED LATIN CAPITAL LETTER R | 
| 1474 |  |  |  |  |  |  | 0x000155 =>       3,   #  ŕ  gc=Ll   sc=Latin      LATIN SMALL LETTER R WITH ACUTE | 
| 1475 |  |  |  |  |  |  | 0x000159 =>      29,   #  ř  gc=Ll   sc=Latin      LATIN SMALL LETTER R WITH CARON | 
| 1476 |  |  |  |  |  |  | 0x001E59 =>       9,   #  ṙ  gc=Ll   sc=Latin      LATIN SMALL LETTER R WITH DOT ABOVE | 
| 1477 |  |  |  |  |  |  | 0x001E58 =>       1,   #  Ṙ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER R WITH DOT ABOVE | 
| 1478 |  |  |  |  |  |  | 0x001E5B =>     159,   #  ṛ  gc=Ll   sc=Latin      LATIN SMALL LETTER R WITH DOT BELOW | 
| 1479 |  |  |  |  |  |  | 0x001E5A =>       9,   #  Ṛ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER R WITH DOT BELOW | 
| 1480 |  |  |  |  |  |  | 0x001E5D =>       2,   #  ṝ  gc=Ll   sc=Latin      LATIN SMALL LETTER R WITH DOT BELOW AND MACRON | 
| 1481 |  |  |  |  |  |  | 0x000279 =>       8,   #  ɹ  gc=Ll   sc=Latin      LATIN SMALL LETTER TURNED R | 
| 1482 |  |  |  |  |  |  | 0x00015B =>     114,   #  ś  gc=Ll   sc=Latin      LATIN SMALL LETTER S WITH ACUTE | 
| 1483 |  |  |  |  |  |  | 0x00015A =>      29,   #  Ś  gc=Lu   sc=Latin      LATIN CAPITAL LETTER S WITH ACUTE | 
| 1484 |  |  |  |  |  |  | 0x00015D =>       1,   #  ŝ  gc=Ll   sc=Latin      LATIN SMALL LETTER S WITH CIRCUMFLEX | 
| 1485 |  |  |  |  |  |  | 0x000161 =>     227,   #  š  gc=Ll   sc=Latin      LATIN SMALL LETTER S WITH CARON | 
| 1486 |  |  |  |  |  |  | 0x000160 =>      12,   #  Š  gc=Lu   sc=Latin      LATIN CAPITAL LETTER S WITH CARON | 
| 1487 |  |  |  |  |  |  | 0x001E67 =>       1,   #  ṧ  gc=Ll   sc=Latin      LATIN SMALL LETTER S WITH CARON AND DOT ABOVE | 
| 1488 |  |  |  |  |  |  | 0x001E61 =>      10,   #  ṡ  gc=Ll   sc=Latin      LATIN SMALL LETTER S WITH DOT ABOVE | 
| 1489 |  |  |  |  |  |  | 0x001E60 =>       1,   #  Ṡ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER S WITH DOT ABOVE | 
| 1490 |  |  |  |  |  |  | 0x00015F =>      27,   #  ş  gc=Ll   sc=Latin      LATIN SMALL LETTER S WITH CEDILLA | 
| 1491 |  |  |  |  |  |  | 0x00015E =>       3,   #  Ş  gc=Lu   sc=Latin      LATIN CAPITAL LETTER S WITH CEDILLA | 
| 1492 |  |  |  |  |  |  | 0x001E63 =>     110,   #  ṣ  gc=Ll   sc=Latin      LATIN SMALL LETTER S WITH DOT BELOW | 
| 1493 |  |  |  |  |  |  | 0x001E62 =>      16,   #  Ṣ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER S WITH DOT BELOW | 
| 1494 |  |  |  |  |  |  | 0x000283 =>     124,   #  ʃ  gc=Ll   sc=Latin      LATIN SMALL LETTER ESH | 
| 1495 |  |  |  |  |  |  | 0x001E6B =>       2,   #  ṫ  gc=Ll   sc=Latin      LATIN SMALL LETTER T WITH DOT ABOVE | 
| 1496 |  |  |  |  |  |  | 0x000163 =>       7,   #  ţ  gc=Ll   sc=Latin      LATIN SMALL LETTER T WITH CEDILLA | 
| 1497 |  |  |  |  |  |  | 0x001E6D =>     433,   #  ṭ  gc=Ll   sc=Latin      LATIN SMALL LETTER T WITH DOT BELOW | 
| 1498 |  |  |  |  |  |  | 0x001E6C =>       4,   #  Ṭ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER T WITH DOT BELOW | 
| 1499 |  |  |  |  |  |  | 0x0000FA =>    2007,   #  ú  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH ACUTE | 
| 1500 |  |  |  |  |  |  | 0x0000DA =>       5,   #  Ú  gc=Lu   sc=Latin      LATIN CAPITAL LETTER U WITH ACUTE | 
| 1501 |  |  |  |  |  |  | 0x0000F9 =>     149,   #  ù  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH GRAVE | 
| 1502 |  |  |  |  |  |  | 0x00016D =>     306,   #  ŭ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH BREVE | 
| 1503 |  |  |  |  |  |  | 0x0000FB =>    1266,   #  û  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH CIRCUMFLEX | 
| 1504 |  |  |  |  |  |  | 0x0000DB =>       6,   #  Û  gc=Lu   sc=Latin      LATIN CAPITAL LETTER U WITH CIRCUMFLEX | 
| 1505 |  |  |  |  |  |  | 0x0001D4 =>      11,   #  ǔ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH CARON | 
| 1506 |  |  |  |  |  |  | 0x00016F =>      21,   #  ů  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH RING ABOVE | 
| 1507 |  |  |  |  |  |  | 0x0000FC =>    7400,   #  ü  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH DIAERESIS | 
| 1508 |  |  |  |  |  |  | 0x0000DC =>      28,   #  Ü  gc=Lu   sc=Latin      LATIN CAPITAL LETTER U WITH DIAERESIS | 
| 1509 |  |  |  |  |  |  | 0x0001D8 =>       1,   #  ǘ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE | 
| 1510 |  |  |  |  |  |  | 0x0001D6 =>      13,   #  ǖ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH DIAERESIS AND MACRON | 
| 1511 |  |  |  |  |  |  | 0x000169 =>      12,   #  ũ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH TILDE | 
| 1512 |  |  |  |  |  |  | 0x001E79 =>       1,   #  ṹ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH TILDE AND ACUTE | 
| 1513 |  |  |  |  |  |  | 0x00016B =>    7760,   #  ū  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH MACRON | 
| 1514 |  |  |  |  |  |  | 0x00016A =>       1,   #  Ū  gc=Lu   sc=Latin      LATIN CAPITAL LETTER U WITH MACRON | 
| 1515 |  |  |  |  |  |  | 0x000265 =>       5,   #  ɥ  gc=Ll   sc=Latin      LATIN SMALL LETTER TURNED H | 
| 1516 |  |  |  |  |  |  | 0x00028A =>      36,   #  ʊ  gc=Ll   sc=Latin      LATIN SMALL LETTER UPSILON | 
| 1517 |  |  |  |  |  |  | 0x00028C =>      20,   #  ʌ  gc=Ll   sc=Latin      LATIN SMALL LETTER TURNED V | 
| 1518 |  |  |  |  |  |  | 0x001E83 =>       2,   #  ẃ  gc=Ll   sc=Latin      LATIN SMALL LETTER W WITH ACUTE | 
| 1519 |  |  |  |  |  |  | 0x001E81 =>       1,   #  ẁ  gc=Ll   sc=Latin      LATIN SMALL LETTER W WITH GRAVE | 
| 1520 |  |  |  |  |  |  | 0x000175 =>       4,   #  ŵ  gc=Ll   sc=Latin      LATIN SMALL LETTER W WITH CIRCUMFLEX | 
| 1521 |  |  |  |  |  |  | 0x001E89 =>       1,   #  ẉ  gc=Ll   sc=Latin      LATIN SMALL LETTER W WITH DOT BELOW | 
| 1522 |  |  |  |  |  |  | 0x001D54F =>       4,   #  𝕏  gc=Lu   sc=Common     MATHEMATICAL DOUBLE-STRUCK CAPITAL X | 
| 1523 |  |  |  |  |  |  | 0x001E8B =>       7,   #  ẋ  gc=Ll   sc=Latin      LATIN SMALL LETTER X WITH DOT ABOVE | 
| 1524 |  |  |  |  |  |  | 0x001E8A =>       2,   #  Ẋ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER X WITH DOT ABOVE | 
| 1525 |  |  |  |  |  |  | 0x001D550 =>       2,   #  𝕐  gc=Lu   sc=Common     MATHEMATICAL DOUBLE-STRUCK CAPITAL Y | 
| 1526 |  |  |  |  |  |  | 0x0000FD =>     843,   #  ý  gc=Ll   sc=Latin      LATIN SMALL LETTER Y WITH ACUTE | 
| 1527 |  |  |  |  |  |  | 0x000177 =>      43,   #  ŷ  gc=Ll   sc=Latin      LATIN SMALL LETTER Y WITH CIRCUMFLEX | 
| 1528 |  |  |  |  |  |  | 0x0000FF =>      15,   #  ÿ  gc=Ll   sc=Latin      LATIN SMALL LETTER Y WITH DIAERESIS | 
| 1529 |  |  |  |  |  |  | 0x001EF9 =>       3,   #  ỹ  gc=Ll   sc=Latin      LATIN SMALL LETTER Y WITH TILDE | 
| 1530 |  |  |  |  |  |  | 0x001E8F =>      12,   #  ẏ  gc=Ll   sc=Latin      LATIN SMALL LETTER Y WITH DOT ABOVE | 
| 1531 |  |  |  |  |  |  | 0x001E8E =>       1,   #  Ẏ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER Y WITH DOT ABOVE | 
| 1532 |  |  |  |  |  |  | 0x000233 =>     359,   #  ȳ  gc=Ll   sc=Latin      LATIN SMALL LETTER Y WITH MACRON | 
| 1533 |  |  |  |  |  |  | 0x00028F =>       2,   #  ʏ  gc=Ll   sc=Latin      LATIN LETTER SMALL CAPITAL Y | 
| 1534 |  |  |  |  |  |  | 0x00017A =>       9,   #  ź  gc=Ll   sc=Latin      LATIN SMALL LETTER Z WITH ACUTE | 
| 1535 |  |  |  |  |  |  | 0x00017E =>      37,   #  ž  gc=Ll   sc=Latin      LATIN SMALL LETTER Z WITH CARON | 
| 1536 |  |  |  |  |  |  | 0x00017D =>       1,   #  Ž  gc=Lu   sc=Latin      LATIN CAPITAL LETTER Z WITH CARON | 
| 1537 |  |  |  |  |  |  | 0x00017C =>      10,   #  ż  gc=Ll   sc=Latin      LATIN SMALL LETTER Z WITH DOT ABOVE | 
| 1538 |  |  |  |  |  |  | 0x001E93 =>      13,   #  ẓ  gc=Ll   sc=Latin      LATIN SMALL LETTER Z WITH DOT BELOW | 
| 1539 |  |  |  |  |  |  | 0x000225 =>     455,   #  ȥ  gc=Ll   sc=Latin      LATIN SMALL LETTER Z WITH HOOK | 
| 1540 |  |  |  |  |  |  | 0x000292 =>   25134,   #  ʒ  gc=Ll   sc=Latin      LATIN SMALL LETTER EZH | 
| 1541 |  |  |  |  |  |  | 0x0001EF =>       1,   #  ǯ  gc=Ll   sc=Latin      LATIN SMALL LETTER EZH WITH CARON | 
| 1542 |  |  |  |  |  |  | 0x00021D =>   44741,   #  ȝ  gc=Ll   sc=Latin      LATIN SMALL LETTER YOGH | 
| 1543 |  |  |  |  |  |  | 0x00021C =>    3566,   #  Ȝ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER YOGH | 
| 1544 |  |  |  |  |  |  | 0x0000FE =>  174256,   #  þ  gc=Ll   sc=Latin      LATIN SMALL LETTER THORN | 
| 1545 |  |  |  |  |  |  | 0x0000DE =>   35163,   #  Þ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER THORN | 
| 1546 |  |  |  |  |  |  | 0x00A764 =>      88,   #  Ꝥ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER THORN WITH STROKE | 
| 1547 |  |  |  |  |  |  | 0x0001BF =>       6,   #  ƿ  gc=Ll   sc=Latin      LATIN LETTER WYNN | 
| 1548 |  |  |  |  |  |  | 0x0001F7 =>      11,   #  Ƿ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER WYNN | 
| 1549 |  |  |  |  |  |  | 0x0002BF =>     220,   #  ʿ  gc=Lm   sc=Common     MODIFIER LETTER LEFT HALF RING | 
| 1550 |  |  |  |  |  |  | 0x0003B1 =>   20478,   #  α  gc=Ll   sc=Greek      GREEK SMALL LETTER ALPHA | 
| 1551 |  |  |  |  |  |  | 0x000391 =>      99,   #  Α  gc=Lu   sc=Greek      GREEK CAPITAL LETTER ALPHA | 
| 1552 |  |  |  |  |  |  | 0x001F01 =>    2960,   #  ἁ  gc=Ll   sc=Greek      GREEK SMALL LETTER ALPHA WITH DASIA | 
| 1553 |  |  |  |  |  |  | 0x001F09 =>     110,   #  Ἁ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER ALPHA WITH DASIA | 
| 1554 |  |  |  |  |  |  | 0x001F05 =>     986,   #  ἅ  gc=Ll   sc=Greek      GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA | 
| 1555 |  |  |  |  |  |  | 0x001F0D =>      21,   #  Ἅ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA | 
| 1556 |  |  |  |  |  |  | 0x001F85 =>       6,   #  ᾅ  gc=Ll   sc=Greek      GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI | 
| 1557 |  |  |  |  |  |  | 0x001F03 =>       6,   #  ἃ  gc=Ll   sc=Greek      GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA | 
| 1558 |  |  |  |  |  |  | 0x001F81 =>       2,   #  ᾁ  gc=Ll   sc=Greek      GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI | 
| 1559 |  |  |  |  |  |  | 0x0003AC =>    4668,   #  ά  gc=Ll   sc=Greek      GREEK SMALL LETTER ALPHA WITH TONOS | 
| 1560 |  |  |  |  |  |  | 0x000386 =>      13,   #  Ά  gc=Lu   sc=Greek      GREEK CAPITAL LETTER ALPHA WITH TONOS | 
| 1561 |  |  |  |  |  |  | 0x001FB4 =>       3,   #  ᾴ  gc=Ll   sc=Greek      GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI | 
| 1562 |  |  |  |  |  |  | 0x001F70 =>     187,   #  ὰ  gc=Ll   sc=Greek      GREEK SMALL LETTER ALPHA WITH VARIA | 
| 1563 |  |  |  |  |  |  | 0x001FB0 =>     251,   #  ᾰ  gc=Ll   sc=Greek      GREEK SMALL LETTER ALPHA WITH VRACHY | 
| 1564 |  |  |  |  |  |  | 0x001FB1 =>     281,   #  ᾱ  gc=Ll   sc=Greek      GREEK SMALL LETTER ALPHA WITH MACRON | 
| 1565 |  |  |  |  |  |  | 0x001FB3 =>      15,   #  ᾳ  gc=Ll   sc=Greek      GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI | 
| 1566 |  |  |  |  |  |  | 0x0003B2 =>   10489,   #  β  gc=Ll   sc=Greek      GREEK SMALL LETTER BETA | 
| 1567 |  |  |  |  |  |  | 0x000392 =>     213,   #  Β  gc=Lu   sc=Greek      GREEK CAPITAL LETTER BETA | 
| 1568 |  |  |  |  |  |  | 0x0003B3 =>    6960,   #  γ  gc=Ll   sc=Greek      GREEK SMALL LETTER GAMMA | 
| 1569 |  |  |  |  |  |  | 0x000393 =>      56,   #  Γ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER GAMMA | 
| 1570 |  |  |  |  |  |  | 0x0003B4 =>    6429,   #  δ  gc=Ll   sc=Greek      GREEK SMALL LETTER DELTA | 
| 1571 |  |  |  |  |  |  | 0x000394 =>     273,   #  Δ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER DELTA | 
| 1572 |  |  |  |  |  |  | 0x0003B5 =>   11186,   #  ε  gc=Ll   sc=Greek      GREEK SMALL LETTER EPSILON | 
| 1573 |  |  |  |  |  |  | 0x000395 =>      94,   #  Ε  gc=Lu   sc=Greek      GREEK CAPITAL LETTER EPSILON | 
| 1574 |  |  |  |  |  |  | 0x001F11 =>    1794,   #  ἑ  gc=Ll   sc=Greek      GREEK SMALL LETTER EPSILON WITH DASIA | 
| 1575 |  |  |  |  |  |  | 0x001F19 =>      72,   #  Ἑ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER EPSILON WITH DASIA | 
| 1576 |  |  |  |  |  |  | 0x001F15 =>     664,   #  ἕ  gc=Ll   sc=Greek      GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA | 
| 1577 |  |  |  |  |  |  | 0x001F1D =>      14,   #  Ἕ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA | 
| 1578 |  |  |  |  |  |  | 0x001F13 =>       4,   #  ἓ  gc=Ll   sc=Greek      GREEK SMALL LETTER EPSILON WITH DASIA AND VARIA | 
| 1579 |  |  |  |  |  |  | 0x0003AD =>    3216,   #  έ  gc=Ll   sc=Greek      GREEK SMALL LETTER EPSILON WITH TONOS | 
| 1580 |  |  |  |  |  |  | 0x001F72 =>      39,   #  ὲ  gc=Ll   sc=Greek      GREEK SMALL LETTER EPSILON WITH VARIA | 
| 1581 |  |  |  |  |  |  | 0x0003DD =>      35,   #  ϝ  gc=Ll   sc=Greek      GREEK SMALL LETTER DIGAMMA | 
| 1582 |  |  |  |  |  |  | 0x0003DC =>       6,   #  Ϝ  gc=Lu   sc=Greek      GREEK LETTER DIGAMMA | 
| 1583 |  |  |  |  |  |  | 0x0003DB =>       8,   #  ϛ  gc=Ll   sc=Greek      GREEK SMALL LETTER STIGMA | 
| 1584 |  |  |  |  |  |  | 0x0003DA =>       1,   #  Ϛ  gc=Lu   sc=Greek      GREEK LETTER STIGMA | 
| 1585 |  |  |  |  |  |  | 0x0003B6 =>    1179,   #  ζ  gc=Ll   sc=Greek      GREEK SMALL LETTER ZETA | 
| 1586 |  |  |  |  |  |  | 0x000396 =>      21,   #  Ζ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER ZETA | 
| 1587 |  |  |  |  |  |  | 0x0003B7 =>    5019,   #  η  gc=Ll   sc=Greek      GREEK SMALL LETTER ETA | 
| 1588 |  |  |  |  |  |  | 0x000397 =>      27,   #  Η  gc=Lu   sc=Greek      GREEK CAPITAL LETTER ETA | 
| 1589 |  |  |  |  |  |  | 0x001F21 =>     362,   #  ἡ  gc=Ll   sc=Greek      GREEK SMALL LETTER ETA WITH DASIA | 
| 1590 |  |  |  |  |  |  | 0x001F29 =>      19,   #  Ἡ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER ETA WITH DASIA | 
| 1591 |  |  |  |  |  |  | 0x001F25 =>      72,   #  ἥ  gc=Ll   sc=Greek      GREEK SMALL LETTER ETA WITH DASIA AND OXIA | 
| 1592 |  |  |  |  |  |  | 0x001F2D =>       3,   #  Ἥ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA | 
| 1593 |  |  |  |  |  |  | 0x001F23 =>      11,   #  ἣ  gc=Ll   sc=Greek      GREEK SMALL LETTER ETA WITH DASIA AND VARIA | 
| 1594 |  |  |  |  |  |  | 0x001F91 =>       1,   #  ᾑ  gc=Ll   sc=Greek      GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI | 
| 1595 |  |  |  |  |  |  | 0x0003AE =>    3214,   #  ή  gc=Ll   sc=Greek      GREEK SMALL LETTER ETA WITH TONOS | 
| 1596 |  |  |  |  |  |  | 0x001FC4 =>       1,   #  ῄ  gc=Ll   sc=Greek      GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI | 
| 1597 |  |  |  |  |  |  | 0x001F74 =>     223,   #  ὴ  gc=Ll   sc=Greek      GREEK SMALL LETTER ETA WITH VARIA | 
| 1598 |  |  |  |  |  |  | 0x001FC3 =>      18,   #  ῃ  gc=Ll   sc=Greek      GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI | 
| 1599 |  |  |  |  |  |  | 0x0003B8 =>    3968,   #  θ  gc=Ll   sc=Greek      GREEK SMALL LETTER THETA | 
| 1600 |  |  |  |  |  |  | 0x000398 =>      78,   #  Θ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER THETA | 
| 1601 |  |  |  |  |  |  | 0x0003B9 =>   14514,   #  ι  gc=Ll   sc=Greek      GREEK SMALL LETTER IOTA | 
| 1602 |  |  |  |  |  |  | 0x000399 =>      51,   #  Ι  gc=Lu   sc=Greek      GREEK CAPITAL LETTER IOTA | 
| 1603 |  |  |  |  |  |  | 0x001F31 =>    1162,   #  ἱ  gc=Ll   sc=Greek      GREEK SMALL LETTER IOTA WITH DASIA | 
| 1604 |  |  |  |  |  |  | 0x001F39 =>      58,   #  Ἱ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER IOTA WITH DASIA | 
| 1605 |  |  |  |  |  |  | 0x001F35 =>     253,   #  ἵ  gc=Ll   sc=Greek      GREEK SMALL LETTER IOTA WITH DASIA AND OXIA | 
| 1606 |  |  |  |  |  |  | 0x001F3D =>       7,   #  Ἵ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA | 
| 1607 |  |  |  |  |  |  | 0x001F33 =>       1,   #  ἳ  gc=Ll   sc=Greek      GREEK SMALL LETTER IOTA WITH DASIA AND VARIA | 
| 1608 |  |  |  |  |  |  | 0x0003AF =>    6345,   #  ί  gc=Ll   sc=Greek      GREEK SMALL LETTER IOTA WITH TONOS | 
| 1609 |  |  |  |  |  |  | 0x001F76 =>     123,   #  ὶ  gc=Ll   sc=Greek      GREEK SMALL LETTER IOTA WITH VARIA | 
| 1610 |  |  |  |  |  |  | 0x001FD0 =>    1053,   #  ῐ  gc=Ll   sc=Greek      GREEK SMALL LETTER IOTA WITH VRACHY | 
| 1611 |  |  |  |  |  |  | 0x0003CA =>      55,   #  ϊ  gc=Ll   sc=Greek      GREEK SMALL LETTER IOTA WITH DIALYTIKA | 
| 1612 |  |  |  |  |  |  | 0x000390 =>      42,   #  ΐ  gc=Ll   sc=Greek      GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS | 
| 1613 |  |  |  |  |  |  | 0x001FD2 =>       2,   #  ῒ  gc=Ll   sc=Greek      GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA | 
| 1614 |  |  |  |  |  |  | 0x001FD1 =>     456,   #  ῑ  gc=Ll   sc=Greek      GREEK SMALL LETTER IOTA WITH MACRON | 
| 1615 |  |  |  |  |  |  | 0x0003BA =>   10021,   #  κ  gc=Ll   sc=Greek      GREEK SMALL LETTER KAPPA | 
| 1616 |  |  |  |  |  |  | 0x00039A =>     343,   #  Κ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER KAPPA | 
| 1617 |  |  |  |  |  |  | 0x0003BB =>   10745,   #  λ  gc=Ll   sc=Greek      GREEK SMALL LETTER LAMDA | 
| 1618 |  |  |  |  |  |  | 0x00039B =>     124,   #  Λ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER LAMDA | 
| 1619 |  |  |  |  |  |  | 0x0003BC =>   10774,   #  μ  gc=Ll   sc=Greek      GREEK SMALL LETTER MU | 
| 1620 |  |  |  |  |  |  | 0x00039C =>     129,   #  Μ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER MU | 
| 1621 |  |  |  |  |  |  | 0x0003BD =>   17863,   #  ν  gc=Ll   sc=Greek      GREEK SMALL LETTER NU | 
| 1622 |  |  |  |  |  |  | 0x00039D =>      72,   #  Ν  gc=Lu   sc=Greek      GREEK CAPITAL LETTER NU | 
| 1623 |  |  |  |  |  |  | 0x0003BE =>    1433,   #  ξ  gc=Ll   sc=Greek      GREEK SMALL LETTER XI | 
| 1624 |  |  |  |  |  |  | 0x00039E =>      15,   #  Ξ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER XI | 
| 1625 |  |  |  |  |  |  | 0x0003BF =>   22190,   #  ο  gc=Ll   sc=Greek      GREEK SMALL LETTER OMICRON | 
| 1626 |  |  |  |  |  |  | 0x00039F =>      77,   #  Ο  gc=Lu   sc=Greek      GREEK CAPITAL LETTER OMICRON | 
| 1627 |  |  |  |  |  |  | 0x001F41 =>    1187,   #  ὁ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMICRON WITH DASIA | 
| 1628 |  |  |  |  |  |  | 0x001F49 =>      28,   #  Ὁ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER OMICRON WITH DASIA | 
| 1629 |  |  |  |  |  |  | 0x001F45 =>     491,   #  ὅ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA | 
| 1630 |  |  |  |  |  |  | 0x001F4D =>       6,   #  Ὅ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA | 
| 1631 |  |  |  |  |  |  | 0x001F43 =>       4,   #  ὃ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMICRON WITH DASIA AND VARIA | 
| 1632 |  |  |  |  |  |  | 0x0003CC =>    8044,   #  ό  gc=Ll   sc=Greek      GREEK SMALL LETTER OMICRON WITH TONOS | 
| 1633 |  |  |  |  |  |  | 0x001F78 =>     241,   #  ὸ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMICRON WITH VARIA | 
| 1634 |  |  |  |  |  |  | 0x0003C0 =>    9528,   #  π  gc=Ll   sc=Greek      GREEK SMALL LETTER PI | 
| 1635 |  |  |  |  |  |  | 0x0003A0 =>     217,   #  Π  gc=Lu   sc=Greek      GREEK CAPITAL LETTER PI | 
| 1636 |  |  |  |  |  |  | 0x0003D8 =>     125,   #  Ϙ  gc=Lu   sc=Greek      GREEK LETTER ARCHAIC KOPPA | 
| 1637 |  |  |  |  |  |  | 0x0003C1 =>   15430,   #  ρ  gc=Ll   sc=Greek      GREEK SMALL LETTER RHO | 
| 1638 |  |  |  |  |  |  | 0x0003A1 =>      27,   #  Ρ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER RHO | 
| 1639 |  |  |  |  |  |  | 0x001FE5 =>     476,   #  ῥ  gc=Ll   sc=Greek      GREEK SMALL LETTER RHO WITH DASIA | 
| 1640 |  |  |  |  |  |  | 0x001FEC =>       6,   #  Ῥ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER RHO WITH DASIA | 
| 1641 |  |  |  |  |  |  | 0x0003C3 =>   10221,   #  σ  gc=Ll   sc=Greek      GREEK SMALL LETTER SIGMA | 
| 1642 |  |  |  |  |  |  | 0x0003A3 =>     313,   #  Σ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER SIGMA | 
| 1643 |  |  |  |  |  |  | 0x001D6BA =>      13,   #  𝚺  gc=Lu   sc=Common     MATHEMATICAL BOLD CAPITAL SIGMA | 
| 1644 |  |  |  |  |  |  | 0x0003C2 =>   18113,   #  ς  gc=Ll   sc=Greek      GREEK SMALL LETTER FINAL SIGMA | 
| 1645 |  |  |  |  |  |  | 0x0003C4 =>   14119,   #  τ  gc=Ll   sc=Greek      GREEK SMALL LETTER TAU | 
| 1646 |  |  |  |  |  |  | 0x0003A4 =>      89,   #  Τ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER TAU | 
| 1647 |  |  |  |  |  |  | 0x0003C5 =>    4269,   #  υ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON | 
| 1648 |  |  |  |  |  |  | 0x0003A5 =>      31,   #  Υ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER UPSILON | 
| 1649 |  |  |  |  |  |  | 0x001F51 =>    1287,   #  ὑ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH DASIA | 
| 1650 |  |  |  |  |  |  | 0x001F59 =>      14,   #  Ὑ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER UPSILON WITH DASIA | 
| 1651 |  |  |  |  |  |  | 0x001F55 =>     277,   #  ὕ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH DASIA AND OXIA | 
| 1652 |  |  |  |  |  |  | 0x001F5D =>       5,   #  Ὕ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA | 
| 1653 |  |  |  |  |  |  | 0x0003CD =>    2857,   #  ύ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH TONOS | 
| 1654 |  |  |  |  |  |  | 0x001F7A =>      32,   #  ὺ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH VARIA | 
| 1655 |  |  |  |  |  |  | 0x001FE0 =>     771,   #  ῠ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH VRACHY | 
| 1656 |  |  |  |  |  |  | 0x0003CB =>       4,   #  ϋ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH DIALYTIKA | 
| 1657 |  |  |  |  |  |  | 0x0003B0 =>       1,   #  ΰ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS | 
| 1658 |  |  |  |  |  |  | 0x001FE1 =>     365,   #  ῡ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH MACRON | 
| 1659 |  |  |  |  |  |  | 0x0003C6 =>    4597,   #  φ  gc=Ll   sc=Greek      GREEK SMALL LETTER PHI | 
| 1660 |  |  |  |  |  |  | 0x0003A6 =>      73,   #  Φ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER PHI | 
| 1661 |  |  |  |  |  |  | 0x0003C7 =>    3506,   #  χ  gc=Ll   sc=Greek      GREEK SMALL LETTER CHI | 
| 1662 |  |  |  |  |  |  | 0x0003A7 =>      90,   #  Χ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER CHI | 
| 1663 |  |  |  |  |  |  | 0x0003C8 =>     777,   #  ψ  gc=Ll   sc=Greek      GREEK SMALL LETTER PSI | 
| 1664 |  |  |  |  |  |  | 0x0003A8 =>      29,   #  Ψ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER PSI | 
| 1665 |  |  |  |  |  |  | 0x0003C9 =>    3872,   #  ω  gc=Ll   sc=Greek      GREEK SMALL LETTER OMEGA | 
| 1666 |  |  |  |  |  |  | 0x0003A9 =>      72,   #  Ω  gc=Lu   sc=Greek      GREEK CAPITAL LETTER OMEGA | 
| 1667 |  |  |  |  |  |  | 0x001F61 =>     177,   #  ὡ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMEGA WITH DASIA | 
| 1668 |  |  |  |  |  |  | 0x001F69 =>       4,   #  Ὡ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER OMEGA WITH DASIA | 
| 1669 |  |  |  |  |  |  | 0x001F65 =>      70,   #  ὥ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA | 
| 1670 |  |  |  |  |  |  | 0x001FA1 =>      47,   #  ᾡ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI | 
| 1671 |  |  |  |  |  |  | 0x0003CE =>     870,   #  ώ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMEGA WITH TONOS | 
| 1672 |  |  |  |  |  |  | 0x001FF4 =>       6,   #  ῴ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI | 
| 1673 |  |  |  |  |  |  | 0x001F7C =>      12,   #  ὼ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMEGA WITH VARIA | 
| 1674 |  |  |  |  |  |  | 0x001FF3 =>     221,   #  ῳ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI | 
| 1675 |  |  |  |  |  |  | 0x0003E1 =>       2,   #  ϡ  gc=Ll   sc=Greek      GREEK SMALL LETTER SAMPI | 
| 1676 |  |  |  |  |  |  | 0x002C84 =>       2,   #  Ⲅ  gc=Lu   sc=Coptic     COPTIC CAPITAL LETTER GAMMA | 
| 1677 |  |  |  |  |  |  | 0x002CA4 =>       7,   #  Ⲥ  gc=Lu   sc=Coptic     COPTIC CAPITAL LETTER SIMA | 
| 1678 |  |  |  |  |  |  | 0x0004A8 =>       1,   #  Ҩ  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER ABKHASIAN HA | 
| 1679 |  |  |  |  |  |  | 0x0005D7 =>       6,   #  ח  gc=Lo   sc=Hebrew     HEBREW LETTER HET | 
| 1680 |  |  |  |  |  |  | 0x010907 =>       1,   #  𐤇  gc=Lo   sc=Phoenician PHOENICIAN LETTER HET | 
| 1681 |  |  |  |  |  |  | 0x01090B =>       1,   #  𐤋  gc=Lo   sc=Phoenician PHOENICIAN LETTER LAMD | 
| 1682 |  |  |  |  |  |  | 0x010913 =>       1,   #  𐤓  gc=Lo   sc=Phoenician PHOENICIAN LETTER ROSH | 
| 1683 |  |  |  |  |  |  | 0x00FEE9 =>       1,   #  ﻩ  gc=Lo   sc=Arabic     ARABIC LETTER HEH ISOLATED FORM | 
| 1684 |  |  |  |  |  |  | 0x0016B9 =>       1,   #  ᚹ  gc=Lo   sc=Runic      RUNIC LETTER WUNJO WYNN W | 
| 1685 |  |  |  |  |  |  | 0x00209F =>      32,   # | 
| 1686 |  |  |  |  |  |  | ); | 
| 1687 |  |  |  |  |  |  |  | 
| 1688 |  |  |  |  |  |  | my %elsevier_training = ( | 
| 1689 |  |  |  |  |  |  | 0x00202A =>      21,   # <-> gc=Cf   sc=Common     LEFT-TO-RIGHT EMBEDDING | 
| 1690 |  |  |  |  |  |  | 0x002061 =>     154,   # <-> gc=Cf   sc=Common     FUNCTION APPLICATION | 
| 1691 |  |  |  |  |  |  | 0x00202B =>       1,   # <-> gc=Cf   sc=Common     RIGHT-TO-LEFT EMBEDDING | 
| 1692 |  |  |  |  |  |  | 0x002062 =>     143,   # <-> gc=Cf   sc=Common     INVISIBLE TIMES | 
| 1693 |  |  |  |  |  |  | 0x000092 =>      16,   # <-> gc=Cc   sc=Common | 
| 1694 |  |  |  |  |  |  | 0x000341 =>       6,   # ◌ ́  gc=Mn   sc=Inherited  COMBINING ACUTE TONE MARK | 
| 1695 |  |  |  |  |  |  | 0x000301 =>   57910,   # ◌ ́  gc=Mn   sc=Inherited  COMBINING ACUTE ACCENT | 
| 1696 |  |  |  |  |  |  | 0x000340 =>       4,   # ◌ ̀  gc=Mn   sc=Inherited  COMBINING GRAVE TONE MARK | 
| 1697 |  |  |  |  |  |  | 0x000300 =>    1230,   # ◌ ̀  gc=Mn   sc=Inherited  COMBINING GRAVE ACCENT | 
| 1698 |  |  |  |  |  |  | 0x000306 =>    1526,   # ◌ ̆  gc=Mn   sc=Inherited  COMBINING BREVE | 
| 1699 |  |  |  |  |  |  | 0x000302 =>    7848,   # ◌ ̂  gc=Mn   sc=Inherited  COMBINING CIRCUMFLEX ACCENT | 
| 1700 |  |  |  |  |  |  | 0x00030C =>    1919,   # ◌ ̌  gc=Mn   sc=Inherited  COMBINING CARON | 
| 1701 |  |  |  |  |  |  | 0x00030A =>     724,   # ◌ ̊  gc=Mn   sc=Inherited  COMBINING RING ABOVE | 
| 1702 |  |  |  |  |  |  | 0x000342 =>       4,   # ◌ ͂  gc=Mn   sc=Inherited  COMBINING GREEK PERISPOMENI | 
| 1703 |  |  |  |  |  |  | 0x000308 =>   13366,   # ◌ ̈  gc=Mn   sc=Inherited  COMBINING DIAERESIS | 
| 1704 |  |  |  |  |  |  | 0x00030B =>     516,   # ◌ ̋  gc=Mn   sc=Inherited  COMBINING DOUBLE ACUTE ACCENT | 
| 1705 |  |  |  |  |  |  | 0x000303 =>    2475,   # ◌ ̃  gc=Mn   sc=Inherited  COMBINING TILDE | 
| 1706 |  |  |  |  |  |  | 0x000307 =>   16962,   # ◌ ̇  gc=Mn   sc=Inherited  COMBINING DOT ABOVE | 
| 1707 |  |  |  |  |  |  | 0x000338 =>      33,   # ◌ ̸  gc=Mn   sc=Inherited  COMBINING LONG SOLIDUS OVERLAY | 
| 1708 |  |  |  |  |  |  | 0x000327 =>    1947,   # ◌ ̧  gc=Mn   sc=Inherited  COMBINING CEDILLA | 
| 1709 |  |  |  |  |  |  | 0x000328 =>     498,   # ◌ ̨  gc=Mn   sc=Inherited  COMBINING OGONEK | 
| 1710 |  |  |  |  |  |  | 0x000304 =>   10335,   # ◌ ̄  gc=Mn   sc=Inherited  COMBINING MACRON | 
| 1711 |  |  |  |  |  |  | 0x00032C =>       9,   # ◌ ̬  gc=Mn   sc=Inherited  COMBINING CARON BELOW | 
| 1712 |  |  |  |  |  |  | 0x00033A =>       6,   # ◌ ̺  gc=Mn   sc=Inherited  COMBINING INVERTED BRIDGE BELOW | 
| 1713 |  |  |  |  |  |  | 0x00033B =>      18,   # ◌ ̻  gc=Mn   sc=Inherited  COMBINING SQUARE BELOW | 
| 1714 |  |  |  |  |  |  | 0x00033C =>       1,   # ◌ ̼  gc=Mn   sc=Inherited  COMBINING SEAGULL BELOW | 
| 1715 |  |  |  |  |  |  | 0x000336 =>     178,   # ◌ ̶  gc=Mn   sc=Inherited  COMBINING LONG STROKE OVERLAY | 
| 1716 |  |  |  |  |  |  | 0x000337 =>      39,   # ◌ ̷  gc=Mn   sc=Inherited  COMBINING SHORT SOLIDUS OVERLAY | 
| 1717 |  |  |  |  |  |  | 0x0020DD =>      13,   # ◌ ⃝  gc=Me   sc=Inherited  COMBINING ENCLOSING CIRCLE | 
| 1718 |  |  |  |  |  |  | 0x0020DF =>       6,   # ◌ ⃟  gc=Me   sc=Inherited  COMBINING ENCLOSING DIAMOND | 
| 1719 |  |  |  |  |  |  | 0x000321 =>       4,   # ◌ ̡  gc=Mn   sc=Inherited  COMBINING PALATALIZED HOOK BELOW | 
| 1720 |  |  |  |  |  |  | 0x000322 =>      27,   # ◌ ̢  gc=Mn   sc=Inherited  COMBINING RETROFLEX HOOK BELOW | 
| 1721 |  |  |  |  |  |  | 0x000323 =>      15,   # ◌ ̣  gc=Mn   sc=Inherited  COMBINING DOT BELOW | 
| 1722 |  |  |  |  |  |  | 0x000326 =>     109,   # ◌ ̦  gc=Mn   sc=Inherited  COMBINING COMMA BELOW | 
| 1723 |  |  |  |  |  |  | 0x000331 =>    1593,   # ◌ ̱  gc=Mn   sc=Inherited  COMBINING MACRON BELOW | 
| 1724 |  |  |  |  |  |  | 0x000335 =>     139,   # ◌ ̵  gc=Mn   sc=Inherited  COMBINING SHORT STROKE OVERLAY | 
| 1725 |  |  |  |  |  |  | 0x0005B9 =>       1,   # ◌ ֹ  gc=Mn   sc=Hebrew     HEBREW POINT HOLAM | 
| 1726 |  |  |  |  |  |  | 0x0005BC =>       1,   # ◌ ּ  gc=Mn   sc=Hebrew     HEBREW POINT DAGESH OR MAPIQ | 
| 1727 |  |  |  |  |  |  | 0x000650 =>       1,   # ◌ ِ  gc=Mn   sc=Inherited  ARABIC KASRA | 
| 1728 |  |  |  |  |  |  | 0x0020D0 =>       3,   # ◌ ⃐  gc=Mn   sc=Inherited  COMBINING LEFT HARPOON ABOVE | 
| 1729 |  |  |  |  |  |  | 0x0020D1 =>       2,   # ◌ ⃑  gc=Mn   sc=Inherited  COMBINING RIGHT HARPOON ABOVE | 
| 1730 |  |  |  |  |  |  | 0x0020D7 =>     239,   # ◌ ⃗  gc=Mn   sc=Inherited  COMBINING RIGHT ARROW ABOVE | 
| 1731 |  |  |  |  |  |  | 0x0020DB =>       7,   # ◌ ⃛  gc=Mn   sc=Inherited  COMBINING THREE DOTS ABOVE | 
| 1732 |  |  |  |  |  |  | 0x003000 =>       1,   # <-> gc=Zs   sc=Common     IDEOGRAPHIC SPACE | 
| 1733 |  |  |  |  |  |  | 0x002002 =>       9,   # <-> gc=Zs   sc=Common     EN SPACE | 
| 1734 |  |  |  |  |  |  | 0x002003 =>      67,   # <-> gc=Zs   sc=Common     EM SPACE | 
| 1735 |  |  |  |  |  |  | 0x002005 =>      12,   # <-> gc=Zs   sc=Common     FOUR-PER-EM SPACE | 
| 1736 |  |  |  |  |  |  | 0x002008 =>  162990,   # <-> gc=Zs   sc=Common     PUNCTUATION SPACE | 
| 1737 |  |  |  |  |  |  | 0x002009 =>    7191,   # <-> gc=Zs   sc=Common     THIN SPACE | 
| 1738 |  |  |  |  |  |  | 0x00200A =>       2,   # <-> gc=Zs   sc=Common     HAIR SPACE | 
| 1739 |  |  |  |  |  |  | 0x0000A0 =>  249770,   # <-> gc=Zs   sc=Common     NO-BREAK SPACE | 
| 1740 |  |  |  |  |  |  | 0x0000B4 =>    1587,   #  ´  gc=Sk   sc=Common     ACUTE ACCENT | 
| 1741 |  |  |  |  |  |  | 0x000384 =>      82,   #  ΄  gc=Sk   sc=Greek      GREEK TONOS | 
| 1742 |  |  |  |  |  |  | 0x0002DC =>     316,   #  ˜  gc=Sk   sc=Common     SMALL TILDE | 
| 1743 |  |  |  |  |  |  | 0x0000AF =>     148,   #  ¯  gc=Sk   sc=Common     MACRON | 
| 1744 |  |  |  |  |  |  | 0x0002D8 =>       8,   #  ˘  gc=Sk   sc=Common     BREVE | 
| 1745 |  |  |  |  |  |  | 0x0002D9 =>      53,   #  ˙  gc=Sk   sc=Common     DOT ABOVE | 
| 1746 |  |  |  |  |  |  | 0x0000A8 =>    1445,   #  ¨  gc=Sk   sc=Common     DIAERESIS | 
| 1747 |  |  |  |  |  |  | 0x000385 =>       4,   #  ΅  gc=Sk   sc=Common     GREEK DIALYTIKA TONOS | 
| 1748 |  |  |  |  |  |  | 0x0002DA =>      69,   #  ˚  gc=Sk   sc=Common     RING ABOVE | 
| 1749 |  |  |  |  |  |  | 0x0002DD =>     239,   #  ˝  gc=Sk   sc=Common     DOUBLE ACUTE ACCENT | 
| 1750 |  |  |  |  |  |  | 0x0000B8 =>      42,   #  ¸  gc=Sk   sc=Common     CEDILLA | 
| 1751 |  |  |  |  |  |  | 0x0002DB =>       2,   #  ˛  gc=Sk   sc=Common     OGONEK | 
| 1752 |  |  |  |  |  |  | 0x002010 =>       8,   #  ‐  gc=Pd   sc=Common     HYPHEN | 
| 1753 |  |  |  |  |  |  | 0x002011 =>      12,   #  ‑  gc=Pd   sc=Common     NON-BREAKING HYPHEN | 
| 1754 |  |  |  |  |  |  | 0x002012 =>       5,   #  ‒  gc=Pd   sc=Common     FIGURE DASH | 
| 1755 |  |  |  |  |  |  | 0x002013 => 5188247,   #  –  gc=Pd   sc=Common     EN DASH | 
| 1756 |  |  |  |  |  |  | 0x002014 =>  702706,   #  —  gc=Pd   sc=Common     EM DASH | 
| 1757 |  |  |  |  |  |  | 0x002015 =>       1,   #  ―  gc=Pd   sc=Common     HORIZONTAL BAR | 
| 1758 |  |  |  |  |  |  | 0x0000A1 =>     742,   #  ¡  gc=Po   sc=Common     INVERTED EXCLAMATION MARK | 
| 1759 |  |  |  |  |  |  | 0x0000BF =>      92,   #  ¿  gc=Po   sc=Common     INVERTED QUESTION MARK | 
| 1760 |  |  |  |  |  |  | 0x002024 =>      41,   #  ․  gc=Po   sc=Common     ONE DOT LEADER | 
| 1761 |  |  |  |  |  |  | 0x002025 =>       2,   #  ‥  gc=Po   sc=Common     TWO DOT LEADER | 
| 1762 |  |  |  |  |  |  | 0x002026 =>   58545,   #  …  gc=Po   sc=Common     HORIZONTAL ELLIPSIS | 
| 1763 |  |  |  |  |  |  | 0x0000B7 =>  101123,   #  ·  gc=Po   sc=Common     MIDDLE DOT | 
| 1764 |  |  |  |  |  |  | 0x000387 =>       2,   #  ·  gc=Po   sc=Common     GREEK ANO TELEIA | 
| 1765 |  |  |  |  |  |  | 0x002018 =>  312098,   #  ‘  gc=Pi   sc=Common     LEFT SINGLE QUOTATION MARK | 
| 1766 |  |  |  |  |  |  | 0x002019 => 1345093,   #  ’  gc=Pf   sc=Common     RIGHT SINGLE QUOTATION MARK | 
| 1767 |  |  |  |  |  |  | 0x002039 =>       7,   #  ‹  gc=Pi   sc=Common     SINGLE LEFT-POINTING ANGLE QUOTATION MARK | 
| 1768 |  |  |  |  |  |  | 0x00203A =>       5,   #  ›  gc=Pf   sc=Common     SINGLE RIGHT-POINTING ANGLE QUOTATION MARK | 
| 1769 |  |  |  |  |  |  | 0x00201C =>  807297,   #  “  gc=Pi   sc=Common     LEFT DOUBLE QUOTATION MARK | 
| 1770 |  |  |  |  |  |  | 0x00201D =>  810658,   #  ”  gc=Pf   sc=Common     RIGHT DOUBLE QUOTATION MARK | 
| 1771 |  |  |  |  |  |  | 0x00201E =>       3,   #  „  gc=Ps   sc=Common     DOUBLE LOW-9 QUOTATION MARK | 
| 1772 |  |  |  |  |  |  | 0x00201F =>       4,   #  ‟  gc=Pi   sc=Common     DOUBLE HIGH-REVERSED-9 QUOTATION MARK | 
| 1773 |  |  |  |  |  |  | 0x0000AB =>    1198,   #  «  gc=Pi   sc=Common     LEFT-POINTING DOUBLE ANGLE QUOTATION MARK | 
| 1774 |  |  |  |  |  |  | 0x0000BB =>    3705,   #  »  gc=Pf   sc=Common     RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK | 
| 1775 |  |  |  |  |  |  | 0x0024A0 =>       1,   #  ⒠  gc=So   sc=Common     PARENTHESIZED LATIN SMALL LETTER E | 
| 1776 |  |  |  |  |  |  | 0x0024B4 =>       1,   #  ⒴  gc=So   sc=Common     PARENTHESIZED LATIN SMALL LETTER Y | 
| 1777 |  |  |  |  |  |  | 0x00FE38 =>       2,   #  ︸ gc=Pe   sc=Common     PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET | 
| 1778 |  |  |  |  |  |  | 0x002985 =>       2,   #  ⦅  gc=Ps   sc=Common     LEFT WHITE PARENTHESIS | 
| 1779 |  |  |  |  |  |  | 0x002986 =>       2,   #  ⦆  gc=Pe   sc=Common     RIGHT WHITE PARENTHESIS | 
| 1780 |  |  |  |  |  |  | 0x002329 =>    3419,   #  〈 gc=Ps   sc=Common     LEFT-POINTING ANGLE BRACKET | 
| 1781 |  |  |  |  |  |  | 0x00232A =>    3428,   #  〉 gc=Pe   sc=Common     RIGHT-POINTING ANGLE BRACKET | 
| 1782 |  |  |  |  |  |  | 0x00300A =>      19,   #  《 gc=Ps   sc=Common     LEFT DOUBLE ANGLE BRACKET | 
| 1783 |  |  |  |  |  |  | 0x00300B =>      24,   #  》 gc=Pe   sc=Common     RIGHT DOUBLE ANGLE BRACKET | 
| 1784 |  |  |  |  |  |  | 0x00301A =>   22451,   #  〚 gc=Ps   sc=Common     LEFT WHITE SQUARE BRACKET | 
| 1785 |  |  |  |  |  |  | 0x00301B =>   22452,   #  〛 gc=Pe   sc=Common     RIGHT WHITE SQUARE BRACKET | 
| 1786 |  |  |  |  |  |  | 0x0000A7 =>   77766,   #  §  gc=So   sc=Common     SECTION SIGN | 
| 1787 |  |  |  |  |  |  | 0x0000B6 =>   22670,   #  ¶  gc=So   sc=Common     PILCROW SIGN | 
| 1788 |  |  |  |  |  |  | 0x00204B =>       1,   #  ⁋  gc=Po   sc=Common     REVERSED PILCROW SIGN | 
| 1789 |  |  |  |  |  |  | 0x0000A9 =>    6174,   #  ©  gc=So   sc=Common     COPYRIGHT SIGN | 
| 1790 |  |  |  |  |  |  | 0x0000AE =>   77437,   #  ®  gc=So   sc=Common     REGISTERED SIGN | 
| 1791 |  |  |  |  |  |  | 0x00204E =>  674216,   #  ⁎  gc=Po   sc=Common     LOW ASTERISK | 
| 1792 |  |  |  |  |  |  | 0x00FF05 =>       6,   #  % gc=Po   sc=Common     FULLWIDTH PERCENT SIGN | 
| 1793 |  |  |  |  |  |  | 0x002030 =>    1155,   #  ‰  gc=Po   sc=Common     PER MILLE SIGN | 
| 1794 |  |  |  |  |  |  | 0x002031 =>       8,   #  ‱  gc=Po   sc=Common     PER TEN THOUSAND SIGN | 
| 1795 |  |  |  |  |  |  | 0x002020 =>  294651,   #  †  gc=Po   sc=Common     DAGGER | 
| 1796 |  |  |  |  |  |  | 0x002021 =>  150474,   #  ‡  gc=Po   sc=Common     DOUBLE DAGGER | 
| 1797 |  |  |  |  |  |  | 0x002022 =>  310614,   #  •  gc=Po   sc=Common     BULLET | 
| 1798 |  |  |  |  |  |  | 0x002032 =>  583389,   #  ′  gc=Po   sc=Common     PRIME | 
| 1799 |  |  |  |  |  |  | 0x002033 =>   12193,   #  ″  gc=Po   sc=Common     DOUBLE PRIME | 
| 1800 |  |  |  |  |  |  | 0x002034 =>     240,   #  ‴  gc=Po   sc=Common     TRIPLE PRIME | 
| 1801 |  |  |  |  |  |  | 0x002057 =>      20,   #  ⁗  gc=Po   sc=Common     QUADRUPLE PRIME | 
| 1802 |  |  |  |  |  |  | 0x002035 =>      94,   #  ‵  gc=Po   sc=Common     REVERSED PRIME | 
| 1803 |  |  |  |  |  |  | 0x002036 =>       2,   #  ‶  gc=Po   sc=Common     REVERSED DOUBLE PRIME | 
| 1804 |  |  |  |  |  |  | 0x002041 =>       1,   #  ⁁  gc=Po   sc=Common     CARET INSERTION POINT | 
| 1805 |  |  |  |  |  |  | 0x0002BA =>     128,   #  ʺ  gc=Lm   sc=Common     MODIFIER LETTER DOUBLE PRIME | 
| 1806 |  |  |  |  |  |  | 0x0002C4 =>      10,   #  ˄  gc=Sk   sc=Common     MODIFIER LETTER UP ARROWHEAD | 
| 1807 |  |  |  |  |  |  | 0x0002C6 =>     227,   #  ˆ  gc=Lm   sc=Common     MODIFIER LETTER CIRCUMFLEX ACCENT | 
| 1808 |  |  |  |  |  |  | 0x0002C7 =>     138,   #  ˇ  gc=Lm   sc=Common     CARON | 
| 1809 |  |  |  |  |  |  | 0x0002C8 =>     276,   #  ˈ  gc=Lm   sc=Common     MODIFIER LETTER VERTICAL LINE | 
| 1810 |  |  |  |  |  |  | 0x0002C9 =>       1,   #  ˉ  gc=Lm   sc=Common     MODIFIER LETTER MACRON | 
| 1811 |  |  |  |  |  |  | 0x0002D4 =>       1,   #  ˔  gc=Sk   sc=Common     MODIFIER LETTER UP TACK | 
| 1812 |  |  |  |  |  |  | 0x0002E6 =>     211,   #  ˦  gc=Sk   sc=Common     MODIFIER LETTER HIGH TONE BAR | 
| 1813 |  |  |  |  |  |  | 0x0000B0 =>  803529,   #  °  gc=So   sc=Common     DEGREE SIGN | 
| 1814 |  |  |  |  |  |  | 0x002103 =>       2,   #  ℃  gc=So   sc=Common     DEGREE CELSIUS | 
| 1815 |  |  |  |  |  |  | 0x002109 =>     243,   #  ℉  gc=So   sc=Common     DEGREE FAHRENHEIT | 
| 1816 |  |  |  |  |  |  | 0x002118 =>       3,   #  ℘  gc=Sm   sc=Common     SCRIPT CAPITAL P | 
| 1817 |  |  |  |  |  |  | 0x00211E =>      34,   #  ℞  gc=So   sc=Common     PRESCRIPTION TAKE | 
| 1818 |  |  |  |  |  |  | 0x002127 =>       9,   #  ℧  gc=So   sc=Common     INVERTED OHM SIGN | 
| 1819 |  |  |  |  |  |  | 0x002129 =>       8,   #  ℩  gc=So   sc=Common     TURNED GREEK SMALL LETTER IOTA | 
| 1820 |  |  |  |  |  |  | 0x002190 =>     916,   #  ←  gc=Sm   sc=Common     LEFTWARDS ARROW | 
| 1821 |  |  |  |  |  |  | 0x002192 =>   62151,   #  →  gc=Sm   sc=Common     RIGHTWARDS ARROW | 
| 1822 |  |  |  |  |  |  | 0x00219B =>       3,   #  ↛  gc=Sm   sc=Common     RIGHTWARDS ARROW WITH STROKE | 
| 1823 |  |  |  |  |  |  | 0x002191 =>   23155,   #  ↑  gc=Sm   sc=Common     UPWARDS ARROW | 
| 1824 |  |  |  |  |  |  | 0x002193 =>   23655,   #  ↓  gc=Sm   sc=Common     DOWNWARDS ARROW | 
| 1825 |  |  |  |  |  |  | 0x002194 =>    3234,   #  ↔  gc=Sm   sc=Common     LEFT RIGHT ARROW | 
| 1826 |  |  |  |  |  |  | 0x002195 =>      81,   #  ↕  gc=So   sc=Common     UP DOWN ARROW | 
| 1827 |  |  |  |  |  |  | 0x002196 =>      77,   #  ↖  gc=So   sc=Common     NORTH WEST ARROW | 
| 1828 |  |  |  |  |  |  | 0x002197 =>     400,   #  ↗  gc=So   sc=Common     NORTH EAST ARROW | 
| 1829 |  |  |  |  |  |  | 0x002198 =>     419,   #  ↘  gc=So   sc=Common     SOUTH EAST ARROW | 
| 1830 |  |  |  |  |  |  | 0x002199 =>      23,   #  ↙  gc=So   sc=Common     SOUTH WEST ARROW | 
| 1831 |  |  |  |  |  |  | 0x00219E =>       1,   #  ↞  gc=So   sc=Common     LEFTWARDS TWO HEADED ARROW | 
| 1832 |  |  |  |  |  |  | 0x0021A0 =>       5,   #  ↠  gc=Sm   sc=Common     RIGHTWARDS TWO HEADED ARROW | 
| 1833 |  |  |  |  |  |  | 0x0021A6 =>      12,   #  ↦  gc=Sm   sc=Common     RIGHTWARDS ARROW FROM BAR | 
| 1834 |  |  |  |  |  |  | 0x0021AB =>       1,   #  ↫  gc=So   sc=Common     LEFTWARDS ARROW WITH LOOP | 
| 1835 |  |  |  |  |  |  | 0x0021AD =>       3,   #  ↭  gc=So   sc=Common     LEFT RIGHT WAVE ARROW | 
| 1836 |  |  |  |  |  |  | 0x0021B0 =>       1,   #  ↰  gc=So   sc=Common     UPWARDS ARROW WITH TIP LEFTWARDS | 
| 1837 |  |  |  |  |  |  | 0x0021B1 =>       4,   #  ↱  gc=So   sc=Common     UPWARDS ARROW WITH TIP RIGHTWARDS | 
| 1838 |  |  |  |  |  |  | 0x0021B3 =>       1,   #  ↳  gc=So   sc=Common     DOWNWARDS ARROW WITH TIP RIGHTWARDS | 
| 1839 |  |  |  |  |  |  | 0x0021BC =>       1,   #  ↼  gc=So   sc=Common     LEFTWARDS HARPOON WITH BARB UPWARDS | 
| 1840 |  |  |  |  |  |  | 0x0021BD =>       5,   #  ↽  gc=So   sc=Common     LEFTWARDS HARPOON WITH BARB DOWNWARDS | 
| 1841 |  |  |  |  |  |  | 0x0021BE =>       3,   #  ↾  gc=So   sc=Common     UPWARDS HARPOON WITH BARB RIGHTWARDS | 
| 1842 |  |  |  |  |  |  | 0x0021C0 =>     216,   #  ⇀  gc=So   sc=Common     RIGHTWARDS HARPOON WITH BARB UPWARDS | 
| 1843 |  |  |  |  |  |  | 0x0021C1 =>       1,   #  ⇁  gc=So   sc=Common     RIGHTWARDS HARPOON WITH BARB DOWNWARDS | 
| 1844 |  |  |  |  |  |  | 0x0021C2 =>       1,   #  ⇂  gc=So   sc=Common     DOWNWARDS HARPOON WITH BARB RIGHTWARDS | 
| 1845 |  |  |  |  |  |  | 0x0021C3 =>       1,   #  ⇃  gc=So   sc=Common     DOWNWARDS HARPOON WITH BARB LEFTWARDS | 
| 1846 |  |  |  |  |  |  | 0x0021C4 =>     402,   #  ⇄  gc=So   sc=Common     RIGHTWARDS ARROW OVER LEFTWARDS ARROW | 
| 1847 |  |  |  |  |  |  | 0x0021C5 =>       7,   #  ⇅  gc=So   sc=Common     UPWARDS ARROW LEFTWARDS OF DOWNWARDS ARROW | 
| 1848 |  |  |  |  |  |  | 0x0021C6 =>      78,   #  ⇆  gc=So   sc=Common     LEFTWARDS ARROW OVER RIGHTWARDS ARROW | 
| 1849 |  |  |  |  |  |  | 0x0021C8 =>      72,   #  ⇈  gc=So   sc=Common     UPWARDS PAIRED ARROWS | 
| 1850 |  |  |  |  |  |  | 0x0021C9 =>       8,   #  ⇉  gc=So   sc=Common     RIGHTWARDS PAIRED ARROWS | 
| 1851 |  |  |  |  |  |  | 0x0021CA =>      72,   #  ⇊  gc=So   sc=Common     DOWNWARDS PAIRED ARROWS | 
| 1852 |  |  |  |  |  |  | 0x0021CB =>      44,   #  ⇋  gc=So   sc=Common     LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON | 
| 1853 |  |  |  |  |  |  | 0x0021CC =>     445,   #  ⇌  gc=So   sc=Common     RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON | 
| 1854 |  |  |  |  |  |  | 0x0021D0 =>      86,   #  ⇐  gc=So   sc=Common     LEFTWARDS DOUBLE ARROW | 
| 1855 |  |  |  |  |  |  | 0x0021D1 =>     918,   #  ⇑  gc=So   sc=Common     UPWARDS DOUBLE ARROW | 
| 1856 |  |  |  |  |  |  | 0x0021D2 =>    1367,   #  ⇒  gc=Sm   sc=Common     RIGHTWARDS DOUBLE ARROW | 
| 1857 |  |  |  |  |  |  | 0x0021D3 =>     841,   #  ⇓  gc=So   sc=Common     DOWNWARDS DOUBLE ARROW | 
| 1858 |  |  |  |  |  |  | 0x0021D4 =>     689,   #  ⇔  gc=Sm   sc=Common     LEFT RIGHT DOUBLE ARROW | 
| 1859 |  |  |  |  |  |  | 0x0021D5 =>       2,   #  ⇕  gc=So   sc=Common     UP DOWN DOUBLE ARROW | 
| 1860 |  |  |  |  |  |  | 0x0021DB =>       4,   #  ⇛  gc=So   sc=Common     RIGHTWARDS TRIPLE ARROW | 
| 1861 |  |  |  |  |  |  | 0x0021DD =>       1,   #  ⇝  gc=So   sc=Common     RIGHTWARDS SQUIGGLE ARROW | 
| 1862 |  |  |  |  |  |  | 0x0021F5 =>       6,   #  ⇵  gc=Sm   sc=Common     DOWNWARDS ARROW LEFTWARDS OF UPWARDS ARROW | 
| 1863 |  |  |  |  |  |  | 0x002200 =>     185,   #  ∀  gc=Sm   sc=Common     FOR ALL | 
| 1864 |  |  |  |  |  |  | 0x002201 =>       1,   #  ∁  gc=Sm   sc=Common     COMPLEMENT | 
| 1865 |  |  |  |  |  |  | 0x002202 =>    5852,   #  ∂  gc=Sm   sc=Common     PARTIAL DIFFERENTIAL | 
| 1866 |  |  |  |  |  |  | 0x002203 =>      39,   #  ∃  gc=Sm   sc=Common     THERE EXISTS | 
| 1867 |  |  |  |  |  |  | 0x002205 =>    1583,   #  ∅  gc=Sm   sc=Common     EMPTY SET | 
| 1868 |  |  |  |  |  |  | 0x002206 =>      12,   #  ∆  gc=Sm   sc=Common     INCREMENT | 
| 1869 |  |  |  |  |  |  | 0x002207 =>     916,   #  ∇  gc=Sm   sc=Common     NABLA | 
| 1870 |  |  |  |  |  |  | 0x002208 =>    1059,   #  ∈  gc=Sm   sc=Common     ELEMENT OF | 
| 1871 |  |  |  |  |  |  | 0x002209 =>      31,   #  ∉  gc=Sm   sc=Common     NOT AN ELEMENT OF | 
| 1872 |  |  |  |  |  |  | 0x00220A =>       3,   #  ∊  gc=Sm   sc=Common     SMALL ELEMENT OF | 
| 1873 |  |  |  |  |  |  | 0x00220B =>      18,   #  ∋  gc=Sm   sc=Common     CONTAINS AS MEMBER | 
| 1874 |  |  |  |  |  |  | 0x00220C =>       1,   #  ∌  gc=Sm   sc=Common     DOES NOT CONTAIN AS MEMBER | 
| 1875 |  |  |  |  |  |  | 0x00220D =>       4,   #  ∍  gc=Sm   sc=Common     SMALL CONTAINS AS MEMBER | 
| 1876 |  |  |  |  |  |  | 0x00220F =>     659,   #  ∏  gc=Sm   sc=Common     N-ARY PRODUCT | 
| 1877 |  |  |  |  |  |  | 0x002210 =>      10,   #  ∐  gc=Sm   sc=Common     N-ARY COPRODUCT | 
| 1878 |  |  |  |  |  |  | 0x002211 =>   10654,   #  ∑  gc=Sm   sc=Common     N-ARY SUMMATION | 
| 1879 |  |  |  |  |  |  | 0x0000B1 => 4564745,   #  ±  gc=Sm   sc=Common     PLUS-MINUS SIGN | 
| 1880 |  |  |  |  |  |  | 0x0000F7 =>    1835,   #  ÷  gc=Sm   sc=Common     DIVISION SIGN | 
| 1881 |  |  |  |  |  |  | 0x0000D7 =>  700976,   #  ×  gc=Sm   sc=Common     MULTIPLICATION SIGN | 
| 1882 |  |  |  |  |  |  | 0x00FF1C =>       1,   #  < gc=Sm   sc=Common     FULLWIDTH LESS-THAN SIGN | 
| 1883 |  |  |  |  |  |  | 0x00226E =>       4,   #  ≮  gc=Sm   sc=Common     NOT LESS-THAN | 
| 1884 |  |  |  |  |  |  | 0x00FF1D =>      11,   #  = gc=Sm   sc=Common     FULLWIDTH EQUALS SIGN | 
| 1885 |  |  |  |  |  |  | 0x002260 =>    1888,   #  ≠  gc=Sm   sc=Common     NOT EQUAL TO | 
| 1886 |  |  |  |  |  |  | 0x00226F =>      12,   #  ≯  gc=Sm   sc=Common     NOT GREATER-THAN | 
| 1887 |  |  |  |  |  |  | 0x0000AC =>      36,   #  ¬  gc=Sm   sc=Common     NOT SIGN | 
| 1888 |  |  |  |  |  |  | 0x0000A6 =>     150,   #  ¦  gc=So   sc=Common     BROKEN BAR | 
| 1889 |  |  |  |  |  |  | 0x002016 =>    3435,   #  ‖  gc=Po   sc=Common     DOUBLE VERTICAL LINE | 
| 1890 |  |  |  |  |  |  | 0x002212 => 1989514,   #  −  gc=Sm   sc=Common     MINUS SIGN | 
| 1891 |  |  |  |  |  |  | 0x002213 =>     158,   #  ∓  gc=Sm   sc=Common     MINUS-OR-PLUS SIGN | 
| 1892 |  |  |  |  |  |  | 0x002214 =>       3,   #  ∔  gc=Sm   sc=Common     DOT PLUS | 
| 1893 |  |  |  |  |  |  | 0x002215 =>      13,   #  ∕  gc=Sm   sc=Common     DIVISION SLASH | 
| 1894 |  |  |  |  |  |  | 0x002216 =>       4,   #  ∖  gc=Sm   sc=Common     SET MINUS | 
| 1895 |  |  |  |  |  |  | 0x002217 =>  579784,   #  ∗  gc=Sm   sc=Common     ASTERISK OPERATOR | 
| 1896 |  |  |  |  |  |  | 0x002218 =>    2540,   #  ∘  gc=Sm   sc=Common     RING OPERATOR | 
| 1897 |  |  |  |  |  |  | 0x002219 =>     784,   #  ∙  gc=Sm   sc=Common     BULLET OPERATOR | 
| 1898 |  |  |  |  |  |  | 0x00221A =>    3316,   #  √  gc=Sm   sc=Common     SQUARE ROOT | 
| 1899 |  |  |  |  |  |  | 0x00221D =>     909,   #  ∝  gc=Sm   sc=Common     PROPORTIONAL TO | 
| 1900 |  |  |  |  |  |  | 0x00221E =>    6138,   #  ∞  gc=Sm   sc=Common     INFINITY | 
| 1901 |  |  |  |  |  |  | 0x002220 =>     188,   #  ∠  gc=Sm   sc=Common     ANGLE | 
| 1902 |  |  |  |  |  |  | 0x002222 =>      19,   #  ∢  gc=Sm   sc=Common     SPHERICAL ANGLE | 
| 1903 |  |  |  |  |  |  | 0x002223 =>    3115,   #  ∣  gc=Sm   sc=Common     DIVIDES | 
| 1904 |  |  |  |  |  |  | 0x002225 =>   26293,   #  ∥  gc=Sm   sc=Common     PARALLEL TO | 
| 1905 |  |  |  |  |  |  | 0x002227 =>     936,   #  ∧  gc=Sm   sc=Common     LOGICAL AND | 
| 1906 |  |  |  |  |  |  | 0x002228 =>      34,   #  ∨  gc=Sm   sc=Common     LOGICAL OR | 
| 1907 |  |  |  |  |  |  | 0x002229 =>     216,   #  ∩  gc=Sm   sc=Common     INTERSECTION | 
| 1908 |  |  |  |  |  |  | 0x00222A =>     136,   #  ∪  gc=Sm   sc=Common     UNION | 
| 1909 |  |  |  |  |  |  | 0x00222B =>    4054,   #  ∫  gc=Sm   sc=Common     INTEGRAL | 
| 1910 |  |  |  |  |  |  | 0x00222C =>       1,   #  ∬  gc=Sm   sc=Common     DOUBLE INTEGRAL | 
| 1911 |  |  |  |  |  |  | 0x00222E =>      27,   #  ∮  gc=Sm   sc=Common     CONTOUR INTEGRAL | 
| 1912 |  |  |  |  |  |  | 0x00222F =>       2,   #  ∯  gc=Sm   sc=Common     SURFACE INTEGRAL | 
| 1913 |  |  |  |  |  |  | 0x002234 =>      54,   #  ∴  gc=Sm   sc=Common     THEREFORE | 
| 1914 |  |  |  |  |  |  | 0x002235 =>       2,   #  ∵  gc=Sm   sc=Common     BECAUSE | 
| 1915 |  |  |  |  |  |  | 0x002237 =>     973,   #  ∷  gc=Sm   sc=Common     PROPORTION | 
| 1916 |  |  |  |  |  |  | 0x002238 =>      30,   #  ∸  gc=Sm   sc=Common     DOT MINUS | 
| 1917 |  |  |  |  |  |  | 0x00223C =>  106319,   #  ∼  gc=Sm   sc=Common     TILDE OPERATOR | 
| 1918 |  |  |  |  |  |  | 0x00223D =>     154,   #  ∽  gc=Sm   sc=Common     REVERSED TILDE | 
| 1919 |  |  |  |  |  |  | 0x00223E =>      46,   #  ∾  gc=Sm   sc=Common     INVERTED LAZY S | 
| 1920 |  |  |  |  |  |  | 0x002242 =>       3,   #  ≂  gc=Sm   sc=Common     MINUS TILDE | 
| 1921 |  |  |  |  |  |  | 0x002243 =>     437,   #  ≃  gc=Sm   sc=Common     ASYMPTOTICALLY EQUAL TO | 
| 1922 |  |  |  |  |  |  | 0x002245 =>    1678,   #  ≅  gc=Sm   sc=Common     APPROXIMATELY EQUAL TO | 
| 1923 |  |  |  |  |  |  | 0x002248 =>   16602,   #  ≈  gc=Sm   sc=Common     ALMOST EQUAL TO | 
| 1924 |  |  |  |  |  |  | 0x002249 =>       1,   #  ≉  gc=Sm   sc=Common     NOT ALMOST EQUAL TO | 
| 1925 |  |  |  |  |  |  | 0x00224A =>      18,   #  ≊  gc=Sm   sc=Common     ALMOST EQUAL OR EQUAL TO | 
| 1926 |  |  |  |  |  |  | 0x00224B =>       1,   #  ≋  gc=Sm   sc=Common     TRIPLE TILDE | 
| 1927 |  |  |  |  |  |  | 0x00224C =>      12,   #  ≌  gc=Sm   sc=Common     ALL EQUAL TO | 
| 1928 |  |  |  |  |  |  | 0x00224D =>       1,   #  ≍  gc=Sm   sc=Common     EQUIVALENT TO | 
| 1929 |  |  |  |  |  |  | 0x00224F =>       1,   #  ≏  gc=Sm   sc=Common     DIFFERENCE BETWEEN | 
| 1930 |  |  |  |  |  |  | 0x002250 =>     318,   #  ≐  gc=Sm   sc=Common     APPROACHES THE LIMIT | 
| 1931 |  |  |  |  |  |  | 0x002251 =>       4,   #  ≑  gc=Sm   sc=Common     GEOMETRICALLY EQUAL TO | 
| 1932 |  |  |  |  |  |  | 0x002252 =>      56,   #  ≒  gc=Sm   sc=Common     APPROXIMATELY EQUAL TO OR THE IMAGE OF | 
| 1933 |  |  |  |  |  |  | 0x002253 =>       2,   #  ≓  gc=Sm   sc=Common     IMAGE OF OR APPROXIMATELY EQUAL TO | 
| 1934 |  |  |  |  |  |  | 0x002254 =>      23,   #  ≔  gc=Sm   sc=Common     COLON EQUALS | 
| 1935 |  |  |  |  |  |  | 0x002255 =>       4,   #  ≕  gc=Sm   sc=Common     EQUALS COLON | 
| 1936 |  |  |  |  |  |  | 0x002256 =>       1,   #  ≖  gc=Sm   sc=Common     RING IN EQUAL TO | 
| 1937 |  |  |  |  |  |  | 0x002259 =>       8,   #  ≙  gc=Sm   sc=Common     ESTIMATES | 
| 1938 |  |  |  |  |  |  | 0x00225C =>      41,   #  ≜  gc=Sm   sc=Common     DELTA EQUAL TO | 
| 1939 |  |  |  |  |  |  | 0x002261 =>     729,   #  ≡  gc=Sm   sc=Common     IDENTICAL TO | 
| 1940 |  |  |  |  |  |  | 0x002262 =>      34,   #  ≢  gc=Sm   sc=Common     NOT IDENTICAL TO | 
| 1941 |  |  |  |  |  |  | 0x002264 =>  143271,   #  ≤  gc=Sm   sc=Common     LESS-THAN OR EQUAL TO | 
| 1942 |  |  |  |  |  |  | 0x002270 =>       2,   #  ≰  gc=Sm   sc=Common     NEITHER LESS-THAN NOR EQUAL TO | 
| 1943 |  |  |  |  |  |  | 0x002265 =>  251048,   #  ≥  gc=Sm   sc=Common     GREATER-THAN OR EQUAL TO | 
| 1944 |  |  |  |  |  |  | 0x002271 =>      11,   #  ≱  gc=Sm   sc=Common     NEITHER GREATER-THAN NOR EQUAL TO | 
| 1945 |  |  |  |  |  |  | 0x002266 =>     723,   #  ≦  gc=Sm   sc=Common     LESS-THAN OVER EQUAL TO | 
| 1946 |  |  |  |  |  |  | 0x002267 =>    1249,   #  ≧  gc=Sm   sc=Common     GREATER-THAN OVER EQUAL TO | 
| 1947 |  |  |  |  |  |  | 0x00226A =>    1100,   #  ≪  gc=Sm   sc=Common     MUCH LESS-THAN | 
| 1948 |  |  |  |  |  |  | 0x00226B =>    1852,   #  ≫  gc=Sm   sc=Common     MUCH GREATER-THAN | 
| 1949 |  |  |  |  |  |  | 0x002272 =>      84,   #  ≲  gc=Sm   sc=Common     LESS-THAN OR EQUIVALENT TO | 
| 1950 |  |  |  |  |  |  | 0x002273 =>      48,   #  ≳  gc=Sm   sc=Common     GREATER-THAN OR EQUIVALENT TO | 
| 1951 |  |  |  |  |  |  | 0x002276 =>       5,   #  ≶  gc=Sm   sc=Common     LESS-THAN OR GREATER-THAN | 
| 1952 |  |  |  |  |  |  | 0x002277 =>      15,   #  ≷  gc=Sm   sc=Common     GREATER-THAN OR LESS-THAN | 
| 1953 |  |  |  |  |  |  | 0x002279 =>       2,   #  ≹  gc=Sm   sc=Common     NEITHER GREATER-THAN NOR LESS-THAN | 
| 1954 |  |  |  |  |  |  | 0x00227A =>       7,   #  ≺  gc=Sm   sc=Common     PRECEDES | 
| 1955 |  |  |  |  |  |  | 0x00227B =>      13,   #  ≻  gc=Sm   sc=Common     SUCCEEDS | 
| 1956 |  |  |  |  |  |  | 0x002281 =>       1,   #  ⊁  gc=Sm   sc=Common     DOES NOT SUCCEED | 
| 1957 |  |  |  |  |  |  | 0x00227D =>       1,   #  ≽  gc=Sm   sc=Common     SUCCEEDS OR EQUAL TO | 
| 1958 |  |  |  |  |  |  | 0x002282 =>      30,   #  ⊂  gc=Sm   sc=Common     SUBSET OF | 
| 1959 |  |  |  |  |  |  | 0x002284 =>       7,   #  ⊄  gc=Sm   sc=Common     NOT A SUBSET OF | 
| 1960 |  |  |  |  |  |  | 0x002283 =>       6,   #  ⊃  gc=Sm   sc=Common     SUPERSET OF | 
| 1961 |  |  |  |  |  |  | 0x002286 =>      12,   #  ⊆  gc=Sm   sc=Common     SUBSET OF OR EQUAL TO | 
| 1962 |  |  |  |  |  |  | 0x002287 =>       6,   #  ⊇  gc=Sm   sc=Common     SUPERSET OF OR EQUAL TO | 
| 1963 |  |  |  |  |  |  | 0x00228E =>      23,   #  ⊎  gc=Sm   sc=Common     MULTISET UNION | 
| 1964 |  |  |  |  |  |  | 0x002293 =>      10,   #  ⊓  gc=Sm   sc=Common     SQUARE CAP | 
| 1965 |  |  |  |  |  |  | 0x002294 =>       8,   #  ⊔  gc=Sm   sc=Common     SQUARE CUP | 
| 1966 |  |  |  |  |  |  | 0x002295 =>     361,   #  ⊕  gc=Sm   sc=Common     CIRCLED PLUS | 
| 1967 |  |  |  |  |  |  | 0x002296 =>      73,   #  ⊖  gc=Sm   sc=Common     CIRCLED MINUS | 
| 1968 |  |  |  |  |  |  | 0x002297 =>     376,   #  ⊗  gc=Sm   sc=Common     CIRCLED TIMES | 
| 1969 |  |  |  |  |  |  | 0x002298 =>      24,   #  ⊘  gc=Sm   sc=Common     CIRCLED DIVISION SLASH | 
| 1970 |  |  |  |  |  |  | 0x002299 =>      50,   #  ⊙  gc=Sm   sc=Common     CIRCLED DOT OPERATOR | 
| 1971 |  |  |  |  |  |  | 0x00229A =>     122,   #  ⊚  gc=Sm   sc=Common     CIRCLED RING OPERATOR | 
| 1972 |  |  |  |  |  |  | 0x00229B =>       7,   #  ⊛  gc=Sm   sc=Common     CIRCLED ASTERISK OPERATOR | 
| 1973 |  |  |  |  |  |  | 0x00229D =>      23,   #  ⊝  gc=Sm   sc=Common     CIRCLED DASH | 
| 1974 |  |  |  |  |  |  | 0x00229E =>      16,   #  ⊞  gc=Sm   sc=Common     SQUARED PLUS | 
| 1975 |  |  |  |  |  |  | 0x00229F =>       2,   #  ⊟  gc=Sm   sc=Common     SQUARED MINUS | 
| 1976 |  |  |  |  |  |  | 0x0022A0 =>      34,   #  ⊠  gc=Sm   sc=Common     SQUARED TIMES | 
| 1977 |  |  |  |  |  |  | 0x0022A1 =>       5,   #  ⊡  gc=Sm   sc=Common     SQUARED DOT OPERATOR | 
| 1978 |  |  |  |  |  |  | 0x0022A2 =>      24,   #  ⊢  gc=Sm   sc=Common     RIGHT TACK | 
| 1979 |  |  |  |  |  |  | 0x0022A3 =>      15,   #  ⊣  gc=Sm   sc=Common     LEFT TACK | 
| 1980 |  |  |  |  |  |  | 0x0022A4 =>      36,   #  ⊤  gc=Sm   sc=Common     DOWN TACK | 
| 1981 |  |  |  |  |  |  | 0x0022A5 =>     656,   #  ⊥  gc=Sm   sc=Common     UP TACK | 
| 1982 |  |  |  |  |  |  | 0x0022B9 =>       3,   #  ⊹  gc=Sm   sc=Common     HERMITIAN CONJUGATE MATRIX | 
| 1983 |  |  |  |  |  |  | 0x0022BB =>     229,   #  ⊻  gc=Sm   sc=Common     XOR | 
| 1984 |  |  |  |  |  |  | 0x0022BC =>      34,   #  ⊼  gc=Sm   sc=Common     NAND | 
| 1985 |  |  |  |  |  |  | 0x0022C0 =>      49,   #  ⋀  gc=Sm   sc=Common     N-ARY LOGICAL AND | 
| 1986 |  |  |  |  |  |  | 0x0022C1 =>       2,   #  ⋁  gc=Sm   sc=Common     N-ARY LOGICAL OR | 
| 1987 |  |  |  |  |  |  | 0x0022C2 =>      12,   #  ⋂  gc=Sm   sc=Common     N-ARY INTERSECTION | 
| 1988 |  |  |  |  |  |  | 0x0022C3 =>       6,   #  ⋃  gc=Sm   sc=Common     N-ARY UNION | 
| 1989 |  |  |  |  |  |  | 0x0022C4 =>       6,   #  ⋄  gc=Sm   sc=Common     DIAMOND OPERATOR | 
| 1990 |  |  |  |  |  |  | 0x0022C5 =>    6131,   #  ⋅  gc=Sm   sc=Common     DOT OPERATOR | 
| 1991 |  |  |  |  |  |  | 0x0022C6 =>      46,   #  ⋆  gc=Sm   sc=Common     STAR OPERATOR | 
| 1992 |  |  |  |  |  |  | 0x0022C7 =>      24,   #  ⋇  gc=Sm   sc=Common     DIVISION TIMES | 
| 1993 |  |  |  |  |  |  | 0x0022C8 =>       9,   #  ⋈  gc=Sm   sc=Common     BOWTIE | 
| 1994 |  |  |  |  |  |  | 0x0022CD =>      12,   #  ⋍  gc=Sm   sc=Common     REVERSED TILDE EQUALS | 
| 1995 |  |  |  |  |  |  | 0x0022CE =>       1,   #  ⋎  gc=Sm   sc=Common     CURLY LOGICAL OR | 
| 1996 |  |  |  |  |  |  | 0x0022CF =>       1,   #  ⋏  gc=Sm   sc=Common     CURLY LOGICAL AND | 
| 1997 |  |  |  |  |  |  | 0x0022D6 =>       4,   #  ⋖  gc=Sm   sc=Common     LESS-THAN WITH DOT | 
| 1998 |  |  |  |  |  |  | 0x0022D7 =>       6,   #  ⋗  gc=Sm   sc=Common     GREATER-THAN WITH DOT | 
| 1999 |  |  |  |  |  |  | 0x0022D8 =>       5,   #  ⋘  gc=Sm   sc=Common     VERY MUCH LESS-THAN | 
| 2000 |  |  |  |  |  |  | 0x0022D9 =>      74,   #  ⋙  gc=Sm   sc=Common     VERY MUCH GREATER-THAN | 
| 2001 |  |  |  |  |  |  | 0x0022DA =>       1,   #  ⋚  gc=Sm   sc=Common     LESS-THAN EQUAL TO OR GREATER-THAN | 
| 2002 |  |  |  |  |  |  | 0x0022EE =>     334,   #  ⋮  gc=Sm   sc=Common     VERTICAL ELLIPSIS | 
| 2003 |  |  |  |  |  |  | 0x0022EF =>    2676,   #  ⋯  gc=Sm   sc=Common     MIDLINE HORIZONTAL ELLIPSIS | 
| 2004 |  |  |  |  |  |  | 0x0022F1 =>      63,   #  ⋱  gc=Sm   sc=Common     DOWN RIGHT DIAGONAL ELLIPSIS | 
| 2005 |  |  |  |  |  |  | 0x002302 =>       1,   #  ⌂  gc=So   sc=Common     HOUSE | 
| 2006 |  |  |  |  |  |  | 0x002308 =>      20,   #  ⌈  gc=Sm   sc=Common     LEFT CEILING | 
| 2007 |  |  |  |  |  |  | 0x002309 =>      76,   #  ⌉  gc=Sm   sc=Common     RIGHT CEILING | 
| 2008 |  |  |  |  |  |  | 0x00230A =>      37,   #  ⌊  gc=Sm   sc=Common     LEFT FLOOR | 
| 2009 |  |  |  |  |  |  | 0x00230B =>      70,   #  ⌋  gc=Sm   sc=Common     RIGHT FLOOR | 
| 2010 |  |  |  |  |  |  | 0x002316 =>      15,   #  ⌖  gc=So   sc=Common     POSITION INDICATOR | 
| 2011 |  |  |  |  |  |  | 0x00231C =>       2,   #  ⌜  gc=So   sc=Common     TOP LEFT CORNER | 
| 2012 |  |  |  |  |  |  | 0x00231D =>      11,   #  ⌝  gc=So   sc=Common     TOP RIGHT CORNER | 
| 2013 |  |  |  |  |  |  | 0x00231F =>       5,   #  ⌟  gc=So   sc=Common     BOTTOM RIGHT CORNER | 
| 2014 |  |  |  |  |  |  | 0x002322 =>      11,   #  ⌢  gc=So   sc=Common     FROWN | 
| 2015 |  |  |  |  |  |  | 0x002323 =>      57,   #  ⌣  gc=So   sc=Common     SMILE | 
| 2016 |  |  |  |  |  |  | 0x002394 =>      20,   #  ⎔  gc=So   sc=Common     SOFTWARE-FUNCTION SYMBOL | 
| 2017 |  |  |  |  |  |  | 0x002500 =>       6,   #  ─  gc=So   sc=Common     BOX DRAWINGS LIGHT HORIZONTAL | 
| 2018 |  |  |  |  |  |  | 0x002534 =>       2,   #  ┴  gc=So   sc=Common     BOX DRAWINGS LIGHT UP AND HORIZONTAL | 
| 2019 |  |  |  |  |  |  | 0x002551 =>       1,   #  ║  gc=So   sc=Common     BOX DRAWINGS DOUBLE VERTICAL | 
| 2020 |  |  |  |  |  |  | 0x002580 =>       2,   #  ▀  gc=So   sc=Common     UPPER HALF BLOCK | 
| 2021 |  |  |  |  |  |  | 0x00258C =>       1,   #  ▌  gc=So   sc=Common     LEFT HALF BLOCK | 
| 2022 |  |  |  |  |  |  | 0x002591 =>       1,   #  ░  gc=So   sc=Common     LIGHT SHADE | 
| 2023 |  |  |  |  |  |  | 0x002592 =>       1,   #  ▒  gc=So   sc=Common     MEDIUM SHADE | 
| 2024 |  |  |  |  |  |  | 0x0025A0 =>   26845,   #  ■  gc=So   sc=Common     BLACK SQUARE | 
| 2025 |  |  |  |  |  |  | 0x0025A1 =>   28366,   #  □  gc=So   sc=Common     WHITE SQUARE | 
| 2026 |  |  |  |  |  |  | 0x0025A4 =>      85,   #  ▤  gc=So   sc=Common     SQUARE WITH HORIZONTAL FILL | 
| 2027 |  |  |  |  |  |  | 0x0025A5 =>      64,   #  ▥  gc=So   sc=Common     SQUARE WITH VERTICAL FILL | 
| 2028 |  |  |  |  |  |  | 0x0025A6 =>       1,   #  ▦  gc=So   sc=Common     SQUARE WITH ORTHOGONAL CROSSHATCH FILL | 
| 2029 |  |  |  |  |  |  | 0x0025A7 =>     188,   #  ▧  gc=So   sc=Common     SQUARE WITH UPPER LEFT TO LOWER RIGHT FILL | 
| 2030 |  |  |  |  |  |  | 0x0025A8 =>     525,   #  ▨  gc=So   sc=Common     SQUARE WITH UPPER RIGHT TO LOWER LEFT FILL | 
| 2031 |  |  |  |  |  |  | 0x0025A9 =>     728,   #  ▩  gc=So   sc=Common     SQUARE WITH DIAGONAL CROSSHATCH FILL | 
| 2032 |  |  |  |  |  |  | 0x0025AA =>     532,   #  ▪  gc=So   sc=Common     BLACK SMALL SQUARE | 
| 2033 |  |  |  |  |  |  | 0x0025AB =>     172,   #  ▫  gc=So   sc=Common     WHITE SMALL SQUARE | 
| 2034 |  |  |  |  |  |  | 0x0025AC =>       2,   #  ▬  gc=So   sc=Common     BLACK RECTANGLE | 
| 2035 |  |  |  |  |  |  | 0x0025AD =>      88,   #  ▭  gc=So   sc=Common     WHITE RECTANGLE | 
| 2036 |  |  |  |  |  |  | 0x0025AF =>       4,   #  ▯  gc=So   sc=Common     WHITE VERTICAL RECTANGLE | 
| 2037 |  |  |  |  |  |  | 0x0025B1 =>       5,   #  ▱  gc=So   sc=Common     WHITE PARALLELOGRAM | 
| 2038 |  |  |  |  |  |  | 0x0025B2 =>     208,   #  ▲  gc=So   sc=Common     BLACK UP-POINTING TRIANGLE | 
| 2039 |  |  |  |  |  |  | 0x0025B3 =>     792,   #  △  gc=So   sc=Common     WHITE UP-POINTING TRIANGLE | 
| 2040 |  |  |  |  |  |  | 0x0025B4 =>   12479,   #  ▴  gc=So   sc=Common     BLACK UP-POINTING SMALL TRIANGLE | 
| 2041 |  |  |  |  |  |  | 0x0025B5 =>    7692,   #  ▵  gc=So   sc=Common     WHITE UP-POINTING SMALL TRIANGLE | 
| 2042 |  |  |  |  |  |  | 0x0025B6 =>     643,   #  ▶  gc=So   sc=Common     BLACK RIGHT-POINTING TRIANGLE | 
| 2043 |  |  |  |  |  |  | 0x0025B7 =>      29,   #  ▷  gc=Sm   sc=Common     WHITE RIGHT-POINTING TRIANGLE | 
| 2044 |  |  |  |  |  |  | 0x0025BA =>       1,   #  ►  gc=So   sc=Common     BLACK RIGHT-POINTING POINTER | 
| 2045 |  |  |  |  |  |  | 0x0025BC =>      45,   #  ▼  gc=So   sc=Common     BLACK DOWN-POINTING TRIANGLE | 
| 2046 |  |  |  |  |  |  | 0x0025BD =>     220,   #  ▽  gc=So   sc=Common     WHITE DOWN-POINTING TRIANGLE | 
| 2047 |  |  |  |  |  |  | 0x0025BE =>    3335,   #  ▾  gc=So   sc=Common     BLACK DOWN-POINTING SMALL TRIANGLE | 
| 2048 |  |  |  |  |  |  | 0x0025BF =>    1557,   #  ▿  gc=So   sc=Common     WHITE DOWN-POINTING SMALL TRIANGLE | 
| 2049 |  |  |  |  |  |  | 0x0025C0 =>     113,   #  ◀  gc=So   sc=Common     BLACK LEFT-POINTING TRIANGLE | 
| 2050 |  |  |  |  |  |  | 0x0025C1 =>      33,   #  ◁  gc=Sm   sc=Common     WHITE LEFT-POINTING TRIANGLE | 
| 2051 |  |  |  |  |  |  | 0x0025C6 =>       9,   #  ◆  gc=So   sc=Common     BLACK DIAMOND | 
| 2052 |  |  |  |  |  |  | 0x0025C7 =>       2,   #  ◇  gc=So   sc=Common     WHITE DIAMOND | 
| 2053 |  |  |  |  |  |  | 0x0025C9 =>       1,   #  ◉  gc=So   sc=Common     FISHEYE | 
| 2054 |  |  |  |  |  |  | 0x0025CA =>     906,   #  ◊  gc=So   sc=Common     LOZENGE | 
| 2055 |  |  |  |  |  |  | 0x0025CB =>   28227,   #  ○  gc=So   sc=Common     WHITE CIRCLE | 
| 2056 |  |  |  |  |  |  | 0x0025CF =>    6925,   #  ●  gc=So   sc=Common     BLACK CIRCLE | 
| 2057 |  |  |  |  |  |  | 0x0025D0 =>      45,   #  ◐  gc=So   sc=Common     CIRCLE WITH LEFT HALF BLACK | 
| 2058 |  |  |  |  |  |  | 0x0025D1 =>      65,   #  ◑  gc=So   sc=Common     CIRCLE WITH RIGHT HALF BLACK | 
| 2059 |  |  |  |  |  |  | 0x0025D2 =>      11,   #  ◒  gc=So   sc=Common     CIRCLE WITH LOWER HALF BLACK | 
| 2060 |  |  |  |  |  |  | 0x0025D3 =>       3,   #  ◓  gc=So   sc=Common     CIRCLE WITH UPPER HALF BLACK | 
| 2061 |  |  |  |  |  |  | 0x0025D8 =>      14,   #  ◘  gc=So   sc=Common     INVERSE BULLET | 
| 2062 |  |  |  |  |  |  | 0x0025E6 =>    5443,   #  ◦  gc=So   sc=Common     WHITE BULLET | 
| 2063 |  |  |  |  |  |  | 0x0025E7 =>       5,   #  ◧  gc=So   sc=Common     SQUARE WITH LEFT HALF BLACK | 
| 2064 |  |  |  |  |  |  | 0x0025E8 =>      13,   #  ◨  gc=So   sc=Common     SQUARE WITH RIGHT HALF BLACK | 
| 2065 |  |  |  |  |  |  | 0x0025E9 =>       9,   #  ◩  gc=So   sc=Common     SQUARE WITH UPPER LEFT DIAGONAL HALF BLACK | 
| 2066 |  |  |  |  |  |  | 0x0025EA =>       9,   #  ◪  gc=So   sc=Common     SQUARE WITH LOWER RIGHT DIAGONAL HALF BLACK | 
| 2067 |  |  |  |  |  |  | 0x0025EB =>       3,   #  ◫  gc=So   sc=Common     WHITE SQUARE WITH VERTICAL BISECTING LINE | 
| 2068 |  |  |  |  |  |  | 0x0025EF =>      22,   #  ◯  gc=So   sc=Common     LARGE CIRCLE | 
| 2069 |  |  |  |  |  |  | 0x002605 =>   16916,   #  ★  gc=So   sc=Common     BLACK STAR | 
| 2070 |  |  |  |  |  |  | 0x002606 =>   91231,   #  ☆  gc=So   sc=Common     WHITE STAR | 
| 2071 |  |  |  |  |  |  | 0x00260E =>       4,   #  ☎  gc=So   sc=Common     BLACK TELEPHONE | 
| 2072 |  |  |  |  |  |  | 0x002610 =>    1600,   #  ☐  gc=So   sc=Common     BALLOT BOX | 
| 2073 |  |  |  |  |  |  | 0x002612 =>      32,   #  ☒  gc=So   sc=Common     BALLOT BOX WITH X | 
| 2074 |  |  |  |  |  |  | 0x00263C =>       1,   #  ☼  gc=So   sc=Common     WHITE SUN WITH RAYS | 
| 2075 |  |  |  |  |  |  | 0x002640 =>    1600,   #  ♀  gc=So   sc=Common     FEMALE SIGN | 
| 2076 |  |  |  |  |  |  | 0x002642 =>    1153,   #  ♂  gc=So   sc=Common     MALE SIGN | 
| 2077 |  |  |  |  |  |  | 0x002660 =>      67,   #  ♠  gc=So   sc=Common     BLACK SPADE SUIT | 
| 2078 |  |  |  |  |  |  | 0x002662 =>    3417,   #  ♢  gc=So   sc=Common     WHITE DIAMOND SUIT | 
| 2079 |  |  |  |  |  |  | 0x002663 =>     372,   #  ♣  gc=So   sc=Common     BLACK CLUB SUIT | 
| 2080 |  |  |  |  |  |  | 0x002665 =>      34,   #  ♥  gc=So   sc=Common     BLACK HEART SUIT | 
| 2081 |  |  |  |  |  |  | 0x002666 =>    6943,   #  ♦  gc=So   sc=Common     BLACK DIAMOND SUIT | 
| 2082 |  |  |  |  |  |  | 0x002713 =>    9161,   #  ✓  gc=So   sc=Common     CHECK MARK | 
| 2083 |  |  |  |  |  |  | 0x00271A =>       5,   #  ✚  gc=So   sc=Common     HEAVY GREEK CROSS | 
| 2084 |  |  |  |  |  |  | 0x002720 =>      96,   #  ✠  gc=So   sc=Common     MALTESE CROSS | 
| 2085 |  |  |  |  |  |  | 0x002726 =>       2,   #  ✦  gc=So   sc=Common     BLACK FOUR POINTED STAR | 
| 2086 |  |  |  |  |  |  | 0x002727 =>       2,   #  ✧  gc=So   sc=Common     WHITE FOUR POINTED STAR | 
| 2087 |  |  |  |  |  |  | 0x002730 =>       1,   #  ✰  gc=So   sc=Common     SHADOWED WHITE STAR | 
| 2088 |  |  |  |  |  |  | 0x002732 =>       2,   #  ✲  gc=So   sc=Common     OPEN CENTRE ASTERISK | 
| 2089 |  |  |  |  |  |  | 0x002736 =>       7,   #  ✶  gc=So   sc=Common     SIX POINTED BLACK STAR | 
| 2090 |  |  |  |  |  |  | 0x002737 =>       6,   #  ✷  gc=So   sc=Common     EIGHT POINTED RECTILINEAR BLACK STAR | 
| 2091 |  |  |  |  |  |  | 0x002739 =>       3,   #  ✹  gc=So   sc=Common     TWELVE POINTED BLACK STAR | 
| 2092 |  |  |  |  |  |  | 0x00274F =>     103,   #  ❏  gc=So   sc=Common     LOWER RIGHT DROP-SHADOWED WHITE SQUARE | 
| 2093 |  |  |  |  |  |  | 0x002751 =>      79,   #  ❑  gc=So   sc=Common     LOWER RIGHT SHADOWED WHITE SQUARE | 
| 2094 |  |  |  |  |  |  | 0x002752 =>     347,   #  ❒  gc=So   sc=Common     UPPER RIGHT SHADOWED WHITE SQUARE | 
| 2095 |  |  |  |  |  |  | 0x002756 =>      16,   #  ❖  gc=So   sc=Common     BLACK DIAMOND MINUS WHITE X | 
| 2096 |  |  |  |  |  |  | 0x002758 =>       5,   #  ❘  gc=So   sc=Common     LIGHT VERTICAL BAR | 
| 2097 |  |  |  |  |  |  | 0x002798 =>       1,   #  ➘  gc=So   sc=Common     HEAVY SOUTH EAST ARROW | 
| 2098 |  |  |  |  |  |  | 0x00279A =>       2,   #  ➚  gc=So   sc=Common     HEAVY NORTH EAST ARROW | 
| 2099 |  |  |  |  |  |  | 0x0027A2 =>     475,   #  ➢  gc=So   sc=Common     THREE-D TOP-LIGHTED RIGHTWARDS ARROWHEAD | 
| 2100 |  |  |  |  |  |  | 0x002937 =>       3,   #  ⤷  gc=Sm   sc=Common     ARROW POINTING DOWNWARDS THEN CURVING RIGHTWARDS | 
| 2101 |  |  |  |  |  |  | 0x002942 =>       4,   #  ⥂  gc=Sm   sc=Common     RIGHTWARDS ARROW ABOVE SHORT LEFTWARDS ARROW | 
| 2102 |  |  |  |  |  |  | 0x002944 =>       9,   #  ⥄  gc=Sm   sc=Common     SHORT RIGHTWARDS ARROW ABOVE LEFTWARDS ARROW | 
| 2103 |  |  |  |  |  |  | 0x002947 =>      15,   #  ⥇  gc=Sm   sc=Common     RIGHTWARDS ARROW THROUGH X | 
| 2104 |  |  |  |  |  |  | 0x00296E =>       3,   #  ⥮  gc=Sm   sc=Common     UPWARDS HARPOON WITH BARB LEFT BESIDE DOWNWARDS HARPOON WITH BARB RIGHT | 
| 2105 |  |  |  |  |  |  | 0x00296F =>       1,   #  ⥯  gc=Sm   sc=Common     DOWNWARDS HARPOON WITH BARB LEFT BESIDE UPWARDS HARPOON WITH BARB RIGHT | 
| 2106 |  |  |  |  |  |  | 0x002980 =>      17,   #  ⦀  gc=Sm   sc=Common     TRIPLE VERTICAL BAR DELIMITER | 
| 2107 |  |  |  |  |  |  | 0x002999 =>       2,   #  ⦙  gc=Sm   sc=Common     DOTTED FENCE | 
| 2108 |  |  |  |  |  |  | 0x0029A0 =>       2,   #  ⦠  gc=Sm   sc=Common     SPHERICAL ANGLE OPENING LEFT | 
| 2109 |  |  |  |  |  |  | 0x0029A1 =>      12,   #  ⦡  gc=Sm   sc=Common     SPHERICAL ANGLE OPENING UP | 
| 2110 |  |  |  |  |  |  | 0x0029B5 =>       9,   #  ⦵  gc=Sm   sc=Common     CIRCLE WITH HORIZONTAL BAR | 
| 2111 |  |  |  |  |  |  | 0x0029B6 =>      13,   #  ⦶  gc=Sm   sc=Common     CIRCLED VERTICAL BAR | 
| 2112 |  |  |  |  |  |  | 0x0029EB =>      59,   #  ⧫  gc=Sm   sc=Common     BLACK LOZENGE | 
| 2113 |  |  |  |  |  |  | 0x0029F8 =>   27547,   #  ⧸  gc=Sm   sc=Common     BIG SOLIDUS | 
| 2114 |  |  |  |  |  |  | 0x0029F9 =>    1238,   #  ⧹  gc=Sm   sc=Common     BIG REVERSE SOLIDUS | 
| 2115 |  |  |  |  |  |  | 0x002A0D =>       4,   #  ⨍  gc=Sm   sc=Common     FINITE PART INTEGRAL | 
| 2116 |  |  |  |  |  |  | 0x002A10 =>       2,   #  ⨐  gc=Sm   sc=Common     CIRCULATION FUNCTION | 
| 2117 |  |  |  |  |  |  | 0x002A16 =>       1,   #  ⨖  gc=Sm   sc=Common     QUATERNION INTEGRAL OPERATOR | 
| 2118 |  |  |  |  |  |  | 0x002A2A =>      42,   #  ⨪  gc=Sm   sc=Common     MINUS SIGN WITH DOT BELOW | 
| 2119 |  |  |  |  |  |  | 0x002A2F =>     270,   #  ⨯  gc=Sm   sc=Common     VECTOR OR CROSS PRODUCT | 
| 2120 |  |  |  |  |  |  | 0x002A38 =>       3,   #  ⨸  gc=Sm   sc=Common     CIRCLED DIVISION SIGN | 
| 2121 |  |  |  |  |  |  | 0x002A3C =>       1,   #  ⨼  gc=Sm   sc=Common     INTERIOR PRODUCT | 
| 2122 |  |  |  |  |  |  | 0x002A3F =>       6,   #  ⨿  gc=Sm   sc=Common     AMALGAMATION OR COPRODUCT | 
| 2123 |  |  |  |  |  |  | 0x002A5E =>      24,   #  ⩞  gc=Sm   sc=Common     LOGICAL AND WITH DOUBLE OVERBAR | 
| 2124 |  |  |  |  |  |  | 0x002A7D =>    5303,   #  ⩽  gc=Sm   sc=Common     LESS-THAN OR SLANTED EQUAL TO | 
| 2125 |  |  |  |  |  |  | 0x002A7E =>    6823,   #  ⩾  gc=Sm   sc=Common     GREATER-THAN OR SLANTED EQUAL TO | 
| 2126 |  |  |  |  |  |  | 0x002A85 =>       1,   #  ⪅  gc=Sm   sc=Common     LESS-THAN OR APPROXIMATE | 
| 2127 |  |  |  |  |  |  | 0x002A86 =>       1,   #  ⪆  gc=Sm   sc=Common     GREATER-THAN OR APPROXIMATE | 
| 2128 |  |  |  |  |  |  | 0x002A95 =>       2,   #  ⪕  gc=Sm   sc=Common     SLANTED EQUAL TO OR LESS-THAN | 
| 2129 |  |  |  |  |  |  | 0x002A96 =>      13,   #  ⪖  gc=Sm   sc=Common     SLANTED EQUAL TO OR GREATER-THAN | 
| 2130 |  |  |  |  |  |  | 0x002AA1 =>     303,   #  ⪡  gc=Sm   sc=Common     DOUBLE NESTED LESS-THAN | 
| 2131 |  |  |  |  |  |  | 0x002AA2 =>     696,   #  ⪢  gc=Sm   sc=Common     DOUBLE NESTED GREATER-THAN | 
| 2132 |  |  |  |  |  |  | 0x002AAF =>       9,   #  ⪯  gc=Sm   sc=Common     PRECEDES ABOVE SINGLE-LINE EQUALS SIGN | 
| 2133 |  |  |  |  |  |  | 0x002AB0 =>      27,   #  ⪰  gc=Sm   sc=Common     SUCCEEDS ABOVE SINGLE-LINE EQUALS SIGN | 
| 2134 |  |  |  |  |  |  | 0x002ADF =>      12,   #  ⫟  gc=Sm   sc=Common     SHORT DOWN TACK | 
| 2135 |  |  |  |  |  |  | 0x002AE0 =>       1,   #  ⫠  gc=Sm   sc=Common     SHORT UP TACK | 
| 2136 |  |  |  |  |  |  | 0x002AE2 =>       1,   #  ⫢  gc=Sm   sc=Common     VERTICAL BAR TRIPLE RIGHT TURNSTILE | 
| 2137 |  |  |  |  |  |  | 0x002AEB =>      19,   #  ⫫  gc=Sm   sc=Common     DOUBLE UP TACK | 
| 2138 |  |  |  |  |  |  | 0x002AF6 =>      44,   #  ⫶  gc=Sm   sc=Common     TRIPLE COLON OPERATOR | 
| 2139 |  |  |  |  |  |  | 0x00266D =>       1,   #  ♭  gc=So   sc=Common     MUSIC FLAT SIGN | 
| 2140 |  |  |  |  |  |  | 0x00266F =>     409,   #  ♯  gc=Sm   sc=Common     MUSIC SHARP SIGN | 
| 2141 |  |  |  |  |  |  | 0x00FFFD =>      46,   #  �  gc=So   sc=Common     REPLACEMENT CHARACTER | 
| 2142 |  |  |  |  |  |  | 0x0002D0 =>       4,   #  ː  gc=Lm   sc=Common     MODIFIER LETTER TRIANGULAR COLON | 
| 2143 |  |  |  |  |  |  | 0x0000A4 =>     140,   #  ¤  gc=Sc   sc=Common     CURRENCY SIGN | 
| 2144 |  |  |  |  |  |  | 0x0000A2 =>     241,   #  ¢  gc=Sc   sc=Common     CENT SIGN | 
| 2145 |  |  |  |  |  |  | 0x0000A3 =>    5506,   #  £  gc=Sc   sc=Common     POUND SIGN | 
| 2146 |  |  |  |  |  |  | 0x0000A5 =>     594,   #  ¥  gc=Sc   sc=Common     YEN SIGN | 
| 2147 |  |  |  |  |  |  | 0x0020A4 =>       3,   #  ₤  gc=Sc   sc=Common     LIRA SIGN | 
| 2148 |  |  |  |  |  |  | 0x0020A7 =>      33,   #  ₧  gc=Sc   sc=Common     PESETA SIGN | 
| 2149 |  |  |  |  |  |  | 0x0020AC =>    2757,   #  €  gc=Sc   sc=Common     EURO SIGN | 
| 2150 |  |  |  |  |  |  | 0x002460 =>      41,   #  ①  gc=No   sc=Common     CIRCLED DIGIT ONE | 
| 2151 |  |  |  |  |  |  | 0x002776 =>       2,   #  ❶  gc=No   sc=Common     DINGBAT NEGATIVE CIRCLED DIGIT ONE | 
| 2152 |  |  |  |  |  |  | 0x002780 =>       3,   #  ➀  gc=No   sc=Common     DINGBAT CIRCLED SANS-SERIF DIGIT ONE | 
| 2153 |  |  |  |  |  |  | 0x0000B9 =>       1,   #  ¹  gc=No   sc=Common     SUPERSCRIPT ONE | 
| 2154 |  |  |  |  |  |  | 0x0000BD =>    2556,   #  ½  gc=No   sc=Common     VULGAR FRACTION ONE HALF | 
| 2155 |  |  |  |  |  |  | 0x002153 =>      44,   #  ⅓  gc=No   sc=Common     VULGAR FRACTION ONE THIRD | 
| 2156 |  |  |  |  |  |  | 0x0000BC =>     190,   #  ¼  gc=No   sc=Common     VULGAR FRACTION ONE QUARTER | 
| 2157 |  |  |  |  |  |  | 0x002159 =>       1,   #  ⅙  gc=No   sc=Common     VULGAR FRACTION ONE SIXTH | 
| 2158 |  |  |  |  |  |  | 0x00215B =>       4,   #  ⅛  gc=No   sc=Common     VULGAR FRACTION ONE EIGHTH | 
| 2159 |  |  |  |  |  |  | 0x002469 =>       2,   #  ⑩  gc=No   sc=Common     CIRCLED NUMBER TEN | 
| 2160 |  |  |  |  |  |  | 0x002461 =>      41,   #  ②  gc=No   sc=Common     CIRCLED DIGIT TWO | 
| 2161 |  |  |  |  |  |  | 0x002777 =>       2,   #  ❷  gc=No   sc=Common     DINGBAT NEGATIVE CIRCLED DIGIT TWO | 
| 2162 |  |  |  |  |  |  | 0x0000B2 =>      53,   #  ²  gc=No   sc=Common     SUPERSCRIPT TWO | 
| 2163 |  |  |  |  |  |  | 0x002154 =>      10,   #  ⅔  gc=No   sc=Common     VULGAR FRACTION TWO THIRDS | 
| 2164 |  |  |  |  |  |  | 0x002156 =>       1,   #  ⅖  gc=No   sc=Common     VULGAR FRACTION TWO FIFTHS | 
| 2165 |  |  |  |  |  |  | 0x002462 =>      34,   #  ③  gc=No   sc=Common     CIRCLED DIGIT THREE | 
| 2166 |  |  |  |  |  |  | 0x002778 =>       2,   #  ❸  gc=No   sc=Common     DINGBAT NEGATIVE CIRCLED DIGIT THREE | 
| 2167 |  |  |  |  |  |  | 0x0000B3 =>       2,   #  ³  gc=No   sc=Common     SUPERSCRIPT THREE | 
| 2168 |  |  |  |  |  |  | 0x0000BE =>     307,   #  ¾  gc=No   sc=Common     VULGAR FRACTION THREE QUARTERS | 
| 2169 |  |  |  |  |  |  | 0x00215C =>       4,   #  ⅜  gc=No   sc=Common     VULGAR FRACTION THREE EIGHTHS | 
| 2170 |  |  |  |  |  |  | 0x002463 =>      24,   #  ④  gc=No   sc=Common     CIRCLED DIGIT FOUR | 
| 2171 |  |  |  |  |  |  | 0x002779 =>       1,   #  ❹  gc=No   sc=Common     DINGBAT NEGATIVE CIRCLED DIGIT FOUR | 
| 2172 |  |  |  |  |  |  | 0x002464 =>      26,   #  ⑤  gc=No   sc=Common     CIRCLED DIGIT FIVE | 
| 2173 |  |  |  |  |  |  | 0x00277A =>       1,   #  ❺  gc=No   sc=Common     DINGBAT NEGATIVE CIRCLED DIGIT FIVE | 
| 2174 |  |  |  |  |  |  | 0x00215A =>       1,   #  ⅚  gc=No   sc=Common     VULGAR FRACTION FIVE SIXTHS | 
| 2175 |  |  |  |  |  |  | 0x00215D =>       1,   #  ⅝  gc=No   sc=Common     VULGAR FRACTION FIVE EIGHTHS | 
| 2176 |  |  |  |  |  |  | 0x002465 =>      22,   #  ⑥  gc=No   sc=Common     CIRCLED DIGIT SIX | 
| 2177 |  |  |  |  |  |  | 0x002466 =>      13,   #  ⑦  gc=No   sc=Common     CIRCLED DIGIT SEVEN | 
| 2178 |  |  |  |  |  |  | 0x00215E =>       1,   #  ⅞  gc=No   sc=Common     VULGAR FRACTION SEVEN EIGHTHS | 
| 2179 |  |  |  |  |  |  | 0x002467 =>       9,   #  ⑧  gc=No   sc=Common     CIRCLED DIGIT EIGHT | 
| 2180 |  |  |  |  |  |  | 0x002468 =>       6,   #  ⑨  gc=No   sc=Common     CIRCLED DIGIT NINE | 
| 2181 |  |  |  |  |  |  | 0x0000AA =>      37,   #  ª  gc=Ll   sc=Latin      FEMININE ORDINAL INDICATOR | 
| 2182 |  |  |  |  |  |  | 0x0000E1 =>  122942,   #  á  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH ACUTE | 
| 2183 |  |  |  |  |  |  | 0x0000C1 =>    3242,   #  Á  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH ACUTE | 
| 2184 |  |  |  |  |  |  | 0x0000E0 =>   26679,   #  à  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH GRAVE | 
| 2185 |  |  |  |  |  |  | 0x0000C0 =>     301,   #  À  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH GRAVE | 
| 2186 |  |  |  |  |  |  | 0x000103 =>     246,   #  ă  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH BREVE | 
| 2187 |  |  |  |  |  |  | 0x000102 =>       7,   #  Ă  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH BREVE | 
| 2188 |  |  |  |  |  |  | 0x0000E2 =>    4353,   #  â  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH CIRCUMFLEX | 
| 2189 |  |  |  |  |  |  | 0x0000C2 =>     214,   #  Â  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH CIRCUMFLEX | 
| 2190 |  |  |  |  |  |  | 0x001EA5 =>       3,   #  ấ  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE | 
| 2191 |  |  |  |  |  |  | 0x001EA7 =>       1,   #  ầ  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE | 
| 2192 |  |  |  |  |  |  | 0x0001CE =>     206,   #  ǎ  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH CARON | 
| 2193 |  |  |  |  |  |  | 0x0000E5 =>   19635,   #  å  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH RING ABOVE | 
| 2194 |  |  |  |  |  |  | 0x0000C5 =>   31442,   #  Å  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH RING ABOVE | 
| 2195 |  |  |  |  |  |  | 0x00212B =>      10,   #  Å  gc=Lu   sc=Latin      ANGSTROM SIGN | 
| 2196 |  |  |  |  |  |  | 0x0001FB =>       2,   #  ǻ  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE | 
| 2197 |  |  |  |  |  |  | 0x0001FA =>     102,   #  Ǻ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE | 
| 2198 |  |  |  |  |  |  | 0x0000E4 =>  164121,   #  ä  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH DIAERESIS | 
| 2199 |  |  |  |  |  |  | 0x0000C4 =>    2253,   #  Ä  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH DIAERESIS | 
| 2200 |  |  |  |  |  |  | 0x0000E3 =>   39749,   #  ã  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH TILDE | 
| 2201 |  |  |  |  |  |  | 0x0000C3 =>     149,   #  Ã  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH TILDE | 
| 2202 |  |  |  |  |  |  | 0x000105 =>     221,   #  ą  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH OGONEK | 
| 2203 |  |  |  |  |  |  | 0x000104 =>       1,   #  Ą  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH OGONEK | 
| 2204 |  |  |  |  |  |  | 0x000101 =>     344,   #  ā  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH MACRON | 
| 2205 |  |  |  |  |  |  | 0x000100 =>      66,   #  Ā  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH MACRON | 
| 2206 |  |  |  |  |  |  | 0x001EA3 =>       4,   #  ả  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH HOOK ABOVE | 
| 2207 |  |  |  |  |  |  | 0x000201 =>       2,   #  ȁ  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH DOUBLE GRAVE | 
| 2208 |  |  |  |  |  |  | 0x001EA1 =>       4,   #  ạ  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH DOT BELOW | 
| 2209 |  |  |  |  |  |  | 0x001EB7 =>       1,   #  ặ  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH BREVE AND DOT BELOW | 
| 2210 |  |  |  |  |  |  | 0x001EAD =>       5,   #  ậ  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW | 
| 2211 |  |  |  |  |  |  | 0x0000E6 =>    4461,   #  æ  gc=Ll   sc=Latin      LATIN SMALL LETTER AE | 
| 2212 |  |  |  |  |  |  | 0x0000C6 =>     190,   #  Æ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER AE | 
| 2213 |  |  |  |  |  |  | 0x0001FD =>       5,   #  ǽ  gc=Ll   sc=Latin      LATIN SMALL LETTER AE WITH ACUTE | 
| 2214 |  |  |  |  |  |  | 0x001D00 =>       2,   #  ᴀ  gc=Ll   sc=Latin      LATIN LETTER SMALL CAPITAL A | 
| 2215 |  |  |  |  |  |  | 0x000250 =>       6,   #  ɐ  gc=Ll   sc=Latin      LATIN SMALL LETTER TURNED A | 
| 2216 |  |  |  |  |  |  | 0x000251 =>       8,   #  ɑ  gc=Ll   sc=Latin      LATIN SMALL LETTER ALPHA | 
| 2217 |  |  |  |  |  |  | 0x00212C =>       1,   #  ℬ  gc=Lu   sc=Common     SCRIPT CAPITAL B | 
| 2218 |  |  |  |  |  |  | 0x00212D =>       1,   #  ℭ  gc=Lu   sc=Common     BLACK-LETTER CAPITAL C | 
| 2219 |  |  |  |  |  |  | 0x000107 =>    8874,   #  ć  gc=Ll   sc=Latin      LATIN SMALL LETTER C WITH ACUTE | 
| 2220 |  |  |  |  |  |  | 0x000106 =>      84,   #  Ć  gc=Lu   sc=Latin      LATIN CAPITAL LETTER C WITH ACUTE | 
| 2221 |  |  |  |  |  |  | 0x000109 =>     117,   #  ĉ  gc=Ll   sc=Latin      LATIN SMALL LETTER C WITH CIRCUMFLEX | 
| 2222 |  |  |  |  |  |  | 0x000108 =>      48,   #  Ĉ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER C WITH CIRCUMFLEX | 
| 2223 |  |  |  |  |  |  | 0x00010D =>    5401,   #  č  gc=Ll   sc=Latin      LATIN SMALL LETTER C WITH CARON | 
| 2224 |  |  |  |  |  |  | 0x00010C =>     938,   #  Č  gc=Lu   sc=Latin      LATIN CAPITAL LETTER C WITH CARON | 
| 2225 |  |  |  |  |  |  | 0x00010B =>      22,   #  ċ  gc=Ll   sc=Latin      LATIN SMALL LETTER C WITH DOT ABOVE | 
| 2226 |  |  |  |  |  |  | 0x00010A =>      17,   #  Ċ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER C WITH DOT ABOVE | 
| 2227 |  |  |  |  |  |  | 0x0000E7 =>   39619,   #  ç  gc=Ll   sc=Latin      LATIN SMALL LETTER C WITH CEDILLA | 
| 2228 |  |  |  |  |  |  | 0x0000C7 =>    3126,   #  Ç  gc=Lu   sc=Latin      LATIN CAPITAL LETTER C WITH CEDILLA | 
| 2229 |  |  |  |  |  |  | 0x002105 =>       2,   #  ℅  gc=So   sc=Common     CARE OF | 
| 2230 |  |  |  |  |  |  | 0x001D04 =>       9,   #  ᴄ  gc=Ll   sc=Latin      LATIN LETTER SMALL CAPITAL C | 
| 2231 |  |  |  |  |  |  | 0x00010F =>       3,   #  ď  gc=Ll   sc=Latin      LATIN SMALL LETTER D WITH CARON | 
| 2232 |  |  |  |  |  |  | 0x00010E =>      17,   #  Ď  gc=Lu   sc=Latin      LATIN CAPITAL LETTER D WITH CARON | 
| 2233 |  |  |  |  |  |  | 0x000111 =>      62,   #  đ  gc=Ll   sc=Latin      LATIN SMALL LETTER D WITH STROKE | 
| 2234 |  |  |  |  |  |  | 0x000110 =>      50,   #  Đ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER D WITH STROKE | 
| 2235 |  |  |  |  |  |  | 0x0000F0 =>     116,   #  ð  gc=Ll   sc=Latin      LATIN SMALL LETTER ETH | 
| 2236 |  |  |  |  |  |  | 0x0000D0 =>      41,   #  Ð  gc=Lu   sc=Latin      LATIN CAPITAL LETTER ETH | 
| 2237 |  |  |  |  |  |  | 0x0000E9 =>  380198,   #  é  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH ACUTE | 
| 2238 |  |  |  |  |  |  | 0x0000C9 =>    3771,   #  É  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH ACUTE | 
| 2239 |  |  |  |  |  |  | 0x0000E8 =>   45632,   #  è  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH GRAVE | 
| 2240 |  |  |  |  |  |  | 0x0000C8 =>     199,   #  È  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH GRAVE | 
| 2241 |  |  |  |  |  |  | 0x000115 =>     122,   #  ĕ  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH BREVE | 
| 2242 |  |  |  |  |  |  | 0x000114 =>       1,   #  Ĕ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH BREVE | 
| 2243 |  |  |  |  |  |  | 0x0000EA =>   12473,   #  ê  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH CIRCUMFLEX | 
| 2244 |  |  |  |  |  |  | 0x0000CA =>     181,   #  Ê  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH CIRCUMFLEX | 
| 2245 |  |  |  |  |  |  | 0x001EBF =>       3,   #  ế  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE | 
| 2246 |  |  |  |  |  |  | 0x001EC5 =>      18,   #  ễ  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE | 
| 2247 |  |  |  |  |  |  | 0x001EC3 =>       1,   #  ể  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE | 
| 2248 |  |  |  |  |  |  | 0x00011B =>     474,   #  ě  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH CARON | 
| 2249 |  |  |  |  |  |  | 0x00011A =>       1,   #  Ě  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH CARON | 
| 2250 |  |  |  |  |  |  | 0x0000EB =>    7505,   #  ë  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH DIAERESIS | 
| 2251 |  |  |  |  |  |  | 0x0000CB =>      39,   #  Ë  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH DIAERESIS | 
| 2252 |  |  |  |  |  |  | 0x001EBD =>       2,   #  ẽ  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH TILDE | 
| 2253 |  |  |  |  |  |  | 0x000117 =>      22,   #  ė  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH DOT ABOVE | 
| 2254 |  |  |  |  |  |  | 0x000116 =>      20,   #  Ė  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH DOT ABOVE | 
| 2255 |  |  |  |  |  |  | 0x000229 =>       5,   #  ȩ  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH CEDILLA | 
| 2256 |  |  |  |  |  |  | 0x000119 =>     265,   #  ę  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH OGONEK | 
| 2257 |  |  |  |  |  |  | 0x000113 =>      63,   #  ē  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH MACRON | 
| 2258 |  |  |  |  |  |  | 0x000112 =>      88,   #  Ē  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH MACRON | 
| 2259 |  |  |  |  |  |  | 0x001EBB =>       2,   #  ẻ  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH HOOK ABOVE | 
| 2260 |  |  |  |  |  |  | 0x001EB8 =>      31,   #  Ẹ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH DOT BELOW | 
| 2261 |  |  |  |  |  |  | 0x001EC7 =>       5,   #  ệ  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW | 
| 2262 |  |  |  |  |  |  | 0x001D07 =>       2,   #  ᴇ  gc=Ll   sc=Latin      LATIN LETTER SMALL CAPITAL E | 
| 2263 |  |  |  |  |  |  | 0x000259 =>      55,   #  ə  gc=Ll   sc=Latin      LATIN SMALL LETTER SCHWA | 
| 2264 |  |  |  |  |  |  | 0x00025B =>     163,   #  ɛ  gc=Ll   sc=Latin      LATIN SMALL LETTER OPEN E | 
| 2265 |  |  |  |  |  |  | 0x00025C =>       2,   #  ɜ  gc=Ll   sc=Latin      LATIN SMALL LETTER REVERSED OPEN E | 
| 2266 |  |  |  |  |  |  | 0x00025E =>       2,   #  ɞ  gc=Ll   sc=Latin      LATIN SMALL LETTER CLOSED REVERSED OPEN E | 
| 2267 |  |  |  |  |  |  | 0x00FB01 =>       4,   #  fi  gc=Ll   sc=Latin      LATIN SMALL LIGATURE FI | 
| 2268 |  |  |  |  |  |  | 0x0002A9 =>       1,   #  ʩ  gc=Ll   sc=Latin      LATIN SMALL LETTER FENG DIGRAPH | 
| 2269 |  |  |  |  |  |  | 0x000192 =>     309,   #  ƒ  gc=Ll   sc=Latin      LATIN SMALL LETTER F WITH HOOK | 
| 2270 |  |  |  |  |  |  | 0x002132 =>       5,   #  Ⅎ  gc=Lu   sc=Latin      TURNED CAPITAL F | 
| 2271 |  |  |  |  |  |  | 0x0001F5 =>       8,   #  ǵ  gc=Ll   sc=Latin      LATIN SMALL LETTER G WITH ACUTE | 
| 2272 |  |  |  |  |  |  | 0x00011F =>    4021,   #  ğ  gc=Ll   sc=Latin      LATIN SMALL LETTER G WITH BREVE | 
| 2273 |  |  |  |  |  |  | 0x00011E =>      57,   #  Ğ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER G WITH BREVE | 
| 2274 |  |  |  |  |  |  | 0x00011D =>      93,   #  ĝ  gc=Ll   sc=Latin      LATIN SMALL LETTER G WITH CIRCUMFLEX | 
| 2275 |  |  |  |  |  |  | 0x00011C =>      10,   #  Ĝ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER G WITH CIRCUMFLEX | 
| 2276 |  |  |  |  |  |  | 0x0001E7 =>      37,   #  ǧ  gc=Ll   sc=Latin      LATIN SMALL LETTER G WITH CARON | 
| 2277 |  |  |  |  |  |  | 0x000120 =>       4,   #  Ġ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER G WITH DOT ABOVE | 
| 2278 |  |  |  |  |  |  | 0x000123 =>       1,   #  ģ  gc=Ll   sc=Latin      LATIN SMALL LETTER G WITH CEDILLA | 
| 2279 |  |  |  |  |  |  | 0x000122 =>       1,   #  Ģ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER G WITH CEDILLA | 
| 2280 |  |  |  |  |  |  | 0x001E21 =>       7,   #  ḡ  gc=Ll   sc=Latin      LATIN SMALL LETTER G WITH MACRON | 
| 2281 |  |  |  |  |  |  | 0x000261 =>      11,   #  ɡ  gc=Ll   sc=Latin      LATIN SMALL LETTER SCRIPT G | 
| 2282 |  |  |  |  |  |  | 0x000263 =>       4,   #  ɣ  gc=Ll   sc=Latin      LATIN SMALL LETTER GAMMA | 
| 2283 |  |  |  |  |  |  | 0x00210C =>       3,   #  ℌ  gc=Lu   sc=Common     BLACK-LETTER CAPITAL H | 
| 2284 |  |  |  |  |  |  | 0x000125 =>       7,   #  ĥ  gc=Ll   sc=Latin      LATIN SMALL LETTER H WITH CIRCUMFLEX | 
| 2285 |  |  |  |  |  |  | 0x000124 =>      30,   #  Ĥ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER H WITH CIRCUMFLEX | 
| 2286 |  |  |  |  |  |  | 0x001E29 =>       1,   #  ḩ  gc=Ll   sc=Latin      LATIN SMALL LETTER H WITH CEDILLA | 
| 2287 |  |  |  |  |  |  | 0x000127 =>       2,   #  ħ  gc=Ll   sc=Latin      LATIN SMALL LETTER H WITH STROKE | 
| 2288 |  |  |  |  |  |  | 0x00210F =>      33,   #  ℏ  gc=Ll   sc=Common     PLANCK CONSTANT OVER TWO PI | 
| 2289 |  |  |  |  |  |  | 0x00029C =>       1,   #  ʜ  gc=Ll   sc=Latin      LATIN LETTER SMALL CAPITAL H | 
| 2290 |  |  |  |  |  |  | 0x0002BD =>       2,   #  ʽ  gc=Lm   sc=Common     MODIFIER LETTER REVERSED COMMA | 
| 2291 |  |  |  |  |  |  | 0x0000ED =>   36153,   #  í  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH ACUTE | 
| 2292 |  |  |  |  |  |  | 0x0000CD =>    2242,   #  Í  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH ACUTE | 
| 2293 |  |  |  |  |  |  | 0x0000EC =>     748,   #  ì  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH GRAVE | 
| 2294 |  |  |  |  |  |  | 0x0000CC =>      41,   #  Ì  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH GRAVE | 
| 2295 |  |  |  |  |  |  | 0x00012D =>      56,   #  ĭ  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH BREVE | 
| 2296 |  |  |  |  |  |  | 0x00012C =>       4,   #  Ĭ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH BREVE | 
| 2297 |  |  |  |  |  |  | 0x0000EE =>     510,   #  î  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH CIRCUMFLEX | 
| 2298 |  |  |  |  |  |  | 0x0000CE =>      76,   #  Î  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH CIRCUMFLEX | 
| 2299 |  |  |  |  |  |  | 0x0001D0 =>       3,   #  ǐ  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH CARON | 
| 2300 |  |  |  |  |  |  | 0x0001CF =>       2,   #  Ǐ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH CARON | 
| 2301 |  |  |  |  |  |  | 0x0000EF =>    7749,   #  ï  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH DIAERESIS | 
| 2302 |  |  |  |  |  |  | 0x0000CF =>     149,   #  Ï  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH DIAERESIS | 
| 2303 |  |  |  |  |  |  | 0x000129 =>      10,   #  ĩ  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH TILDE | 
| 2304 |  |  |  |  |  |  | 0x000128 =>       6,   #  Ĩ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH TILDE | 
| 2305 |  |  |  |  |  |  | 0x000130 =>     353,   #  İ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH DOT ABOVE | 
| 2306 |  |  |  |  |  |  | 0x00012F =>       2,   #  į  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH OGONEK | 
| 2307 |  |  |  |  |  |  | 0x00012B =>      12,   #  ī  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH MACRON | 
| 2308 |  |  |  |  |  |  | 0x00012A =>      25,   #  Ī  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH MACRON | 
| 2309 |  |  |  |  |  |  | 0x001ECB =>       3,   #  ị  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH DOT BELOW | 
| 2310 |  |  |  |  |  |  | 0x000131 =>   86999,   #  ı  gc=Ll   sc=Latin      LATIN SMALL LETTER DOTLESS I | 
| 2311 |  |  |  |  |  |  | 0x00026A =>      22,   #  ɪ  gc=Ll   sc=Latin      LATIN LETTER SMALL CAPITAL I | 
| 2312 |  |  |  |  |  |  | 0x000269 =>      85,   #  ɩ  gc=Ll   sc=Latin      LATIN SMALL LETTER IOTA | 
| 2313 |  |  |  |  |  |  | 0x0002B2 =>       1,   #  ʲ  gc=Lm   sc=Latin      MODIFIER LETTER SMALL J | 
| 2314 |  |  |  |  |  |  | 0x000135 =>       5,   #  ĵ  gc=Ll   sc=Latin      LATIN SMALL LETTER J WITH CIRCUMFLEX | 
| 2315 |  |  |  |  |  |  | 0x000134 =>      15,   #  Ĵ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER J WITH CIRCUMFLEX | 
| 2316 |  |  |  |  |  |  | 0x00212A =>      21,   #  K  gc=Lu   sc=Latin      KELVIN SIGN | 
| 2317 |  |  |  |  |  |  | 0x002113 =>     641,   #  ℓ  gc=Ll   sc=Common     SCRIPT SMALL L | 
| 2318 |  |  |  |  |  |  | 0x002112 =>       1,   #  ℒ  gc=Lu   sc=Common     SCRIPT CAPITAL L | 
| 2319 |  |  |  |  |  |  | 0x00013A =>      49,   #  ĺ  gc=Ll   sc=Latin      LATIN SMALL LETTER L WITH ACUTE | 
| 2320 |  |  |  |  |  |  | 0x000139 =>      19,   #  Ĺ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER L WITH ACUTE | 
| 2321 |  |  |  |  |  |  | 0x00013E =>       3,   #  ľ  gc=Ll   sc=Latin      LATIN SMALL LETTER L WITH CARON | 
| 2322 |  |  |  |  |  |  | 0x00013D =>       2,   #  Ľ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER L WITH CARON | 
| 2323 |  |  |  |  |  |  | 0x00013C =>       1,   #  ļ  gc=Ll   sc=Latin      LATIN SMALL LETTER L WITH CEDILLA | 
| 2324 |  |  |  |  |  |  | 0x000142 =>    4282,   #  ł  gc=Ll   sc=Latin      LATIN SMALL LETTER L WITH STROKE | 
| 2325 |  |  |  |  |  |  | 0x000141 =>     713,   #  Ł  gc=Lu   sc=Latin      LATIN CAPITAL LETTER L WITH STROKE | 
| 2326 |  |  |  |  |  |  | 0x000140 =>      11,   #  ŀ  gc=Ll   sc=Latin      LATIN SMALL LETTER L WITH MIDDLE DOT | 
| 2327 |  |  |  |  |  |  | 0x00013F =>       2,   #  Ŀ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER L WITH MIDDLE DOT | 
| 2328 |  |  |  |  |  |  | 0x00026D =>       2,   #  ɭ  gc=Ll   sc=Latin      LATIN SMALL LETTER L WITH RETROFLEX HOOK | 
| 2329 |  |  |  |  |  |  | 0x00019B =>      55,   #  ƛ  gc=Ll   sc=Latin      LATIN SMALL LETTER LAMBDA WITH STROKE | 
| 2330 |  |  |  |  |  |  | 0x002133 =>       2,   #  ℳ  gc=Lu   sc=Common     SCRIPT CAPITAL M | 
| 2331 |  |  |  |  |  |  | 0x000271 =>       1,   #  ɱ  gc=Ll   sc=Latin      LATIN SMALL LETTER M WITH HOOK | 
| 2332 |  |  |  |  |  |  | 0x000144 =>    5029,   #  ń  gc=Ll   sc=Latin      LATIN SMALL LETTER N WITH ACUTE | 
| 2333 |  |  |  |  |  |  | 0x000143 =>      14,   #  Ń  gc=Lu   sc=Latin      LATIN CAPITAL LETTER N WITH ACUTE | 
| 2334 |  |  |  |  |  |  | 0x000148 =>     665,   #  ň  gc=Ll   sc=Latin      LATIN SMALL LETTER N WITH CARON | 
| 2335 |  |  |  |  |  |  | 0x000147 =>       1,   #  Ň  gc=Lu   sc=Latin      LATIN CAPITAL LETTER N WITH CARON | 
| 2336 |  |  |  |  |  |  | 0x0000F1 =>   34405,   #  ñ  gc=Ll   sc=Latin      LATIN SMALL LETTER N WITH TILDE | 
| 2337 |  |  |  |  |  |  | 0x0000D1 =>     156,   #  Ñ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER N WITH TILDE | 
| 2338 |  |  |  |  |  |  | 0x000146 =>       8,   #  ņ  gc=Ll   sc=Latin      LATIN SMALL LETTER N WITH CEDILLA | 
| 2339 |  |  |  |  |  |  | 0x000272 =>       7,   #  ɲ  gc=Ll   sc=Latin      LATIN SMALL LETTER N WITH LEFT HOOK | 
| 2340 |  |  |  |  |  |  | 0x00014B =>       6,   #  ŋ  gc=Ll   sc=Latin      LATIN SMALL LETTER ENG | 
| 2341 |  |  |  |  |  |  | 0x0000BA =>    1361,   #  º  gc=Ll   sc=Latin      MASCULINE ORDINAL INDICATOR | 
| 2342 |  |  |  |  |  |  | 0x002092 =>       2,   #  ₒ  gc=Lm   sc=Latin      LATIN SUBSCRIPT SMALL LETTER O | 
| 2343 |  |  |  |  |  |  | 0x0000F3 =>   99233,   #  ó  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH ACUTE | 
| 2344 |  |  |  |  |  |  | 0x0000D3 =>     607,   #  Ó  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH ACUTE | 
| 2345 |  |  |  |  |  |  | 0x0000F2 =>    5881,   #  ò  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH GRAVE | 
| 2346 |  |  |  |  |  |  | 0x0000D2 =>     103,   #  Ò  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH GRAVE | 
| 2347 |  |  |  |  |  |  | 0x00014F =>      42,   #  ŏ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH BREVE | 
| 2348 |  |  |  |  |  |  | 0x00014E =>       2,   #  Ŏ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH BREVE | 
| 2349 |  |  |  |  |  |  | 0x0000F4 =>   23832,   #  ô  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH CIRCUMFLEX | 
| 2350 |  |  |  |  |  |  | 0x0000D4 =>     149,   #  Ô  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH CIRCUMFLEX | 
| 2351 |  |  |  |  |  |  | 0x001ED1 =>       2,   #  ố  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE | 
| 2352 |  |  |  |  |  |  | 0x001ED3 =>       4,   #  ồ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE | 
| 2353 |  |  |  |  |  |  | 0x001ED7 =>       4,   #  ỗ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE | 
| 2354 |  |  |  |  |  |  | 0x001ED5 =>       1,   #  ổ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE | 
| 2355 |  |  |  |  |  |  | 0x0001D2 =>      31,   #  ǒ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH CARON | 
| 2356 |  |  |  |  |  |  | 0x0000F6 =>  247208,   #  ö  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH DIAERESIS | 
| 2357 |  |  |  |  |  |  | 0x0000D6 =>   15845,   #  Ö  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH DIAERESIS | 
| 2358 |  |  |  |  |  |  | 0x000151 =>      28,   #  ő  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH DOUBLE ACUTE | 
| 2359 |  |  |  |  |  |  | 0x000150 =>       8,   #  Ő  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH DOUBLE ACUTE | 
| 2360 |  |  |  |  |  |  | 0x0000F5 =>    2993,   #  õ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH TILDE | 
| 2361 |  |  |  |  |  |  | 0x0000D5 =>      55,   #  Õ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH TILDE | 
| 2362 |  |  |  |  |  |  | 0x0000F8 =>   32111,   #  ø  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH STROKE | 
| 2363 |  |  |  |  |  |  | 0x0000D8 =>    7555,   #  Ø  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH STROKE | 
| 2364 |  |  |  |  |  |  | 0x0001FF =>      27,   #  ǿ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH STROKE AND ACUTE | 
| 2365 |  |  |  |  |  |  | 0x0001FE =>       8,   #  Ǿ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH STROKE AND ACUTE | 
| 2366 |  |  |  |  |  |  | 0x00014D =>     118,   #  ō  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH MACRON | 
| 2367 |  |  |  |  |  |  | 0x00014C =>      75,   #  Ō  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH MACRON | 
| 2368 |  |  |  |  |  |  | 0x001ECF =>       1,   #  ỏ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH HOOK ABOVE | 
| 2369 |  |  |  |  |  |  | 0x0001A1 =>       1,   #  ơ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH HORN | 
| 2370 |  |  |  |  |  |  | 0x0001A0 =>       1,   #  Ơ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH HORN | 
| 2371 |  |  |  |  |  |  | 0x001EDB =>       2,   #  ớ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH HORN AND ACUTE | 
| 2372 |  |  |  |  |  |  | 0x001EDD =>       1,   #  ờ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH HORN AND GRAVE | 
| 2373 |  |  |  |  |  |  | 0x001ECD =>       7,   #  ọ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH DOT BELOW | 
| 2374 |  |  |  |  |  |  | 0x001ECC =>       8,   #  Ọ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH DOT BELOW | 
| 2375 |  |  |  |  |  |  | 0x001ED9 =>       1,   #  ộ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW | 
| 2376 |  |  |  |  |  |  | 0x001ED8 =>       2,   #  Ộ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW | 
| 2377 |  |  |  |  |  |  | 0x000153 =>     722,   #  œ  gc=Ll   sc=Latin      LATIN SMALL LIGATURE OE | 
| 2378 |  |  |  |  |  |  | 0x000152 =>      20,   #  Œ  gc=Lu   sc=Latin      LATIN CAPITAL LIGATURE OE | 
| 2379 |  |  |  |  |  |  | 0x001D0F =>      58,   #  ᴏ  gc=Ll   sc=Latin      LATIN LETTER SMALL CAPITAL O | 
| 2380 |  |  |  |  |  |  | 0x000254 =>      45,   #  ɔ  gc=Ll   sc=Latin      LATIN SMALL LETTER OPEN O | 
| 2381 |  |  |  |  |  |  | 0x000275 =>       1,   #  ɵ  gc=Ll   sc=Latin      LATIN SMALL LETTER BARRED O | 
| 2382 |  |  |  |  |  |  | 0x001E55 =>       1,   #  ṕ  gc=Ll   sc=Latin      LATIN SMALL LETTER P WITH ACUTE | 
| 2383 |  |  |  |  |  |  | 0x001D18 =>       6,   #  ᴘ  gc=Ll   sc=Latin      LATIN LETTER SMALL CAPITAL P | 
| 2384 |  |  |  |  |  |  | 0x0001A5 =>       1,   #  ƥ  gc=Ll   sc=Latin      LATIN SMALL LETTER P WITH HOOK | 
| 2385 |  |  |  |  |  |  | 0x000138 =>       2,   #  ĸ  gc=Ll   sc=Latin      LATIN SMALL LETTER KRA | 
| 2386 |  |  |  |  |  |  | 0x00211D =>       1,   #  ℝ  gc=Lu   sc=Common     DOUBLE-STRUCK CAPITAL R | 
| 2387 |  |  |  |  |  |  | 0x000155 =>      54,   #  ŕ  gc=Ll   sc=Latin      LATIN SMALL LETTER R WITH ACUTE | 
| 2388 |  |  |  |  |  |  | 0x000154 =>       2,   #  Ŕ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER R WITH ACUTE | 
| 2389 |  |  |  |  |  |  | 0x000159 =>     882,   #  ř  gc=Ll   sc=Latin      LATIN SMALL LETTER R WITH CARON | 
| 2390 |  |  |  |  |  |  | 0x000158 =>      92,   #  Ř  gc=Lu   sc=Latin      LATIN CAPITAL LETTER R WITH CARON | 
| 2391 |  |  |  |  |  |  | 0x000157 =>       5,   #  ŗ  gc=Ll   sc=Latin      LATIN SMALL LETTER R WITH CEDILLA | 
| 2392 |  |  |  |  |  |  | 0x000156 =>       2,   #  Ŗ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER R WITH CEDILLA | 
| 2393 |  |  |  |  |  |  | 0x000213 =>       1,   #  ȓ  gc=Ll   sc=Latin      LATIN SMALL LETTER R WITH INVERTED BREVE | 
| 2394 |  |  |  |  |  |  | 0x000280 =>       1,   #  ʀ  gc=Ll   sc=Latin      LATIN LETTER SMALL CAPITAL R | 
| 2395 |  |  |  |  |  |  | 0x00027C =>       1,   #  ɼ  gc=Ll   sc=Latin      LATIN SMALL LETTER R WITH LONG LEG | 
| 2396 |  |  |  |  |  |  | 0x00027E =>       6,   #  ɾ  gc=Ll   sc=Latin      LATIN SMALL LETTER R WITH FISHHOOK | 
| 2397 |  |  |  |  |  |  | 0x00015B =>     784,   #  ś  gc=Ll   sc=Latin      LATIN SMALL LETTER S WITH ACUTE | 
| 2398 |  |  |  |  |  |  | 0x00015A =>     317,   #  Ś  gc=Lu   sc=Latin      LATIN CAPITAL LETTER S WITH ACUTE | 
| 2399 |  |  |  |  |  |  | 0x00015D =>      59,   #  ŝ  gc=Ll   sc=Latin      LATIN SMALL LETTER S WITH CIRCUMFLEX | 
| 2400 |  |  |  |  |  |  | 0x00015C =>      63,   #  Ŝ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER S WITH CIRCUMFLEX | 
| 2401 |  |  |  |  |  |  | 0x000161 =>    5934,   #  š  gc=Ll   sc=Latin      LATIN SMALL LETTER S WITH CARON | 
| 2402 |  |  |  |  |  |  | 0x000160 =>    2967,   #  Š  gc=Lu   sc=Latin      LATIN CAPITAL LETTER S WITH CARON | 
| 2403 |  |  |  |  |  |  | 0x00015F =>    6549,   #  ş  gc=Ll   sc=Latin      LATIN SMALL LETTER S WITH CEDILLA | 
| 2404 |  |  |  |  |  |  | 0x00015E =>    1651,   #  Ş  gc=Lu   sc=Latin      LATIN CAPITAL LETTER S WITH CEDILLA | 
| 2405 |  |  |  |  |  |  | 0x00017F =>       7,   #  ſ  gc=Ll   sc=Latin      LATIN SMALL LETTER LONG S | 
| 2406 |  |  |  |  |  |  | 0x0000DF =>   12061,   #  ß  gc=Ll   sc=Latin      LATIN SMALL LETTER SHARP S | 
| 2407 |  |  |  |  |  |  | 0x000282 =>       2,   #  ʂ  gc=Ll   sc=Latin      LATIN SMALL LETTER S WITH HOOK | 
| 2408 |  |  |  |  |  |  | 0x000283 =>     498,   #  ʃ  gc=Ll   sc=Latin      LATIN SMALL LETTER ESH | 
| 2409 |  |  |  |  |  |  | 0x000165 =>      23,   #  ť  gc=Ll   sc=Latin      LATIN SMALL LETTER T WITH CARON | 
| 2410 |  |  |  |  |  |  | 0x000164 =>       3,   #  Ť  gc=Lu   sc=Latin      LATIN CAPITAL LETTER T WITH CARON | 
| 2411 |  |  |  |  |  |  | 0x001E6A =>       2,   #  Ṫ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER T WITH DOT ABOVE | 
| 2412 |  |  |  |  |  |  | 0x000163 =>      35,   #  ţ  gc=Ll   sc=Latin      LATIN SMALL LETTER T WITH CEDILLA | 
| 2413 |  |  |  |  |  |  | 0x002121 =>       1,   #  ℡  gc=So   sc=Common     TELEPHONE SIGN | 
| 2414 |  |  |  |  |  |  | 0x002122 =>   40398,   #  ™  gc=So   sc=Common     TRADE MARK SIGN | 
| 2415 |  |  |  |  |  |  | 0x0000FA =>   11344,   #  ú  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH ACUTE | 
| 2416 |  |  |  |  |  |  | 0x0000DA =>     147,   #  Ú  gc=Lu   sc=Latin      LATIN CAPITAL LETTER U WITH ACUTE | 
| 2417 |  |  |  |  |  |  | 0x0000F9 =>    1065,   #  ù  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH GRAVE | 
| 2418 |  |  |  |  |  |  | 0x0000D9 =>      34,   #  Ù  gc=Lu   sc=Latin      LATIN CAPITAL LETTER U WITH GRAVE | 
| 2419 |  |  |  |  |  |  | 0x00016D =>      19,   #  ŭ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH BREVE | 
| 2420 |  |  |  |  |  |  | 0x0000FB =>     781,   #  û  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH CIRCUMFLEX | 
| 2421 |  |  |  |  |  |  | 0x0000DB =>      43,   #  Û  gc=Lu   sc=Latin      LATIN CAPITAL LETTER U WITH CIRCUMFLEX | 
| 2422 |  |  |  |  |  |  | 0x0001D4 =>       9,   #  ǔ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH CARON | 
| 2423 |  |  |  |  |  |  | 0x00016F =>     148,   #  ů  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH RING ABOVE | 
| 2424 |  |  |  |  |  |  | 0x0000FC =>  200690,   #  ü  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH DIAERESIS | 
| 2425 |  |  |  |  |  |  | 0x0000DC =>    5390,   #  Ü  gc=Lu   sc=Latin      LATIN CAPITAL LETTER U WITH DIAERESIS | 
| 2426 |  |  |  |  |  |  | 0x0001DC =>       2,   #  ǜ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE | 
| 2427 |  |  |  |  |  |  | 0x0001D9 =>       1,   #  Ǚ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON | 
| 2428 |  |  |  |  |  |  | 0x0001D6 =>       2,   #  ǖ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH DIAERESIS AND MACRON | 
| 2429 |  |  |  |  |  |  | 0x0001D5 =>       1,   #  Ǖ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON | 
| 2430 |  |  |  |  |  |  | 0x000171 =>      19,   #  ű  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH DOUBLE ACUTE | 
| 2431 |  |  |  |  |  |  | 0x000170 =>       3,   #  Ű  gc=Lu   sc=Latin      LATIN CAPITAL LETTER U WITH DOUBLE ACUTE | 
| 2432 |  |  |  |  |  |  | 0x000169 =>      51,   #  ũ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH TILDE | 
| 2433 |  |  |  |  |  |  | 0x000168 =>       1,   #  Ũ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER U WITH TILDE | 
| 2434 |  |  |  |  |  |  | 0x000173 =>       9,   #  ų  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH OGONEK | 
| 2435 |  |  |  |  |  |  | 0x00016B =>      70,   #  ū  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH MACRON | 
| 2436 |  |  |  |  |  |  | 0x00016A =>       9,   #  Ū  gc=Lu   sc=Latin      LATIN CAPITAL LETTER U WITH MACRON | 
| 2437 |  |  |  |  |  |  | 0x001EE7 =>       2,   #  ủ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH HOOK ABOVE | 
| 2438 |  |  |  |  |  |  | 0x0001B0 =>       1,   #  ư  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH HORN | 
| 2439 |  |  |  |  |  |  | 0x001EE9 =>       1,   #  ứ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH HORN AND ACUTE | 
| 2440 |  |  |  |  |  |  | 0x001EEB =>       1,   #  ừ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH HORN AND GRAVE | 
| 2441 |  |  |  |  |  |  | 0x001EEF =>       1,   #  ữ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH HORN AND TILDE | 
| 2442 |  |  |  |  |  |  | 0x001EF1 =>       3,   #  ự  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH HORN AND DOT BELOW | 
| 2443 |  |  |  |  |  |  | 0x001EE5 =>       3,   #  ụ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH DOT BELOW | 
| 2444 |  |  |  |  |  |  | 0x000265 =>       2,   #  ɥ  gc=Ll   sc=Latin      LATIN SMALL LETTER TURNED H | 
| 2445 |  |  |  |  |  |  | 0x00026F =>      11,   #  ɯ  gc=Ll   sc=Latin      LATIN SMALL LETTER TURNED M | 
| 2446 |  |  |  |  |  |  | 0x00028A =>      14,   #  ʊ  gc=Ll   sc=Latin      LATIN SMALL LETTER UPSILON | 
| 2447 |  |  |  |  |  |  | 0x001D20 =>       4,   #  ᴠ  gc=Ll   sc=Latin      LATIN LETTER SMALL CAPITAL V | 
| 2448 |  |  |  |  |  |  | 0x00028B =>       6,   #  ʋ  gc=Ll   sc=Latin      LATIN SMALL LETTER V WITH HOOK | 
| 2449 |  |  |  |  |  |  | 0x00028C =>      10,   #  ʌ  gc=Ll   sc=Latin      LATIN SMALL LETTER TURNED V | 
| 2450 |  |  |  |  |  |  | 0x000175 =>       8,   #  ŵ  gc=Ll   sc=Latin      LATIN SMALL LETTER W WITH CIRCUMFLEX | 
| 2451 |  |  |  |  |  |  | 0x0000FD =>    1882,   #  ý  gc=Ll   sc=Latin      LATIN SMALL LETTER Y WITH ACUTE | 
| 2452 |  |  |  |  |  |  | 0x0000DD =>     227,   #  Ý  gc=Lu   sc=Latin      LATIN CAPITAL LETTER Y WITH ACUTE | 
| 2453 |  |  |  |  |  |  | 0x001EF3 =>       7,   #  ỳ  gc=Ll   sc=Latin      LATIN SMALL LETTER Y WITH GRAVE | 
| 2454 |  |  |  |  |  |  | 0x000177 =>      65,   #  ŷ  gc=Ll   sc=Latin      LATIN SMALL LETTER Y WITH CIRCUMFLEX | 
| 2455 |  |  |  |  |  |  | 0x000176 =>      33,   #  Ŷ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER Y WITH CIRCUMFLEX | 
| 2456 |  |  |  |  |  |  | 0x0000FF =>     153,   #  ÿ  gc=Ll   sc=Latin      LATIN SMALL LETTER Y WITH DIAERESIS | 
| 2457 |  |  |  |  |  |  | 0x000178 =>      23,   #  Ÿ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER Y WITH DIAERESIS | 
| 2458 |  |  |  |  |  |  | 0x001EF9 =>       4,   #  ỹ  gc=Ll   sc=Latin      LATIN SMALL LETTER Y WITH TILDE | 
| 2459 |  |  |  |  |  |  | 0x00017A =>     767,   #  ź  gc=Ll   sc=Latin      LATIN SMALL LETTER Z WITH ACUTE | 
| 2460 |  |  |  |  |  |  | 0x000179 =>      27,   #  Ź  gc=Lu   sc=Latin      LATIN CAPITAL LETTER Z WITH ACUTE | 
| 2461 |  |  |  |  |  |  | 0x00017E =>    1338,   #  ž  gc=Ll   sc=Latin      LATIN SMALL LETTER Z WITH CARON | 
| 2462 |  |  |  |  |  |  | 0x00017D =>     606,   #  Ž  gc=Lu   sc=Latin      LATIN CAPITAL LETTER Z WITH CARON | 
| 2463 |  |  |  |  |  |  | 0x00017C =>     751,   #  ż  gc=Ll   sc=Latin      LATIN SMALL LETTER Z WITH DOT ABOVE | 
| 2464 |  |  |  |  |  |  | 0x00017B =>     281,   #  Ż  gc=Lu   sc=Latin      LATIN CAPITAL LETTER Z WITH DOT ABOVE | 
| 2465 |  |  |  |  |  |  | 0x000292 =>      10,   #  ʒ  gc=Ll   sc=Latin      LATIN SMALL LETTER EZH | 
| 2466 |  |  |  |  |  |  | 0x0000FE =>      41,   #  þ  gc=Ll   sc=Latin      LATIN SMALL LETTER THORN | 
| 2467 |  |  |  |  |  |  | 0x0000DE =>      23,   #  Þ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER THORN | 
| 2468 |  |  |  |  |  |  | 0x000294 =>       2,   #  ʔ  gc=Lo   sc=Latin      LATIN LETTER GLOTTAL STOP | 
| 2469 |  |  |  |  |  |  | 0x0002BC =>      50,   #  ʼ  gc=Lm   sc=Common     MODIFIER LETTER APOSTROPHE | 
| 2470 |  |  |  |  |  |  | 0x000295 =>       3,   #  ʕ  gc=Ll   sc=Latin      LATIN LETTER PHARYNGEAL VOICED FRICATIVE | 
| 2471 |  |  |  |  |  |  | 0x0001C0 =>     391,   #  ǀ  gc=Lo   sc=Latin      LATIN LETTER DENTAL CLICK | 
| 2472 |  |  |  |  |  |  | 0x0001C1 =>       2,   #  ǁ  gc=Lo   sc=Latin      LATIN LETTER LATERAL CLICK | 
| 2473 |  |  |  |  |  |  | 0x0001C2 =>      24,   #  ǂ  gc=Lo   sc=Latin      LATIN LETTER ALVEOLAR CLICK | 
| 2474 |  |  |  |  |  |  | 0x0001C3 =>      10,   #  ǃ  gc=Lo   sc=Latin      LATIN LETTER RETROFLEX CLICK | 
| 2475 |  |  |  |  |  |  | 0x000297 =>       6,   #  ʗ  gc=Ll   sc=Latin      LATIN LETTER STRETCHED C | 
| 2476 |  |  |  |  |  |  | 0x000298 =>       1,   #  ʘ  gc=Ll   sc=Latin      LATIN LETTER BILABIAL CLICK | 
| 2477 |  |  |  |  |  |  | 0x0003B1 => 1112960,   #  α  gc=Ll   sc=Greek      GREEK SMALL LETTER ALPHA | 
| 2478 |  |  |  |  |  |  | 0x000391 =>     229,   #  Α  gc=Lu   sc=Greek      GREEK CAPITAL LETTER ALPHA | 
| 2479 |  |  |  |  |  |  | 0x0003AC =>      81,   #  ά  gc=Ll   sc=Greek      GREEK SMALL LETTER ALPHA WITH TONOS | 
| 2480 |  |  |  |  |  |  | 0x0003B2 => 1097639,   #  β  gc=Ll   sc=Greek      GREEK SMALL LETTER BETA | 
| 2481 |  |  |  |  |  |  | 0x0003D0 =>      56,   #  ϐ  gc=Ll   sc=Greek      GREEK BETA SYMBOL | 
| 2482 |  |  |  |  |  |  | 0x000392 =>     149,   #  Β  gc=Lu   sc=Greek      GREEK CAPITAL LETTER BETA | 
| 2483 |  |  |  |  |  |  | 0x0003B3 =>  321472,   #  γ  gc=Ll   sc=Greek      GREEK SMALL LETTER GAMMA | 
| 2484 |  |  |  |  |  |  | 0x000393 =>    1435,   #  Γ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER GAMMA | 
| 2485 |  |  |  |  |  |  | 0x0003B4 =>   91875,   #  δ  gc=Ll   sc=Greek      GREEK SMALL LETTER DELTA | 
| 2486 |  |  |  |  |  |  | 0x000394 =>  169339,   #  Δ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER DELTA | 
| 2487 |  |  |  |  |  |  | 0x0003B5 =>   35812,   #  ε  gc=Ll   sc=Greek      GREEK SMALL LETTER EPSILON | 
| 2488 |  |  |  |  |  |  | 0x0003F5 =>   22254,   #  ϵ  gc=Ll   sc=Greek      GREEK LUNATE EPSILON SYMBOL | 
| 2489 |  |  |  |  |  |  | 0x000395 =>      25,   #  Ε  gc=Lu   sc=Greek      GREEK CAPITAL LETTER EPSILON | 
| 2490 |  |  |  |  |  |  | 0x0003AD =>       9,   #  έ  gc=Ll   sc=Greek      GREEK SMALL LETTER EPSILON WITH TONOS | 
| 2491 |  |  |  |  |  |  | 0x0003B6 =>   11812,   #  ζ  gc=Ll   sc=Greek      GREEK SMALL LETTER ZETA | 
| 2492 |  |  |  |  |  |  | 0x000396 =>      12,   #  Ζ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER ZETA | 
| 2493 |  |  |  |  |  |  | 0x0003B7 =>    7730,   #  η  gc=Ll   sc=Greek      GREEK SMALL LETTER ETA | 
| 2494 |  |  |  |  |  |  | 0x000397 =>      40,   #  Η  gc=Lu   sc=Greek      GREEK CAPITAL LETTER ETA | 
| 2495 |  |  |  |  |  |  | 0x0003AE =>      19,   #  ή  gc=Ll   sc=Greek      GREEK SMALL LETTER ETA WITH TONOS | 
| 2496 |  |  |  |  |  |  | 0x0003B8 =>   20347,   #  θ  gc=Ll   sc=Greek      GREEK SMALL LETTER THETA | 
| 2497 |  |  |  |  |  |  | 0x0003D1 =>     491,   #  ϑ  gc=Ll   sc=Greek      GREEK THETA SYMBOL | 
| 2498 |  |  |  |  |  |  | 0x000398 =>    1150,   #  Θ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER THETA | 
| 2499 |  |  |  |  |  |  | 0x0003F4 =>      79,   #  ϴ  gc=Lu   sc=Greek      GREEK CAPITAL THETA SYMBOL | 
| 2500 |  |  |  |  |  |  | 0x0003B9 =>     917,   #  ι  gc=Ll   sc=Greek      GREEK SMALL LETTER IOTA | 
| 2501 |  |  |  |  |  |  | 0x000399 =>     157,   #  Ι  gc=Lu   sc=Greek      GREEK CAPITAL LETTER IOTA | 
| 2502 |  |  |  |  |  |  | 0x0003AF =>      38,   #  ί  gc=Ll   sc=Greek      GREEK SMALL LETTER IOTA WITH TONOS | 
| 2503 |  |  |  |  |  |  | 0x0003CA =>       4,   #  ϊ  gc=Ll   sc=Greek      GREEK SMALL LETTER IOTA WITH DIALYTIKA | 
| 2504 |  |  |  |  |  |  | 0x000390 =>       1,   #  ΐ  gc=Ll   sc=Greek      GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS | 
| 2505 |  |  |  |  |  |  | 0x0003BA =>  160319,   #  κ  gc=Ll   sc=Greek      GREEK SMALL LETTER KAPPA | 
| 2506 |  |  |  |  |  |  | 0x0003F0 =>     141,   #  ϰ  gc=Ll   sc=Greek      GREEK KAPPA SYMBOL | 
| 2507 |  |  |  |  |  |  | 0x00039A =>      29,   #  Κ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER KAPPA | 
| 2508 |  |  |  |  |  |  | 0x0003BB =>   36760,   #  λ  gc=Ll   sc=Greek      GREEK SMALL LETTER LAMDA | 
| 2509 |  |  |  |  |  |  | 0x00039B =>     771,   #  Λ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER LAMDA | 
| 2510 |  |  |  |  |  |  | 0x0003BC => 1693959,   #  μ  gc=Ll   sc=Greek      GREEK SMALL LETTER MU | 
| 2511 |  |  |  |  |  |  | 0x0000B5 =>     564,   #  µ  gc=Ll   sc=Common     MICRO SIGN | 
| 2512 |  |  |  |  |  |  | 0x00039C =>     208,   #  Μ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER MU | 
| 2513 |  |  |  |  |  |  | 0x0003BD =>    5284,   #  ν  gc=Ll   sc=Greek      GREEK SMALL LETTER NU | 
| 2514 |  |  |  |  |  |  | 0x00039D =>      46,   #  Ν  gc=Lu   sc=Greek      GREEK CAPITAL LETTER NU | 
| 2515 |  |  |  |  |  |  | 0x0003BE =>    2219,   #  ξ  gc=Ll   sc=Greek      GREEK SMALL LETTER XI | 
| 2516 |  |  |  |  |  |  | 0x00039E =>     100,   #  Ξ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER XI | 
| 2517 |  |  |  |  |  |  | 0x0003BF =>     231,   #  ο  gc=Ll   sc=Greek      GREEK SMALL LETTER OMICRON | 
| 2518 |  |  |  |  |  |  | 0x00039F =>      45,   #  Ο  gc=Lu   sc=Greek      GREEK CAPITAL LETTER OMICRON | 
| 2519 |  |  |  |  |  |  | 0x0003CC =>       2,   #  ό  gc=Ll   sc=Greek      GREEK SMALL LETTER OMICRON WITH TONOS | 
| 2520 |  |  |  |  |  |  | 0x0003C0 =>   12863,   #  π  gc=Ll   sc=Greek      GREEK SMALL LETTER PI | 
| 2521 |  |  |  |  |  |  | 0x0003D6 =>     128,   #  ϖ  gc=Ll   sc=Greek      GREEK PI SYMBOL | 
| 2522 |  |  |  |  |  |  | 0x0003A0 =>     563,   #  Π  gc=Lu   sc=Greek      GREEK CAPITAL LETTER PI | 
| 2523 |  |  |  |  |  |  | 0x0003C1 =>   16067,   #  ρ  gc=Ll   sc=Greek      GREEK SMALL LETTER RHO | 
| 2524 |  |  |  |  |  |  | 0x0003F1 =>     201,   #  ϱ  gc=Ll   sc=Greek      GREEK RHO SYMBOL | 
| 2525 |  |  |  |  |  |  | 0x0003A1 =>       9,   #  Ρ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER RHO | 
| 2526 |  |  |  |  |  |  | 0x0003C3 =>   32772,   #  σ  gc=Ll   sc=Greek      GREEK SMALL LETTER SIGMA | 
| 2527 |  |  |  |  |  |  | 0x0003A3 =>    5534,   #  Σ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER SIGMA | 
| 2528 |  |  |  |  |  |  | 0x0003C2 =>     107,   #  ς  gc=Ll   sc=Greek      GREEK SMALL LETTER FINAL SIGMA | 
| 2529 |  |  |  |  |  |  | 0x0003C4 =>   25989,   #  τ  gc=Ll   sc=Greek      GREEK SMALL LETTER TAU | 
| 2530 |  |  |  |  |  |  | 0x0003A4 =>      63,   #  Τ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER TAU | 
| 2531 |  |  |  |  |  |  | 0x0003C5 =>     760,   #  υ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON | 
| 2532 |  |  |  |  |  |  | 0x0003D2 =>      93,   #  ϒ  gc=Lu   sc=Greek      GREEK UPSILON WITH HOOK SYMBOL | 
| 2533 |  |  |  |  |  |  | 0x0003CD =>      10,   #  ύ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH TONOS | 
| 2534 |  |  |  |  |  |  | 0x0003CB =>      21,   #  ϋ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH DIALYTIKA | 
| 2535 |  |  |  |  |  |  | 0x0003C6 =>   12562,   #  φ  gc=Ll   sc=Greek      GREEK SMALL LETTER PHI | 
| 2536 |  |  |  |  |  |  | 0x0003D5 =>    3367,   #  ϕ  gc=Ll   sc=Greek      GREEK PHI SYMBOL | 
| 2537 |  |  |  |  |  |  | 0x0003A6 =>    5057,   #  Φ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER PHI | 
| 2538 |  |  |  |  |  |  | 0x0003C7 =>   71183,   #  χ  gc=Ll   sc=Greek      GREEK SMALL LETTER CHI | 
| 2539 |  |  |  |  |  |  | 0x0003A7 =>      94,   #  Χ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER CHI | 
| 2540 |  |  |  |  |  |  | 0x0003C8 =>    7035,   #  ψ  gc=Ll   sc=Greek      GREEK SMALL LETTER PSI | 
| 2541 |  |  |  |  |  |  | 0x0003A8 =>    7359,   #  Ψ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER PSI | 
| 2542 |  |  |  |  |  |  | 0x0003C9 =>   29414,   #  ω  gc=Ll   sc=Greek      GREEK SMALL LETTER OMEGA | 
| 2543 |  |  |  |  |  |  | 0x0003A9 =>   16027,   #  Ω  gc=Lu   sc=Greek      GREEK CAPITAL LETTER OMEGA | 
| 2544 |  |  |  |  |  |  | 0x002126 =>      30,   #  Ω  gc=Lu   sc=Greek      OHM SIGN | 
| 2545 |  |  |  |  |  |  | 0x0003CE =>       7,   #  ώ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMEGA WITH TONOS | 
| 2546 |  |  |  |  |  |  | 0x00038F =>       5,   #  Ώ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER OMEGA WITH TONOS | 
| 2547 |  |  |  |  |  |  | 0x0003E1 =>       1,   #  ϡ  gc=Ll   sc=Greek      GREEK SMALL LETTER SAMPI | 
| 2548 |  |  |  |  |  |  | 0x000430 =>      12,   #  а  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER A | 
| 2549 |  |  |  |  |  |  | 0x000410 =>       2,   #  А  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER A | 
| 2550 |  |  |  |  |  |  | 0x000431 =>       3,   #  б  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER BE | 
| 2551 |  |  |  |  |  |  | 0x000411 =>       1,   #  Б  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER BE | 
| 2552 |  |  |  |  |  |  | 0x000432 =>       4,   #  в  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER VE | 
| 2553 |  |  |  |  |  |  | 0x000433 =>       4,   #  г  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER GHE | 
| 2554 |  |  |  |  |  |  | 0x000413 =>      26,   #  Г  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER GHE | 
| 2555 |  |  |  |  |  |  | 0x000434 =>      10,   #  д  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER DE | 
| 2556 |  |  |  |  |  |  | 0x000414 =>       3,   #  Д  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER DE | 
| 2557 |  |  |  |  |  |  | 0x000454 =>      16,   #  є  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER UKRAINIAN IE | 
| 2558 |  |  |  |  |  |  | 0x000404 =>      21,   #  Є  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER UKRAINIAN IE | 
| 2559 |  |  |  |  |  |  | 0x000436 =>       5,   #  ж  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER ZHE | 
| 2560 |  |  |  |  |  |  | 0x000416 =>       4,   #  Ж  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER ZHE | 
| 2561 |  |  |  |  |  |  | 0x000437 =>       3,   #  з  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER ZE | 
| 2562 |  |  |  |  |  |  | 0x000417 =>       1,   #  З  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER ZE | 
| 2563 |  |  |  |  |  |  | 0x000438 =>       3,   #  и  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER I | 
| 2564 |  |  |  |  |  |  | 0x000418 =>      11,   #  И  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER I | 
| 2565 |  |  |  |  |  |  | 0x000406 =>       1,   #  І  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I | 
| 2566 |  |  |  |  |  |  | 0x000439 =>       9,   #  й  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER SHORT I | 
| 2567 |  |  |  |  |  |  | 0x00043A =>     147,   #  к  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER KA | 
| 2568 |  |  |  |  |  |  | 0x00041A =>       2,   #  К  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER KA | 
| 2569 |  |  |  |  |  |  | 0x00043B =>       2,   #  л  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER EL | 
| 2570 |  |  |  |  |  |  | 0x00041B =>       5,   #  Л  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER EL | 
| 2571 |  |  |  |  |  |  | 0x00043C =>       2,   #  м  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER EM | 
| 2572 |  |  |  |  |  |  | 0x00043D =>      10,   #  н  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER EN | 
| 2573 |  |  |  |  |  |  | 0x00043E =>       4,   #  о  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER O | 
| 2574 |  |  |  |  |  |  | 0x0004E9 =>       1,   #  ө  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER BARRED O | 
| 2575 |  |  |  |  |  |  | 0x00043F =>      12,   #  п  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER PE | 
| 2576 |  |  |  |  |  |  | 0x00041F =>      14,   #  П  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER PE | 
| 2577 |  |  |  |  |  |  | 0x000442 =>       7,   #  т  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER TE | 
| 2578 |  |  |  |  |  |  | 0x000423 =>       1,   #  У  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER U | 
| 2579 |  |  |  |  |  |  | 0x000444 =>      29,   #  ф  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER EF | 
| 2580 |  |  |  |  |  |  | 0x000424 =>      33,   #  Ф  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER EF | 
| 2581 |  |  |  |  |  |  | 0x000425 =>       1,   #  Х  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER HA | 
| 2582 |  |  |  |  |  |  | 0x000446 =>       6,   #  ц  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER TSE | 
| 2583 |  |  |  |  |  |  | 0x000426 =>       1,   #  Ц  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER TSE | 
| 2584 |  |  |  |  |  |  | 0x000447 =>       2,   #  ч  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER CHE | 
| 2585 |  |  |  |  |  |  | 0x000427 =>       2,   #  Ч  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER CHE | 
| 2586 |  |  |  |  |  |  | 0x000448 =>       2,   #  ш  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER SHA | 
| 2587 |  |  |  |  |  |  | 0x000428 =>       1,   #  Ш  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER SHA | 
| 2588 |  |  |  |  |  |  | 0x00044B =>       3,   #  ы  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER YERU | 
| 2589 |  |  |  |  |  |  | 0x00042B =>       3,   #  Ы  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER YERU | 
| 2590 |  |  |  |  |  |  | 0x00044C =>       1,   #  ь  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER SOFT SIGN | 
| 2591 |  |  |  |  |  |  | 0x00042C =>       1,   #  Ь  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER SOFT SIGN | 
| 2592 |  |  |  |  |  |  | 0x00044E =>      19,   #  ю  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER YU | 
| 2593 |  |  |  |  |  |  | 0x00044F =>       1,   #  я  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER YA | 
| 2594 |  |  |  |  |  |  | 0x00042F =>       1,   #  Я  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER YA | 
| 2595 |  |  |  |  |  |  | 0x002135 =>       8,   #  ℵ  gc=Lo   sc=Common     ALEF SYMBOL | 
| 2596 |  |  |  |  |  |  | 0x002137 =>       4,   #  ℷ  gc=Lo   sc=Common     GIMEL SYMBOL | 
| 2597 |  |  |  |  |  |  | 0x0005E1 =>       9,   #  ס  gc=Lo   sc=Hebrew     HEBREW LETTER SAMEKH | 
| 2598 |  |  |  |  |  |  | 0x000915 =>       3,   #  क  gc=Lo   sc=Devanagari DEVANAGARI LETTER KA | 
| 2599 |  |  |  |  |  |  | 0x000916 =>       1,   #  ख  gc=Lo   sc=Devanagari DEVANAGARI LETTER KHA | 
| 2600 |  |  |  |  |  |  | 0x000937 =>       1,   #  ष  gc=Lo   sc=Devanagari DEVANAGARI LETTER SSA | 
| 2601 |  |  |  |  |  |  | 0x000B0C =>       1,   #  ଌ  gc=Lo   sc=Oriya      ORIYA LETTER VOCALIC L | 
| 2602 |  |  |  |  |  |  | 0x00170E =>       1,   #  ᜎ  gc=Lo   sc=Tagalog    TAGALOG LETTER LA | 
| 2603 |  |  |  |  |  |  | 0x0015DB =>       3,   #  ᗛ  gc=Lo   sc=Canadian_aboriginal CANADIAN SYLLABICS CARRIER HWA | 
| 2604 |  |  |  |  |  |  | 0x004E1C =>       4,   #  东 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-4E1C | 
| 2605 |  |  |  |  |  |  | 0x0051AC =>       4,   #  冬 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-51AC | 
| 2606 |  |  |  |  |  |  | 0x005230 =>       6,   #  到 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-5230 | 
| 2607 |  |  |  |  |  |  | 0x005357 =>       6,   #  南 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-5357 | 
| 2608 |  |  |  |  |  |  | 0x005B50 =>       6,   #  子 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-5B50 | 
| 2609 |  |  |  |  |  |  | 0x005E03 =>       2,   #  布 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-5E03 | 
| 2610 |  |  |  |  |  |  | 0x0065B9 =>       6,   #  方 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-65B9 | 
| 2611 |  |  |  |  |  |  | 0x0071D5 =>       6,   #  燕 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-71D5 | 
| 2612 |  |  |  |  |  |  | 0x008201 =>       8,   #  舁 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-8201 | 
| 2613 |  |  |  |  |  |  | 0x008805 =>       1,   #  蠅 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-8805 | 
| 2614 |  |  |  |  |  |  | 0x008FC7 =>       6,   #  过 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-8FC7 | 
| 2615 |  |  |  |  |  |  | 0x009638 =>       2,   #  阸 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-9638 | 
| 2616 |  |  |  |  |  |  | 0x0098DE =>       6,   #  飞 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-98DE | 
| 2617 |  |  |  |  |  |  | 0x000E00 =>       1,   # <-> gc=Cn   sc=unassigned_script | 
| 2618 |  |  |  |  |  |  | 0x000EF7 =>       6,   # <-> gc=Cn   sc=unassigned_script | 
| 2619 |  |  |  |  |  |  | 0x002065 =>       1,   # <-> gc=Cn   sc=unassigned_script | 
| 2620 |  |  |  |  |  |  | 0x00E2D4 =>       6,   # <-> gc=Co   sc=Unknown | 
| 2621 |  |  |  |  |  |  | 0x00E301 =>       1,   # <-> gc=Co   sc=Unknown | 
| 2622 |  |  |  |  |  |  | 0x00E302 =>       2,   # <-> gc=Co   sc=Unknown | 
| 2623 |  |  |  |  |  |  | 0x00E34C =>       2,   # <-> gc=Co   sc=Unknown | 
| 2624 |  |  |  |  |  |  | 0x00E444 =>       1,   # <-> gc=Co   sc=Unknown | 
| 2625 |  |  |  |  |  |  | 0x00E5B4 =>       7,   # <-> gc=Co   sc=Unknown | 
| 2626 |  |  |  |  |  |  | 0x00E5B6 =>      18,   # <-> gc=Co   sc=Unknown | 
| 2627 |  |  |  |  |  |  | 0x00E5B7 =>       2,   # <-> gc=Co   sc=Unknown | 
| 2628 |  |  |  |  |  |  | 0x00E5B9 =>       3,   # <-> gc=Co   sc=Unknown | 
| 2629 |  |  |  |  |  |  | 0x00E5BA =>       2,   # <-> gc=Co   sc=Unknown | 
| 2630 |  |  |  |  |  |  | 0x00E5F2 =>       3,   # <-> gc=Co   sc=Unknown | 
| 2631 |  |  |  |  |  |  | 0x00E5F4 =>       1,   # <-> gc=Co   sc=Unknown | 
| 2632 |  |  |  |  |  |  | 0x00E5F8 =>    7497,   # <-> gc=Co   sc=Unknown | 
| 2633 |  |  |  |  |  |  | 0x00E5F9 =>      10,   # <-> gc=Co   sc=Unknown | 
| 2634 |  |  |  |  |  |  | 0x00E5FB =>     657,   # <-> gc=Co   sc=Unknown | 
| 2635 |  |  |  |  |  |  | 0x00E5FC =>      43,   # <-> gc=Co   sc=Unknown | 
| 2636 |  |  |  |  |  |  | 0x00E5FD =>       3,   # <-> gc=Co   sc=Unknown | 
| 2637 |  |  |  |  |  |  | 0x00E605 =>    3025,   # <-> gc=Co   sc=Unknown | 
| 2638 |  |  |  |  |  |  | 0x00E606 =>     233,   # <-> gc=Co   sc=Unknown | 
| 2639 |  |  |  |  |  |  | 0x00E607 =>       1,   # <-> gc=Co   sc=Unknown | 
| 2640 |  |  |  |  |  |  | 0x00E60A =>       6,   # <-> gc=Co   sc=Unknown | 
| 2641 |  |  |  |  |  |  | 0x00E60B =>      24,   # <-> gc=Co   sc=Unknown | 
| 2642 |  |  |  |  |  |  | 0x00E626 =>      11,   # <-> gc=Co   sc=Unknown | 
| 2643 |  |  |  |  |  |  | 0x00E62D =>       5,   # <-> gc=Co   sc=Unknown | 
| 2644 |  |  |  |  |  |  | 0x00E630 =>       1,   # <-> gc=Co   sc=Unknown | 
| 2645 |  |  |  |  |  |  | 0x00E634 =>       1,   # <-> gc=Co   sc=Unknown | 
| 2646 |  |  |  |  |  |  | 0x00E643 =>      57,   # <-> gc=Co   sc=Unknown | 
| 2647 |  |  |  |  |  |  | 0x00E659 =>       1,   # <-> gc=Co   sc=Unknown | 
| 2648 |  |  |  |  |  |  | 0x00E6D4 =>       6,   # <-> gc=Co   sc=Unknown | 
| 2649 |  |  |  |  |  |  | 0x00EC02 =>       1,   # <-> gc=Co   sc=Unknown | 
| 2650 |  |  |  |  |  |  | 0x00EF22 =>     157,   # <-> gc=Co   sc=Unknown | 
| 2651 |  |  |  |  |  |  | 0x00F020 =>       6,   # <-> gc=Co   sc=Unknown | 
| 2652 |  |  |  |  |  |  | 0x00F02C =>       3,   # <-> gc=Co   sc=Unknown | 
| 2653 |  |  |  |  |  |  | 0x00F02D =>       2,   # <-> gc=Co   sc=Unknown | 
| 2654 |  |  |  |  |  |  | 0x00F061 =>       4,   # <-> gc=Co   sc=Unknown | 
| 2655 |  |  |  |  |  |  | 0x00F062 =>       2,   # <-> gc=Co   sc=Unknown | 
| 2656 |  |  |  |  |  |  | 0x00F0A2 =>       1,   # <-> gc=Co   sc=Unknown | 
| 2657 |  |  |  |  |  |  | ); | 
| 2658 |  |  |  |  |  |  |  | 
| 2659 |  |  |  |  |  |  | my %pmcoa_training = ( | 
| 2660 |  |  |  |  |  |  |  | 
| 2661 |  |  |  |  |  |  | 0x000640 =>       4,  #  ـ  gc=Lm   sc=Common     ARABIC TATWEEL | 
| 2662 |  |  |  |  |  |  | 0x0000B4 =>    1203,  #  ´  gc=Sk   sc=Common     ACUTE ACCENT | 
| 2663 |  |  |  |  |  |  | 0x000384 =>       3,  #  ΄  gc=Sk   sc=Greek      GREEK TONOS | 
| 2664 |  |  |  |  |  |  | 0x0002DC =>    5725,  #  ˜  gc=Sk   sc=Common     SMALL TILDE | 
| 2665 |  |  |  |  |  |  | 0x0000AF =>    9247,  #  ¯  gc=Sk   sc=Common     MACRON | 
| 2666 |  |  |  |  |  |  | 0x00203E =>       1,  #  ‾  gc=Po   sc=Common     OVERLINE | 
| 2667 |  |  |  |  |  |  | 0x0002D8 =>       2,  #  ˘  gc=Sk   sc=Common     BREVE | 
| 2668 |  |  |  |  |  |  | 0x0002D9 =>    1774,  #  ˙  gc=Sk   sc=Common     DOT ABOVE | 
| 2669 |  |  |  |  |  |  | 0x0000A8 =>     159,  #  ¨  gc=Sk   sc=Common     DIAERESIS | 
| 2670 |  |  |  |  |  |  | 0x0002DA =>     356,  #  ˚  gc=Sk   sc=Common     RING ABOVE | 
| 2671 |  |  |  |  |  |  | 0x0002DD =>       1,  #  ˝  gc=Sk   sc=Common     DOUBLE ACUTE ACCENT | 
| 2672 |  |  |  |  |  |  | 0x001FBF =>       3,  #  ᾿  gc=Sk   sc=Greek      GREEK PSILI | 
| 2673 |  |  |  |  |  |  | 0x0000B8 =>      36,  #  ¸  gc=Sk   sc=Common     CEDILLA | 
| 2674 |  |  |  |  |  |  | 0x002017 =>       2,  #  ‗  gc=Po   sc=Common     DOUBLE LOW LINE | 
| 2675 |  |  |  |  |  |  | 0x00FF0D =>      11,  #  - gc=Pd   sc=Common     FULLWIDTH HYPHEN-MINUS | 
| 2676 |  |  |  |  |  |  | 0x002010 =>    1179,  #  ‐  gc=Pd   sc=Common     HYPHEN | 
| 2677 |  |  |  |  |  |  | 0x002011 =>     278,  #  ‑  gc=Pd   sc=Common     NON-BREAKING HYPHEN | 
| 2678 |  |  |  |  |  |  | 0x002012 =>      37,  #  ‒  gc=Pd   sc=Common     FIGURE DASH | 
| 2679 |  |  |  |  |  |  | 0x002013 => 2663710,  #  –  gc=Pd   sc=Common     EN DASH | 
| 2680 |  |  |  |  |  |  | 0x002014 =>  165345,  #  —  gc=Pd   sc=Common     EM DASH | 
| 2681 |  |  |  |  |  |  | 0x002015 =>     393,  #  ―  gc=Pd   sc=Common     HORIZONTAL BAR | 
| 2682 |  |  |  |  |  |  | 0x0030FB =>       7,  #  ・ gc=Po   sc=Common     KATAKANA MIDDLE DOT | 
| 2683 |  |  |  |  |  |  | 0x00FF0C =>       8,  #  , gc=Po   sc=Common     FULLWIDTH COMMA | 
| 2684 |  |  |  |  |  |  | 0x00066C =>       4,  #  ٬  gc=Po   sc=Arabic     ARABIC THOUSANDS SEPARATOR | 
| 2685 |  |  |  |  |  |  | 0x00FF1B =>       1,  #  ; gc=Po   sc=Common     FULLWIDTH SEMICOLON | 
| 2686 |  |  |  |  |  |  | 0x00FF1A =>       1,  #  : gc=Po   sc=Common     FULLWIDTH COLON | 
| 2687 |  |  |  |  |  |  | 0x0000A1 =>     191,  #  ¡  gc=Po   sc=Common     INVERTED EXCLAMATION MARK | 
| 2688 |  |  |  |  |  |  | 0x0000BF =>     188,  #  ¿  gc=Po   sc=Common     INVERTED QUESTION MARK | 
| 2689 |  |  |  |  |  |  | 0x00203D =>       1,  #  ‽  gc=Po   sc=Common     INTERROBANG | 
| 2690 |  |  |  |  |  |  | 0x00FF0E =>       1,  #  . gc=Po   sc=Common     FULLWIDTH FULL STOP | 
| 2691 |  |  |  |  |  |  | 0x002025 =>     127,  #  ‥  gc=Po   sc=Common     TWO DOT LEADER | 
| 2692 |  |  |  |  |  |  | 0x002026 =>   25433,  #  …  gc=Po   sc=Common     HORIZONTAL ELLIPSIS | 
| 2693 |  |  |  |  |  |  | 0x003002 =>       1,  #  。 gc=Po   sc=Common     IDEOGRAPHIC FULL STOP | 
| 2694 |  |  |  |  |  |  | 0x000387 =>      20,  #  ·  gc=Po   sc=Common     GREEK ANO TELEIA | 
| 2695 |  |  |  |  |  |  | 0x0000B7 =>   86009,  #  ·  gc=Po   sc=Common     MIDDLE DOT | 
| 2696 |  |  |  |  |  |  | 0x00205C =>       1,  #  ⁜  gc=Po   sc=Common     DOTTED CROSS | 
| 2697 |  |  |  |  |  |  | 0x002018 =>  163000,  #  ‘  gc=Pi   sc=Common     LEFT SINGLE QUOTATION MARK | 
| 2698 |  |  |  |  |  |  | 0x002019 =>  376122,  #  ’  gc=Pf   sc=Common     RIGHT SINGLE QUOTATION MARK | 
| 2699 |  |  |  |  |  |  | 0x00201A =>      77,  #  ‚  gc=Ps   sc=Common     SINGLE LOW-9 QUOTATION MARK | 
| 2700 |  |  |  |  |  |  | 0x002039 =>      16,  #  ‹  gc=Pi   sc=Common     SINGLE LEFT-POINTING ANGLE QUOTATION MARK | 
| 2701 |  |  |  |  |  |  | 0x00203A =>       3,  #  ›  gc=Pf   sc=Common     SINGLE RIGHT-POINTING ANGLE QUOTATION MARK | 
| 2702 |  |  |  |  |  |  | 0x00201C =>  292797,  #  “  gc=Pi   sc=Common     LEFT DOUBLE QUOTATION MARK | 
| 2703 |  |  |  |  |  |  | 0x00201D =>  293225,  #  ”  gc=Pf   sc=Common     RIGHT DOUBLE QUOTATION MARK | 
| 2704 |  |  |  |  |  |  | 0x00201E =>    1003,  #  „  gc=Ps   sc=Common     DOUBLE LOW-9 QUOTATION MARK | 
| 2705 |  |  |  |  |  |  | 0x00201F =>       3,  #  ‟  gc=Pi   sc=Common     DOUBLE HIGH-REVERSED-9 QUOTATION MARK | 
| 2706 |  |  |  |  |  |  | 0x0000AB =>    1069,  #  «  gc=Pi   sc=Common     LEFT-POINTING DOUBLE ANGLE QUOTATION MARK | 
| 2707 |  |  |  |  |  |  | 0x0000BB =>    1166,  #  »  gc=Pf   sc=Common     RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK | 
| 2708 |  |  |  |  |  |  | 0x00207D =>       1,  #  ⁽  gc=Ps   sc=Common     SUPERSCRIPT LEFT PARENTHESIS | 
| 2709 |  |  |  |  |  |  | 0x00207E =>       1,  #  ⁾  gc=Pe   sc=Common     SUPERSCRIPT RIGHT PARENTHESIS | 
| 2710 |  |  |  |  |  |  | 0x00FE37 =>      14,  #  ︷ gc=Ps   sc=Common     PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET | 
| 2711 |  |  |  |  |  |  | 0x00FE38 =>     324,  #  ︸ gc=Pe   sc=Common     PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET | 
| 2712 |  |  |  |  |  |  | 0x00298F =>       2,  #  ⦏  gc=Ps   sc=Common     LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER | 
| 2713 |  |  |  |  |  |  | 0x003008 =>     574,  #  〈 gc=Ps   sc=Common     LEFT ANGLE BRACKET | 
| 2714 |  |  |  |  |  |  | 0x002329 =>    3761,  #  〈 gc=Ps   sc=Common     LEFT-POINTING ANGLE BRACKET | 
| 2715 |  |  |  |  |  |  | 0x00FE3F =>       1,  #  ︿ gc=Ps   sc=Common     PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET | 
| 2716 |  |  |  |  |  |  | 0x003009 =>     597,  #  〉 gc=Pe   sc=Common     RIGHT ANGLE BRACKET | 
| 2717 |  |  |  |  |  |  | 0x00232A =>    3920,  #  〉 gc=Pe   sc=Common     RIGHT-POINTING ANGLE BRACKET | 
| 2718 |  |  |  |  |  |  | 0x003014 =>       3,  #  〔 gc=Ps   sc=Common     LEFT TORTOISE SHELL BRACKET | 
| 2719 |  |  |  |  |  |  | 0x003015 =>       3,  #  〕 gc=Pe   sc=Common     RIGHT TORTOISE SHELL BRACKET | 
| 2720 |  |  |  |  |  |  | 0x00301A =>       7,  #  〚 gc=Ps   sc=Common     LEFT WHITE SQUARE BRACKET | 
| 2721 |  |  |  |  |  |  | 0x00301B =>       7,  #  〛 gc=Pe   sc=Common     RIGHT WHITE SQUARE BRACKET | 
| 2722 |  |  |  |  |  |  | 0x0000A7 =>   21055,  #  §  gc=So   sc=Common     SECTION SIGN | 
| 2723 |  |  |  |  |  |  | 0x0000B6 =>    7433,  #  ¶  gc=So   sc=Common     PILCROW SIGN | 
| 2724 |  |  |  |  |  |  | 0x0000A9 =>  143225,  #  ©  gc=So   sc=Common     COPYRIGHT SIGN | 
| 2725 |  |  |  |  |  |  | 0x0000AE =>   63149,  #  ®  gc=So   sc=Common     REGISTERED SIGN | 
| 2726 |  |  |  |  |  |  | 0x00204E =>    3294,  #  ⁎  gc=Po   sc=Common     LOW ASTERISK | 
| 2727 |  |  |  |  |  |  | 0x002044 =>      13,  #  ⁄  gc=Sm   sc=Common     FRACTION SLASH | 
| 2728 |  |  |  |  |  |  | 0x00FF06 =>       4,  #  & gc=Po   sc=Common     FULLWIDTH AMPERSAND | 
| 2729 |  |  |  |  |  |  | 0x002030 =>    1535,  #  ‰  gc=Po   sc=Common     PER MILLE SIGN | 
| 2730 |  |  |  |  |  |  | 0x002031 =>      62,  #  ‱  gc=Po   sc=Common     PER TEN THOUSAND SIGN | 
| 2731 |  |  |  |  |  |  | 0x002020 =>   60549,  #  †  gc=Po   sc=Common     DAGGER | 
| 2732 |  |  |  |  |  |  | 0x002021 =>   30763,  #  ‡  gc=Po   sc=Common     DOUBLE DAGGER | 
| 2733 |  |  |  |  |  |  | 0x002022 =>   85746,  #  •  gc=Po   sc=Common     BULLET | 
| 2734 |  |  |  |  |  |  | 0x002023 =>      21,  #  ‣  gc=Po   sc=Common     TRIANGULAR BULLET | 
| 2735 |  |  |  |  |  |  | 0x002027 =>      16,  #  ‧  gc=Po   sc=Common     HYPHENATION POINT | 
| 2736 |  |  |  |  |  |  | 0x002043 =>       1,  #  ⁃  gc=Po   sc=Common     HYPHEN BULLET | 
| 2737 |  |  |  |  |  |  | 0x002032 =>  359852,  #  ′  gc=Po   sc=Common     PRIME | 
| 2738 |  |  |  |  |  |  | 0x002033 =>    5714,  #  ″  gc=Po   sc=Common     DOUBLE PRIME | 
| 2739 |  |  |  |  |  |  | 0x002034 =>     176,  #  ‴  gc=Po   sc=Common     TRIPLE PRIME | 
| 2740 |  |  |  |  |  |  | 0x002035 =>      29,  #  ‵  gc=Po   sc=Common     REVERSED PRIME | 
| 2741 |  |  |  |  |  |  | 0x002037 =>       1,  #  ‷  gc=Po   sc=Common     REVERSED TRIPLE PRIME | 
| 2742 |  |  |  |  |  |  | 0x002038 =>      10,  #  ‸  gc=Po   sc=Common     CARET | 
| 2743 |  |  |  |  |  |  | 0x00203B =>      38,  #  ※  gc=Po   sc=Common     REFERENCE MARK | 
| 2744 |  |  |  |  |  |  | 0x00203F =>       1,  #  ‿  gc=Pc   sc=Common     UNDERTIE | 
| 2745 |  |  |  |  |  |  | 0x002041 =>      48,  #  ⁁  gc=Po   sc=Common     CARET INSERTION POINT | 
| 2746 |  |  |  |  |  |  | 0x0005BE =>       2,  #  ־  gc=Pd   sc=Hebrew     HEBREW PUNCTUATION MAQAF | 
| 2747 |  |  |  |  |  |  | 0x000F09 =>       1,  #  ༉  gc=Po   sc=Tibetan    TIBETAN MARK BSKUR YIG MGO | 
| 2748 |  |  |  |  |  |  | 0x0002B9 =>       3,  #  ʹ  gc=Lm   sc=Common     MODIFIER LETTER PRIME | 
| 2749 |  |  |  |  |  |  | 0x0002BA =>       3,  #  ʺ  gc=Lm   sc=Common     MODIFIER LETTER DOUBLE PRIME | 
| 2750 |  |  |  |  |  |  | 0x0002C3 =>       2,  #  ˃  gc=Sk   sc=Common     MODIFIER LETTER RIGHT ARROWHEAD | 
| 2751 |  |  |  |  |  |  | 0x0002C4 =>      18,  #  ˄  gc=Sk   sc=Common     MODIFIER LETTER UP ARROWHEAD | 
| 2752 |  |  |  |  |  |  | 0x0002C6 =>    2236,  #  ˆ  gc=Lm   sc=Common     MODIFIER LETTER CIRCUMFLEX ACCENT | 
| 2753 |  |  |  |  |  |  | 0x0002C7 =>      30,  #  ˇ  gc=Lm   sc=Common     CARON | 
| 2754 |  |  |  |  |  |  | 0x0002C9 =>      17,  #  ˉ  gc=Lm   sc=Common     MODIFIER LETTER MACRON | 
| 2755 |  |  |  |  |  |  | 0x0002CB =>       1,  #  ˋ  gc=Lm   sc=Common     MODIFIER LETTER GRAVE ACCENT | 
| 2756 |  |  |  |  |  |  | 0x00A719 =>      25,  #  ꜙ  gc=Lm   sc=Common     MODIFIER LETTER DOT HORIZONTAL BAR | 
| 2757 |  |  |  |  |  |  | 0x0000B0 =>  462505,  #  °  gc=So   sc=Common     DEGREE SIGN | 
| 2758 |  |  |  |  |  |  | 0x002103 =>    4992,  #  ℃  gc=So   sc=Common     DEGREE CELSIUS | 
| 2759 |  |  |  |  |  |  | 0x002109 =>       7,  #  ℉  gc=So   sc=Common     DEGREE FAHRENHEIT | 
| 2760 |  |  |  |  |  |  | 0x002117 =>       4,  #  ℗  gc=So   sc=Common     SOUND RECORDING COPYRIGHT | 
| 2761 |  |  |  |  |  |  | 0x002118 =>     459,  #  ℘  gc=Sm   sc=Common     SCRIPT CAPITAL P | 
| 2762 |  |  |  |  |  |  | 0x00211E =>       2,  #  ℞  gc=So   sc=Common     PRESCRIPTION TAKE | 
| 2763 |  |  |  |  |  |  | 0x00212E =>       1,  #  ℮  gc=So   sc=Common     ESTIMATED SYMBOL | 
| 2764 |  |  |  |  |  |  | 0x002190 =>    2765,  #  ←  gc=Sm   sc=Common     LEFTWARDS ARROW | 
| 2765 |  |  |  |  |  |  | 0x002192 =>   48480,  #  →  gc=Sm   sc=Common     RIGHTWARDS ARROW | 
| 2766 |  |  |  |  |  |  | 0x00219B =>      11,  #  ↛  gc=Sm   sc=Common     RIGHTWARDS ARROW WITH STROKE | 
| 2767 |  |  |  |  |  |  | 0x002191 =>   11349,  #  ↑  gc=Sm   sc=Common     UPWARDS ARROW | 
| 2768 |  |  |  |  |  |  | 0x002193 =>   12195,  #  ↓  gc=Sm   sc=Common     DOWNWARDS ARROW | 
| 2769 |  |  |  |  |  |  | 0x002194 =>    2957,  #  ↔  gc=Sm   sc=Common     LEFT RIGHT ARROW | 
| 2770 |  |  |  |  |  |  | 0x0021AE =>       1,  #  ↮  gc=Sm   sc=Common     LEFT RIGHT ARROW WITH STROKE | 
| 2771 |  |  |  |  |  |  | 0x002195 =>      49,  #  ↕  gc=So   sc=Common     UP DOWN ARROW | 
| 2772 |  |  |  |  |  |  | 0x002196 =>       8,  #  ↖  gc=So   sc=Common     NORTH WEST ARROW | 
| 2773 |  |  |  |  |  |  | 0x002197 =>      58,  #  ↗  gc=So   sc=Common     NORTH EAST ARROW | 
| 2774 |  |  |  |  |  |  | 0x002198 =>     105,  #  ↘  gc=So   sc=Common     SOUTH EAST ARROW | 
| 2775 |  |  |  |  |  |  | 0x002199 =>      11,  #  ↙  gc=So   sc=Common     SOUTH WEST ARROW | 
| 2776 |  |  |  |  |  |  | 0x00219D =>       2,  #  ↝  gc=So   sc=Common     RIGHTWARDS WAVE ARROW | 
| 2777 |  |  |  |  |  |  | 0x00219E =>       1,  #  ↞  gc=So   sc=Common     LEFTWARDS TWO HEADED ARROW | 
| 2778 |  |  |  |  |  |  | 0x0021A0 =>       5,  #  ↠  gc=Sm   sc=Common     RIGHTWARDS TWO HEADED ARROW | 
| 2779 |  |  |  |  |  |  | 0x0021A1 =>       5,  #  ↡  gc=So   sc=Common     DOWNWARDS TWO HEADED ARROW | 
| 2780 |  |  |  |  |  |  | 0x0021A3 =>       1,  #  ↣  gc=Sm   sc=Common     RIGHTWARDS ARROW WITH TAIL | 
| 2781 |  |  |  |  |  |  | 0x0021A6 =>     160,  #  ↦  gc=Sm   sc=Common     RIGHTWARDS ARROW FROM BAR | 
| 2782 |  |  |  |  |  |  | 0x0021AA =>      30,  #  ↪  gc=So   sc=Common     RIGHTWARDS ARROW WITH HOOK | 
| 2783 |  |  |  |  |  |  | 0x0021AD =>       2,  #  ↭  gc=So   sc=Common     LEFT RIGHT WAVE ARROW | 
| 2784 |  |  |  |  |  |  | 0x0021B1 =>       3,  #  ↱  gc=So   sc=Common     UPWARDS ARROW WITH TIP RIGHTWARDS | 
| 2785 |  |  |  |  |  |  | 0x0021C0 =>     133,  #  ⇀  gc=So   sc=Common     RIGHTWARDS HARPOON WITH BARB UPWARDS | 
| 2786 |  |  |  |  |  |  | 0x0021C4 =>      87,  #  ⇄  gc=So   sc=Common     RIGHTWARDS ARROW OVER LEFTWARDS ARROW | 
| 2787 |  |  |  |  |  |  | 0x0021C6 =>      59,  #  ⇆  gc=So   sc=Common     LEFTWARDS ARROW OVER RIGHTWARDS ARROW | 
| 2788 |  |  |  |  |  |  | 0x0021C7 =>       1,  #  ⇇  gc=So   sc=Common     LEFTWARDS PAIRED ARROWS | 
| 2789 |  |  |  |  |  |  | 0x0021C9 =>       2,  #  ⇉  gc=So   sc=Common     RIGHTWARDS PAIRED ARROWS | 
| 2790 |  |  |  |  |  |  | 0x0021CB =>     156,  #  ⇋  gc=So   sc=Common     LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON | 
| 2791 |  |  |  |  |  |  | 0x0021CC =>     276,  #  ⇌  gc=So   sc=Common     RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON | 
| 2792 |  |  |  |  |  |  | 0x0021D0 =>     117,  #  ⇐  gc=So   sc=Common     LEFTWARDS DOUBLE ARROW | 
| 2793 |  |  |  |  |  |  | 0x0021D1 =>     154,  #  ⇑  gc=So   sc=Common     UPWARDS DOUBLE ARROW | 
| 2794 |  |  |  |  |  |  | 0x0021D2 =>    1032,  #  ⇒  gc=Sm   sc=Common     RIGHTWARDS DOUBLE ARROW | 
| 2795 |  |  |  |  |  |  | 0x0021D3 =>     228,  #  ⇓  gc=So   sc=Common     DOWNWARDS DOUBLE ARROW | 
| 2796 |  |  |  |  |  |  | 0x0021D4 =>     619,  #  ⇔  gc=Sm   sc=Common     LEFT RIGHT DOUBLE ARROW | 
| 2797 |  |  |  |  |  |  | 0x0021CE =>       1,  #  ⇎  gc=Sm   sc=Common     LEFT RIGHT DOUBLE ARROW WITH STROKE | 
| 2798 |  |  |  |  |  |  | 0x0021D7 =>       1,  #  ⇗  gc=So   sc=Common     NORTH EAST DOUBLE ARROW | 
| 2799 |  |  |  |  |  |  | 0x0021D8 =>       1,  #  ⇘  gc=So   sc=Common     SOUTH EAST DOUBLE ARROW | 
| 2800 |  |  |  |  |  |  | 0x0021DD =>      13,  #  ⇝  gc=So   sc=Common     RIGHTWARDS SQUIGGLE ARROW | 
| 2801 |  |  |  |  |  |  | 0x0021DE =>       1,  #  ⇞  gc=So   sc=Common     UPWARDS ARROW WITH DOUBLE STROKE | 
| 2802 |  |  |  |  |  |  | 0x0021DF =>       1,  #  ⇟  gc=So   sc=Common     DOWNWARDS ARROW WITH DOUBLE STROKE | 
| 2803 |  |  |  |  |  |  | 0x0021E7 =>      14,  #  ⇧  gc=So   sc=Common     UPWARDS WHITE ARROW | 
| 2804 |  |  |  |  |  |  | 0x0021E8 =>      47,  #  ⇨  gc=So   sc=Common     RIGHTWARDS WHITE ARROW | 
| 2805 |  |  |  |  |  |  | 0x0021E9 =>      12,  #  ⇩  gc=So   sc=Common     DOWNWARDS WHITE ARROW | 
| 2806 |  |  |  |  |  |  | 0x002200 =>    1204,  #  ∀  gc=Sm   sc=Common     FOR ALL | 
| 2807 |  |  |  |  |  |  | 0x002201 =>       1,  #  ∁  gc=Sm   sc=Common     COMPLEMENT | 
| 2808 |  |  |  |  |  |  | 0x002202 =>    6096,  #  ∂  gc=Sm   sc=Common     PARTIAL DIFFERENTIAL | 
| 2809 |  |  |  |  |  |  | 0x002204 =>      16,  #  ∄  gc=Sm   sc=Common     THERE DOES NOT EXIST | 
| 2810 |  |  |  |  |  |  | 0x002203 =>     216,  #  ∃  gc=Sm   sc=Common     THERE EXISTS | 
| 2811 |  |  |  |  |  |  | 0x002205 =>    1040,  #  ∅  gc=Sm   sc=Common     EMPTY SET | 
| 2812 |  |  |  |  |  |  | 0x002206 =>    1576,  #  ∆  gc=Sm   sc=Common     INCREMENT | 
| 2813 |  |  |  |  |  |  | 0x002207 =>    1564,  #  ∇  gc=Sm   sc=Common     NABLA | 
| 2814 |  |  |  |  |  |  | 0x002208 =>   15033,  #  ∈  gc=Sm   sc=Common     ELEMENT OF | 
| 2815 |  |  |  |  |  |  | 0x002209 =>     417,  #  ∉  gc=Sm   sc=Common     NOT AN ELEMENT OF | 
| 2816 |  |  |  |  |  |  | 0x00220A =>     455,  #  ∊  gc=Sm   sc=Common     SMALL ELEMENT OF | 
| 2817 |  |  |  |  |  |  | 0x00220B =>      27,  #  ∋  gc=Sm   sc=Common     CONTAINS AS MEMBER | 
| 2818 |  |  |  |  |  |  | 0x00220C =>       1,  #  ∌  gc=Sm   sc=Common     DOES NOT CONTAIN AS MEMBER | 
| 2819 |  |  |  |  |  |  | 0x00220D =>       3,  #  ∍  gc=Sm   sc=Common     SMALL CONTAINS AS MEMBER | 
| 2820 |  |  |  |  |  |  | 0x00220F =>    2159,  #  ∏  gc=Sm   sc=Common     N-ARY PRODUCT | 
| 2821 |  |  |  |  |  |  | 0x002211 =>   23572,  #  ∑  gc=Sm   sc=Common     N-ARY SUMMATION | 
| 2822 |  |  |  |  |  |  | 0x00207A =>      24,  #  ⁺  gc=Sm   sc=Common     SUPERSCRIPT PLUS SIGN | 
| 2823 |  |  |  |  |  |  | 0x0000B1 => 1009762,  #  ±  gc=Sm   sc=Common     PLUS-MINUS SIGN | 
| 2824 |  |  |  |  |  |  | 0x0000F7 =>     756,  #  ÷  gc=Sm   sc=Common     DIVISION SIGN | 
| 2825 |  |  |  |  |  |  | 0x0000D7 =>  350506,  #  ×  gc=Sm   sc=Common     MULTIPLICATION SIGN | 
| 2826 |  |  |  |  |  |  | 0x00226E =>       8,  #  ≮  gc=Sm   sc=Common     NOT LESS-THAN | 
| 2827 |  |  |  |  |  |  | 0x00FF1C =>       1,  #  < gc=Sm   sc=Common     FULLWIDTH LESS-THAN SIGN | 
| 2828 |  |  |  |  |  |  | 0x002260 =>    4846,  #  ≠  gc=Sm   sc=Common     NOT EQUAL TO | 
| 2829 |  |  |  |  |  |  | 0x00FF1D =>       1,  #  = gc=Sm   sc=Common     FULLWIDTH EQUALS SIGN | 
| 2830 |  |  |  |  |  |  | 0x00FE65 =>       1,  #  ﹥ gc=Sm   sc=Common     SMALL GREATER-THAN SIGN | 
| 2831 |  |  |  |  |  |  | 0x0000AC =>     377,  #  ¬  gc=Sm   sc=Common     NOT SIGN | 
| 2832 |  |  |  |  |  |  | 0x0000A6 =>      29,  #  ¦  gc=So   sc=Common     BROKEN BAR | 
| 2833 |  |  |  |  |  |  | 0x002016 =>    2982,  #  ‖  gc=Po   sc=Common     DOUBLE VERTICAL LINE | 
| 2834 |  |  |  |  |  |  | 0x00FF5E =>       1,  #  ~ gc=Sm   sc=Common     FULLWIDTH TILDE | 
| 2835 |  |  |  |  |  |  | 0x002212 =>  784139,  #  −  gc=Sm   sc=Common     MINUS SIGN | 
| 2836 |  |  |  |  |  |  | 0x002213 =>      78,  #  ∓  gc=Sm   sc=Common     MINUS-OR-PLUS SIGN | 
| 2837 |  |  |  |  |  |  | 0x002215 =>       1,  #  ∕  gc=Sm   sc=Common     DIVISION SLASH | 
| 2838 |  |  |  |  |  |  | 0x002216 =>     125,  #  ∖  gc=Sm   sc=Common     SET MINUS | 
| 2839 |  |  |  |  |  |  | 0x002217 =>    7734,  #  ∗  gc=Sm   sc=Common     ASTERISK OPERATOR | 
| 2840 |  |  |  |  |  |  | 0x002218 =>    1176,  #  ∘  gc=Sm   sc=Common     RING OPERATOR | 
| 2841 |  |  |  |  |  |  | 0x002219 =>     302,  #  ∙  gc=Sm   sc=Common     BULLET OPERATOR | 
| 2842 |  |  |  |  |  |  | 0x00221A =>    6937,  #  √  gc=Sm   sc=Common     SQUARE ROOT | 
| 2843 |  |  |  |  |  |  | 0x00221D =>    1136,  #  ∝  gc=Sm   sc=Common     PROPORTIONAL TO | 
| 2844 |  |  |  |  |  |  | 0x00221E =>    7509,  #  ∞  gc=Sm   sc=Common     INFINITY | 
| 2845 |  |  |  |  |  |  | 0x00221F =>       7,  #  ∟  gc=Sm   sc=Common     RIGHT ANGLE | 
| 2846 |  |  |  |  |  |  | 0x002220 =>     123,  #  ∠  gc=Sm   sc=Common     ANGLE | 
| 2847 |  |  |  |  |  |  | 0x002221 =>       2,  #  ∡  gc=Sm   sc=Common     MEASURED ANGLE | 
| 2848 |  |  |  |  |  |  | 0x002222 =>       4,  #  ∢  gc=Sm   sc=Common     SPHERICAL ANGLE | 
| 2849 |  |  |  |  |  |  | 0x002223 =>    2735,  #  ∣  gc=Sm   sc=Common     DIVIDES | 
| 2850 |  |  |  |  |  |  | 0x002224 =>       2,  #  ∤  gc=Sm   sc=Common     DOES NOT DIVIDE | 
| 2851 |  |  |  |  |  |  | 0x002226 =>       7,  #  ∦  gc=Sm   sc=Common     NOT PARALLEL TO | 
| 2852 |  |  |  |  |  |  | 0x002225 =>    1932,  #  ∥  gc=Sm   sc=Common     PARALLEL TO | 
| 2853 |  |  |  |  |  |  | 0x002227 =>    1432,  #  ∧  gc=Sm   sc=Common     LOGICAL AND | 
| 2854 |  |  |  |  |  |  | 0x002228 =>     359,  #  ∨  gc=Sm   sc=Common     LOGICAL OR | 
| 2855 |  |  |  |  |  |  | 0x002229 =>    1721,  #  ∩  gc=Sm   sc=Common     INTERSECTION | 
| 2856 |  |  |  |  |  |  | 0x00222A =>    1727,  #  ∪  gc=Sm   sc=Common     UNION | 
| 2857 |  |  |  |  |  |  | 0x00222B =>    3699,  #  ∫  gc=Sm   sc=Common     INTEGRAL | 
| 2858 |  |  |  |  |  |  | 0x00222C =>      57,  #  ∬  gc=Sm   sc=Common     DOUBLE INTEGRAL | 
| 2859 |  |  |  |  |  |  | 0x00222D =>       6,  #  ∭  gc=Sm   sc=Common     TRIPLE INTEGRAL | 
| 2860 |  |  |  |  |  |  | 0x00222E =>      14,  #  ∮  gc=Sm   sc=Common     CONTOUR INTEGRAL | 
| 2861 |  |  |  |  |  |  | 0x002232 =>       2,  #  ∲  gc=Sm   sc=Common     CLOCKWISE CONTOUR INTEGRAL | 
| 2862 |  |  |  |  |  |  | 0x002234 =>      13,  #  ∴  gc=Sm   sc=Common     THEREFORE | 
| 2863 |  |  |  |  |  |  | 0x002235 =>       1,  #  ∵  gc=Sm   sc=Common     BECAUSE | 
| 2864 |  |  |  |  |  |  | 0x002236 =>   36935,  #  ∶  gc=Sm   sc=Common     RATIO | 
| 2865 |  |  |  |  |  |  | 0x002237 =>    1405,  #  ∷  gc=Sm   sc=Common     PROPORTION | 
| 2866 |  |  |  |  |  |  | 0x002241 =>       2,  #  ≁  gc=Sm   sc=Common     NOT TILDE | 
| 2867 |  |  |  |  |  |  | 0x00223C =>   85341,  #  ∼  gc=Sm   sc=Common     TILDE OPERATOR | 
| 2868 |  |  |  |  |  |  | 0x00223D =>       4,  #  ∽  gc=Sm   sc=Common     REVERSED TILDE | 
| 2869 |  |  |  |  |  |  | 0x00223F =>       1,  #  ∿  gc=Sm   sc=Common     SINE WAVE | 
| 2870 |  |  |  |  |  |  | 0x002242 =>       1,  #  ≂  gc=Sm   sc=Common     MINUS TILDE | 
| 2871 |  |  |  |  |  |  | 0x002243 =>     536,  #  ≃  gc=Sm   sc=Common     ASYMPTOTICALLY EQUAL TO | 
| 2872 |  |  |  |  |  |  | 0x002245 =>     708,  #  ≅  gc=Sm   sc=Common     APPROXIMATELY EQUAL TO | 
| 2873 |  |  |  |  |  |  | 0x002246 =>      10,  #  ≆  gc=Sm   sc=Common     APPROXIMATELY BUT NOT ACTUALLY EQUAL TO | 
| 2874 |  |  |  |  |  |  | 0x002248 =>   12106,  #  ≈  gc=Sm   sc=Common     ALMOST EQUAL TO | 
| 2875 |  |  |  |  |  |  | 0x002249 =>       1,  #  ≉  gc=Sm   sc=Common     NOT ALMOST EQUAL TO | 
| 2876 |  |  |  |  |  |  | 0x00224A =>       6,  #  ≊  gc=Sm   sc=Common     ALMOST EQUAL OR EQUAL TO | 
| 2877 |  |  |  |  |  |  | 0x00224C =>       9,  #  ≌  gc=Sm   sc=Common     ALL EQUAL TO | 
| 2878 |  |  |  |  |  |  | 0x00224D =>      27,  #  ≍  gc=Sm   sc=Common     EQUIVALENT TO | 
| 2879 |  |  |  |  |  |  | 0x00224E =>       1,  #  ≎  gc=Sm   sc=Common     GEOMETRICALLY EQUIVALENT TO | 
| 2880 |  |  |  |  |  |  | 0x002250 =>      12,  #  ≐  gc=Sm   sc=Common     APPROACHES THE LIMIT | 
| 2881 |  |  |  |  |  |  | 0x002251 =>       2,  #  ≑  gc=Sm   sc=Common     GEOMETRICALLY EQUAL TO | 
| 2882 |  |  |  |  |  |  | 0x002252 =>       6,  #  ≒  gc=Sm   sc=Common     APPROXIMATELY EQUAL TO OR THE IMAGE OF | 
| 2883 |  |  |  |  |  |  | 0x002254 =>      97,  #  ≔  gc=Sm   sc=Common     COLON EQUALS | 
| 2884 |  |  |  |  |  |  | 0x002255 =>       4,  #  ≕  gc=Sm   sc=Common     EQUALS COLON | 
| 2885 |  |  |  |  |  |  | 0x002259 =>      12,  #  ≙  gc=Sm   sc=Common     ESTIMATES | 
| 2886 |  |  |  |  |  |  | 0x00225C =>      97,  #  ≜  gc=Sm   sc=Common     DELTA EQUAL TO | 
| 2887 |  |  |  |  |  |  | 0x002261 =>    3091,  #  ≡  gc=Sm   sc=Common     IDENTICAL TO | 
| 2888 |  |  |  |  |  |  | 0x002262 =>       1,  #  ≢  gc=Sm   sc=Common     NOT IDENTICAL TO | 
| 2889 |  |  |  |  |  |  | 0x002264 =>   70789,  #  ≤  gc=Sm   sc=Common     LESS-THAN OR EQUAL TO | 
| 2890 |  |  |  |  |  |  | 0x002270 =>      10,  #  ≰  gc=Sm   sc=Common     NEITHER LESS-THAN NOR EQUAL TO | 
| 2891 |  |  |  |  |  |  | 0x002265 =>  101964,  #  ≥  gc=Sm   sc=Common     GREATER-THAN OR EQUAL TO | 
| 2892 |  |  |  |  |  |  | 0x002266 =>     324,  #  ≦  gc=Sm   sc=Common     LESS-THAN OVER EQUAL TO | 
| 2893 |  |  |  |  |  |  | 0x002267 =>     503,  #  ≧  gc=Sm   sc=Common     GREATER-THAN OVER EQUAL TO | 
| 2894 |  |  |  |  |  |  | 0x002268 =>       4,  #  ≨  gc=Sm   sc=Common     LESS-THAN BUT NOT EQUAL TO | 
| 2895 |  |  |  |  |  |  | 0x00226A =>    1172,  #  ≪  gc=Sm   sc=Common     MUCH LESS-THAN | 
| 2896 |  |  |  |  |  |  | 0x00226B =>     992,  #  ≫  gc=Sm   sc=Common     MUCH GREATER-THAN | 
| 2897 |  |  |  |  |  |  | 0x002272 =>      71,  #  ≲  gc=Sm   sc=Common     LESS-THAN OR EQUIVALENT TO | 
| 2898 |  |  |  |  |  |  | 0x002273 =>      52,  #  ≳  gc=Sm   sc=Common     GREATER-THAN OR EQUIVALENT TO | 
| 2899 |  |  |  |  |  |  | 0x002276 =>       1,  #  ≶  gc=Sm   sc=Common     LESS-THAN OR GREATER-THAN | 
| 2900 |  |  |  |  |  |  | 0x002278 =>       5,  #  ≸  gc=Sm   sc=Common     NEITHER LESS-THAN NOR GREATER-THAN | 
| 2901 |  |  |  |  |  |  | 0x002277 =>       3,  #  ≷  gc=Sm   sc=Common     GREATER-THAN OR LESS-THAN | 
| 2902 |  |  |  |  |  |  | 0x002279 =>       3,  #  ≹  gc=Sm   sc=Common     NEITHER GREATER-THAN NOR LESS-THAN | 
| 2903 |  |  |  |  |  |  | 0x00227A =>     157,  #  ≺  gc=Sm   sc=Common     PRECEDES | 
| 2904 |  |  |  |  |  |  | 0x002281 =>       1,  #  ⊁  gc=Sm   sc=Common     DOES NOT SUCCEED | 
| 2905 |  |  |  |  |  |  | 0x00227B =>      32,  #  ≻  gc=Sm   sc=Common     SUCCEEDS | 
| 2906 |  |  |  |  |  |  | 0x00227C =>      24,  #  ≼  gc=Sm   sc=Common     PRECEDES OR EQUAL TO | 
| 2907 |  |  |  |  |  |  | 0x00227D =>      49,  #  ≽  gc=Sm   sc=Common     SUCCEEDS OR EQUAL TO | 
| 2908 |  |  |  |  |  |  | 0x00227E =>       4,  #  ≾  gc=Sm   sc=Common     PRECEDES OR EQUIVALENT TO | 
| 2909 |  |  |  |  |  |  | 0x002284 =>       8,  #  ⊄  gc=Sm   sc=Common     NOT A SUBSET OF | 
| 2910 |  |  |  |  |  |  | 0x002282 =>     594,  #  ⊂  gc=Sm   sc=Common     SUBSET OF | 
| 2911 |  |  |  |  |  |  | 0x002283 =>      79,  #  ⊃  gc=Sm   sc=Common     SUPERSET OF | 
| 2912 |  |  |  |  |  |  | 0x002288 =>       4,  #  ⊈  gc=Sm   sc=Common     NEITHER A SUBSET OF NOR EQUAL TO | 
| 2913 |  |  |  |  |  |  | 0x002286 =>     754,  #  ⊆  gc=Sm   sc=Common     SUBSET OF OR EQUAL TO | 
| 2914 |  |  |  |  |  |  | 0x002287 =>      29,  #  ⊇  gc=Sm   sc=Common     SUPERSET OF OR EQUAL TO | 
| 2915 |  |  |  |  |  |  | 0x00228A =>       3,  #  ⊊  gc=Sm   sc=Common     SUBSET OF WITH NOT EQUAL TO | 
| 2916 |  |  |  |  |  |  | 0x00228B =>       2,  #  ⊋  gc=Sm   sc=Common     SUPERSET OF WITH NOT EQUAL TO | 
| 2917 |  |  |  |  |  |  | 0x00228D =>       2,  #  ⊍  gc=Sm   sc=Common     MULTISET MULTIPLICATION | 
| 2918 |  |  |  |  |  |  | 0x00228F =>       3,  #  ⊏  gc=Sm   sc=Common     SQUARE IMAGE OF | 
| 2919 |  |  |  |  |  |  | 0x002291 =>      25,  #  ⊑  gc=Sm   sc=Common     SQUARE IMAGE OF OR EQUAL TO | 
| 2920 |  |  |  |  |  |  | 0x002293 =>      74,  #  ⊓  gc=Sm   sc=Common     SQUARE CAP | 
| 2921 |  |  |  |  |  |  | 0x002294 =>       8,  #  ⊔  gc=Sm   sc=Common     SQUARE CUP | 
| 2922 |  |  |  |  |  |  | 0x002295 =>     445,  #  ⊕  gc=Sm   sc=Common     CIRCLED PLUS | 
| 2923 |  |  |  |  |  |  | 0x002296 =>     183,  #  ⊖  gc=Sm   sc=Common     CIRCLED MINUS | 
| 2924 |  |  |  |  |  |  | 0x002297 =>     560,  #  ⊗  gc=Sm   sc=Common     CIRCLED TIMES | 
| 2925 |  |  |  |  |  |  | 0x002298 =>      82,  #  ⊘  gc=Sm   sc=Common     CIRCLED DIVISION SLASH | 
| 2926 |  |  |  |  |  |  | 0x002299 =>      43,  #  ⊙  gc=Sm   sc=Common     CIRCLED DOT OPERATOR | 
| 2927 |  |  |  |  |  |  | 0x00229A =>       2,  #  ⊚  gc=Sm   sc=Common     CIRCLED RING OPERATOR | 
| 2928 |  |  |  |  |  |  | 0x00229B =>       4,  #  ⊛  gc=Sm   sc=Common     CIRCLED ASTERISK OPERATOR | 
| 2929 |  |  |  |  |  |  | 0x00229D =>       3,  #  ⊝  gc=Sm   sc=Common     CIRCLED DASH | 
| 2930 |  |  |  |  |  |  | 0x00229F =>      22,  #  ⊟  gc=Sm   sc=Common     SQUARED MINUS | 
| 2931 |  |  |  |  |  |  | 0x0022A0 =>       7,  #  ⊠  gc=Sm   sc=Common     SQUARED TIMES | 
| 2932 |  |  |  |  |  |  | 0x0022A1 =>       4,  #  ⊡  gc=Sm   sc=Common     SQUARED DOT OPERATOR | 
| 2933 |  |  |  |  |  |  | 0x0022A2 =>      51,  #  ⊢  gc=Sm   sc=Common     RIGHT TACK | 
| 2934 |  |  |  |  |  |  | 0x0022A3 =>     153,  #  ⊣  gc=Sm   sc=Common     LEFT TACK | 
| 2935 |  |  |  |  |  |  | 0x0022A4 =>     473,  #  ⊤  gc=Sm   sc=Common     DOWN TACK | 
| 2936 |  |  |  |  |  |  | 0x0022A5 =>     687,  #  ⊥  gc=Sm   sc=Common     UP TACK | 
| 2937 |  |  |  |  |  |  | 0x0022A7 =>      23,  #  ⊧  gc=Sm   sc=Common     MODELS | 
| 2938 |  |  |  |  |  |  | 0x0022AA =>       3,  #  ⊪  gc=Sm   sc=Common     TRIPLE VERTICAL BAR RIGHT TURNSTILE | 
| 2939 |  |  |  |  |  |  | 0x0022B2 =>       1,  #  ⊲  gc=Sm   sc=Common     NORMAL SUBGROUP OF | 
| 2940 |  |  |  |  |  |  | 0x0022B8 =>       2,  #  ⊸  gc=Sm   sc=Common     MULTIMAP | 
| 2941 |  |  |  |  |  |  | 0x0022BF =>       8,  #  ⊿  gc=Sm   sc=Common     RIGHT TRIANGLE | 
| 2942 |  |  |  |  |  |  | 0x0022C0 =>      77,  #  ⋀  gc=Sm   sc=Common     N-ARY LOGICAL AND | 
| 2943 |  |  |  |  |  |  | 0x0022C1 =>       2,  #  ⋁  gc=Sm   sc=Common     N-ARY LOGICAL OR | 
| 2944 |  |  |  |  |  |  | 0x0022C2 =>      88,  #  ⋂  gc=Sm   sc=Common     N-ARY INTERSECTION | 
| 2945 |  |  |  |  |  |  | 0x0022C3 =>      69,  #  ⋃  gc=Sm   sc=Common     N-ARY UNION | 
| 2946 |  |  |  |  |  |  | 0x0022C4 =>     207,  #  ⋄  gc=Sm   sc=Common     DIAMOND OPERATOR | 
| 2947 |  |  |  |  |  |  | 0x0022C5 =>    6578,  #  ⋅  gc=Sm   sc=Common     DOT OPERATOR | 
| 2948 |  |  |  |  |  |  | 0x0022C6 =>     185,  #  ⋆  gc=Sm   sc=Common     STAR OPERATOR | 
| 2949 |  |  |  |  |  |  | 0x0022C7 =>       3,  #  ⋇  gc=Sm   sc=Common     DIVISION TIMES | 
| 2950 |  |  |  |  |  |  | 0x0022C8 =>       1,  #  ⋈  gc=Sm   sc=Common     BOWTIE | 
| 2951 |  |  |  |  |  |  | 0x0022CD =>       5,  #  ⋍  gc=Sm   sc=Common     REVERSED TILDE EQUALS | 
| 2952 |  |  |  |  |  |  | 0x0022CE =>       1,  #  ⋎  gc=Sm   sc=Common     CURLY LOGICAL OR | 
| 2953 |  |  |  |  |  |  | 0x0022D2 =>       1,  #  ⋒  gc=Sm   sc=Common     DOUBLE INTERSECTION | 
| 2954 |  |  |  |  |  |  | 0x0022D4 =>      15,  #  ⋔  gc=Sm   sc=Common     PITCHFORK | 
| 2955 |  |  |  |  |  |  | 0x0022D8 =>       3,  #  ⋘  gc=Sm   sc=Common     VERY MUCH LESS-THAN | 
| 2956 |  |  |  |  |  |  | 0x0022D9 =>      13,  #  ⋙  gc=Sm   sc=Common     VERY MUCH GREATER-THAN | 
| 2957 |  |  |  |  |  |  | 0x0022DB =>       3,  #  ⋛  gc=Sm   sc=Common     GREATER-THAN EQUAL TO OR LESS-THAN | 
| 2958 |  |  |  |  |  |  | 0x0022E8 =>       1,  #  ⋨  gc=Sm   sc=Common     PRECEDES BUT NOT EQUIVALENT TO | 
| 2959 |  |  |  |  |  |  | 0x0022EE =>    1123,  #  ⋮  gc=Sm   sc=Common     VERTICAL ELLIPSIS | 
| 2960 |  |  |  |  |  |  | 0x0022EF =>    3568,  #  ⋯  gc=Sm   sc=Common     MIDLINE HORIZONTAL ELLIPSIS | 
| 2961 |  |  |  |  |  |  | 0x0022F1 =>     197,  #  ⋱  gc=Sm   sc=Common     DOWN RIGHT DIAGONAL ELLIPSIS | 
| 2962 |  |  |  |  |  |  | 0x002300 =>       1,  #  ⌀  gc=So   sc=Common     DIAMETER SIGN | 
| 2963 |  |  |  |  |  |  | 0x002302 =>       5,  #  ⌂  gc=So   sc=Common     HOUSE | 
| 2964 |  |  |  |  |  |  | 0x002306 =>      13,  #  ⌆  gc=So   sc=Common     PERSPECTIVE | 
| 2965 |  |  |  |  |  |  | 0x002308 =>     127,  #  ⌈  gc=Sm   sc=Common     LEFT CEILING | 
| 2966 |  |  |  |  |  |  | 0x002309 =>     134,  #  ⌉  gc=Sm   sc=Common     RIGHT CEILING | 
| 2967 |  |  |  |  |  |  | 0x00230A =>     307,  #  ⌊  gc=Sm   sc=Common     LEFT FLOOR | 
| 2968 |  |  |  |  |  |  | 0x00230B =>     295,  #  ⌋  gc=Sm   sc=Common     RIGHT FLOOR | 
| 2969 |  |  |  |  |  |  | 0x00230C =>       2,  #  ⌌  gc=So   sc=Common     BOTTOM RIGHT CROP | 
| 2970 |  |  |  |  |  |  | 0x002313 =>     156,  #  ⌓  gc=So   sc=Common     SEGMENT | 
| 2971 |  |  |  |  |  |  | 0x002314 =>       4,  #  ⌔  gc=So   sc=Common     SECTOR | 
| 2972 |  |  |  |  |  |  | 0x002316 =>       5,  #  ⌖  gc=So   sc=Common     POSITION INDICATOR | 
| 2973 |  |  |  |  |  |  | 0x00231D =>      11,  #  ⌝  gc=So   sc=Common     TOP RIGHT CORNER | 
| 2974 |  |  |  |  |  |  | 0x00231E =>      29,  #  ⌞  gc=So   sc=Common     BOTTOM LEFT CORNER | 
| 2975 |  |  |  |  |  |  | 0x00231F =>      29,  #  ⌟  gc=So   sc=Common     BOTTOM RIGHT CORNER | 
| 2976 |  |  |  |  |  |  | 0x002320 =>       1,  #  ⌠  gc=Sm   sc=Common     TOP HALF INTEGRAL | 
| 2977 |  |  |  |  |  |  | 0x002322 =>     142,  #  ⌢  gc=So   sc=Common     FROWN | 
| 2978 |  |  |  |  |  |  | 0x002323 =>     121,  #  ⌣  gc=So   sc=Common     SMILE | 
| 2979 |  |  |  |  |  |  | 0x002337 =>      11,  #  ⌷  gc=So   sc=Common     APL FUNCTIONAL SYMBOL SQUISH QUAD | 
| 2980 |  |  |  |  |  |  | 0x002342 =>       3,  #  ⍂  gc=So   sc=Common     APL FUNCTIONAL SYMBOL QUAD BACKSLASH | 
| 2981 |  |  |  |  |  |  | 0x0023B4 =>      27,  #  ⎴  gc=So   sc=Common     TOP SQUARE BRACKET | 
| 2982 |  |  |  |  |  |  | 0x002423 =>      38,  #  ␣  gc=So   sc=Common     OPEN BOX | 
| 2983 |  |  |  |  |  |  | 0x002500 =>      26,  #  ─  gc=So   sc=Common     BOX DRAWINGS LIGHT HORIZONTAL | 
| 2984 |  |  |  |  |  |  | 0x002501 =>       2,  #  ━  gc=So   sc=Common     BOX DRAWINGS HEAVY HORIZONTAL | 
| 2985 |  |  |  |  |  |  | 0x002502 =>      52,  #  │  gc=So   sc=Common     BOX DRAWINGS LIGHT VERTICAL | 
| 2986 |  |  |  |  |  |  | 0x002504 =>       2,  #  ┄  gc=So   sc=Common     BOX DRAWINGS LIGHT TRIPLE DASH HORIZONTAL | 
| 2987 |  |  |  |  |  |  | 0x002514 =>       1,  #  └  gc=So   sc=Common     BOX DRAWINGS LIGHT UP AND RIGHT | 
| 2988 |  |  |  |  |  |  | 0x002524 =>      11,  #  ┤  gc=So   sc=Common     BOX DRAWINGS LIGHT VERTICAL AND LEFT | 
| 2989 |  |  |  |  |  |  | 0x00252C =>       4,  #  ┬  gc=So   sc=Common     BOX DRAWINGS LIGHT DOWN AND HORIZONTAL | 
| 2990 |  |  |  |  |  |  | 0x002534 =>       4,  #  ┴  gc=So   sc=Common     BOX DRAWINGS LIGHT UP AND HORIZONTAL | 
| 2991 |  |  |  |  |  |  | 0x00253C =>      32,  #  ┼  gc=So   sc=Common     BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL | 
| 2992 |  |  |  |  |  |  | 0x002540 =>       1,  #  ╀  gc=So   sc=Common     BOX DRAWINGS UP HEAVY AND DOWN HORIZONTAL LIGHT | 
| 2993 |  |  |  |  |  |  | 0x002550 =>       2,  #  ═  gc=So   sc=Common     BOX DRAWINGS DOUBLE HORIZONTAL | 
| 2994 |  |  |  |  |  |  | 0x002551 =>      67,  #  ║  gc=So   sc=Common     BOX DRAWINGS DOUBLE VERTICAL | 
| 2995 |  |  |  |  |  |  | 0x002559 =>       7,  #  ╙  gc=So   sc=Common     BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE | 
| 2996 |  |  |  |  |  |  | 0x00255E =>      17,  #  ╞  gc=So   sc=Common     BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE | 
| 2997 |  |  |  |  |  |  | 0x002560 =>      10,  #  ╠  gc=So   sc=Common     BOX DRAWINGS DOUBLE VERTICAL AND RIGHT | 
| 2998 |  |  |  |  |  |  | 0x002564 =>       4,  #  ╤  gc=So   sc=Common     BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE | 
| 2999 |  |  |  |  |  |  | 0x002566 =>       2,  #  ╦  gc=So   sc=Common     BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL | 
| 3000 |  |  |  |  |  |  | 0x002568 =>      14,  #  ╨  gc=So   sc=Common     BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE | 
| 3001 |  |  |  |  |  |  | 0x00256A =>       8,  #  ╪  gc=So   sc=Common     BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE | 
| 3002 |  |  |  |  |  |  | 0x00256B =>      25,  #  ╫  gc=So   sc=Common     BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE | 
| 3003 |  |  |  |  |  |  | 0x00256C =>       2,  #  ╬  gc=So   sc=Common     BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL | 
| 3004 |  |  |  |  |  |  | 0x00257A =>       1,  #  ╺  gc=So   sc=Common     BOX DRAWINGS HEAVY RIGHT | 
| 3005 |  |  |  |  |  |  | 0x002580 =>      16,  #  ▀  gc=So   sc=Common     UPPER HALF BLOCK | 
| 3006 |  |  |  |  |  |  | 0x002584 =>       1,  #  ▄  gc=So   sc=Common     LOWER HALF BLOCK | 
| 3007 |  |  |  |  |  |  | 0x002588 =>       9,  #  █  gc=So   sc=Common     FULL BLOCK | 
| 3008 |  |  |  |  |  |  | 0x002591 =>      39,  #  ░  gc=So   sc=Common     LIGHT SHADE | 
| 3009 |  |  |  |  |  |  | 0x002592 =>     116,  #  ▒  gc=So   sc=Common     MEDIUM SHADE | 
| 3010 |  |  |  |  |  |  | 0x002593 =>       9,  #  ▓  gc=So   sc=Common     DARK SHADE | 
| 3011 |  |  |  |  |  |  | 0x0025A0 =>    2988,  #  ■  gc=So   sc=Common     BLACK SQUARE | 
| 3012 |  |  |  |  |  |  | 0x0025A1 =>    6011,  #  □  gc=So   sc=Common     WHITE SQUARE | 
| 3013 |  |  |  |  |  |  | 0x0025A2 =>      37,  #  ▢  gc=So   sc=Common     WHITE SQUARE WITH ROUNDED CORNERS | 
| 3014 |  |  |  |  |  |  | 0x0025A4 =>      11,  #  ▤  gc=So   sc=Common     SQUARE WITH HORIZONTAL FILL | 
| 3015 |  |  |  |  |  |  | 0x0025A5 =>       3,  #  ▥  gc=So   sc=Common     SQUARE WITH VERTICAL FILL | 
| 3016 |  |  |  |  |  |  | 0x0025A7 =>      12,  #  ▧  gc=So   sc=Common     SQUARE WITH UPPER LEFT TO LOWER RIGHT FILL | 
| 3017 |  |  |  |  |  |  | 0x0025A8 =>      30,  #  ▨  gc=So   sc=Common     SQUARE WITH UPPER RIGHT TO LOWER LEFT FILL | 
| 3018 |  |  |  |  |  |  | 0x0025A9 =>      22,  #  ▩  gc=So   sc=Common     SQUARE WITH DIAGONAL CROSSHATCH FILL | 
| 3019 |  |  |  |  |  |  | 0x0025AA =>    7528,  #  ▪  gc=So   sc=Common     BLACK SMALL SQUARE | 
| 3020 |  |  |  |  |  |  | 0x0025AB =>     338,  #  ▫  gc=So   sc=Common     WHITE SMALL SQUARE | 
| 3021 |  |  |  |  |  |  | 0x0025AC =>      39,  #  ▬  gc=So   sc=Common     BLACK RECTANGLE | 
| 3022 |  |  |  |  |  |  | 0x0025AD =>      10,  #  ▭  gc=So   sc=Common     WHITE RECTANGLE | 
| 3023 |  |  |  |  |  |  | 0x0025AE =>       7,  #  ▮  gc=So   sc=Common     BLACK VERTICAL RECTANGLE | 
| 3024 |  |  |  |  |  |  | 0x0025AF =>      97,  #  ▯  gc=So   sc=Common     WHITE VERTICAL RECTANGLE | 
| 3025 |  |  |  |  |  |  | 0x0025B1 =>       2,  #  ▱  gc=So   sc=Common     WHITE PARALLELOGRAM | 
| 3026 |  |  |  |  |  |  | 0x0025B2 =>    1536,  #  ▲  gc=So   sc=Common     BLACK UP-POINTING TRIANGLE | 
| 3027 |  |  |  |  |  |  | 0x0025B3 =>     698,  #  △  gc=So   sc=Common     WHITE UP-POINTING TRIANGLE | 
| 3028 |  |  |  |  |  |  | 0x0025B4 =>    1177,  #  ▴  gc=So   sc=Common     BLACK UP-POINTING SMALL TRIANGLE | 
| 3029 |  |  |  |  |  |  | 0x0025B5 =>    1055,  #  ▵  gc=So   sc=Common     WHITE UP-POINTING SMALL TRIANGLE | 
| 3030 |  |  |  |  |  |  | 0x0025B6 =>   17388,  #  ▶  gc=So   sc=Common     BLACK RIGHT-POINTING TRIANGLE | 
| 3031 |  |  |  |  |  |  | 0x0025B7 =>      36,  #  ▷  gc=Sm   sc=Common     WHITE RIGHT-POINTING TRIANGLE | 
| 3032 |  |  |  |  |  |  | 0x0025B8 =>     154,  #  ▸  gc=So   sc=Common     BLACK RIGHT-POINTING SMALL TRIANGLE | 
| 3033 |  |  |  |  |  |  | 0x0025B9 =>      89,  #  ▹  gc=So   sc=Common     WHITE RIGHT-POINTING SMALL TRIANGLE | 
| 3034 |  |  |  |  |  |  | 0x0025BA =>     119,  #  ►  gc=So   sc=Common     BLACK RIGHT-POINTING POINTER | 
| 3035 |  |  |  |  |  |  | 0x0025BB =>       1,  #  ▻  gc=So   sc=Common     WHITE RIGHT-POINTING POINTER | 
| 3036 |  |  |  |  |  |  | 0x0025BC =>     689,  #  ▼  gc=So   sc=Common     BLACK DOWN-POINTING TRIANGLE | 
| 3037 |  |  |  |  |  |  | 0x0025BD =>     150,  #  ▽  gc=So   sc=Common     WHITE DOWN-POINTING TRIANGLE | 
| 3038 |  |  |  |  |  |  | 0x0025BE =>     500,  #  ▾  gc=So   sc=Common     BLACK DOWN-POINTING SMALL TRIANGLE | 
| 3039 |  |  |  |  |  |  | 0x0025BF =>     117,  #  ▿  gc=So   sc=Common     WHITE DOWN-POINTING SMALL TRIANGLE | 
| 3040 |  |  |  |  |  |  | 0x0025C0 =>      29,  #  ◀  gc=So   sc=Common     BLACK LEFT-POINTING TRIANGLE | 
| 3041 |  |  |  |  |  |  | 0x0025C1 =>      22,  #  ◁  gc=Sm   sc=Common     WHITE LEFT-POINTING TRIANGLE | 
| 3042 |  |  |  |  |  |  | 0x0025C2 =>      57,  #  ◂  gc=So   sc=Common     BLACK LEFT-POINTING SMALL TRIANGLE | 
| 3043 |  |  |  |  |  |  | 0x0025C3 =>      40,  #  ◃  gc=So   sc=Common     WHITE LEFT-POINTING SMALL TRIANGLE | 
| 3044 |  |  |  |  |  |  | 0x0025C6 =>    1103,  #  ◆  gc=So   sc=Common     BLACK DIAMOND | 
| 3045 |  |  |  |  |  |  | 0x0025C7 =>     375,  #  ◇  gc=So   sc=Common     WHITE DIAMOND | 
| 3046 |  |  |  |  |  |  | 0x0025C8 =>       2,  #  ◈  gc=So   sc=Common     WHITE DIAMOND CONTAINING BLACK SMALL DIAMOND | 
| 3047 |  |  |  |  |  |  | 0x0025C9 =>       3,  #  ◉  gc=So   sc=Common     FISHEYE | 
| 3048 |  |  |  |  |  |  | 0x0025CA =>     408,  #  ◊  gc=So   sc=Common     LOZENGE | 
| 3049 |  |  |  |  |  |  | 0x0025CB =>    5769,  #  ○  gc=So   sc=Common     WHITE CIRCLE | 
| 3050 |  |  |  |  |  |  | 0x0025CE =>      86,  #  ◎  gc=So   sc=Common     BULLSEYE | 
| 3051 |  |  |  |  |  |  | 0x0025CF =>    3259,  #  ●  gc=So   sc=Common     BLACK CIRCLE | 
| 3052 |  |  |  |  |  |  | 0x0025D0 =>      12,  #  ◐  gc=So   sc=Common     CIRCLE WITH LEFT HALF BLACK | 
| 3053 |  |  |  |  |  |  | 0x0025D2 =>       2,  #  ◒  gc=So   sc=Common     CIRCLE WITH LOWER HALF BLACK | 
| 3054 |  |  |  |  |  |  | 0x0025D3 =>       2,  #  ◓  gc=So   sc=Common     CIRCLE WITH UPPER HALF BLACK | 
| 3055 |  |  |  |  |  |  | 0x0025D6 =>       1,  #  ◖  gc=So   sc=Common     LEFT HALF BLACK CIRCLE | 
| 3056 |  |  |  |  |  |  | 0x0025D8 =>       5,  #  ◘  gc=So   sc=Common     INVERSE BULLET | 
| 3057 |  |  |  |  |  |  | 0x0025D9 =>       5,  #  ◙  gc=So   sc=Common     INVERSE WHITE CIRCLE | 
| 3058 |  |  |  |  |  |  | 0x0025E6 =>    1303,  #  ◦  gc=So   sc=Common     WHITE BULLET | 
| 3059 |  |  |  |  |  |  | 0x0025EC =>       1,  #  ◬  gc=So   sc=Common     WHITE UP-POINTING TRIANGLE WITH DOT | 
| 3060 |  |  |  |  |  |  | 0x0025EF =>      84,  #  ◯  gc=So   sc=Common     LARGE CIRCLE | 
| 3061 |  |  |  |  |  |  | 0x0025F8 =>      17,  #  ◸  gc=Sm   sc=Common     UPPER LEFT TRIANGLE | 
| 3062 |  |  |  |  |  |  | 0x0025FB =>      54,  #  ◻  gc=Sm   sc=Common     WHITE MEDIUM SQUARE | 
| 3063 |  |  |  |  |  |  | 0x0025FC =>      69,  #  ◼  gc=Sm   sc=Common     BLACK MEDIUM SQUARE | 
| 3064 |  |  |  |  |  |  | 0x002605 =>     207,  #  ★  gc=So   sc=Common     BLACK STAR | 
| 3065 |  |  |  |  |  |  | 0x002606 =>     118,  #  ☆  gc=So   sc=Common     WHITE STAR | 
| 3066 |  |  |  |  |  |  | 0x00260D =>       2,  #  ☍  gc=So   sc=Common     OPPOSITION | 
| 3067 |  |  |  |  |  |  | 0x002610 =>     317,  #  ☐  gc=So   sc=Common     BALLOT BOX | 
| 3068 |  |  |  |  |  |  | 0x002611 =>       2,  #  ☑  gc=So   sc=Common     BALLOT BOX WITH CHECK | 
| 3069 |  |  |  |  |  |  | 0x00263A =>       1,  #  ☺  gc=So   sc=Common     WHITE SMILING FACE | 
| 3070 |  |  |  |  |  |  | 0x00263C =>       6,  #  ☼  gc=So   sc=Common     WHITE SUN WITH RAYS | 
| 3071 |  |  |  |  |  |  | 0x00263F =>       1,  #  ☿  gc=So   sc=Common     MERCURY | 
| 3072 |  |  |  |  |  |  | 0x002640 =>    1472,  #  ♀  gc=So   sc=Common     FEMALE SIGN | 
| 3073 |  |  |  |  |  |  | 0x002642 =>    1424,  #  ♂  gc=So   sc=Common     MALE SIGN | 
| 3074 |  |  |  |  |  |  | 0x002660 =>      82,  #  ♠  gc=So   sc=Common     BLACK SPADE SUIT | 
| 3075 |  |  |  |  |  |  | 0x002661 =>      23,  #  ♡  gc=So   sc=Common     WHITE HEART SUIT | 
| 3076 |  |  |  |  |  |  | 0x002662 =>       4,  #  ♢  gc=So   sc=Common     WHITE DIAMOND SUIT | 
| 3077 |  |  |  |  |  |  | 0x002663 =>     147,  #  ♣  gc=So   sc=Common     BLACK CLUB SUIT | 
| 3078 |  |  |  |  |  |  | 0x002665 =>      19,  #  ♥  gc=So   sc=Common     BLACK HEART SUIT | 
| 3079 |  |  |  |  |  |  | 0x002666 =>     808,  #  ♦  gc=So   sc=Common     BLACK DIAMOND SUIT | 
| 3080 |  |  |  |  |  |  | 0x002709 =>     677,  #  ✉  gc=So   sc=Common     ENVELOPE | 
| 3081 |  |  |  |  |  |  | 0x002713 =>    5260,  #  ✓  gc=So   sc=Common     CHECK MARK | 
| 3082 |  |  |  |  |  |  | 0x002714 =>    1072,  #  ✔  gc=So   sc=Common     HEAVY CHECK MARK | 
| 3083 |  |  |  |  |  |  | 0x002715 =>       4,  #  ✕  gc=So   sc=Common     MULTIPLICATION X | 
| 3084 |  |  |  |  |  |  | 0x002716 =>       2,  #  ✖  gc=So   sc=Common     HEAVY MULTIPLICATION X | 
| 3085 |  |  |  |  |  |  | 0x002717 =>     265,  #  ✗  gc=So   sc=Common     BALLOT X | 
| 3086 |  |  |  |  |  |  | 0x002718 =>     164,  #  ✘  gc=So   sc=Common     HEAVY BALLOT X | 
| 3087 |  |  |  |  |  |  | 0x00271A =>       3,  #  ✚  gc=So   sc=Common     HEAVY GREEK CROSS | 
| 3088 |  |  |  |  |  |  | 0x00271D =>      12,  #  ✝  gc=So   sc=Common     LATIN CROSS | 
| 3089 |  |  |  |  |  |  | 0x00271E =>       6,  #  ✞  gc=So   sc=Common     SHADOWED WHITE LATIN CROSS | 
| 3090 |  |  |  |  |  |  | 0x00271F =>       1,  #  ✟  gc=So   sc=Common     OUTLINED LATIN CROSS | 
| 3091 |  |  |  |  |  |  | 0x002720 =>      11,  #  ✠  gc=So   sc=Common     MALTESE CROSS | 
| 3092 |  |  |  |  |  |  | 0x002727 =>       6,  #  ✧  gc=So   sc=Common     WHITE FOUR POINTED STAR | 
| 3093 |  |  |  |  |  |  | 0x002729 =>       4,  #  ✩  gc=So   sc=Common     STRESS OUTLINED WHITE STAR | 
| 3094 |  |  |  |  |  |  | 0x00272E =>       1,  #  ✮  gc=So   sc=Common     HEAVY OUTLINED BLACK STAR | 
| 3095 |  |  |  |  |  |  | 0x00272F =>       2,  #  ✯  gc=So   sc=Common     PINWHEEL STAR | 
| 3096 |  |  |  |  |  |  | 0x002730 =>       2,  #  ✰  gc=So   sc=Common     SHADOWED WHITE STAR | 
| 3097 |  |  |  |  |  |  | 0x002731 =>       1,  #  ✱  gc=So   sc=Common     HEAVY ASTERISK | 
| 3098 |  |  |  |  |  |  | 0x002733 =>       7,  #  ✳  gc=So   sc=Common     EIGHT SPOKED ASTERISK | 
| 3099 |  |  |  |  |  |  | 0x002734 =>       5,  #  ✴  gc=So   sc=Common     EIGHT POINTED BLACK STAR | 
| 3100 |  |  |  |  |  |  | 0x002736 =>      15,  #  ✶  gc=So   sc=Common     SIX POINTED BLACK STAR | 
| 3101 |  |  |  |  |  |  | 0x002737 =>       1,  #  ✷  gc=So   sc=Common     EIGHT POINTED RECTILINEAR BLACK STAR | 
| 3102 |  |  |  |  |  |  | 0x002738 =>      34,  #  ✸  gc=So   sc=Common     HEAVY EIGHT POINTED RECTILINEAR BLACK STAR | 
| 3103 |  |  |  |  |  |  | 0x00273B =>       5,  #  ✻  gc=So   sc=Common     TEARDROP-SPOKED ASTERISK | 
| 3104 |  |  |  |  |  |  | 0x002748 =>       2,  #  ❈  gc=So   sc=Common     HEAVY SPARKLE | 
| 3105 |  |  |  |  |  |  | 0x00274A =>       2,  #  ❊  gc=So   sc=Common     EIGHT TEARDROP-SPOKED PROPELLER ASTERISK | 
| 3106 |  |  |  |  |  |  | 0x00274D =>       8,  #  ❍  gc=So   sc=Common     SHADOWED WHITE CIRCLE | 
| 3107 |  |  |  |  |  |  | 0x002750 =>       4,  #  ❐  gc=So   sc=Common     UPPER RIGHT DROP-SHADOWED WHITE SQUARE | 
| 3108 |  |  |  |  |  |  | 0x002751 =>      59,  #  ❑  gc=So   sc=Common     LOWER RIGHT SHADOWED WHITE SQUARE | 
| 3109 |  |  |  |  |  |  | 0x002752 =>      38,  #  ❒  gc=So   sc=Common     UPPER RIGHT SHADOWED WHITE SQUARE | 
| 3110 |  |  |  |  |  |  | 0x002756 =>      38,  #  ❖  gc=So   sc=Common     BLACK DIAMOND MINUS WHITE X | 
| 3111 |  |  |  |  |  |  | 0x002794 =>      12,  #  ➔  gc=So   sc=Common     HEAVY WIDE-HEADED RIGHTWARDS ARROW | 
| 3112 |  |  |  |  |  |  | 0x00279D =>       3,  #  ➝  gc=So   sc=Common     TRIANGLE-HEADED RIGHTWARDS ARROW | 
| 3113 |  |  |  |  |  |  | 0x00279E =>       1,  #  ➞  gc=So   sc=Common     HEAVY TRIANGLE-HEADED RIGHTWARDS ARROW | 
| 3114 |  |  |  |  |  |  | 0x0027A1 =>       2,  #  ➡  gc=So   sc=Common     BLACK RIGHTWARDS ARROW | 
| 3115 |  |  |  |  |  |  | 0x0027A2 =>     194,  #  ➢  gc=So   sc=Common     THREE-D TOP-LIGHTED RIGHTWARDS ARROWHEAD | 
| 3116 |  |  |  |  |  |  | 0x0027A4 =>       2,  #  ➤  gc=So   sc=Common     BLACK RIGHTWARDS ARROWHEAD | 
| 3117 |  |  |  |  |  |  | 0x0027E1 =>       2,  #  ⟡  gc=Sm   sc=Common     WHITE CONCAVE-SIDED DIAMOND | 
| 3118 |  |  |  |  |  |  | 0x0027E6 =>      13,  #  ⟦  gc=Ps   sc=Common     MATHEMATICAL LEFT WHITE SQUARE BRACKET | 
| 3119 |  |  |  |  |  |  | 0x0027E7 =>      13,  #  ⟧  gc=Pe   sc=Common     MATHEMATICAL RIGHT WHITE SQUARE BRACKET | 
| 3120 |  |  |  |  |  |  | 0x0027E8 =>    1152,  #  ⟨  gc=Ps   sc=Common     MATHEMATICAL LEFT ANGLE BRACKET | 
| 3121 |  |  |  |  |  |  | 0x0027E9 =>    1151,  #  ⟩  gc=Pe   sc=Common     MATHEMATICAL RIGHT ANGLE BRACKET | 
| 3122 |  |  |  |  |  |  | 0x0027F5 =>      39,  #  ⟵  gc=Sm   sc=Common     LONG LEFTWARDS ARROW | 
| 3123 |  |  |  |  |  |  | 0x0027F6 =>      40,  #  ⟶  gc=Sm   sc=Common     LONG RIGHTWARDS ARROW | 
| 3124 |  |  |  |  |  |  | 0x0027F7 =>       5,  #  ⟷  gc=Sm   sc=Common     LONG LEFT RIGHT ARROW | 
| 3125 |  |  |  |  |  |  | 0x0027F9 =>       5,  #  ⟹  gc=Sm   sc=Common     LONG RIGHTWARDS DOUBLE ARROW | 
| 3126 |  |  |  |  |  |  | 0x0027FA =>       8,  #  ⟺  gc=Sm   sc=Common     LONG LEFT RIGHT DOUBLE ARROW | 
| 3127 |  |  |  |  |  |  | 0x002919 =>       1,  #  ⤙  gc=Sm   sc=Common     LEFTWARDS ARROW-TAIL | 
| 3128 |  |  |  |  |  |  | 0x002922 =>      11,  #  ⤢  gc=Sm   sc=Common     NORTH EAST AND SOUTH WEST ARROW | 
| 3129 |  |  |  |  |  |  | 0x00292A =>       1,  #  ⤪  gc=Sm   sc=Common     SOUTH WEST ARROW AND NORTH WEST ARROW | 
| 3130 |  |  |  |  |  |  | 0x002944 =>      33,  #  ⥄  gc=Sm   sc=Common     SHORT RIGHTWARDS ARROW ABOVE LEFTWARDS ARROW | 
| 3131 |  |  |  |  |  |  | 0x0029B0 =>       1,  #  ⦰  gc=Sm   sc=Common     REVERSED EMPTY SET | 
| 3132 |  |  |  |  |  |  | 0x0029C4 =>       2,  #  ⧄  gc=Sm   sc=Common     SQUARED RISING DIAGONAL SLASH | 
| 3133 |  |  |  |  |  |  | 0x0029E7 =>      12,  #  ⧧  gc=Sm   sc=Common     THERMODYNAMIC | 
| 3134 |  |  |  |  |  |  | 0x0029EB =>      97,  #  ⧫  gc=Sm   sc=Common     BLACK LOZENGE | 
| 3135 |  |  |  |  |  |  | 0x002A11 =>       4,  #  ⨑  gc=Sm   sc=Common     ANTICLOCKWISE INTEGRATION | 
| 3136 |  |  |  |  |  |  | 0x002A46 =>       1,  #  ⩆  gc=Sm   sc=Common     UNION ABOVE INTERSECTION | 
| 3137 |  |  |  |  |  |  | 0x002A52 =>       9,  #  ⩒  gc=Sm   sc=Common     LOGICAL OR WITH DOT ABOVE | 
| 3138 |  |  |  |  |  |  | 0x002A72 =>      10,  #  ⩲  gc=Sm   sc=Common     PLUS SIGN ABOVE EQUALS SIGN | 
| 3139 |  |  |  |  |  |  | 0x002A7D =>     787,  #  ⩽  gc=Sm   sc=Common     LESS-THAN OR SLANTED EQUAL TO | 
| 3140 |  |  |  |  |  |  | 0x002A7E =>    1606,  #  ⩾  gc=Sm   sc=Common     GREATER-THAN OR SLANTED EQUAL TO | 
| 3141 |  |  |  |  |  |  | 0x002AAF =>       1,  #  ⪯  gc=Sm   sc=Common     PRECEDES ABOVE SINGLE-LINE EQUALS SIGN | 
| 3142 |  |  |  |  |  |  | 0x002AB0 =>       2,  #  ⪰  gc=Sm   sc=Common     SUCCEEDS ABOVE SINGLE-LINE EQUALS SIGN | 
| 3143 |  |  |  |  |  |  | 0x002AB7 =>       1,  #  ⪷  gc=Sm   sc=Common     PRECEDES ABOVE ALMOST EQUAL TO | 
| 3144 |  |  |  |  |  |  | 0x002ABD =>       1,  #  ⪽  gc=Sm   sc=Common     SUBSET WITH DOT | 
| 3145 |  |  |  |  |  |  | 0x002AC2 =>       4,  #  ⫂  gc=Sm   sc=Common     SUPERSET WITH MULTIPLICATION SIGN BELOW | 
| 3146 |  |  |  |  |  |  | 0x002AC5 =>       3,  #  ⫅  gc=Sm   sc=Common     SUBSET OF ABOVE EQUALS SIGN | 
| 3147 |  |  |  |  |  |  | 0x002ADE =>       1,  #  ⫞  gc=Sm   sc=Common     SHORT LEFT TACK | 
| 3148 |  |  |  |  |  |  | 0x002AEB =>       8,  #  ⫫  gc=Sm   sc=Common     DOUBLE UP TACK | 
| 3149 |  |  |  |  |  |  | 0x002AEF =>       1,  #  ⫯  gc=Sm   sc=Common     VERTICAL LINE WITH CIRCLE ABOVE | 
| 3150 |  |  |  |  |  |  | 0x002AF2 =>       3,  #  ⫲  gc=Sm   sc=Common     PARALLEL WITH HORIZONTAL STROKE | 
| 3151 |  |  |  |  |  |  | 0x002B22 =>       1,  #  ⬢  gc=So   sc=Common     BLACK HEXAGON | 
| 3152 |  |  |  |  |  |  | 0x00266D =>       2,  #  ♭  gc=So   sc=Common     MUSIC FLAT SIGN | 
| 3153 |  |  |  |  |  |  | 0x00266E =>       2,  #  ♮  gc=So   sc=Common     MUSIC NATURAL SIGN | 
| 3154 |  |  |  |  |  |  | 0x00266F =>      16,  #  ♯  gc=Sm   sc=Common     MUSIC SHARP SIGN | 
| 3155 |  |  |  |  |  |  | 0x00FFFD =>       7,  #  �  gc=So   sc=Common     REPLACEMENT CHARACTER | 
| 3156 |  |  |  |  |  |  | 0x0002D0 =>       5,  #  ː  gc=Lm   sc=Common     MODIFIER LETTER TRIANGULAR COLON | 
| 3157 |  |  |  |  |  |  | 0x0000A4 =>    7856,  #  ¤  gc=Sc   sc=Common     CURRENCY SIGN | 
| 3158 |  |  |  |  |  |  | 0x0000A2 =>     554,  #  ¢  gc=Sc   sc=Common     CENT SIGN | 
| 3159 |  |  |  |  |  |  | 0x0000A3 =>    8059,  #  £  gc=Sc   sc=Common     POUND SIGN | 
| 3160 |  |  |  |  |  |  | 0x0000A5 =>     833,  #  ¥  gc=Sc   sc=Common     YEN SIGN | 
| 3161 |  |  |  |  |  |  | 0x0020A0 =>     133,  #  ₠  gc=Sc   sc=Common     EURO-CURRENCY SIGN | 
| 3162 |  |  |  |  |  |  | 0x0020A3 =>       2,  #  ₣  gc=Sc   sc=Common     FRENCH FRANC SIGN | 
| 3163 |  |  |  |  |  |  | 0x0020A4 =>      42,  #  ₤  gc=Sc   sc=Common     LIRA SIGN | 
| 3164 |  |  |  |  |  |  | 0x0020A6 =>      23,  #  ₦  gc=Sc   sc=Common     NAIRA SIGN | 
| 3165 |  |  |  |  |  |  | 0x00FFE6 =>       1,  #  ₩ gc=Sc   sc=Common     FULLWIDTH WON SIGN | 
| 3166 |  |  |  |  |  |  | 0x0020AB =>       2,  #  ₫  gc=Sc   sc=Common     DONG SIGN | 
| 3167 |  |  |  |  |  |  | 0x0020AC =>    7517,  #  €  gc=Sc   sc=Common     EURO SIGN | 
| 3168 |  |  |  |  |  |  | 0x002080 =>       1,  #  ₀  gc=No   sc=Common     SUBSCRIPT ZERO | 
| 3169 |  |  |  |  |  |  | 0x01D7D9 =>       1,  #  𝟙  gc=Nd   sc=Common     MATHEMATICAL DOUBLE-STRUCK DIGIT ONE | 
| 3170 |  |  |  |  |  |  | 0x002460 =>      95,  #  ①  gc=No   sc=Common     CIRCLED DIGIT ONE | 
| 3171 |  |  |  |  |  |  | 0x002780 =>       1,  #  ➀  gc=No   sc=Common     DINGBAT CIRCLED SANS-SERIF DIGIT ONE | 
| 3172 |  |  |  |  |  |  | 0x002776 =>       5,  #  ❶  gc=No   sc=Common     DINGBAT NEGATIVE CIRCLED DIGIT ONE | 
| 3173 |  |  |  |  |  |  | 0x0000B9 =>      28,  #  ¹  gc=No   sc=Common     SUPERSCRIPT ONE | 
| 3174 |  |  |  |  |  |  | 0x0000BD =>    2104,  #  ½  gc=No   sc=Common     VULGAR FRACTION ONE HALF | 
| 3175 |  |  |  |  |  |  | 0x002153 =>      32,  #  ⅓  gc=No   sc=Common     VULGAR FRACTION ONE THIRD | 
| 3176 |  |  |  |  |  |  | 0x0000BC =>     400,  #  ¼  gc=No   sc=Common     VULGAR FRACTION ONE QUARTER | 
| 3177 |  |  |  |  |  |  | 0x002155 =>       4,  #  ⅕  gc=No   sc=Common     VULGAR FRACTION ONE FIFTH | 
| 3178 |  |  |  |  |  |  | 0x002159 =>       1,  #  ⅙  gc=No   sc=Common     VULGAR FRACTION ONE SIXTH | 
| 3179 |  |  |  |  |  |  | 0x00215B =>       3,  #  ⅛  gc=No   sc=Common     VULGAR FRACTION ONE EIGHTH | 
| 3180 |  |  |  |  |  |  | 0x002469 =>       3,  #  ⑩  gc=No   sc=Common     CIRCLED NUMBER TEN | 
| 3181 |  |  |  |  |  |  | 0x00246A =>       2,  #  ⑪  gc=No   sc=Common     CIRCLED NUMBER ELEVEN | 
| 3182 |  |  |  |  |  |  | 0x00246B =>       1,  #  ⑫  gc=No   sc=Common     CIRCLED NUMBER TWELVE | 
| 3183 |  |  |  |  |  |  | 0x002461 =>     115,  #  ②  gc=No   sc=Common     CIRCLED DIGIT TWO | 
| 3184 |  |  |  |  |  |  | 0x002781 =>       1,  #  ➁  gc=No   sc=Common     DINGBAT CIRCLED SANS-SERIF DIGIT TWO | 
| 3185 |  |  |  |  |  |  | 0x002777 =>       5,  #  ❷  gc=No   sc=Common     DINGBAT NEGATIVE CIRCLED DIGIT TWO | 
| 3186 |  |  |  |  |  |  | 0x002082 =>      12,  #  ₂  gc=No   sc=Common     SUBSCRIPT TWO | 
| 3187 |  |  |  |  |  |  | 0x0000B2 =>     920,  #  ²  gc=No   sc=Common     SUPERSCRIPT TWO | 
| 3188 |  |  |  |  |  |  | 0x002154 =>      16,  #  ⅔  gc=No   sc=Common     VULGAR FRACTION TWO THIRDS | 
| 3189 |  |  |  |  |  |  | 0x002462 =>      61,  #  ③  gc=No   sc=Common     CIRCLED DIGIT THREE | 
| 3190 |  |  |  |  |  |  | 0x002778 =>       3,  #  ❸  gc=No   sc=Common     DINGBAT NEGATIVE CIRCLED DIGIT THREE | 
| 3191 |  |  |  |  |  |  | 0x00278C =>      61,  #  ➌  gc=No   sc=Common     DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE | 
| 3192 |  |  |  |  |  |  | 0x002083 =>       4,  #  ₃  gc=No   sc=Common     SUBSCRIPT THREE | 
| 3193 |  |  |  |  |  |  | 0x0000B3 =>     113,  #  ³  gc=No   sc=Common     SUPERSCRIPT THREE | 
| 3194 |  |  |  |  |  |  | 0x0000BE =>     223,  #  ¾  gc=No   sc=Common     VULGAR FRACTION THREE QUARTERS | 
| 3195 |  |  |  |  |  |  | 0x0006F4 =>       5,  #  ۴  gc=Nd   sc=Arabic     EXTENDED ARABIC-INDIC DIGIT FOUR | 
| 3196 |  |  |  |  |  |  | 0x000664 =>       2,  #  ٤  gc=Nd   sc=Common     ARABIC-INDIC DIGIT FOUR | 
| 3197 |  |  |  |  |  |  | 0x002463 =>     104,  #  ④  gc=No   sc=Common     CIRCLED DIGIT FOUR | 
| 3198 |  |  |  |  |  |  | 0x002074 =>       3,  #  ⁴  gc=No   sc=Common     SUPERSCRIPT FOUR | 
| 3199 |  |  |  |  |  |  | 0x002464 =>      25,  #  ⑤  gc=No   sc=Common     CIRCLED DIGIT FIVE | 
| 3200 |  |  |  |  |  |  | 0x002784 =>       1,  #  ➄  gc=No   sc=Common     DINGBAT CIRCLED SANS-SERIF DIGIT FIVE | 
| 3201 |  |  |  |  |  |  | 0x00278E =>      12,  #  ➎  gc=No   sc=Common     DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE | 
| 3202 |  |  |  |  |  |  | 0x002075 =>       2,  #  ⁵  gc=No   sc=Common     SUPERSCRIPT FIVE | 
| 3203 |  |  |  |  |  |  | 0x00215D =>       1,  #  ⅝  gc=No   sc=Common     VULGAR FRACTION FIVE EIGHTHS | 
| 3204 |  |  |  |  |  |  | 0x0006F6 =>       6,  #  ۶  gc=Nd   sc=Arabic     EXTENDED ARABIC-INDIC DIGIT SIX | 
| 3205 |  |  |  |  |  |  | 0x002465 =>      30,  #  ⑥  gc=No   sc=Common     CIRCLED DIGIT SIX | 
| 3206 |  |  |  |  |  |  | 0x00278F =>      13,  #  ➏  gc=No   sc=Common     DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX | 
| 3207 |  |  |  |  |  |  | 0x002466 =>      54,  #  ⑦  gc=No   sc=Common     CIRCLED DIGIT SEVEN | 
| 3208 |  |  |  |  |  |  | 0x0006F8 =>       2,  #  ۸  gc=Nd   sc=Arabic     EXTENDED ARABIC-INDIC DIGIT EIGHT | 
| 3209 |  |  |  |  |  |  | 0x002467 =>       3,  #  ⑧  gc=No   sc=Common     CIRCLED DIGIT EIGHT | 
| 3210 |  |  |  |  |  |  | 0x002791 =>      18,  #  ➑  gc=No   sc=Common     DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT | 
| 3211 |  |  |  |  |  |  | 0x002468 =>       7,  #  ⑨  gc=No   sc=Common     CIRCLED DIGIT NINE | 
| 3212 |  |  |  |  |  |  | 0x01D4B6 =>       7,  #  𝒶  gc=Ll   sc=Common     MATHEMATICAL SCRIPT SMALL A | 
| 3213 |  |  |  |  |  |  | 0x0000AA =>      72,  #  ª  gc=Ll   sc=Latin      FEMININE ORDINAL INDICATOR | 
| 3214 |  |  |  |  |  |  | 0x0000E1 =>   53068,  #  á  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH ACUTE | 
| 3215 |  |  |  |  |  |  | 0x000103 =>     805,  #  ă  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH BREVE | 
| 3216 |  |  |  |  |  |  | 0x001EAF =>      37,  #  ắ  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH BREVE AND ACUTE | 
| 3217 |  |  |  |  |  |  | 0x001EB7 =>       2,  #  ặ  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH BREVE AND DOT BELOW | 
| 3218 |  |  |  |  |  |  | 0x0001CE =>      12,  #  ǎ  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH CARON | 
| 3219 |  |  |  |  |  |  | 0x0000E2 =>    2760,  #  â  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH CIRCUMFLEX | 
| 3220 |  |  |  |  |  |  | 0x001EA5 =>       1,  #  ấ  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE | 
| 3221 |  |  |  |  |  |  | 0x001EAD =>      24,  #  ậ  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW | 
| 3222 |  |  |  |  |  |  | 0x001EA7 =>       1,  #  ầ  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE | 
| 3223 |  |  |  |  |  |  | 0x0000E4 =>   62227,  #  ä  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH DIAERESIS | 
| 3224 |  |  |  |  |  |  | 0x000227 =>      13,  #  ȧ  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH DOT ABOVE | 
| 3225 |  |  |  |  |  |  | 0x001EA1 =>      31,  #  ạ  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH DOT BELOW | 
| 3226 |  |  |  |  |  |  | 0x000201 =>       1,  #  ȁ  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH DOUBLE GRAVE | 
| 3227 |  |  |  |  |  |  | 0x0000E0 =>   11147,  #  à  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH GRAVE | 
| 3228 |  |  |  |  |  |  | 0x001EA3 =>       9,  #  ả  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH HOOK ABOVE | 
| 3229 |  |  |  |  |  |  | 0x000101 =>    1902,  #  ā  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH MACRON | 
| 3230 |  |  |  |  |  |  | 0x000105 =>     306,  #  ą  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH OGONEK | 
| 3231 |  |  |  |  |  |  | 0x0000E5 =>    9917,  #  å  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH RING ABOVE | 
| 3232 |  |  |  |  |  |  | 0x0000E3 =>   18112,  #  ã  gc=Ll   sc=Latin      LATIN SMALL LETTER A WITH TILDE | 
| 3233 |  |  |  |  |  |  | 0x01D538 =>       9,  #  𝔸  gc=Lu   sc=Common     MATHEMATICAL DOUBLE-STRUCK CAPITAL A | 
| 3234 |  |  |  |  |  |  | 0x01D49C =>     185,  #  𝒜  gc=Lu   sc=Common     MATHEMATICAL SCRIPT CAPITAL A | 
| 3235 |  |  |  |  |  |  | 0x00212B =>    3397,  #  Å  gc=Lu   sc=Latin      ANGSTROM SIGN | 
| 3236 |  |  |  |  |  |  | 0x0000C1 =>    1949,  #  Á  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH ACUTE | 
| 3237 |  |  |  |  |  |  | 0x000102 =>      37,  #  Ă  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH BREVE | 
| 3238 |  |  |  |  |  |  | 0x0000C2 =>     577,  #  Â  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH CIRCUMFLEX | 
| 3239 |  |  |  |  |  |  | 0x0000C4 =>    1198,  #  Ä  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH DIAERESIS | 
| 3240 |  |  |  |  |  |  | 0x000226 =>       1,  #  Ȧ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH DOT ABOVE | 
| 3241 |  |  |  |  |  |  | 0x0000C0 =>     163,  #  À  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH GRAVE | 
| 3242 |  |  |  |  |  |  | 0x001EA2 =>       1,  #  Ả  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH HOOK ABOVE | 
| 3243 |  |  |  |  |  |  | 0x000100 =>      32,  #  Ā  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH MACRON | 
| 3244 |  |  |  |  |  |  | 0x000104 =>       1,  #  Ą  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH OGONEK | 
| 3245 |  |  |  |  |  |  | 0x0000C5 =>   42397,  #  Å  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH RING ABOVE | 
| 3246 |  |  |  |  |  |  | 0x0001FA =>     206,  #  Ǻ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE | 
| 3247 |  |  |  |  |  |  | 0x0000C3 =>     204,  #  Ã  gc=Lu   sc=Latin      LATIN CAPITAL LETTER A WITH TILDE | 
| 3248 |  |  |  |  |  |  | 0x0024B6 =>       2,  #  Ⓐ  gc=So   sc=Common     CIRCLED LATIN CAPITAL LETTER A | 
| 3249 |  |  |  |  |  |  | 0x0000E6 =>    3589,  #  æ  gc=Ll   sc=Latin      LATIN SMALL LETTER AE | 
| 3250 |  |  |  |  |  |  | 0x0001FD =>       3,  #  ǽ  gc=Ll   sc=Latin      LATIN SMALL LETTER AE WITH ACUTE | 
| 3251 |  |  |  |  |  |  | 0x0000C6 =>     235,  #  Æ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER AE | 
| 3252 |  |  |  |  |  |  | 0x000251 =>       2,  #  ɑ  gc=Ll   sc=Latin      LATIN SMALL LETTER ALPHA | 
| 3253 |  |  |  |  |  |  | 0x01D539 =>      21,  #  𝔹  gc=Lu   sc=Common     MATHEMATICAL DOUBLE-STRUCK CAPITAL B | 
| 3254 |  |  |  |  |  |  | 0x01D505 =>       2,  #  𝔅  gc=Lu   sc=Common     MATHEMATICAL FRAKTUR CAPITAL B | 
| 3255 |  |  |  |  |  |  | 0x00212C =>     289,  #  ℬ  gc=Lu   sc=Common     SCRIPT CAPITAL B | 
| 3256 |  |  |  |  |  |  | 0x001E04 =>       1,  #  Ḅ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER B WITH DOT BELOW | 
| 3257 |  |  |  |  |  |  | 0x0024B7 =>       3,  #  Ⓑ  gc=So   sc=Common     CIRCLED LATIN CAPITAL LETTER B | 
| 3258 |  |  |  |  |  |  | 0x000181 =>       1,  #  Ɓ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER B WITH HOOK | 
| 3259 |  |  |  |  |  |  | 0x000183 =>       1,  #  ƃ  gc=Ll   sc=Latin      LATIN SMALL LETTER B WITH TOPBAR | 
| 3260 |  |  |  |  |  |  | 0x01D4B8 =>       1,  #  𝒸  gc=Ll   sc=Common     MATHEMATICAL SCRIPT SMALL C | 
| 3261 |  |  |  |  |  |  | 0x000107 =>    3372,  #  ć  gc=Ll   sc=Latin      LATIN SMALL LETTER C WITH ACUTE | 
| 3262 |  |  |  |  |  |  | 0x00010D =>    2377,  #  č  gc=Ll   sc=Latin      LATIN SMALL LETTER C WITH CARON | 
| 3263 |  |  |  |  |  |  | 0x0000E7 =>   17094,  #  ç  gc=Ll   sc=Latin      LATIN SMALL LETTER C WITH CEDILLA | 
| 3264 |  |  |  |  |  |  | 0x000109 =>      73,  #  ĉ  gc=Ll   sc=Latin      LATIN SMALL LETTER C WITH CIRCUMFLEX | 
| 3265 |  |  |  |  |  |  | 0x00010B =>      43,  #  ċ  gc=Ll   sc=Latin      LATIN SMALL LETTER C WITH DOT ABOVE | 
| 3266 |  |  |  |  |  |  | 0x00212D =>      70,  #  ℭ  gc=Lu   sc=Common     BLACK-LETTER CAPITAL C | 
| 3267 |  |  |  |  |  |  | 0x002102 =>     122,  #  ℂ  gc=Lu   sc=Common     DOUBLE-STRUCK CAPITAL C | 
| 3268 |  |  |  |  |  |  | 0x01D49E =>     544,  #  𝒞  gc=Lu   sc=Common     MATHEMATICAL SCRIPT CAPITAL C | 
| 3269 |  |  |  |  |  |  | 0x000106 =>      16,  #  Ć  gc=Lu   sc=Latin      LATIN CAPITAL LETTER C WITH ACUTE | 
| 3270 |  |  |  |  |  |  | 0x00010C =>     389,  #  Č  gc=Lu   sc=Latin      LATIN CAPITAL LETTER C WITH CARON | 
| 3271 |  |  |  |  |  |  | 0x0000C7 =>     897,  #  Ç  gc=Lu   sc=Latin      LATIN CAPITAL LETTER C WITH CEDILLA | 
| 3272 |  |  |  |  |  |  | 0x000108 =>      97,  #  Ĉ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER C WITH CIRCUMFLEX | 
| 3273 |  |  |  |  |  |  | 0x00010A =>       8,  #  Ċ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER C WITH DOT ABOVE | 
| 3274 |  |  |  |  |  |  | 0x002105 =>       1,  #  ℅  gc=So   sc=Common     CARE OF | 
| 3275 |  |  |  |  |  |  | 0x002146 =>      14,  #  ⅆ  gc=Ll   sc=Common     DOUBLE-STRUCK ITALIC SMALL D | 
| 3276 |  |  |  |  |  |  | 0x00010F =>       1,  #  ď  gc=Ll   sc=Latin      LATIN SMALL LETTER D WITH CARON | 
| 3277 |  |  |  |  |  |  | 0x001E0D =>      13,  #  ḍ  gc=Ll   sc=Latin      LATIN SMALL LETTER D WITH DOT BELOW | 
| 3278 |  |  |  |  |  |  | 0x000111 =>      50,  #  đ  gc=Ll   sc=Latin      LATIN SMALL LETTER D WITH STROKE | 
| 3279 |  |  |  |  |  |  | 0x0000F0 =>     140,  #  ð  gc=Ll   sc=Latin      LATIN SMALL LETTER ETH | 
| 3280 |  |  |  |  |  |  | 0x002145 =>      28,  #  ⅅ  gc=Lu   sc=Common     DOUBLE-STRUCK ITALIC CAPITAL D | 
| 3281 |  |  |  |  |  |  | 0x01D53B =>       4,  #  𝔻  gc=Lu   sc=Common     MATHEMATICAL DOUBLE-STRUCK CAPITAL D | 
| 3282 |  |  |  |  |  |  | 0x01D507 =>       3,  #  𝔇  gc=Lu   sc=Common     MATHEMATICAL FRAKTUR CAPITAL D | 
| 3283 |  |  |  |  |  |  | 0x01D49F =>     292,  #  𝒟  gc=Lu   sc=Common     MATHEMATICAL SCRIPT CAPITAL D | 
| 3284 |  |  |  |  |  |  | 0x00010E =>      11,  #  Ď  gc=Lu   sc=Latin      LATIN CAPITAL LETTER D WITH CARON | 
| 3285 |  |  |  |  |  |  | 0x001E0A =>       3,  #  Ḋ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER D WITH DOT ABOVE | 
| 3286 |  |  |  |  |  |  | 0x001E0C =>       1,  #  Ḍ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER D WITH DOT BELOW | 
| 3287 |  |  |  |  |  |  | 0x000110 =>      44,  #  Đ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER D WITH STROKE | 
| 3288 |  |  |  |  |  |  | 0x0000D0 =>       7,  #  Ð  gc=Lu   sc=Latin      LATIN CAPITAL LETTER ETH | 
| 3289 |  |  |  |  |  |  | 0x00217E =>       2,  #  ⅾ  gc=Nl   sc=Latin      SMALL ROMAN NUMERAL FIVE HUNDRED | 
| 3290 |  |  |  |  |  |  | 0x003397 =>       2,  #  ㎗ gc=So   sc=Common     SQUARE DL | 
| 3291 |  |  |  |  |  |  | 0x000189 =>       2,  #  Ɖ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER AFRICAN D | 
| 3292 |  |  |  |  |  |  | 0x00212F =>       7,  #  ℯ  gc=Ll   sc=Common     SCRIPT SMALL E | 
| 3293 |  |  |  |  |  |  | 0x0000E9 =>  173691,  #  é  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH ACUTE | 
| 3294 |  |  |  |  |  |  | 0x000115 =>      47,  #  ĕ  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH BREVE | 
| 3295 |  |  |  |  |  |  | 0x00011B =>     248,  #  ě  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH CARON | 
| 3296 |  |  |  |  |  |  | 0x000229 =>       7,  #  ȩ  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH CEDILLA | 
| 3297 |  |  |  |  |  |  | 0x0000EA =>    5500,  #  ê  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH CIRCUMFLEX | 
| 3298 |  |  |  |  |  |  | 0x001EBF =>      10,  #  ế  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE | 
| 3299 |  |  |  |  |  |  | 0x001EC7 =>      12,  #  ệ  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW | 
| 3300 |  |  |  |  |  |  | 0x001EC1 =>       3,  #  ề  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE | 
| 3301 |  |  |  |  |  |  | 0x001EC3 =>       1,  #  ể  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE | 
| 3302 |  |  |  |  |  |  | 0x001EC5 =>       6,  #  ễ  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE | 
| 3303 |  |  |  |  |  |  | 0x0000EB =>    3643,  #  ë  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH DIAERESIS | 
| 3304 |  |  |  |  |  |  | 0x000117 =>     281,  #  ė  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH DOT ABOVE | 
| 3305 |  |  |  |  |  |  | 0x001EB9 =>       3,  #  ẹ  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH DOT BELOW | 
| 3306 |  |  |  |  |  |  | 0x0000E8 =>   20520,  #  è  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH GRAVE | 
| 3307 |  |  |  |  |  |  | 0x001EBB =>       1,  #  ẻ  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH HOOK ABOVE | 
| 3308 |  |  |  |  |  |  | 0x000207 =>       1,  #  ȇ  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH INVERTED BREVE | 
| 3309 |  |  |  |  |  |  | 0x000113 =>      56,  #  ē  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH MACRON | 
| 3310 |  |  |  |  |  |  | 0x000119 =>     486,  #  ę  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH OGONEK | 
| 3311 |  |  |  |  |  |  | 0x001EBD =>       9,  #  ẽ  gc=Ll   sc=Latin      LATIN SMALL LETTER E WITH TILDE | 
| 3312 |  |  |  |  |  |  | 0x01D53C =>     178,  #  𝔼  gc=Lu   sc=Common     MATHEMATICAL DOUBLE-STRUCK CAPITAL E | 
| 3313 |  |  |  |  |  |  | 0x002130 =>     301,  #  ℰ  gc=Lu   sc=Common     SCRIPT CAPITAL E | 
| 3314 |  |  |  |  |  |  | 0x0000C9 =>    2005,  #  É  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH ACUTE | 
| 3315 |  |  |  |  |  |  | 0x0000CA =>      70,  #  Ê  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH CIRCUMFLEX | 
| 3316 |  |  |  |  |  |  | 0x001EC6 =>       1,  #  Ệ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW | 
| 3317 |  |  |  |  |  |  | 0x0000CB =>      11,  #  Ë  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH DIAERESIS | 
| 3318 |  |  |  |  |  |  | 0x000116 =>       5,  #  Ė  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH DOT ABOVE | 
| 3319 |  |  |  |  |  |  | 0x0000C8 =>     130,  #  È  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH GRAVE | 
| 3320 |  |  |  |  |  |  | 0x000112 =>      51,  #  Ē  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH MACRON | 
| 3321 |  |  |  |  |  |  | 0x000118 =>       6,  #  Ę  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH OGONEK | 
| 3322 |  |  |  |  |  |  | 0x001EBC =>      13,  #  Ẽ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH TILDE | 
| 3323 |  |  |  |  |  |  | 0x000246 =>       1,  #  Ɇ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER E WITH STROKE | 
| 3324 |  |  |  |  |  |  | 0x0001DD =>       3,  #  ǝ  gc=Ll   sc=Latin      LATIN SMALL LETTER TURNED E | 
| 3325 |  |  |  |  |  |  | 0x000259 =>      34,  #  ə  gc=Ll   sc=Latin      LATIN SMALL LETTER SCHWA | 
| 3326 |  |  |  |  |  |  | 0x00018F =>       4,  #  Ə  gc=Lu   sc=Latin      LATIN CAPITAL LETTER SCHWA | 
| 3327 |  |  |  |  |  |  | 0x00025B =>    5239,  #  ɛ  gc=Ll   sc=Latin      LATIN SMALL LETTER OPEN E | 
| 3328 |  |  |  |  |  |  | 0x002107 =>       5,  #  ℇ  gc=Lu   sc=Common     EULER CONSTANT | 
| 3329 |  |  |  |  |  |  | 0x000258 =>       1,  #  ɘ  gc=Ll   sc=Latin      LATIN SMALL LETTER REVERSED E | 
| 3330 |  |  |  |  |  |  | 0x01D4BB =>       3,  #  𝒻  gc=Ll   sc=Common     MATHEMATICAL SCRIPT SMALL F | 
| 3331 |  |  |  |  |  |  | 0x01D53D =>       1,  #  𝔽  gc=Lu   sc=Common     MATHEMATICAL DOUBLE-STRUCK CAPITAL F | 
| 3332 |  |  |  |  |  |  | 0x002131 =>     581,  #  ℱ  gc=Lu   sc=Common     SCRIPT CAPITAL F | 
| 3333 |  |  |  |  |  |  | 0x00FB00 =>       5,  #  ff  gc=Ll   sc=Latin      LATIN SMALL LIGATURE FF | 
| 3334 |  |  |  |  |  |  | 0x00FB03 =>       1,  #  ffi  gc=Ll   sc=Latin      LATIN SMALL LIGATURE FFI | 
| 3335 |  |  |  |  |  |  | 0x00FB01 =>     182,  #  fi  gc=Ll   sc=Latin      LATIN SMALL LIGATURE FI | 
| 3336 |  |  |  |  |  |  | 0x00FB02 =>     100,  #  fl  gc=Ll   sc=Latin      LATIN SMALL LIGATURE FL | 
| 3337 |  |  |  |  |  |  | 0x000192 =>     137,  #  ƒ  gc=Ll   sc=Latin      LATIN SMALL LETTER F WITH HOOK | 
| 3338 |  |  |  |  |  |  | 0x00210A =>      92,  #  ℊ  gc=Ll   sc=Common     SCRIPT SMALL G | 
| 3339 |  |  |  |  |  |  | 0x0001F5 =>       2,  #  ǵ  gc=Ll   sc=Latin      LATIN SMALL LETTER G WITH ACUTE | 
| 3340 |  |  |  |  |  |  | 0x00011F =>    1467,  #  ğ  gc=Ll   sc=Latin      LATIN SMALL LETTER G WITH BREVE | 
| 3341 |  |  |  |  |  |  | 0x0001E7 =>      35,  #  ǧ  gc=Ll   sc=Latin      LATIN SMALL LETTER G WITH CARON | 
| 3342 |  |  |  |  |  |  | 0x000123 =>       1,  #  ģ  gc=Ll   sc=Latin      LATIN SMALL LETTER G WITH CEDILLA | 
| 3343 |  |  |  |  |  |  | 0x00011D =>     169,  #  ĝ  gc=Ll   sc=Latin      LATIN SMALL LETTER G WITH CIRCUMFLEX | 
| 3344 |  |  |  |  |  |  | 0x000121 =>      10,  #  ġ  gc=Ll   sc=Latin      LATIN SMALL LETTER G WITH DOT ABOVE | 
| 3345 |  |  |  |  |  |  | 0x001E21 =>     121,  #  ḡ  gc=Ll   sc=Latin      LATIN SMALL LETTER G WITH MACRON | 
| 3346 |  |  |  |  |  |  | 0x01D53E =>       1,  #  𝔾  gc=Lu   sc=Common     MATHEMATICAL DOUBLE-STRUCK CAPITAL G | 
| 3347 |  |  |  |  |  |  | 0x01D4A2 =>     254,  #  𝒢  gc=Lu   sc=Common     MATHEMATICAL SCRIPT CAPITAL G | 
| 3348 |  |  |  |  |  |  | 0x00011E =>       3,  #  Ğ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER G WITH BREVE | 
| 3349 |  |  |  |  |  |  | 0x0001E6 =>       3,  #  Ǧ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER G WITH CARON | 
| 3350 |  |  |  |  |  |  | 0x00011C =>      42,  #  Ĝ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER G WITH CIRCUMFLEX | 
| 3351 |  |  |  |  |  |  | 0x000120 =>       3,  #  Ġ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER G WITH DOT ABOVE | 
| 3352 |  |  |  |  |  |  | 0x001E20 =>       2,  #  Ḡ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER G WITH MACRON | 
| 3353 |  |  |  |  |  |  | 0x000260 =>       2,  #  ɠ  gc=Ll   sc=Latin      LATIN SMALL LETTER G WITH HOOK | 
| 3354 |  |  |  |  |  |  | 0x0002E0 =>       7,  #  ˠ  gc=Lm   sc=Latin      MODIFIER LETTER SMALL GAMMA | 
| 3355 |  |  |  |  |  |  | 0x01D4BD =>       2,  #  𝒽  gc=Ll   sc=Common     MATHEMATICAL SCRIPT SMALL H | 
| 3356 |  |  |  |  |  |  | 0x00210F =>     577,  #  ℏ  gc=Ll   sc=Common     PLANCK CONSTANT OVER TWO PI | 
| 3357 |  |  |  |  |  |  | 0x000125 =>      19,  #  ĥ  gc=Ll   sc=Latin      LATIN SMALL LETTER H WITH CIRCUMFLEX | 
| 3358 |  |  |  |  |  |  | 0x001E23 =>       1,  #  ḣ  gc=Ll   sc=Latin      LATIN SMALL LETTER H WITH DOT ABOVE | 
| 3359 |  |  |  |  |  |  | 0x001E25 =>      30,  #  ḥ  gc=Ll   sc=Latin      LATIN SMALL LETTER H WITH DOT BELOW | 
| 3360 |  |  |  |  |  |  | 0x000127 =>     151,  #  ħ  gc=Ll   sc=Latin      LATIN SMALL LETTER H WITH STROKE | 
| 3361 |  |  |  |  |  |  | 0x00210C =>       6,  #  ℌ  gc=Lu   sc=Common     BLACK-LETTER CAPITAL H | 
| 3362 |  |  |  |  |  |  | 0x00210D =>      15,  #  ℍ  gc=Lu   sc=Common     DOUBLE-STRUCK CAPITAL H | 
| 3363 |  |  |  |  |  |  | 0x01D5A7 =>      19,  #  𝖧  gc=Lu   sc=Common     MATHEMATICAL SANS-SERIF CAPITAL H | 
| 3364 |  |  |  |  |  |  | 0x00210B =>     508,  #  ℋ  gc=Lu   sc=Common     SCRIPT CAPITAL H | 
| 3365 |  |  |  |  |  |  | 0x000124 =>     108,  #  Ĥ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER H WITH CIRCUMFLEX | 
| 3366 |  |  |  |  |  |  | 0x001E22 =>       1,  #  Ḣ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER H WITH DOT ABOVE | 
| 3367 |  |  |  |  |  |  | 0x001E24 =>       6,  #  Ḥ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER H WITH DOT BELOW | 
| 3368 |  |  |  |  |  |  | 0x000126 =>      29,  #  Ħ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER H WITH STROKE | 
| 3369 |  |  |  |  |  |  | 0x01D4BE =>       7,  #  𝒾  gc=Ll   sc=Common     MATHEMATICAL SCRIPT SMALL I | 
| 3370 |  |  |  |  |  |  | 0x0000ED =>   47405,  #  í  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH ACUTE | 
| 3371 |  |  |  |  |  |  | 0x00012D =>      59,  #  ĭ  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH BREVE | 
| 3372 |  |  |  |  |  |  | 0x0001D0 =>       2,  #  ǐ  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH CARON | 
| 3373 |  |  |  |  |  |  | 0x0000EE =>     698,  #  î  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH CIRCUMFLEX | 
| 3374 |  |  |  |  |  |  | 0x0000EF =>   21057,  #  ï  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH DIAERESIS | 
| 3375 |  |  |  |  |  |  | 0x001ECB =>       3,  #  ị  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH DOT BELOW | 
| 3376 |  |  |  |  |  |  | 0x000209 =>       1,  #  ȉ  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH DOUBLE GRAVE | 
| 3377 |  |  |  |  |  |  | 0x0000EC =>     858,  #  ì  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH GRAVE | 
| 3378 |  |  |  |  |  |  | 0x001EC9 =>       1,  #  ỉ  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH HOOK ABOVE | 
| 3379 |  |  |  |  |  |  | 0x00012B =>     617,  #  ī  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH MACRON | 
| 3380 |  |  |  |  |  |  | 0x00012F =>       7,  #  į  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH OGONEK | 
| 3381 |  |  |  |  |  |  | 0x000129 =>       6,  #  ĩ  gc=Ll   sc=Latin      LATIN SMALL LETTER I WITH TILDE | 
| 3382 |  |  |  |  |  |  | 0x002111 =>     134,  #  ℑ  gc=Lu   sc=Common     BLACK-LETTER CAPITAL I | 
| 3383 |  |  |  |  |  |  | 0x01D540 =>      11,  #  𝕀  gc=Lu   sc=Common     MATHEMATICAL DOUBLE-STRUCK CAPITAL I | 
| 3384 |  |  |  |  |  |  | 0x002110 =>     304,  #  ℐ  gc=Lu   sc=Common     SCRIPT CAPITAL I | 
| 3385 |  |  |  |  |  |  | 0x0000CD =>     174,  #  Í  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH ACUTE | 
| 3386 |  |  |  |  |  |  | 0x0000CE =>     278,  #  Î  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH CIRCUMFLEX | 
| 3387 |  |  |  |  |  |  | 0x0000CF =>      51,  #  Ï  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH DIAERESIS | 
| 3388 |  |  |  |  |  |  | 0x000130 =>     361,  #  İ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH DOT ABOVE | 
| 3389 |  |  |  |  |  |  | 0x0000CC =>      35,  #  Ì  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH GRAVE | 
| 3390 |  |  |  |  |  |  | 0x00012A =>      39,  #  Ī  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH MACRON | 
| 3391 |  |  |  |  |  |  | 0x000128 =>      48,  #  Ĩ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH TILDE | 
| 3392 |  |  |  |  |  |  | 0x002160 =>       8,  #  Ⅰ  gc=Nl   sc=Latin      ROMAN NUMERAL ONE | 
| 3393 |  |  |  |  |  |  | 0x002161 =>       5,  #  Ⅱ  gc=Nl   sc=Latin      ROMAN NUMERAL TWO | 
| 3394 |  |  |  |  |  |  | 0x002162 =>       4,  #  Ⅲ  gc=Nl   sc=Latin      ROMAN NUMERAL THREE | 
| 3395 |  |  |  |  |  |  | 0x000133 =>       1,  #  ij  gc=Ll   sc=Latin      LATIN SMALL LIGATURE IJ | 
| 3396 |  |  |  |  |  |  | 0x000132 =>       1,  #  IJ  gc=Lu   sc=Latin      LATIN CAPITAL LIGATURE IJ | 
| 3397 |  |  |  |  |  |  | 0x002163 =>       1,  #  Ⅳ  gc=Nl   sc=Latin      ROMAN NUMERAL FOUR | 
| 3398 |  |  |  |  |  |  | 0x002178 =>       1,  #  ⅸ  gc=Nl   sc=Latin      SMALL ROMAN NUMERAL NINE | 
| 3399 |  |  |  |  |  |  | 0x01D6A4 =>       1,  #  𝚤  gc=Ll   sc=Common     MATHEMATICAL ITALIC SMALL DOTLESS I | 
| 3400 |  |  |  |  |  |  | 0x000131 =>    1990,  #  ı  gc=Ll   sc=Latin      LATIN SMALL LETTER DOTLESS I | 
| 3401 |  |  |  |  |  |  | 0x00026A =>       9,  #  ɪ  gc=Ll   sc=Latin      LATIN LETTER SMALL CAPITAL I | 
| 3402 |  |  |  |  |  |  | 0x000197 =>       6,  #  Ɨ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER I WITH STROKE | 
| 3403 |  |  |  |  |  |  | 0x01D4BF =>      13,  #  𝒿  gc=Ll   sc=Common     MATHEMATICAL SCRIPT SMALL J | 
| 3404 |  |  |  |  |  |  | 0x000135 =>       2,  #  ĵ  gc=Ll   sc=Latin      LATIN SMALL LETTER J WITH CIRCUMFLEX | 
| 3405 |  |  |  |  |  |  | 0x01D541 =>       2,  #  𝕁  gc=Lu   sc=Common     MATHEMATICAL DOUBLE-STRUCK CAPITAL J | 
| 3406 |  |  |  |  |  |  | 0x01D4A5 =>      56,  #  𝒥  gc=Lu   sc=Common     MATHEMATICAL SCRIPT CAPITAL J | 
| 3407 |  |  |  |  |  |  | 0x000134 =>       6,  #  Ĵ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER J WITH CIRCUMFLEX | 
| 3408 |  |  |  |  |  |  | 0x001E31 =>       2,  #  ḱ  gc=Ll   sc=Latin      LATIN SMALL LETTER K WITH ACUTE | 
| 3409 |  |  |  |  |  |  | 0x01D542 =>       3,  #  𝕂  gc=Lu   sc=Common     MATHEMATICAL DOUBLE-STRUCK CAPITAL K | 
| 3410 |  |  |  |  |  |  | 0x01D50E =>       2,  #  𝔎  gc=Lu   sc=Common     MATHEMATICAL FRAKTUR CAPITAL K | 
| 3411 |  |  |  |  |  |  | 0x01D4A6 =>      48,  #  𝒦  gc=Lu   sc=Common     MATHEMATICAL SCRIPT CAPITAL K | 
| 3412 |  |  |  |  |  |  | 0x000136 =>       2,  #  Ķ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER K WITH CEDILLA | 
| 3413 |  |  |  |  |  |  | 0x000198 =>      15,  #  Ƙ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER K WITH HOOK | 
| 3414 |  |  |  |  |  |  | 0x01D4C1 =>       1,  #  𝓁  gc=Ll   sc=Common     MATHEMATICAL SCRIPT SMALL L | 
| 3415 |  |  |  |  |  |  | 0x002113 =>    3261,  #  ℓ  gc=Ll   sc=Common     SCRIPT SMALL L | 
| 3416 |  |  |  |  |  |  | 0x00013A =>      13,  #  ĺ  gc=Ll   sc=Latin      LATIN SMALL LETTER L WITH ACUTE | 
| 3417 |  |  |  |  |  |  | 0x00013E =>       1,  #  ľ  gc=Ll   sc=Latin      LATIN SMALL LETTER L WITH CARON | 
| 3418 |  |  |  |  |  |  | 0x001E3B =>       1,  #  ḻ  gc=Ll   sc=Latin      LATIN SMALL LETTER L WITH LINE BELOW | 
| 3419 |  |  |  |  |  |  | 0x000142 =>    1967,  #  ł  gc=Ll   sc=Latin      LATIN SMALL LETTER L WITH STROKE | 
| 3420 |  |  |  |  |  |  | 0x01D543 =>       2,  #  𝕃  gc=Lu   sc=Common     MATHEMATICAL DOUBLE-STRUCK CAPITAL L | 
| 3421 |  |  |  |  |  |  | 0x002112 =>     564,  #  ℒ  gc=Lu   sc=Common     SCRIPT CAPITAL L | 
| 3422 |  |  |  |  |  |  | 0x000139 =>       1,  #  Ĺ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER L WITH ACUTE | 
| 3423 |  |  |  |  |  |  | 0x00013D =>       3,  #  Ľ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER L WITH CARON | 
| 3424 |  |  |  |  |  |  | 0x000141 =>     415,  #  Ł  gc=Lu   sc=Latin      LATIN CAPITAL LETTER L WITH STROKE | 
| 3425 |  |  |  |  |  |  | 0x00FF4C =>       1,  #  l gc=Ll   sc=Latin      FULLWIDTH LATIN SMALL LETTER L | 
| 3426 |  |  |  |  |  |  | 0x00029F =>      11,  #  ʟ  gc=Ll   sc=Latin      LATIN LETTER SMALL CAPITAL L | 
| 3427 |  |  |  |  |  |  | 0x00019B =>       2,  #  ƛ  gc=Ll   sc=Latin      LATIN SMALL LETTER LAMBDA WITH STROKE | 
| 3428 |  |  |  |  |  |  | 0x01D4C2 =>       8,  #  𝓂  gc=Ll   sc=Common     MATHEMATICAL SCRIPT SMALL M | 
| 3429 |  |  |  |  |  |  | 0x001E3F =>       1,  #  ḿ  gc=Ll   sc=Latin      LATIN SMALL LETTER M WITH ACUTE | 
| 3430 |  |  |  |  |  |  | 0x001E41 =>       2,  #  ṁ  gc=Ll   sc=Latin      LATIN SMALL LETTER M WITH DOT ABOVE | 
| 3431 |  |  |  |  |  |  | 0x001E43 =>      10,  #  ṃ  gc=Ll   sc=Latin      LATIN SMALL LETTER M WITH DOT BELOW | 
| 3432 |  |  |  |  |  |  | 0x002133 =>    1203,  #  ℳ  gc=Lu   sc=Common     SCRIPT CAPITAL M | 
| 3433 |  |  |  |  |  |  | 0x01D4C3 =>      15,  #  𝓃  gc=Ll   sc=Common     MATHEMATICAL SCRIPT SMALL N | 
| 3434 |  |  |  |  |  |  | 0x000144 =>    1815,  #  ń  gc=Ll   sc=Latin      LATIN SMALL LETTER N WITH ACUTE | 
| 3435 |  |  |  |  |  |  | 0x000148 =>     217,  #  ň  gc=Ll   sc=Latin      LATIN SMALL LETTER N WITH CARON | 
| 3436 |  |  |  |  |  |  | 0x001E45 =>       4,  #  ṅ  gc=Ll   sc=Latin      LATIN SMALL LETTER N WITH DOT ABOVE | 
| 3437 |  |  |  |  |  |  | 0x001E47 =>      17,  #  ṇ  gc=Ll   sc=Latin      LATIN SMALL LETTER N WITH DOT BELOW | 
| 3438 |  |  |  |  |  |  | 0x0001F9 =>       1,  #  ǹ  gc=Ll   sc=Latin      LATIN SMALL LETTER N WITH GRAVE | 
| 3439 |  |  |  |  |  |  | 0x0000F1 =>   15640,  #  ñ  gc=Ll   sc=Latin      LATIN SMALL LETTER N WITH TILDE | 
| 3440 |  |  |  |  |  |  | 0x002115 =>     146,  #  ℕ  gc=Lu   sc=Common     DOUBLE-STRUCK CAPITAL N | 
| 3441 |  |  |  |  |  |  | 0x01D4A9 =>     262,  #  𝒩  gc=Lu   sc=Common     MATHEMATICAL SCRIPT CAPITAL N | 
| 3442 |  |  |  |  |  |  | 0x000143 =>       4,  #  Ń  gc=Lu   sc=Latin      LATIN CAPITAL LETTER N WITH ACUTE | 
| 3443 |  |  |  |  |  |  | 0x000145 =>       1,  #  Ņ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER N WITH CEDILLA | 
| 3444 |  |  |  |  |  |  | 0x0000D1 =>      49,  #  Ñ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER N WITH TILDE | 
| 3445 |  |  |  |  |  |  | 0x002116 =>      17,  #  №  gc=So   sc=Common     NUMERO SIGN | 
| 3446 |  |  |  |  |  |  | 0x00014B =>      15,  #  ŋ  gc=Ll   sc=Latin      LATIN SMALL LETTER ENG | 
| 3447 |  |  |  |  |  |  | 0x002134 =>      21,  #  ℴ  gc=Ll   sc=Common     SCRIPT SMALL O | 
| 3448 |  |  |  |  |  |  | 0x0000F3 =>   42241,  #  ó  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH ACUTE | 
| 3449 |  |  |  |  |  |  | 0x00014F =>       5,  #  ŏ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH BREVE | 
| 3450 |  |  |  |  |  |  | 0x0001D2 =>       4,  #  ǒ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH CARON | 
| 3451 |  |  |  |  |  |  | 0x0000F4 =>    9126,  #  ô  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH CIRCUMFLEX | 
| 3452 |  |  |  |  |  |  | 0x001ED1 =>       4,  #  ố  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE | 
| 3453 |  |  |  |  |  |  | 0x001ED3 =>       4,  #  ồ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE | 
| 3454 |  |  |  |  |  |  | 0x001ED5 =>       2,  #  ổ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE | 
| 3455 |  |  |  |  |  |  | 0x001ED7 =>       2,  #  ỗ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE | 
| 3456 |  |  |  |  |  |  | 0x0000F6 =>   86074,  #  ö  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH DIAERESIS | 
| 3457 |  |  |  |  |  |  | 0x001ECD =>       1,  #  ọ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH DOT BELOW | 
| 3458 |  |  |  |  |  |  | 0x000151 =>     290,  #  ő  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH DOUBLE ACUTE | 
| 3459 |  |  |  |  |  |  | 0x0000F2 =>    2103,  #  ò  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH GRAVE | 
| 3460 |  |  |  |  |  |  | 0x0001A1 =>       9,  #  ơ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH HORN | 
| 3461 |  |  |  |  |  |  | 0x001EDB =>      23,  #  ớ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH HORN AND ACUTE | 
| 3462 |  |  |  |  |  |  | 0x001EDF =>       1,  #  ở  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE | 
| 3463 |  |  |  |  |  |  | 0x00014D =>     273,  #  ō  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH MACRON | 
| 3464 |  |  |  |  |  |  | 0x0000F8 =>   19049,  #  ø  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH STROKE | 
| 3465 |  |  |  |  |  |  | 0x0001FF =>      22,  #  ǿ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH STROKE AND ACUTE | 
| 3466 |  |  |  |  |  |  | 0x0000F5 =>    1293,  #  õ  gc=Ll   sc=Latin      LATIN SMALL LETTER O WITH TILDE | 
| 3467 |  |  |  |  |  |  | 0x0000BA =>    2832,  #  º  gc=Ll   sc=Latin      MASCULINE ORDINAL INDICATOR | 
| 3468 |  |  |  |  |  |  | 0x01D546 =>       3,  #  𝕆  gc=Lu   sc=Common     MATHEMATICAL DOUBLE-STRUCK CAPITAL O | 
| 3469 |  |  |  |  |  |  | 0x01D4AA =>     137,  #  𝒪  gc=Lu   sc=Common     MATHEMATICAL SCRIPT CAPITAL O | 
| 3470 |  |  |  |  |  |  | 0x0000D3 =>     261,  #  Ó  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH ACUTE | 
| 3471 |  |  |  |  |  |  | 0x0001D1 =>       1,  #  Ǒ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH CARON | 
| 3472 |  |  |  |  |  |  | 0x0000D4 =>      69,  #  Ô  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH CIRCUMFLEX | 
| 3473 |  |  |  |  |  |  | 0x0000D6 =>    4893,  #  Ö  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH DIAERESIS | 
| 3474 |  |  |  |  |  |  | 0x000150 =>      22,  #  Ő  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH DOUBLE ACUTE | 
| 3475 |  |  |  |  |  |  | 0x0000D2 =>      38,  #  Ò  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH GRAVE | 
| 3476 |  |  |  |  |  |  | 0x001ECE =>       1,  #  Ỏ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH HOOK ABOVE | 
| 3477 |  |  |  |  |  |  | 0x00014C =>       6,  #  Ō  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH MACRON | 
| 3478 |  |  |  |  |  |  | 0x0000D8 =>    5210,  #  Ø  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH STROKE | 
| 3479 |  |  |  |  |  |  | 0x0001FE =>       7,  #  Ǿ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH STROKE AND ACUTE | 
| 3480 |  |  |  |  |  |  | 0x0000D5 =>      37,  #  Õ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER O WITH TILDE | 
| 3481 |  |  |  |  |  |  | 0x000153 =>     281,  #  œ  gc=Ll   sc=Latin      LATIN SMALL LIGATURE OE | 
| 3482 |  |  |  |  |  |  | 0x000152 =>      27,  #  Œ  gc=Lu   sc=Latin      LATIN CAPITAL LIGATURE OE | 
| 3483 |  |  |  |  |  |  | 0x000254 =>       1,  #  ɔ  gc=Ll   sc=Latin      LATIN SMALL LETTER OPEN O | 
| 3484 |  |  |  |  |  |  | 0x01D4C5 =>       2,  #  𝓅  gc=Ll   sc=Common     MATHEMATICAL SCRIPT SMALL P | 
| 3485 |  |  |  |  |  |  | 0x001E57 =>       1,  #  ṗ  gc=Ll   sc=Latin      LATIN SMALL LETTER P WITH DOT ABOVE | 
| 3486 |  |  |  |  |  |  | 0x002119 =>     400,  #  ℙ  gc=Lu   sc=Common     DOUBLE-STRUCK CAPITAL P | 
| 3487 |  |  |  |  |  |  | 0x01D4AB =>     258,  #  𝒫  gc=Lu   sc=Common     MATHEMATICAL SCRIPT CAPITAL P | 
| 3488 |  |  |  |  |  |  | 0x001E54 =>       4,  #  Ṕ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER P WITH ACUTE | 
| 3489 |  |  |  |  |  |  | 0x001E56 =>      16,  #  Ṗ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER P WITH DOT ABOVE | 
| 3490 |  |  |  |  |  |  | 0x0024C5 =>       1,  #  Ⓟ  gc=So   sc=Common     CIRCLED LATIN CAPITAL LETTER P | 
| 3491 |  |  |  |  |  |  | 0x000278 =>      71,  #  ɸ  gc=Ll   sc=Latin      LATIN SMALL LETTER PHI | 
| 3492 |  |  |  |  |  |  | 0x00211A =>      47,  #  ℚ  gc=Lu   sc=Common     DOUBLE-STRUCK CAPITAL Q | 
| 3493 |  |  |  |  |  |  | 0x01D4AC =>      30,  #  𝒬  gc=Lu   sc=Common     MATHEMATICAL SCRIPT CAPITAL Q | 
| 3494 |  |  |  |  |  |  | 0x000138 =>      44,  #  ĸ  gc=Ll   sc=Latin      LATIN SMALL LETTER KRA | 
| 3495 |  |  |  |  |  |  | 0x000155 =>      36,  #  ŕ  gc=Ll   sc=Latin      LATIN SMALL LETTER R WITH ACUTE | 
| 3496 |  |  |  |  |  |  | 0x000159 =>     624,  #  ř  gc=Ll   sc=Latin      LATIN SMALL LETTER R WITH CARON | 
| 3497 |  |  |  |  |  |  | 0x000157 =>       1,  #  ŗ  gc=Ll   sc=Latin      LATIN SMALL LETTER R WITH CEDILLA | 
| 3498 |  |  |  |  |  |  | 0x001E59 =>       2,  #  ṙ  gc=Ll   sc=Latin      LATIN SMALL LETTER R WITH DOT ABOVE | 
| 3499 |  |  |  |  |  |  | 0x001E5D =>       3,  #  ṝ  gc=Ll   sc=Latin      LATIN SMALL LETTER R WITH DOT BELOW AND MACRON | 
| 3500 |  |  |  |  |  |  | 0x000213 =>       2,  #  ȓ  gc=Ll   sc=Latin      LATIN SMALL LETTER R WITH INVERTED BREVE | 
| 3501 |  |  |  |  |  |  | 0x00211C =>     322,  #  ℜ  gc=Lu   sc=Common     BLACK-LETTER CAPITAL R | 
| 3502 |  |  |  |  |  |  | 0x00211D =>    1252,  #  ℝ  gc=Lu   sc=Common     DOUBLE-STRUCK CAPITAL R | 
| 3503 |  |  |  |  |  |  | 0x00211B =>     483,  #  ℛ  gc=Lu   sc=Common     SCRIPT CAPITAL R | 
| 3504 |  |  |  |  |  |  | 0x000154 =>       3,  #  Ŕ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER R WITH ACUTE | 
| 3505 |  |  |  |  |  |  | 0x000158 =>      42,  #  Ř  gc=Lu   sc=Latin      LATIN CAPITAL LETTER R WITH CARON | 
| 3506 |  |  |  |  |  |  | 0x0024C7 =>       2,  #  Ⓡ  gc=So   sc=Common     CIRCLED LATIN CAPITAL LETTER R | 
| 3507 |  |  |  |  |  |  | 0x00015B =>     698,  #  ś  gc=Ll   sc=Latin      LATIN SMALL LETTER S WITH ACUTE | 
| 3508 |  |  |  |  |  |  | 0x000161 =>    2291,  #  š  gc=Ll   sc=Latin      LATIN SMALL LETTER S WITH CARON | 
| 3509 |  |  |  |  |  |  | 0x00015F =>    1465,  #  ş  gc=Ll   sc=Latin      LATIN SMALL LETTER S WITH CEDILLA | 
| 3510 |  |  |  |  |  |  | 0x00015D =>      72,  #  ŝ  gc=Ll   sc=Latin      LATIN SMALL LETTER S WITH CIRCUMFLEX | 
| 3511 |  |  |  |  |  |  | 0x000219 =>       1,  #  ș  gc=Ll   sc=Latin      LATIN SMALL LETTER S WITH COMMA BELOW | 
| 3512 |  |  |  |  |  |  | 0x001E61 =>       1,  #  ṡ  gc=Ll   sc=Latin      LATIN SMALL LETTER S WITH DOT ABOVE | 
| 3513 |  |  |  |  |  |  | 0x001E63 =>      17,  #  ṣ  gc=Ll   sc=Latin      LATIN SMALL LETTER S WITH DOT BELOW | 
| 3514 |  |  |  |  |  |  | 0x01D54A =>      23,  #  𝕊  gc=Lu   sc=Common     MATHEMATICAL DOUBLE-STRUCK CAPITAL S | 
| 3515 |  |  |  |  |  |  | 0x01D516 =>       7,  #  𝔖  gc=Lu   sc=Common     MATHEMATICAL FRAKTUR CAPITAL S | 
| 3516 |  |  |  |  |  |  | 0x01D4AE =>     385,  #  𝒮  gc=Lu   sc=Common     MATHEMATICAL SCRIPT CAPITAL S | 
| 3517 |  |  |  |  |  |  | 0x00015A =>     150,  #  Ś  gc=Lu   sc=Latin      LATIN CAPITAL LETTER S WITH ACUTE | 
| 3518 |  |  |  |  |  |  | 0x000160 =>    1454,  #  Š  gc=Lu   sc=Latin      LATIN CAPITAL LETTER S WITH CARON | 
| 3519 |  |  |  |  |  |  | 0x00015E =>     312,  #  Ş  gc=Lu   sc=Latin      LATIN CAPITAL LETTER S WITH CEDILLA | 
| 3520 |  |  |  |  |  |  | 0x00015C =>     132,  #  Ŝ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER S WITH CIRCUMFLEX | 
| 3521 |  |  |  |  |  |  | 0x000218 =>       1,  #  Ș  gc=Lu   sc=Latin      LATIN CAPITAL LETTER S WITH COMMA BELOW | 
| 3522 |  |  |  |  |  |  | 0x001E60 =>       1,  #  Ṡ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER S WITH DOT ABOVE | 
| 3523 |  |  |  |  |  |  | 0x001E62 =>       8,  #  Ṣ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER S WITH DOT BELOW | 
| 3524 |  |  |  |  |  |  | 0x0000DF =>    8462,  #  ß  gc=Ll   sc=Latin      LATIN SMALL LETTER SHARP S | 
| 3525 |  |  |  |  |  |  | 0x000283 =>       1,  #  ʃ  gc=Ll   sc=Latin      LATIN SMALL LETTER ESH | 
| 3526 |  |  |  |  |  |  | 0x000165 =>      20,  #  ť  gc=Ll   sc=Latin      LATIN SMALL LETTER T WITH CARON | 
| 3527 |  |  |  |  |  |  | 0x000163 =>     214,  #  ţ  gc=Ll   sc=Latin      LATIN SMALL LETTER T WITH CEDILLA | 
| 3528 |  |  |  |  |  |  | 0x00021B =>       3,  #  ț  gc=Ll   sc=Latin      LATIN SMALL LETTER T WITH COMMA BELOW | 
| 3529 |  |  |  |  |  |  | 0x001E97 =>       3,  #  ẗ  gc=Ll   sc=Latin      LATIN SMALL LETTER T WITH DIAERESIS | 
| 3530 |  |  |  |  |  |  | 0x001E6D =>      39,  #  ṭ  gc=Ll   sc=Latin      LATIN SMALL LETTER T WITH DOT BELOW | 
| 3531 |  |  |  |  |  |  | 0x01D54B =>       4,  #  𝕋  gc=Lu   sc=Common     MATHEMATICAL DOUBLE-STRUCK CAPITAL T | 
| 3532 |  |  |  |  |  |  | 0x01D517 =>      18,  #  𝔗  gc=Lu   sc=Common     MATHEMATICAL FRAKTUR CAPITAL T | 
| 3533 |  |  |  |  |  |  | 0x01D4AF =>     450,  #  𝒯  gc=Lu   sc=Common     MATHEMATICAL SCRIPT CAPITAL T | 
| 3534 |  |  |  |  |  |  | 0x000162 =>      11,  #  Ţ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER T WITH CEDILLA | 
| 3535 |  |  |  |  |  |  | 0x001E6C =>       1,  #  Ṭ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER T WITH DOT BELOW | 
| 3536 |  |  |  |  |  |  | 0x002122 =>   37280,  #  ™  gc=So   sc=Common     TRADE MARK SIGN | 
| 3537 |  |  |  |  |  |  | 0x000167 =>      28,  #  ŧ  gc=Ll   sc=Latin      LATIN SMALL LETTER T WITH STROKE | 
| 3538 |  |  |  |  |  |  | 0x000166 =>      37,  #  Ŧ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER T WITH STROKE | 
| 3539 |  |  |  |  |  |  | 0x01D4CA =>       3,  #  𝓊  gc=Ll   sc=Common     MATHEMATICAL SCRIPT SMALL U | 
| 3540 |  |  |  |  |  |  | 0x0000FA =>    7713,  #  ú  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH ACUTE | 
| 3541 |  |  |  |  |  |  | 0x00016D =>      26,  #  ŭ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH BREVE | 
| 3542 |  |  |  |  |  |  | 0x0001D4 =>       2,  #  ǔ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH CARON | 
| 3543 |  |  |  |  |  |  | 0x0000FB =>     493,  #  û  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH CIRCUMFLEX | 
| 3544 |  |  |  |  |  |  | 0x0000FC =>   81674,  #  ü  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH DIAERESIS | 
| 3545 |  |  |  |  |  |  | 0x0001D8 =>       1,  #  ǘ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE | 
| 3546 |  |  |  |  |  |  | 0x0001DC =>       1,  #  ǜ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE | 
| 3547 |  |  |  |  |  |  | 0x001EE5 =>       2,  #  ụ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH DOT BELOW | 
| 3548 |  |  |  |  |  |  | 0x000171 =>     210,  #  ű  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH DOUBLE ACUTE | 
| 3549 |  |  |  |  |  |  | 0x000215 =>       1,  #  ȕ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH DOUBLE GRAVE | 
| 3550 |  |  |  |  |  |  | 0x0000F9 =>     465,  #  ù  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH GRAVE | 
| 3551 |  |  |  |  |  |  | 0x001EE7 =>       1,  #  ủ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH HOOK ABOVE | 
| 3552 |  |  |  |  |  |  | 0x0001B0 =>       8,  #  ư  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH HORN | 
| 3553 |  |  |  |  |  |  | 0x00016B =>     317,  #  ū  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH MACRON | 
| 3554 |  |  |  |  |  |  | 0x000173 =>      63,  #  ų  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH OGONEK | 
| 3555 |  |  |  |  |  |  | 0x00016F =>     104,  #  ů  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH RING ABOVE | 
| 3556 |  |  |  |  |  |  | 0x000169 =>      45,  #  ũ  gc=Ll   sc=Latin      LATIN SMALL LETTER U WITH TILDE | 
| 3557 |  |  |  |  |  |  | 0x01D54C =>       1,  #  𝕌  gc=Lu   sc=Common     MATHEMATICAL DOUBLE-STRUCK CAPITAL U | 
| 3558 |  |  |  |  |  |  | 0x01D4B0 =>      35,  #  𝒰  gc=Lu   sc=Common     MATHEMATICAL SCRIPT CAPITAL U | 
| 3559 |  |  |  |  |  |  | 0x0000DA =>     105,  #  Ú  gc=Lu   sc=Latin      LATIN CAPITAL LETTER U WITH ACUTE | 
| 3560 |  |  |  |  |  |  | 0x0000DB =>      16,  #  Û  gc=Lu   sc=Latin      LATIN CAPITAL LETTER U WITH CIRCUMFLEX | 
| 3561 |  |  |  |  |  |  | 0x0000DC =>    1677,  #  Ü  gc=Lu   sc=Latin      LATIN CAPITAL LETTER U WITH DIAERESIS | 
| 3562 |  |  |  |  |  |  | 0x000170 =>       5,  #  Ű  gc=Lu   sc=Latin      LATIN CAPITAL LETTER U WITH DOUBLE ACUTE | 
| 3563 |  |  |  |  |  |  | 0x0000D9 =>      18,  #  Ù  gc=Lu   sc=Latin      LATIN CAPITAL LETTER U WITH GRAVE | 
| 3564 |  |  |  |  |  |  | 0x00016A =>       4,  #  Ū  gc=Lu   sc=Latin      LATIN CAPITAL LETTER U WITH MACRON | 
| 3565 |  |  |  |  |  |  | 0x000168 =>      33,  #  Ũ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER U WITH TILDE | 
| 3566 |  |  |  |  |  |  | 0x00028A =>       3,  #  ʊ  gc=Ll   sc=Latin      LATIN SMALL LETTER UPSILON | 
| 3567 |  |  |  |  |  |  | 0x001E7D =>      14,  #  ṽ  gc=Ll   sc=Latin      LATIN SMALL LETTER V WITH TILDE | 
| 3568 |  |  |  |  |  |  | 0x01D54D =>       8,  #  𝕍  gc=Lu   sc=Common     MATHEMATICAL DOUBLE-STRUCK CAPITAL V | 
| 3569 |  |  |  |  |  |  | 0x01D4B1 =>      44,  #  𝒱  gc=Lu   sc=Common     MATHEMATICAL SCRIPT CAPITAL V | 
| 3570 |  |  |  |  |  |  | 0x001E7C =>       3,  #  Ṽ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER V WITH TILDE | 
| 3571 |  |  |  |  |  |  | 0x01D4CC =>       7,  #  𝓌  gc=Ll   sc=Common     MATHEMATICAL SCRIPT SMALL W | 
| 3572 |  |  |  |  |  |  | 0x000175 =>      23,  #  ŵ  gc=Ll   sc=Latin      LATIN SMALL LETTER W WITH CIRCUMFLEX | 
| 3573 |  |  |  |  |  |  | 0x01D4B2 =>      43,  #  𝒲  gc=Lu   sc=Common     MATHEMATICAL SCRIPT CAPITAL W | 
| 3574 |  |  |  |  |  |  | 0x000174 =>       5,  #  Ŵ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER W WITH CIRCUMFLEX | 
| 3575 |  |  |  |  |  |  | 0x001E84 =>       1,  #  Ẅ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER W WITH DIAERESIS | 
| 3576 |  |  |  |  |  |  | 0x001E86 =>      10,  #  Ẇ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER W WITH DOT ABOVE | 
| 3577 |  |  |  |  |  |  | 0x0024E6 =>       1,  #  ⓦ  gc=So   sc=Common     CIRCLED LATIN SMALL LETTER W | 
| 3578 |  |  |  |  |  |  | 0x01D535 =>      14,  #  𝔵  gc=Ll   sc=Common     MATHEMATICAL FRAKTUR SMALL X | 
| 3579 |  |  |  |  |  |  | 0x01D465 =>       9,  #  𝑥  gc=Ll   sc=Common     MATHEMATICAL ITALIC SMALL X | 
| 3580 |  |  |  |  |  |  | 0x001E8B =>      14,  #  ẋ  gc=Ll   sc=Latin      LATIN SMALL LETTER X WITH DOT ABOVE | 
| 3581 |  |  |  |  |  |  | 0x01D4B3 =>     285,  #  𝒳  gc=Lu   sc=Common     MATHEMATICAL SCRIPT CAPITAL X | 
| 3582 |  |  |  |  |  |  | 0x001E8A =>       2,  #  Ẋ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER X WITH DOT ABOVE | 
| 3583 |  |  |  |  |  |  | 0x002179 =>       1,  #  ⅹ  gc=Nl   sc=Latin      SMALL ROMAN NUMERAL TEN | 
| 3584 |  |  |  |  |  |  | 0x01D4CE =>       9,  #  𝓎  gc=Ll   sc=Common     MATHEMATICAL SCRIPT SMALL Y | 
| 3585 |  |  |  |  |  |  | 0x0000FD =>     974,  #  ý  gc=Ll   sc=Latin      LATIN SMALL LETTER Y WITH ACUTE | 
| 3586 |  |  |  |  |  |  | 0x000177 =>      48,  #  ŷ  gc=Ll   sc=Latin      LATIN SMALL LETTER Y WITH CIRCUMFLEX | 
| 3587 |  |  |  |  |  |  | 0x0000FF =>      80,  #  ÿ  gc=Ll   sc=Latin      LATIN SMALL LETTER Y WITH DIAERESIS | 
| 3588 |  |  |  |  |  |  | 0x001E8F =>       7,  #  ẏ  gc=Ll   sc=Latin      LATIN SMALL LETTER Y WITH DOT ABOVE | 
| 3589 |  |  |  |  |  |  | 0x001EF3 =>       3,  #  ỳ  gc=Ll   sc=Latin      LATIN SMALL LETTER Y WITH GRAVE | 
| 3590 |  |  |  |  |  |  | 0x001EF7 =>       1,  #  ỷ  gc=Ll   sc=Latin      LATIN SMALL LETTER Y WITH HOOK ABOVE | 
| 3591 |  |  |  |  |  |  | 0x000233 =>      42,  #  ȳ  gc=Ll   sc=Latin      LATIN SMALL LETTER Y WITH MACRON | 
| 3592 |  |  |  |  |  |  | 0x001EF9 =>      36,  #  ỹ  gc=Ll   sc=Latin      LATIN SMALL LETTER Y WITH TILDE | 
| 3593 |  |  |  |  |  |  | 0x01D4B4 =>      42,  #  𝒴  gc=Lu   sc=Common     MATHEMATICAL SCRIPT CAPITAL Y | 
| 3594 |  |  |  |  |  |  | 0x0000DD =>      38,  #  Ý  gc=Lu   sc=Latin      LATIN CAPITAL LETTER Y WITH ACUTE | 
| 3595 |  |  |  |  |  |  | 0x000176 =>      92,  #  Ŷ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER Y WITH CIRCUMFLEX | 
| 3596 |  |  |  |  |  |  | 0x000178 =>       7,  #  Ÿ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER Y WITH DIAERESIS | 
| 3597 |  |  |  |  |  |  | 0x001E8E =>       6,  #  Ẏ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER Y WITH DOT ABOVE | 
| 3598 |  |  |  |  |  |  | 0x001EF2 =>       2,  #  Ỳ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER Y WITH GRAVE | 
| 3599 |  |  |  |  |  |  | 0x000232 =>      32,  #  Ȳ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER Y WITH MACRON | 
| 3600 |  |  |  |  |  |  | 0x001EF8 =>       4,  #  Ỹ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER Y WITH TILDE | 
| 3601 |  |  |  |  |  |  | 0x01D4CF =>       4,  #  𝓏  gc=Ll   sc=Common     MATHEMATICAL SCRIPT SMALL Z | 
| 3602 |  |  |  |  |  |  | 0x00017A =>     333,  #  ź  gc=Ll   sc=Latin      LATIN SMALL LETTER Z WITH ACUTE | 
| 3603 |  |  |  |  |  |  | 0x00017E =>     764,  #  ž  gc=Ll   sc=Latin      LATIN SMALL LETTER Z WITH CARON | 
| 3604 |  |  |  |  |  |  | 0x001E91 =>       6,  #  ẑ  gc=Ll   sc=Latin      LATIN SMALL LETTER Z WITH CIRCUMFLEX | 
| 3605 |  |  |  |  |  |  | 0x00017C =>     444,  #  ż  gc=Ll   sc=Latin      LATIN SMALL LETTER Z WITH DOT ABOVE | 
| 3606 |  |  |  |  |  |  | 0x001E93 =>       1,  #  ẓ  gc=Ll   sc=Latin      LATIN SMALL LETTER Z WITH DOT BELOW | 
| 3607 |  |  |  |  |  |  | 0x002124 =>     114,  #  ℤ  gc=Lu   sc=Common     DOUBLE-STRUCK CAPITAL Z | 
| 3608 |  |  |  |  |  |  | 0x01D4B5 =>      41,  #  𝒵  gc=Lu   sc=Common     MATHEMATICAL SCRIPT CAPITAL Z | 
| 3609 |  |  |  |  |  |  | 0x000179 =>       1,  #  Ź  gc=Lu   sc=Latin      LATIN CAPITAL LETTER Z WITH ACUTE | 
| 3610 |  |  |  |  |  |  | 0x00017D =>     338,  #  Ž  gc=Lu   sc=Latin      LATIN CAPITAL LETTER Z WITH CARON | 
| 3611 |  |  |  |  |  |  | 0x00017B =>     119,  #  Ż  gc=Lu   sc=Latin      LATIN CAPITAL LETTER Z WITH DOT ABOVE | 
| 3612 |  |  |  |  |  |  | 0x0001B5 =>      40,  #  Ƶ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER Z WITH STROKE | 
| 3613 |  |  |  |  |  |  | 0x000292 =>       3,  #  ʒ  gc=Ll   sc=Latin      LATIN SMALL LETTER EZH | 
| 3614 |  |  |  |  |  |  | 0x00021D =>       2,  #  ȝ  gc=Ll   sc=Latin      LATIN SMALL LETTER YOGH | 
| 3615 |  |  |  |  |  |  | 0x0000FE =>      61,  #  þ  gc=Ll   sc=Latin      LATIN SMALL LETTER THORN | 
| 3616 |  |  |  |  |  |  | 0x0000DE =>      25,  #  Þ  gc=Lu   sc=Latin      LATIN CAPITAL LETTER THORN | 
| 3617 |  |  |  |  |  |  | 0x0002BC =>      32,  #  ʼ  gc=Lm   sc=Common     MODIFIER LETTER APOSTROPHE | 
| 3618 |  |  |  |  |  |  | 0x0001C1 =>       7,  #  ǁ  gc=Lo   sc=Latin      LATIN LETTER LATERAL CLICK | 
| 3619 |  |  |  |  |  |  | 0x0001C2 =>       9,  #  ǂ  gc=Lo   sc=Latin      LATIN LETTER ALVEOLAR CLICK | 
| 3620 |  |  |  |  |  |  | 0x0002AC =>       1,  #  ʬ  gc=Ll   sc=Latin      LATIN LETTER BILABIAL PERCUSSIVE | 
| 3621 |  |  |  |  |  |  | 0x0003B1 =>  512312,  #  α  gc=Ll   sc=Greek      GREEK SMALL LETTER ALPHA | 
| 3622 |  |  |  |  |  |  | 0x001FB1 =>       5,  #  ᾱ  gc=Ll   sc=Greek      GREEK SMALL LETTER ALPHA WITH MACRON | 
| 3623 |  |  |  |  |  |  | 0x001FB6 =>       6,  #  ᾶ  gc=Ll   sc=Greek      GREEK SMALL LETTER ALPHA WITH PERISPOMENI | 
| 3624 |  |  |  |  |  |  | 0x001F00 =>       4,  #  ἀ  gc=Ll   sc=Greek      GREEK SMALL LETTER ALPHA WITH PSILI | 
| 3625 |  |  |  |  |  |  | 0x0003AC =>     102,  #  ά  gc=Ll   sc=Greek      GREEK SMALL LETTER ALPHA WITH TONOS | 
| 3626 |  |  |  |  |  |  | 0x000391 =>     140,  #  Α  gc=Lu   sc=Greek      GREEK CAPITAL LETTER ALPHA | 
| 3627 |  |  |  |  |  |  | 0x000386 =>       1,  #  Ά  gc=Lu   sc=Greek      GREEK CAPITAL LETTER ALPHA WITH TONOS | 
| 3628 |  |  |  |  |  |  | 0x0003D0 =>       1,  #  ϐ  gc=Ll   sc=Greek      GREEK BETA SYMBOL | 
| 3629 |  |  |  |  |  |  | 0x0003B2 =>  519669,  #  β  gc=Ll   sc=Greek      GREEK SMALL LETTER BETA | 
| 3630 |  |  |  |  |  |  | 0x000392 =>     167,  #  Β  gc=Lu   sc=Greek      GREEK CAPITAL LETTER BETA | 
| 3631 |  |  |  |  |  |  | 0x0003B3 =>  191986,  #  γ  gc=Ll   sc=Greek      GREEK SMALL LETTER GAMMA | 
| 3632 |  |  |  |  |  |  | 0x000393 =>    5298,  #  Γ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER GAMMA | 
| 3633 |  |  |  |  |  |  | 0x0003B4 =>   58415,  #  δ  gc=Ll   sc=Greek      GREEK SMALL LETTER DELTA | 
| 3634 |  |  |  |  |  |  | 0x000394 =>  220464,  #  Δ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER DELTA | 
| 3635 |  |  |  |  |  |  | 0x0003F5 =>    2414,  #  ϵ  gc=Ll   sc=Greek      GREEK LUNATE EPSILON SYMBOL | 
| 3636 |  |  |  |  |  |  | 0x0003B5 =>   28136,  #  ε  gc=Ll   sc=Greek      GREEK SMALL LETTER EPSILON | 
| 3637 |  |  |  |  |  |  | 0x001F14 =>       2,  #  ἔ  gc=Ll   sc=Greek      GREEK SMALL LETTER EPSILON WITH PSILI AND OXIA | 
| 3638 |  |  |  |  |  |  | 0x0003AD =>      25,  #  έ  gc=Ll   sc=Greek      GREEK SMALL LETTER EPSILON WITH TONOS | 
| 3639 |  |  |  |  |  |  | 0x000395 =>      51,  #  Ε  gc=Lu   sc=Greek      GREEK CAPITAL LETTER EPSILON | 
| 3640 |  |  |  |  |  |  | 0x0003B6 =>    7757,  #  ζ  gc=Ll   sc=Greek      GREEK SMALL LETTER ZETA | 
| 3641 |  |  |  |  |  |  | 0x000396 =>       6,  #  Ζ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER ZETA | 
| 3642 |  |  |  |  |  |  | 0x0003B7 =>   11342,  #  η  gc=Ll   sc=Greek      GREEK SMALL LETTER ETA | 
| 3643 |  |  |  |  |  |  | 0x001FC6 =>       2,  #  ῆ  gc=Ll   sc=Greek      GREEK SMALL LETTER ETA WITH PERISPOMENI | 
| 3644 |  |  |  |  |  |  | 0x0003AE =>      59,  #  ή  gc=Ll   sc=Greek      GREEK SMALL LETTER ETA WITH TONOS | 
| 3645 |  |  |  |  |  |  | 0x000397 =>      61,  #  Η  gc=Lu   sc=Greek      GREEK CAPITAL LETTER ETA | 
| 3646 |  |  |  |  |  |  | 0x0003B8 =>   28775,  #  θ  gc=Ll   sc=Greek      GREEK SMALL LETTER THETA | 
| 3647 |  |  |  |  |  |  | 0x0003D1 =>     550,  #  ϑ  gc=Ll   sc=Greek      GREEK THETA SYMBOL | 
| 3648 |  |  |  |  |  |  | 0x000398 =>    3610,  #  Θ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER THETA | 
| 3649 |  |  |  |  |  |  | 0x0003F4 =>       1,  #  ϴ  gc=Lu   sc=Greek      GREEK CAPITAL THETA SYMBOL | 
| 3650 |  |  |  |  |  |  | 0x0003B9 =>    1252,  #  ι  gc=Ll   sc=Greek      GREEK SMALL LETTER IOTA | 
| 3651 |  |  |  |  |  |  | 0x0003CA =>       6,  #  ϊ  gc=Ll   sc=Greek      GREEK SMALL LETTER IOTA WITH DIALYTIKA | 
| 3652 |  |  |  |  |  |  | 0x000390 =>       1,  #  ΐ  gc=Ll   sc=Greek      GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS | 
| 3653 |  |  |  |  |  |  | 0x0003AF =>      62,  #  ί  gc=Ll   sc=Greek      GREEK SMALL LETTER IOTA WITH TONOS | 
| 3654 |  |  |  |  |  |  | 0x000399 =>     121,  #  Ι  gc=Lu   sc=Greek      GREEK CAPITAL LETTER IOTA | 
| 3655 |  |  |  |  |  |  | 0x0003AA =>      27,  #  Ϊ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER IOTA WITH DIALYTIKA | 
| 3656 |  |  |  |  |  |  | 0x0003BA =>   82276,  #  κ  gc=Ll   sc=Greek      GREEK SMALL LETTER KAPPA | 
| 3657 |  |  |  |  |  |  | 0x00039A =>      75,  #  Κ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER KAPPA | 
| 3658 |  |  |  |  |  |  | 0x0003BB =>   42333,  #  λ  gc=Ll   sc=Greek      GREEK SMALL LETTER LAMDA | 
| 3659 |  |  |  |  |  |  | 0x00039B =>    2478,  #  Λ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER LAMDA | 
| 3660 |  |  |  |  |  |  | 0x0000B5 =>  203225,  #  µ  gc=Ll   sc=Common     MICRO SIGN | 
| 3661 |  |  |  |  |  |  | 0x0003BC =>  528576,  #  μ  gc=Ll   sc=Greek      GREEK SMALL LETTER MU | 
| 3662 |  |  |  |  |  |  | 0x00039C =>      99,  #  Μ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER MU | 
| 3663 |  |  |  |  |  |  | 0x00338D =>       3,  #  ㎍ gc=So   sc=Common     SQUARE MU G | 
| 3664 |  |  |  |  |  |  | 0x003395 =>      12,  #  ㎕ gc=So   sc=Common     SQUARE MU L | 
| 3665 |  |  |  |  |  |  | 0x00339B =>       1,  #  ㎛ gc=So   sc=Common     SQUARE MU M | 
| 3666 |  |  |  |  |  |  | 0x0003BD =>   12220,  #  ν  gc=Ll   sc=Greek      GREEK SMALL LETTER NU | 
| 3667 |  |  |  |  |  |  | 0x00039D =>      62,  #  Ν  gc=Lu   sc=Greek      GREEK CAPITAL LETTER NU | 
| 3668 |  |  |  |  |  |  | 0x0003BE =>    4484,  #  ξ  gc=Ll   sc=Greek      GREEK SMALL LETTER XI | 
| 3669 |  |  |  |  |  |  | 0x00039E =>     329,  #  Ξ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER XI | 
| 3670 |  |  |  |  |  |  | 0x0003BF =>     582,  #  ο  gc=Ll   sc=Greek      GREEK SMALL LETTER OMICRON | 
| 3671 |  |  |  |  |  |  | 0x001F45 =>       2,  #  ὅ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA | 
| 3672 |  |  |  |  |  |  | 0x001F44 =>       7,  #  ὄ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMICRON WITH PSILI AND OXIA | 
| 3673 |  |  |  |  |  |  | 0x0003CC =>      49,  #  ό  gc=Ll   sc=Greek      GREEK SMALL LETTER OMICRON WITH TONOS | 
| 3674 |  |  |  |  |  |  | 0x00039F =>      25,  #  Ο  gc=Lu   sc=Greek      GREEK CAPITAL LETTER OMICRON | 
| 3675 |  |  |  |  |  |  | 0x0003D6 =>      90,  #  ϖ  gc=Ll   sc=Greek      GREEK PI SYMBOL | 
| 3676 |  |  |  |  |  |  | 0x0003C0 =>   21146,  #  π  gc=Ll   sc=Greek      GREEK SMALL LETTER PI | 
| 3677 |  |  |  |  |  |  | 0x0003A0 =>    1582,  #  Π  gc=Lu   sc=Greek      GREEK CAPITAL LETTER PI | 
| 3678 |  |  |  |  |  |  | 0x0003F1 =>      55,  #  ϱ  gc=Ll   sc=Greek      GREEK RHO SYMBOL | 
| 3679 |  |  |  |  |  |  | 0x0003C1 =>   18253,  #  ρ  gc=Ll   sc=Greek      GREEK SMALL LETTER RHO | 
| 3680 |  |  |  |  |  |  | 0x0003A1 =>      17,  #  Ρ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER RHO | 
| 3681 |  |  |  |  |  |  | 0x0003C2 =>     288,  #  ς  gc=Ll   sc=Greek      GREEK SMALL LETTER FINAL SIGMA | 
| 3682 |  |  |  |  |  |  | 0x0003C3 =>   44186,  #  σ  gc=Ll   sc=Greek      GREEK SMALL LETTER SIGMA | 
| 3683 |  |  |  |  |  |  | 0x0003A3 =>    9392,  #  Σ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER SIGMA | 
| 3684 |  |  |  |  |  |  | 0x0003C4 =>   29633,  #  τ  gc=Ll   sc=Greek      GREEK SMALL LETTER TAU | 
| 3685 |  |  |  |  |  |  | 0x0003A4 =>      72,  #  Τ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER TAU | 
| 3686 |  |  |  |  |  |  | 0x0003C5 =>    1449,  #  υ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON | 
| 3687 |  |  |  |  |  |  | 0x001F55 =>       2,  #  ὕ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH DASIA AND OXIA | 
| 3688 |  |  |  |  |  |  | 0x0003CB =>      18,  #  ϋ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH DIALYTIKA | 
| 3689 |  |  |  |  |  |  | 0x0003B0 =>      12,  #  ΰ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS | 
| 3690 |  |  |  |  |  |  | 0x001F50 =>       4,  #  ὐ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH PSILI | 
| 3691 |  |  |  |  |  |  | 0x0003CD =>      27,  #  ύ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH TONOS | 
| 3692 |  |  |  |  |  |  | 0x0003A5 =>      54,  #  Υ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER UPSILON | 
| 3693 |  |  |  |  |  |  | 0x0003D2 =>     117,  #  ϒ  gc=Lu   sc=Greek      GREEK UPSILON WITH HOOK SYMBOL | 
| 3694 |  |  |  |  |  |  | 0x0003D5 =>   10025,  #  ϕ  gc=Ll   sc=Greek      GREEK PHI SYMBOL | 
| 3695 |  |  |  |  |  |  | 0x0003C6 =>   13777,  #  φ  gc=Ll   sc=Greek      GREEK SMALL LETTER PHI | 
| 3696 |  |  |  |  |  |  | 0x0003A6 =>   12067,  #  Φ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER PHI | 
| 3697 |  |  |  |  |  |  | 0x0003C7 =>   32188,  #  χ  gc=Ll   sc=Greek      GREEK SMALL LETTER CHI | 
| 3698 |  |  |  |  |  |  | 0x0003A7 =>     767,  #  Χ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER CHI | 
| 3699 |  |  |  |  |  |  | 0x0003C8 =>    8392,  #  ψ  gc=Ll   sc=Greek      GREEK SMALL LETTER PSI | 
| 3700 |  |  |  |  |  |  | 0x0003A8 =>    7927,  #  Ψ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER PSI | 
| 3701 |  |  |  |  |  |  | 0x0003C9 =>   20779,  #  ω  gc=Ll   sc=Greek      GREEK SMALL LETTER OMEGA | 
| 3702 |  |  |  |  |  |  | 0x0003CE =>      42,  #  ώ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMEGA WITH TONOS | 
| 3703 |  |  |  |  |  |  | 0x0003A9 =>    8698,  #  Ω  gc=Lu   sc=Greek      GREEK CAPITAL LETTER OMEGA | 
| 3704 |  |  |  |  |  |  | 0x00038F =>       2,  #  Ώ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER OMEGA WITH TONOS | 
| 3705 |  |  |  |  |  |  | 0x002126 =>     267,  #  Ω  gc=Lu   sc=Greek      OHM SIGN | 
| 3706 |  |  |  |  |  |  | 0x0003EC =>      35,  #  Ϭ  gc=Lu   sc=Coptic     COPTIC CAPITAL LETTER SHIMA | 
| 3707 |  |  |  |  |  |  | 0x000430 =>      27,  #  а  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER A | 
| 3708 |  |  |  |  |  |  | 0x000410 =>       4,  #  А  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER A | 
| 3709 |  |  |  |  |  |  | 0x0004D9 =>       8,  #  ә  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER SCHWA | 
| 3710 |  |  |  |  |  |  | 0x000431 =>      10,  #  б  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER BE | 
| 3711 |  |  |  |  |  |  | 0x000432 =>       4,  #  в  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER VE | 
| 3712 |  |  |  |  |  |  | 0x000433 =>       2,  #  г  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER GHE | 
| 3713 |  |  |  |  |  |  | 0x000413 =>      46,  #  Г  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER GHE | 
| 3714 |  |  |  |  |  |  | 0x000434 =>       3,  #  д  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER DE | 
| 3715 |  |  |  |  |  |  | 0x000452 =>       1,  #  ђ  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER DJE | 
| 3716 |  |  |  |  |  |  | 0x000435 =>      10,  #  е  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER IE | 
| 3717 |  |  |  |  |  |  | 0x000415 =>       1,  #  Е  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER IE | 
| 3718 |  |  |  |  |  |  | 0x000454 =>      19,  #  є  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER UKRAINIAN IE | 
| 3719 |  |  |  |  |  |  | 0x000404 =>      23,  #  Є  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER UKRAINIAN IE | 
| 3720 |  |  |  |  |  |  | 0x000436 =>      11,  #  ж  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER ZHE | 
| 3721 |  |  |  |  |  |  | 0x000416 =>      28,  #  Ж  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER ZHE | 
| 3722 |  |  |  |  |  |  | 0x000437 =>       3,  #  з  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER ZE | 
| 3723 |  |  |  |  |  |  | 0x000417 =>       2,  #  З  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER ZE | 
| 3724 |  |  |  |  |  |  | 0x000438 =>      12,  #  и  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER I | 
| 3725 |  |  |  |  |  |  | 0x000418 =>      18,  #  И  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER I | 
| 3726 |  |  |  |  |  |  | 0x000456 =>       3,  #  і  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I | 
| 3727 |  |  |  |  |  |  | 0x000406 =>      30,  #  І  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I | 
| 3728 |  |  |  |  |  |  | 0x000457 =>      15,  #  ї  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER YI | 
| 3729 |  |  |  |  |  |  | 0x000439 =>       4,  #  й  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER SHORT I | 
| 3730 |  |  |  |  |  |  | 0x00043A =>      93,  #  к  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER KA | 
| 3731 |  |  |  |  |  |  | 0x00041A =>      21,  #  К  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER KA | 
| 3732 |  |  |  |  |  |  | 0x00043B =>       6,  #  л  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER EL | 
| 3733 |  |  |  |  |  |  | 0x00041B =>      21,  #  Л  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER EL | 
| 3734 |  |  |  |  |  |  | 0x00043C =>       4,  #  м  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER EM | 
| 3735 |  |  |  |  |  |  | 0x00041C =>      11,  #  М  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER EM | 
| 3736 |  |  |  |  |  |  | 0x00043D =>       5,  #  н  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER EN | 
| 3737 |  |  |  |  |  |  | 0x00041D =>       8,  #  Н  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER EN | 
| 3738 |  |  |  |  |  |  | 0x00043E =>       6,  #  о  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER O | 
| 3739 |  |  |  |  |  |  | 0x00041E =>       6,  #  О  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER O | 
| 3740 |  |  |  |  |  |  | 0x0004E7 =>       3,  #  ӧ  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER O WITH DIAERESIS | 
| 3741 |  |  |  |  |  |  | 0x0004E8 =>       1,  #  Ө  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER BARRED O | 
| 3742 |  |  |  |  |  |  | 0x00043F =>       2,  #  п  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER PE | 
| 3743 |  |  |  |  |  |  | 0x00041F =>      26,  #  П  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER PE | 
| 3744 |  |  |  |  |  |  | 0x000440 =>      16,  #  р  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER ER | 
| 3745 |  |  |  |  |  |  | 0x000420 =>       4,  #  Р  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER ER | 
| 3746 |  |  |  |  |  |  | 0x000441 =>       7,  #  с  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER ES | 
| 3747 |  |  |  |  |  |  | 0x000421 =>      25,  #  С  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER ES | 
| 3748 |  |  |  |  |  |  | 0x000442 =>       4,  #  т  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER TE | 
| 3749 |  |  |  |  |  |  | 0x000422 =>       5,  #  Т  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER TE | 
| 3750 |  |  |  |  |  |  | 0x00045B =>       2,  #  ћ  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER TSHE | 
| 3751 |  |  |  |  |  |  | 0x000443 =>       4,  #  у  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER U | 
| 3752 |  |  |  |  |  |  | 0x00045E =>       1,  #  ў  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER SHORT U | 
| 3753 |  |  |  |  |  |  | 0x0004B1 =>       6,  #  ұ  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE | 
| 3754 |  |  |  |  |  |  | 0x0004B0 =>       7,  #  Ұ  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE | 
| 3755 |  |  |  |  |  |  | 0x000444 =>       7,  #  ф  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER EF | 
| 3756 |  |  |  |  |  |  | 0x000424 =>     234,  #  Ф  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER EF | 
| 3757 |  |  |  |  |  |  | 0x000445 =>       5,  #  х  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER HA | 
| 3758 |  |  |  |  |  |  | 0x0004B3 =>       1,  #  ҳ  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER HA WITH DESCENDER | 
| 3759 |  |  |  |  |  |  | 0x000447 =>       2,  #  ч  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER CHE | 
| 3760 |  |  |  |  |  |  | 0x000428 =>      19,  #  Ш  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER SHA | 
| 3761 |  |  |  |  |  |  | 0x00044A =>       1,  #  ъ  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER HARD SIGN | 
| 3762 |  |  |  |  |  |  | 0x00044B =>       5,  #  ы  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER YERU | 
| 3763 |  |  |  |  |  |  | 0x00044C =>       1,  #  ь  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER SOFT SIGN | 
| 3764 |  |  |  |  |  |  | 0x00044E =>       1,  #  ю  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER YU | 
| 3765 |  |  |  |  |  |  | 0x00044F =>       3,  #  я  gc=Ll   sc=Cyrillic   CYRILLIC SMALL LETTER YA | 
| 3766 |  |  |  |  |  |  | 0x002135 =>      42,  #  ℵ  gc=Lo   sc=Common     ALEF SYMBOL | 
| 3767 |  |  |  |  |  |  | 0x0005D0 =>       1,  #  א  gc=Lo   sc=Hebrew     HEBREW LETTER ALEF | 
| 3768 |  |  |  |  |  |  | 0x0005D1 =>       1,  #  ב  gc=Lo   sc=Hebrew     HEBREW LETTER BET | 
| 3769 |  |  |  |  |  |  | 0x002138 =>       2,  #  ℸ  gc=Lo   sc=Common     DALET SYMBOL | 
| 3770 |  |  |  |  |  |  | 0x0005DA =>       1,  #  ך  gc=Lo   sc=Hebrew     HEBREW LETTER FINAL KAF | 
| 3771 |  |  |  |  |  |  | 0x0005DB =>       9,  #  כ  gc=Lo   sc=Hebrew     HEBREW LETTER KAF | 
| 3772 |  |  |  |  |  |  | 0x0005DD =>       1,  #  ם  gc=Lo   sc=Hebrew     HEBREW LETTER FINAL MEM | 
| 3773 |  |  |  |  |  |  | 0x0005DE =>       1,  #  מ  gc=Lo   sc=Hebrew     HEBREW LETTER MEM | 
| 3774 |  |  |  |  |  |  | 0x0005DF =>       1,  #  ן  gc=Lo   sc=Hebrew     HEBREW LETTER FINAL NUN | 
| 3775 |  |  |  |  |  |  | 0x0005E0 =>       1,  #  נ  gc=Lo   sc=Hebrew     HEBREW LETTER NUN | 
| 3776 |  |  |  |  |  |  | 0x0005E3 =>       1,  #  ף  gc=Lo   sc=Hebrew     HEBREW LETTER FINAL PE | 
| 3777 |  |  |  |  |  |  | 0x0005E4 =>       2,  #  פ  gc=Lo   sc=Hebrew     HEBREW LETTER PE | 
| 3778 |  |  |  |  |  |  | 0x0005E5 =>       3,  #  ץ  gc=Lo   sc=Hebrew     HEBREW LETTER FINAL TSADI | 
| 3779 |  |  |  |  |  |  | 0x0005E6 =>       1,  #  צ  gc=Lo   sc=Hebrew     HEBREW LETTER TSADI | 
| 3780 |  |  |  |  |  |  | 0x0005E7 =>       1,  #  ק  gc=Lo   sc=Hebrew     HEBREW LETTER QOF | 
| 3781 |  |  |  |  |  |  | 0x000627 =>       1,  #  ا  gc=Lo   sc=Arabic     ARABIC LETTER ALEF | 
| 3782 |  |  |  |  |  |  | 0x000628 =>       1,  #  ب  gc=Lo   sc=Arabic     ARABIC LETTER BEH | 
| 3783 |  |  |  |  |  |  | 0x00062A =>       1,  #  ت  gc=Lo   sc=Arabic     ARABIC LETTER TEH | 
| 3784 |  |  |  |  |  |  | 0x00062B =>       1,  #  ث  gc=Lo   sc=Arabic     ARABIC LETTER THEH | 
| 3785 |  |  |  |  |  |  | 0x000646 =>       1,  #  ن  gc=Lo   sc=Arabic     ARABIC LETTER NOON | 
| 3786 |  |  |  |  |  |  | 0x000647 =>       4,  #  ه  gc=Lo   sc=Arabic     ARABIC LETTER HEH | 
| 3787 |  |  |  |  |  |  | 0x000648 =>       1,  #  و  gc=Lo   sc=Arabic     ARABIC LETTER WAW | 
| 3788 |  |  |  |  |  |  | 0x00064A =>       1,  #  ي  gc=Lo   sc=Arabic     ARABIC LETTER YEH | 
| 3789 |  |  |  |  |  |  | 0x003131 =>       1,  #  ㄱ gc=Lo   sc=Hangul     HANGUL LETTER KIYEOK | 
| 3790 |  |  |  |  |  |  | 0x00AC00 =>       2,  #  가 gc=Lo   sc=Hangul     HANGUL SYLLABLE GA | 
| 3791 |  |  |  |  |  |  | 0x00AC01 =>       4,  #  각 gc=Lo   sc=Hangul     HANGUL SYLLABLE GAG | 
| 3792 |  |  |  |  |  |  | 0x00AC04 =>       5,  #  간 gc=Lo   sc=Hangul     HANGUL SYLLABLE GAN | 
| 3793 |  |  |  |  |  |  | 0x00AC19 =>       2,  #  같 gc=Lo   sc=Hangul     HANGUL SYLLABLE GAT | 
| 3794 |  |  |  |  |  |  | 0x00AC83 =>       3,  #  것 gc=Lo   sc=Hangul     HANGUL SYLLABLE GEOS | 
| 3795 |  |  |  |  |  |  | 0x00AC8C =>       2,  #  게 gc=Lo   sc=Hangul     HANGUL SYLLABLE GE | 
| 3796 |  |  |  |  |  |  | 0x00ACBD =>       1,  #  경 gc=Lo   sc=Hangul     HANGUL SYLLABLE GYEONG | 
| 3797 |  |  |  |  |  |  | 0x00ACE0 =>       7,  #  고 gc=Lo   sc=Hangul     HANGUL SYLLABLE GO | 
| 3798 |  |  |  |  |  |  | 0x00ACFC =>       5,  #  과 gc=Lo   sc=Hangul     HANGUL SYLLABLE GWA | 
| 3799 |  |  |  |  |  |  | 0x00AD50 =>       2,  #  교 gc=Lo   sc=Hangul     HANGUL SYLLABLE GYO | 
| 3800 |  |  |  |  |  |  | 0x00AD6C =>       2,  #  구 gc=Lo   sc=Hangul     HANGUL SYLLABLE GU | 
| 3801 |  |  |  |  |  |  | 0x00AD6D =>      37,  #  국 gc=Lo   sc=Hangul     HANGUL SYLLABLE GUG | 
| 3802 |  |  |  |  |  |  | 0x00ADDC =>       2,  #  규 gc=Lo   sc=Hangul     HANGUL SYLLABLE GYU | 
| 3803 |  |  |  |  |  |  | 0x00ADFC =>       2,  #  근 gc=Lo   sc=Hangul     HANGUL SYLLABLE GEUN | 
| 3804 |  |  |  |  |  |  | 0x00AE4C =>      20,  #  까 gc=Lo   sc=Hangul     HANGUL SYLLABLE GGA | 
| 3805 |  |  |  |  |  |  | 0x00B098 =>       5,  #  나 gc=Lo   sc=Hangul     HANGUL SYLLABLE NA | 
| 3806 |  |  |  |  |  |  | 0x00B0B8 =>       1,  #  낸 gc=Lo   sc=Hangul     HANGUL SYLLABLE NAEN | 
| 3807 |  |  |  |  |  |  | 0x00B144 =>       6,  #  년 gc=Lo   sc=Hangul     HANGUL SYLLABLE NYEON | 
| 3808 |  |  |  |  |  |  | 0x00B290 =>       4,  #  느 gc=Lo   sc=Hangul     HANGUL SYLLABLE NEU | 
| 3809 |  |  |  |  |  |  | 0x00B294 =>       8,  #  는 gc=Lo   sc=Hangul     HANGUL SYLLABLE NEUN | 
| 3810 |  |  |  |  |  |  | 0x00B2C8 =>      21,  #  니 gc=Lo   sc=Hangul     HANGUL SYLLABLE NI | 
| 3811 |  |  |  |  |  |  | 0x00B2E4 =>      18,  #  다 gc=Lo   sc=Hangul     HANGUL SYLLABLE DA | 
| 3812 |  |  |  |  |  |  | 0x00B2F9 =>       3,  #  당 gc=Lo   sc=Hangul     HANGUL SYLLABLE DANG | 
| 3813 |  |  |  |  |  |  | 0x00B300 =>      10,  #  대 gc=Lo   sc=Hangul     HANGUL SYLLABLE DAE | 
| 3814 |  |  |  |  |  |  | 0x00B354 =>       5,  #  더 gc=Lo   sc=Hangul     HANGUL SYLLABLE DEO | 
| 3815 |  |  |  |  |  |  | 0x00B358 =>       2,  #  던 gc=Lo   sc=Hangul     HANGUL SYLLABLE DEON | 
| 3816 |  |  |  |  |  |  | 0x00B3C4 =>       4,  #  도 gc=Lo   sc=Hangul     HANGUL SYLLABLE DO | 
| 3817 |  |  |  |  |  |  | 0x00B418 =>       2,  #  되 gc=Lo   sc=Hangul     HANGUL SYLLABLE DOE | 
| 3818 |  |  |  |  |  |  | 0x00B41C =>       6,  #  된 gc=Lo   sc=Hangul     HANGUL SYLLABLE DOEN | 
| 3819 |  |  |  |  |  |  | 0x00B429 =>       2,  #  됩 gc=Lo   sc=Hangul     HANGUL SYLLABLE DOEB | 
| 3820 |  |  |  |  |  |  | 0x00B458 =>       1,  #  둘 gc=Lo   sc=Hangul     HANGUL SYLLABLE DUL | 
| 3821 |  |  |  |  |  |  | 0x00B4E4 =>       3,  #  들 gc=Lo   sc=Hangul     HANGUL SYLLABLE DEUL | 
| 3822 |  |  |  |  |  |  | 0x00B514 =>       2,  #  디 gc=Lo   sc=Hangul     HANGUL SYLLABLE DI | 
| 3823 |  |  |  |  |  |  | 0x00B54C =>       2,  #  때 gc=Lo   sc=Hangul     HANGUL SYLLABLE DDAE | 
| 3824 |  |  |  |  |  |  | 0x00B5A4 =>       3,  #  떤 gc=Lo   sc=Hangul     HANGUL SYLLABLE DDEON | 
| 3825 |  |  |  |  |  |  | 0x00B77C =>       6,  #  라 gc=Lo   sc=Hangul     HANGUL SYLLABLE RA | 
| 3826 |  |  |  |  |  |  | 0x00B78C =>       3,  #  람 gc=Lo   sc=Hangul     HANGUL SYLLABLE RAM | 
| 3827 |  |  |  |  |  |  | 0x00B838 =>       2,  #  렸 gc=Lo   sc=Hangul     HANGUL SYLLABLE RYEOSS | 
| 3828 |  |  |  |  |  |  | 0x00B85C =>       9,  #  로 gc=Lo   sc=Hangul     HANGUL SYLLABLE RO | 
| 3829 |  |  |  |  |  |  | 0x00B85D =>       2,  #  록 gc=Lo   sc=Hangul     HANGUL SYLLABLE ROG | 
| 3830 |  |  |  |  |  |  | 0x00B958 =>       2,  #  류 gc=Lo   sc=Hangul     HANGUL SYLLABLE RYU | 
| 3831 |  |  |  |  |  |  | 0x00B9C8 =>       2,  #  마 gc=Lo   sc=Hangul     HANGUL SYLLABLE MA | 
| 3832 |  |  |  |  |  |  | 0x00B9CC =>       8,  #  만 gc=Lo   sc=Hangul     HANGUL SYLLABLE MAN | 
| 3833 |  |  |  |  |  |  | 0x00B9D0 =>       6,  #  말 gc=Lo   sc=Hangul     HANGUL SYLLABLE MAL | 
| 3834 |  |  |  |  |  |  | 0x00BA70 =>       1,  #  며 gc=Lo   sc=Hangul     HANGUL SYLLABLE MYEO | 
| 3835 |  |  |  |  |  |  | 0x00BA74 =>       2,  #  면 gc=Lo   sc=Hangul     HANGUL SYLLABLE MYEON | 
| 3836 |  |  |  |  |  |  | 0x00BA87 =>       6,  #  몇 gc=Lo   sc=Hangul     HANGUL SYLLABLE MYEOC | 
| 3837 |  |  |  |  |  |  | 0x00BAA8 =>       1,  #  모 gc=Lo   sc=Hangul     HANGUL SYLLABLE MO | 
| 3838 |  |  |  |  |  |  | 0x00BAA9 =>       2,  #  목 gc=Lo   sc=Hangul     HANGUL SYLLABLE MOG | 
| 3839 |  |  |  |  |  |  | 0x00BBF8 =>       5,  #  미 gc=Lo   sc=Hangul     HANGUL SYLLABLE MI | 
| 3840 |  |  |  |  |  |  | 0x00BC18 =>       6,  #  반 gc=Lo   sc=Hangul     HANGUL SYLLABLE BAN | 
| 3841 |  |  |  |  |  |  | 0x00BC30 =>       1,  #  배 gc=Lo   sc=Hangul     HANGUL SYLLABLE BAE | 
| 3842 |  |  |  |  |  |  | 0x00BCF4 =>       2,  #  보 gc=Lo   sc=Hangul     HANGUL SYLLABLE BO | 
| 3843 |  |  |  |  |  |  | 0x00BCF8 =>       7,  #  본 gc=Lo   sc=Hangul     HANGUL SYLLABLE BON | 
| 3844 |  |  |  |  |  |  | 0x00BD80 =>      14,  #  부 gc=Lo   sc=Hangul     HANGUL SYLLABLE BU | 
| 3845 |  |  |  |  |  |  | 0x00BD84 =>      10,  #  분 gc=Lo   sc=Hangul     HANGUL SYLLABLE BUN | 
| 3846 |  |  |  |  |  |  | 0x00BE44 =>       4,  #  비 gc=Lo   sc=Hangul     HANGUL SYLLABLE BI | 
| 3847 |  |  |  |  |  |  | 0x00C0AC =>       5,  #  사 gc=Lo   sc=Hangul     HANGUL SYLLABLE SA | 
| 3848 |  |  |  |  |  |  | 0x00C0B4 =>       3,  #  살 gc=Lo   sc=Hangul     HANGUL SYLLABLE SAL | 
| 3849 |  |  |  |  |  |  | 0x00C0DD =>       4,  #  생 gc=Lo   sc=Hangul     HANGUL SYLLABLE SAENG | 
| 3850 |  |  |  |  |  |  | 0x00C11C =>       8,  #  서 gc=Lo   sc=Hangul     HANGUL SYLLABLE SEO | 
| 3851 |  |  |  |  |  |  | 0x00C120 =>       2,  #  선 gc=Lo   sc=Hangul     HANGUL SYLLABLE SEON | 
| 3852 |  |  |  |  |  |  | 0x00C12F =>       2,  #  섯 gc=Lo   sc=Hangul     HANGUL SYLLABLE SEOS | 
| 3853 |  |  |  |  |  |  | 0x00C131 =>       1,  #  성 gc=Lo   sc=Hangul     HANGUL SYLLABLE SEONG | 
| 3854 |  |  |  |  |  |  | 0x00C138 =>       1,  #  세 gc=Lo   sc=Hangul     HANGUL SYLLABLE SE | 
| 3855 |  |  |  |  |  |  | 0x00C168 =>       6,  #  셨 gc=Lo   sc=Hangul     HANGUL SYLLABLE SYEOSS | 
| 3856 |  |  |  |  |  |  | 0x00C218 =>       1,  #  수 gc=Lo   sc=Hangul     HANGUL SYLLABLE SU | 
| 3857 |  |  |  |  |  |  | 0x00C2B5 =>       8,  #  습 gc=Lo   sc=Hangul     HANGUL SYLLABLE SEUB | 
| 3858 |  |  |  |  |  |  | 0x00C2B7 =>       2,  #  슷 gc=Lo   sc=Hangul     HANGUL SYLLABLE SEUS | 
| 3859 |  |  |  |  |  |  | 0x00C2DC =>       1,  #  시 gc=Lo   sc=Hangul     HANGUL SYLLABLE SI | 
| 3860 |  |  |  |  |  |  | 0x00C2E0 =>       3,  #  신 gc=Lo   sc=Hangul     HANGUL SYLLABLE SIN | 
| 3861 |  |  |  |  |  |  | 0x00C2E4 =>       1,  #  실 gc=Lo   sc=Hangul     HANGUL SYLLABLE SIL | 
| 3862 |  |  |  |  |  |  | 0x00C2ED =>       7,  #  십 gc=Lo   sc=Hangul     HANGUL SYLLABLE SIB | 
| 3863 |  |  |  |  |  |  | 0x00C4F0 =>       3,  #  쓰 gc=Lo   sc=Hangul     HANGUL SYLLABLE SSEU | 
| 3864 |  |  |  |  |  |  | 0x00C500 =>       1,  #  씀 gc=Lo   sc=Hangul     HANGUL SYLLABLE SSEUM | 
| 3865 |  |  |  |  |  |  | 0x00C529 =>       1,  #  씩 gc=Lo   sc=Hangul     HANGUL SYLLABLE SSIG | 
| 3866 |  |  |  |  |  |  | 0x00C544 =>       2,  #  아 gc=Lo   sc=Hangul     HANGUL SYLLABLE A | 
| 3867 |  |  |  |  |  |  | 0x00C545 =>      14,  #  악 gc=Lo   sc=Hangul     HANGUL SYLLABLE AG | 
| 3868 |  |  |  |  |  |  | 0x00C57D =>       1,  #  약 gc=Lo   sc=Hangul     HANGUL SYLLABLE YAG | 
| 3869 |  |  |  |  |  |  | 0x00C591 =>       1,  #  양 gc=Lo   sc=Hangul     HANGUL SYLLABLE YANG | 
| 3870 |  |  |  |  |  |  | 0x00C5B4 =>      22,  #  어 gc=Lo   sc=Hangul     HANGUL SYLLABLE EO | 
| 3871 |  |  |  |  |  |  | 0x00C5D0 =>      13,  #  에 gc=Lo   sc=Hangul     HANGUL SYLLABLE E | 
| 3872 |  |  |  |  |  |  | 0x00C5EC =>       2,  #  여 gc=Lo   sc=Hangul     HANGUL SYLLABLE YEO | 
| 3873 |  |  |  |  |  |  | 0x00C601 =>       8,  #  영 gc=Lo   sc=Hangul     HANGUL SYLLABLE YEONG | 
| 3874 |  |  |  |  |  |  | 0x00C624 =>       1,  #  오 gc=Lo   sc=Hangul     HANGUL SYLLABLE O | 
| 3875 |  |  |  |  |  |  | 0x00C678 =>      10,  #  외 gc=Lo   sc=Hangul     HANGUL SYLLABLE OE | 
| 3876 |  |  |  |  |  |  | 0x00C6B8 =>       2,  #  울 gc=Lo   sc=Hangul     HANGUL SYLLABLE UL | 
| 3877 |  |  |  |  |  |  | 0x00C73C =>       4,  #  으 gc=Lo   sc=Hangul     HANGUL SYLLABLE EU | 
| 3878 |  |  |  |  |  |  | 0x00C740 =>       3,  #  은 gc=Lo   sc=Hangul     HANGUL SYLLABLE EUN | 
| 3879 |  |  |  |  |  |  | 0x00C744 =>      13,  #  을 gc=Lo   sc=Hangul     HANGUL SYLLABLE EUL | 
| 3880 |  |  |  |  |  |  | 0x00C74C =>      15,  #  음 gc=Lo   sc=Hangul     HANGUL SYLLABLE EUM | 
| 3881 |  |  |  |  |  |  | 0x00C758 =>       4,  #  의 gc=Lo   sc=Hangul     HANGUL SYLLABLE YI | 
| 3882 |  |  |  |  |  |  | 0x00C774 =>      11,  #  이 gc=Lo   sc=Hangul     HANGUL SYLLABLE I | 
| 3883 |  |  |  |  |  |  | 0x00C778 =>      14,  #  인 gc=Lo   sc=Hangul     HANGUL SYLLABLE IN | 
| 3884 |  |  |  |  |  |  | 0x00C77C =>       1,  #  일 gc=Lo   sc=Hangul     HANGUL SYLLABLE IL | 
| 3885 |  |  |  |  |  |  | 0x00C77D =>       6,  #  읽 gc=Lo   sc=Hangul     HANGUL SYLLABLE ILG | 
| 3886 |  |  |  |  |  |  | 0x00C785 =>       2,  #  입 gc=Lo   sc=Hangul     HANGUL SYLLABLE IB | 
| 3887 |  |  |  |  |  |  | 0x00C788 =>       3,  #  있 gc=Lo   sc=Hangul     HANGUL SYLLABLE ISS | 
| 3888 |  |  |  |  |  |  | 0x00C790 =>       3,  #  자 gc=Lo   sc=Hangul     HANGUL SYLLABLE JA | 
| 3889 |  |  |  |  |  |  | 0x00C798 =>       4,  #  잘 gc=Lo   sc=Hangul     HANGUL SYLLABLE JAL | 
| 3890 |  |  |  |  |  |  | 0x00C7A5 =>       1,  #  장 gc=Lo   sc=Hangul     HANGUL SYLLABLE JANG | 
| 3891 |  |  |  |  |  |  | 0x00C801 =>       2,  #  적 gc=Lo   sc=Hangul     HANGUL SYLLABLE JEOG | 
| 3892 |  |  |  |  |  |  | 0x00C804 =>       4,  #  전 gc=Lo   sc=Hangul     HANGUL SYLLABLE JEON | 
| 3893 |  |  |  |  |  |  | 0x00C815 =>       4,  #  정 gc=Lo   sc=Hangul     HANGUL SYLLABLE JEONG | 
| 3894 |  |  |  |  |  |  | 0x00C885 =>       4,  #  종 gc=Lo   sc=Hangul     HANGUL SYLLABLE JONG | 
| 3895 |  |  |  |  |  |  | 0x00C8FC =>       1,  #  주 gc=Lo   sc=Hangul     HANGUL SYLLABLE JU | 
| 3896 |  |  |  |  |  |  | 0x00C911 =>       7,  #  중 gc=Lo   sc=Hangul     HANGUL SYLLABLE JUNG | 
| 3897 |  |  |  |  |  |  | 0x00C9C0 =>       6,  #  지 gc=Lo   sc=Hangul     HANGUL SYLLABLE JI | 
| 3898 |  |  |  |  |  |  | 0x00CC45 =>       7,  #  책 gc=Lo   sc=Hangul     HANGUL SYLLABLE CAEG | 
| 3899 |  |  |  |  |  |  | 0x00CD5C =>       2,  #  최 gc=Lo   sc=Hangul     HANGUL SYLLABLE COE | 
| 3900 |  |  |  |  |  |  | 0x00CE58 =>       2,  #  치 gc=Lo   sc=Hangul     HANGUL SYLLABLE CI | 
| 3901 |  |  |  |  |  |  | 0x00CE5C =>       2,  #  친 gc=Lo   sc=Hangul     HANGUL SYLLABLE CIN | 
| 3902 |  |  |  |  |  |  | 0x00D0C0 =>       1,  #  타 gc=Lo   sc=Hangul     HANGUL SYLLABLE TA | 
| 3903 |  |  |  |  |  |  | 0x00D2B9 =>       1,  #  특 gc=Lo   sc=Hangul     HANGUL SYLLABLE TEUG | 
| 3904 |  |  |  |  |  |  | 0x00D3B8 =>       1,  #  편 gc=Lo   sc=Hangul     HANGUL SYLLABLE PYEON | 
| 3905 |  |  |  |  |  |  | 0x00D558 =>       7,  #  하 gc=Lo   sc=Hangul     HANGUL SYLLABLE HA | 
| 3906 |  |  |  |  |  |  | 0x00D559 =>       4,  #  학 gc=Lo   sc=Hangul     HANGUL SYLLABLE HAG | 
| 3907 |  |  |  |  |  |  | 0x00D55C =>      24,  #  한 gc=Lo   sc=Hangul     HANGUL SYLLABLE HAN | 
| 3908 |  |  |  |  |  |  | 0x00D560 =>       1,  #  할 gc=Lo   sc=Hangul     HANGUL SYLLABLE HAL | 
| 3909 |  |  |  |  |  |  | 0x00D56D =>       2,  #  항 gc=Lo   sc=Hangul     HANGUL SYLLABLE HANG | 
| 3910 |  |  |  |  |  |  | 0x00D574 =>       4,  #  해 gc=Lo   sc=Hangul     HANGUL SYLLABLE HAE | 
| 3911 |  |  |  |  |  |  | 0x00D638 =>       2,  #  호 gc=Lo   sc=Hangul     HANGUL SYLLABLE HO | 
| 3912 |  |  |  |  |  |  | 0x00D6C4 =>       1,  #  후 gc=Lo   sc=Hangul     HANGUL SYLLABLE HU | 
| 3913 |  |  |  |  |  |  | 0x00FF95 =>       1,  #  ユ  gc=Lo   sc=Katakana   HALFWIDTH KATAKANA LETTER YU | 
| 3914 |  |  |  |  |  |  | 0x006240 =>       2,  #  所 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-6240 | 
| 3915 |  |  |  |  |  |  | 0x006587 =>       2,  #  文 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-6587 | 
| 3916 |  |  |  |  |  |  | 0x006709 =>       2,  #  有 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-6709 | 
| 3917 |  |  |  |  |  |  | 0x00689D =>       2,  #  條 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-689D | 
| 3918 |  |  |  |  |  |  | 0x007368 =>       1,  #  獨 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-7368 | 
| 3919 |  |  |  |  |  |  | 0x007974 =>       1,  #  祴 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-7974 | 
| 3920 |  |  |  |  |  |  | 0x008230 =>       1,  #  舰 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-8230 | 
| 3921 |  |  |  |  |  |  | 0x008713 =>       3,  #  蜓 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-8713 | 
| 3922 |  |  |  |  |  |  | 0x009792 =>       1,  #  鞒 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-9792 | 
| 3923 |  |  |  |  |  |  | 0x009794 =>       1,  #  鞔 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-9794 | 
| 3924 |  |  |  |  |  |  | 0x0036E7 =>       1,  #  㛧 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-36E7 | 
| 3925 |  |  |  |  |  |  | 0x0036E8 =>       1,  #  㛨 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-36E8 | 
| 3926 |  |  |  |  |  |  | 0x0036E9 =>       1,  #  㛩 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-36E9 | 
| 3927 |  |  |  |  |  |  | 0x0036EA =>       1,  #  㛪 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-36EA | 
| 3928 |  |  |  |  |  |  | 0x0036EB =>       1,  #  㛫 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-36EB | 
| 3929 |  |  |  |  |  |  | 0x0036EC =>       1,  #  㛬 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-36EC | 
| 3930 |  |  |  |  |  |  | 0x0036ED =>       1,  #  㛭 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-36ED | 
| 3931 |  |  |  |  |  |  | 0x0036EE =>       1,  #  㛮 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-36EE | 
| 3932 |  |  |  |  |  |  | 0x0036EF =>       1,  #  㛯 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-36EF | 
| 3933 |  |  |  |  |  |  | 0x003B12 =>       1,  #  㬒 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-3B12 | 
| 3934 |  |  |  |  |  |  | 0x003B21 =>       1,  #  㬡 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-3B21 | 
| 3935 |  |  |  |  |  |  | 0x000085 =>     264,  # <--->gc=Cc   sc=Common | 
| 3936 |  |  |  |  |  |  | 0x000086 =>      14,  # <--->gc=Cc   sc=Common | 
| 3937 |  |  |  |  |  |  | 0x002061 =>   10097,  # <--->gc=Cf   sc=Common     FUNCTION APPLICATION | 
| 3938 |  |  |  |  |  |  | 0x00206C =>      17,  # <--->gc=Cf   sc=Common     INHIBIT ARABIC FORM SHAPING | 
| 3939 |  |  |  |  |  |  | 0x00206A =>       1,  # <--->gc=Cf   sc=Common     INHIBIT SYMMETRIC SWAPPING | 
| 3940 |  |  |  |  |  |  | 0x002062 =>     204,  # <--->gc=Cf   sc=Common     INVISIBLE TIMES | 
| 3941 |  |  |  |  |  |  | 0x00202A =>       1,  # <--->gc=Cf   sc=Common     LEFT-TO-RIGHT EMBEDDING | 
| 3942 |  |  |  |  |  |  | 0x00200E =>       5,  # <--->gc=Cf   sc=Common     LEFT-TO-RIGHT MARK | 
| 3943 |  |  |  |  |  |  | 0x0000AD =>    3587,  # <--->gc=Cf   sc=Common     SOFT HYPHEN | 
| 3944 |  |  |  |  |  |  | 0x00FEFF =>       9,  # <--->gc=Cf   sc=Common     ZERO WIDTH NO-BREAK SPACE | 
| 3945 |  |  |  |  |  |  | 0x00200B =>     128,  # <--->gc=Cf   sc=Common     ZERO WIDTH SPACE | 
| 3946 |  |  |  |  |  |  | 0x00200D =>      59,  # <--->gc=Cf   sc=Inherited  ZERO WIDTH JOINER | 
| 3947 |  |  |  |  |  |  | 0x100002 =>       2,  # <--->gc=Co   sc=Unknown | 
| 3948 |  |  |  |  |  |  | 0x002028 =>   10940,  # <--->gc=Zl   sc=Common     LINE SEPARATOR | 
| 3949 |  |  |  |  |  |  | 0x002003 =>  602377,  # <--->gc=Zs   sc=Common     EM SPACE | 
| 3950 |  |  |  |  |  |  | 0x002000 =>       1,  # <--->gc=Zs   sc=Common     EN QUAD | 
| 3951 |  |  |  |  |  |  | 0x002002 =>    8517,  # <--->gc=Zs   sc=Common     EN SPACE | 
| 3952 |  |  |  |  |  |  | 0x002007 =>     422,  # <--->gc=Zs   sc=Common     FIGURE SPACE | 
| 3953 |  |  |  |  |  |  | 0x002005 =>   21027,  # <--->gc=Zs   sc=Common     FOUR-PER-EM SPACE | 
| 3954 |  |  |  |  |  |  | 0x00200A =>  491842,  # <--->gc=Zs   sc=Common     HAIR SPACE | 
| 3955 |  |  |  |  |  |  | 0x003000 =>      17,  # <--->gc=Zs   sc=Common     IDEOGRAPHIC SPACE | 
| 3956 |  |  |  |  |  |  | 0x00205F =>      28,  # <--->gc=Zs   sc=Common     MEDIUM MATHEMATICAL SPACE | 
| 3957 |  |  |  |  |  |  | 0x00202F =>    1682,  # <--->gc=Zs   sc=Common     NARROW NO-BREAK SPACE | 
| 3958 |  |  |  |  |  |  | 0x0000A0 => 1065594,  # <--->gc=Zs   sc=Common     NO-BREAK SPACE | 
| 3959 |  |  |  |  |  |  | 0x002008 =>     702,  # <--->gc=Zs   sc=Common     PUNCTUATION SPACE | 
| 3960 |  |  |  |  |  |  | 0x002006 =>      90,  # <--->gc=Zs   sc=Common     SIX-PER-EM SPACE | 
| 3961 |  |  |  |  |  |  | 0x002009 =>  420888,  # <--->gc=Zs   sc=Common     THIN SPACE | 
| 3962 |  |  |  |  |  |  | 0x002004 =>      26,  # <--->gc=Zs   sc=Common     THREE-PER-EM SPACE | 
| 3963 |  |  |  |  |  |  | 0x0020DE =>     217,  # ◌ ⃞  gc=Me   sc=Inherited  COMBINING ENCLOSING SQUARE | 
| 3964 |  |  |  |  |  |  | 0x000597 =>       2,  # ◌ ֗  gc=Mn   sc=Hebrew     HEBREW ACCENT REVIA | 
| 3965 |  |  |  |  |  |  | 0x0005BF =>       2,  # ◌ ֿ  gc=Mn   sc=Hebrew     HEBREW POINT RAFE | 
| 3966 |  |  |  |  |  |  | 0x000652 =>       1,  # ◌ ْ  gc=Mn   sc=Inherited  ARABIC SUKUN | 
| 3967 |  |  |  |  |  |  | 0x000301 =>      40,  # ◌ ́  gc=Mn   sc=Inherited  COMBINING ACUTE ACCENT | 
| 3968 |  |  |  |  |  |  | 0x000341 =>       5,  # ◌ ́  gc=Mn   sc=Inherited  COMBINING ACUTE TONE MARK | 
| 3969 |  |  |  |  |  |  | 0x000306 =>      19,  # ◌ ̆  gc=Mn   sc=Inherited  COMBINING BREVE | 
| 3970 |  |  |  |  |  |  | 0x00030C =>       3,  # ◌ ̌  gc=Mn   sc=Inherited  COMBINING CARON | 
| 3971 |  |  |  |  |  |  | 0x000327 =>       8,  # ◌ ̧  gc=Mn   sc=Inherited  COMBINING CEDILLA | 
| 3972 |  |  |  |  |  |  | 0x000302 =>    1249,  # ◌ ̂  gc=Mn   sc=Inherited  COMBINING CIRCUMFLEX ACCENT | 
| 3973 |  |  |  |  |  |  | 0x000308 =>       6,  # ◌ ̈  gc=Mn   sc=Inherited  COMBINING DIAERESIS | 
| 3974 |  |  |  |  |  |  | 0x000307 =>     458,  # ◌ ̇  gc=Mn   sc=Inherited  COMBINING DOT ABOVE | 
| 3975 |  |  |  |  |  |  | 0x000358 =>       3,  # ◌ ͘  gc=Mn   sc=Inherited  COMBINING DOT ABOVE RIGHT | 
| 3976 |  |  |  |  |  |  | 0x000323 =>       7,  # ◌ ̣  gc=Mn   sc=Inherited  COMBINING DOT BELOW | 
| 3977 |  |  |  |  |  |  | 0x000323 =>       6,  # ◌ ̣  gc=Mn   sc=Inherited  COMBINING DOT BELOW | 
| 3978 |  |  |  |  |  |  | 0x00030B =>       3,  # ◌ ̋  gc=Mn   sc=Inherited  COMBINING DOUBLE ACUTE ACCENT | 
| 3979 |  |  |  |  |  |  | 0x000300 =>      85,  # ◌ ̀  gc=Mn   sc=Inherited  COMBINING GRAVE ACCENT | 
| 3980 |  |  |  |  |  |  | 0x000344 =>       1,  # ◌ ̈́  gc=Mn   sc=Inherited  COMBINING GREEK DIALYTIKA TONOS | 
| 3981 |  |  |  |  |  |  | 0x000343 =>       1,  # ◌ ̓  gc=Mn   sc=Inherited  COMBINING GREEK KORONIS | 
| 3982 |  |  |  |  |  |  | 0x000342 =>      55,  # ◌ ͂  gc=Mn   sc=Inherited  COMBINING GREEK PERISPOMENI | 
| 3983 |  |  |  |  |  |  | 0x000311 =>       9,  # ◌ ̑  gc=Mn   sc=Inherited  COMBINING INVERTED BREVE | 
| 3984 |  |  |  |  |  |  | 0x000332 =>     150,  # ◌ ̲  gc=Mn   sc=Inherited  COMBINING LOW LINE | 
| 3985 |  |  |  |  |  |  | 0x000304 =>     624,  # ◌ ̄  gc=Mn   sc=Inherited  COMBINING MACRON | 
| 3986 |  |  |  |  |  |  | 0x000304 =>       1,  # ◌ ̄  gc=Mn   sc=Inherited  COMBINING MACRON | 
| 3987 |  |  |  |  |  |  | 0x000328 =>       1,  # ◌ ̨  gc=Mn   sc=Inherited  COMBINING OGONEK | 
| 3988 |  |  |  |  |  |  | 0x000305 =>    1093,  # ◌ ̅  gc=Mn   sc=Inherited  COMBINING OVERLINE | 
| 3989 |  |  |  |  |  |  | 0x0020D7 =>     335,  # ◌ ⃗  gc=Mn   sc=Inherited  COMBINING RIGHT ARROW ABOVE | 
| 3990 |  |  |  |  |  |  | 0x0020D1 =>       8,  # ◌ ⃑  gc=Mn   sc=Inherited  COMBINING RIGHT HARPOON ABOVE | 
| 3991 |  |  |  |  |  |  | 0x00030A =>      35,  # ◌ ̊  gc=Mn   sc=Inherited  COMBINING RING ABOVE | 
| 3992 |  |  |  |  |  |  | 0x000337 =>       6,  # ◌ ̷  gc=Mn   sc=Inherited  COMBINING SHORT SOLIDUS OVERLAY | 
| 3993 |  |  |  |  |  |  | 0x000335 =>      11,  # ◌ ̵  gc=Mn   sc=Inherited  COMBINING SHORT STROKE OVERLAY | 
| 3994 |  |  |  |  |  |  | 0x0020DB =>       3,  # ◌ ⃛  gc=Mn   sc=Inherited  COMBINING THREE DOTS ABOVE | 
| 3995 |  |  |  |  |  |  | 0x000303 =>     440,  # ◌ ̃  gc=Mn   sc=Inherited  COMBINING TILDE | 
| 3996 |  |  |  |  |  |  | 0x00FE00 =>      12,  # ◌ ︀  gc=Mn   sc=Inherited  VARIATION SELECTOR-1 | 
| 3997 |  |  |  |  |  |  | 0x001036 =>       1,  # ◌ ံ  gc=Mn   sc=Myanmar    MYANMAR SIGN ANUSVARA | 
| 3998 |  |  |  |  |  |  | 0x000EBC =>       1,  # ◌ ຼ  gc=Mn   sc=Lao        LAO SEMIVOWEL SIGN LO | 
| 3999 |  |  |  |  |  |  | 0x000F9E =>       1,  # ◌ ྞ  gc=Mn   sc=Tibetan    TIBETAN SUBJOINED LETTER NNA | 
| 4000 |  |  |  |  |  |  |  | 
| 4001 |  |  |  |  |  |  | ); | 
| 4002 |  |  |  |  |  |  |  | 
| 4003 |  |  |  |  |  |  | %default_training_data = %pmcoa_training; | 
| 4004 |  |  |  |  |  |  |  | 
| 4005 |  |  |  |  |  |  | } | 
| 4006 |  |  |  |  |  |  |  | 
| 4007 |  |  |  |  |  |  |  | 
| 4008 |  |  |  |  |  |  | 1; # End of Encode::Guess::Educated | 
| 4009 |  |  |  |  |  |  |  | 
| 4010 |  |  |  |  |  |  | __END__ |