line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl -I/home/phil/perl/cpan/DataTableText/lib/ |
2
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
3
|
|
|
|
|
|
|
# Simd::Avx512 - Emulate SIMD instructions |
4
|
|
|
|
|
|
|
# Philip R Brenan at appaapps dot com, Appa Apps Ltd Inc., 2021 |
5
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
6
|
|
|
|
|
|
|
# podDocumentation |
7
|
|
|
|
|
|
|
package Simd::Avx512; |
8
|
|
|
|
|
|
|
our $VERSION = 20210122; |
9
|
1
|
|
|
1
|
|
1294
|
use warnings FATAL => qw(all); |
|
1
|
|
|
|
|
8
|
|
|
1
|
|
|
|
|
37
|
|
10
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
30
|
|
11
|
1
|
|
|
1
|
|
6
|
use Carp; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
76
|
|
12
|
1
|
|
|
1
|
|
604
|
use Data::Dump qw(dump); |
|
1
|
|
|
|
|
7937
|
|
|
1
|
|
|
|
|
61
|
|
13
|
1
|
|
|
1
|
|
7
|
use feature qw(say current_sub); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
3232
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
my $develop = -e q(/home/phil/); # Development mode |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
sub repeat($$)                                                                  # Concatenate a string with itself a specified number of times
 {my ($string, $repeat) = @_;                                                   # String to repeat, number of repetitions
  return $string x $repeat;                                                     # The repetition operator does all the work
 }
21
|
|
|
|
|
|
|
|
22
|
63
|
|
|
63
|
0
|
104
|
sub zByte  {'0' x  8}                                                           # One byte of zero bits
23
|
0
|
|
|
0
|
0
|
0
|
sub zWord  {'0' x 16}                                                           # One word of zero bits
24
|
0
|
|
|
0
|
0
|
0
|
sub zDWord {'0' x 32}                                                           # One double word of zero bits
25
|
31
|
|
|
31
|
0
|
60
|
sub zQWord {'0' x 64}                                                           # One quad word of zero bits
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
sub zBytes($)                                                                   # Bit string made of the specified number of zero bytes
 {my ($length) = @_;                                                            # Number of zero bytes
  my $zeroByte = '0' x 8;                                                       # A single byte of zero bits
  return $zeroByte x $length;                                                   # Repeat the byte the requested number of times
 }
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
sub zWords($)                                                                   # Bit string made of the specified number of zero words
 {my ($length) = @_;                                                            # Number of zero words
  my $zeroWord = '0' x 16;                                                      # A single word of zero bits
  return $zeroWord x $length;                                                   # Repeat the word the requested number of times
 }
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
sub zDWords($)                                                                  # Bit string made of the specified number of zero double words
 {my ($length) = @_;                                                            # Number of zero double words
  my $zeroDWord = '0' x 32;                                                     # A single double word of zero bits
  return $zeroDWord x $length;                                                  # Repeat the double word the requested number of times
 }
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
sub zQWords($)                                                                  # Bit string made of the specified number of zero quad words
 {my ($length) = @_;                                                            # Number of zero quad words
  my $zeroQWord = '0' x 64;                                                     # A single quad word of zero bits
  return $zeroQWord x $length;                                                  # Repeat the quad word the requested number of times
 }
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
sub byte($)                                                                     # Render a value as a string of 8 bits
 {my ($value) = @_;                                                             # Value of the byte
  ($value >= 0 and $value < 2**8) or confess "0 - 2**8 required ($value)";      # The value must fit in 8 unsigned bits
  return sprintf "%08b", $value;                                                # Binary representation padded on the left with zeros
 }
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
sub word($)                                                                     # Render a value as a string of 16 bits
 {my ($value) = @_;                                                             # Value of the word
  ($value >= 0 and $value < 2**16) or confess "0 - 2**16 required ($value)";    # The value must fit in 16 unsigned bits
  return sprintf "%016b", $value;                                               # Binary representation padded on the left with zeros
 }
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
sub dWord($)                                                                    # Render a value as a string of 32 bits
 {my ($value) = @_;                                                             # Value of the double word
  ($value >= 0 and $value < 2**32) or confess "0 - 2**32 required ($value)";    # The value must fit in 32 unsigned bits
  return sprintf "%032b", $value;                                               # Binary representation padded on the left with zeros
 }
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
sub qWord($)                                                                    # Render a value as a string of 64 bits
 {my ($value) = @_;                                                             # Value of the quad word
  ($value >= 0 and $value < 2**64) or confess "0 - 2**64 required ($value)";    # The value must fit in 64 unsigned bits
  return sprintf "%064b", $value;                                               # Binary representation padded on the left with zeros
 }
70
|
|
|
|
|
|
|
|
71
|
31
|
|
|
31
|
0
|
64
|
sub maskRegister {'0' x 64}                                                     # A mask register with every one of its 64 bits set to zero
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
sub require8or16or32or64($)                                                     # Confirm that an element size is one of 8|16|32|64 bits
 {my ($size) = @_;                                                              # Size to check
  my @allowed = (8, 16, 32, 64);                                                # The element sizes we can process
  confess "8|16|32|64 required for operand ($size)" unless grep {$size == $_} @allowed;
 }
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
sub require64($)                                                                # Check that we have a string of exactly 64 bits
 {my ($xmm) = @_;                                                               # Bit string to check
  defined($xmm) or confess "64 bits required for operand but it is undefined";  # Say why we are complaining rather than confessing with no message
  my $l = length $xmm;
  confess "64 bits required for operand ($l)" unless $l == 64;
  confess "Only zeros and ones allowed in operand" unless $xmm =~ m(\A[01]+\z); # \z rather than \Z so that 63 bits followed by a new line are not accepted as 64 bits
 }
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
sub require128($)                                                               # Check that we have a string of exactly 128 bits
 {my ($xmm) = @_;                                                               # Bit string to check
  defined($xmm) or confess "128 bits required for operand but it is undefined"; # Same check as require64 so that an undefined operand gets a clear message
  my $l = length $xmm;
  confess "128 bits required for operand ($l)" unless $l == 128;
  confess "Only zeros and ones allowed in operand" unless $xmm =~ m(\A[01]+\z); # \z rather than \Z so that 127 bits followed by a new line are not accepted as 128 bits
 }
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
sub require128or245or512($;$)                                                   # Check that we have a string of 128|256|512 bits in the first operand and optionally a bit string of the same length in the second operand. The 245 in the name stands for the 256 in the messages: the name is kept as is because callers use it
 {my ($xmm1, $xmm2) = @_;                                                       # Bytes, optional bytes
  my $l = length $xmm1;
  confess "128|256|512 bits required for first operand ($l)" unless $l == 128 or $l == 256 or $l == 512;
  confess "Only zeros and ones allowed in first operand" unless $xmm1 =~ m(\A[01]+\z);    # Validate the content as the other require... checks do

  if (defined $xmm2)                                                            # The second operand is only checked if it has been supplied
   {my $m = length $xmm2;
    confess "128|256|512 bits required for second operand ($m)" unless $m == 128 or $m == 256 or $m == 512;
    confess "Operands must have same length($l,$m)" unless $l == $m;
    confess "Only zeros and ones allowed in second operand" unless $xmm2 =~ m(\A[01]+\z);
   }
 }
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
sub require64or128or245or512($)                                                 # Check that we have a string of 64|128|256|512 bits. The 245 in the name stands for the 256 in the message: the name is kept as is because callers use it
 {my ($xmm) = @_;                                                               # Bit string to check
  my $l = length $xmm;
  confess "64|128|256|512 bits required for operand" unless $l == 64 or $l == 128 or $l == 256 or $l == 512;
  confess "Only zeros and ones allowed in operand" unless $xmm =~ m(\A[01]+\z); # \z rather than \Z so that a trailing new line is never counted as one of the bits
 }
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
sub requireSameLength($$)                                                       # Confirm that two operands are equally long and return their common length
 {my ($xmm1, $xmm2) = @_;                                                       # Bytes, bytes
  my $l = length $xmm1;                                                         # Length of first operand
  my $L = length $xmm2;                                                         # Length of second operand
  $l == $L or confess "Operands have different lengths($l, $L)";
  return $l;                                                                    # The length shared by both operands
 }
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
sub flipBitsUnderMask($$)                                                       # Invert each bit of a string whose corresponding mask bit is not zero and copy the other bits unchanged
 {my ($string, $mask) = @_;                                                     # Bit string, mask
  requireSameLength $string, $mask;                                             # The mask must cover the string exactly
  my @s = split //, $string;                                                    # Characters of the string
  my @m = split //, $mask;                                                      # Characters of the mask
  return join '', map
   {$m[$_] eq '0' ? $s[$_]                                                      # Mask bit is zero: keep the character as it is
                  : $s[$_] eq '0' ? '1' : '0'                                   # Otherwise zero becomes one and anything else becomes zero
   } 0..$#s;
 }
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
#D1 Instructions # Emulation of Avx512 instructions |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
sub PSLLDQ($$)                                                                  # Packed Shift Left Logical DoubleQword: shift a string of 128 bits left by a number of bytes filling the vacated bytes with zeros
 {my ($xmm1, $imm8) = @_;                                                       # Bytes, length of shift
  require128 $xmm1;                                                             # Check that we have a string of 128 bits
  confess "0 - 255 for shift amount required"                                   # The shift amount is an unsigned 8 bit immediate: negative or fractional amounts would otherwise produce a result that is not 128 bits long
    unless defined($imm8) and $imm8 =~ m(\A[0-9]+\z) and $imm8 < 2**8;
  my $shift = $imm8 > 15 ? 16 : $imm8;                                          # A count above 15 clears the whole register as the instruction does, rather than reading past the end of the string
  substr($xmm1, $shift * 8).zBytes($shift)                                      # Drop the leading bytes and append the same number of zero bytes
 }
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
sub VPSLLDQ($$)                                                                 # Packed Shift Left Logical DoubleQword applied to each 128 bit lane of a string of 128|256|512 bits
 {my ($xmm1, $imm8) = @_;                                                       # Bytes, length of shift
  require128or245or512 $xmm1;                                                   # Check that we have a string of 128|256|512 bits
  ($imm8 >= 0 and $imm8 < 16) or confess "0 - 15 for shift amount required";

  my $lanes  = length($xmm1) / 128;                                             # Number of 128 bit lanes: 1, 2 or 4
  my $result = '';
  for my $lane(0..$lanes-1)                                                     # Shift each lane independently of the others
   {$result .= PSLLDQ(substr($xmm1, $lane * 128, 128), $imm8);
   }
  return $result;
 }
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
sub PSRLDQ($$)                                                                  # Packed Shift Right Logical DoubleQword: shift a string of 128 bits right by a number of bytes filling the vacated bytes with zeros
 {my ($xmm1, $imm8) = @_;                                                       # Bytes, length of shift
  require128 $xmm1;                                                             # Check that we have a string of 128 bits
  confess "0 - 255 for shift amount required"                                   # The shift amount is an unsigned 8 bit immediate: negative or fractional amounts would otherwise produce a result that is not 128 bits long
    unless defined($imm8) and $imm8 =~ m(\A[0-9]+\z) and $imm8 < 2**8;
  my $shift = $imm8 > 15 ? 16 : $imm8;                                          # A count above 15 clears the whole register as the instruction does, rather than producing more than 128 bits
  zBytes($shift).substr($xmm1, 0, 128 - $shift * 8)                             # Prefix zero bytes and drop the same number of trailing bytes
 }
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
sub VPSRLDQ($$)                                                                 # Packed Shift Right Logical DoubleQword applied to each 128 bit lane of a string of 128|256|512 bits
 {my ($xmm1, $imm8) = @_;                                                       # Bytes, length of shift
  require128or245or512 $xmm1;                                                   # Check that we have a string of 128|256|512 bits
  ($imm8 >= 0 and $imm8 < 16) or confess "0 - 15 for shift amount required";

  my $lanes  = length($xmm1) / 128;                                             # Number of 128 bit lanes: 1, 2 or 4
  my $result = '';
  for my $lane(0..$lanes-1)                                                     # Shift each lane independently of the others
   {$result .= PSRLDQ(substr($xmm1, $lane * 128, 128), $imm8);
   }
  return $result;
 }
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
sub PCMPEQB($$)                                                                 # Packed CoMPare EQual Byte: each byte of the result is all ones if the corresponding bytes of the operands are equal else all zeros
 {my ($xmm1, $xmm2) = @_;                                                       # Bytes, bytes
  require128 $xmm1;                                                             # Check that we have a string of 128 bits in the first operand
  require128 $xmm2;                                                             # Check that we have a string of 128 bits in the second operand
  requireSameLength $xmm1, $xmm2;                                               # Check operands have the same length

  my $equal     = byte(255);                                                    # Result byte for a pair of equal bytes
  my $different = byte(0);                                                      # Result byte for a pair of different bytes
  my $xmm3      = '';
  for my $i(0..15)                                                              # Each of the 16 bytes in 128 bits
   {my $o = $i * 8;                                                             # Bit offset of the byte
    $xmm3 .= substr($xmm1, $o, 8) eq substr($xmm2, $o, 8) ? $equal : $different;
   }
  return $xmm3;
 }
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
sub vpcmpeq($$$;$)                                                              #P Packed CoMPare EQual Byte|word|double|quad with optional masking
 {my ($size, $k2, $xmm1, $xmm2) = @_;                                           # Size in bits: 8|16|32|64 of each element, optional input mask, bytes, bytes

  require8or16or32or64 $size if $develop;                                       # We supply this parameter so we ought to get it right
  require64or128or245or512 $k2 if defined $k2;                                  # Optional mask
  require128or245or512 $xmm1, $xmm2;                                            # Check that we have strings of 128|256|512 bits of the same length

  my $N = length($xmm1) / $size;                                                # Bytes|Words|Doubles|Quads in operation

  if (defined $k2)                                                              # Masked operation: the result is a 64 bit mask with one bit per element
   {my $mask = substr($k2, 64 - $N, $N);                                        # The mask bits for the N elements end at bit 64 of the mask. This selects the same bits as before for 16, 32 and 64 elements and extends the same rule to smaller element counts such as 8 words in 128 bits
    my $k1   = '';                                                              # Result bits for the N elements
    for my $i(0..$N-1)
     {my $o = $i * $size;                                                       # Bit offset of the element
      my $equal = substr($mask, $i, 1) eq '1' &&                                # Elements that are masked out always produce zero
                  substr($xmm1, $o, $size) eq substr($xmm2, $o, $size);
      $k1 .= $equal ? '1' : '0';
     }
    return '0' x (64 - $N) . $k1;                                               # Pad on the left with zeros to the full width of a mask register
   }

  my $clear = '0' x $size;                                                      # Non masked operation: element written when the operands differ
  my $set   = '1' x $size;                                                      # Element written when the operands are equal
  my $xmm3  = '';
  for my $i(0..$N-1)
   {my $o = $i * $size;                                                         # Bit offset of the element
    $xmm3 .= substr($xmm1, $o, $size) eq substr($xmm2, $o, $size) ? $set : $clear;
   }
  $xmm3                                                                         # Same length as the operands
 }
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
sub VPCMPEQB($$;$)                                                              # Packed CoMPare EQual Byte with optional masking
 {my ($k2, $xmm1, $xmm2) = (@_ == 3 ? () : undef, @_);                          # Optional input mask, bytes, bytes
  return vpcmpeq(8, $k2, $xmm1, $xmm2);                                         # Elements are 8 bits wide. Without three parameters the mask is left undefined so the comparison is not masked
 }
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
sub VPCMPEQW($$;$)                                                              # Packed CoMPare EQual Word with optional masking
 {my ($k2, $xmm1, $xmm2) = (@_ == 3 ? () : undef, @_);                          # Optional input mask, words, words
  return vpcmpeq(16, $k2, $xmm1, $xmm2);                                        # Elements are 16 bits wide. Without three parameters the mask is left undefined so the comparison is not masked
 }
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
sub vpcmpu($$$$$)                                                               # Packed CoMPare Unsigned byte|word|double|quad elements under a mask using the comparison chosen by the test code
 {my ($size, $k2, $xmm1, $xmm2, $op) = @_;                                      # Size of element in bits, input mask, bytes, bytes, test code

  require8or16or32or64 $size if $develop;                                       # We supply this parameter so we ought to get it right
  require64 $k2;                                                                # Mask
  require128or245or512 $xmm1, $xmm2;                                            # Check that we have strings of 128|256|512 bits of the same length
  confess "Invalid op code $op" unless $op =~ m(\A(0|1|2|4|5|6)\z);             # Test code: \z rather than \Z so that a code followed by a new line is rejected

  my $T =                                                                       # String tests indexed by test code. The elements compared are bit strings of equal length so string comparison orders them as unsigned integers
   [sub {$_[0] eq $_[1] ? 1 : 0},                                               # eq 0
    sub {$_[0] lt $_[1] ? 1 : 0},                                               # lt 1
    sub {$_[0] le $_[1] ? 1 : 0},                                               # le 2
    undef,                                                                      # Test code 3 is rejected above
    sub {$_[0] ne $_[1] ? 1 : 0},                                               # ne 4
    sub {$_[0] ge $_[1] ? 1 : 0},                                               # ge 5
    sub {$_[0] gt $_[1] ? 1 : 0},                                               # gt 6
   ];

  my $N    = length($xmm1) / $size;                                             # Number of elements
  my $mask = substr($k2, -$N);                                                  # Relevant portion of mask: the last N bits
  my $k1   = '';                                                                # Result bits for the N elements
  for my $i(0..$N-1)
   {my $o   = $i * $size;                                                       # Bit offset of the element
    my $hit = substr($mask, $i, 1) eq '1' &&                                    # Elements that are masked out always produce zero
              $$T[$op]->(substr($xmm1, $o, $size), substr($xmm2, $o, $size));   # Compare according to code
    $k1 .= $hit ? '1' : '0';
   }

  '0' x (64 - $N) . $k1                                                         # Pad on the left with zeros to the full width of a mask register
 }
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
sub VPCMPUB($$$$)                                                               # Packed CoMPare Unsigned Byte under a mask
 {my ($k2, $xmm1, $xmm2, $op) = @_;                                             # Input mask, bytes, bytes, test code
  return vpcmpu(8, $k2, $xmm1, $xmm2, $op);                                     # Elements are 8 bits wide
 }
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
sub VPCMPUW($$$$)                                                               # Packed CoMPare Unsigned Word under a mask
 {my ($k2, $xmm1, $xmm2, $op) = @_;                                             # Input mask, words, words, test code
  return vpcmpu(16, $k2, $xmm1, $xmm2, $op);                                    # Elements are 16 bits wide
 }
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
sub VPCMPUD($$$$)                                                               # Packed CoMPare Unsigned Dword under a mask
 {my ($k2, $xmm1, $xmm2, $op) = @_;                                             # Input mask, dwords, dwords, test code
  return vpcmpu(32, $k2, $xmm1, $xmm2, $op);                                    # Elements are 32 bits wide
 }
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
sub VPCMPUQ($$$$)                                                               # Packed CoMPare Unsigned Qword under a mask
 {my ($k2, $xmm1, $xmm2, $op) = @_;                                             # Input mask, qwords, qwords, test code
  return vpcmpu(64, $k2, $xmm1, $xmm2, $op);                                    # Elements are 64 bits wide
 }
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
#D0 |
285
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
286
|
|
|
|
|
|
|
# Export |
287
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
288
|
|
|
|
|
|
|
|
289
|
1
|
|
|
1
|
|
8
|
use Exporter qw(import);

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA          = qw(Exporter);
@EXPORT_OK    = qw(
PCMPEQB
PSLLDQ
PSRLDQ
VPCMPEQB
VPCMPEQW
VPCMPUB
VPCMPUD
VPCMPUQ
VPCMPUW
VPSLLDQ
VPSRLDQ
);                                                                              # The emulated instructions can be imported by name on request. Nothing is exported by default
%EXPORT_TAGS  = (all=>[@EXPORT, @EXPORT_OK]);                                   # The :all tag imports every emulated instruction
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
# podDocumentation |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
=pod |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
=encoding utf-8 |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=head1 Name |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
Simd::Avx512 - Emulate SIMD instructions |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
=head1 Synopsis |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
Help needed please! |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
=head1 Description |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
Emulate SIMD instructions |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
Version 20210122. |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
The following sections describe the methods in each functional area of this |
321
|
|
|
|
|
|
|
module. For an alphabetic listing of all methods by name see L</Index>. |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
=head1 Instructions |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
Emulation of Avx512 instructions |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
=head2 PSLLDQ($xmm1, $imm8) |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
Packed Shift Left Logical DoubleQword |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
Parameter Description |
334
|
|
|
|
|
|
|
1 $xmm1 Bytes |
335
|
|
|
|
|
|
|
2 $imm8 Length of shift |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
B<Example:> |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
is_deeply PSLLDQ( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
344
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
345
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
346
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
347
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
348
|
|
|
|
|
|
|
'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
349
|
|
|
|
|
|
|
,2), |
350
|
|
|
|
|
|
|
'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000' |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
=head2 VPSLLDQ($xmm1, $imm8) |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
Packed Shift Left Logical DoubleQword |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
Parameter Description |
358
|
|
|
|
|
|
|
1 $xmm1 Bytes |
359
|
|
|
|
|
|
|
2 $imm8 Length of shift |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
B<Example:> |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
is_deeply VPSLLDQ( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
368
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
369
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
370
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
371
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
372
|
|
|
|
|
|
|
'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
373
|
|
|
|
|
|
|
,2), |
374
|
|
|
|
|
|
|
'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000' |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
is_deeply VPSLLDQ( # 2*128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
380
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
381
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
382
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
383
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
384
|
|
|
|
|
|
|
'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
385
|
|
|
|
|
|
|
.'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
386
|
|
|
|
|
|
|
,2), |
387
|
|
|
|
|
|
|
'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000' |
388
|
|
|
|
|
|
|
.'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000' |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
is_deeply VPSLLDQ( # 4*128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
394
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
395
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
396
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
397
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
398
|
|
|
|
|
|
|
'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
399
|
|
|
|
|
|
|
.'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
400
|
|
|
|
|
|
|
.'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
401
|
|
|
|
|
|
|
.'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
402
|
|
|
|
|
|
|
,2), |
403
|
|
|
|
|
|
|
'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000' |
404
|
|
|
|
|
|
|
.'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000' |
405
|
|
|
|
|
|
|
.'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000' |
406
|
|
|
|
|
|
|
.'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000' |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=head2 PSRLDQ($xmm1, $imm8) |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
Packed Shift Right Logical DoubleQword |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
Parameter Description |
414
|
|
|
|
|
|
|
1 $xmm1 Bytes |
415
|
|
|
|
|
|
|
2 $imm8 Length of shift |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
B<Example:> |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
is_deeply PSRLDQ( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
424
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
425
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
426
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
427
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
428
|
|
|
|
|
|
|
'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
429
|
|
|
|
|
|
|
,2), |
430
|
|
|
|
|
|
|
'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000' |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=head2 VPSRLDQ($xmm1, $imm8) |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
Packed Shift Right Logical DoubleQword |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
Parameter Description |
438
|
|
|
|
|
|
|
1 $xmm1 Bytes |
439
|
|
|
|
|
|
|
2 $imm8 Length of shift |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
B<Example:> |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
is_deeply VPSRLDQ( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
448
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
449
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
450
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
451
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
452
|
|
|
|
|
|
|
'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
453
|
|
|
|
|
|
|
,2), |
454
|
|
|
|
|
|
|
'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000' |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
is_deeply VPSRLDQ( # 2*128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
460
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
461
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
462
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
463
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
464
|
|
|
|
|
|
|
'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
465
|
|
|
|
|
|
|
.'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
466
|
|
|
|
|
|
|
,2), |
467
|
|
|
|
|
|
|
'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000' |
468
|
|
|
|
|
|
|
.'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000' |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
is_deeply VPSRLDQ( # 4*128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
474
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
475
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
476
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
477
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
478
|
|
|
|
|
|
|
'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
479
|
|
|
|
|
|
|
.'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
480
|
|
|
|
|
|
|
.'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
481
|
|
|
|
|
|
|
.'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
482
|
|
|
|
|
|
|
,2), |
483
|
|
|
|
|
|
|
'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000' |
484
|
|
|
|
|
|
|
.'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000' |
485
|
|
|
|
|
|
|
.'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000' |
486
|
|
|
|
|
|
|
.'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000' |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
=head2 PCMPEQB($xmm1, $xmm2) |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
Packed CoMPare EQual Byte |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
Parameter Description |
494
|
|
|
|
|
|
|
1 $xmm1 Bytes |
495
|
|
|
|
|
|
|
2 $xmm2 Bytes |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
B<Example:> |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
is_deeply PCMPEQB( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
504
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
505
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
506
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
507
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
508
|
|
|
|
|
|
|
'11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110' |
509
|
|
|
|
|
|
|
,'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010' |
510
|
|
|
|
|
|
|
), |
511
|
|
|
|
|
|
|
'11111111000000001111111111111111111111110000000000000000111111111111111111111111111111111111111111111111111111111111111100000000' |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
=head2 VPCMPEQB($k2, $xmm1, $xmm2) |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
Packed CoMPare EQual Byte with optional masking |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
Parameter Description |
519
|
|
|
|
|
|
|
1 $k2 Optional input mask |
520
|
|
|
|
|
|
|
2 $xmm1 Bytes |
521
|
|
|
|
|
|
|
3 $xmm2 Bytes |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
B<Example:> |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
is_deeply VPCMPEQB( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
530
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
531
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
532
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
533
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
534
|
|
|
|
|
|
|
'11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110' |
535
|
|
|
|
|
|
|
,'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010' |
536
|
|
|
|
|
|
|
), |
537
|
|
|
|
|
|
|
'11111111000000001111111111111111111111110000000000000000111111111111111111111111111111111111111111111111111111111111111100000000' |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
is_deeply VPCMPEQB( # 256 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
543
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
544
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
545
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
546
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
547
|
|
|
|
|
|
|
'11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110' |
548
|
|
|
|
|
|
|
.'11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110' |
549
|
|
|
|
|
|
|
,'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010' |
550
|
|
|
|
|
|
|
.'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010' |
551
|
|
|
|
|
|
|
), |
552
|
|
|
|
|
|
|
'11111111000000001111111111111111111111110000000000000000111111111111111111111111111111111111111111111111111111111111111100000000' |
553
|
|
|
|
|
|
|
.'11111111000000001111111111111111111111110000000000000000111111111111111111111111111111111111111111111111111111111111111100000000' |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
is_deeply VPCMPEQB( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
559
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
560
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
561
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
562
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
563
|
|
|
|
|
|
|
'0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
564
|
|
|
|
|
|
|
'00000001000010000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
565
|
|
|
|
|
|
|
'10000001000010000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
566
|
|
|
|
|
|
|
), |
567
|
|
|
|
|
|
|
'0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'; |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
is_deeply VPCMPEQB( # 256 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
573
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
574
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
575
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
576
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
577
|
|
|
|
|
|
|
'1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
578
|
|
|
|
|
|
|
'0000000100001000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
579
|
|
|
|
|
|
|
'1000000100001000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
580
|
|
|
|
|
|
|
), |
581
|
|
|
|
|
|
|
'0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'; |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
is_deeply VPCMPEQB( # 512 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
587
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
588
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
589
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
590
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
591
|
|
|
|
|
|
|
'1'. '1'. '1'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
592
|
|
|
|
|
|
|
'00000000110000001000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
593
|
|
|
|
|
|
|
'00000000110000000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
594
|
|
|
|
|
|
|
), |
595
|
|
|
|
|
|
|
'1'. '1'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'; |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
=head2 VPCMPEQW($k2, $xmm1, $xmm2) |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
Packed CoMPare EQual Word with optional masking |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
Parameter Description |
603
|
|
|
|
|
|
|
1 $k2 Optional input mask |
604
|
|
|
|
|
|
|
2 $xmm1 Words |
605
|
|
|
|
|
|
|
3 $xmm2 Words |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
=head2 vpcmpu($size, $k2, $xmm1, $xmm2, $op) |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
Packed CoMPare Unsigned elements of the specified size |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
Parameter Description |
612
|
|
|
|
|
|
|
1 $size Size of element in bits |
613
|
|
|
|
|
|
|
2 $k2 Input mask |
614
|
|
|
|
|
|
|
3 $xmm1 Bytes |
615
|
|
|
|
|
|
|
4 $xmm2 Bytes |
616
|
|
|
|
|
|
|
5 $op Test code |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
=head2 VPCMPUB($k2, $xmm1, $xmm2, $op) |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
Packed CoMPare Unsigned Byte |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
Parameter Description |
623
|
|
|
|
|
|
|
1 $k2 Input mask |
624
|
|
|
|
|
|
|
2 $xmm1 Bytes |
625
|
|
|
|
|
|
|
3 $xmm2 Bytes |
626
|
|
|
|
|
|
|
4 $op Test code |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
B<Example:> |
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
my ($mi, $mo, $o1, $o2) = ( # 128 |
632
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
633
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
634
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
635
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
636
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
637
|
|
|
|
|
|
|
'0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
638
|
|
|
|
|
|
|
'0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
639
|
|
|
|
|
|
|
'00000001000010000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
640
|
|
|
|
|
|
|
'10000001000010000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
641
|
|
|
|
|
|
|
); |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
is_deeply VPCMPUB($mi, $o1, $o2, 0), $mo; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
is_deeply VPCMPUB($mi, $o1, $o2, 4), zBytes(6).flipBitsUnderMask substr($mo, 48), substr($mi, 48); # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
my ($mi, $mo, $o1, $o2) = ( # 256 |
651
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
652
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
653
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
654
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
655
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
656
|
|
|
|
|
|
|
'1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
657
|
|
|
|
|
|
|
'0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
658
|
|
|
|
|
|
|
'0000000100001000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
659
|
|
|
|
|
|
|
'1000000100001000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
660
|
|
|
|
|
|
|
); |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
is_deeply VPCMPUB($mi, $o1, $o2, 0), $mo; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
is_deeply VPCMPUB($mi, $o1, $o2, 4), zBytes(4).flipBitsUnderMask substr($mo, 32), substr($mi, 32); # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512 |
669
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
670
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
671
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
672
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
673
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
674
|
|
|
|
|
|
|
'1'. '1'. '1'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
675
|
|
|
|
|
|
|
'1'. '1'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
676
|
|
|
|
|
|
|
'0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0', |
677
|
|
|
|
|
|
|
'0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0', |
678
|
|
|
|
|
|
|
'00000000110000001000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000110000000110000111111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
679
|
|
|
|
|
|
|
'00000000110000000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011000001100000011111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
680
|
|
|
|
|
|
|
); |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
is_deeply VPCMPUB($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
is_deeply VPCMPUB($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
is_deeply VPCMPUB($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
is_deeply VPCMPUB($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
is_deeply VPCMPUB($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
is_deeply VPCMPUB($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
=head2 VPCMPUW($k2, $xmm1, $xmm2, $op) |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
Packed CoMPare Unsigned Word |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
Parameter Description |
706
|
|
|
|
|
|
|
1 $k2 Input mask |
707
|
|
|
|
|
|
|
2 $xmm1 Words |
708
|
|
|
|
|
|
|
3 $xmm2 Words |
709
|
|
|
|
|
|
|
4 $op Test code |
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
B<Example:> |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512 |
715
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
716
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
717
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
718
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
719
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
720
|
|
|
|
|
|
|
'1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
721
|
|
|
|
|
|
|
'1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
722
|
|
|
|
|
|
|
'0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0', |
723
|
|
|
|
|
|
|
'0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0', |
724
|
|
|
|
|
|
|
'00000000110000001000000001100000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000110000000110000011111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
725
|
|
|
|
|
|
|
'00000000110000000000000001100000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011000001100000111111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
726
|
|
|
|
|
|
|
); |
727
|
|
|
|
|
|
|
for my $i(\($mi, $meq, $mlt, $mgt)) |
728
|
|
|
|
|
|
|
{$$i = zBytes(4).$$i; |
729
|
|
|
|
|
|
|
} |
730
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
is_deeply VPCMPUW($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
is_deeply VPCMPUW($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
is_deeply VPCMPUW($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
is_deeply VPCMPUW($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
is_deeply VPCMPUW($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
is_deeply VPCMPUW($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
=head2 VPCMPUD($k2, $xmm1, $xmm2, $op) |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
Packed CoMPare Unsigned Dword |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
Parameter Description |
755
|
|
|
|
|
|
|
1 $k2 Input mask |
756
|
|
|
|
|
|
|
2 $xmm1 Dwords |
757
|
|
|
|
|
|
|
3 $xmm2 Dwords |
758
|
|
|
|
|
|
|
4 $op Test code |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
B<Example:> |
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512 |
764
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
765
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
766
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
767
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
768
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
769
|
|
|
|
|
|
|
'1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
770
|
|
|
|
|
|
|
'0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0', |
771
|
|
|
|
|
|
|
'0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0', |
772
|
|
|
|
|
|
|
'1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
773
|
|
|
|
|
|
|
'00000000110000001000000001100000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000110000000110000011111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000', |
774
|
|
|
|
|
|
|
'00000000110000000000000001100000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011000001100000111111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000', |
775
|
|
|
|
|
|
|
); |
776
|
|
|
|
|
|
|
for my $i(\($mi, $meq, $mlt, $mgt)) |
777
|
|
|
|
|
|
|
{$$i = zBytes(6).$$i; |
778
|
|
|
|
|
|
|
} |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
is_deeply VPCMPUD($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
is_deeply VPCMPUD($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
is_deeply VPCMPUD($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
is_deeply VPCMPUD($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
|
792
|
|
|
|
|
|
|
is_deeply VPCMPUD($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
is_deeply VPCMPUD($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
796
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
=head2 VPCMPUQ($k2, $xmm1, $xmm2, $op) |
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
Packed CoMPare Unsigned Qword |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
Parameter Description |
804
|
|
|
|
|
|
|
1 $k2 Input mask |
805
|
|
|
|
|
|
|
2 $xmm1 Qwords |
806
|
|
|
|
|
|
|
3 $xmm2 Qwords |
807
|
|
|
|
|
|
|
4 $op Test code |
808
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
B<Example:> |
810
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512 |
813
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
814
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
815
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
816
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
817
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
818
|
|
|
|
|
|
|
'1'. '1'. '1'. '1'. '1'. '0'. '0'. '1', |
819
|
|
|
|
|
|
|
'0'. '1'. '0'. '0'. '0'. '0'. '0'. '0', |
820
|
|
|
|
|
|
|
'1'. '0'. '1'. '1'. '0'. '0'. '0'. '0', |
821
|
|
|
|
|
|
|
'0'. '0'. '0'. '0'. '1'. '0'. '0'. '1', |
822
|
|
|
|
|
|
|
'00000000110000001000000001100000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000110000000000000000000000000000000000000000000000000000000000000110000000110000011111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000', |
823
|
|
|
|
|
|
|
'00000000110000001000000001100000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000001100000000000000000000000000000000000000000000000000000000000000011000001100000111111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000', |
824
|
|
|
|
|
|
|
); |
825
|
|
|
|
|
|
|
for my $i(\($mi, $meq, $mlt, $mgt)) |
826
|
|
|
|
|
|
|
{$$i = zBytes(7).$$i; |
827
|
|
|
|
|
|
|
} |
828
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
is_deeply VPCMPUQ($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
830
|
|
|
|
|
|
|
|
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
is_deeply VPCMPUQ($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
is_deeply VPCMPUQ($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
836
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
is_deeply VPCMPUQ($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
is_deeply VPCMPUQ($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
842
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
is_deeply VPCMPUQ($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
=head1 Private Methods |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
=head2 vpcmpeq($size, $k2, $xmm1, $xmm2) |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
Packed CoMPare EQual Byte|word|double|quad with optional masking |
854
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
Parameter Description |
856
|
|
|
|
|
|
|
1 $size Size in bits: 8|16|32|64 of each element |
857
|
|
|
|
|
|
|
2 $k2 Optional input mask |
858
|
|
|
|
|
|
|
3 $xmm1 Bytes |
859
|
|
|
|
|
|
|
4 $xmm2 Bytes |
860
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
=head1 Index |
863
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
1 L<PCMPEQB|/PCMPEQB> - Packed CoMPare EQual Byte |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
2 L<PSLLDQ|/PSLLDQ> - Packed Shift Left Logical DoubleQword |
868
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
3 L<PSRLDQ|/PSRLDQ> - Packed Shift Right Logical DoubleQword |
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
4 L<vpcmpeq|/vpcmpeq> - Packed CoMPare EQual Byte|word|double|quad with optional masking |
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
5 L - Packed CoMPare EQual Byte with optional masking |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
6 L - Packed CoMPare EQual Byte with optional masking |
876
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
7 L - Packed CoMPare Unsigned Byte |
878
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
8 L - Packed CoMPare Unsigned Byte |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
9 L<VPCMPUD|/VPCMPUD> - Packed CoMPare Unsigned Dword |
882
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
10 L<VPCMPUQ|/VPCMPUQ> - Packed CoMPare Unsigned Qword |
884
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
11 L<VPCMPUW|/VPCMPUW> - Packed CoMPare Unsigned Word |
886
|
|
|
|
|
|
|
|
887
|
|
|
|
|
|
|
12 L<VPSLLDQ|/VPSLLDQ> - Packed Shift Left Logical DoubleQword |
888
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
13 L<VPSRLDQ|/VPSRLDQ> - Packed Shift Right Logical DoubleQword |
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
=head1 Installation |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
This module is written in 100% Pure Perl and, thus, it is easy to read, |
894
|
|
|
|
|
|
|
comprehend, use, modify and install via B<cpan>: |
895
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
sudo cpan install Simd::Avx512 |
897
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
=head1 Author |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
L |
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
L |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
=head1 Copyright |
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
Copyright (c) 2016-2019 Philip R Brenan. |
907
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
This module is free software. It may be used, redistributed and/or modified |
909
|
|
|
|
|
|
|
under the same terms as Perl itself. |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
=cut |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
# Tests and documentation |
916
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
sub test                                                                        # Run the test source held after __DATA__ in this package
 {my $p = __PACKAGE__;                                                          # Name of the package whose DATA file handle is read
  for my $stream(*STDOUT, *STDERR)                                              # Apply the utf8 layer to both standard output streams
   {binmode($stream, ":utf8");
   }
  if (eval "eof(${p}::DATA)")                                                   # Nothing to read from DATA, so there is nothing to run
   {return;
   }
  my $source = eval "join('', <${p}::DATA>)";                                   # Read everything remaining on DATA into one string
  die $@ if $@;                                                                 # Reading failed
  eval $source;                                                                 # Execute the text read from DATA as Perl
  die $@ if $@;                                                                 # Propagate any failure raised by the executed text
  1
 }
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
test() if !caller;                                                              # Run the tests only when there is no caller, i.e. this file is being executed directly

1;                                                                              # True value so that loading this file as a module succeeds
931
|
|
|
|
|
|
|
# podDocumentation |
932
|
|
|
|
|
|
|
__DATA__ |