line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
/* Copyright (C) 2013-2015 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
#ifndef _IMMINTRIN_H_INCLUDED |
25
|
|
|
|
|
|
|
#error "Never use directly; include instead." |
26
|
|
|
|
|
|
|
#endif |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
#ifndef _AVX512FINTRIN_H_INCLUDED |
29
|
|
|
|
|
|
|
#define _AVX512FINTRIN_H_INCLUDED |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
/* If AVX-512F is not already enabled, save the current options, switch
   the target to avx512f and define __DISABLE_AVX512F__ as a marker.
   NOTE(review): the matching pop_options is not in the visible part of
   the file -- presumably keyed on this marker; confirm.  */
#ifndef __AVX512F__
#pragma GCC push_options
#pragma GCC target("avx512f")
#define __DISABLE_AVX512F__
#endif /* __AVX512F__ */
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
/* Internal data types for implementing the intrinsics.  Each one is a
   64-byte GCC vector of the element type named in the typedef.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  Hence the __may_alias__
   attribute on each of these 64-byte vector types.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
/* Mask types used by the masked intrinsics: plain unsigned integers.  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
extern __inline __m512i |
59
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
60
|
|
|
|
|
|
|
_mm512_set_epi64 (long long __A, long long __B, long long __C, |
61
|
|
|
|
|
|
|
long long __D, long long __E, long long __F, |
62
|
|
|
|
|
|
|
long long __G, long long __H) |
63
|
|
|
|
|
|
|
{ |
64
|
|
|
|
|
|
|
return __extension__ (__m512i) (__v8di) |
65
|
|
|
|
|
|
|
{ __H, __G, __F, __E, __D, __C, __B, __A }; |
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
/* Create the vector [A B C D E F G H I J K L M N O P]. */ |
69
|
|
|
|
|
|
|
extern __inline __m512i |
70
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
71
|
|
|
|
|
|
|
_mm512_set_epi32 (int __A, int __B, int __C, int __D, |
72
|
|
|
|
|
|
|
int __E, int __F, int __G, int __H, |
73
|
|
|
|
|
|
|
int __I, int __J, int __K, int __L, |
74
|
|
|
|
|
|
|
int __M, int __N, int __O, int __P) |
75
|
|
|
|
|
|
|
{ |
76
|
|
|
|
|
|
|
return __extension__ (__m512i)(__v16si) |
77
|
|
|
|
|
|
|
{ __P, __O, __N, __M, __L, __K, __J, __I, |
78
|
|
|
|
|
|
|
__H, __G, __F, __E, __D, __C, __B, __A }; |
79
|
|
|
|
|
|
|
} |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
extern __inline __m512d |
82
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
83
|
|
|
|
|
|
|
_mm512_set_pd (double __A, double __B, double __C, double __D, |
84
|
|
|
|
|
|
|
double __E, double __F, double __G, double __H) |
85
|
|
|
|
|
|
|
{ |
86
|
|
|
|
|
|
|
return __extension__ (__m512d) |
87
|
|
|
|
|
|
|
{ __H, __G, __F, __E, __D, __C, __B, __A }; |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
extern __inline __m512 |
91
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
92
|
|
|
|
|
|
|
_mm512_set_ps (float __A, float __B, float __C, float __D, |
93
|
|
|
|
|
|
|
float __E, float __F, float __G, float __H, |
94
|
|
|
|
|
|
|
float __I, float __J, float __K, float __L, |
95
|
|
|
|
|
|
|
float __M, float __N, float __O, float __P) |
96
|
|
|
|
|
|
|
{ |
97
|
|
|
|
|
|
|
return __extension__ (__m512) |
98
|
|
|
|
|
|
|
{ __P, __O, __N, __M, __L, __K, __J, __I, |
99
|
|
|
|
|
|
|
__H, __G, __F, __E, __D, __C, __B, __A }; |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
/* Expands to _mm512_set_epi64 with the eight arguments in reverse order.  */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)                            \
  _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
/* Expands to _mm512_set_epi32 with the sixteen arguments in reverse order.  */
#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,                            \
                          e8,e9,e10,e11,e12,e13,e14,e15)                      \
  _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
/* Expands to _mm512_set_pd with the eight arguments in reverse order.  */
#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)                               \
  _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
/* Expands to _mm512_set_ps with the sixteen arguments in reverse order.  */
#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
extern __inline __m512 |
116
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
117
|
|
|
|
|
|
|
_mm512_undefined_ps (void) |
118
|
|
|
|
|
|
|
{ |
119
|
|
|
|
|
|
|
__m512 __Y = __Y; |
120
|
|
|
|
|
|
|
return __Y; |
121
|
|
|
|
|
|
|
} |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
extern __inline __m512d |
124
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
125
|
|
|
|
|
|
|
_mm512_undefined_pd (void) |
126
|
|
|
|
|
|
|
{ |
127
|
|
|
|
|
|
|
__m512d __Y = __Y; |
128
|
|
|
|
|
|
|
return __Y; |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
extern __inline __m512i |
132
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
133
|
|
|
|
|
|
|
_mm512_undefined_si512 (void) |
134
|
|
|
|
|
|
|
{ |
135
|
|
|
|
|
|
|
__m512i __Y = __Y; |
136
|
|
|
|
|
|
|
return __Y; |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
extern __inline __m512i |
140
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
141
|
|
|
|
|
|
|
_mm512_set1_epi8 (char __A) |
142
|
|
|
|
|
|
|
{ |
143
|
|
|
|
|
|
|
return __extension__ (__m512i)(__v64qi) |
144
|
|
|
|
|
|
|
{ __A, __A, __A, __A, __A, __A, __A, __A, |
145
|
|
|
|
|
|
|
__A, __A, __A, __A, __A, __A, __A, __A, |
146
|
|
|
|
|
|
|
__A, __A, __A, __A, __A, __A, __A, __A, |
147
|
|
|
|
|
|
|
__A, __A, __A, __A, __A, __A, __A, __A, |
148
|
|
|
|
|
|
|
__A, __A, __A, __A, __A, __A, __A, __A, |
149
|
|
|
|
|
|
|
__A, __A, __A, __A, __A, __A, __A, __A, |
150
|
|
|
|
|
|
|
__A, __A, __A, __A, __A, __A, __A, __A, |
151
|
|
|
|
|
|
|
__A, __A, __A, __A, __A, __A, __A, __A }; |
152
|
|
|
|
|
|
|
} |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
extern __inline __m512i |
155
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
156
|
|
|
|
|
|
|
_mm512_set1_epi16 (short __A) |
157
|
|
|
|
|
|
|
{ |
158
|
|
|
|
|
|
|
return __extension__ (__m512i)(__v32hi) |
159
|
|
|
|
|
|
|
{ __A, __A, __A, __A, __A, __A, __A, __A, |
160
|
|
|
|
|
|
|
__A, __A, __A, __A, __A, __A, __A, __A, |
161
|
|
|
|
|
|
|
__A, __A, __A, __A, __A, __A, __A, __A, |
162
|
|
|
|
|
|
|
__A, __A, __A, __A, __A, __A, __A, __A }; |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
extern __inline __m512d |
166
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
167
|
|
|
|
|
|
|
_mm512_set1_pd (double __A) |
168
|
|
|
|
|
|
|
{ |
169
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_broadcastsd512 (__extension__ |
170
|
|
|
|
|
|
|
(__v2df) { __A, }, |
171
|
|
|
|
|
|
|
(__v8df) |
172
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
173
|
|
|
|
|
|
|
(__mmask8) -1); |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
extern __inline __m512 |
177
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
178
|
|
|
|
|
|
|
_mm512_set1_ps (float __A) |
179
|
|
|
|
|
|
|
{ |
180
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_broadcastss512 (__extension__ |
181
|
|
|
|
|
|
|
(__v4sf) { __A, }, |
182
|
|
|
|
|
|
|
(__v16sf) |
183
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
184
|
|
|
|
|
|
|
(__mmask16) -1); |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
/* Create the vector [A B C D A B C D A B C D A B C D]. */ |
188
|
|
|
|
|
|
|
extern __inline __m512i |
189
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
190
|
|
|
|
|
|
|
_mm512_set4_epi32 (int __A, int __B, int __C, int __D) |
191
|
|
|
|
|
|
|
{ |
192
|
|
|
|
|
|
|
return __extension__ (__m512i)(__v16si) |
193
|
|
|
|
|
|
|
{ __D, __C, __B, __A, __D, __C, __B, __A, |
194
|
|
|
|
|
|
|
__D, __C, __B, __A, __D, __C, __B, __A }; |
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
extern __inline __m512i |
198
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
199
|
|
|
|
|
|
|
_mm512_set4_epi64 (long long __A, long long __B, long long __C, |
200
|
|
|
|
|
|
|
long long __D) |
201
|
|
|
|
|
|
|
{ |
202
|
|
|
|
|
|
|
return __extension__ (__m512i) (__v8di) |
203
|
|
|
|
|
|
|
{ __D, __C, __B, __A, __D, __C, __B, __A }; |
204
|
|
|
|
|
|
|
} |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
extern __inline __m512d |
207
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
208
|
|
|
|
|
|
|
_mm512_set4_pd (double __A, double __B, double __C, double __D) |
209
|
|
|
|
|
|
|
{ |
210
|
|
|
|
|
|
|
return __extension__ (__m512d) |
211
|
|
|
|
|
|
|
{ __D, __C, __B, __A, __D, __C, __B, __A }; |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
extern __inline __m512 |
215
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
216
|
|
|
|
|
|
|
_mm512_set4_ps (float __A, float __B, float __C, float __D) |
217
|
|
|
|
|
|
|
{ |
218
|
|
|
|
|
|
|
return __extension__ (__m512) |
219
|
|
|
|
|
|
|
{ __D, __C, __B, __A, __D, __C, __B, __A, |
220
|
|
|
|
|
|
|
__D, __C, __B, __A, __D, __C, __B, __A }; |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
/* Expands to _mm512_set4_epi64 with the four arguments in reverse order.  */
#define _mm512_setr4_epi64(e0,e1,e2,e3)                                       \
  _mm512_set4_epi64(e3,e2,e1,e0)
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
/* Expands to _mm512_set4_epi32 with the four arguments in reverse order.  */
#define _mm512_setr4_epi32(e0,e1,e2,e3)                                       \
  _mm512_set4_epi32(e3,e2,e1,e0)
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
/* Expands to _mm512_set4_pd with the four arguments in reverse order.  */
#define _mm512_setr4_pd(e0,e1,e2,e3)                                          \
  _mm512_set4_pd(e3,e2,e1,e0)
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
/* Expands to _mm512_set4_ps with the four arguments in reverse order.  */
#define _mm512_setr4_ps(e0,e1,e2,e3)                                          \
  _mm512_set4_ps(e3,e2,e1,e0)
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
extern __inline __m512 |
236
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
237
|
|
|
|
|
|
|
_mm512_setzero_ps (void) |
238
|
|
|
|
|
|
|
{ |
239
|
|
|
|
|
|
|
return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, |
240
|
|
|
|
|
|
|
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
extern __inline __m512d |
244
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
245
|
|
|
|
|
|
|
_mm512_setzero_pd (void) |
246
|
|
|
|
|
|
|
{ |
247
|
|
|
|
|
|
|
return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; |
248
|
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
extern __inline __m512i |
251
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
252
|
|
|
|
|
|
|
_mm512_setzero_epi32 (void) |
253
|
|
|
|
|
|
|
{ |
254
|
|
|
|
|
|
|
return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; |
255
|
|
|
|
|
|
|
} |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
extern __inline __m512i |
258
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
259
|
|
|
|
|
|
|
_mm512_setzero_si512 (void) |
260
|
|
|
|
|
|
|
{ |
261
|
|
|
|
|
|
|
return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; |
262
|
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
extern __inline __m512d |
265
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
266
|
|
|
|
|
|
|
_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) |
267
|
|
|
|
|
|
|
{ |
268
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A, |
269
|
|
|
|
|
|
|
(__v8df) __W, |
270
|
|
|
|
|
|
|
(__mmask8) __U); |
271
|
|
|
|
|
|
|
} |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
extern __inline __m512d |
274
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
275
|
|
|
|
|
|
|
_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) |
276
|
|
|
|
|
|
|
{ |
277
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A, |
278
|
|
|
|
|
|
|
(__v8df) |
279
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
280
|
|
|
|
|
|
|
(__mmask8) __U); |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
extern __inline __m512 |
284
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
285
|
|
|
|
|
|
|
_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) |
286
|
|
|
|
|
|
|
{ |
287
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A, |
288
|
|
|
|
|
|
|
(__v16sf) __W, |
289
|
|
|
|
|
|
|
(__mmask16) __U); |
290
|
|
|
|
|
|
|
} |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
extern __inline __m512 |
293
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
294
|
|
|
|
|
|
|
_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A) |
295
|
|
|
|
|
|
|
{ |
296
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A, |
297
|
|
|
|
|
|
|
(__v16sf) |
298
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
299
|
|
|
|
|
|
|
(__mmask16) __U); |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
extern __inline __m512d |
303
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
304
|
|
|
|
|
|
|
_mm512_load_pd (void const *__P) |
305
|
|
|
|
|
|
|
{ |
306
|
|
|
|
|
|
|
return *(__m512d *) __P; |
307
|
|
|
|
|
|
|
} |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
extern __inline __m512d |
310
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
311
|
|
|
|
|
|
|
_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P) |
312
|
|
|
|
|
|
|
{ |
313
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, |
314
|
|
|
|
|
|
|
(__v8df) __W, |
315
|
|
|
|
|
|
|
(__mmask8) __U); |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
extern __inline __m512d |
319
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
320
|
|
|
|
|
|
|
_mm512_maskz_load_pd (__mmask8 __U, void const *__P) |
321
|
|
|
|
|
|
|
{ |
322
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, |
323
|
|
|
|
|
|
|
(__v8df) |
324
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
325
|
|
|
|
|
|
|
(__mmask8) __U); |
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
extern __inline void |
329
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
330
|
|
|
|
|
|
|
_mm512_store_pd (void *__P, __m512d __A) |
331
|
|
|
|
|
|
|
{ |
332
|
|
|
|
|
|
|
*(__m512d *) __P = __A; |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
extern __inline void |
336
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
337
|
|
|
|
|
|
|
_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A) |
338
|
|
|
|
|
|
|
{ |
339
|
|
|
|
|
|
|
__builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A, |
340
|
|
|
|
|
|
|
(__mmask8) __U); |
341
|
|
|
|
|
|
|
} |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
extern __inline __m512 |
344
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
345
|
|
|
|
|
|
|
_mm512_load_ps (void const *__P) |
346
|
|
|
|
|
|
|
{ |
347
|
|
|
|
|
|
|
return *(__m512 *) __P; |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
extern __inline __m512 |
351
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
352
|
|
|
|
|
|
|
_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P) |
353
|
|
|
|
|
|
|
{ |
354
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, |
355
|
|
|
|
|
|
|
(__v16sf) __W, |
356
|
|
|
|
|
|
|
(__mmask16) __U); |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
extern __inline __m512 |
360
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
361
|
|
|
|
|
|
|
_mm512_maskz_load_ps (__mmask16 __U, void const *__P) |
362
|
|
|
|
|
|
|
{ |
363
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, |
364
|
|
|
|
|
|
|
(__v16sf) |
365
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
366
|
|
|
|
|
|
|
(__mmask16) __U); |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
extern __inline void |
370
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
371
|
|
|
|
|
|
|
_mm512_store_ps (void *__P, __m512 __A) |
372
|
|
|
|
|
|
|
{ |
373
|
|
|
|
|
|
|
*(__m512 *) __P = __A; |
374
|
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
extern __inline void |
377
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
378
|
|
|
|
|
|
|
_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A) |
379
|
|
|
|
|
|
|
{ |
380
|
|
|
|
|
|
|
__builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A, |
381
|
|
|
|
|
|
|
(__mmask16) __U); |
382
|
|
|
|
|
|
|
} |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
extern __inline __m512i |
385
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
386
|
|
|
|
|
|
|
_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A) |
387
|
|
|
|
|
|
|
{ |
388
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A, |
389
|
|
|
|
|
|
|
(__v8di) __W, |
390
|
|
|
|
|
|
|
(__mmask8) __U); |
391
|
|
|
|
|
|
|
} |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
extern __inline __m512i |
394
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
395
|
|
|
|
|
|
|
_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A) |
396
|
|
|
|
|
|
|
{ |
397
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A, |
398
|
|
|
|
|
|
|
(__v8di) |
399
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
400
|
|
|
|
|
|
|
(__mmask8) __U); |
401
|
|
|
|
|
|
|
} |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
extern __inline __m512i |
404
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
405
|
|
|
|
|
|
|
_mm512_load_epi64 (void const *__P) |
406
|
|
|
|
|
|
|
{ |
407
|
|
|
|
|
|
|
return *(__m512i *) __P; |
408
|
|
|
|
|
|
|
} |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
extern __inline __m512i |
411
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
412
|
|
|
|
|
|
|
_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P) |
413
|
|
|
|
|
|
|
{ |
414
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, |
415
|
|
|
|
|
|
|
(__v8di) __W, |
416
|
|
|
|
|
|
|
(__mmask8) __U); |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
extern __inline __m512i |
420
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
421
|
|
|
|
|
|
|
_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P) |
422
|
|
|
|
|
|
|
{ |
423
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, |
424
|
|
|
|
|
|
|
(__v8di) |
425
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
426
|
|
|
|
|
|
|
(__mmask8) __U); |
427
|
|
|
|
|
|
|
} |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
extern __inline void |
430
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
431
|
|
|
|
|
|
|
_mm512_store_epi64 (void *__P, __m512i __A) |
432
|
|
|
|
|
|
|
{ |
433
|
|
|
|
|
|
|
*(__m512i *) __P = __A; |
434
|
|
|
|
|
|
|
} |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
extern __inline void |
437
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
438
|
|
|
|
|
|
|
_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A) |
439
|
|
|
|
|
|
|
{ |
440
|
|
|
|
|
|
|
__builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A, |
441
|
|
|
|
|
|
|
(__mmask8) __U); |
442
|
|
|
|
|
|
|
} |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
extern __inline __m512i |
445
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
446
|
|
|
|
|
|
|
_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A) |
447
|
|
|
|
|
|
|
{ |
448
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A, |
449
|
|
|
|
|
|
|
(__v16si) __W, |
450
|
|
|
|
|
|
|
(__mmask16) __U); |
451
|
|
|
|
|
|
|
} |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
extern __inline __m512i |
454
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
455
|
|
|
|
|
|
|
_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A) |
456
|
|
|
|
|
|
|
{ |
457
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A, |
458
|
|
|
|
|
|
|
(__v16si) |
459
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
460
|
|
|
|
|
|
|
(__mmask16) __U); |
461
|
|
|
|
|
|
|
} |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
extern __inline __m512i |
464
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
465
|
|
|
|
|
|
|
_mm512_load_si512 (void const *__P) |
466
|
|
|
|
|
|
|
{ |
467
|
|
|
|
|
|
|
return *(__m512i *) __P; |
468
|
|
|
|
|
|
|
} |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
extern __inline __m512i |
471
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
472
|
|
|
|
|
|
|
_mm512_load_epi32 (void const *__P) |
473
|
|
|
|
|
|
|
{ |
474
|
|
|
|
|
|
|
return *(__m512i *) __P; |
475
|
|
|
|
|
|
|
} |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
extern __inline __m512i |
478
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
479
|
|
|
|
|
|
|
_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P) |
480
|
|
|
|
|
|
|
{ |
481
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, |
482
|
|
|
|
|
|
|
(__v16si) __W, |
483
|
|
|
|
|
|
|
(__mmask16) __U); |
484
|
|
|
|
|
|
|
} |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
extern __inline __m512i |
487
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
488
|
|
|
|
|
|
|
_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P) |
489
|
|
|
|
|
|
|
{ |
490
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, |
491
|
|
|
|
|
|
|
(__v16si) |
492
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
493
|
|
|
|
|
|
|
(__mmask16) __U); |
494
|
|
|
|
|
|
|
} |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
extern __inline void |
497
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
498
|
|
|
|
|
|
|
_mm512_store_si512 (void *__P, __m512i __A) |
499
|
|
|
|
|
|
|
{ |
500
|
|
|
|
|
|
|
*(__m512i *) __P = __A; |
501
|
|
|
|
|
|
|
} |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
extern __inline void |
504
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
505
|
|
|
|
|
|
|
_mm512_store_epi32 (void *__P, __m512i __A) |
506
|
|
|
|
|
|
|
{ |
507
|
|
|
|
|
|
|
*(__m512i *) __P = __A; |
508
|
|
|
|
|
|
|
} |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
extern __inline void |
511
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
512
|
|
|
|
|
|
|
_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A) |
513
|
|
|
|
|
|
|
{ |
514
|
|
|
|
|
|
|
__builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A, |
515
|
|
|
|
|
|
|
(__mmask16) __U); |
516
|
|
|
|
|
|
|
} |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
extern __inline __m512i |
519
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
520
|
|
|
|
|
|
|
_mm512_mullo_epi32 (__m512i __A, __m512i __B) |
521
|
|
|
|
|
|
|
{ |
522
|
|
|
|
|
|
|
return (__m512i) ((__v16su) __A * (__v16su) __B); |
523
|
|
|
|
|
|
|
} |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
extern __inline __m512i |
526
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
527
|
|
|
|
|
|
|
_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B) |
528
|
|
|
|
|
|
|
{ |
529
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, |
530
|
|
|
|
|
|
|
(__v16si) __B, |
531
|
|
|
|
|
|
|
(__v16si) |
532
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
533
|
|
|
|
|
|
|
__M); |
534
|
|
|
|
|
|
|
} |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
extern __inline __m512i |
537
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
538
|
|
|
|
|
|
|
_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) |
539
|
|
|
|
|
|
|
{ |
540
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, |
541
|
|
|
|
|
|
|
(__v16si) __B, |
542
|
|
|
|
|
|
|
(__v16si) __W, __M); |
543
|
|
|
|
|
|
|
} |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
extern __inline __m512i |
546
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
547
|
|
|
|
|
|
|
_mm512_sllv_epi32 (__m512i __X, __m512i __Y) |
548
|
|
|
|
|
|
|
{ |
549
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, |
550
|
|
|
|
|
|
|
(__v16si) __Y, |
551
|
|
|
|
|
|
|
(__v16si) |
552
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
553
|
|
|
|
|
|
|
(__mmask16) -1); |
554
|
|
|
|
|
|
|
} |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
extern __inline __m512i |
557
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
558
|
|
|
|
|
|
|
_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) |
559
|
|
|
|
|
|
|
{ |
560
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, |
561
|
|
|
|
|
|
|
(__v16si) __Y, |
562
|
|
|
|
|
|
|
(__v16si) __W, |
563
|
|
|
|
|
|
|
(__mmask16) __U); |
564
|
|
|
|
|
|
|
} |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
extern __inline __m512i |
567
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
568
|
|
|
|
|
|
|
_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) |
569
|
|
|
|
|
|
|
{ |
570
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, |
571
|
|
|
|
|
|
|
(__v16si) __Y, |
572
|
|
|
|
|
|
|
(__v16si) |
573
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
574
|
|
|
|
|
|
|
(__mmask16) __U); |
575
|
|
|
|
|
|
|
} |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
extern __inline __m512i |
578
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
579
|
|
|
|
|
|
|
_mm512_srav_epi32 (__m512i __X, __m512i __Y) |
580
|
|
|
|
|
|
|
{ |
581
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, |
582
|
|
|
|
|
|
|
(__v16si) __Y, |
583
|
|
|
|
|
|
|
(__v16si) |
584
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
585
|
|
|
|
|
|
|
(__mmask16) -1); |
586
|
|
|
|
|
|
|
} |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
extern __inline __m512i |
589
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
590
|
|
|
|
|
|
|
_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) |
591
|
|
|
|
|
|
|
{ |
592
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, |
593
|
|
|
|
|
|
|
(__v16si) __Y, |
594
|
|
|
|
|
|
|
(__v16si) __W, |
595
|
|
|
|
|
|
|
(__mmask16) __U); |
596
|
|
|
|
|
|
|
} |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
extern __inline __m512i |
599
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
600
|
|
|
|
|
|
|
_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) |
601
|
|
|
|
|
|
|
{ |
602
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, |
603
|
|
|
|
|
|
|
(__v16si) __Y, |
604
|
|
|
|
|
|
|
(__v16si) |
605
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
606
|
|
|
|
|
|
|
(__mmask16) __U); |
607
|
|
|
|
|
|
|
} |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
extern __inline __m512i |
610
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
611
|
|
|
|
|
|
|
_mm512_srlv_epi32 (__m512i __X, __m512i __Y) |
612
|
|
|
|
|
|
|
{ |
613
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, |
614
|
|
|
|
|
|
|
(__v16si) __Y, |
615
|
|
|
|
|
|
|
(__v16si) |
616
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
617
|
|
|
|
|
|
|
(__mmask16) -1); |
618
|
|
|
|
|
|
|
} |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
extern __inline __m512i |
621
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
622
|
|
|
|
|
|
|
_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) |
623
|
|
|
|
|
|
|
{ |
624
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, |
625
|
|
|
|
|
|
|
(__v16si) __Y, |
626
|
|
|
|
|
|
|
(__v16si) __W, |
627
|
|
|
|
|
|
|
(__mmask16) __U); |
628
|
|
|
|
|
|
|
} |
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
extern __inline __m512i |
631
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
632
|
|
|
|
|
|
|
_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) |
633
|
|
|
|
|
|
|
{ |
634
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, |
635
|
|
|
|
|
|
|
(__v16si) __Y, |
636
|
|
|
|
|
|
|
(__v16si) |
637
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
638
|
|
|
|
|
|
|
(__mmask16) __U); |
639
|
|
|
|
|
|
|
} |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
extern __inline __m512i |
642
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
643
|
|
|
|
|
|
|
_mm512_add_epi64 (__m512i __A, __m512i __B) |
644
|
|
|
|
|
|
|
{ |
645
|
0
|
|
|
|
|
|
return (__m512i) ((__v8du) __A + (__v8du) __B); |
646
|
|
|
|
|
|
|
} |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
extern __inline __m512i |
649
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
650
|
|
|
|
|
|
|
_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) |
651
|
|
|
|
|
|
|
{ |
652
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, |
653
|
|
|
|
|
|
|
(__v8di) __B, |
654
|
|
|
|
|
|
|
(__v8di) __W, |
655
|
|
|
|
|
|
|
(__mmask8) __U); |
656
|
|
|
|
|
|
|
} |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
extern __inline __m512i |
659
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
660
|
|
|
|
|
|
|
_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B) |
661
|
|
|
|
|
|
|
{ |
662
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, |
663
|
|
|
|
|
|
|
(__v8di) __B, |
664
|
|
|
|
|
|
|
(__v8di) |
665
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
666
|
|
|
|
|
|
|
(__mmask8) __U); |
667
|
|
|
|
|
|
|
} |
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
extern __inline __m512i |
670
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
671
|
|
|
|
|
|
|
_mm512_sub_epi64 (__m512i __A, __m512i __B) |
672
|
|
|
|
|
|
|
{ |
673
|
|
|
|
|
|
|
return (__m512i) ((__v8du) __A - (__v8du) __B); |
674
|
|
|
|
|
|
|
} |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
extern __inline __m512i |
677
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
678
|
|
|
|
|
|
|
_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) |
679
|
|
|
|
|
|
|
{ |
680
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, |
681
|
|
|
|
|
|
|
(__v8di) __B, |
682
|
|
|
|
|
|
|
(__v8di) __W, |
683
|
|
|
|
|
|
|
(__mmask8) __U); |
684
|
|
|
|
|
|
|
} |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
extern __inline __m512i |
687
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
688
|
|
|
|
|
|
|
_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B) |
689
|
|
|
|
|
|
|
{ |
690
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, |
691
|
|
|
|
|
|
|
(__v8di) __B, |
692
|
|
|
|
|
|
|
(__v8di) |
693
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
694
|
|
|
|
|
|
|
(__mmask8) __U); |
695
|
|
|
|
|
|
|
} |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
extern __inline __m512i |
698
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
699
|
|
|
|
|
|
|
_mm512_sllv_epi64 (__m512i __X, __m512i __Y) |
700
|
|
|
|
|
|
|
{ |
701
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, |
702
|
|
|
|
|
|
|
(__v8di) __Y, |
703
|
|
|
|
|
|
|
(__v8di) |
704
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
705
|
|
|
|
|
|
|
(__mmask8) -1); |
706
|
|
|
|
|
|
|
} |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
extern __inline __m512i |
709
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
710
|
|
|
|
|
|
|
_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) |
711
|
|
|
|
|
|
|
{ |
712
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, |
713
|
|
|
|
|
|
|
(__v8di) __Y, |
714
|
|
|
|
|
|
|
(__v8di) __W, |
715
|
|
|
|
|
|
|
(__mmask8) __U); |
716
|
|
|
|
|
|
|
} |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
extern __inline __m512i |
719
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
720
|
|
|
|
|
|
|
_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) |
721
|
|
|
|
|
|
|
{ |
722
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, |
723
|
|
|
|
|
|
|
(__v8di) __Y, |
724
|
|
|
|
|
|
|
(__v8di) |
725
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
726
|
|
|
|
|
|
|
(__mmask8) __U); |
727
|
|
|
|
|
|
|
} |
728
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
extern __inline __m512i |
730
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
731
|
|
|
|
|
|
|
_mm512_srav_epi64 (__m512i __X, __m512i __Y) |
732
|
|
|
|
|
|
|
{ |
733
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, |
734
|
|
|
|
|
|
|
(__v8di) __Y, |
735
|
|
|
|
|
|
|
(__v8di) |
736
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
737
|
|
|
|
|
|
|
(__mmask8) -1); |
738
|
|
|
|
|
|
|
} |
739
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
extern __inline __m512i |
741
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
742
|
|
|
|
|
|
|
_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) |
743
|
|
|
|
|
|
|
{ |
744
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, |
745
|
|
|
|
|
|
|
(__v8di) __Y, |
746
|
|
|
|
|
|
|
(__v8di) __W, |
747
|
|
|
|
|
|
|
(__mmask8) __U); |
748
|
|
|
|
|
|
|
} |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
extern __inline __m512i |
751
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
752
|
|
|
|
|
|
|
_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) |
753
|
|
|
|
|
|
|
{ |
754
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, |
755
|
|
|
|
|
|
|
(__v8di) __Y, |
756
|
|
|
|
|
|
|
(__v8di) |
757
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
758
|
|
|
|
|
|
|
(__mmask8) __U); |
759
|
|
|
|
|
|
|
} |
760
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
extern __inline __m512i |
762
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
763
|
|
|
|
|
|
|
_mm512_srlv_epi64 (__m512i __X, __m512i __Y) |
764
|
|
|
|
|
|
|
{ |
765
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, |
766
|
|
|
|
|
|
|
(__v8di) __Y, |
767
|
|
|
|
|
|
|
(__v8di) |
768
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
769
|
|
|
|
|
|
|
(__mmask8) -1); |
770
|
|
|
|
|
|
|
} |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
extern __inline __m512i |
773
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
774
|
|
|
|
|
|
|
_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) |
775
|
|
|
|
|
|
|
{ |
776
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, |
777
|
|
|
|
|
|
|
(__v8di) __Y, |
778
|
|
|
|
|
|
|
(__v8di) __W, |
779
|
|
|
|
|
|
|
(__mmask8) __U); |
780
|
|
|
|
|
|
|
} |
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
extern __inline __m512i |
783
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
784
|
|
|
|
|
|
|
_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) |
785
|
|
|
|
|
|
|
{ |
786
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, |
787
|
|
|
|
|
|
|
(__v8di) __Y, |
788
|
|
|
|
|
|
|
(__v8di) |
789
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
790
|
|
|
|
|
|
|
(__mmask8) __U); |
791
|
|
|
|
|
|
|
} |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
extern __inline __m512i |
794
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
795
|
|
|
|
|
|
|
_mm512_add_epi32 (__m512i __A, __m512i __B) |
796
|
|
|
|
|
|
|
{ |
797
|
|
|
|
|
|
|
return (__m512i) ((__v16su) __A + (__v16su) __B); |
798
|
|
|
|
|
|
|
} |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
extern __inline __m512i |
801
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
802
|
|
|
|
|
|
|
_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) |
803
|
|
|
|
|
|
|
{ |
804
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, |
805
|
|
|
|
|
|
|
(__v16si) __B, |
806
|
|
|
|
|
|
|
(__v16si) __W, |
807
|
|
|
|
|
|
|
(__mmask16) __U); |
808
|
|
|
|
|
|
|
} |
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
extern __inline __m512i |
811
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
812
|
|
|
|
|
|
|
_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) |
813
|
|
|
|
|
|
|
{ |
814
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, |
815
|
|
|
|
|
|
|
(__v16si) __B, |
816
|
|
|
|
|
|
|
(__v16si) |
817
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
818
|
|
|
|
|
|
|
(__mmask16) __U); |
819
|
|
|
|
|
|
|
} |
820
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
extern __inline __m512i |
822
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
823
|
|
|
|
|
|
|
_mm512_mul_epi32 (__m512i __X, __m512i __Y) |
824
|
|
|
|
|
|
|
{ |
825
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, |
826
|
|
|
|
|
|
|
(__v16si) __Y, |
827
|
|
|
|
|
|
|
(__v8di) |
828
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
829
|
|
|
|
|
|
|
(__mmask8) -1); |
830
|
|
|
|
|
|
|
} |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
extern __inline __m512i |
833
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
834
|
|
|
|
|
|
|
_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) |
835
|
|
|
|
|
|
|
{ |
836
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, |
837
|
|
|
|
|
|
|
(__v16si) __Y, |
838
|
|
|
|
|
|
|
(__v8di) __W, __M); |
839
|
|
|
|
|
|
|
} |
840
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
extern __inline __m512i |
842
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
843
|
|
|
|
|
|
|
_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y) |
844
|
|
|
|
|
|
|
{ |
845
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, |
846
|
|
|
|
|
|
|
(__v16si) __Y, |
847
|
|
|
|
|
|
|
(__v8di) |
848
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
849
|
|
|
|
|
|
|
__M); |
850
|
|
|
|
|
|
|
} |
851
|
|
|
|
|
|
|
|
852
|
|
|
|
|
|
|
extern __inline __m512i |
853
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
854
|
|
|
|
|
|
|
_mm512_sub_epi32 (__m512i __A, __m512i __B) |
855
|
|
|
|
|
|
|
{ |
856
|
|
|
|
|
|
|
return (__m512i) ((__v16su) __A - (__v16su) __B); |
857
|
|
|
|
|
|
|
} |
858
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
extern __inline __m512i |
860
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
861
|
|
|
|
|
|
|
_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) |
862
|
|
|
|
|
|
|
{ |
863
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, |
864
|
|
|
|
|
|
|
(__v16si) __B, |
865
|
|
|
|
|
|
|
(__v16si) __W, |
866
|
|
|
|
|
|
|
(__mmask16) __U); |
867
|
|
|
|
|
|
|
} |
868
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
extern __inline __m512i |
870
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
871
|
|
|
|
|
|
|
_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B) |
872
|
|
|
|
|
|
|
{ |
873
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, |
874
|
|
|
|
|
|
|
(__v16si) __B, |
875
|
|
|
|
|
|
|
(__v16si) |
876
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
877
|
|
|
|
|
|
|
(__mmask16) __U); |
878
|
|
|
|
|
|
|
} |
879
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
extern __inline __m512i |
881
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
882
|
|
|
|
|
|
|
_mm512_mul_epu32 (__m512i __X, __m512i __Y) |
883
|
|
|
|
|
|
|
{ |
884
|
0
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, |
885
|
|
|
|
|
|
|
(__v16si) __Y, |
886
|
|
|
|
|
|
|
(__v8di) |
887
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
888
|
|
|
|
|
|
|
(__mmask8) -1); |
889
|
|
|
|
|
|
|
} |
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
extern __inline __m512i |
892
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
893
|
|
|
|
|
|
|
_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) |
894
|
|
|
|
|
|
|
{ |
895
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, |
896
|
|
|
|
|
|
|
(__v16si) __Y, |
897
|
|
|
|
|
|
|
(__v8di) __W, __M); |
898
|
|
|
|
|
|
|
} |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
extern __inline __m512i |
901
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
902
|
|
|
|
|
|
|
_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y) |
903
|
|
|
|
|
|
|
{ |
904
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, |
905
|
|
|
|
|
|
|
(__v16si) __Y, |
906
|
|
|
|
|
|
|
(__v8di) |
907
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
908
|
|
|
|
|
|
|
__M); |
909
|
|
|
|
|
|
|
} |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
/* _mm512_slli_epi64 family: scalar-count forms built on
   __builtin_ia32_psllqi512_mask.  With __OPTIMIZE__ defined they are
   always-inline functions; otherwise they are macros expanding to the
   same builtin call (presumably so the count still reaches the builtin
   as a constant when nothing is inlined or folded -- confirm).  The
   functions take the count as unsigned int, the macros cast it to int.  */
#ifdef __OPTIMIZE__
/* Unmasked: all-ones 8-bit mask, placeholder third operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi64 (__m512i __A, unsigned int __B)
{
  __v8di __a = (__v8di) __A;
  __v8di __undef = (__v8di) _mm512_undefined_si512 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m512i) __builtin_ia32_psllqi512_mask (__a, __B, __undef, __all);
}

/* Merge form: __W is the third operand, __U the 8-bit mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U,
                        __m512i __A, unsigned int __B)
{
  __v8di __a = (__v8di) __A;
  __v8di __src = (__v8di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m512i) __builtin_ia32_psllqi512_mask (__a, __B, __src, __k);
}

/* Zeroing form: the third operand is _mm512_setzero_si512 ().  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  __v8di __a = (__v8di) __A;
  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m512i) __builtin_ia32_psllqi512_mask (__a, __B, __zero, __k);
}
#else
/* Macro equivalents of the three functions above.  */
#define _mm512_slli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(C), \
    (__v8di)(__m512i)_mm512_undefined_si512 (), \
    (__mmask8)-1))

#define _mm512_mask_slli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(C), \
    (__v8di)(__m512i)(W), \
    (__mmask8)(U)))

#define _mm512_maskz_slli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(C), \
    (__v8di)(__m512i)_mm512_setzero_si512 (), \
    (__mmask8)(U)))
#endif
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
extern __inline __m512i |
959
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
960
|
|
|
|
|
|
|
_mm512_sll_epi64 (__m512i __A, __m128i __B) |
961
|
|
|
|
|
|
|
{ |
962
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, |
963
|
|
|
|
|
|
|
(__v2di) __B, |
964
|
|
|
|
|
|
|
(__v8di) |
965
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
966
|
|
|
|
|
|
|
(__mmask8) -1); |
967
|
|
|
|
|
|
|
} |
968
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
extern __inline __m512i |
970
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
971
|
|
|
|
|
|
|
_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) |
972
|
|
|
|
|
|
|
{ |
973
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, |
974
|
|
|
|
|
|
|
(__v2di) __B, |
975
|
|
|
|
|
|
|
(__v8di) __W, |
976
|
|
|
|
|
|
|
(__mmask8) __U); |
977
|
|
|
|
|
|
|
} |
978
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
extern __inline __m512i |
980
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
981
|
|
|
|
|
|
|
_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B) |
982
|
|
|
|
|
|
|
{ |
983
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, |
984
|
|
|
|
|
|
|
(__v2di) __B, |
985
|
|
|
|
|
|
|
(__v8di) |
986
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
987
|
|
|
|
|
|
|
(__mmask8) __U); |
988
|
|
|
|
|
|
|
} |
989
|
|
|
|
|
|
|
|
990
|
|
|
|
|
|
|
/* _mm512_srli_epi64 family: scalar-count forms built on
   __builtin_ia32_psrlqi512_mask.  With __OPTIMIZE__ defined they are
   always-inline functions; otherwise they are macros expanding to the
   same builtin call (presumably so the count still reaches the builtin
   as a constant when nothing is inlined or folded -- confirm).  The
   functions take the count as unsigned int, the macros cast it to int.  */
#ifdef __OPTIMIZE__
/* Unmasked: all-ones 8-bit mask, placeholder third operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi64 (__m512i __A, unsigned int __B)
{
  __v8di __a = (__v8di) __A;
  __v8di __undef = (__v8di) _mm512_undefined_si512 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m512i) __builtin_ia32_psrlqi512_mask (__a, __B, __undef, __all);
}

/* Merge form: __W is the third operand, __U the 8-bit mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  __v8di __a = (__v8di) __A;
  __v8di __src = (__v8di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m512i) __builtin_ia32_psrlqi512_mask (__a, __B, __src, __k);
}

/* Zeroing form: the third operand is _mm512_setzero_si512 ().  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  __v8di __a = (__v8di) __A;
  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m512i) __builtin_ia32_psrlqi512_mask (__a, __B, __zero, __k);
}
#else
/* Macro equivalents of the three functions above.  */
#define _mm512_srli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(C), \
    (__v8di)(__m512i)_mm512_undefined_si512 (), \
    (__mmask8)-1))

#define _mm512_mask_srli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(C), \
    (__v8di)(__m512i)(W), \
    (__mmask8)(U)))

#define _mm512_maskz_srli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(C), \
    (__v8di)(__m512i)_mm512_setzero_si512 (), \
    (__mmask8)(U)))
#endif
1036
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
extern __inline __m512i |
1038
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1039
|
|
|
|
|
|
|
_mm512_srl_epi64 (__m512i __A, __m128i __B) |
1040
|
|
|
|
|
|
|
{ |
1041
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, |
1042
|
|
|
|
|
|
|
(__v2di) __B, |
1043
|
|
|
|
|
|
|
(__v8di) |
1044
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
1045
|
|
|
|
|
|
|
(__mmask8) -1); |
1046
|
|
|
|
|
|
|
} |
1047
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
extern __inline __m512i |
1049
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1050
|
|
|
|
|
|
|
_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) |
1051
|
|
|
|
|
|
|
{ |
1052
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, |
1053
|
|
|
|
|
|
|
(__v2di) __B, |
1054
|
|
|
|
|
|
|
(__v8di) __W, |
1055
|
|
|
|
|
|
|
(__mmask8) __U); |
1056
|
|
|
|
|
|
|
} |
1057
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
extern __inline __m512i |
1059
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1060
|
|
|
|
|
|
|
_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B) |
1061
|
|
|
|
|
|
|
{ |
1062
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, |
1063
|
|
|
|
|
|
|
(__v2di) __B, |
1064
|
|
|
|
|
|
|
(__v8di) |
1065
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
1066
|
|
|
|
|
|
|
(__mmask8) __U); |
1067
|
|
|
|
|
|
|
} |
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
/* _mm512_srai_epi64 family: scalar-count forms built on
   __builtin_ia32_psraqi512_mask.  With __OPTIMIZE__ defined they are
   always-inline functions; otherwise they are macros expanding to the
   same builtin call (presumably so the count still reaches the builtin
   as a constant when nothing is inlined or folded -- confirm).  The
   functions take the count as unsigned int, the macros cast it to int.  */
#ifdef __OPTIMIZE__
/* Unmasked: all-ones 8-bit mask, placeholder third operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi64 (__m512i __A, unsigned int __B)
{
  __v8di __a = (__v8di) __A;
  __v8di __undef = (__v8di) _mm512_undefined_si512 ();
  __mmask8 __all = (__mmask8) -1;

  return (__m512i) __builtin_ia32_psraqi512_mask (__a, __B, __undef, __all);
}

/* Merge form: __W is the third operand, __U the 8-bit mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U,
                        __m512i __A, unsigned int __B)
{
  __v8di __a = (__v8di) __A;
  __v8di __src = (__v8di) __W;
  __mmask8 __k = (__mmask8) __U;

  return (__m512i) __builtin_ia32_psraqi512_mask (__a, __B, __src, __k);
}

/* Zeroing form: the third operand is _mm512_setzero_si512 ().  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  __v8di __a = (__v8di) __A;
  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
  __mmask8 __k = (__mmask8) __U;

  return (__m512i) __builtin_ia32_psraqi512_mask (__a, __B, __zero, __k);
}
#else
/* Macro equivalents of the three functions above.  */
#define _mm512_srai_epi64(X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(C), \
    (__v8di)(__m512i)_mm512_undefined_si512 (), \
    (__mmask8)-1))

#define _mm512_mask_srai_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(C), \
    (__v8di)(__m512i)(W), \
    (__mmask8)(U)))

#define _mm512_maskz_srai_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
    (__v8di)(__m512i)(X), \
    (int)(C), \
    (__v8di)(__m512i)_mm512_setzero_si512 (), \
    (__mmask8)(U)))
#endif
1115
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
extern __inline __m512i |
1117
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1118
|
|
|
|
|
|
|
_mm512_sra_epi64 (__m512i __A, __m128i __B) |
1119
|
|
|
|
|
|
|
{ |
1120
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, |
1121
|
|
|
|
|
|
|
(__v2di) __B, |
1122
|
|
|
|
|
|
|
(__v8di) |
1123
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
1124
|
|
|
|
|
|
|
(__mmask8) -1); |
1125
|
|
|
|
|
|
|
} |
1126
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
extern __inline __m512i |
1128
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1129
|
|
|
|
|
|
|
_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) |
1130
|
|
|
|
|
|
|
{ |
1131
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, |
1132
|
|
|
|
|
|
|
(__v2di) __B, |
1133
|
|
|
|
|
|
|
(__v8di) __W, |
1134
|
|
|
|
|
|
|
(__mmask8) __U); |
1135
|
|
|
|
|
|
|
} |
1136
|
|
|
|
|
|
|
|
1137
|
|
|
|
|
|
|
extern __inline __m512i |
1138
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1139
|
|
|
|
|
|
|
_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B) |
1140
|
|
|
|
|
|
|
{ |
1141
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, |
1142
|
|
|
|
|
|
|
(__v2di) __B, |
1143
|
|
|
|
|
|
|
(__v8di) |
1144
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
1145
|
|
|
|
|
|
|
(__mmask8) __U); |
1146
|
|
|
|
|
|
|
} |
1147
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
/* _mm512_slli_epi32 family: scalar-count forms built on
   __builtin_ia32_pslldi512_mask.  With __OPTIMIZE__ defined they are
   always-inline functions; otherwise they are macros expanding to the
   same builtin call (presumably so the count still reaches the builtin
   as a constant when nothing is inlined or folded -- confirm).  The
   functions take the count as unsigned int, the macros cast it to int.  */
#ifdef __OPTIMIZE__
/* Unmasked: all-ones 16-bit mask, placeholder third operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi32 (__m512i __A, unsigned int __B)
{
  __v16si __a = (__v16si) __A;
  __v16si __undef = (__v16si) _mm512_undefined_si512 ();
  __mmask16 __all = (__mmask16) -1;

  return (__m512i) __builtin_ia32_pslldi512_mask (__a, __B, __undef, __all);
}

/* Merge form: __W is the third operand, __U the 16-bit mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U,
                        __m512i __A, unsigned int __B)
{
  __v16si __a = (__v16si) __A;
  __v16si __src = (__v16si) __W;
  __mmask16 __k = (__mmask16) __U;

  return (__m512i) __builtin_ia32_pslldi512_mask (__a, __B, __src, __k);
}

/* Zeroing form: the third operand is _mm512_setzero_si512 ().  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  __v16si __a = (__v16si) __A;
  __v16si __zero = (__v16si) _mm512_setzero_si512 ();
  __mmask16 __k = (__mmask16) __U;

  return (__m512i) __builtin_ia32_pslldi512_mask (__a, __B, __zero, __k);
}
#else
/* Macro equivalents of the three functions above.  */
#define _mm512_slli_epi32(X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
    (__v16si)(__m512i)(X), \
    (int)(C), \
    (__v16si)(__m512i)_mm512_undefined_si512 (), \
    (__mmask16)-1))

#define _mm512_mask_slli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
    (__v16si)(__m512i)(X), \
    (int)(C), \
    (__v16si)(__m512i)(W), \
    (__mmask16)(U)))

#define _mm512_maskz_slli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
    (__v16si)(__m512i)(X), \
    (int)(C), \
    (__v16si)(__m512i)_mm512_setzero_si512 (), \
    (__mmask16)(U)))
#endif
1194
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
extern __inline __m512i |
1196
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1197
|
|
|
|
|
|
|
_mm512_sll_epi32 (__m512i __A, __m128i __B) |
1198
|
|
|
|
|
|
|
{ |
1199
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, |
1200
|
|
|
|
|
|
|
(__v4si) __B, |
1201
|
|
|
|
|
|
|
(__v16si) |
1202
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
1203
|
|
|
|
|
|
|
(__mmask16) -1); |
1204
|
|
|
|
|
|
|
} |
1205
|
|
|
|
|
|
|
|
1206
|
|
|
|
|
|
|
extern __inline __m512i |
1207
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1208
|
|
|
|
|
|
|
_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) |
1209
|
|
|
|
|
|
|
{ |
1210
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, |
1211
|
|
|
|
|
|
|
(__v4si) __B, |
1212
|
|
|
|
|
|
|
(__v16si) __W, |
1213
|
|
|
|
|
|
|
(__mmask16) __U); |
1214
|
|
|
|
|
|
|
} |
1215
|
|
|
|
|
|
|
|
1216
|
|
|
|
|
|
|
extern __inline __m512i |
1217
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1218
|
|
|
|
|
|
|
_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B) |
1219
|
|
|
|
|
|
|
{ |
1220
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, |
1221
|
|
|
|
|
|
|
(__v4si) __B, |
1222
|
|
|
|
|
|
|
(__v16si) |
1223
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
1224
|
|
|
|
|
|
|
(__mmask16) __U); |
1225
|
|
|
|
|
|
|
} |
1226
|
|
|
|
|
|
|
|
1227
|
|
|
|
|
|
|
/* _mm512_srli_epi32 family: scalar-count forms built on
   __builtin_ia32_psrldi512_mask.  With __OPTIMIZE__ defined they are
   always-inline functions; otherwise they are macros expanding to the
   same builtin call (presumably so the count still reaches the builtin
   as a constant when nothing is inlined or folded -- confirm).  The
   functions take the count as unsigned int, the macros cast it to int.  */
#ifdef __OPTIMIZE__
/* Unmasked: all-ones 16-bit mask, placeholder third operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi32 (__m512i __A, unsigned int __B)
{
  __v16si __a = (__v16si) __A;
  __v16si __undef = (__v16si) _mm512_undefined_si512 ();
  __mmask16 __all = (__mmask16) -1;

  return (__m512i) __builtin_ia32_psrldi512_mask (__a, __B, __undef, __all);
}

/* Merge form: __W is the third operand, __U the 16-bit mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                        unsigned int __B)
{
  __v16si __a = (__v16si) __A;
  __v16si __src = (__v16si) __W;
  __mmask16 __k = (__mmask16) __U;

  return (__m512i) __builtin_ia32_psrldi512_mask (__a, __B, __src, __k);
}

/* Zeroing form: the third operand is _mm512_setzero_si512 ().  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  __v16si __a = (__v16si) __A;
  __v16si __zero = (__v16si) _mm512_setzero_si512 ();
  __mmask16 __k = (__mmask16) __U;

  return (__m512i) __builtin_ia32_psrldi512_mask (__a, __B, __zero, __k);
}
#else
/* Macro equivalents of the three functions above.  */
#define _mm512_srli_epi32(X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
    (__v16si)(__m512i)(X), \
    (int)(C), \
    (__v16si)(__m512i)_mm512_undefined_si512 (), \
    (__mmask16)-1))

#define _mm512_mask_srli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
    (__v16si)(__m512i)(X), \
    (int)(C), \
    (__v16si)(__m512i)(W), \
    (__mmask16)(U)))

#define _mm512_maskz_srli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
    (__v16si)(__m512i)(X), \
    (int)(C), \
    (__v16si)(__m512i)_mm512_setzero_si512 (), \
    (__mmask16)(U)))
#endif
1273
|
|
|
|
|
|
|
|
1274
|
|
|
|
|
|
|
extern __inline __m512i |
1275
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1276
|
|
|
|
|
|
|
_mm512_srl_epi32 (__m512i __A, __m128i __B) |
1277
|
|
|
|
|
|
|
{ |
1278
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, |
1279
|
|
|
|
|
|
|
(__v4si) __B, |
1280
|
|
|
|
|
|
|
(__v16si) |
1281
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
1282
|
|
|
|
|
|
|
(__mmask16) -1); |
1283
|
|
|
|
|
|
|
} |
1284
|
|
|
|
|
|
|
|
1285
|
|
|
|
|
|
|
extern __inline __m512i |
1286
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1287
|
|
|
|
|
|
|
_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) |
1288
|
|
|
|
|
|
|
{ |
1289
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, |
1290
|
|
|
|
|
|
|
(__v4si) __B, |
1291
|
|
|
|
|
|
|
(__v16si) __W, |
1292
|
|
|
|
|
|
|
(__mmask16) __U); |
1293
|
|
|
|
|
|
|
} |
1294
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
extern __inline __m512i |
1296
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1297
|
|
|
|
|
|
|
_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B) |
1298
|
|
|
|
|
|
|
{ |
1299
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, |
1300
|
|
|
|
|
|
|
(__v4si) __B, |
1301
|
|
|
|
|
|
|
(__v16si) |
1302
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
1303
|
|
|
|
|
|
|
(__mmask16) __U); |
1304
|
|
|
|
|
|
|
} |
1305
|
|
|
|
|
|
|
|
1306
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
1307
|
|
|
|
|
|
|
extern __inline __m512i |
1308
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1309
|
|
|
|
|
|
|
_mm512_srai_epi32 (__m512i __A, unsigned int __B) |
1310
|
|
|
|
|
|
|
{ |
1311
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, |
1312
|
|
|
|
|
|
|
(__v16si) |
1313
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
1314
|
|
|
|
|
|
|
(__mmask16) -1); |
1315
|
|
|
|
|
|
|
} |
1316
|
|
|
|
|
|
|
|
1317
|
|
|
|
|
|
|
extern __inline __m512i |
1318
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1319
|
|
|
|
|
|
|
_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A, |
1320
|
|
|
|
|
|
|
unsigned int __B) |
1321
|
|
|
|
|
|
|
{ |
1322
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, |
1323
|
|
|
|
|
|
|
(__v16si) __W, |
1324
|
|
|
|
|
|
|
(__mmask16) __U); |
1325
|
|
|
|
|
|
|
} |
1326
|
|
|
|
|
|
|
|
1327
|
|
|
|
|
|
|
extern __inline __m512i |
1328
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1329
|
|
|
|
|
|
|
_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B) |
1330
|
|
|
|
|
|
|
{ |
1331
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, |
1332
|
|
|
|
|
|
|
(__v16si) |
1333
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
1334
|
|
|
|
|
|
|
(__mmask16) __U); |
1335
|
|
|
|
|
|
|
} |
1336
|
|
|
|
|
|
|
#else |
1337
|
|
|
|
|
|
|
/* Macro forms of the srai_epi32 intrinsics, used when __OPTIMIZE__ is not
   defined.  Each expands to a single __builtin_ia32_psradi512_mask call;
   the three differ only in the third operand (undefined vector, W, or zero
   vector) and in the mask (all ones or U).  */
#define _mm512_srai_epi32(X, C)                                         \
  ((__m512i) __builtin_ia32_psradi512_mask (                            \
     (__v16si)(__m512i)(X), (int)(C),                                   \
     (__v16si)(__m512i)_mm512_undefined_si512 (),                       \
     (__mmask16)-1))

#define _mm512_mask_srai_epi32(W, U, X, C)                              \
  ((__m512i) __builtin_ia32_psradi512_mask (                            \
     (__v16si)(__m512i)(X), (int)(C),                                   \
     (__v16si)(__m512i)(W),                                             \
     (__mmask16)(U)))

#define _mm512_maskz_srai_epi32(U, X, C)                                \
  ((__m512i) __builtin_ia32_psradi512_mask (                            \
     (__v16si)(__m512i)(X), (int)(C),                                   \
     (__v16si)(__m512i)_mm512_setzero_si512 (),                         \
     (__mmask16)(U)))
1351
|
|
|
|
|
|
|
#endif |
1352
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
extern __inline __m512i |
1354
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1355
|
|
|
|
|
|
|
_mm512_sra_epi32 (__m512i __A, __m128i __B) |
1356
|
|
|
|
|
|
|
{ |
1357
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, |
1358
|
|
|
|
|
|
|
(__v4si) __B, |
1359
|
|
|
|
|
|
|
(__v16si) |
1360
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
1361
|
|
|
|
|
|
|
(__mmask16) -1); |
1362
|
|
|
|
|
|
|
} |
1363
|
|
|
|
|
|
|
|
1364
|
|
|
|
|
|
|
extern __inline __m512i |
1365
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1366
|
|
|
|
|
|
|
_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) |
1367
|
|
|
|
|
|
|
{ |
1368
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, |
1369
|
|
|
|
|
|
|
(__v4si) __B, |
1370
|
|
|
|
|
|
|
(__v16si) __W, |
1371
|
|
|
|
|
|
|
(__mmask16) __U); |
1372
|
|
|
|
|
|
|
} |
1373
|
|
|
|
|
|
|
|
1374
|
|
|
|
|
|
|
extern __inline __m512i |
1375
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1376
|
|
|
|
|
|
|
_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B) |
1377
|
|
|
|
|
|
|
{ |
1378
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, |
1379
|
|
|
|
|
|
|
(__v4si) __B, |
1380
|
|
|
|
|
|
|
(__v16si) |
1381
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
1382
|
|
|
|
|
|
|
(__mmask16) __U); |
1383
|
|
|
|
|
|
|
} |
1384
|
|
|
|
|
|
|
|
1385
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
1386
|
|
|
|
|
|
|
extern __inline __m128d |
1387
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1388
|
|
|
|
|
|
|
_mm_add_round_sd (__m128d __A, __m128d __B, const int __R) |
1389
|
|
|
|
|
|
|
{ |
1390
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, |
1391
|
|
|
|
|
|
|
(__v2df) __B, |
1392
|
|
|
|
|
|
|
__R); |
1393
|
|
|
|
|
|
|
} |
1394
|
|
|
|
|
|
|
|
1395
|
|
|
|
|
|
|
extern __inline __m128 |
1396
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1397
|
|
|
|
|
|
|
_mm_add_round_ss (__m128 __A, __m128 __B, const int __R) |
1398
|
|
|
|
|
|
|
{ |
1399
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_addss_round ((__v4sf) __A, |
1400
|
|
|
|
|
|
|
(__v4sf) __B, |
1401
|
|
|
|
|
|
|
__R); |
1402
|
|
|
|
|
|
|
} |
1403
|
|
|
|
|
|
|
|
1404
|
|
|
|
|
|
|
extern __inline __m128d |
1405
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1406
|
|
|
|
|
|
|
_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R) |
1407
|
|
|
|
|
|
|
{ |
1408
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, |
1409
|
|
|
|
|
|
|
(__v2df) __B, |
1410
|
|
|
|
|
|
|
__R); |
1411
|
|
|
|
|
|
|
} |
1412
|
|
|
|
|
|
|
|
1413
|
|
|
|
|
|
|
extern __inline __m128 |
1414
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1415
|
|
|
|
|
|
|
_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R) |
1416
|
|
|
|
|
|
|
{ |
1417
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_subss_round ((__v4sf) __A, |
1418
|
|
|
|
|
|
|
(__v4sf) __B, |
1419
|
|
|
|
|
|
|
__R); |
1420
|
|
|
|
|
|
|
} |
1421
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
#else |
1423
|
|
|
|
|
|
|
/* Macro forms of the scalar add/sub rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  A, B and C are passed to the builtin in
   that order, exactly as the inline definitions do.

   Every expansion is wrapped in parentheses and every argument is
   parenthesised and cast, in the same way as the srai and ternarylogic
   macro fallbacks in this header.  Without the outer parentheses a postfix
   operator written after the macro call (for example a subscript) would
   bind to the builtin call before the result cast is applied.  */
#define _mm_add_round_sd(A, B, C)                                       \
  ((__m128d) __builtin_ia32_addsd_round ((__v2df)(__m128d)(A),          \
                                         (__v2df)(__m128d)(B),          \
                                         (int)(C)))

#define _mm_add_round_ss(A, B, C)                                       \
  ((__m128) __builtin_ia32_addss_round ((__v4sf)(__m128)(A),            \
                                        (__v4sf)(__m128)(B),            \
                                        (int)(C)))

#define _mm_sub_round_sd(A, B, C)                                       \
  ((__m128d) __builtin_ia32_subsd_round ((__v2df)(__m128d)(A),          \
                                         (__v2df)(__m128d)(B),          \
                                         (int)(C)))

#define _mm_sub_round_ss(A, B, C)                                       \
  ((__m128) __builtin_ia32_subss_round ((__v4sf)(__m128)(A),            \
                                        (__v4sf)(__m128)(B),            \
                                        (int)(C)))
1434
|
|
|
|
|
|
|
#endif |
1435
|
|
|
|
|
|
|
|
1436
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
1437
|
|
|
|
|
|
|
extern __inline __m512i |
1438
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1439
|
|
|
|
|
|
|
_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int imm) |
1440
|
|
|
|
|
|
|
{ |
1441
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A, |
1442
|
|
|
|
|
|
|
(__v8di) __B, |
1443
|
|
|
|
|
|
|
(__v8di) __C, imm, |
1444
|
|
|
|
|
|
|
(__mmask8) -1); |
1445
|
|
|
|
|
|
|
} |
1446
|
|
|
|
|
|
|
|
1447
|
|
|
|
|
|
|
extern __inline __m512i |
1448
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1449
|
|
|
|
|
|
|
_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B, |
1450
|
|
|
|
|
|
|
__m512i __C, const int imm) |
1451
|
|
|
|
|
|
|
{ |
1452
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A, |
1453
|
|
|
|
|
|
|
(__v8di) __B, |
1454
|
|
|
|
|
|
|
(__v8di) __C, imm, |
1455
|
|
|
|
|
|
|
(__mmask8) __U); |
1456
|
|
|
|
|
|
|
} |
1457
|
|
|
|
|
|
|
|
1458
|
|
|
|
|
|
|
extern __inline __m512i |
1459
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1460
|
|
|
|
|
|
|
_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B, |
1461
|
|
|
|
|
|
|
__m512i __C, const int imm) |
1462
|
|
|
|
|
|
|
{ |
1463
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A, |
1464
|
|
|
|
|
|
|
(__v8di) __B, |
1465
|
|
|
|
|
|
|
(__v8di) __C, |
1466
|
|
|
|
|
|
|
imm, (__mmask8) __U); |
1467
|
|
|
|
|
|
|
} |
1468
|
|
|
|
|
|
|
|
1469
|
|
|
|
|
|
|
extern __inline __m512i |
1470
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1471
|
|
|
|
|
|
|
_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int imm) |
1472
|
|
|
|
|
|
|
{ |
1473
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A, |
1474
|
|
|
|
|
|
|
(__v16si) __B, |
1475
|
|
|
|
|
|
|
(__v16si) __C, |
1476
|
|
|
|
|
|
|
imm, (__mmask16) -1); |
1477
|
|
|
|
|
|
|
} |
1478
|
|
|
|
|
|
|
|
1479
|
|
|
|
|
|
|
extern __inline __m512i |
1480
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1481
|
|
|
|
|
|
|
_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B, |
1482
|
|
|
|
|
|
|
__m512i __C, const int imm) |
1483
|
|
|
|
|
|
|
{ |
1484
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A, |
1485
|
|
|
|
|
|
|
(__v16si) __B, |
1486
|
|
|
|
|
|
|
(__v16si) __C, |
1487
|
|
|
|
|
|
|
imm, (__mmask16) __U); |
1488
|
|
|
|
|
|
|
} |
1489
|
|
|
|
|
|
|
|
1490
|
|
|
|
|
|
|
extern __inline __m512i |
1491
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1492
|
|
|
|
|
|
|
_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B, |
1493
|
|
|
|
|
|
|
__m512i __C, const int imm) |
1494
|
|
|
|
|
|
|
{ |
1495
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A, |
1496
|
|
|
|
|
|
|
(__v16si) __B, |
1497
|
|
|
|
|
|
|
(__v16si) __C, |
1498
|
|
|
|
|
|
|
imm, (__mmask16) __U); |
1499
|
|
|
|
|
|
|
} |
1500
|
|
|
|
|
|
|
#else |
1501
|
|
|
|
|
|
|
/* Macro forms of the ternarylogic intrinsics, used when __OPTIMIZE__ is
   not defined.  The epi64 macros cast their vector arguments to __v8di and
   use an __mmask8 mask; the epi32 macros use __v16si and __mmask16.  The
   maskz macros call the *_maskz builtins, the others the *_mask builtins.  */
#define _mm512_ternarylogic_epi64(A, B, C, I)                           \
  ((__m512i) __builtin_ia32_pternlogq512_mask (                         \
     (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C),  \
     (int)(I), (__mmask8)-1))
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I)                   \
  ((__m512i) __builtin_ia32_pternlogq512_mask (                         \
     (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C),  \
     (int)(I), (__mmask8)(U)))
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I)                  \
  ((__m512i) __builtin_ia32_pternlogq512_maskz (                        \
     (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C),  \
     (int)(I), (__mmask8)(U)))
#define _mm512_ternarylogic_epi32(A, B, C, I)                           \
  ((__m512i) __builtin_ia32_pternlogd512_mask (                         \
     (__v16si)(__m512i)(A), (__v16si)(__m512i)(B),                      \
     (__v16si)(__m512i)(C), (int)(I), (__mmask16)-1))
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I)                   \
  ((__m512i) __builtin_ia32_pternlogd512_mask (                         \
     (__v16si)(__m512i)(A), (__v16si)(__m512i)(B),                      \
     (__v16si)(__m512i)(C), (int)(I), (__mmask16)(U)))
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I)                  \
  ((__m512i) __builtin_ia32_pternlogd512_maskz (                        \
     (__v16si)(__m512i)(A), (__v16si)(__m512i)(B),                      \
     (__v16si)(__m512i)(C), (int)(I), (__mmask16)(U)))
1522
|
|
|
|
|
|
|
#endif |
1523
|
|
|
|
|
|
|
|
1524
|
|
|
|
|
|
|
extern __inline __m512d |
1525
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1526
|
|
|
|
|
|
|
_mm512_rcp14_pd (__m512d __A) |
1527
|
|
|
|
|
|
|
{ |
1528
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, |
1529
|
|
|
|
|
|
|
(__v8df) |
1530
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
1531
|
|
|
|
|
|
|
(__mmask8) -1); |
1532
|
|
|
|
|
|
|
} |
1533
|
|
|
|
|
|
|
|
1534
|
|
|
|
|
|
|
extern __inline __m512d |
1535
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1536
|
|
|
|
|
|
|
_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A) |
1537
|
|
|
|
|
|
|
{ |
1538
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, |
1539
|
|
|
|
|
|
|
(__v8df) __W, |
1540
|
|
|
|
|
|
|
(__mmask8) __U); |
1541
|
|
|
|
|
|
|
} |
1542
|
|
|
|
|
|
|
|
1543
|
|
|
|
|
|
|
extern __inline __m512d |
1544
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1545
|
|
|
|
|
|
|
_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A) |
1546
|
|
|
|
|
|
|
{ |
1547
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, |
1548
|
|
|
|
|
|
|
(__v8df) |
1549
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
1550
|
|
|
|
|
|
|
(__mmask8) __U); |
1551
|
|
|
|
|
|
|
} |
1552
|
|
|
|
|
|
|
|
1553
|
|
|
|
|
|
|
extern __inline __m512 |
1554
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1555
|
|
|
|
|
|
|
_mm512_rcp14_ps (__m512 __A) |
1556
|
|
|
|
|
|
|
{ |
1557
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, |
1558
|
|
|
|
|
|
|
(__v16sf) |
1559
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
1560
|
|
|
|
|
|
|
(__mmask16) -1); |
1561
|
|
|
|
|
|
|
} |
1562
|
|
|
|
|
|
|
|
1563
|
|
|
|
|
|
|
extern __inline __m512 |
1564
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1565
|
|
|
|
|
|
|
_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A) |
1566
|
|
|
|
|
|
|
{ |
1567
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, |
1568
|
|
|
|
|
|
|
(__v16sf) __W, |
1569
|
|
|
|
|
|
|
(__mmask16) __U); |
1570
|
|
|
|
|
|
|
} |
1571
|
|
|
|
|
|
|
|
1572
|
|
|
|
|
|
|
extern __inline __m512 |
1573
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1574
|
|
|
|
|
|
|
_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A) |
1575
|
|
|
|
|
|
|
{ |
1576
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, |
1577
|
|
|
|
|
|
|
(__v16sf) |
1578
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
1579
|
|
|
|
|
|
|
(__mmask16) __U); |
1580
|
|
|
|
|
|
|
} |
1581
|
|
|
|
|
|
|
|
1582
|
|
|
|
|
|
|
extern __inline __m128d |
1583
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1584
|
|
|
|
|
|
|
_mm_rcp14_sd (__m128d __A, __m128d __B) |
1585
|
|
|
|
|
|
|
{ |
1586
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B, |
1587
|
|
|
|
|
|
|
(__v2df) __A); |
1588
|
|
|
|
|
|
|
} |
1589
|
|
|
|
|
|
|
|
1590
|
|
|
|
|
|
|
extern __inline __m128 |
1591
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1592
|
|
|
|
|
|
|
_mm_rcp14_ss (__m128 __A, __m128 __B) |
1593
|
|
|
|
|
|
|
{ |
1594
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B, |
1595
|
|
|
|
|
|
|
(__v4sf) __A); |
1596
|
|
|
|
|
|
|
} |
1597
|
|
|
|
|
|
|
|
1598
|
|
|
|
|
|
|
extern __inline __m512d |
1599
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1600
|
|
|
|
|
|
|
_mm512_rsqrt14_pd (__m512d __A) |
1601
|
|
|
|
|
|
|
{ |
1602
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, |
1603
|
|
|
|
|
|
|
(__v8df) |
1604
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
1605
|
|
|
|
|
|
|
(__mmask8) -1); |
1606
|
|
|
|
|
|
|
} |
1607
|
|
|
|
|
|
|
|
1608
|
|
|
|
|
|
|
extern __inline __m512d |
1609
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1610
|
|
|
|
|
|
|
_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A) |
1611
|
|
|
|
|
|
|
{ |
1612
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, |
1613
|
|
|
|
|
|
|
(__v8df) __W, |
1614
|
|
|
|
|
|
|
(__mmask8) __U); |
1615
|
|
|
|
|
|
|
} |
1616
|
|
|
|
|
|
|
|
1617
|
|
|
|
|
|
|
extern __inline __m512d |
1618
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1619
|
|
|
|
|
|
|
_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A) |
1620
|
|
|
|
|
|
|
{ |
1621
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, |
1622
|
|
|
|
|
|
|
(__v8df) |
1623
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
1624
|
|
|
|
|
|
|
(__mmask8) __U); |
1625
|
|
|
|
|
|
|
} |
1626
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
extern __inline __m512 |
1628
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1629
|
|
|
|
|
|
|
_mm512_rsqrt14_ps (__m512 __A) |
1630
|
|
|
|
|
|
|
{ |
1631
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, |
1632
|
|
|
|
|
|
|
(__v16sf) |
1633
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
1634
|
|
|
|
|
|
|
(__mmask16) -1); |
1635
|
|
|
|
|
|
|
} |
1636
|
|
|
|
|
|
|
|
1637
|
|
|
|
|
|
|
extern __inline __m512 |
1638
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1639
|
|
|
|
|
|
|
_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A) |
1640
|
|
|
|
|
|
|
{ |
1641
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, |
1642
|
|
|
|
|
|
|
(__v16sf) __W, |
1643
|
|
|
|
|
|
|
(__mmask16) __U); |
1644
|
|
|
|
|
|
|
} |
1645
|
|
|
|
|
|
|
|
1646
|
|
|
|
|
|
|
extern __inline __m512 |
1647
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1648
|
|
|
|
|
|
|
_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A) |
1649
|
|
|
|
|
|
|
{ |
1650
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, |
1651
|
|
|
|
|
|
|
(__v16sf) |
1652
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
1653
|
|
|
|
|
|
|
(__mmask16) __U); |
1654
|
|
|
|
|
|
|
} |
1655
|
|
|
|
|
|
|
|
1656
|
|
|
|
|
|
|
extern __inline __m128d |
1657
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1658
|
|
|
|
|
|
|
_mm_rsqrt14_sd (__m128d __A, __m128d __B) |
1659
|
|
|
|
|
|
|
{ |
1660
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B, |
1661
|
|
|
|
|
|
|
(__v2df) __A); |
1662
|
|
|
|
|
|
|
} |
1663
|
|
|
|
|
|
|
|
1664
|
|
|
|
|
|
|
extern __inline __m128 |
1665
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1666
|
|
|
|
|
|
|
_mm_rsqrt14_ss (__m128 __A, __m128 __B) |
1667
|
|
|
|
|
|
|
{ |
1668
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B, |
1669
|
|
|
|
|
|
|
(__v4sf) __A); |
1670
|
|
|
|
|
|
|
} |
1671
|
|
|
|
|
|
|
|
1672
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
1673
|
|
|
|
|
|
|
extern __inline __m512d |
1674
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1675
|
|
|
|
|
|
|
_mm512_sqrt_round_pd (__m512d __A, const int __R) |
1676
|
|
|
|
|
|
|
{ |
1677
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, |
1678
|
|
|
|
|
|
|
(__v8df) |
1679
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
1680
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
1681
|
|
|
|
|
|
|
} |
1682
|
|
|
|
|
|
|
|
1683
|
|
|
|
|
|
|
extern __inline __m512d |
1684
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1685
|
|
|
|
|
|
|
_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A, |
1686
|
|
|
|
|
|
|
const int __R) |
1687
|
|
|
|
|
|
|
{ |
1688
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, |
1689
|
|
|
|
|
|
|
(__v8df) __W, |
1690
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
1691
|
|
|
|
|
|
|
} |
1692
|
|
|
|
|
|
|
|
1693
|
|
|
|
|
|
|
extern __inline __m512d |
1694
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1695
|
|
|
|
|
|
|
_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R) |
1696
|
|
|
|
|
|
|
{ |
1697
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, |
1698
|
|
|
|
|
|
|
(__v8df) |
1699
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
1700
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
1701
|
|
|
|
|
|
|
} |
1702
|
|
|
|
|
|
|
|
1703
|
|
|
|
|
|
|
extern __inline __m512 |
1704
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1705
|
|
|
|
|
|
|
_mm512_sqrt_round_ps (__m512 __A, const int __R) |
1706
|
|
|
|
|
|
|
{ |
1707
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, |
1708
|
|
|
|
|
|
|
(__v16sf) |
1709
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
1710
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
1711
|
|
|
|
|
|
|
} |
1712
|
|
|
|
|
|
|
|
1713
|
|
|
|
|
|
|
extern __inline __m512 |
1714
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1715
|
|
|
|
|
|
|
_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R) |
1716
|
|
|
|
|
|
|
{ |
1717
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, |
1718
|
|
|
|
|
|
|
(__v16sf) __W, |
1719
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
1720
|
|
|
|
|
|
|
} |
1721
|
|
|
|
|
|
|
|
1722
|
|
|
|
|
|
|
extern __inline __m512 |
1723
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1724
|
|
|
|
|
|
|
_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R) |
1725
|
|
|
|
|
|
|
{ |
1726
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, |
1727
|
|
|
|
|
|
|
(__v16sf) |
1728
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
1729
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
1730
|
|
|
|
|
|
|
} |
1731
|
|
|
|
|
|
|
|
1732
|
|
|
|
|
|
|
extern __inline __m128d |
1733
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1734
|
|
|
|
|
|
|
_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R) |
1735
|
|
|
|
|
|
|
{ |
1736
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B, |
1737
|
|
|
|
|
|
|
(__v2df) __A, |
1738
|
|
|
|
|
|
|
__R); |
1739
|
|
|
|
|
|
|
} |
1740
|
|
|
|
|
|
|
|
1741
|
|
|
|
|
|
|
extern __inline __m128 |
1742
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1743
|
|
|
|
|
|
|
_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R) |
1744
|
|
|
|
|
|
|
{ |
1745
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B, |
1746
|
|
|
|
|
|
|
(__v4sf) __A, |
1747
|
|
|
|
|
|
|
__R); |
1748
|
|
|
|
|
|
|
} |
1749
|
|
|
|
|
|
|
#else |
1750
|
|
|
|
|
|
|
/* Macro forms of the packed sqrt_round intrinsics, used when __OPTIMIZE__
   is not defined.  Each expands to one __builtin_ia32_sqrtpd512_mask or
   __builtin_ia32_sqrtps512_mask call with the same operands, in the same
   order, as the corresponding inline definition.

   Every expansion is wrapped in parentheses and every argument is
   parenthesised and cast, in the same way as the srai and ternarylogic
   macro fallbacks in this header.  Without the outer parentheses a postfix
   operator written after the macro call (for example a subscript) would
   bind to the builtin call before the result cast is applied.  */
#define _mm512_sqrt_round_pd(A, C)                                      \
  ((__m512d) __builtin_ia32_sqrtpd512_mask (                            \
     (__v8df)(__m512d)(A),                                              \
     (__v8df)(__m512d)_mm512_undefined_pd (),                           \
     (__mmask8)-1, (int)(C)))

#define _mm512_mask_sqrt_round_pd(W, U, A, C)                           \
  ((__m512d) __builtin_ia32_sqrtpd512_mask (                            \
     (__v8df)(__m512d)(A),                                              \
     (__v8df)(__m512d)(W),                                              \
     (__mmask8)(U), (int)(C)))

#define _mm512_maskz_sqrt_round_pd(U, A, C)                             \
  ((__m512d) __builtin_ia32_sqrtpd512_mask (                            \
     (__v8df)(__m512d)(A),                                              \
     (__v8df)(__m512d)_mm512_setzero_pd (),                             \
     (__mmask8)(U), (int)(C)))

#define _mm512_sqrt_round_ps(A, C)                                      \
  ((__m512) __builtin_ia32_sqrtps512_mask (                             \
     (__v16sf)(__m512)(A),                                              \
     (__v16sf)(__m512)_mm512_undefined_ps (),                           \
     (__mmask16)-1, (int)(C)))

#define _mm512_mask_sqrt_round_ps(W, U, A, C)                           \
  ((__m512) __builtin_ia32_sqrtps512_mask (                             \
     (__v16sf)(__m512)(A),                                              \
     (__v16sf)(__m512)(W),                                              \
     (__mmask16)(U), (int)(C)))

#define _mm512_maskz_sqrt_round_ps(U, A, C)                             \
  ((__m512) __builtin_ia32_sqrtps512_mask (                             \
     (__v16sf)(__m512)(A),                                              \
     (__v16sf)(__m512)_mm512_setzero_ps (),                             \
     (__mmask16)(U), (int)(C)))
1767
|
|
|
|
|
|
|
|
1768
|
|
|
|
|
|
|
/* Macro forms of the scalar sqrt_round intrinsics, used when __OPTIMIZE__
   is not defined.

   The builtin receives B first and A second.  This is the order the inline
   definitions of _mm_sqrt_round_sd and _mm_sqrt_round_ss use
   ((__v2df) __B, (__v2df) __A, __R); passing (A, B, C) instead would make
   the result depend on whether __OPTIMIZE__ is defined.
   NOTE(review): the inline order is taken as the reference here; confirm
   against the Intel intrinsic definition.

   The expansions are also fully parenthesised, with parenthesised and cast
   arguments, so the macros compose safely inside larger expressions.  */
#define _mm_sqrt_round_sd(A, B, C)                                      \
  ((__m128d) __builtin_ia32_sqrtsd_round ((__v2df)(__m128d)(B),         \
                                          (__v2df)(__m128d)(A),         \
                                          (int)(C)))

#define _mm_sqrt_round_ss(A, B, C)                                      \
  ((__m128) __builtin_ia32_sqrtss_round ((__v4sf)(__m128)(B),           \
                                         (__v4sf)(__m128)(A),           \
                                         (int)(C)))
1773
|
|
|
|
|
|
|
#endif |
1774
|
|
|
|
|
|
|
|
1775
|
|
|
|
|
|
|
extern __inline __m512i |
1776
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1777
|
|
|
|
|
|
|
_mm512_cvtepi8_epi32 (__m128i __A) |
1778
|
|
|
|
|
|
|
{ |
1779
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, |
1780
|
|
|
|
|
|
|
(__v16si) |
1781
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
1782
|
|
|
|
|
|
|
(__mmask16) -1); |
1783
|
|
|
|
|
|
|
} |
1784
|
|
|
|
|
|
|
|
1785
|
|
|
|
|
|
|
extern __inline __m512i |
1786
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1787
|
|
|
|
|
|
|
_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A) |
1788
|
|
|
|
|
|
|
{ |
1789
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, |
1790
|
|
|
|
|
|
|
(__v16si) __W, |
1791
|
|
|
|
|
|
|
(__mmask16) __U); |
1792
|
|
|
|
|
|
|
} |
1793
|
|
|
|
|
|
|
|
1794
|
|
|
|
|
|
|
extern __inline __m512i |
1795
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1796
|
|
|
|
|
|
|
_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A) |
1797
|
|
|
|
|
|
|
{ |
1798
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, |
1799
|
|
|
|
|
|
|
(__v16si) |
1800
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
1801
|
|
|
|
|
|
|
(__mmask16) __U); |
1802
|
|
|
|
|
|
|
} |
1803
|
|
|
|
|
|
|
|
1804
|
|
|
|
|
|
|
extern __inline __m512i |
1805
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1806
|
|
|
|
|
|
|
_mm512_cvtepi8_epi64 (__m128i __A) |
1807
|
|
|
|
|
|
|
{ |
1808
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, |
1809
|
|
|
|
|
|
|
(__v8di) |
1810
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
1811
|
|
|
|
|
|
|
(__mmask8) -1); |
1812
|
|
|
|
|
|
|
} |
1813
|
|
|
|
|
|
|
|
1814
|
|
|
|
|
|
|
extern __inline __m512i |
1815
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1816
|
|
|
|
|
|
|
_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A) |
1817
|
|
|
|
|
|
|
{ |
1818
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, |
1819
|
|
|
|
|
|
|
(__v8di) __W, |
1820
|
|
|
|
|
|
|
(__mmask8) __U); |
1821
|
|
|
|
|
|
|
} |
1822
|
|
|
|
|
|
|
|
1823
|
|
|
|
|
|
|
extern __inline __m512i |
1824
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1825
|
|
|
|
|
|
|
_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A) |
1826
|
|
|
|
|
|
|
{ |
1827
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, |
1828
|
|
|
|
|
|
|
(__v8di) |
1829
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
1830
|
|
|
|
|
|
|
(__mmask8) __U); |
1831
|
|
|
|
|
|
|
} |
1832
|
|
|
|
|
|
|
|
1833
|
|
|
|
|
|
|
extern __inline __m512i |
1834
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1835
|
|
|
|
|
|
|
_mm512_cvtepi16_epi32 (__m256i __A) |
1836
|
|
|
|
|
|
|
{ |
1837
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, |
1838
|
|
|
|
|
|
|
(__v16si) |
1839
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
1840
|
|
|
|
|
|
|
(__mmask16) -1); |
1841
|
|
|
|
|
|
|
} |
1842
|
|
|
|
|
|
|
|
1843
|
|
|
|
|
|
|
extern __inline __m512i |
1844
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1845
|
|
|
|
|
|
|
_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A) |
1846
|
|
|
|
|
|
|
{ |
1847
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, |
1848
|
|
|
|
|
|
|
(__v16si) __W, |
1849
|
|
|
|
|
|
|
(__mmask16) __U); |
1850
|
|
|
|
|
|
|
} |
1851
|
|
|
|
|
|
|
|
1852
|
|
|
|
|
|
|
extern __inline __m512i |
1853
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1854
|
|
|
|
|
|
|
_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A) |
1855
|
|
|
|
|
|
|
{ |
1856
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, |
1857
|
|
|
|
|
|
|
(__v16si) |
1858
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
1859
|
|
|
|
|
|
|
(__mmask16) __U); |
1860
|
|
|
|
|
|
|
} |
1861
|
|
|
|
|
|
|
|
1862
|
|
|
|
|
|
|
extern __inline __m512i |
1863
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1864
|
|
|
|
|
|
|
_mm512_cvtepi16_epi64 (__m128i __A) |
1865
|
|
|
|
|
|
|
{ |
1866
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, |
1867
|
|
|
|
|
|
|
(__v8di) |
1868
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
1869
|
|
|
|
|
|
|
(__mmask8) -1); |
1870
|
|
|
|
|
|
|
} |
1871
|
|
|
|
|
|
|
|
1872
|
|
|
|
|
|
|
extern __inline __m512i |
1873
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1874
|
|
|
|
|
|
|
_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A) |
1875
|
|
|
|
|
|
|
{ |
1876
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, |
1877
|
|
|
|
|
|
|
(__v8di) __W, |
1878
|
|
|
|
|
|
|
(__mmask8) __U); |
1879
|
|
|
|
|
|
|
} |
1880
|
|
|
|
|
|
|
|
1881
|
|
|
|
|
|
|
extern __inline __m512i |
1882
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1883
|
|
|
|
|
|
|
_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A) |
1884
|
|
|
|
|
|
|
{ |
1885
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, |
1886
|
|
|
|
|
|
|
(__v8di) |
1887
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
1888
|
|
|
|
|
|
|
(__mmask8) __U); |
1889
|
|
|
|
|
|
|
} |
1890
|
|
|
|
|
|
|
|
1891
|
|
|
|
|
|
|
extern __inline __m512i |
1892
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1893
|
|
|
|
|
|
|
_mm512_cvtepi32_epi64 (__m256i __X) |
1894
|
|
|
|
|
|
|
{ |
1895
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, |
1896
|
|
|
|
|
|
|
(__v8di) |
1897
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
1898
|
|
|
|
|
|
|
(__mmask8) -1); |
1899
|
|
|
|
|
|
|
} |
1900
|
|
|
|
|
|
|
|
1901
|
|
|
|
|
|
|
extern __inline __m512i |
1902
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1903
|
|
|
|
|
|
|
_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X) |
1904
|
|
|
|
|
|
|
{ |
1905
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, |
1906
|
|
|
|
|
|
|
(__v8di) __W, |
1907
|
|
|
|
|
|
|
(__mmask8) __U); |
1908
|
|
|
|
|
|
|
} |
1909
|
|
|
|
|
|
|
|
1910
|
|
|
|
|
|
|
extern __inline __m512i |
1911
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1912
|
|
|
|
|
|
|
_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X) |
1913
|
|
|
|
|
|
|
{ |
1914
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, |
1915
|
|
|
|
|
|
|
(__v8di) |
1916
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
1917
|
|
|
|
|
|
|
(__mmask8) __U); |
1918
|
|
|
|
|
|
|
} |
1919
|
|
|
|
|
|
|
|
1920
|
|
|
|
|
|
|
extern __inline __m512i |
1921
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1922
|
|
|
|
|
|
|
_mm512_cvtepu8_epi32 (__m128i __A) |
1923
|
|
|
|
|
|
|
{ |
1924
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, |
1925
|
|
|
|
|
|
|
(__v16si) |
1926
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
1927
|
|
|
|
|
|
|
(__mmask16) -1); |
1928
|
|
|
|
|
|
|
} |
1929
|
|
|
|
|
|
|
|
1930
|
|
|
|
|
|
|
extern __inline __m512i |
1931
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1932
|
|
|
|
|
|
|
_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A) |
1933
|
|
|
|
|
|
|
{ |
1934
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, |
1935
|
|
|
|
|
|
|
(__v16si) __W, |
1936
|
|
|
|
|
|
|
(__mmask16) __U); |
1937
|
|
|
|
|
|
|
} |
1938
|
|
|
|
|
|
|
|
1939
|
|
|
|
|
|
|
extern __inline __m512i |
1940
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1941
|
|
|
|
|
|
|
_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A) |
1942
|
|
|
|
|
|
|
{ |
1943
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, |
1944
|
|
|
|
|
|
|
(__v16si) |
1945
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
1946
|
|
|
|
|
|
|
(__mmask16) __U); |
1947
|
|
|
|
|
|
|
} |
1948
|
|
|
|
|
|
|
|
1949
|
|
|
|
|
|
|
extern __inline __m512i |
1950
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1951
|
|
|
|
|
|
|
_mm512_cvtepu8_epi64 (__m128i __A) |
1952
|
|
|
|
|
|
|
{ |
1953
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, |
1954
|
|
|
|
|
|
|
(__v8di) |
1955
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
1956
|
|
|
|
|
|
|
(__mmask8) -1); |
1957
|
|
|
|
|
|
|
} |
1958
|
|
|
|
|
|
|
|
1959
|
|
|
|
|
|
|
extern __inline __m512i |
1960
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1961
|
|
|
|
|
|
|
_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A) |
1962
|
|
|
|
|
|
|
{ |
1963
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, |
1964
|
|
|
|
|
|
|
(__v8di) __W, |
1965
|
|
|
|
|
|
|
(__mmask8) __U); |
1966
|
|
|
|
|
|
|
} |
1967
|
|
|
|
|
|
|
|
1968
|
|
|
|
|
|
|
extern __inline __m512i |
1969
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1970
|
|
|
|
|
|
|
_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) |
1971
|
|
|
|
|
|
|
{ |
1972
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, |
1973
|
|
|
|
|
|
|
(__v8di) |
1974
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
1975
|
|
|
|
|
|
|
(__mmask8) __U); |
1976
|
|
|
|
|
|
|
} |
1977
|
|
|
|
|
|
|
|
1978
|
|
|
|
|
|
|
extern __inline __m512i |
1979
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1980
|
|
|
|
|
|
|
_mm512_cvtepu16_epi32 (__m256i __A) |
1981
|
|
|
|
|
|
|
{ |
1982
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, |
1983
|
|
|
|
|
|
|
(__v16si) |
1984
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
1985
|
|
|
|
|
|
|
(__mmask16) -1); |
1986
|
|
|
|
|
|
|
} |
1987
|
|
|
|
|
|
|
|
1988
|
|
|
|
|
|
|
extern __inline __m512i |
1989
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1990
|
|
|
|
|
|
|
_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A) |
1991
|
|
|
|
|
|
|
{ |
1992
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, |
1993
|
|
|
|
|
|
|
(__v16si) __W, |
1994
|
|
|
|
|
|
|
(__mmask16) __U); |
1995
|
|
|
|
|
|
|
} |
1996
|
|
|
|
|
|
|
|
1997
|
|
|
|
|
|
|
extern __inline __m512i |
1998
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
1999
|
|
|
|
|
|
|
_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A) |
2000
|
|
|
|
|
|
|
{ |
2001
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, |
2002
|
|
|
|
|
|
|
(__v16si) |
2003
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
2004
|
|
|
|
|
|
|
(__mmask16) __U); |
2005
|
|
|
|
|
|
|
} |
2006
|
|
|
|
|
|
|
|
2007
|
|
|
|
|
|
|
extern __inline __m512i |
2008
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2009
|
|
|
|
|
|
|
_mm512_cvtepu16_epi64 (__m128i __A) |
2010
|
|
|
|
|
|
|
{ |
2011
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, |
2012
|
|
|
|
|
|
|
(__v8di) |
2013
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
2014
|
|
|
|
|
|
|
(__mmask8) -1); |
2015
|
|
|
|
|
|
|
} |
2016
|
|
|
|
|
|
|
|
2017
|
|
|
|
|
|
|
extern __inline __m512i |
2018
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2019
|
|
|
|
|
|
|
_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A) |
2020
|
|
|
|
|
|
|
{ |
2021
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, |
2022
|
|
|
|
|
|
|
(__v8di) __W, |
2023
|
|
|
|
|
|
|
(__mmask8) __U); |
2024
|
|
|
|
|
|
|
} |
2025
|
|
|
|
|
|
|
|
2026
|
|
|
|
|
|
|
extern __inline __m512i |
2027
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2028
|
|
|
|
|
|
|
_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A) |
2029
|
|
|
|
|
|
|
{ |
2030
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, |
2031
|
|
|
|
|
|
|
(__v8di) |
2032
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
2033
|
|
|
|
|
|
|
(__mmask8) __U); |
2034
|
|
|
|
|
|
|
} |
2035
|
|
|
|
|
|
|
|
2036
|
|
|
|
|
|
|
extern __inline __m512i |
2037
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2038
|
|
|
|
|
|
|
_mm512_cvtepu32_epi64 (__m256i __X) |
2039
|
|
|
|
|
|
|
{ |
2040
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, |
2041
|
|
|
|
|
|
|
(__v8di) |
2042
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
2043
|
|
|
|
|
|
|
(__mmask8) -1); |
2044
|
|
|
|
|
|
|
} |
2045
|
|
|
|
|
|
|
|
2046
|
|
|
|
|
|
|
extern __inline __m512i |
2047
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2048
|
|
|
|
|
|
|
_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X) |
2049
|
|
|
|
|
|
|
{ |
2050
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, |
2051
|
|
|
|
|
|
|
(__v8di) __W, |
2052
|
|
|
|
|
|
|
(__mmask8) __U); |
2053
|
|
|
|
|
|
|
} |
2054
|
|
|
|
|
|
|
|
2055
|
|
|
|
|
|
|
extern __inline __m512i |
2056
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2057
|
|
|
|
|
|
|
_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X) |
2058
|
|
|
|
|
|
|
{ |
2059
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, |
2060
|
|
|
|
|
|
|
(__v8di) |
2061
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
2062
|
|
|
|
|
|
|
(__mmask8) __U); |
2063
|
|
|
|
|
|
|
} |
2064
|
|
|
|
|
|
|
|
2065
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
2066
|
|
|
|
|
|
|
extern __inline __m512d |
2067
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2068
|
|
|
|
|
|
|
_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R) |
2069
|
|
|
|
|
|
|
{ |
2070
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, |
2071
|
|
|
|
|
|
|
(__v8df) __B, |
2072
|
|
|
|
|
|
|
(__v8df) |
2073
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
2074
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
2075
|
|
|
|
|
|
|
} |
2076
|
|
|
|
|
|
|
|
2077
|
|
|
|
|
|
|
extern __inline __m512d |
2078
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2079
|
|
|
|
|
|
|
_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A, |
2080
|
|
|
|
|
|
|
__m512d __B, const int __R) |
2081
|
|
|
|
|
|
|
{ |
2082
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, |
2083
|
|
|
|
|
|
|
(__v8df) __B, |
2084
|
|
|
|
|
|
|
(__v8df) __W, |
2085
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2086
|
|
|
|
|
|
|
} |
2087
|
|
|
|
|
|
|
|
2088
|
|
|
|
|
|
|
extern __inline __m512d |
2089
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2090
|
|
|
|
|
|
|
_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B, |
2091
|
|
|
|
|
|
|
const int __R) |
2092
|
|
|
|
|
|
|
{ |
2093
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, |
2094
|
|
|
|
|
|
|
(__v8df) __B, |
2095
|
|
|
|
|
|
|
(__v8df) |
2096
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
2097
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2098
|
|
|
|
|
|
|
} |
2099
|
|
|
|
|
|
|
|
2100
|
|
|
|
|
|
|
extern __inline __m512 |
2101
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2102
|
|
|
|
|
|
|
_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R) |
2103
|
|
|
|
|
|
|
{ |
2104
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, |
2105
|
|
|
|
|
|
|
(__v16sf) __B, |
2106
|
|
|
|
|
|
|
(__v16sf) |
2107
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
2108
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
2109
|
|
|
|
|
|
|
} |
2110
|
|
|
|
|
|
|
|
2111
|
|
|
|
|
|
|
extern __inline __m512 |
2112
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2113
|
|
|
|
|
|
|
_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A, |
2114
|
|
|
|
|
|
|
__m512 __B, const int __R) |
2115
|
|
|
|
|
|
|
{ |
2116
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, |
2117
|
|
|
|
|
|
|
(__v16sf) __B, |
2118
|
|
|
|
|
|
|
(__v16sf) __W, |
2119
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2120
|
|
|
|
|
|
|
} |
2121
|
|
|
|
|
|
|
|
2122
|
|
|
|
|
|
|
extern __inline __m512 |
2123
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2124
|
|
|
|
|
|
|
_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) |
2125
|
|
|
|
|
|
|
{ |
2126
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, |
2127
|
|
|
|
|
|
|
(__v16sf) __B, |
2128
|
|
|
|
|
|
|
(__v16sf) |
2129
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
2130
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2131
|
|
|
|
|
|
|
} |
2132
|
|
|
|
|
|
|
|
2133
|
|
|
|
|
|
|
extern __inline __m512d |
2134
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2135
|
|
|
|
|
|
|
_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R) |
2136
|
|
|
|
|
|
|
{ |
2137
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, |
2138
|
|
|
|
|
|
|
(__v8df) __B, |
2139
|
|
|
|
|
|
|
(__v8df) |
2140
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
2141
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
2142
|
|
|
|
|
|
|
} |
2143
|
|
|
|
|
|
|
|
2144
|
|
|
|
|
|
|
extern __inline __m512d |
2145
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2146
|
|
|
|
|
|
|
_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A, |
2147
|
|
|
|
|
|
|
__m512d __B, const int __R) |
2148
|
|
|
|
|
|
|
{ |
2149
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, |
2150
|
|
|
|
|
|
|
(__v8df) __B, |
2151
|
|
|
|
|
|
|
(__v8df) __W, |
2152
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2153
|
|
|
|
|
|
|
} |
2154
|
|
|
|
|
|
|
|
2155
|
|
|
|
|
|
|
extern __inline __m512d |
2156
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2157
|
|
|
|
|
|
|
_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, |
2158
|
|
|
|
|
|
|
const int __R) |
2159
|
|
|
|
|
|
|
{ |
2160
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, |
2161
|
|
|
|
|
|
|
(__v8df) __B, |
2162
|
|
|
|
|
|
|
(__v8df) |
2163
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
2164
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2165
|
|
|
|
|
|
|
} |
2166
|
|
|
|
|
|
|
|
2167
|
|
|
|
|
|
|
extern __inline __m512 |
2168
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2169
|
|
|
|
|
|
|
_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R) |
2170
|
|
|
|
|
|
|
{ |
2171
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, |
2172
|
|
|
|
|
|
|
(__v16sf) __B, |
2173
|
|
|
|
|
|
|
(__v16sf) |
2174
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
2175
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
2176
|
|
|
|
|
|
|
} |
2177
|
|
|
|
|
|
|
|
2178
|
|
|
|
|
|
|
extern __inline __m512 |
2179
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2180
|
|
|
|
|
|
|
_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A, |
2181
|
|
|
|
|
|
|
__m512 __B, const int __R) |
2182
|
|
|
|
|
|
|
{ |
2183
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, |
2184
|
|
|
|
|
|
|
(__v16sf) __B, |
2185
|
|
|
|
|
|
|
(__v16sf) __W, |
2186
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2187
|
|
|
|
|
|
|
} |
2188
|
|
|
|
|
|
|
|
2189
|
|
|
|
|
|
|
extern __inline __m512 |
2190
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2191
|
|
|
|
|
|
|
_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) |
2192
|
|
|
|
|
|
|
{ |
2193
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, |
2194
|
|
|
|
|
|
|
(__v16sf) __B, |
2195
|
|
|
|
|
|
|
(__v16sf) |
2196
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
2197
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2198
|
|
|
|
|
|
|
} |
2199
|
|
|
|
|
|
|
#else |
2200
|
|
|
|
|
|
|
/* Macro forms of the _mm512_*add_round_{pd,ps} intrinsics, used by the
   enclosing #else branch when __OPTIMIZE__ is not defined.  Each one
   expands directly to the matching __builtin_ia32_add{pd,ps}512_mask
   call.  */
#define _mm512_add_round_pd(X, Y, RND) \
    (__m512d)__builtin_ia32_addpd512_mask(X, Y, \
        (__v8df)_mm512_undefined_pd(), -1, RND)

#define _mm512_mask_add_round_pd(SRC, MASK, X, Y, RND) \
    (__m512d)__builtin_ia32_addpd512_mask(X, Y, SRC, MASK, RND)

#define _mm512_maskz_add_round_pd(MASK, X, Y, RND) \
    (__m512d)__builtin_ia32_addpd512_mask(X, Y, \
        (__v8df)_mm512_setzero_pd(), MASK, RND)

#define _mm512_add_round_ps(X, Y, RND) \
    (__m512)__builtin_ia32_addps512_mask(X, Y, \
        (__v16sf)_mm512_undefined_ps(), -1, RND)

#define _mm512_mask_add_round_ps(SRC, MASK, X, Y, RND) \
    (__m512)__builtin_ia32_addps512_mask(X, Y, SRC, MASK, RND)

#define _mm512_maskz_add_round_ps(MASK, X, Y, RND) \
    (__m512)__builtin_ia32_addps512_mask(X, Y, \
        (__v16sf)_mm512_setzero_ps(), MASK, RND)
2217
|
|
|
|
|
|
|
|
2218
|
|
|
|
|
|
|
/* Macro forms of the _mm512_*sub_round_{pd,ps} intrinsics, used by the
   enclosing #else branch when __OPTIMIZE__ is not defined.  Each one
   expands directly to the matching __builtin_ia32_sub{pd,ps}512_mask
   call.  */
#define _mm512_sub_round_pd(X, Y, RND) \
    (__m512d)__builtin_ia32_subpd512_mask(X, Y, \
        (__v8df)_mm512_undefined_pd(), -1, RND)

#define _mm512_mask_sub_round_pd(SRC, MASK, X, Y, RND) \
    (__m512d)__builtin_ia32_subpd512_mask(X, Y, SRC, MASK, RND)

#define _mm512_maskz_sub_round_pd(MASK, X, Y, RND) \
    (__m512d)__builtin_ia32_subpd512_mask(X, Y, \
        (__v8df)_mm512_setzero_pd(), MASK, RND)

#define _mm512_sub_round_ps(X, Y, RND) \
    (__m512)__builtin_ia32_subps512_mask(X, Y, \
        (__v16sf)_mm512_undefined_ps(), -1, RND)

#define _mm512_mask_sub_round_ps(SRC, MASK, X, Y, RND) \
    (__m512)__builtin_ia32_subps512_mask(X, Y, SRC, MASK, RND)

#define _mm512_maskz_sub_round_ps(MASK, X, Y, RND) \
    (__m512)__builtin_ia32_subps512_mask(X, Y, \
        (__v16sf)_mm512_setzero_ps(), MASK, RND)
2235
|
|
|
|
|
|
|
#endif |
2236
|
|
|
|
|
|
|
|
2237
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
2238
|
|
|
|
|
|
|
extern __inline __m512d |
2239
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2240
|
|
|
|
|
|
|
_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R) |
2241
|
|
|
|
|
|
|
{ |
2242
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, |
2243
|
|
|
|
|
|
|
(__v8df) __B, |
2244
|
|
|
|
|
|
|
(__v8df) |
2245
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
2246
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
2247
|
|
|
|
|
|
|
} |
2248
|
|
|
|
|
|
|
|
2249
|
|
|
|
|
|
|
extern __inline __m512d |
2250
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2251
|
|
|
|
|
|
|
_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A, |
2252
|
|
|
|
|
|
|
__m512d __B, const int __R) |
2253
|
|
|
|
|
|
|
{ |
2254
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, |
2255
|
|
|
|
|
|
|
(__v8df) __B, |
2256
|
|
|
|
|
|
|
(__v8df) __W, |
2257
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2258
|
|
|
|
|
|
|
} |
2259
|
|
|
|
|
|
|
|
2260
|
|
|
|
|
|
|
extern __inline __m512d |
2261
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2262
|
|
|
|
|
|
|
_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B, |
2263
|
|
|
|
|
|
|
const int __R) |
2264
|
|
|
|
|
|
|
{ |
2265
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, |
2266
|
|
|
|
|
|
|
(__v8df) __B, |
2267
|
|
|
|
|
|
|
(__v8df) |
2268
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
2269
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2270
|
|
|
|
|
|
|
} |
2271
|
|
|
|
|
|
|
|
2272
|
|
|
|
|
|
|
extern __inline __m512 |
2273
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2274
|
|
|
|
|
|
|
_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R) |
2275
|
|
|
|
|
|
|
{ |
2276
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, |
2277
|
|
|
|
|
|
|
(__v16sf) __B, |
2278
|
|
|
|
|
|
|
(__v16sf) |
2279
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
2280
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
2281
|
|
|
|
|
|
|
} |
2282
|
|
|
|
|
|
|
|
2283
|
|
|
|
|
|
|
extern __inline __m512 |
2284
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2285
|
|
|
|
|
|
|
_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A, |
2286
|
|
|
|
|
|
|
__m512 __B, const int __R) |
2287
|
|
|
|
|
|
|
{ |
2288
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, |
2289
|
|
|
|
|
|
|
(__v16sf) __B, |
2290
|
|
|
|
|
|
|
(__v16sf) __W, |
2291
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2292
|
|
|
|
|
|
|
} |
2293
|
|
|
|
|
|
|
|
2294
|
|
|
|
|
|
|
extern __inline __m512 |
2295
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2296
|
|
|
|
|
|
|
_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) |
2297
|
|
|
|
|
|
|
{ |
2298
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, |
2299
|
|
|
|
|
|
|
(__v16sf) __B, |
2300
|
|
|
|
|
|
|
(__v16sf) |
2301
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
2302
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2303
|
|
|
|
|
|
|
} |
2304
|
|
|
|
|
|
|
|
2305
|
|
|
|
|
|
|
extern __inline __m512d |
2306
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2307
|
|
|
|
|
|
|
_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R) |
2308
|
|
|
|
|
|
|
{ |
2309
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, |
2310
|
|
|
|
|
|
|
(__v8df) __V, |
2311
|
|
|
|
|
|
|
(__v8df) |
2312
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
2313
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
2314
|
|
|
|
|
|
|
} |
2315
|
|
|
|
|
|
|
|
2316
|
|
|
|
|
|
|
extern __inline __m512d |
2317
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2318
|
|
|
|
|
|
|
_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M, |
2319
|
|
|
|
|
|
|
__m512d __V, const int __R) |
2320
|
|
|
|
|
|
|
{ |
2321
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, |
2322
|
|
|
|
|
|
|
(__v8df) __V, |
2323
|
|
|
|
|
|
|
(__v8df) __W, |
2324
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2325
|
|
|
|
|
|
|
} |
2326
|
|
|
|
|
|
|
|
2327
|
|
|
|
|
|
|
extern __inline __m512d |
2328
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2329
|
|
|
|
|
|
|
_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V, |
2330
|
|
|
|
|
|
|
const int __R) |
2331
|
|
|
|
|
|
|
{ |
2332
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, |
2333
|
|
|
|
|
|
|
(__v8df) __V, |
2334
|
|
|
|
|
|
|
(__v8df) |
2335
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
2336
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2337
|
|
|
|
|
|
|
} |
2338
|
|
|
|
|
|
|
|
2339
|
|
|
|
|
|
|
extern __inline __m512 |
2340
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2341
|
|
|
|
|
|
|
_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R) |
2342
|
|
|
|
|
|
|
{ |
2343
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, |
2344
|
|
|
|
|
|
|
(__v16sf) __B, |
2345
|
|
|
|
|
|
|
(__v16sf) |
2346
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
2347
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
2348
|
|
|
|
|
|
|
} |
2349
|
|
|
|
|
|
|
|
2350
|
|
|
|
|
|
|
extern __inline __m512 |
2351
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2352
|
|
|
|
|
|
|
_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A, |
2353
|
|
|
|
|
|
|
__m512 __B, const int __R) |
2354
|
|
|
|
|
|
|
{ |
2355
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, |
2356
|
|
|
|
|
|
|
(__v16sf) __B, |
2357
|
|
|
|
|
|
|
(__v16sf) __W, |
2358
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2359
|
|
|
|
|
|
|
} |
2360
|
|
|
|
|
|
|
|
2361
|
|
|
|
|
|
|
extern __inline __m512 |
2362
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2363
|
|
|
|
|
|
|
_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) |
2364
|
|
|
|
|
|
|
{ |
2365
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, |
2366
|
|
|
|
|
|
|
(__v16sf) __B, |
2367
|
|
|
|
|
|
|
(__v16sf) |
2368
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
2369
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2370
|
|
|
|
|
|
|
} |
2371
|
|
|
|
|
|
|
|
2372
|
|
|
|
|
|
|
extern __inline __m128d |
2373
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2374
|
|
|
|
|
|
|
_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R) |
2375
|
|
|
|
|
|
|
{ |
2376
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, |
2377
|
|
|
|
|
|
|
(__v2df) __B, |
2378
|
|
|
|
|
|
|
__R); |
2379
|
|
|
|
|
|
|
} |
2380
|
|
|
|
|
|
|
|
2381
|
|
|
|
|
|
|
extern __inline __m128 |
2382
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2383
|
|
|
|
|
|
|
_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R) |
2384
|
|
|
|
|
|
|
{ |
2385
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, |
2386
|
|
|
|
|
|
|
(__v4sf) __B, |
2387
|
|
|
|
|
|
|
__R); |
2388
|
|
|
|
|
|
|
} |
2389
|
|
|
|
|
|
|
|
2390
|
|
|
|
|
|
|
extern __inline __m128d |
2391
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2392
|
|
|
|
|
|
|
_mm_div_round_sd (__m128d __A, __m128d __B, const int __R) |
2393
|
|
|
|
|
|
|
{ |
2394
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, |
2395
|
|
|
|
|
|
|
(__v2df) __B, |
2396
|
|
|
|
|
|
|
__R); |
2397
|
|
|
|
|
|
|
} |
2398
|
|
|
|
|
|
|
|
2399
|
|
|
|
|
|
|
extern __inline __m128 |
2400
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2401
|
|
|
|
|
|
|
_mm_div_round_ss (__m128 __A, __m128 __B, const int __R) |
2402
|
|
|
|
|
|
|
{ |
2403
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_divss_round ((__v4sf) __A, |
2404
|
|
|
|
|
|
|
(__v4sf) __B, |
2405
|
|
|
|
|
|
|
__R); |
2406
|
|
|
|
|
|
|
} |
2407
|
|
|
|
|
|
|
|
2408
|
|
|
|
|
|
|
#else |
2409
|
|
|
|
|
|
|
/* Macro forms of the _mm512_*mul_round_{pd,ps} intrinsics, used by the
   enclosing #else branch when __OPTIMIZE__ is not defined.  Each one
   expands directly to the matching __builtin_ia32_mul{pd,ps}512_mask
   call.  */
#define _mm512_mul_round_pd(X, Y, RND) \
    (__m512d)__builtin_ia32_mulpd512_mask(X, Y, \
        (__v8df)_mm512_undefined_pd(), -1, RND)

#define _mm512_mask_mul_round_pd(SRC, MASK, X, Y, RND) \
    (__m512d)__builtin_ia32_mulpd512_mask(X, Y, SRC, MASK, RND)

#define _mm512_maskz_mul_round_pd(MASK, X, Y, RND) \
    (__m512d)__builtin_ia32_mulpd512_mask(X, Y, \
        (__v8df)_mm512_setzero_pd(), MASK, RND)

#define _mm512_mul_round_ps(X, Y, RND) \
    (__m512)__builtin_ia32_mulps512_mask(X, Y, \
        (__v16sf)_mm512_undefined_ps(), -1, RND)

#define _mm512_mask_mul_round_ps(SRC, MASK, X, Y, RND) \
    (__m512)__builtin_ia32_mulps512_mask(X, Y, SRC, MASK, RND)

#define _mm512_maskz_mul_round_ps(MASK, X, Y, RND) \
    (__m512)__builtin_ia32_mulps512_mask(X, Y, \
        (__v16sf)_mm512_setzero_ps(), MASK, RND)
2426
|
|
|
|
|
|
|
|
2427
|
|
|
|
|
|
|
/* Macro forms of the _mm512_*div_round_{pd,ps} intrinsics, used by the
   enclosing #else branch when __OPTIMIZE__ is not defined.  Each one
   expands directly to the matching __builtin_ia32_div{pd,ps}512_mask
   call.  */
#define _mm512_div_round_pd(X, Y, RND) \
    (__m512d)__builtin_ia32_divpd512_mask(X, Y, \
        (__v8df)_mm512_undefined_pd(), -1, RND)

#define _mm512_mask_div_round_pd(SRC, MASK, X, Y, RND) \
    (__m512d)__builtin_ia32_divpd512_mask(X, Y, SRC, MASK, RND)

#define _mm512_maskz_div_round_pd(MASK, X, Y, RND) \
    (__m512d)__builtin_ia32_divpd512_mask(X, Y, \
        (__v8df)_mm512_setzero_pd(), MASK, RND)

#define _mm512_div_round_ps(X, Y, RND) \
    (__m512)__builtin_ia32_divps512_mask(X, Y, \
        (__v16sf)_mm512_undefined_ps(), -1, RND)

#define _mm512_mask_div_round_ps(SRC, MASK, X, Y, RND) \
    (__m512)__builtin_ia32_divps512_mask(X, Y, SRC, MASK, RND)

#define _mm512_maskz_div_round_ps(MASK, X, Y, RND) \
    (__m512)__builtin_ia32_divps512_mask(X, Y, \
        (__v16sf)_mm512_setzero_ps(), MASK, RND)
2444
|
|
|
|
|
|
|
|
2445
|
|
|
|
|
|
|
/* Macro forms of the scalar _mm_{mul,div}_round_{sd,ss} intrinsics, used
   by the enclosing #else branch when __OPTIMIZE__ is not defined.  Each
   one passes its three arguments straight to the *_round builtin.  */
#define _mm_mul_round_sd(X, Y, RND) \
    (__m128d)__builtin_ia32_mulsd_round(X, Y, RND)

#define _mm_mul_round_ss(X, Y, RND) \
    (__m128)__builtin_ia32_mulss_round(X, Y, RND)

#define _mm_div_round_sd(X, Y, RND) \
    (__m128d)__builtin_ia32_divsd_round(X, Y, RND)

#define _mm_div_round_ss(X, Y, RND) \
    (__m128)__builtin_ia32_divss_round(X, Y, RND)
2456
|
|
|
|
|
|
|
#endif |
2457
|
|
|
|
|
|
|
|
2458
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
2459
|
|
|
|
|
|
|
extern __inline __m512d |
2460
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2461
|
|
|
|
|
|
|
_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R) |
2462
|
|
|
|
|
|
|
{ |
2463
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, |
2464
|
|
|
|
|
|
|
(__v8df) __B, |
2465
|
|
|
|
|
|
|
(__v8df) |
2466
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
2467
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
2468
|
|
|
|
|
|
|
} |
2469
|
|
|
|
|
|
|
|
2470
|
|
|
|
|
|
|
extern __inline __m512d |
2471
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2472
|
|
|
|
|
|
|
_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A, |
2473
|
|
|
|
|
|
|
__m512d __B, const int __R) |
2474
|
|
|
|
|
|
|
{ |
2475
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, |
2476
|
|
|
|
|
|
|
(__v8df) __B, |
2477
|
|
|
|
|
|
|
(__v8df) __W, |
2478
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2479
|
|
|
|
|
|
|
} |
2480
|
|
|
|
|
|
|
|
2481
|
|
|
|
|
|
|
extern __inline __m512d |
2482
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2483
|
|
|
|
|
|
|
_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B, |
2484
|
|
|
|
|
|
|
const int __R) |
2485
|
|
|
|
|
|
|
{ |
2486
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, |
2487
|
|
|
|
|
|
|
(__v8df) __B, |
2488
|
|
|
|
|
|
|
(__v8df) |
2489
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
2490
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2491
|
|
|
|
|
|
|
} |
2492
|
|
|
|
|
|
|
|
2493
|
|
|
|
|
|
|
extern __inline __m512 |
2494
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2495
|
|
|
|
|
|
|
_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R) |
2496
|
|
|
|
|
|
|
{ |
2497
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, |
2498
|
|
|
|
|
|
|
(__v16sf) __B, |
2499
|
|
|
|
|
|
|
(__v16sf) |
2500
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
2501
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
2502
|
|
|
|
|
|
|
} |
2503
|
|
|
|
|
|
|
|
2504
|
|
|
|
|
|
|
extern __inline __m512 |
2505
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2506
|
|
|
|
|
|
|
_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A, |
2507
|
|
|
|
|
|
|
__m512 __B, const int __R) |
2508
|
|
|
|
|
|
|
{ |
2509
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, |
2510
|
|
|
|
|
|
|
(__v16sf) __B, |
2511
|
|
|
|
|
|
|
(__v16sf) __W, |
2512
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2513
|
|
|
|
|
|
|
} |
2514
|
|
|
|
|
|
|
|
2515
|
|
|
|
|
|
|
extern __inline __m512 |
2516
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2517
|
|
|
|
|
|
|
_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) |
2518
|
|
|
|
|
|
|
{ |
2519
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, |
2520
|
|
|
|
|
|
|
(__v16sf) __B, |
2521
|
|
|
|
|
|
|
(__v16sf) |
2522
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
2523
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2524
|
|
|
|
|
|
|
} |
2525
|
|
|
|
|
|
|
|
2526
|
|
|
|
|
|
|
extern __inline __m512d |
2527
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2528
|
|
|
|
|
|
|
_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R) |
2529
|
|
|
|
|
|
|
{ |
2530
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, |
2531
|
|
|
|
|
|
|
(__v8df) __B, |
2532
|
|
|
|
|
|
|
(__v8df) |
2533
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
2534
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
2535
|
|
|
|
|
|
|
} |
2536
|
|
|
|
|
|
|
|
2537
|
|
|
|
|
|
|
extern __inline __m512d |
2538
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2539
|
|
|
|
|
|
|
_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A, |
2540
|
|
|
|
|
|
|
__m512d __B, const int __R) |
2541
|
|
|
|
|
|
|
{ |
2542
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, |
2543
|
|
|
|
|
|
|
(__v8df) __B, |
2544
|
|
|
|
|
|
|
(__v8df) __W, |
2545
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2546
|
|
|
|
|
|
|
} |
2547
|
|
|
|
|
|
|
|
2548
|
|
|
|
|
|
|
extern __inline __m512d |
2549
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2550
|
|
|
|
|
|
|
_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B, |
2551
|
|
|
|
|
|
|
const int __R) |
2552
|
|
|
|
|
|
|
{ |
2553
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, |
2554
|
|
|
|
|
|
|
(__v8df) __B, |
2555
|
|
|
|
|
|
|
(__v8df) |
2556
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
2557
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2558
|
|
|
|
|
|
|
} |
2559
|
|
|
|
|
|
|
|
2560
|
|
|
|
|
|
|
extern __inline __m512 |
2561
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2562
|
|
|
|
|
|
|
_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R) |
2563
|
|
|
|
|
|
|
{ |
2564
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, |
2565
|
|
|
|
|
|
|
(__v16sf) __B, |
2566
|
|
|
|
|
|
|
(__v16sf) |
2567
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
2568
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
2569
|
|
|
|
|
|
|
} |
2570
|
|
|
|
|
|
|
|
2571
|
|
|
|
|
|
|
extern __inline __m512 |
2572
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2573
|
|
|
|
|
|
|
_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A, |
2574
|
|
|
|
|
|
|
__m512 __B, const int __R) |
2575
|
|
|
|
|
|
|
{ |
2576
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, |
2577
|
|
|
|
|
|
|
(__v16sf) __B, |
2578
|
|
|
|
|
|
|
(__v16sf) __W, |
2579
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2580
|
|
|
|
|
|
|
} |
2581
|
|
|
|
|
|
|
|
2582
|
|
|
|
|
|
|
extern __inline __m512 |
2583
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2584
|
|
|
|
|
|
|
_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) |
2585
|
|
|
|
|
|
|
{ |
2586
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, |
2587
|
|
|
|
|
|
|
(__v16sf) __B, |
2588
|
|
|
|
|
|
|
(__v16sf) |
2589
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
2590
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2591
|
|
|
|
|
|
|
} |
2592
|
|
|
|
|
|
|
#else |
2593
|
|
|
|
|
|
|
/* Macro forms of the _mm512_*max_round_pd intrinsics, used by the
   enclosing #else branch when __OPTIMIZE__ is not defined.  Each one
   expands directly to a __builtin_ia32_maxpd512_mask call.  */
#define _mm512_max_round_pd(X, Y, RND) \
    (__m512d)__builtin_ia32_maxpd512_mask(X, Y, \
        (__v8df)_mm512_undefined_pd(), -1, RND)

#define _mm512_mask_max_round_pd(SRC, MASK, X, Y, RND) \
    (__m512d)__builtin_ia32_maxpd512_mask(X, Y, SRC, MASK, RND)

#define _mm512_maskz_max_round_pd(MASK, X, Y, RND) \
    (__m512d)__builtin_ia32_maxpd512_mask(X, Y, \
        (__v8df)_mm512_setzero_pd(), MASK, RND)
2601
|
|
|
|
|
|
|
|
2602
|
|
|
|
|
|
|
/* Macro form of _mm512_max_round_ps, used by the enclosing #else branch
   when __OPTIMIZE__ is not defined.  Expands to a
   __builtin_ia32_maxps512_mask call on A and B with a mask of -1 and R as
   the final argument.  The placeholder third operand comes from the
   single-precision helper _mm512_undefined_ps (), so its type matches the
   (__v16sf) operand and agrees with the inline definition of this
   intrinsic and with the other *_round_ps macros.  */
#define _mm512_max_round_ps(A, B, R) \
    (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
2604
|
|
|
|
|
|
|
|
2605
|
|
|
|
|
|
|
/* Macro forms of the masked _mm512_*max_round_ps intrinsics, used by the
   enclosing #else branch when __OPTIMIZE__ is not defined.  Each one
   expands directly to a __builtin_ia32_maxps512_mask call.  */
#define _mm512_mask_max_round_ps(SRC, MASK, X, Y, RND) \
    (__m512)__builtin_ia32_maxps512_mask(X, Y, SRC, MASK, RND)

#define _mm512_maskz_max_round_ps(MASK, X, Y, RND) \
    (__m512)__builtin_ia32_maxps512_mask(X, Y, \
        (__v16sf)_mm512_setzero_ps(), MASK, RND)
2610
|
|
|
|
|
|
|
|
2611
|
|
|
|
|
|
|
/* Macro forms of the _mm512_*min_round_{pd,ps} intrinsics, used by the
   enclosing #else branch when __OPTIMIZE__ is not defined.  Each one
   expands directly to the matching __builtin_ia32_min{pd,ps}512_mask
   call.  */
#define _mm512_min_round_pd(X, Y, RND) \
    (__m512d)__builtin_ia32_minpd512_mask(X, Y, \
        (__v8df)_mm512_undefined_pd(), -1, RND)

#define _mm512_mask_min_round_pd(SRC, MASK, X, Y, RND) \
    (__m512d)__builtin_ia32_minpd512_mask(X, Y, SRC, MASK, RND)

#define _mm512_maskz_min_round_pd(MASK, X, Y, RND) \
    (__m512d)__builtin_ia32_minpd512_mask(X, Y, \
        (__v8df)_mm512_setzero_pd(), MASK, RND)

#define _mm512_min_round_ps(X, Y, RND) \
    (__m512)__builtin_ia32_minps512_mask(X, Y, \
        (__v16sf)_mm512_undefined_ps(), -1, RND)

#define _mm512_mask_min_round_ps(SRC, MASK, X, Y, RND) \
    (__m512)__builtin_ia32_minps512_mask(X, Y, SRC, MASK, RND)

#define _mm512_maskz_min_round_ps(MASK, X, Y, RND) \
    (__m512)__builtin_ia32_minps512_mask(X, Y, \
        (__v16sf)_mm512_setzero_ps(), MASK, RND)
2628
|
|
|
|
|
|
|
#endif |
2629
|
|
|
|
|
|
|
|
2630
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
2631
|
|
|
|
|
|
|
extern __inline __m512d |
2632
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2633
|
|
|
|
|
|
|
_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R) |
2634
|
|
|
|
|
|
|
{ |
2635
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, |
2636
|
|
|
|
|
|
|
(__v8df) __B, |
2637
|
|
|
|
|
|
|
(__v8df) |
2638
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
2639
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
2640
|
|
|
|
|
|
|
} |
2641
|
|
|
|
|
|
|
|
2642
|
|
|
|
|
|
|
extern __inline __m512d |
2643
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2644
|
|
|
|
|
|
|
_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A, |
2645
|
|
|
|
|
|
|
__m512d __B, const int __R) |
2646
|
|
|
|
|
|
|
{ |
2647
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, |
2648
|
|
|
|
|
|
|
(__v8df) __B, |
2649
|
|
|
|
|
|
|
(__v8df) __W, |
2650
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2651
|
|
|
|
|
|
|
} |
2652
|
|
|
|
|
|
|
|
2653
|
|
|
|
|
|
|
extern __inline __m512d |
2654
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2655
|
|
|
|
|
|
|
_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B, |
2656
|
|
|
|
|
|
|
const int __R) |
2657
|
|
|
|
|
|
|
{ |
2658
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, |
2659
|
|
|
|
|
|
|
(__v8df) __B, |
2660
|
|
|
|
|
|
|
(__v8df) |
2661
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
2662
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2663
|
|
|
|
|
|
|
} |
2664
|
|
|
|
|
|
|
|
2665
|
|
|
|
|
|
|
extern __inline __m512 |
2666
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2667
|
|
|
|
|
|
|
_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R) |
2668
|
|
|
|
|
|
|
{ |
2669
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, |
2670
|
|
|
|
|
|
|
(__v16sf) __B, |
2671
|
|
|
|
|
|
|
(__v16sf) |
2672
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
2673
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
2674
|
|
|
|
|
|
|
} |
2675
|
|
|
|
|
|
|
|
2676
|
|
|
|
|
|
|
extern __inline __m512 |
2677
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2678
|
|
|
|
|
|
|
_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A, |
2679
|
|
|
|
|
|
|
__m512 __B, const int __R) |
2680
|
|
|
|
|
|
|
{ |
2681
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, |
2682
|
|
|
|
|
|
|
(__v16sf) __B, |
2683
|
|
|
|
|
|
|
(__v16sf) __W, |
2684
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2685
|
|
|
|
|
|
|
} |
2686
|
|
|
|
|
|
|
|
2687
|
|
|
|
|
|
|
extern __inline __m512 |
2688
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2689
|
|
|
|
|
|
|
_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B, |
2690
|
|
|
|
|
|
|
const int __R) |
2691
|
|
|
|
|
|
|
{ |
2692
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, |
2693
|
|
|
|
|
|
|
(__v16sf) __B, |
2694
|
|
|
|
|
|
|
(__v16sf) |
2695
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
2696
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2697
|
|
|
|
|
|
|
} |
2698
|
|
|
|
|
|
|
|
2699
|
|
|
|
|
|
|
extern __inline __m128d |
2700
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2701
|
|
|
|
|
|
|
_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R) |
2702
|
|
|
|
|
|
|
{ |
2703
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A, |
2704
|
|
|
|
|
|
|
(__v2df) __B, |
2705
|
|
|
|
|
|
|
__R); |
2706
|
|
|
|
|
|
|
} |
2707
|
|
|
|
|
|
|
|
2708
|
|
|
|
|
|
|
extern __inline __m128 |
2709
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2710
|
|
|
|
|
|
|
_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R) |
2711
|
|
|
|
|
|
|
{ |
2712
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A, |
2713
|
|
|
|
|
|
|
(__v4sf) __B, |
2714
|
|
|
|
|
|
|
__R); |
2715
|
|
|
|
|
|
|
} |
2716
|
|
|
|
|
|
|
#else |
2717
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_scalefpd512_mask with
   _mm512_undefined_pd () as the third operand and mask -1.  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result rather than to the builtin call.  */
#define _mm512_scalef_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
2719
|
|
|
|
|
|
|
|
2720
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_scalefpd512_mask (A, B, W, U, C).
   The whole expansion is parenthesized so that postfix operators written
   after the macro call apply to the cast result.  */
#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C))
2722
|
|
|
|
|
|
|
|
2723
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_scalefpd512_mask with
   _mm512_setzero_pd () as the third operand and U as the mask.  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_maskz_scalef_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2725
|
|
|
|
|
|
|
|
2726
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_scalefps512_mask with
   _mm512_undefined_ps () as the third operand and mask -1.  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_scalef_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
2728
|
|
|
|
|
|
|
|
2729
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_scalefps512_mask (A, B, W, U, C).
   The whole expansion is parenthesized so that postfix operators written
   after the macro call apply to the cast result.  */
#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C))
2731
|
|
|
|
|
|
|
|
2732
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_scalefps512_mask with
   _mm512_setzero_ps () as the third operand and U as the mask.  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_maskz_scalef_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2734
|
|
|
|
|
|
|
|
2735
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_scalefsd_round (A, B, C).  The
   whole expansion is parenthesized so that postfix operators written
   after the macro call apply to the cast result.  */
#define _mm_scalef_round_sd(A, B, C)            \
    ((__m128d)__builtin_ia32_scalefsd_round(A, B, C))
2737
|
|
|
|
|
|
|
|
2738
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_scalefss_round (A, B, C).  The
   whole expansion is parenthesized so that postfix operators written
   after the macro call apply to the cast result.  */
#define _mm_scalef_round_ss(A, B, C)            \
    ((__m128)__builtin_ia32_scalefss_round(A, B, C))
2740
|
|
|
|
|
|
|
#endif |
2741
|
|
|
|
|
|
|
|
2742
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
2743
|
|
|
|
|
|
|
extern __inline __m512d |
2744
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2745
|
|
|
|
|
|
|
_mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) |
2746
|
|
|
|
|
|
|
{ |
2747
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, |
2748
|
|
|
|
|
|
|
(__v8df) __B, |
2749
|
|
|
|
|
|
|
(__v8df) __C, |
2750
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
2751
|
|
|
|
|
|
|
} |
2752
|
|
|
|
|
|
|
|
2753
|
|
|
|
|
|
|
extern __inline __m512d |
2754
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2755
|
|
|
|
|
|
|
_mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B, |
2756
|
|
|
|
|
|
|
__m512d __C, const int __R) |
2757
|
|
|
|
|
|
|
{ |
2758
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, |
2759
|
|
|
|
|
|
|
(__v8df) __B, |
2760
|
|
|
|
|
|
|
(__v8df) __C, |
2761
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2762
|
|
|
|
|
|
|
} |
2763
|
|
|
|
|
|
|
|
2764
|
|
|
|
|
|
|
extern __inline __m512d |
2765
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2766
|
|
|
|
|
|
|
_mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, |
2767
|
|
|
|
|
|
|
__mmask8 __U, const int __R) |
2768
|
|
|
|
|
|
|
{ |
2769
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, |
2770
|
|
|
|
|
|
|
(__v8df) __B, |
2771
|
|
|
|
|
|
|
(__v8df) __C, |
2772
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2773
|
|
|
|
|
|
|
} |
2774
|
|
|
|
|
|
|
|
2775
|
|
|
|
|
|
|
extern __inline __m512d |
2776
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2777
|
|
|
|
|
|
|
_mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B, |
2778
|
|
|
|
|
|
|
__m512d __C, const int __R) |
2779
|
|
|
|
|
|
|
{ |
2780
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, |
2781
|
|
|
|
|
|
|
(__v8df) __B, |
2782
|
|
|
|
|
|
|
(__v8df) __C, |
2783
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2784
|
|
|
|
|
|
|
} |
2785
|
|
|
|
|
|
|
|
2786
|
|
|
|
|
|
|
extern __inline __m512 |
2787
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2788
|
|
|
|
|
|
|
_mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) |
2789
|
|
|
|
|
|
|
{ |
2790
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, |
2791
|
|
|
|
|
|
|
(__v16sf) __B, |
2792
|
|
|
|
|
|
|
(__v16sf) __C, |
2793
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
2794
|
|
|
|
|
|
|
} |
2795
|
|
|
|
|
|
|
|
2796
|
|
|
|
|
|
|
extern __inline __m512 |
2797
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2798
|
|
|
|
|
|
|
_mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B, |
2799
|
|
|
|
|
|
|
__m512 __C, const int __R) |
2800
|
|
|
|
|
|
|
{ |
2801
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, |
2802
|
|
|
|
|
|
|
(__v16sf) __B, |
2803
|
|
|
|
|
|
|
(__v16sf) __C, |
2804
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2805
|
|
|
|
|
|
|
} |
2806
|
|
|
|
|
|
|
|
2807
|
|
|
|
|
|
|
extern __inline __m512 |
2808
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2809
|
|
|
|
|
|
|
_mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, |
2810
|
|
|
|
|
|
|
__mmask16 __U, const int __R) |
2811
|
|
|
|
|
|
|
{ |
2812
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, |
2813
|
|
|
|
|
|
|
(__v16sf) __B, |
2814
|
|
|
|
|
|
|
(__v16sf) __C, |
2815
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2816
|
|
|
|
|
|
|
} |
2817
|
|
|
|
|
|
|
|
2818
|
|
|
|
|
|
|
extern __inline __m512 |
2819
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2820
|
|
|
|
|
|
|
_mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B, |
2821
|
|
|
|
|
|
|
__m512 __C, const int __R) |
2822
|
|
|
|
|
|
|
{ |
2823
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, |
2824
|
|
|
|
|
|
|
(__v16sf) __B, |
2825
|
|
|
|
|
|
|
(__v16sf) __C, |
2826
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2827
|
|
|
|
|
|
|
} |
2828
|
|
|
|
|
|
|
|
2829
|
|
|
|
|
|
|
extern __inline __m512d |
2830
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2831
|
|
|
|
|
|
|
_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) |
2832
|
|
|
|
|
|
|
{ |
2833
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, |
2834
|
|
|
|
|
|
|
(__v8df) __B, |
2835
|
|
|
|
|
|
|
-(__v8df) __C, |
2836
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
2837
|
|
|
|
|
|
|
} |
2838
|
|
|
|
|
|
|
|
2839
|
|
|
|
|
|
|
extern __inline __m512d |
2840
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2841
|
|
|
|
|
|
|
_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B, |
2842
|
|
|
|
|
|
|
__m512d __C, const int __R) |
2843
|
|
|
|
|
|
|
{ |
2844
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, |
2845
|
|
|
|
|
|
|
(__v8df) __B, |
2846
|
|
|
|
|
|
|
-(__v8df) __C, |
2847
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2848
|
|
|
|
|
|
|
} |
2849
|
|
|
|
|
|
|
|
2850
|
|
|
|
|
|
|
extern __inline __m512d |
2851
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2852
|
|
|
|
|
|
|
_mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, |
2853
|
|
|
|
|
|
|
__mmask8 __U, const int __R) |
2854
|
|
|
|
|
|
|
{ |
2855
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, |
2856
|
|
|
|
|
|
|
(__v8df) __B, |
2857
|
|
|
|
|
|
|
(__v8df) __C, |
2858
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2859
|
|
|
|
|
|
|
} |
2860
|
|
|
|
|
|
|
|
2861
|
|
|
|
|
|
|
extern __inline __m512d |
2862
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2863
|
|
|
|
|
|
|
_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, |
2864
|
|
|
|
|
|
|
__m512d __C, const int __R) |
2865
|
|
|
|
|
|
|
{ |
2866
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, |
2867
|
|
|
|
|
|
|
(__v8df) __B, |
2868
|
|
|
|
|
|
|
-(__v8df) __C, |
2869
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2870
|
|
|
|
|
|
|
} |
2871
|
|
|
|
|
|
|
|
2872
|
|
|
|
|
|
|
extern __inline __m512 |
2873
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2874
|
|
|
|
|
|
|
_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) |
2875
|
|
|
|
|
|
|
{ |
2876
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, |
2877
|
|
|
|
|
|
|
(__v16sf) __B, |
2878
|
|
|
|
|
|
|
-(__v16sf) __C, |
2879
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
2880
|
|
|
|
|
|
|
} |
2881
|
|
|
|
|
|
|
|
2882
|
|
|
|
|
|
|
extern __inline __m512 |
2883
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2884
|
|
|
|
|
|
|
_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B, |
2885
|
|
|
|
|
|
|
__m512 __C, const int __R) |
2886
|
|
|
|
|
|
|
{ |
2887
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, |
2888
|
|
|
|
|
|
|
(__v16sf) __B, |
2889
|
|
|
|
|
|
|
-(__v16sf) __C, |
2890
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2891
|
|
|
|
|
|
|
} |
2892
|
|
|
|
|
|
|
|
2893
|
|
|
|
|
|
|
extern __inline __m512 |
2894
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2895
|
|
|
|
|
|
|
_mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, |
2896
|
|
|
|
|
|
|
__mmask16 __U, const int __R) |
2897
|
|
|
|
|
|
|
{ |
2898
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, |
2899
|
|
|
|
|
|
|
(__v16sf) __B, |
2900
|
|
|
|
|
|
|
(__v16sf) __C, |
2901
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2902
|
|
|
|
|
|
|
} |
2903
|
|
|
|
|
|
|
|
2904
|
|
|
|
|
|
|
extern __inline __m512 |
2905
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2906
|
|
|
|
|
|
|
_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, |
2907
|
|
|
|
|
|
|
__m512 __C, const int __R) |
2908
|
|
|
|
|
|
|
{ |
2909
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, |
2910
|
|
|
|
|
|
|
(__v16sf) __B, |
2911
|
|
|
|
|
|
|
-(__v16sf) __C, |
2912
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2913
|
|
|
|
|
|
|
} |
2914
|
|
|
|
|
|
|
|
2915
|
|
|
|
|
|
|
extern __inline __m512d |
2916
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2917
|
|
|
|
|
|
|
_mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) |
2918
|
|
|
|
|
|
|
{ |
2919
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, |
2920
|
|
|
|
|
|
|
(__v8df) __B, |
2921
|
|
|
|
|
|
|
(__v8df) __C, |
2922
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
2923
|
|
|
|
|
|
|
} |
2924
|
|
|
|
|
|
|
|
2925
|
|
|
|
|
|
|
extern __inline __m512d |
2926
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2927
|
|
|
|
|
|
|
_mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B, |
2928
|
|
|
|
|
|
|
__m512d __C, const int __R) |
2929
|
|
|
|
|
|
|
{ |
2930
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, |
2931
|
|
|
|
|
|
|
(__v8df) __B, |
2932
|
|
|
|
|
|
|
(__v8df) __C, |
2933
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2934
|
|
|
|
|
|
|
} |
2935
|
|
|
|
|
|
|
|
2936
|
|
|
|
|
|
|
extern __inline __m512d |
2937
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2938
|
|
|
|
|
|
|
_mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, |
2939
|
|
|
|
|
|
|
__mmask8 __U, const int __R) |
2940
|
|
|
|
|
|
|
{ |
2941
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, |
2942
|
|
|
|
|
|
|
(__v8df) __B, |
2943
|
|
|
|
|
|
|
(__v8df) __C, |
2944
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2945
|
|
|
|
|
|
|
} |
2946
|
|
|
|
|
|
|
|
2947
|
|
|
|
|
|
|
extern __inline __m512d |
2948
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2949
|
|
|
|
|
|
|
_mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, |
2950
|
|
|
|
|
|
|
__m512d __C, const int __R) |
2951
|
|
|
|
|
|
|
{ |
2952
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, |
2953
|
|
|
|
|
|
|
(__v8df) __B, |
2954
|
|
|
|
|
|
|
(__v8df) __C, |
2955
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
2956
|
|
|
|
|
|
|
} |
2957
|
|
|
|
|
|
|
|
2958
|
|
|
|
|
|
|
extern __inline __m512 |
2959
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2960
|
|
|
|
|
|
|
_mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) |
2961
|
|
|
|
|
|
|
{ |
2962
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, |
2963
|
|
|
|
|
|
|
(__v16sf) __B, |
2964
|
|
|
|
|
|
|
(__v16sf) __C, |
2965
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
2966
|
|
|
|
|
|
|
} |
2967
|
|
|
|
|
|
|
|
2968
|
|
|
|
|
|
|
extern __inline __m512 |
2969
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2970
|
|
|
|
|
|
|
_mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B, |
2971
|
|
|
|
|
|
|
__m512 __C, const int __R) |
2972
|
|
|
|
|
|
|
{ |
2973
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, |
2974
|
|
|
|
|
|
|
(__v16sf) __B, |
2975
|
|
|
|
|
|
|
(__v16sf) __C, |
2976
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2977
|
|
|
|
|
|
|
} |
2978
|
|
|
|
|
|
|
|
2979
|
|
|
|
|
|
|
extern __inline __m512 |
2980
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2981
|
|
|
|
|
|
|
_mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, |
2982
|
|
|
|
|
|
|
__mmask16 __U, const int __R) |
2983
|
|
|
|
|
|
|
{ |
2984
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, |
2985
|
|
|
|
|
|
|
(__v16sf) __B, |
2986
|
|
|
|
|
|
|
(__v16sf) __C, |
2987
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2988
|
|
|
|
|
|
|
} |
2989
|
|
|
|
|
|
|
|
2990
|
|
|
|
|
|
|
extern __inline __m512 |
2991
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
2992
|
|
|
|
|
|
|
_mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, |
2993
|
|
|
|
|
|
|
__m512 __C, const int __R) |
2994
|
|
|
|
|
|
|
{ |
2995
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, |
2996
|
|
|
|
|
|
|
(__v16sf) __B, |
2997
|
|
|
|
|
|
|
(__v16sf) __C, |
2998
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
2999
|
|
|
|
|
|
|
} |
3000
|
|
|
|
|
|
|
|
3001
|
|
|
|
|
|
|
extern __inline __m512d |
3002
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3003
|
|
|
|
|
|
|
_mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) |
3004
|
|
|
|
|
|
|
{ |
3005
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, |
3006
|
|
|
|
|
|
|
(__v8df) __B, |
3007
|
|
|
|
|
|
|
-(__v8df) __C, |
3008
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
3009
|
|
|
|
|
|
|
} |
3010
|
|
|
|
|
|
|
|
3011
|
|
|
|
|
|
|
extern __inline __m512d |
3012
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3013
|
|
|
|
|
|
|
_mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B, |
3014
|
|
|
|
|
|
|
__m512d __C, const int __R) |
3015
|
|
|
|
|
|
|
{ |
3016
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, |
3017
|
|
|
|
|
|
|
(__v8df) __B, |
3018
|
|
|
|
|
|
|
-(__v8df) __C, |
3019
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
3020
|
|
|
|
|
|
|
} |
3021
|
|
|
|
|
|
|
|
3022
|
|
|
|
|
|
|
extern __inline __m512d |
3023
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3024
|
|
|
|
|
|
|
_mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, |
3025
|
|
|
|
|
|
|
__mmask8 __U, const int __R) |
3026
|
|
|
|
|
|
|
{ |
3027
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, |
3028
|
|
|
|
|
|
|
(__v8df) __B, |
3029
|
|
|
|
|
|
|
(__v8df) __C, |
3030
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
3031
|
|
|
|
|
|
|
} |
3032
|
|
|
|
|
|
|
|
3033
|
|
|
|
|
|
|
extern __inline __m512d |
3034
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3035
|
|
|
|
|
|
|
_mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B, |
3036
|
|
|
|
|
|
|
__m512d __C, const int __R) |
3037
|
|
|
|
|
|
|
{ |
3038
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, |
3039
|
|
|
|
|
|
|
(__v8df) __B, |
3040
|
|
|
|
|
|
|
-(__v8df) __C, |
3041
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
3042
|
|
|
|
|
|
|
} |
3043
|
|
|
|
|
|
|
|
3044
|
|
|
|
|
|
|
extern __inline __m512 |
3045
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3046
|
|
|
|
|
|
|
_mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) |
3047
|
|
|
|
|
|
|
{ |
3048
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, |
3049
|
|
|
|
|
|
|
(__v16sf) __B, |
3050
|
|
|
|
|
|
|
-(__v16sf) __C, |
3051
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
3052
|
|
|
|
|
|
|
} |
3053
|
|
|
|
|
|
|
|
3054
|
|
|
|
|
|
|
extern __inline __m512 |
3055
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3056
|
|
|
|
|
|
|
_mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B, |
3057
|
|
|
|
|
|
|
__m512 __C, const int __R) |
3058
|
|
|
|
|
|
|
{ |
3059
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, |
3060
|
|
|
|
|
|
|
(__v16sf) __B, |
3061
|
|
|
|
|
|
|
-(__v16sf) __C, |
3062
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
3063
|
|
|
|
|
|
|
} |
3064
|
|
|
|
|
|
|
|
3065
|
|
|
|
|
|
|
extern __inline __m512 |
3066
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3067
|
|
|
|
|
|
|
_mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, |
3068
|
|
|
|
|
|
|
__mmask16 __U, const int __R) |
3069
|
|
|
|
|
|
|
{ |
3070
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, |
3071
|
|
|
|
|
|
|
(__v16sf) __B, |
3072
|
|
|
|
|
|
|
(__v16sf) __C, |
3073
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
3074
|
|
|
|
|
|
|
} |
3075
|
|
|
|
|
|
|
|
3076
|
|
|
|
|
|
|
extern __inline __m512 |
3077
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3078
|
|
|
|
|
|
|
_mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B, |
3079
|
|
|
|
|
|
|
__m512 __C, const int __R) |
3080
|
|
|
|
|
|
|
{ |
3081
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, |
3082
|
|
|
|
|
|
|
(__v16sf) __B, |
3083
|
|
|
|
|
|
|
-(__v16sf) __C, |
3084
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
3085
|
|
|
|
|
|
|
} |
3086
|
|
|
|
|
|
|
|
3087
|
|
|
|
|
|
|
extern __inline __m512d |
3088
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3089
|
|
|
|
|
|
|
_mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) |
3090
|
|
|
|
|
|
|
{ |
3091
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, |
3092
|
|
|
|
|
|
|
(__v8df) __B, |
3093
|
|
|
|
|
|
|
(__v8df) __C, |
3094
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
3095
|
|
|
|
|
|
|
} |
3096
|
|
|
|
|
|
|
|
3097
|
|
|
|
|
|
|
extern __inline __m512d |
3098
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3099
|
|
|
|
|
|
|
_mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B, |
3100
|
|
|
|
|
|
|
__m512d __C, const int __R) |
3101
|
|
|
|
|
|
|
{ |
3102
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, |
3103
|
|
|
|
|
|
|
(__v8df) __B, |
3104
|
|
|
|
|
|
|
(__v8df) __C, |
3105
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
3106
|
|
|
|
|
|
|
} |
3107
|
|
|
|
|
|
|
|
3108
|
|
|
|
|
|
|
extern __inline __m512d |
3109
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3110
|
|
|
|
|
|
|
_mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, |
3111
|
|
|
|
|
|
|
__mmask8 __U, const int __R) |
3112
|
|
|
|
|
|
|
{ |
3113
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, |
3114
|
|
|
|
|
|
|
(__v8df) __B, |
3115
|
|
|
|
|
|
|
(__v8df) __C, |
3116
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
3117
|
|
|
|
|
|
|
} |
3118
|
|
|
|
|
|
|
|
3119
|
|
|
|
|
|
|
extern __inline __m512d |
3120
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3121
|
|
|
|
|
|
|
_mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B, |
3122
|
|
|
|
|
|
|
__m512d __C, const int __R) |
3123
|
|
|
|
|
|
|
{ |
3124
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, |
3125
|
|
|
|
|
|
|
(__v8df) __B, |
3126
|
|
|
|
|
|
|
(__v8df) __C, |
3127
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
3128
|
|
|
|
|
|
|
} |
3129
|
|
|
|
|
|
|
|
3130
|
|
|
|
|
|
|
extern __inline __m512 |
3131
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3132
|
|
|
|
|
|
|
_mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) |
3133
|
|
|
|
|
|
|
{ |
3134
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, |
3135
|
|
|
|
|
|
|
(__v16sf) __B, |
3136
|
|
|
|
|
|
|
(__v16sf) __C, |
3137
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
3138
|
|
|
|
|
|
|
} |
3139
|
|
|
|
|
|
|
|
3140
|
|
|
|
|
|
|
extern __inline __m512 |
3141
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3142
|
|
|
|
|
|
|
_mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B, |
3143
|
|
|
|
|
|
|
__m512 __C, const int __R) |
3144
|
|
|
|
|
|
|
{ |
3145
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, |
3146
|
|
|
|
|
|
|
(__v16sf) __B, |
3147
|
|
|
|
|
|
|
(__v16sf) __C, |
3148
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
3149
|
|
|
|
|
|
|
} |
3150
|
|
|
|
|
|
|
|
3151
|
|
|
|
|
|
|
extern __inline __m512 |
3152
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3153
|
|
|
|
|
|
|
_mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, |
3154
|
|
|
|
|
|
|
__mmask16 __U, const int __R) |
3155
|
|
|
|
|
|
|
{ |
3156
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, |
3157
|
|
|
|
|
|
|
(__v16sf) __B, |
3158
|
|
|
|
|
|
|
(__v16sf) __C, |
3159
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
3160
|
|
|
|
|
|
|
} |
3161
|
|
|
|
|
|
|
|
3162
|
|
|
|
|
|
|
extern __inline __m512 |
3163
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3164
|
|
|
|
|
|
|
_mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B, |
3165
|
|
|
|
|
|
|
__m512 __C, const int __R) |
3166
|
|
|
|
|
|
|
{ |
3167
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, |
3168
|
|
|
|
|
|
|
(__v16sf) __B, |
3169
|
|
|
|
|
|
|
(__v16sf) __C, |
3170
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
3171
|
|
|
|
|
|
|
} |
3172
|
|
|
|
|
|
|
|
3173
|
|
|
|
|
|
|
extern __inline __m512d |
3174
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3175
|
|
|
|
|
|
|
_mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) |
3176
|
|
|
|
|
|
|
{ |
3177
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, |
3178
|
|
|
|
|
|
|
(__v8df) __B, |
3179
|
|
|
|
|
|
|
-(__v8df) __C, |
3180
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
3181
|
|
|
|
|
|
|
} |
3182
|
|
|
|
|
|
|
|
3183
|
|
|
|
|
|
|
extern __inline __m512d |
3184
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3185
|
|
|
|
|
|
|
_mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B, |
3186
|
|
|
|
|
|
|
__m512d __C, const int __R) |
3187
|
|
|
|
|
|
|
{ |
3188
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, |
3189
|
|
|
|
|
|
|
(__v8df) __B, |
3190
|
|
|
|
|
|
|
(__v8df) __C, |
3191
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
3192
|
|
|
|
|
|
|
} |
3193
|
|
|
|
|
|
|
|
3194
|
|
|
|
|
|
|
extern __inline __m512d |
3195
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3196
|
|
|
|
|
|
|
_mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, |
3197
|
|
|
|
|
|
|
__mmask8 __U, const int __R) |
3198
|
|
|
|
|
|
|
{ |
3199
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A, |
3200
|
|
|
|
|
|
|
(__v8df) __B, |
3201
|
|
|
|
|
|
|
(__v8df) __C, |
3202
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
3203
|
|
|
|
|
|
|
} |
3204
|
|
|
|
|
|
|
|
3205
|
|
|
|
|
|
|
extern __inline __m512d |
3206
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3207
|
|
|
|
|
|
|
_mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, |
3208
|
|
|
|
|
|
|
__m512d __C, const int __R) |
3209
|
|
|
|
|
|
|
{ |
3210
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, |
3211
|
|
|
|
|
|
|
(__v8df) __B, |
3212
|
|
|
|
|
|
|
-(__v8df) __C, |
3213
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
3214
|
|
|
|
|
|
|
} |
3215
|
|
|
|
|
|
|
|
3216
|
|
|
|
|
|
|
extern __inline __m512 |
3217
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3218
|
|
|
|
|
|
|
_mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) |
3219
|
|
|
|
|
|
|
{ |
3220
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, |
3221
|
|
|
|
|
|
|
(__v16sf) __B, |
3222
|
|
|
|
|
|
|
-(__v16sf) __C, |
3223
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
3224
|
|
|
|
|
|
|
} |
3225
|
|
|
|
|
|
|
|
3226
|
|
|
|
|
|
|
extern __inline __m512 |
3227
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3228
|
|
|
|
|
|
|
_mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B, |
3229
|
|
|
|
|
|
|
__m512 __C, const int __R) |
3230
|
|
|
|
|
|
|
{ |
3231
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, |
3232
|
|
|
|
|
|
|
(__v16sf) __B, |
3233
|
|
|
|
|
|
|
(__v16sf) __C, |
3234
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
3235
|
|
|
|
|
|
|
} |
3236
|
|
|
|
|
|
|
|
3237
|
|
|
|
|
|
|
extern __inline __m512 |
3238
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3239
|
|
|
|
|
|
|
_mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, |
3240
|
|
|
|
|
|
|
__mmask16 __U, const int __R) |
3241
|
|
|
|
|
|
|
{ |
3242
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A, |
3243
|
|
|
|
|
|
|
(__v16sf) __B, |
3244
|
|
|
|
|
|
|
(__v16sf) __C, |
3245
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
3246
|
|
|
|
|
|
|
} |
3247
|
|
|
|
|
|
|
|
3248
|
|
|
|
|
|
|
extern __inline __m512 |
3249
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3250
|
|
|
|
|
|
|
_mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, |
3251
|
|
|
|
|
|
|
__m512 __C, const int __R) |
3252
|
|
|
|
|
|
|
{ |
3253
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, |
3254
|
|
|
|
|
|
|
(__v16sf) __B, |
3255
|
|
|
|
|
|
|
-(__v16sf) __C, |
3256
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
3257
|
|
|
|
|
|
|
} |
3258
|
|
|
|
|
|
|
#else |
3259
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddpd512_mask (A, B, C, -1, R).  The whole expansion
   is parenthesized so that postfix operators written after the macro call
   apply to the cast result rather than to the builtin call.  */
#define _mm512_fmadd_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R))
3261
|
|
|
|
|
|
|
|
3262
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddpd512_mask (A, B, C, U, R).  The whole expansion
   is parenthesized so that postfix operators written after the macro call
   apply to the cast result.  */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R))
3264
|
|
|
|
|
|
|
|
3265
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddpd512_mask3 (A, B, C, U, R).  The whole expansion
   is parenthesized so that postfix operators written after the macro call
   apply to the cast result.  */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R))
3267
|
|
|
|
|
|
|
|
3268
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddpd512_maskz (A, B, C, U, R).  The whole expansion
   is parenthesized so that postfix operators written after the macro call
   apply to the cast result.  */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R))
3270
|
|
|
|
|
|
|
|
3271
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddps512_mask (A, B, C, -1, R).  The whole expansion
   is parenthesized so that postfix operators written after the macro call
   apply to the cast result.  */
#define _mm512_fmadd_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R))
3273
|
|
|
|
|
|
|
|
3274
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddps512_mask (A, B, C, U, R).  The whole expansion
   is parenthesized so that postfix operators written after the macro call
   apply to the cast result.  */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R))
3276
|
|
|
|
|
|
|
|
3277
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddps512_mask3 (A, B, C, U, R).  The whole expansion
   is parenthesized so that postfix operators written after the macro call
   apply to the cast result.  */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R))
3279
|
|
|
|
|
|
|
|
3280
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddps512_maskz (A, B, C, U, R).  The whole expansion
   is parenthesized so that postfix operators written after the macro call
   apply to the cast result.  */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R))
3282
|
|
|
|
|
|
|
|
3283
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddpd512_mask (A, B, -(C), -1, R).  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_fmsub_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R))
3285
|
|
|
|
|
|
|
|
3286
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddpd512_mask (A, B, -(C), U, R).  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R))
3288
|
|
|
|
|
|
|
|
3289
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmsubpd512_mask3 (A, B, C, U, R).  The whole expansion
   is parenthesized so that postfix operators written after the macro call
   apply to the cast result.  */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R))
3291
|
|
|
|
|
|
|
|
3292
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddpd512_maskz (A, B, -(C), U, R).  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R))
3294
|
|
|
|
|
|
|
|
3295
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddps512_mask (A, B, -(C), -1, R).  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_fmsub_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R))
3297
|
|
|
|
|
|
|
|
3298
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddps512_mask (A, B, -(C), U, R).  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R))
3300
|
|
|
|
|
|
|
|
3301
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmsubps512_mask3 (A, B, C, U, R).  The whole expansion
   is parenthesized so that postfix operators written after the macro call
   apply to the cast result.  */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R))
3303
|
|
|
|
|
|
|
|
3304
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddps512_maskz (A, B, -(C), U, R).  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddps512_maskz(A, B, -(C), U, R))
3306
|
|
|
|
|
|
|
|
3307
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddsubpd512_mask (A, B, C, -1, R).  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_fmaddsub_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R))
3309
|
|
|
|
|
|
|
|
3310
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined.  It must expand to
   the addsub builtin, __builtin_ia32_vfmaddsubpd512_mask, exactly as the
   inline definition of _mm512_mask_fmaddsub_round_pd does; expanding to
   __builtin_ia32_vfmaddpd512_mask would make non-optimized builds compute
   a plain fused multiply-add instead.  The whole expansion is
   parenthesized so that postfix operators written after the macro call
   apply to the cast result.  */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R))
3312
|
|
|
|
|
|
|
|
3313
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddsubpd512_mask3 (A, B, C, U, R).  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R))
3315
|
|
|
|
|
|
|
|
3316
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddsubpd512_maskz (A, B, C, U, R).  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R))
3318
|
|
|
|
|
|
|
|
3319
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddsubps512_mask (A, B, C, -1, R).  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_fmaddsub_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R))
3321
|
|
|
|
|
|
|
|
3322
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddsubps512_mask (A, B, C, U, R).  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R))
3324
|
|
|
|
|
|
|
|
3325
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddsubps512_mask3 (A, B, C, U, R).  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R)   \
    ((__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R))
3327
|
|
|
|
|
|
|
|
3328
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddsubps512_maskz (A, B, C, U, R).  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R)   \
    ((__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R))
3330
|
|
|
|
|
|
|
|
3331
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), -1, R).  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_fmsubadd_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R))
3333
|
|
|
|
|
|
|
|
3334
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), U, R).  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R))
3336
|
|
|
|
|
|
|
|
3337
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmsubaddpd512_mask3 (A, B, C, U, R).  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R)   \
    ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R))
3339
|
|
|
|
|
|
|
|
3340
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddsubpd512_maskz (A, B, -(C), U, R).  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R)   \
    ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R))
3342
|
|
|
|
|
|
|
|
3343
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), -1, R).  The whole
   expansion is parenthesized so that postfix operators written after the
   macro call apply to the cast result.  */
#define _mm512_fmsubadd_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R))
3345
|
|
|
|
|
|
|
|
3346
|
|
|
|
|
|
|
/* Macro form of _mm512_mask_fmsubadd_round_ps: implemented on top of
   __builtin_ia32_vfmaddsubps512_mask by negating C; U is the mask.
   Parenthesized as a whole so the __m512 cast binds to the call
   result, not to a trailing postfix operator.  */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R))
3348
|
|
|
|
|
|
|
|
3349
|
|
|
|
|
|
|
/* Macro form of _mm512_mask3_fmsubadd_round_ps: forwards all five
   arguments unchanged to the dedicated
   __builtin_ia32_vfmsubaddps512_mask3 builtin (C is not negated here).
   Parenthesized as a whole so the __m512 cast binds to the call
   result, not to a trailing postfix operator.  */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R)    \
    ((__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R))
3351
|
|
|
|
|
|
|
|
3352
|
|
|
|
|
|
|
/* Macro form of _mm512_maskz_fmsubadd_round_ps: implemented on top of
   __builtin_ia32_vfmaddsubps512_maskz by negating C; U is the mask.
   Parenthesized as a whole so the __m512 cast binds to the call
   result, not to a trailing postfix operator.  */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R)    \
    ((__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R))
3354
|
|
|
|
|
|
|
|
3355
|
|
|
|
|
|
|
/* Macro form of _mm512_fnmadd_round_pd: implemented on top of the
   plain fmadd builtin by negating A, with an all-ones mask (-1).
   Parenthesized as a whole so the __m512d cast binds to the call
   result, not to a trailing postfix operator.  */
#define _mm512_fnmadd_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, C, -1, R))
3357
|
|
|
|
|
|
|
|
3358
|
|
|
|
|
|
|
/* Macro form of _mm512_mask_fnmadd_round_pd.  This variant uses the
   dedicated __builtin_ia32_vfnmaddpd512_mask builtin, so A is passed
   through unnegated: negating it as well would cancel the builtin's
   own negation.  This matches _mm512_mask_fnmsub_round_pd, which hands
   A unmodified to the vfnmsub builtin.
   NOTE(review): A is presumably also the fallback value for lanes not
   selected by U, which is a second reason not to negate it -- confirm
   against the inline definition of this intrinsic.
   Parenthesized as a whole so the __m512d cast binds to the call
   result, not to a trailing postfix operator.  */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R))
3360
|
|
|
|
|
|
|
|
3361
|
|
|
|
|
|
|
/* Macro form of _mm512_mask3_fnmadd_round_pd: implemented on top of
   __builtin_ia32_vfmaddpd512_mask3 by negating A; U is the mask.
   Parenthesized as a whole so the __m512d cast binds to the call
   result, not to a trailing postfix operator.  */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R)    \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R))
3363
|
|
|
|
|
|
|
|
3364
|
|
|
|
|
|
|
/* Macro form of _mm512_maskz_fnmadd_round_pd: implemented on top of
   __builtin_ia32_vfmaddpd512_maskz by negating A; U is the mask.
   Parenthesized as a whole so the __m512d cast binds to the call
   result, not to a trailing postfix operator.  */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R)    \
    ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R))
3366
|
|
|
|
|
|
|
|
3367
|
|
|
|
|
|
|
/* Macro form of _mm512_fnmadd_round_ps: implemented on top of the
   plain fmadd builtin by negating A, with an all-ones mask (-1).
   Parenthesized as a whole so the __m512 cast binds to the call
   result, not to a trailing postfix operator.  */
#define _mm512_fnmadd_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R))
3369
|
|
|
|
|
|
|
|
3370
|
|
|
|
|
|
|
/* Macro form of _mm512_mask_fnmadd_round_ps.  This variant uses the
   dedicated __builtin_ia32_vfnmaddps512_mask builtin, so A is passed
   through unnegated: negating it as well would cancel the builtin's
   own negation.  This matches _mm512_mask_fnmsub_round_ps, which hands
   A unmodified to the vfnmsub builtin.
   NOTE(review): A is presumably also the fallback value for lanes not
   selected by U, which is a second reason not to negate it -- confirm
   against the inline definition of this intrinsic.
   Parenthesized as a whole so the __m512 cast binds to the call
   result, not to a trailing postfix operator.  */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R))
3372
|
|
|
|
|
|
|
|
3373
|
|
|
|
|
|
|
/* Macro form of _mm512_mask3_fnmadd_round_ps: implemented on top of
   __builtin_ia32_vfmaddps512_mask3 by negating A; U is the mask.
   Parenthesized as a whole so the __m512 cast binds to the call
   result, not to a trailing postfix operator.  */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R)    \
    ((__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R))
3375
|
|
|
|
|
|
|
|
3376
|
|
|
|
|
|
|
/* Macro form of _mm512_maskz_fnmadd_round_ps: implemented on top of
   __builtin_ia32_vfmaddps512_maskz by negating A; U is the mask.
   Parenthesized as a whole so the __m512 cast binds to the call
   result, not to a trailing postfix operator.  */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R)    \
    ((__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R))
3378
|
|
|
|
|
|
|
|
3379
|
|
|
|
|
|
|
/* Macro form of _mm512_fnmsub_round_pd: implemented on top of the
   plain fmadd builtin by negating both A and C, with an all-ones
   mask (-1).  Parenthesized as a whole so the __m512d cast binds to
   the call result, not to a trailing postfix operator.  */
#define _mm512_fnmsub_round_pd(A, B, C, R)            \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R))
3381
|
|
|
|
|
|
|
|
3382
|
|
|
|
|
|
|
/* Macro form of _mm512_mask_fnmsub_round_pd: moves the mask U behind
   the three vectors and calls the dedicated
   __builtin_ia32_vfnmsubpd512_mask builtin with unnegated operands.
   Parenthesized as a whole so the __m512d cast binds to the call
   result, not to a trailing postfix operator.  */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R)    \
    ((__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R))
3384
|
|
|
|
|
|
|
|
3385
|
|
|
|
|
|
|
/* Macro form of _mm512_mask3_fnmsub_round_pd: forwards all five
   arguments unchanged to __builtin_ia32_vfnmsubpd512_mask3.
   Parenthesized as a whole so the __m512d cast binds to the call
   result, not to a trailing postfix operator.  */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R)    \
    ((__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R))
3387
|
|
|
|
|
|
|
|
3388
|
|
|
|
|
|
|
/* Macro form of _mm512_maskz_fnmsub_round_pd: implemented on top of
   __builtin_ia32_vfmaddpd512_maskz by negating both A and C; U is the
   mask.  Parenthesized as a whole so the __m512d cast binds to the
   call result, not to a trailing postfix operator.  */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R)    \
    ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R))
3390
|
|
|
|
|
|
|
|
3391
|
|
|
|
|
|
|
/* Macro form of _mm512_fnmsub_round_ps: implemented on top of the
   plain fmadd builtin by negating both A and C, with an all-ones
   mask (-1).  Parenthesized as a whole so the __m512 cast binds to
   the call result, not to a trailing postfix operator.  */
#define _mm512_fnmsub_round_ps(A, B, C, R)            \
    ((__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R))
3393
|
|
|
|
|
|
|
|
3394
|
|
|
|
|
|
|
/* Macro form of _mm512_mask_fnmsub_round_ps: moves the mask U behind
   the three vectors and calls the dedicated
   __builtin_ia32_vfnmsubps512_mask builtin with unnegated operands.
   Parenthesized as a whole so the __m512 cast binds to the call
   result, not to a trailing postfix operator.  */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R)    \
    ((__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R))
3396
|
|
|
|
|
|
|
|
3397
|
|
|
|
|
|
|
/* Macro form of _mm512_mask3_fnmsub_round_ps: forwards all five
   arguments unchanged to __builtin_ia32_vfnmsubps512_mask3.
   Parenthesized as a whole so the __m512 cast binds to the call
   result, not to a trailing postfix operator.  */
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R)    \
    ((__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R))
3399
|
|
|
|
|
|
|
|
3400
|
|
|
|
|
|
|
/* Macro form of _mm512_maskz_fnmsub_round_ps: implemented on top of
   __builtin_ia32_vfmaddps512_maskz by negating both A and C; U is the
   mask.  Parenthesized as a whole so the __m512 cast binds to the
   call result, not to a trailing postfix operator.  */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R)    \
    ((__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R))
3402
|
|
|
|
|
|
|
#endif |
3403
|
|
|
|
|
|
|
|
3404
|
|
|
|
|
|
|
extern __inline __m512i |
3405
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3406
|
|
|
|
|
|
|
_mm512_abs_epi64 (__m512i __A) |
3407
|
|
|
|
|
|
|
{ |
3408
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, |
3409
|
|
|
|
|
|
|
(__v8di) |
3410
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
3411
|
|
|
|
|
|
|
(__mmask8) -1); |
3412
|
|
|
|
|
|
|
} |
3413
|
|
|
|
|
|
|
|
3414
|
|
|
|
|
|
|
extern __inline __m512i |
3415
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3416
|
|
|
|
|
|
|
_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) |
3417
|
|
|
|
|
|
|
{ |
3418
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, |
3419
|
|
|
|
|
|
|
(__v8di) __W, |
3420
|
|
|
|
|
|
|
(__mmask8) __U); |
3421
|
|
|
|
|
|
|
} |
3422
|
|
|
|
|
|
|
|
3423
|
|
|
|
|
|
|
extern __inline __m512i |
3424
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3425
|
|
|
|
|
|
|
_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) |
3426
|
|
|
|
|
|
|
{ |
3427
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, |
3428
|
|
|
|
|
|
|
(__v8di) |
3429
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
3430
|
|
|
|
|
|
|
(__mmask8) __U); |
3431
|
|
|
|
|
|
|
} |
3432
|
|
|
|
|
|
|
|
3433
|
|
|
|
|
|
|
extern __inline __m512i |
3434
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3435
|
|
|
|
|
|
|
_mm512_abs_epi32 (__m512i __A) |
3436
|
|
|
|
|
|
|
{ |
3437
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, |
3438
|
|
|
|
|
|
|
(__v16si) |
3439
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
3440
|
|
|
|
|
|
|
(__mmask16) -1); |
3441
|
|
|
|
|
|
|
} |
3442
|
|
|
|
|
|
|
|
3443
|
|
|
|
|
|
|
extern __inline __m512i |
3444
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3445
|
|
|
|
|
|
|
_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) |
3446
|
|
|
|
|
|
|
{ |
3447
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, |
3448
|
|
|
|
|
|
|
(__v16si) __W, |
3449
|
|
|
|
|
|
|
(__mmask16) __U); |
3450
|
|
|
|
|
|
|
} |
3451
|
|
|
|
|
|
|
|
3452
|
|
|
|
|
|
|
extern __inline __m512i |
3453
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3454
|
|
|
|
|
|
|
_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) |
3455
|
|
|
|
|
|
|
{ |
3456
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, |
3457
|
|
|
|
|
|
|
(__v16si) |
3458
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
3459
|
|
|
|
|
|
|
(__mmask16) __U); |
3460
|
|
|
|
|
|
|
} |
3461
|
|
|
|
|
|
|
|
3462
|
|
|
|
|
|
|
extern __inline __m512 |
3463
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3464
|
|
|
|
|
|
|
_mm512_broadcastss_ps (__m128 __A) |
3465
|
|
|
|
|
|
|
{ |
3466
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, |
3467
|
|
|
|
|
|
|
(__v16sf) |
3468
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
3469
|
|
|
|
|
|
|
(__mmask16) -1); |
3470
|
|
|
|
|
|
|
} |
3471
|
|
|
|
|
|
|
|
3472
|
|
|
|
|
|
|
extern __inline __m512 |
3473
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3474
|
|
|
|
|
|
|
_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) |
3475
|
|
|
|
|
|
|
{ |
3476
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, |
3477
|
|
|
|
|
|
|
(__v16sf) __O, __M); |
3478
|
|
|
|
|
|
|
} |
3479
|
|
|
|
|
|
|
|
3480
|
|
|
|
|
|
|
extern __inline __m512 |
3481
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3482
|
|
|
|
|
|
|
_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) |
3483
|
|
|
|
|
|
|
{ |
3484
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, |
3485
|
|
|
|
|
|
|
(__v16sf) |
3486
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
3487
|
|
|
|
|
|
|
__M); |
3488
|
|
|
|
|
|
|
} |
3489
|
|
|
|
|
|
|
|
3490
|
|
|
|
|
|
|
extern __inline __m512d |
3491
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3492
|
|
|
|
|
|
|
_mm512_broadcastsd_pd (__m128d __A) |
3493
|
|
|
|
|
|
|
{ |
3494
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, |
3495
|
|
|
|
|
|
|
(__v8df) |
3496
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
3497
|
|
|
|
|
|
|
(__mmask8) -1); |
3498
|
|
|
|
|
|
|
} |
3499
|
|
|
|
|
|
|
|
3500
|
|
|
|
|
|
|
extern __inline __m512d |
3501
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3502
|
|
|
|
|
|
|
_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A) |
3503
|
|
|
|
|
|
|
{ |
3504
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, |
3505
|
|
|
|
|
|
|
(__v8df) __O, __M); |
3506
|
|
|
|
|
|
|
} |
3507
|
|
|
|
|
|
|
|
3508
|
|
|
|
|
|
|
extern __inline __m512d |
3509
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3510
|
|
|
|
|
|
|
_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) |
3511
|
|
|
|
|
|
|
{ |
3512
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, |
3513
|
|
|
|
|
|
|
(__v8df) |
3514
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
3515
|
|
|
|
|
|
|
__M); |
3516
|
|
|
|
|
|
|
} |
3517
|
|
|
|
|
|
|
|
3518
|
|
|
|
|
|
|
extern __inline __m512i |
3519
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3520
|
|
|
|
|
|
|
_mm512_broadcastd_epi32 (__m128i __A) |
3521
|
|
|
|
|
|
|
{ |
3522
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, |
3523
|
|
|
|
|
|
|
(__v16si) |
3524
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
3525
|
|
|
|
|
|
|
(__mmask16) -1); |
3526
|
|
|
|
|
|
|
} |
3527
|
|
|
|
|
|
|
|
3528
|
|
|
|
|
|
|
extern __inline __m512i |
3529
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3530
|
|
|
|
|
|
|
_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A) |
3531
|
|
|
|
|
|
|
{ |
3532
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, |
3533
|
|
|
|
|
|
|
(__v16si) __O, __M); |
3534
|
|
|
|
|
|
|
} |
3535
|
|
|
|
|
|
|
|
3536
|
|
|
|
|
|
|
extern __inline __m512i |
3537
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3538
|
|
|
|
|
|
|
_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A) |
3539
|
|
|
|
|
|
|
{ |
3540
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, |
3541
|
|
|
|
|
|
|
(__v16si) |
3542
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
3543
|
|
|
|
|
|
|
__M); |
3544
|
|
|
|
|
|
|
} |
3545
|
|
|
|
|
|
|
|
3546
|
|
|
|
|
|
|
extern __inline __m512i |
3547
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3548
|
|
|
|
|
|
|
_mm512_set1_epi32 (int __A) |
3549
|
|
|
|
|
|
|
{ |
3550
|
0
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, |
3551
|
|
|
|
|
|
|
(__v16si) |
3552
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
3553
|
|
|
|
|
|
|
(__mmask16)(-1)); |
3554
|
|
|
|
|
|
|
} |
3555
|
|
|
|
|
|
|
|
3556
|
|
|
|
|
|
|
extern __inline __m512i |
3557
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3558
|
|
|
|
|
|
|
_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) |
3559
|
|
|
|
|
|
|
{ |
3560
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O, |
3561
|
|
|
|
|
|
|
__M); |
3562
|
|
|
|
|
|
|
} |
3563
|
|
|
|
|
|
|
|
3564
|
|
|
|
|
|
|
extern __inline __m512i |
3565
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3566
|
|
|
|
|
|
|
_mm512_maskz_set1_epi32 (__mmask16 __M, int __A) |
3567
|
|
|
|
|
|
|
{ |
3568
|
|
|
|
|
|
|
return (__m512i) |
3569
|
|
|
|
|
|
|
__builtin_ia32_pbroadcastd512_gpr_mask (__A, |
3570
|
|
|
|
|
|
|
(__v16si) _mm512_setzero_si512 (), |
3571
|
|
|
|
|
|
|
__M); |
3572
|
|
|
|
|
|
|
} |
3573
|
|
|
|
|
|
|
|
3574
|
|
|
|
|
|
|
extern __inline __m512i |
3575
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3576
|
|
|
|
|
|
|
_mm512_broadcastq_epi64 (__m128i __A) |
3577
|
|
|
|
|
|
|
{ |
3578
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, |
3579
|
|
|
|
|
|
|
(__v8di) |
3580
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
3581
|
|
|
|
|
|
|
(__mmask8) -1); |
3582
|
|
|
|
|
|
|
} |
3583
|
|
|
|
|
|
|
|
3584
|
|
|
|
|
|
|
extern __inline __m512i |
3585
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3586
|
|
|
|
|
|
|
_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A) |
3587
|
|
|
|
|
|
|
{ |
3588
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, |
3589
|
|
|
|
|
|
|
(__v8di) __O, __M); |
3590
|
|
|
|
|
|
|
} |
3591
|
|
|
|
|
|
|
|
3592
|
|
|
|
|
|
|
extern __inline __m512i |
3593
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3594
|
|
|
|
|
|
|
_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) |
3595
|
|
|
|
|
|
|
{ |
3596
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, |
3597
|
|
|
|
|
|
|
(__v8di) |
3598
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
3599
|
|
|
|
|
|
|
__M); |
3600
|
|
|
|
|
|
|
} |
3601
|
|
|
|
|
|
|
|
3602
|
|
|
|
|
|
|
extern __inline __m512i |
3603
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3604
|
|
|
|
|
|
|
_mm512_set1_epi64 (long long __A) |
3605
|
|
|
|
|
|
|
{ |
3606
|
0
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, |
3607
|
|
|
|
|
|
|
(__v8di) |
3608
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
3609
|
|
|
|
|
|
|
(__mmask8)(-1)); |
3610
|
|
|
|
|
|
|
} |
3611
|
|
|
|
|
|
|
|
3612
|
|
|
|
|
|
|
extern __inline __m512i |
3613
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3614
|
|
|
|
|
|
|
_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) |
3615
|
|
|
|
|
|
|
{ |
3616
|
0
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O, |
3617
|
|
|
|
|
|
|
__M); |
3618
|
|
|
|
|
|
|
} |
3619
|
|
|
|
|
|
|
|
3620
|
|
|
|
|
|
|
extern __inline __m512i |
3621
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3622
|
|
|
|
|
|
|
_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A) |
3623
|
|
|
|
|
|
|
{ |
3624
|
|
|
|
|
|
|
return (__m512i) |
3625
|
|
|
|
|
|
|
__builtin_ia32_pbroadcastq512_gpr_mask (__A, |
3626
|
|
|
|
|
|
|
(__v8di) _mm512_setzero_si512 (), |
3627
|
|
|
|
|
|
|
__M); |
3628
|
|
|
|
|
|
|
} |
3629
|
|
|
|
|
|
|
|
3630
|
|
|
|
|
|
|
extern __inline __m512 |
3631
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3632
|
|
|
|
|
|
|
_mm512_broadcast_f32x4 (__m128 __A) |
3633
|
|
|
|
|
|
|
{ |
3634
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, |
3635
|
|
|
|
|
|
|
(__v16sf) |
3636
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
3637
|
|
|
|
|
|
|
(__mmask16) -1); |
3638
|
|
|
|
|
|
|
} |
3639
|
|
|
|
|
|
|
|
3640
|
|
|
|
|
|
|
extern __inline __m512 |
3641
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3642
|
|
|
|
|
|
|
_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A) |
3643
|
|
|
|
|
|
|
{ |
3644
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, |
3645
|
|
|
|
|
|
|
(__v16sf) __O, |
3646
|
|
|
|
|
|
|
__M); |
3647
|
|
|
|
|
|
|
} |
3648
|
|
|
|
|
|
|
|
3649
|
|
|
|
|
|
|
extern __inline __m512 |
3650
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3651
|
|
|
|
|
|
|
_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A) |
3652
|
|
|
|
|
|
|
{ |
3653
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, |
3654
|
|
|
|
|
|
|
(__v16sf) |
3655
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
3656
|
|
|
|
|
|
|
__M); |
3657
|
|
|
|
|
|
|
} |
3658
|
|
|
|
|
|
|
|
3659
|
|
|
|
|
|
|
extern __inline __m512i |
3660
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3661
|
|
|
|
|
|
|
_mm512_broadcast_i32x4 (__m128i __A) |
3662
|
|
|
|
|
|
|
{ |
3663
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, |
3664
|
|
|
|
|
|
|
(__v16si) |
3665
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
3666
|
|
|
|
|
|
|
(__mmask16) -1); |
3667
|
|
|
|
|
|
|
} |
3668
|
|
|
|
|
|
|
|
3669
|
|
|
|
|
|
|
extern __inline __m512i |
3670
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3671
|
|
|
|
|
|
|
_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A) |
3672
|
|
|
|
|
|
|
{ |
3673
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, |
3674
|
|
|
|
|
|
|
(__v16si) __O, |
3675
|
|
|
|
|
|
|
__M); |
3676
|
|
|
|
|
|
|
} |
3677
|
|
|
|
|
|
|
|
3678
|
|
|
|
|
|
|
extern __inline __m512i |
3679
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3680
|
|
|
|
|
|
|
_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A) |
3681
|
|
|
|
|
|
|
{ |
3682
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, |
3683
|
|
|
|
|
|
|
(__v16si) |
3684
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
3685
|
|
|
|
|
|
|
__M); |
3686
|
|
|
|
|
|
|
} |
3687
|
|
|
|
|
|
|
|
3688
|
|
|
|
|
|
|
extern __inline __m512d |
3689
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3690
|
|
|
|
|
|
|
_mm512_broadcast_f64x4 (__m256d __A) |
3691
|
|
|
|
|
|
|
{ |
3692
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, |
3693
|
|
|
|
|
|
|
(__v8df) |
3694
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
3695
|
|
|
|
|
|
|
(__mmask8) -1); |
3696
|
|
|
|
|
|
|
} |
3697
|
|
|
|
|
|
|
|
3698
|
|
|
|
|
|
|
extern __inline __m512d |
3699
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3700
|
|
|
|
|
|
|
_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A) |
3701
|
|
|
|
|
|
|
{ |
3702
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, |
3703
|
|
|
|
|
|
|
(__v8df) __O, |
3704
|
|
|
|
|
|
|
__M); |
3705
|
|
|
|
|
|
|
} |
3706
|
|
|
|
|
|
|
|
3707
|
|
|
|
|
|
|
extern __inline __m512d |
3708
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3709
|
|
|
|
|
|
|
_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A) |
3710
|
|
|
|
|
|
|
{ |
3711
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, |
3712
|
|
|
|
|
|
|
(__v8df) |
3713
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
3714
|
|
|
|
|
|
|
__M); |
3715
|
|
|
|
|
|
|
} |
3716
|
|
|
|
|
|
|
|
3717
|
|
|
|
|
|
|
extern __inline __m512i |
3718
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3719
|
|
|
|
|
|
|
_mm512_broadcast_i64x4 (__m256i __A) |
3720
|
|
|
|
|
|
|
{ |
3721
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, |
3722
|
|
|
|
|
|
|
(__v8di) |
3723
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
3724
|
|
|
|
|
|
|
(__mmask8) -1); |
3725
|
|
|
|
|
|
|
} |
3726
|
|
|
|
|
|
|
|
3727
|
|
|
|
|
|
|
extern __inline __m512i |
3728
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3729
|
|
|
|
|
|
|
_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A) |
3730
|
|
|
|
|
|
|
{ |
3731
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, |
3732
|
|
|
|
|
|
|
(__v8di) __O, |
3733
|
|
|
|
|
|
|
__M); |
3734
|
|
|
|
|
|
|
} |
3735
|
|
|
|
|
|
|
|
3736
|
|
|
|
|
|
|
extern __inline __m512i |
3737
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3738
|
|
|
|
|
|
|
_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A) |
3739
|
|
|
|
|
|
|
{ |
3740
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, |
3741
|
|
|
|
|
|
|
(__v8di) |
3742
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
3743
|
|
|
|
|
|
|
__M); |
3744
|
|
|
|
|
|
|
} |
3745
|
|
|
|
|
|
|
|
3746
|
|
|
|
|
|
|
/* Immediate type taken by the _mm512_*shuffle_epi32 wrappers.  Every
   enumerator name ends in four letters, each encoding a 2-bit field
   (A = 0, B = 1, C = 2, D = 3); the leftmost letter occupies bits 7:6
   and the rightmost bits 1:0, so the 256 names cover 0x00..0xFF.
   One row below lists the four values that share the upper six bits.  */
typedef enum
{
  /* Leading letter A: 0x00 - 0x3F.  */
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F,

  /* Leading letter B: 0x40 - 0x7F.  */
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F,

  /* Leading letter C: 0x80 - 0xBF.  */
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,

  /* Leading letter D: 0xC0 - 0xFF.  */
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
3835
|
|
|
|
|
|
|
|
3836
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
3837
|
|
|
|
|
|
|
extern __inline __m512i |
3838
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3839
|
|
|
|
|
|
|
_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask) |
3840
|
|
|
|
|
|
|
{ |
3841
|
0
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A, |
3842
|
|
|
|
|
|
|
__mask, |
3843
|
|
|
|
|
|
|
(__v16si) |
3844
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
3845
|
|
|
|
|
|
|
(__mmask16) -1); |
3846
|
|
|
|
|
|
|
} |
3847
|
|
|
|
|
|
|
|
3848
|
|
|
|
|
|
|
extern __inline __m512i |
3849
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3850
|
|
|
|
|
|
|
_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A, |
3851
|
|
|
|
|
|
|
_MM_PERM_ENUM __mask) |
3852
|
|
|
|
|
|
|
{ |
3853
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A, |
3854
|
|
|
|
|
|
|
__mask, |
3855
|
|
|
|
|
|
|
(__v16si) __W, |
3856
|
|
|
|
|
|
|
(__mmask16) __U); |
3857
|
|
|
|
|
|
|
} |
3858
|
|
|
|
|
|
|
|
3859
|
|
|
|
|
|
|
extern __inline __m512i |
3860
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3861
|
|
|
|
|
|
|
_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask) |
3862
|
|
|
|
|
|
|
{ |
3863
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A, |
3864
|
|
|
|
|
|
|
__mask, |
3865
|
|
|
|
|
|
|
(__v16si) |
3866
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
3867
|
|
|
|
|
|
|
(__mmask16) __U); |
3868
|
|
|
|
|
|
|
} |
3869
|
|
|
|
|
|
|
|
3870
|
|
|
|
|
|
|
extern __inline __m512i |
3871
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3872
|
|
|
|
|
|
|
_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm) |
3873
|
|
|
|
|
|
|
{ |
3874
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A, |
3875
|
|
|
|
|
|
|
(__v8di) __B, __imm, |
3876
|
|
|
|
|
|
|
(__v8di) |
3877
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
3878
|
|
|
|
|
|
|
(__mmask8) -1); |
3879
|
|
|
|
|
|
|
} |
3880
|
|
|
|
|
|
|
|
3881
|
|
|
|
|
|
|
extern __inline __m512i |
3882
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3883
|
|
|
|
|
|
|
_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A, |
3884
|
|
|
|
|
|
|
__m512i __B, const int __imm) |
3885
|
|
|
|
|
|
|
{ |
3886
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A, |
3887
|
|
|
|
|
|
|
(__v8di) __B, __imm, |
3888
|
|
|
|
|
|
|
(__v8di) __W, |
3889
|
|
|
|
|
|
|
(__mmask8) __U); |
3890
|
|
|
|
|
|
|
} |
3891
|
|
|
|
|
|
|
|
3892
|
|
|
|
|
|
|
extern __inline __m512i |
3893
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3894
|
|
|
|
|
|
|
_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B, |
3895
|
|
|
|
|
|
|
const int __imm) |
3896
|
|
|
|
|
|
|
{ |
3897
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A, |
3898
|
|
|
|
|
|
|
(__v8di) __B, __imm, |
3899
|
|
|
|
|
|
|
(__v8di) |
3900
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
3901
|
|
|
|
|
|
|
(__mmask8) __U); |
3902
|
|
|
|
|
|
|
} |
3903
|
|
|
|
|
|
|
|
3904
|
|
|
|
|
|
|
extern __inline __m512i |
3905
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3906
|
|
|
|
|
|
|
_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm) |
3907
|
|
|
|
|
|
|
{ |
3908
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A, |
3909
|
|
|
|
|
|
|
(__v16si) __B, |
3910
|
|
|
|
|
|
|
__imm, |
3911
|
|
|
|
|
|
|
(__v16si) |
3912
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
3913
|
|
|
|
|
|
|
(__mmask16) -1); |
3914
|
|
|
|
|
|
|
} |
3915
|
|
|
|
|
|
|
|
3916
|
|
|
|
|
|
|
extern __inline __m512i |
3917
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3918
|
|
|
|
|
|
|
_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A, |
3919
|
|
|
|
|
|
|
__m512i __B, const int __imm) |
3920
|
|
|
|
|
|
|
{ |
3921
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A, |
3922
|
|
|
|
|
|
|
(__v16si) __B, |
3923
|
|
|
|
|
|
|
__imm, |
3924
|
|
|
|
|
|
|
(__v16si) __W, |
3925
|
|
|
|
|
|
|
(__mmask16) __U); |
3926
|
|
|
|
|
|
|
} |
3927
|
|
|
|
|
|
|
|
3928
|
|
|
|
|
|
|
extern __inline __m512i |
3929
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3930
|
|
|
|
|
|
|
_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B, |
3931
|
|
|
|
|
|
|
const int __imm) |
3932
|
|
|
|
|
|
|
{ |
3933
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A, |
3934
|
|
|
|
|
|
|
(__v16si) __B, |
3935
|
|
|
|
|
|
|
__imm, |
3936
|
|
|
|
|
|
|
(__v16si) |
3937
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
3938
|
|
|
|
|
|
|
(__mmask16) __U); |
3939
|
|
|
|
|
|
|
} |
3940
|
|
|
|
|
|
|
|
3941
|
|
|
|
|
|
|
extern __inline __m512d |
3942
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3943
|
|
|
|
|
|
|
_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm) |
3944
|
|
|
|
|
|
|
{ |
3945
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A, |
3946
|
|
|
|
|
|
|
(__v8df) __B, __imm, |
3947
|
|
|
|
|
|
|
(__v8df) |
3948
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
3949
|
|
|
|
|
|
|
(__mmask8) -1); |
3950
|
|
|
|
|
|
|
} |
3951
|
|
|
|
|
|
|
|
3952
|
|
|
|
|
|
|
extern __inline __m512d |
3953
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3954
|
|
|
|
|
|
|
_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A, |
3955
|
|
|
|
|
|
|
__m512d __B, const int __imm) |
3956
|
|
|
|
|
|
|
{ |
3957
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A, |
3958
|
|
|
|
|
|
|
(__v8df) __B, __imm, |
3959
|
|
|
|
|
|
|
(__v8df) __W, |
3960
|
|
|
|
|
|
|
(__mmask8) __U); |
3961
|
|
|
|
|
|
|
} |
3962
|
|
|
|
|
|
|
|
3963
|
|
|
|
|
|
|
extern __inline __m512d |
3964
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3965
|
|
|
|
|
|
|
_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B, |
3966
|
|
|
|
|
|
|
const int __imm) |
3967
|
|
|
|
|
|
|
{ |
3968
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A, |
3969
|
|
|
|
|
|
|
(__v8df) __B, __imm, |
3970
|
|
|
|
|
|
|
(__v8df) |
3971
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
3972
|
|
|
|
|
|
|
(__mmask8) __U); |
3973
|
|
|
|
|
|
|
} |
3974
|
|
|
|
|
|
|
|
3975
|
|
|
|
|
|
|
extern __inline __m512 |
3976
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3977
|
|
|
|
|
|
|
_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm) |
3978
|
|
|
|
|
|
|
{ |
3979
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A, |
3980
|
|
|
|
|
|
|
(__v16sf) __B, __imm, |
3981
|
|
|
|
|
|
|
(__v16sf) |
3982
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
3983
|
|
|
|
|
|
|
(__mmask16) -1); |
3984
|
|
|
|
|
|
|
} |
3985
|
|
|
|
|
|
|
|
3986
|
|
|
|
|
|
|
extern __inline __m512 |
3987
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3988
|
|
|
|
|
|
|
_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A, |
3989
|
|
|
|
|
|
|
__m512 __B, const int __imm) |
3990
|
|
|
|
|
|
|
{ |
3991
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A, |
3992
|
|
|
|
|
|
|
(__v16sf) __B, __imm, |
3993
|
|
|
|
|
|
|
(__v16sf) __W, |
3994
|
|
|
|
|
|
|
(__mmask16) __U); |
3995
|
|
|
|
|
|
|
} |
3996
|
|
|
|
|
|
|
|
3997
|
|
|
|
|
|
|
extern __inline __m512 |
3998
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
3999
|
|
|
|
|
|
|
_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B, |
4000
|
|
|
|
|
|
|
const int __imm) |
4001
|
|
|
|
|
|
|
{ |
4002
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A, |
4003
|
|
|
|
|
|
|
(__v16sf) __B, __imm, |
4004
|
|
|
|
|
|
|
(__v16sf) |
4005
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
4006
|
|
|
|
|
|
|
(__mmask16) __U); |
4007
|
|
|
|
|
|
|
} |
4008
|
|
|
|
|
|
|
|
4009
|
|
|
|
|
|
|
#else |
4010
|
|
|
|
|
|
|
/* Macro forms of the epi32 shuffle family.  Each expands to a call of
   __builtin_ia32_pshufd512_mask; the three variants differ only in the
   vector/mask pair passed as the last two arguments.  */
#define _mm512_shuffle_epi32(V, IMM)                                    \
  ((__m512i) __builtin_ia32_pshufd512_mask (                            \
     (__v16si)(__m512i)(V), (int)(IMM),                                 \
     (__v16si)(__m512i)_mm512_undefined_si512 (),                       \
     (__mmask16)-1))

#define _mm512_mask_shuffle_epi32(SRC, K, V, IMM)                       \
  ((__m512i) __builtin_ia32_pshufd512_mask (                            \
     (__v16si)(__m512i)(V), (int)(IMM),                                 \
     (__v16si)(__m512i)(SRC),                                           \
     (__mmask16)(K)))

#define _mm512_maskz_shuffle_epi32(K, V, IMM)                           \
  ((__m512i) __builtin_ia32_pshufd512_mask (                            \
     (__v16si)(__m512i)(V), (int)(IMM),                                 \
     (__v16si)(__m512i)_mm512_setzero_si512 (),                         \
     (__mmask16)(K)))
4024
|
|
|
|
|
|
|
|
4025
|
|
|
|
|
|
|
/* Macro forms of the i64x2 shuffle family.  Each expands to a call of
   __builtin_ia32_shuf_i64x2_mask; the three variants differ only in the
   vector/mask pair passed as the last two arguments.  */
#define _mm512_shuffle_i64x2(V1, V2, IMM)                               \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask (                           \
     (__v8di)(__m512i)(V1), (__v8di)(__m512i)(V2), (int)(IMM),          \
     (__v8di)(__m512i)_mm512_undefined_si512 (),                        \
     (__mmask8)-1))

#define _mm512_mask_shuffle_i64x2(SRC, K, V1, V2, IMM)                  \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask (                           \
     (__v8di)(__m512i)(V1), (__v8di)(__m512i)(V2), (int)(IMM),          \
     (__v8di)(__m512i)(SRC),                                            \
     (__mmask8)(K)))

#define _mm512_maskz_shuffle_i64x2(K, V1, V2, IMM)                      \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask (                           \
     (__v8di)(__m512i)(V1), (__v8di)(__m512i)(V2), (int)(IMM),          \
     (__v8di)(__m512i)_mm512_setzero_si512 (),                          \
     (__mmask8)(K)))
4042
|
|
|
|
|
|
|
|
4043
|
|
|
|
|
|
|
/* Macro forms of the i32x4 shuffle family.  Each expands to a call of
   __builtin_ia32_shuf_i32x4_mask; the three variants differ only in the
   vector/mask pair passed as the last two arguments.  */
#define _mm512_shuffle_i32x4(V1, V2, IMM)                               \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask (                           \
     (__v16si)(__m512i)(V1), (__v16si)(__m512i)(V2), (int)(IMM),        \
     (__v16si)(__m512i)_mm512_undefined_si512 (),                       \
     (__mmask16)-1))

#define _mm512_mask_shuffle_i32x4(SRC, K, V1, V2, IMM)                  \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask (                           \
     (__v16si)(__m512i)(V1), (__v16si)(__m512i)(V2), (int)(IMM),        \
     (__v16si)(__m512i)(SRC),                                           \
     (__mmask16)(K)))

#define _mm512_maskz_shuffle_i32x4(K, V1, V2, IMM)                      \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask (                           \
     (__v16si)(__m512i)(V1), (__v16si)(__m512i)(V2), (int)(IMM),        \
     (__v16si)(__m512i)_mm512_setzero_si512 (),                         \
     (__mmask16)(K)))
4060
|
|
|
|
|
|
|
|
4061
|
|
|
|
|
|
|
/* Macro forms of the f64x2 shuffle family.  Each expands to a call of
   __builtin_ia32_shuf_f64x2_mask; the three variants differ only in the
   vector/mask pair passed as the last two arguments.  */
#define _mm512_shuffle_f64x2(V1, V2, IMM)                               \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask (                           \
     (__v8df)(__m512d)(V1), (__v8df)(__m512d)(V2), (int)(IMM),          \
     (__v8df)(__m512d)_mm512_undefined_pd(),                            \
     (__mmask8)-1))

#define _mm512_mask_shuffle_f64x2(SRC, K, V1, V2, IMM)                  \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask (                           \
     (__v8df)(__m512d)(V1), (__v8df)(__m512d)(V2), (int)(IMM),          \
     (__v8df)(__m512d)(SRC),                                            \
     (__mmask8)(K)))

#define _mm512_maskz_shuffle_f64x2(K, V1, V2, IMM)                      \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask (                           \
     (__v8df)(__m512d)(V1), (__v8df)(__m512d)(V2), (int)(IMM),          \
     (__v8df)(__m512d)_mm512_setzero_pd(),                              \
     (__mmask8)(K)))
4078
|
|
|
|
|
|
|
|
4079
|
|
|
|
|
|
|
/* Macro forms of the f32x4 shuffle family.  Each expands to a call of
   __builtin_ia32_shuf_f32x4_mask; the three variants differ only in the
   vector/mask pair passed as the last two arguments.  */
#define _mm512_shuffle_f32x4(V1, V2, IMM)                               \
  ((__m512) __builtin_ia32_shuf_f32x4_mask (                            \
     (__v16sf)(__m512)(V1), (__v16sf)(__m512)(V2), (int)(IMM),          \
     (__v16sf)(__m512)_mm512_undefined_ps(),                            \
     (__mmask16)-1))

#define _mm512_mask_shuffle_f32x4(SRC, K, V1, V2, IMM)                  \
  ((__m512) __builtin_ia32_shuf_f32x4_mask (                            \
     (__v16sf)(__m512)(V1), (__v16sf)(__m512)(V2), (int)(IMM),          \
     (__v16sf)(__m512)(SRC),                                            \
     (__mmask16)(K)))

#define _mm512_maskz_shuffle_f32x4(K, V1, V2, IMM)                      \
  ((__m512) __builtin_ia32_shuf_f32x4_mask (                            \
     (__v16sf)(__m512)(V1), (__v16sf)(__m512)(V2), (int)(IMM),          \
     (__v16sf)(__m512)_mm512_setzero_ps(),                              \
     (__mmask16)(K)))
4096
|
|
|
|
|
|
|
#endif |
4097
|
|
|
|
|
|
|
|
4098
|
|
|
|
|
|
|
extern __inline __m512i |
4099
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4100
|
|
|
|
|
|
|
_mm512_rolv_epi32 (__m512i __A, __m512i __B) |
4101
|
|
|
|
|
|
|
{ |
4102
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, |
4103
|
|
|
|
|
|
|
(__v16si) __B, |
4104
|
|
|
|
|
|
|
(__v16si) |
4105
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
4106
|
|
|
|
|
|
|
(__mmask16) -1); |
4107
|
|
|
|
|
|
|
} |
4108
|
|
|
|
|
|
|
|
4109
|
|
|
|
|
|
|
extern __inline __m512i |
4110
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4111
|
|
|
|
|
|
|
_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) |
4112
|
|
|
|
|
|
|
{ |
4113
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, |
4114
|
|
|
|
|
|
|
(__v16si) __B, |
4115
|
|
|
|
|
|
|
(__v16si) __W, |
4116
|
|
|
|
|
|
|
(__mmask16) __U); |
4117
|
|
|
|
|
|
|
} |
4118
|
|
|
|
|
|
|
|
4119
|
|
|
|
|
|
|
extern __inline __m512i |
4120
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4121
|
|
|
|
|
|
|
_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) |
4122
|
|
|
|
|
|
|
{ |
4123
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, |
4124
|
|
|
|
|
|
|
(__v16si) __B, |
4125
|
|
|
|
|
|
|
(__v16si) |
4126
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
4127
|
|
|
|
|
|
|
(__mmask16) __U); |
4128
|
|
|
|
|
|
|
} |
4129
|
|
|
|
|
|
|
|
4130
|
|
|
|
|
|
|
extern __inline __m512i |
4131
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4132
|
|
|
|
|
|
|
_mm512_rorv_epi32 (__m512i __A, __m512i __B) |
4133
|
|
|
|
|
|
|
{ |
4134
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, |
4135
|
|
|
|
|
|
|
(__v16si) __B, |
4136
|
|
|
|
|
|
|
(__v16si) |
4137
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
4138
|
|
|
|
|
|
|
(__mmask16) -1); |
4139
|
|
|
|
|
|
|
} |
4140
|
|
|
|
|
|
|
|
4141
|
|
|
|
|
|
|
extern __inline __m512i |
4142
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4143
|
|
|
|
|
|
|
_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) |
4144
|
|
|
|
|
|
|
{ |
4145
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, |
4146
|
|
|
|
|
|
|
(__v16si) __B, |
4147
|
|
|
|
|
|
|
(__v16si) __W, |
4148
|
|
|
|
|
|
|
(__mmask16) __U); |
4149
|
|
|
|
|
|
|
} |
4150
|
|
|
|
|
|
|
|
4151
|
|
|
|
|
|
|
extern __inline __m512i |
4152
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4153
|
|
|
|
|
|
|
_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) |
4154
|
|
|
|
|
|
|
{ |
4155
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, |
4156
|
|
|
|
|
|
|
(__v16si) __B, |
4157
|
|
|
|
|
|
|
(__v16si) |
4158
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
4159
|
|
|
|
|
|
|
(__mmask16) __U); |
4160
|
|
|
|
|
|
|
} |
4161
|
|
|
|
|
|
|
|
4162
|
|
|
|
|
|
|
extern __inline __m512i |
4163
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4164
|
|
|
|
|
|
|
_mm512_rolv_epi64 (__m512i __A, __m512i __B) |
4165
|
|
|
|
|
|
|
{ |
4166
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, |
4167
|
|
|
|
|
|
|
(__v8di) __B, |
4168
|
|
|
|
|
|
|
(__v8di) |
4169
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
4170
|
|
|
|
|
|
|
(__mmask8) -1); |
4171
|
|
|
|
|
|
|
} |
4172
|
|
|
|
|
|
|
|
4173
|
|
|
|
|
|
|
extern __inline __m512i |
4174
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4175
|
|
|
|
|
|
|
_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) |
4176
|
|
|
|
|
|
|
{ |
4177
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, |
4178
|
|
|
|
|
|
|
(__v8di) __B, |
4179
|
|
|
|
|
|
|
(__v8di) __W, |
4180
|
|
|
|
|
|
|
(__mmask8) __U); |
4181
|
|
|
|
|
|
|
} |
4182
|
|
|
|
|
|
|
|
4183
|
|
|
|
|
|
|
extern __inline __m512i |
4184
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4185
|
|
|
|
|
|
|
_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) |
4186
|
|
|
|
|
|
|
{ |
4187
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, |
4188
|
|
|
|
|
|
|
(__v8di) __B, |
4189
|
|
|
|
|
|
|
(__v8di) |
4190
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
4191
|
|
|
|
|
|
|
(__mmask8) __U); |
4192
|
|
|
|
|
|
|
} |
4193
|
|
|
|
|
|
|
|
4194
|
|
|
|
|
|
|
extern __inline __m512i |
4195
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4196
|
|
|
|
|
|
|
_mm512_rorv_epi64 (__m512i __A, __m512i __B) |
4197
|
|
|
|
|
|
|
{ |
4198
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, |
4199
|
|
|
|
|
|
|
(__v8di) __B, |
4200
|
|
|
|
|
|
|
(__v8di) |
4201
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
4202
|
|
|
|
|
|
|
(__mmask8) -1); |
4203
|
|
|
|
|
|
|
} |
4204
|
|
|
|
|
|
|
|
4205
|
|
|
|
|
|
|
extern __inline __m512i |
4206
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4207
|
|
|
|
|
|
|
_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) |
4208
|
|
|
|
|
|
|
{ |
4209
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, |
4210
|
|
|
|
|
|
|
(__v8di) __B, |
4211
|
|
|
|
|
|
|
(__v8di) __W, |
4212
|
|
|
|
|
|
|
(__mmask8) __U); |
4213
|
|
|
|
|
|
|
} |
4214
|
|
|
|
|
|
|
|
4215
|
|
|
|
|
|
|
extern __inline __m512i |
4216
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4217
|
|
|
|
|
|
|
_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) |
4218
|
|
|
|
|
|
|
{ |
4219
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, |
4220
|
|
|
|
|
|
|
(__v8di) __B, |
4221
|
|
|
|
|
|
|
(__v8di) |
4222
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
4223
|
|
|
|
|
|
|
(__mmask8) __U); |
4224
|
|
|
|
|
|
|
} |
4225
|
|
|
|
|
|
|
|
4226
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
4227
|
|
|
|
|
|
|
extern __inline __m256i |
4228
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4229
|
|
|
|
|
|
|
_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R) |
4230
|
|
|
|
|
|
|
{ |
4231
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, |
4232
|
|
|
|
|
|
|
(__v8si) |
4233
|
|
|
|
|
|
|
_mm256_undefined_si256 (), |
4234
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
4235
|
|
|
|
|
|
|
} |
4236
|
|
|
|
|
|
|
|
4237
|
|
|
|
|
|
|
extern __inline __m256i |
4238
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4239
|
|
|
|
|
|
|
_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A, |
4240
|
|
|
|
|
|
|
const int __R) |
4241
|
|
|
|
|
|
|
{ |
4242
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, |
4243
|
|
|
|
|
|
|
(__v8si) __W, |
4244
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
4245
|
|
|
|
|
|
|
} |
4246
|
|
|
|
|
|
|
|
4247
|
|
|
|
|
|
|
extern __inline __m256i |
4248
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4249
|
|
|
|
|
|
|
_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R) |
4250
|
|
|
|
|
|
|
{ |
4251
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, |
4252
|
|
|
|
|
|
|
(__v8si) |
4253
|
|
|
|
|
|
|
_mm256_setzero_si256 (), |
4254
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
4255
|
|
|
|
|
|
|
} |
4256
|
|
|
|
|
|
|
|
4257
|
|
|
|
|
|
|
extern __inline __m256i |
4258
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4259
|
|
|
|
|
|
|
_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R) |
4260
|
|
|
|
|
|
|
{ |
4261
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, |
4262
|
|
|
|
|
|
|
(__v8si) |
4263
|
|
|
|
|
|
|
_mm256_undefined_si256 (), |
4264
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
4265
|
|
|
|
|
|
|
} |
4266
|
|
|
|
|
|
|
|
4267
|
|
|
|
|
|
|
extern __inline __m256i |
4268
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4269
|
|
|
|
|
|
|
_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A, |
4270
|
|
|
|
|
|
|
const int __R) |
4271
|
|
|
|
|
|
|
{ |
4272
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, |
4273
|
|
|
|
|
|
|
(__v8si) __W, |
4274
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
4275
|
|
|
|
|
|
|
} |
4276
|
|
|
|
|
|
|
|
4277
|
|
|
|
|
|
|
extern __inline __m256i |
4278
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4279
|
|
|
|
|
|
|
_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R) |
4280
|
|
|
|
|
|
|
{ |
4281
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, |
4282
|
|
|
|
|
|
|
(__v8si) |
4283
|
|
|
|
|
|
|
_mm256_setzero_si256 (), |
4284
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
4285
|
|
|
|
|
|
|
} |
4286
|
|
|
|
|
|
|
#else |
4287
|
|
|
|
|
|
|
/* Macro forms of the cvtt_roundpd family, used when __OPTIMIZE__ is not
   defined.  Each expands to __builtin_ia32_cvttpd2dq512_mask (epi32) or
   __builtin_ia32_cvttpd2udq512_mask (epu32).  */
#define _mm512_cvtt_roundpd_epi32(V, R)                                 \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask(                           \
     V, (__v8si)_mm256_undefined_si256(), -1, R))

#define _mm512_mask_cvtt_roundpd_epi32(SRC, K, V, R)                    \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask(V, (__v8si)(SRC), K, R))

#define _mm512_maskz_cvtt_roundpd_epi32(K, V, R)                        \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask(                           \
     V, (__v8si)_mm256_setzero_si256(), K, R))

#define _mm512_cvtt_roundpd_epu32(V, R)                                 \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask(                          \
     V, (__v8si)_mm256_undefined_si256(), -1, R))

#define _mm512_mask_cvtt_roundpd_epu32(SRC, K, V, R)                    \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask(V, (__v8si)(SRC), K, R))

#define _mm512_maskz_cvtt_roundpd_epu32(K, V, R)                        \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask(                          \
     V, (__v8si)_mm256_setzero_si256(), K, R))
4304
|
|
|
|
|
|
|
#endif |
4305
|
|
|
|
|
|
|
|
4306
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
4307
|
|
|
|
|
|
|
extern __inline __m256i |
4308
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4309
|
|
|
|
|
|
|
_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R) |
4310
|
|
|
|
|
|
|
{ |
4311
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, |
4312
|
|
|
|
|
|
|
(__v8si) |
4313
|
|
|
|
|
|
|
_mm256_undefined_si256 (), |
4314
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
4315
|
|
|
|
|
|
|
} |
4316
|
|
|
|
|
|
|
|
4317
|
|
|
|
|
|
|
extern __inline __m256i |
4318
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4319
|
|
|
|
|
|
|
_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A, |
4320
|
|
|
|
|
|
|
const int __R) |
4321
|
|
|
|
|
|
|
{ |
4322
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, |
4323
|
|
|
|
|
|
|
(__v8si) __W, |
4324
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
4325
|
|
|
|
|
|
|
} |
4326
|
|
|
|
|
|
|
|
4327
|
|
|
|
|
|
|
extern __inline __m256i |
4328
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4329
|
|
|
|
|
|
|
_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R) |
4330
|
|
|
|
|
|
|
{ |
4331
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, |
4332
|
|
|
|
|
|
|
(__v8si) |
4333
|
|
|
|
|
|
|
_mm256_setzero_si256 (), |
4334
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
4335
|
|
|
|
|
|
|
} |
4336
|
|
|
|
|
|
|
|
4337
|
|
|
|
|
|
|
extern __inline __m256i |
4338
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4339
|
|
|
|
|
|
|
_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R) |
4340
|
|
|
|
|
|
|
{ |
4341
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, |
4342
|
|
|
|
|
|
|
(__v8si) |
4343
|
|
|
|
|
|
|
_mm256_undefined_si256 (), |
4344
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
4345
|
|
|
|
|
|
|
} |
4346
|
|
|
|
|
|
|
|
4347
|
|
|
|
|
|
|
extern __inline __m256i |
4348
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4349
|
|
|
|
|
|
|
_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A, |
4350
|
|
|
|
|
|
|
const int __R) |
4351
|
|
|
|
|
|
|
{ |
4352
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, |
4353
|
|
|
|
|
|
|
(__v8si) __W, |
4354
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
4355
|
|
|
|
|
|
|
} |
4356
|
|
|
|
|
|
|
|
4357
|
|
|
|
|
|
|
extern __inline __m256i |
4358
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4359
|
|
|
|
|
|
|
_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R) |
4360
|
|
|
|
|
|
|
{ |
4361
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, |
4362
|
|
|
|
|
|
|
(__v8si) |
4363
|
|
|
|
|
|
|
_mm256_setzero_si256 (), |
4364
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
4365
|
|
|
|
|
|
|
} |
4366
|
|
|
|
|
|
|
#else |
4367
|
|
|
|
|
|
|
/* Macro forms of the cvt_roundpd family, used when __OPTIMIZE__ is not
   defined.  Each expands to __builtin_ia32_cvtpd2dq512_mask (epi32) or
   __builtin_ia32_cvtpd2udq512_mask (epu32).  */
#define _mm512_cvt_roundpd_epi32(V, R)                                  \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask(                            \
     V, (__v8si)_mm256_undefined_si256(), -1, R))

#define _mm512_mask_cvt_roundpd_epi32(SRC, K, V, R)                     \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask(V, (__v8si)(SRC), K, R))

#define _mm512_maskz_cvt_roundpd_epi32(K, V, R)                         \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask(                            \
     V, (__v8si)_mm256_setzero_si256(), K, R))

#define _mm512_cvt_roundpd_epu32(V, R)                                  \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask(                           \
     V, (__v8si)_mm256_undefined_si256(), -1, R))

#define _mm512_mask_cvt_roundpd_epu32(SRC, K, V, R)                     \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask(V, (__v8si)(SRC), K, R))

#define _mm512_maskz_cvt_roundpd_epu32(K, V, R)                         \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask(                           \
     V, (__v8si)_mm256_setzero_si256(), K, R))
4384
|
|
|
|
|
|
|
#endif |
4385
|
|
|
|
|
|
|
|
4386
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
4387
|
|
|
|
|
|
|
extern __inline __m512i |
4388
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4389
|
|
|
|
|
|
|
_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R) |
4390
|
|
|
|
|
|
|
{ |
4391
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, |
4392
|
|
|
|
|
|
|
(__v16si) |
4393
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
4394
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
4395
|
|
|
|
|
|
|
} |
4396
|
|
|
|
|
|
|
|
4397
|
|
|
|
|
|
|
extern __inline __m512i |
4398
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4399
|
|
|
|
|
|
|
_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A, |
4400
|
|
|
|
|
|
|
const int __R) |
4401
|
|
|
|
|
|
|
{ |
4402
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, |
4403
|
|
|
|
|
|
|
(__v16si) __W, |
4404
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
4405
|
|
|
|
|
|
|
} |
4406
|
|
|
|
|
|
|
|
4407
|
|
|
|
|
|
|
extern __inline __m512i |
4408
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4409
|
|
|
|
|
|
|
_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R) |
4410
|
|
|
|
|
|
|
{ |
4411
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, |
4412
|
|
|
|
|
|
|
(__v16si) |
4413
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
4414
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
4415
|
|
|
|
|
|
|
} |
4416
|
|
|
|
|
|
|
|
4417
|
|
|
|
|
|
|
extern __inline __m512i |
4418
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4419
|
|
|
|
|
|
|
_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R) |
4420
|
|
|
|
|
|
|
{ |
4421
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, |
4422
|
|
|
|
|
|
|
(__v16si) |
4423
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
4424
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
4425
|
|
|
|
|
|
|
} |
4426
|
|
|
|
|
|
|
|
4427
|
|
|
|
|
|
|
extern __inline __m512i |
4428
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4429
|
|
|
|
|
|
|
_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A, |
4430
|
|
|
|
|
|
|
const int __R) |
4431
|
|
|
|
|
|
|
{ |
4432
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, |
4433
|
|
|
|
|
|
|
(__v16si) __W, |
4434
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
4435
|
|
|
|
|
|
|
} |
4436
|
|
|
|
|
|
|
|
4437
|
|
|
|
|
|
|
extern __inline __m512i |
4438
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4439
|
|
|
|
|
|
|
_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) |
4440
|
|
|
|
|
|
|
{ |
4441
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, |
4442
|
|
|
|
|
|
|
(__v16si) |
4443
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
4444
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
4445
|
|
|
|
|
|
|
} |
4446
|
|
|
|
|
|
|
#else |
4447
|
|
|
|
|
|
|
/* Macro forms of the cvtt_roundps family, used when __OPTIMIZE__ is not
   defined.  Each expands to __builtin_ia32_cvttps2dq512_mask (epi32) or
   __builtin_ia32_cvttps2udq512_mask (epu32).  */
#define _mm512_cvtt_roundps_epi32(V, R)                                 \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask(                           \
     V, (__v16si)_mm512_undefined_si512 (), -1, R))

#define _mm512_mask_cvtt_roundps_epi32(SRC, K, V, R)                    \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask(V, (__v16si)(SRC), K, R))

#define _mm512_maskz_cvtt_roundps_epi32(K, V, R)                        \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask(                           \
     V, (__v16si)_mm512_setzero_si512 (), K, R))

#define _mm512_cvtt_roundps_epu32(V, R)                                 \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask(                          \
     V, (__v16si)_mm512_undefined_si512 (), -1, R))

#define _mm512_mask_cvtt_roundps_epu32(SRC, K, V, R)                    \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask(V, (__v16si)(SRC), K, R))

#define _mm512_maskz_cvtt_roundps_epu32(K, V, R)                        \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask(                          \
     V, (__v16si)_mm512_setzero_si512 (), K, R))
4464
|
|
|
|
|
|
|
#endif |
4465
|
|
|
|
|
|
|
|
4466
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
4467
|
|
|
|
|
|
|
extern __inline __m512i |
4468
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4469
|
|
|
|
|
|
|
_mm512_cvt_roundps_epi32 (__m512 __A, const int __R) |
4470
|
|
|
|
|
|
|
{ |
4471
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, |
4472
|
|
|
|
|
|
|
(__v16si) |
4473
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
4474
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
4475
|
|
|
|
|
|
|
} |
4476
|
|
|
|
|
|
|
|
4477
|
|
|
|
|
|
|
extern __inline __m512i |
4478
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4479
|
|
|
|
|
|
|
_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A, |
4480
|
|
|
|
|
|
|
const int __R) |
4481
|
|
|
|
|
|
|
{ |
4482
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, |
4483
|
|
|
|
|
|
|
(__v16si) __W, |
4484
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
4485
|
|
|
|
|
|
|
} |
4486
|
|
|
|
|
|
|
|
4487
|
|
|
|
|
|
|
extern __inline __m512i |
4488
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4489
|
|
|
|
|
|
|
_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R) |
4490
|
|
|
|
|
|
|
{ |
4491
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, |
4492
|
|
|
|
|
|
|
(__v16si) |
4493
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
4494
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
4495
|
|
|
|
|
|
|
} |
4496
|
|
|
|
|
|
|
|
4497
|
|
|
|
|
|
|
extern __inline __m512i |
4498
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4499
|
|
|
|
|
|
|
_mm512_cvt_roundps_epu32 (__m512 __A, const int __R) |
4500
|
|
|
|
|
|
|
{ |
4501
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, |
4502
|
|
|
|
|
|
|
(__v16si) |
4503
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
4504
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
4505
|
|
|
|
|
|
|
} |
4506
|
|
|
|
|
|
|
|
4507
|
|
|
|
|
|
|
extern __inline __m512i |
4508
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4509
|
|
|
|
|
|
|
_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A, |
4510
|
|
|
|
|
|
|
const int __R) |
4511
|
|
|
|
|
|
|
{ |
4512
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, |
4513
|
|
|
|
|
|
|
(__v16si) __W, |
4514
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
4515
|
|
|
|
|
|
|
} |
4516
|
|
|
|
|
|
|
|
4517
|
|
|
|
|
|
|
extern __inline __m512i |
4518
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4519
|
|
|
|
|
|
|
_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) |
4520
|
|
|
|
|
|
|
{ |
4521
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, |
4522
|
|
|
|
|
|
|
(__v16si) |
4523
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
4524
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
4525
|
|
|
|
|
|
|
} |
4526
|
|
|
|
|
|
|
#else |
4527
|
|
|
|
|
|
|
/* Macro forms of the cvt_roundps family, used when __OPTIMIZE__ is not
   defined.  Each expands to __builtin_ia32_cvtps2dq512_mask (epi32) or
   __builtin_ia32_cvtps2udq512_mask (epu32).  */
#define _mm512_cvt_roundps_epi32(V, R)                                  \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask(                            \
     V, (__v16si)_mm512_undefined_si512 (), -1, R))

#define _mm512_mask_cvt_roundps_epi32(SRC, K, V, R)                     \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask(V, (__v16si)(SRC), K, R))

#define _mm512_maskz_cvt_roundps_epi32(K, V, R)                         \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask(                            \
     V, (__v16si)_mm512_setzero_si512 (), K, R))

#define _mm512_cvt_roundps_epu32(V, R)                                  \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask(                           \
     V, (__v16si)_mm512_undefined_si512 (), -1, R))

#define _mm512_mask_cvt_roundps_epu32(SRC, K, V, R)                     \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask(V, (__v16si)(SRC), K, R))

#define _mm512_maskz_cvt_roundps_epu32(K, V, R)                         \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask(                           \
     V, (__v16si)_mm512_setzero_si512 (), K, R))
4544
|
|
|
|
|
|
|
#endif |
4545
|
|
|
|
|
|
|
|
4546
|
|
|
|
|
|
|
extern __inline __m128d |
4547
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4548
|
|
|
|
|
|
|
_mm_cvtu32_sd (__m128d __A, unsigned __B) |
4549
|
|
|
|
|
|
|
{ |
4550
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B); |
4551
|
|
|
|
|
|
|
} |
4552
|
|
|
|
|
|
|
|
4553
|
|
|
|
|
|
|
#ifdef __x86_64__ |
4554
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
4555
|
|
|
|
|
|
|
extern __inline __m128d |
4556
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4557
|
|
|
|
|
|
|
_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R) |
4558
|
|
|
|
|
|
|
{ |
4559
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R); |
4560
|
|
|
|
|
|
|
} |
4561
|
|
|
|
|
|
|
|
4562
|
|
|
|
|
|
|
extern __inline __m128d |
4563
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4564
|
|
|
|
|
|
|
_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R) |
4565
|
|
|
|
|
|
|
{ |
4566
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R); |
4567
|
|
|
|
|
|
|
} |
4568
|
|
|
|
|
|
|
|
4569
|
|
|
|
|
|
|
extern __inline __m128d |
4570
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4571
|
|
|
|
|
|
|
_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R) |
4572
|
|
|
|
|
|
|
{ |
4573
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R); |
4574
|
|
|
|
|
|
|
} |
4575
|
|
|
|
|
|
|
#else |
4576
|
|
|
|
|
|
|
/* Macro forms of the 64-bit integer -> scalar double wrappers.  The whole
   expansion is parenthesized so that postfix operators applied at the use
   site (subscript, member access) bind to the cast result, exactly as
   they would with the inline-function forms.  */
#define _mm_cvt_roundu64_sd(A, B, C)   \
    ((__m128d) __builtin_ia32_cvtusi2sd64 (A, B, C))

#define _mm_cvt_roundi64_sd(A, B, C)   \
    ((__m128d) __builtin_ia32_cvtsi2sd64 (A, B, C))

#define _mm_cvt_roundsi64_sd(A, B, C)   \
    ((__m128d) __builtin_ia32_cvtsi2sd64 (A, B, C))
4584
|
|
|
|
|
|
|
#endif |
4585
|
|
|
|
|
|
|
|
4586
|
|
|
|
|
|
|
#endif |
4587
|
|
|
|
|
|
|
|
4588
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
4589
|
|
|
|
|
|
|
extern __inline __m128 |
4590
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4591
|
|
|
|
|
|
|
_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R) |
4592
|
|
|
|
|
|
|
{ |
4593
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R); |
4594
|
|
|
|
|
|
|
} |
4595
|
|
|
|
|
|
|
|
4596
|
|
|
|
|
|
|
extern __inline __m128 |
4597
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4598
|
|
|
|
|
|
|
_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R) |
4599
|
|
|
|
|
|
|
{ |
4600
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R); |
4601
|
|
|
|
|
|
|
} |
4602
|
|
|
|
|
|
|
|
4603
|
|
|
|
|
|
|
extern __inline __m128 |
4604
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4605
|
|
|
|
|
|
|
_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R) |
4606
|
|
|
|
|
|
|
{ |
4607
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R); |
4608
|
|
|
|
|
|
|
} |
4609
|
|
|
|
|
|
|
#else |
4610
|
|
|
|
|
|
|
/* Macro forms of the 32-bit integer -> scalar float wrappers.  The whole
   expansion is parenthesized so that postfix operators applied at the use
   site (subscript, member access) bind to the cast result, exactly as
   they would with the inline-function forms.  */
#define _mm_cvt_roundu32_ss(A, B, C)   \
    ((__m128) __builtin_ia32_cvtusi2ss32 (A, B, C))

#define _mm_cvt_roundi32_ss(A, B, C)   \
    ((__m128) __builtin_ia32_cvtsi2ss32 (A, B, C))

#define _mm_cvt_roundsi32_ss(A, B, C)   \
    ((__m128) __builtin_ia32_cvtsi2ss32 (A, B, C))
4618
|
|
|
|
|
|
|
#endif |
4619
|
|
|
|
|
|
|
|
4620
|
|
|
|
|
|
|
#ifdef __x86_64__ |
4621
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
4622
|
|
|
|
|
|
|
extern __inline __m128 |
4623
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4624
|
|
|
|
|
|
|
_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R) |
4625
|
|
|
|
|
|
|
{ |
4626
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R); |
4627
|
|
|
|
|
|
|
} |
4628
|
|
|
|
|
|
|
|
4629
|
|
|
|
|
|
|
extern __inline __m128 |
4630
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4631
|
|
|
|
|
|
|
_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R) |
4632
|
|
|
|
|
|
|
{ |
4633
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R); |
4634
|
|
|
|
|
|
|
} |
4635
|
|
|
|
|
|
|
|
4636
|
|
|
|
|
|
|
extern __inline __m128 |
4637
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4638
|
|
|
|
|
|
|
_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R) |
4639
|
|
|
|
|
|
|
{ |
4640
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R); |
4641
|
|
|
|
|
|
|
} |
4642
|
|
|
|
|
|
|
#else |
4643
|
|
|
|
|
|
|
/* Macro forms of the 64-bit integer -> scalar float wrappers.  The whole
   expansion is parenthesized so that postfix operators applied at the use
   site (subscript, member access) bind to the cast result, exactly as
   they would with the inline-function forms.  */
#define _mm_cvt_roundu64_ss(A, B, C)   \
    ((__m128) __builtin_ia32_cvtusi2ss64 (A, B, C))

#define _mm_cvt_roundi64_ss(A, B, C)   \
    ((__m128) __builtin_ia32_cvtsi2ss64 (A, B, C))

#define _mm_cvt_roundsi64_ss(A, B, C)   \
    ((__m128) __builtin_ia32_cvtsi2ss64 (A, B, C))
4651
|
|
|
|
|
|
|
#endif |
4652
|
|
|
|
|
|
|
|
4653
|
|
|
|
|
|
|
#endif |
4654
|
|
|
|
|
|
|
|
4655
|
|
|
|
|
|
|
extern __inline __m128i |
4656
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4657
|
|
|
|
|
|
|
_mm512_cvtepi32_epi8 (__m512i __A) |
4658
|
|
|
|
|
|
|
{ |
4659
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, |
4660
|
|
|
|
|
|
|
(__v16qi) |
4661
|
|
|
|
|
|
|
_mm_undefined_si128 (), |
4662
|
|
|
|
|
|
|
(__mmask16) -1); |
4663
|
|
|
|
|
|
|
} |
4664
|
|
|
|
|
|
|
|
4665
|
|
|
|
|
|
|
extern __inline void |
4666
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4667
|
|
|
|
|
|
|
_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) |
4668
|
|
|
|
|
|
|
{ |
4669
|
|
|
|
|
|
|
__builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); |
4670
|
|
|
|
|
|
|
} |
4671
|
|
|
|
|
|
|
|
4672
|
|
|
|
|
|
|
extern __inline __m128i |
4673
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4674
|
|
|
|
|
|
|
_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) |
4675
|
|
|
|
|
|
|
{ |
4676
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, |
4677
|
|
|
|
|
|
|
(__v16qi) __O, __M); |
4678
|
|
|
|
|
|
|
} |
4679
|
|
|
|
|
|
|
|
4680
|
|
|
|
|
|
|
extern __inline __m128i |
4681
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4682
|
|
|
|
|
|
|
_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A) |
4683
|
|
|
|
|
|
|
{ |
4684
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, |
4685
|
|
|
|
|
|
|
(__v16qi) |
4686
|
|
|
|
|
|
|
_mm_setzero_si128 (), |
4687
|
|
|
|
|
|
|
__M); |
4688
|
|
|
|
|
|
|
} |
4689
|
|
|
|
|
|
|
|
4690
|
|
|
|
|
|
|
extern __inline __m128i |
4691
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4692
|
|
|
|
|
|
|
_mm512_cvtsepi32_epi8 (__m512i __A) |
4693
|
|
|
|
|
|
|
{ |
4694
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, |
4695
|
|
|
|
|
|
|
(__v16qi) |
4696
|
|
|
|
|
|
|
_mm_undefined_si128 (), |
4697
|
|
|
|
|
|
|
(__mmask16) -1); |
4698
|
|
|
|
|
|
|
} |
4699
|
|
|
|
|
|
|
|
4700
|
|
|
|
|
|
|
extern __inline void |
4701
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4702
|
|
|
|
|
|
|
_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) |
4703
|
|
|
|
|
|
|
{ |
4704
|
|
|
|
|
|
|
__builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); |
4705
|
|
|
|
|
|
|
} |
4706
|
|
|
|
|
|
|
|
4707
|
|
|
|
|
|
|
extern __inline __m128i |
4708
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4709
|
|
|
|
|
|
|
_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) |
4710
|
|
|
|
|
|
|
{ |
4711
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, |
4712
|
|
|
|
|
|
|
(__v16qi) __O, __M); |
4713
|
|
|
|
|
|
|
} |
4714
|
|
|
|
|
|
|
|
4715
|
|
|
|
|
|
|
extern __inline __m128i |
4716
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4717
|
|
|
|
|
|
|
_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A) |
4718
|
|
|
|
|
|
|
{ |
4719
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, |
4720
|
|
|
|
|
|
|
(__v16qi) |
4721
|
|
|
|
|
|
|
_mm_setzero_si128 (), |
4722
|
|
|
|
|
|
|
__M); |
4723
|
|
|
|
|
|
|
} |
4724
|
|
|
|
|
|
|
|
4725
|
|
|
|
|
|
|
extern __inline __m128i |
4726
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4727
|
|
|
|
|
|
|
_mm512_cvtusepi32_epi8 (__m512i __A) |
4728
|
|
|
|
|
|
|
{ |
4729
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, |
4730
|
|
|
|
|
|
|
(__v16qi) |
4731
|
|
|
|
|
|
|
_mm_undefined_si128 (), |
4732
|
|
|
|
|
|
|
(__mmask16) -1); |
4733
|
|
|
|
|
|
|
} |
4734
|
|
|
|
|
|
|
|
4735
|
|
|
|
|
|
|
extern __inline void |
4736
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4737
|
|
|
|
|
|
|
_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) |
4738
|
|
|
|
|
|
|
{ |
4739
|
|
|
|
|
|
|
__builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); |
4740
|
|
|
|
|
|
|
} |
4741
|
|
|
|
|
|
|
|
4742
|
|
|
|
|
|
|
extern __inline __m128i |
4743
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4744
|
|
|
|
|
|
|
_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) |
4745
|
|
|
|
|
|
|
{ |
4746
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, |
4747
|
|
|
|
|
|
|
(__v16qi) __O, |
4748
|
|
|
|
|
|
|
__M); |
4749
|
|
|
|
|
|
|
} |
4750
|
|
|
|
|
|
|
|
4751
|
|
|
|
|
|
|
extern __inline __m128i |
4752
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4753
|
|
|
|
|
|
|
_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A) |
4754
|
|
|
|
|
|
|
{ |
4755
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, |
4756
|
|
|
|
|
|
|
(__v16qi) |
4757
|
|
|
|
|
|
|
_mm_setzero_si128 (), |
4758
|
|
|
|
|
|
|
__M); |
4759
|
|
|
|
|
|
|
} |
4760
|
|
|
|
|
|
|
|
4761
|
|
|
|
|
|
|
extern __inline __m256i |
4762
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4763
|
|
|
|
|
|
|
_mm512_cvtepi32_epi16 (__m512i __A) |
4764
|
|
|
|
|
|
|
{ |
4765
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, |
4766
|
|
|
|
|
|
|
(__v16hi) |
4767
|
|
|
|
|
|
|
_mm256_undefined_si256 (), |
4768
|
|
|
|
|
|
|
(__mmask16) -1); |
4769
|
|
|
|
|
|
|
} |
4770
|
|
|
|
|
|
|
|
4771
|
|
|
|
|
|
|
extern __inline void |
4772
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4773
|
|
|
|
|
|
|
_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A) |
4774
|
|
|
|
|
|
|
{ |
4775
|
|
|
|
|
|
|
__builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M); |
4776
|
|
|
|
|
|
|
} |
4777
|
|
|
|
|
|
|
|
4778
|
|
|
|
|
|
|
extern __inline __m256i |
4779
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4780
|
|
|
|
|
|
|
_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) |
4781
|
|
|
|
|
|
|
{ |
4782
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, |
4783
|
|
|
|
|
|
|
(__v16hi) __O, __M); |
4784
|
|
|
|
|
|
|
} |
4785
|
|
|
|
|
|
|
|
4786
|
|
|
|
|
|
|
extern __inline __m256i |
4787
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4788
|
|
|
|
|
|
|
_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A) |
4789
|
|
|
|
|
|
|
{ |
4790
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, |
4791
|
|
|
|
|
|
|
(__v16hi) |
4792
|
|
|
|
|
|
|
_mm256_setzero_si256 (), |
4793
|
|
|
|
|
|
|
__M); |
4794
|
|
|
|
|
|
|
} |
4795
|
|
|
|
|
|
|
|
4796
|
|
|
|
|
|
|
extern __inline __m256i |
4797
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4798
|
|
|
|
|
|
|
_mm512_cvtsepi32_epi16 (__m512i __A) |
4799
|
|
|
|
|
|
|
{ |
4800
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, |
4801
|
|
|
|
|
|
|
(__v16hi) |
4802
|
|
|
|
|
|
|
_mm256_undefined_si256 (), |
4803
|
|
|
|
|
|
|
(__mmask16) -1); |
4804
|
|
|
|
|
|
|
} |
4805
|
|
|
|
|
|
|
|
4806
|
|
|
|
|
|
|
extern __inline void |
4807
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4808
|
|
|
|
|
|
|
_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) |
4809
|
|
|
|
|
|
|
{ |
4810
|
|
|
|
|
|
|
__builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); |
4811
|
|
|
|
|
|
|
} |
4812
|
|
|
|
|
|
|
|
4813
|
|
|
|
|
|
|
extern __inline __m256i |
4814
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4815
|
|
|
|
|
|
|
_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) |
4816
|
|
|
|
|
|
|
{ |
4817
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, |
4818
|
|
|
|
|
|
|
(__v16hi) __O, __M); |
4819
|
|
|
|
|
|
|
} |
4820
|
|
|
|
|
|
|
|
4821
|
|
|
|
|
|
|
extern __inline __m256i |
4822
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4823
|
|
|
|
|
|
|
_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A) |
4824
|
|
|
|
|
|
|
{ |
4825
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, |
4826
|
|
|
|
|
|
|
(__v16hi) |
4827
|
|
|
|
|
|
|
_mm256_setzero_si256 (), |
4828
|
|
|
|
|
|
|
__M); |
4829
|
|
|
|
|
|
|
} |
4830
|
|
|
|
|
|
|
|
4831
|
|
|
|
|
|
|
extern __inline __m256i |
4832
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4833
|
|
|
|
|
|
|
_mm512_cvtusepi32_epi16 (__m512i __A) |
4834
|
|
|
|
|
|
|
{ |
4835
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, |
4836
|
|
|
|
|
|
|
(__v16hi) |
4837
|
|
|
|
|
|
|
_mm256_undefined_si256 (), |
4838
|
|
|
|
|
|
|
(__mmask16) -1); |
4839
|
|
|
|
|
|
|
} |
4840
|
|
|
|
|
|
|
|
4841
|
|
|
|
|
|
|
extern __inline void |
4842
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4843
|
|
|
|
|
|
|
_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) |
4844
|
|
|
|
|
|
|
{ |
4845
|
|
|
|
|
|
|
__builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); |
4846
|
|
|
|
|
|
|
} |
4847
|
|
|
|
|
|
|
|
4848
|
|
|
|
|
|
|
extern __inline __m256i |
4849
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4850
|
|
|
|
|
|
|
_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) |
4851
|
|
|
|
|
|
|
{ |
4852
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, |
4853
|
|
|
|
|
|
|
(__v16hi) __O, |
4854
|
|
|
|
|
|
|
__M); |
4855
|
|
|
|
|
|
|
} |
4856
|
|
|
|
|
|
|
|
4857
|
|
|
|
|
|
|
extern __inline __m256i |
4858
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4859
|
|
|
|
|
|
|
_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A) |
4860
|
|
|
|
|
|
|
{ |
4861
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, |
4862
|
|
|
|
|
|
|
(__v16hi) |
4863
|
|
|
|
|
|
|
_mm256_setzero_si256 (), |
4864
|
|
|
|
|
|
|
__M); |
4865
|
|
|
|
|
|
|
} |
4866
|
|
|
|
|
|
|
|
4867
|
|
|
|
|
|
|
extern __inline __m256i |
4868
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4869
|
|
|
|
|
|
|
_mm512_cvtepi64_epi32 (__m512i __A) |
4870
|
|
|
|
|
|
|
{ |
4871
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, |
4872
|
|
|
|
|
|
|
(__v8si) |
4873
|
|
|
|
|
|
|
_mm256_undefined_si256 (), |
4874
|
|
|
|
|
|
|
(__mmask8) -1); |
4875
|
|
|
|
|
|
|
} |
4876
|
|
|
|
|
|
|
|
4877
|
|
|
|
|
|
|
extern __inline void |
4878
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4879
|
|
|
|
|
|
|
_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) |
4880
|
|
|
|
|
|
|
{ |
4881
|
|
|
|
|
|
|
__builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); |
4882
|
|
|
|
|
|
|
} |
4883
|
|
|
|
|
|
|
|
4884
|
|
|
|
|
|
|
extern __inline __m256i |
4885
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4886
|
|
|
|
|
|
|
_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) |
4887
|
|
|
|
|
|
|
{ |
4888
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, |
4889
|
|
|
|
|
|
|
(__v8si) __O, __M); |
4890
|
|
|
|
|
|
|
} |
4891
|
|
|
|
|
|
|
|
4892
|
|
|
|
|
|
|
extern __inline __m256i |
4893
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4894
|
|
|
|
|
|
|
_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A) |
4895
|
|
|
|
|
|
|
{ |
4896
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, |
4897
|
|
|
|
|
|
|
(__v8si) |
4898
|
|
|
|
|
|
|
_mm256_setzero_si256 (), |
4899
|
|
|
|
|
|
|
__M); |
4900
|
|
|
|
|
|
|
} |
4901
|
|
|
|
|
|
|
|
4902
|
|
|
|
|
|
|
extern __inline __m256i |
4903
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4904
|
|
|
|
|
|
|
_mm512_cvtsepi64_epi32 (__m512i __A) |
4905
|
|
|
|
|
|
|
{ |
4906
|
|
|
|
|
|
|
__v8si __O; |
4907
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, |
4908
|
|
|
|
|
|
|
(__v8si) |
4909
|
|
|
|
|
|
|
_mm256_undefined_si256 (), |
4910
|
|
|
|
|
|
|
(__mmask8) -1); |
4911
|
|
|
|
|
|
|
} |
4912
|
|
|
|
|
|
|
|
4913
|
|
|
|
|
|
|
extern __inline void |
4914
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4915
|
|
|
|
|
|
|
_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A) |
4916
|
|
|
|
|
|
|
{ |
4917
|
|
|
|
|
|
|
__builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); |
4918
|
|
|
|
|
|
|
} |
4919
|
|
|
|
|
|
|
|
4920
|
|
|
|
|
|
|
extern __inline __m256i |
4921
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4922
|
|
|
|
|
|
|
_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) |
4923
|
|
|
|
|
|
|
{ |
4924
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, |
4925
|
|
|
|
|
|
|
(__v8si) __O, __M); |
4926
|
|
|
|
|
|
|
} |
4927
|
|
|
|
|
|
|
|
4928
|
|
|
|
|
|
|
extern __inline __m256i |
4929
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4930
|
|
|
|
|
|
|
_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A) |
4931
|
|
|
|
|
|
|
{ |
4932
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, |
4933
|
|
|
|
|
|
|
(__v8si) |
4934
|
|
|
|
|
|
|
_mm256_setzero_si256 (), |
4935
|
|
|
|
|
|
|
__M); |
4936
|
|
|
|
|
|
|
} |
4937
|
|
|
|
|
|
|
|
4938
|
|
|
|
|
|
|
extern __inline __m256i |
4939
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4940
|
|
|
|
|
|
|
_mm512_cvtusepi64_epi32 (__m512i __A) |
4941
|
|
|
|
|
|
|
{ |
4942
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, |
4943
|
|
|
|
|
|
|
(__v8si) |
4944
|
|
|
|
|
|
|
_mm256_undefined_si256 (), |
4945
|
|
|
|
|
|
|
(__mmask8) -1); |
4946
|
|
|
|
|
|
|
} |
4947
|
|
|
|
|
|
|
|
4948
|
|
|
|
|
|
|
extern __inline void |
4949
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4950
|
|
|
|
|
|
|
_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) |
4951
|
|
|
|
|
|
|
{ |
4952
|
|
|
|
|
|
|
__builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M); |
4953
|
|
|
|
|
|
|
} |
4954
|
|
|
|
|
|
|
|
4955
|
|
|
|
|
|
|
extern __inline __m256i |
4956
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4957
|
|
|
|
|
|
|
_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) |
4958
|
|
|
|
|
|
|
{ |
4959
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, |
4960
|
|
|
|
|
|
|
(__v8si) __O, __M); |
4961
|
|
|
|
|
|
|
} |
4962
|
|
|
|
|
|
|
|
4963
|
|
|
|
|
|
|
extern __inline __m256i |
4964
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4965
|
|
|
|
|
|
|
_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A) |
4966
|
|
|
|
|
|
|
{ |
4967
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, |
4968
|
|
|
|
|
|
|
(__v8si) |
4969
|
|
|
|
|
|
|
_mm256_setzero_si256 (), |
4970
|
|
|
|
|
|
|
__M); |
4971
|
|
|
|
|
|
|
} |
4972
|
|
|
|
|
|
|
|
4973
|
|
|
|
|
|
|
extern __inline __m128i |
4974
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4975
|
|
|
|
|
|
|
_mm512_cvtepi64_epi16 (__m512i __A) |
4976
|
|
|
|
|
|
|
{ |
4977
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, |
4978
|
|
|
|
|
|
|
(__v8hi) |
4979
|
|
|
|
|
|
|
_mm_undefined_si128 (), |
4980
|
|
|
|
|
|
|
(__mmask8) -1); |
4981
|
|
|
|
|
|
|
} |
4982
|
|
|
|
|
|
|
|
4983
|
|
|
|
|
|
|
extern __inline void |
4984
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4985
|
|
|
|
|
|
|
_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) |
4986
|
|
|
|
|
|
|
{ |
4987
|
|
|
|
|
|
|
__builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); |
4988
|
|
|
|
|
|
|
} |
4989
|
|
|
|
|
|
|
|
4990
|
|
|
|
|
|
|
extern __inline __m128i |
4991
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
4992
|
|
|
|
|
|
|
_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) |
4993
|
|
|
|
|
|
|
{ |
4994
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, |
4995
|
|
|
|
|
|
|
(__v8hi) __O, __M); |
4996
|
|
|
|
|
|
|
} |
4997
|
|
|
|
|
|
|
|
4998
|
|
|
|
|
|
|
extern __inline __m128i |
4999
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5000
|
|
|
|
|
|
|
_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A) |
5001
|
|
|
|
|
|
|
{ |
5002
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, |
5003
|
|
|
|
|
|
|
(__v8hi) |
5004
|
|
|
|
|
|
|
_mm_setzero_si128 (), |
5005
|
|
|
|
|
|
|
__M); |
5006
|
|
|
|
|
|
|
} |
5007
|
|
|
|
|
|
|
|
5008
|
|
|
|
|
|
|
extern __inline __m128i |
5009
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5010
|
|
|
|
|
|
|
_mm512_cvtsepi64_epi16 (__m512i __A) |
5011
|
|
|
|
|
|
|
{ |
5012
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, |
5013
|
|
|
|
|
|
|
(__v8hi) |
5014
|
|
|
|
|
|
|
_mm_undefined_si128 (), |
5015
|
|
|
|
|
|
|
(__mmask8) -1); |
5016
|
|
|
|
|
|
|
} |
5017
|
|
|
|
|
|
|
|
5018
|
|
|
|
|
|
|
extern __inline void |
5019
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5020
|
|
|
|
|
|
|
_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A) |
5021
|
|
|
|
|
|
|
{ |
5022
|
|
|
|
|
|
|
__builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); |
5023
|
|
|
|
|
|
|
} |
5024
|
|
|
|
|
|
|
|
5025
|
|
|
|
|
|
|
extern __inline __m128i |
5026
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5027
|
|
|
|
|
|
|
_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) |
5028
|
|
|
|
|
|
|
{ |
5029
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, |
5030
|
|
|
|
|
|
|
(__v8hi) __O, __M); |
5031
|
|
|
|
|
|
|
} |
5032
|
|
|
|
|
|
|
|
5033
|
|
|
|
|
|
|
extern __inline __m128i |
5034
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5035
|
|
|
|
|
|
|
_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A) |
5036
|
|
|
|
|
|
|
{ |
5037
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, |
5038
|
|
|
|
|
|
|
(__v8hi) |
5039
|
|
|
|
|
|
|
_mm_setzero_si128 (), |
5040
|
|
|
|
|
|
|
__M); |
5041
|
|
|
|
|
|
|
} |
5042
|
|
|
|
|
|
|
|
5043
|
|
|
|
|
|
|
extern __inline __m128i |
5044
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5045
|
|
|
|
|
|
|
_mm512_cvtusepi64_epi16 (__m512i __A) |
5046
|
|
|
|
|
|
|
{ |
5047
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, |
5048
|
|
|
|
|
|
|
(__v8hi) |
5049
|
|
|
|
|
|
|
_mm_undefined_si128 (), |
5050
|
|
|
|
|
|
|
(__mmask8) -1); |
5051
|
|
|
|
|
|
|
} |
5052
|
|
|
|
|
|
|
|
5053
|
|
|
|
|
|
|
extern __inline void |
5054
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5055
|
|
|
|
|
|
|
_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) |
5056
|
|
|
|
|
|
|
{ |
5057
|
|
|
|
|
|
|
__builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M); |
5058
|
|
|
|
|
|
|
} |
5059
|
|
|
|
|
|
|
|
5060
|
|
|
|
|
|
|
extern __inline __m128i |
5061
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5062
|
|
|
|
|
|
|
_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) |
5063
|
|
|
|
|
|
|
{ |
5064
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, |
5065
|
|
|
|
|
|
|
(__v8hi) __O, __M); |
5066
|
|
|
|
|
|
|
} |
5067
|
|
|
|
|
|
|
|
5068
|
|
|
|
|
|
|
extern __inline __m128i |
5069
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5070
|
|
|
|
|
|
|
_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A) |
5071
|
|
|
|
|
|
|
{ |
5072
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, |
5073
|
|
|
|
|
|
|
(__v8hi) |
5074
|
|
|
|
|
|
|
_mm_setzero_si128 (), |
5075
|
|
|
|
|
|
|
__M); |
5076
|
|
|
|
|
|
|
} |
5077
|
|
|
|
|
|
|
|
5078
|
|
|
|
|
|
|
extern __inline __m128i |
5079
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5080
|
|
|
|
|
|
|
_mm512_cvtepi64_epi8 (__m512i __A) |
5081
|
|
|
|
|
|
|
{ |
5082
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, |
5083
|
|
|
|
|
|
|
(__v16qi) |
5084
|
|
|
|
|
|
|
_mm_undefined_si128 (), |
5085
|
|
|
|
|
|
|
(__mmask8) -1); |
5086
|
|
|
|
|
|
|
} |
5087
|
|
|
|
|
|
|
|
5088
|
|
|
|
|
|
|
extern __inline void |
5089
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5090
|
|
|
|
|
|
|
_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) |
5091
|
|
|
|
|
|
|
{ |
5092
|
|
|
|
|
|
|
__builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); |
5093
|
|
|
|
|
|
|
} |
5094
|
|
|
|
|
|
|
|
5095
|
|
|
|
|
|
|
extern __inline __m128i |
5096
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5097
|
|
|
|
|
|
|
_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) |
5098
|
|
|
|
|
|
|
{ |
5099
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, |
5100
|
|
|
|
|
|
|
(__v16qi) __O, __M); |
5101
|
|
|
|
|
|
|
} |
5102
|
|
|
|
|
|
|
|
5103
|
|
|
|
|
|
|
extern __inline __m128i |
5104
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5105
|
|
|
|
|
|
|
_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A) |
5106
|
|
|
|
|
|
|
{ |
5107
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, |
5108
|
|
|
|
|
|
|
(__v16qi) |
5109
|
|
|
|
|
|
|
_mm_setzero_si128 (), |
5110
|
|
|
|
|
|
|
__M); |
5111
|
|
|
|
|
|
|
} |
5112
|
|
|
|
|
|
|
|
5113
|
|
|
|
|
|
|
extern __inline __m128i |
5114
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5115
|
|
|
|
|
|
|
_mm512_cvtsepi64_epi8 (__m512i __A) |
5116
|
|
|
|
|
|
|
{ |
5117
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, |
5118
|
|
|
|
|
|
|
(__v16qi) |
5119
|
|
|
|
|
|
|
_mm_undefined_si128 (), |
5120
|
|
|
|
|
|
|
(__mmask8) -1); |
5121
|
|
|
|
|
|
|
} |
5122
|
|
|
|
|
|
|
|
5123
|
|
|
|
|
|
|
extern __inline void |
5124
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5125
|
|
|
|
|
|
|
_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) |
5126
|
|
|
|
|
|
|
{ |
5127
|
|
|
|
|
|
|
__builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); |
5128
|
|
|
|
|
|
|
} |
5129
|
|
|
|
|
|
|
|
5130
|
|
|
|
|
|
|
extern __inline __m128i |
5131
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5132
|
|
|
|
|
|
|
_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) |
5133
|
|
|
|
|
|
|
{ |
5134
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, |
5135
|
|
|
|
|
|
|
(__v16qi) __O, __M); |
5136
|
|
|
|
|
|
|
} |
5137
|
|
|
|
|
|
|
|
5138
|
|
|
|
|
|
|
extern __inline __m128i |
5139
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5140
|
|
|
|
|
|
|
_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A) |
5141
|
|
|
|
|
|
|
{ |
5142
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, |
5143
|
|
|
|
|
|
|
(__v16qi) |
5144
|
|
|
|
|
|
|
_mm_setzero_si128 (), |
5145
|
|
|
|
|
|
|
__M); |
5146
|
|
|
|
|
|
|
} |
5147
|
|
|
|
|
|
|
|
5148
|
|
|
|
|
|
|
extern __inline __m128i |
5149
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5150
|
|
|
|
|
|
|
_mm512_cvtusepi64_epi8 (__m512i __A) |
5151
|
|
|
|
|
|
|
{ |
5152
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, |
5153
|
|
|
|
|
|
|
(__v16qi) |
5154
|
|
|
|
|
|
|
_mm_undefined_si128 (), |
5155
|
|
|
|
|
|
|
(__mmask8) -1); |
5156
|
|
|
|
|
|
|
} |
5157
|
|
|
|
|
|
|
|
5158
|
|
|
|
|
|
|
extern __inline void |
5159
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5160
|
|
|
|
|
|
|
_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) |
5161
|
|
|
|
|
|
|
{ |
5162
|
|
|
|
|
|
|
__builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); |
5163
|
|
|
|
|
|
|
} |
5164
|
|
|
|
|
|
|
|
5165
|
|
|
|
|
|
|
extern __inline __m128i |
5166
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5167
|
|
|
|
|
|
|
_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) |
5168
|
|
|
|
|
|
|
{ |
5169
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, |
5170
|
|
|
|
|
|
|
(__v16qi) __O, |
5171
|
|
|
|
|
|
|
__M); |
5172
|
|
|
|
|
|
|
} |
5173
|
|
|
|
|
|
|
|
5174
|
|
|
|
|
|
|
extern __inline __m128i |
5175
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5176
|
|
|
|
|
|
|
_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A) |
5177
|
|
|
|
|
|
|
{ |
5178
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, |
5179
|
|
|
|
|
|
|
(__v16qi) |
5180
|
|
|
|
|
|
|
_mm_setzero_si128 (), |
5181
|
|
|
|
|
|
|
__M); |
5182
|
|
|
|
|
|
|
} |
5183
|
|
|
|
|
|
|
|
5184
|
|
|
|
|
|
|
extern __inline __m512d |
5185
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5186
|
|
|
|
|
|
|
_mm512_cvtepi32_pd (__m256i __A) |
5187
|
|
|
|
|
|
|
{ |
5188
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, |
5189
|
|
|
|
|
|
|
(__v8df) |
5190
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
5191
|
|
|
|
|
|
|
(__mmask8) -1); |
5192
|
|
|
|
|
|
|
} |
5193
|
|
|
|
|
|
|
|
5194
|
|
|
|
|
|
|
extern __inline __m512d |
5195
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5196
|
|
|
|
|
|
|
_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) |
5197
|
|
|
|
|
|
|
{ |
5198
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, |
5199
|
|
|
|
|
|
|
(__v8df) __W, |
5200
|
|
|
|
|
|
|
(__mmask8) __U); |
5201
|
|
|
|
|
|
|
} |
5202
|
|
|
|
|
|
|
|
5203
|
|
|
|
|
|
|
extern __inline __m512d |
5204
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5205
|
|
|
|
|
|
|
_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) |
5206
|
|
|
|
|
|
|
{ |
5207
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, |
5208
|
|
|
|
|
|
|
(__v8df) |
5209
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
5210
|
|
|
|
|
|
|
(__mmask8) __U); |
5211
|
|
|
|
|
|
|
} |
5212
|
|
|
|
|
|
|
|
5213
|
|
|
|
|
|
|
extern __inline __m512d |
5214
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5215
|
|
|
|
|
|
|
_mm512_cvtepu32_pd (__m256i __A) |
5216
|
|
|
|
|
|
|
{ |
5217
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, |
5218
|
|
|
|
|
|
|
(__v8df) |
5219
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
5220
|
|
|
|
|
|
|
(__mmask8) -1); |
5221
|
|
|
|
|
|
|
} |
5222
|
|
|
|
|
|
|
|
5223
|
|
|
|
|
|
|
extern __inline __m512d |
5224
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5225
|
|
|
|
|
|
|
_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A) |
5226
|
|
|
|
|
|
|
{ |
5227
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, |
5228
|
|
|
|
|
|
|
(__v8df) __W, |
5229
|
|
|
|
|
|
|
(__mmask8) __U); |
5230
|
|
|
|
|
|
|
} |
5231
|
|
|
|
|
|
|
|
5232
|
|
|
|
|
|
|
extern __inline __m512d |
5233
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5234
|
|
|
|
|
|
|
_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A) |
5235
|
|
|
|
|
|
|
{ |
5236
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, |
5237
|
|
|
|
|
|
|
(__v8df) |
5238
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
5239
|
|
|
|
|
|
|
(__mmask8) __U); |
5240
|
|
|
|
|
|
|
} |
5241
|
|
|
|
|
|
|
|
5242
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
5243
|
|
|
|
|
|
|
extern __inline __m512 |
5244
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5245
|
|
|
|
|
|
|
_mm512_cvt_roundepi32_ps (__m512i __A, const int __R) |
5246
|
|
|
|
|
|
|
{ |
5247
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, |
5248
|
|
|
|
|
|
|
(__v16sf) |
5249
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
5250
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
5251
|
|
|
|
|
|
|
} |
5252
|
|
|
|
|
|
|
|
5253
|
|
|
|
|
|
|
extern __inline __m512 |
5254
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5255
|
|
|
|
|
|
|
_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A, |
5256
|
|
|
|
|
|
|
const int __R) |
5257
|
|
|
|
|
|
|
{ |
5258
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, |
5259
|
|
|
|
|
|
|
(__v16sf) __W, |
5260
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
5261
|
|
|
|
|
|
|
} |
5262
|
|
|
|
|
|
|
|
5263
|
|
|
|
|
|
|
extern __inline __m512 |
5264
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5265
|
|
|
|
|
|
|
_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R) |
5266
|
|
|
|
|
|
|
{ |
5267
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, |
5268
|
|
|
|
|
|
|
(__v16sf) |
5269
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
5270
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
5271
|
|
|
|
|
|
|
} |
5272
|
|
|
|
|
|
|
|
5273
|
|
|
|
|
|
|
extern __inline __m512 |
5274
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5275
|
|
|
|
|
|
|
_mm512_cvt_roundepu32_ps (__m512i __A, const int __R) |
5276
|
|
|
|
|
|
|
{ |
5277
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, |
5278
|
|
|
|
|
|
|
(__v16sf) |
5279
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
5280
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
5281
|
|
|
|
|
|
|
} |
5282
|
|
|
|
|
|
|
|
5283
|
|
|
|
|
|
|
extern __inline __m512 |
5284
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5285
|
|
|
|
|
|
|
_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A, |
5286
|
|
|
|
|
|
|
const int __R) |
5287
|
|
|
|
|
|
|
{ |
5288
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, |
5289
|
|
|
|
|
|
|
(__v16sf) __W, |
5290
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
5291
|
|
|
|
|
|
|
} |
5292
|
|
|
|
|
|
|
|
5293
|
|
|
|
|
|
|
extern __inline __m512 |
5294
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5295
|
|
|
|
|
|
|
_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R) |
5296
|
|
|
|
|
|
|
{ |
5297
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, |
5298
|
|
|
|
|
|
|
(__v16sf) |
5299
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
5300
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
5301
|
|
|
|
|
|
|
} |
5302
|
|
|
|
|
|
|
|
5303
|
|
|
|
|
|
|
#else |
5304
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined.  Same call as the
   inline function of this name: A viewed as __v16si, an undefined merge
   vector, an all-ones mask and B as the last argument.  Every argument
   and the whole expansion are parenthesized so the macro can be used
   inside larger expressions.  */
#define _mm512_cvt_roundepi32_ps(A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A), \
                                             (__v16sf) _mm512_undefined_ps (), \
                                             (__mmask16) -1, (int) (B)))
5306
|
|
|
|
|
|
|
|
5307
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined.  Same call as the
   inline function of this name: A viewed as __v16si, W as the merge
   operand, U as the mask and B as the last argument.  W, U and B are
   parenthesized and cast exactly as the inline function does, and the
   whole expansion is wrapped in parentheses.  */
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A), \
                                             (__v16sf) (W), \
                                             (__mmask16) (U), (int) (B)))
5309
|
|
|
|
|
|
|
|
5310
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined.  Same call as the
   inline function of this name: A viewed as __v16si, an all-zero merge
   vector, U as the mask and B as the last argument.  U and B are
   parenthesized and cast, and the whole expansion is wrapped in
   parentheses.  */
#define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A), \
                                             (__v16sf) _mm512_setzero_ps (), \
                                             (__mmask16) (U), (int) (B)))
5312
|
|
|
|
|
|
|
|
5313
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined.  Same call as the
   inline function of this name: A viewed as __v16si, an undefined merge
   vector, an all-ones mask and B as the last argument.  Every argument
   and the whole expansion are parenthesized so the macro can be used
   inside larger expressions.  */
#define _mm512_cvt_roundepu32_ps(A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A), \
                                              (__v16sf) _mm512_undefined_ps (), \
                                              (__mmask16) -1, (int) (B)))
5315
|
|
|
|
|
|
|
|
5316
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined.  Same call as the
   inline function of this name: A viewed as __v16si, W as the merge
   operand, U as the mask and B as the last argument.  W, U and B are
   parenthesized and cast exactly as the inline function does, and the
   whole expansion is wrapped in parentheses.  */
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A), \
                                              (__v16sf) (W), \
                                              (__mmask16) (U), (int) (B)))
5318
|
|
|
|
|
|
|
|
5319
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined.  Same call as the
   inline function of this name: A viewed as __v16si, an all-zero merge
   vector, U as the mask and B as the last argument.  U and B are
   parenthesized and cast, and the whole expansion is wrapped in
   parentheses.  */
#define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A), \
                                              (__v16sf) _mm512_setzero_ps (), \
                                              (__mmask16) (U), (int) (B)))
5321
|
|
|
|
|
|
|
#endif |
5322
|
|
|
|
|
|
|
|
5323
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
5324
|
|
|
|
|
|
|
extern __inline __m256d |
5325
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5326
|
|
|
|
|
|
|
_mm512_extractf64x4_pd (__m512d __A, const int __imm) |
5327
|
|
|
|
|
|
|
{ |
5328
|
|
|
|
|
|
|
return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A, |
5329
|
|
|
|
|
|
|
__imm, |
5330
|
|
|
|
|
|
|
(__v4df) |
5331
|
|
|
|
|
|
|
_mm256_undefined_pd (), |
5332
|
|
|
|
|
|
|
(__mmask8) -1); |
5333
|
|
|
|
|
|
|
} |
5334
|
|
|
|
|
|
|
|
5335
|
|
|
|
|
|
|
extern __inline __m256d |
5336
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5337
|
|
|
|
|
|
|
_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A, |
5338
|
|
|
|
|
|
|
const int __imm) |
5339
|
|
|
|
|
|
|
{ |
5340
|
|
|
|
|
|
|
return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A, |
5341
|
|
|
|
|
|
|
__imm, |
5342
|
|
|
|
|
|
|
(__v4df) __W, |
5343
|
|
|
|
|
|
|
(__mmask8) __U); |
5344
|
|
|
|
|
|
|
} |
5345
|
|
|
|
|
|
|
|
5346
|
|
|
|
|
|
|
extern __inline __m256d |
5347
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5348
|
|
|
|
|
|
|
_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm) |
5349
|
|
|
|
|
|
|
{ |
5350
|
|
|
|
|
|
|
return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A, |
5351
|
|
|
|
|
|
|
__imm, |
5352
|
|
|
|
|
|
|
(__v4df) |
5353
|
|
|
|
|
|
|
_mm256_setzero_pd (), |
5354
|
|
|
|
|
|
|
(__mmask8) __U); |
5355
|
|
|
|
|
|
|
} |
5356
|
|
|
|
|
|
|
|
5357
|
|
|
|
|
|
|
extern __inline __m128 |
5358
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5359
|
|
|
|
|
|
|
_mm512_extractf32x4_ps (__m512 __A, const int __imm) |
5360
|
|
|
|
|
|
|
{ |
5361
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A, |
5362
|
|
|
|
|
|
|
__imm, |
5363
|
|
|
|
|
|
|
(__v4sf) |
5364
|
|
|
|
|
|
|
_mm_undefined_ps (), |
5365
|
|
|
|
|
|
|
(__mmask8) -1); |
5366
|
|
|
|
|
|
|
} |
5367
|
|
|
|
|
|
|
|
5368
|
|
|
|
|
|
|
extern __inline __m128 |
5369
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5370
|
|
|
|
|
|
|
_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A, |
5371
|
|
|
|
|
|
|
const int __imm) |
5372
|
|
|
|
|
|
|
{ |
5373
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A, |
5374
|
|
|
|
|
|
|
__imm, |
5375
|
|
|
|
|
|
|
(__v4sf) __W, |
5376
|
|
|
|
|
|
|
(__mmask8) __U); |
5377
|
|
|
|
|
|
|
} |
5378
|
|
|
|
|
|
|
|
5379
|
|
|
|
|
|
|
extern __inline __m128 |
5380
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5381
|
|
|
|
|
|
|
_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm) |
5382
|
|
|
|
|
|
|
{ |
5383
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A, |
5384
|
|
|
|
|
|
|
__imm, |
5385
|
|
|
|
|
|
|
(__v4sf) |
5386
|
|
|
|
|
|
|
_mm_setzero_ps (), |
5387
|
|
|
|
|
|
|
(__mmask8) __U); |
5388
|
|
|
|
|
|
|
} |
5389
|
|
|
|
|
|
|
|
5390
|
|
|
|
|
|
|
extern __inline __m256i |
5391
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5392
|
|
|
|
|
|
|
_mm512_extracti64x4_epi64 (__m512i __A, const int __imm) |
5393
|
|
|
|
|
|
|
{ |
5394
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A, |
5395
|
|
|
|
|
|
|
__imm, |
5396
|
|
|
|
|
|
|
(__v4di) |
5397
|
|
|
|
|
|
|
_mm256_undefined_si256 (), |
5398
|
|
|
|
|
|
|
(__mmask8) -1); |
5399
|
|
|
|
|
|
|
} |
5400
|
|
|
|
|
|
|
|
5401
|
|
|
|
|
|
|
extern __inline __m256i |
5402
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5403
|
|
|
|
|
|
|
_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A, |
5404
|
|
|
|
|
|
|
const int __imm) |
5405
|
|
|
|
|
|
|
{ |
5406
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A, |
5407
|
|
|
|
|
|
|
__imm, |
5408
|
|
|
|
|
|
|
(__v4di) __W, |
5409
|
|
|
|
|
|
|
(__mmask8) __U); |
5410
|
|
|
|
|
|
|
} |
5411
|
|
|
|
|
|
|
|
5412
|
|
|
|
|
|
|
extern __inline __m256i |
5413
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5414
|
|
|
|
|
|
|
_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm) |
5415
|
|
|
|
|
|
|
{ |
5416
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A, |
5417
|
|
|
|
|
|
|
__imm, |
5418
|
|
|
|
|
|
|
(__v4di) |
5419
|
|
|
|
|
|
|
_mm256_setzero_si256 (), |
5420
|
|
|
|
|
|
|
(__mmask8) __U); |
5421
|
|
|
|
|
|
|
} |
5422
|
|
|
|
|
|
|
|
5423
|
|
|
|
|
|
|
extern __inline __m128i |
5424
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5425
|
|
|
|
|
|
|
_mm512_extracti32x4_epi32 (__m512i __A, const int __imm) |
5426
|
|
|
|
|
|
|
{ |
5427
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A, |
5428
|
|
|
|
|
|
|
__imm, |
5429
|
|
|
|
|
|
|
(__v4si) |
5430
|
|
|
|
|
|
|
_mm_undefined_si128 (), |
5431
|
|
|
|
|
|
|
(__mmask8) -1); |
5432
|
|
|
|
|
|
|
} |
5433
|
|
|
|
|
|
|
|
5434
|
|
|
|
|
|
|
extern __inline __m128i |
5435
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5436
|
|
|
|
|
|
|
_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A, |
5437
|
|
|
|
|
|
|
const int __imm) |
5438
|
|
|
|
|
|
|
{ |
5439
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A, |
5440
|
|
|
|
|
|
|
__imm, |
5441
|
|
|
|
|
|
|
(__v4si) __W, |
5442
|
|
|
|
|
|
|
(__mmask8) __U); |
5443
|
|
|
|
|
|
|
} |
5444
|
|
|
|
|
|
|
|
5445
|
|
|
|
|
|
|
extern __inline __m128i |
5446
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5447
|
|
|
|
|
|
|
_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm) |
5448
|
|
|
|
|
|
|
{ |
5449
|
|
|
|
|
|
|
return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A, |
5450
|
|
|
|
|
|
|
__imm, |
5451
|
|
|
|
|
|
|
(__v4si) |
5452
|
|
|
|
|
|
|
_mm_setzero_si128 (), |
5453
|
|
|
|
|
|
|
(__mmask8) __U); |
5454
|
|
|
|
|
|
|
} |
5455
|
|
|
|
|
|
|
#else |
5456
|
|
|
|
|
|
|
|
5457
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: unmasked
   extractf64x4 with an undefined merge vector and an all-ones mask.  */
#define _mm512_extractf64x4_pd(X, C) \
  ((__m256d) \
   __builtin_ia32_extractf64x4_mask ((__v8df) (__m512d) (X), \
                                     (int) (C), \
                                     (__v4df) (__m256d) _mm256_undefined_pd (), \
                                     (__mmask8) -1))
5462
|
|
|
|
|
|
|
|
5463
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: extractf64x4 with W
   as the merge operand and U as the mask.  */
#define _mm512_mask_extractf64x4_pd(W, U, X, C) \
  ((__m256d) \
   __builtin_ia32_extractf64x4_mask ((__v8df) (__m512d) (X), \
                                     (int) (C), \
                                     (__v4df) (__m256d) (W), \
                                     (__mmask8) (U)))
5468
|
|
|
|
|
|
|
|
5469
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: extractf64x4 with an
   all-zero merge vector and U as the mask.  */
#define _mm512_maskz_extractf64x4_pd(U, X, C) \
  ((__m256d) \
   __builtin_ia32_extractf64x4_mask ((__v8df) (__m512d) (X), \
                                     (int) (C), \
                                     (__v4df) (__m256d) _mm256_setzero_pd (), \
                                     (__mmask8) (U)))
5474
|
|
|
|
|
|
|
|
5475
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: unmasked
   extractf32x4 with an undefined merge vector and an all-ones mask.  */
#define _mm512_extractf32x4_ps(X, C) \
  ((__m128) \
   __builtin_ia32_extractf32x4_mask ((__v16sf) (__m512) (X), \
                                     (int) (C), \
                                     (__v4sf) (__m128) _mm_undefined_ps (), \
                                     (__mmask8) -1))
5480
|
|
|
|
|
|
|
|
5481
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: extractf32x4 with W
   as the merge operand and U as the mask.  */
#define _mm512_mask_extractf32x4_ps(W, U, X, C) \
  ((__m128) \
   __builtin_ia32_extractf32x4_mask ((__v16sf) (__m512) (X), \
                                     (int) (C), \
                                     (__v4sf) (__m128) (W), \
                                     (__mmask8) (U)))
5486
|
|
|
|
|
|
|
|
5487
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: extractf32x4 with an
   all-zero merge vector and U as the mask.  */
#define _mm512_maskz_extractf32x4_ps(U, X, C) \
  ((__m128) \
   __builtin_ia32_extractf32x4_mask ((__v16sf) (__m512) (X), \
                                     (int) (C), \
                                     (__v4sf) (__m128) _mm_setzero_ps (), \
                                     (__mmask8) (U)))
5492
|
|
|
|
|
|
|
|
5493
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: unmasked
   extracti64x4 with an undefined merge vector and an all-ones mask.  */
#define _mm512_extracti64x4_epi64(X, C) \
  ((__m256i) \
   __builtin_ia32_extracti64x4_mask ((__v8di) (__m512i) (X), \
                                     (int) (C), \
                                     (__v4di) (__m256i) _mm256_undefined_si256 (), \
                                     (__mmask8) -1))
5498
|
|
|
|
|
|
|
|
5499
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: extracti64x4 with W
   as the merge operand and U as the mask.  */
#define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
  ((__m256i) \
   __builtin_ia32_extracti64x4_mask ((__v8di) (__m512i) (X), \
                                     (int) (C), \
                                     (__v4di) (__m256i) (W), \
                                     (__mmask8) (U)))
5504
|
|
|
|
|
|
|
|
5505
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: extracti64x4 with an
   all-zero merge vector and U as the mask.  */
#define _mm512_maskz_extracti64x4_epi64(U, X, C) \
  ((__m256i) \
   __builtin_ia32_extracti64x4_mask ((__v8di) (__m512i) (X), \
                                     (int) (C), \
                                     (__v4di) (__m256i) _mm256_setzero_si256 (), \
                                     (__mmask8) (U)))
5510
|
|
|
|
|
|
|
|
5511
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: unmasked
   extracti32x4 with an undefined merge vector and an all-ones mask.  */
#define _mm512_extracti32x4_epi32(X, C) \
  ((__m128i) \
   __builtin_ia32_extracti32x4_mask ((__v16si) (__m512i) (X), \
                                     (int) (C), \
                                     (__v4si) (__m128i) _mm_undefined_si128 (), \
                                     (__mmask8) -1))
5516
|
|
|
|
|
|
|
|
5517
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: extracti32x4 with W
   as the merge operand and U as the mask.  */
#define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
  ((__m128i) \
   __builtin_ia32_extracti32x4_mask ((__v16si) (__m512i) (X), \
                                     (int) (C), \
                                     (__v4si) (__m128i) (W), \
                                     (__mmask8) (U)))
5522
|
|
|
|
|
|
|
|
5523
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: extracti32x4 with an
   all-zero merge vector and U as the mask.  */
#define _mm512_maskz_extracti32x4_epi32(U, X, C) \
  ((__m128i) \
   __builtin_ia32_extracti32x4_mask ((__v16si) (__m512i) (X), \
                                     (int) (C), \
                                     (__v4si) (__m128i) _mm_setzero_si128 (), \
                                     (__mmask8) (U)))
5528
|
|
|
|
|
|
|
#endif |
5529
|
|
|
|
|
|
|
|
5530
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
5531
|
|
|
|
|
|
|
extern __inline __m512i |
5532
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5533
|
|
|
|
|
|
|
_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm) |
5534
|
|
|
|
|
|
|
{ |
5535
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A, |
5536
|
|
|
|
|
|
|
(__v4si) __B, |
5537
|
|
|
|
|
|
|
__imm, |
5538
|
|
|
|
|
|
|
(__v16si) __A, -1); |
5539
|
|
|
|
|
|
|
} |
5540
|
|
|
|
|
|
|
|
5541
|
|
|
|
|
|
|
extern __inline __m512 |
5542
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5543
|
|
|
|
|
|
|
_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm) |
5544
|
|
|
|
|
|
|
{ |
5545
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A, |
5546
|
|
|
|
|
|
|
(__v4sf) __B, |
5547
|
|
|
|
|
|
|
__imm, |
5548
|
|
|
|
|
|
|
(__v16sf) __A, -1); |
5549
|
|
|
|
|
|
|
} |
5550
|
|
|
|
|
|
|
|
5551
|
|
|
|
|
|
|
extern __inline __m512i |
5552
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5553
|
|
|
|
|
|
|
_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm) |
5554
|
|
|
|
|
|
|
{ |
5555
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A, |
5556
|
|
|
|
|
|
|
(__v4di) __B, |
5557
|
|
|
|
|
|
|
__imm, |
5558
|
|
|
|
|
|
|
(__v8di) |
5559
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
5560
|
|
|
|
|
|
|
(__mmask8) -1); |
5561
|
|
|
|
|
|
|
} |
5562
|
|
|
|
|
|
|
|
5563
|
|
|
|
|
|
|
extern __inline __m512i |
5564
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5565
|
|
|
|
|
|
|
_mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A, |
5566
|
|
|
|
|
|
|
__m256i __B, const int __imm) |
5567
|
|
|
|
|
|
|
{ |
5568
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A, |
5569
|
|
|
|
|
|
|
(__v4di) __B, |
5570
|
|
|
|
|
|
|
__imm, |
5571
|
|
|
|
|
|
|
(__v8di) __W, |
5572
|
|
|
|
|
|
|
(__mmask8) __U); |
5573
|
|
|
|
|
|
|
} |
5574
|
|
|
|
|
|
|
|
5575
|
|
|
|
|
|
|
extern __inline __m512i |
5576
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5577
|
|
|
|
|
|
|
_mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B, |
5578
|
|
|
|
|
|
|
const int __imm) |
5579
|
|
|
|
|
|
|
{ |
5580
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A, |
5581
|
|
|
|
|
|
|
(__v4di) __B, |
5582
|
|
|
|
|
|
|
__imm, |
5583
|
|
|
|
|
|
|
(__v8di) |
5584
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
5585
|
|
|
|
|
|
|
(__mmask8) __U); |
5586
|
|
|
|
|
|
|
} |
5587
|
|
|
|
|
|
|
|
5588
|
|
|
|
|
|
|
extern __inline __m512d |
5589
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5590
|
|
|
|
|
|
|
_mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm) |
5591
|
|
|
|
|
|
|
{ |
5592
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A, |
5593
|
|
|
|
|
|
|
(__v4df) __B, |
5594
|
|
|
|
|
|
|
__imm, |
5595
|
|
|
|
|
|
|
(__v8df) |
5596
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
5597
|
|
|
|
|
|
|
(__mmask8) -1); |
5598
|
|
|
|
|
|
|
} |
5599
|
|
|
|
|
|
|
|
5600
|
|
|
|
|
|
|
extern __inline __m512d |
5601
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5602
|
|
|
|
|
|
|
_mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A, |
5603
|
|
|
|
|
|
|
__m256d __B, const int __imm) |
5604
|
|
|
|
|
|
|
{ |
5605
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A, |
5606
|
|
|
|
|
|
|
(__v4df) __B, |
5607
|
|
|
|
|
|
|
__imm, |
5608
|
|
|
|
|
|
|
(__v8df) __W, |
5609
|
|
|
|
|
|
|
(__mmask8) __U); |
5610
|
|
|
|
|
|
|
} |
5611
|
|
|
|
|
|
|
|
5612
|
|
|
|
|
|
|
extern __inline __m512d |
5613
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5614
|
|
|
|
|
|
|
_mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B, |
5615
|
|
|
|
|
|
|
const int __imm) |
5616
|
|
|
|
|
|
|
{ |
5617
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A, |
5618
|
|
|
|
|
|
|
(__v4df) __B, |
5619
|
|
|
|
|
|
|
__imm, |
5620
|
|
|
|
|
|
|
(__v8df) |
5621
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
5622
|
|
|
|
|
|
|
(__mmask8) __U); |
5623
|
|
|
|
|
|
|
} |
5624
|
|
|
|
|
|
|
#else |
5625
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: unmasked insertf32x4.
   X is expanded exactly once so an argument with side effects is not
   evaluated twice.  The merge operand is an undefined vector, as in the
   _mm512_insertf64x4 macro; with the all-ones mask it is not expected to
   contribute to the result.  */
#define _mm512_insertf32x4(X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
    (__v4sf)(__m128) (Y), (int) (C), \
    (__v16sf)(__m512) _mm512_undefined_ps (), (__mmask16)(-1)))
5628
|
|
|
|
|
|
|
|
5629
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: unmasked inserti32x4.
   X is expanded exactly once so an argument with side effects is not
   evaluated twice.  The merge operand is an undefined vector, as in the
   _mm512_inserti64x4 macro; with the all-ones mask it is not expected to
   contribute to the result.  */
#define _mm512_inserti32x4(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C), \
    (__v16si)(__m512i) _mm512_undefined_si512 (), (__mmask16)(-1)))
5632
|
|
|
|
|
|
|
|
5633
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: unmasked insertf64x4
   with an undefined merge vector and an all-ones mask.  */
#define _mm512_insertf64x4(X, Y, C) \
  ((__m512d) \
   __builtin_ia32_insertf64x4_mask ((__v8df) (__m512d) (X), \
                                    (__v4df) (__m256d) (Y), \
                                    (int) (C), \
                                    (__v8df) (__m512d) _mm512_undefined_pd (), \
                                    (__mmask8) -1))
5638
|
|
|
|
|
|
|
|
5639
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: insertf64x4 with W as
   the merge operand and U as the mask.  */
#define _mm512_mask_insertf64x4(W, U, X, Y, C) \
  ((__m512d) \
   __builtin_ia32_insertf64x4_mask ((__v8df) (__m512d) (X), \
                                    (__v4df) (__m256d) (Y), \
                                    (int) (C), \
                                    (__v8df) (__m512d) (W), \
                                    (__mmask8) (U)))
5644
|
|
|
|
|
|
|
|
5645
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: insertf64x4 with an
   all-zero merge vector and U as the mask.  */
#define _mm512_maskz_insertf64x4(U, X, Y, C) \
  ((__m512d) \
   __builtin_ia32_insertf64x4_mask ((__v8df) (__m512d) (X), \
                                    (__v4df) (__m256d) (Y), \
                                    (int) (C), \
                                    (__v8df) (__m512d) _mm512_setzero_pd (), \
                                    (__mmask8) (U)))
5650
|
|
|
|
|
|
|
|
5651
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: unmasked inserti64x4
   with an undefined merge vector and an all-ones mask.  */
#define _mm512_inserti64x4(X, Y, C) \
  ((__m512i) \
   __builtin_ia32_inserti64x4_mask ((__v8di) (__m512i) (X), \
                                    (__v4di) (__m256i) (Y), \
                                    (int) (C), \
                                    (__v8di) (__m512i) _mm512_undefined_si512 (), \
                                    (__mmask8) -1))
5656
|
|
|
|
|
|
|
|
5657
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: inserti64x4 with W as
   the merge operand and U as the mask.  */
#define _mm512_mask_inserti64x4(W, U, X, Y, C) \
  ((__m512i) \
   __builtin_ia32_inserti64x4_mask ((__v8di) (__m512i) (X), \
                                    (__v4di) (__m256i) (Y), \
                                    (int) (C), \
                                    (__v8di) (__m512i) (W), \
                                    (__mmask8) (U)))
5662
|
|
|
|
|
|
|
|
5663
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: inserti64x4 with an
   all-zero merge vector and U as the mask.  */
#define _mm512_maskz_inserti64x4(U, X, Y, C) \
  ((__m512i) \
   __builtin_ia32_inserti64x4_mask ((__v8di) (__m512i) (X), \
                                    (__v4di) (__m256i) (Y), \
                                    (int) (C), \
                                    (__v8di) (__m512i) _mm512_setzero_si512 (), \
                                    (__mmask8) (U)))
5668
|
|
|
|
|
|
|
#endif |
5669
|
|
|
|
|
|
|
|
5670
|
|
|
|
|
|
|
extern __inline __m512d |
5671
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5672
|
|
|
|
|
|
|
_mm512_loadu_pd (void const *__P) |
5673
|
|
|
|
|
|
|
{ |
5674
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P, |
5675
|
|
|
|
|
|
|
(__v8df) |
5676
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
5677
|
|
|
|
|
|
|
(__mmask8) -1); |
5678
|
|
|
|
|
|
|
} |
5679
|
|
|
|
|
|
|
|
5680
|
|
|
|
|
|
|
extern __inline __m512d |
5681
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5682
|
|
|
|
|
|
|
_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P) |
5683
|
|
|
|
|
|
|
{ |
5684
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P, |
5685
|
|
|
|
|
|
|
(__v8df) __W, |
5686
|
|
|
|
|
|
|
(__mmask8) __U); |
5687
|
|
|
|
|
|
|
} |
5688
|
|
|
|
|
|
|
|
5689
|
|
|
|
|
|
|
extern __inline __m512d |
5690
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5691
|
|
|
|
|
|
|
_mm512_maskz_loadu_pd (__mmask8 __U, void const *__P) |
5692
|
|
|
|
|
|
|
{ |
5693
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P, |
5694
|
|
|
|
|
|
|
(__v8df) |
5695
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
5696
|
|
|
|
|
|
|
(__mmask8) __U); |
5697
|
|
|
|
|
|
|
} |
5698
|
|
|
|
|
|
|
|
5699
|
|
|
|
|
|
|
extern __inline void |
5700
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5701
|
|
|
|
|
|
|
_mm512_storeu_pd (void *__P, __m512d __A) |
5702
|
|
|
|
|
|
|
{ |
5703
|
|
|
|
|
|
|
__builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A, |
5704
|
|
|
|
|
|
|
(__mmask8) -1); |
5705
|
|
|
|
|
|
|
} |
5706
|
|
|
|
|
|
|
|
5707
|
|
|
|
|
|
|
extern __inline void |
5708
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5709
|
|
|
|
|
|
|
_mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A) |
5710
|
|
|
|
|
|
|
{ |
5711
|
|
|
|
|
|
|
__builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A, |
5712
|
|
|
|
|
|
|
(__mmask8) __U); |
5713
|
|
|
|
|
|
|
} |
5714
|
|
|
|
|
|
|
|
5715
|
|
|
|
|
|
|
extern __inline __m512 |
5716
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5717
|
|
|
|
|
|
|
_mm512_loadu_ps (void const *__P) |
5718
|
|
|
|
|
|
|
{ |
5719
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P, |
5720
|
|
|
|
|
|
|
(__v16sf) |
5721
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
5722
|
|
|
|
|
|
|
(__mmask16) -1); |
5723
|
|
|
|
|
|
|
} |
5724
|
|
|
|
|
|
|
|
5725
|
|
|
|
|
|
|
extern __inline __m512 |
5726
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5727
|
|
|
|
|
|
|
_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P) |
5728
|
|
|
|
|
|
|
{ |
5729
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P, |
5730
|
|
|
|
|
|
|
(__v16sf) __W, |
5731
|
|
|
|
|
|
|
(__mmask16) __U); |
5732
|
|
|
|
|
|
|
} |
5733
|
|
|
|
|
|
|
|
5734
|
|
|
|
|
|
|
extern __inline __m512 |
5735
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5736
|
|
|
|
|
|
|
_mm512_maskz_loadu_ps (__mmask16 __U, void const *__P) |
5737
|
|
|
|
|
|
|
{ |
5738
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P, |
5739
|
|
|
|
|
|
|
(__v16sf) |
5740
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
5741
|
|
|
|
|
|
|
(__mmask16) __U); |
5742
|
|
|
|
|
|
|
} |
5743
|
|
|
|
|
|
|
|
5744
|
|
|
|
|
|
|
extern __inline void |
5745
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5746
|
|
|
|
|
|
|
_mm512_storeu_ps (void *__P, __m512 __A) |
5747
|
|
|
|
|
|
|
{ |
5748
|
|
|
|
|
|
|
__builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A, |
5749
|
|
|
|
|
|
|
(__mmask16) -1); |
5750
|
|
|
|
|
|
|
} |
5751
|
|
|
|
|
|
|
|
5752
|
|
|
|
|
|
|
extern __inline void |
5753
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5754
|
|
|
|
|
|
|
_mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A) |
5755
|
|
|
|
|
|
|
{ |
5756
|
|
|
|
|
|
|
__builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A, |
5757
|
|
|
|
|
|
|
(__mmask16) __U); |
5758
|
|
|
|
|
|
|
} |
5759
|
|
|
|
|
|
|
|
5760
|
|
|
|
|
|
|
extern __inline __m512i |
5761
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5762
|
|
|
|
|
|
|
_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) |
5763
|
|
|
|
|
|
|
{ |
5764
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P, |
5765
|
|
|
|
|
|
|
(__v8di) __W, |
5766
|
|
|
|
|
|
|
(__mmask8) __U); |
5767
|
|
|
|
|
|
|
} |
5768
|
|
|
|
|
|
|
|
5769
|
|
|
|
|
|
|
extern __inline __m512i |
5770
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5771
|
|
|
|
|
|
|
_mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P) |
5772
|
|
|
|
|
|
|
{ |
5773
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P, |
5774
|
|
|
|
|
|
|
(__v8di) |
5775
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
5776
|
|
|
|
|
|
|
(__mmask8) __U); |
5777
|
|
|
|
|
|
|
} |
5778
|
|
|
|
|
|
|
|
5779
|
|
|
|
|
|
|
extern __inline void |
5780
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5781
|
|
|
|
|
|
|
_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A) |
5782
|
|
|
|
|
|
|
{ |
5783
|
|
|
|
|
|
|
__builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A, |
5784
|
|
|
|
|
|
|
(__mmask8) __U); |
5785
|
|
|
|
|
|
|
} |
5786
|
|
|
|
|
|
|
|
5787
|
|
|
|
|
|
|
extern __inline __m512i |
5788
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5789
|
|
|
|
|
|
|
_mm512_loadu_si512 (void const *__P) |
5790
|
|
|
|
|
|
|
{ |
5791
|
0
|
|
|
|
|
|
return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P, |
5792
|
|
|
|
|
|
|
(__v16si) |
5793
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
5794
|
|
|
|
|
|
|
(__mmask16) -1); |
5795
|
|
|
|
|
|
|
} |
5796
|
|
|
|
|
|
|
|
5797
|
|
|
|
|
|
|
extern __inline __m512i |
5798
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5799
|
|
|
|
|
|
|
_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) |
5800
|
|
|
|
|
|
|
{ |
5801
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P, |
5802
|
|
|
|
|
|
|
(__v16si) __W, |
5803
|
|
|
|
|
|
|
(__mmask16) __U); |
5804
|
|
|
|
|
|
|
} |
5805
|
|
|
|
|
|
|
|
5806
|
|
|
|
|
|
|
extern __inline __m512i |
5807
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5808
|
|
|
|
|
|
|
_mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P) |
5809
|
|
|
|
|
|
|
{ |
5810
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P, |
5811
|
|
|
|
|
|
|
(__v16si) |
5812
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
5813
|
|
|
|
|
|
|
(__mmask16) __U); |
5814
|
|
|
|
|
|
|
} |
5815
|
|
|
|
|
|
|
|
5816
|
|
|
|
|
|
|
extern __inline void |
5817
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5818
|
|
|
|
|
|
|
_mm512_storeu_si512 (void *__P, __m512i __A) |
5819
|
|
|
|
|
|
|
{ |
5820
|
|
|
|
|
|
|
__builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A, |
5821
|
|
|
|
|
|
|
(__mmask16) -1); |
5822
|
|
|
|
|
|
|
} |
5823
|
|
|
|
|
|
|
|
5824
|
|
|
|
|
|
|
extern __inline void |
5825
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5826
|
|
|
|
|
|
|
_mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A) |
5827
|
|
|
|
|
|
|
{ |
5828
|
|
|
|
|
|
|
__builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A, |
5829
|
|
|
|
|
|
|
(__mmask16) __U); |
5830
|
|
|
|
|
|
|
} |
5831
|
|
|
|
|
|
|
|
5832
|
|
|
|
|
|
|
extern __inline __m512d |
5833
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5834
|
|
|
|
|
|
|
_mm512_permutevar_pd (__m512d __A, __m512i __C) |
5835
|
|
|
|
|
|
|
{ |
5836
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A, |
5837
|
|
|
|
|
|
|
(__v8di) __C, |
5838
|
|
|
|
|
|
|
(__v8df) |
5839
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
5840
|
|
|
|
|
|
|
(__mmask8) -1); |
5841
|
|
|
|
|
|
|
} |
5842
|
|
|
|
|
|
|
|
5843
|
|
|
|
|
|
|
extern __inline __m512d |
5844
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5845
|
|
|
|
|
|
|
_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) |
5846
|
|
|
|
|
|
|
{ |
5847
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A, |
5848
|
|
|
|
|
|
|
(__v8di) __C, |
5849
|
|
|
|
|
|
|
(__v8df) __W, |
5850
|
|
|
|
|
|
|
(__mmask8) __U); |
5851
|
|
|
|
|
|
|
} |
5852
|
|
|
|
|
|
|
|
5853
|
|
|
|
|
|
|
extern __inline __m512d |
5854
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5855
|
|
|
|
|
|
|
_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C) |
5856
|
|
|
|
|
|
|
{ |
5857
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A, |
5858
|
|
|
|
|
|
|
(__v8di) __C, |
5859
|
|
|
|
|
|
|
(__v8df) |
5860
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
5861
|
|
|
|
|
|
|
(__mmask8) __U); |
5862
|
|
|
|
|
|
|
} |
5863
|
|
|
|
|
|
|
|
5864
|
|
|
|
|
|
|
extern __inline __m512 |
5865
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5866
|
|
|
|
|
|
|
_mm512_permutevar_ps (__m512 __A, __m512i __C) |
5867
|
|
|
|
|
|
|
{ |
5868
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A, |
5869
|
|
|
|
|
|
|
(__v16si) __C, |
5870
|
|
|
|
|
|
|
(__v16sf) |
5871
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
5872
|
|
|
|
|
|
|
(__mmask16) -1); |
5873
|
|
|
|
|
|
|
} |
5874
|
|
|
|
|
|
|
|
5875
|
|
|
|
|
|
|
extern __inline __m512 |
5876
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5877
|
|
|
|
|
|
|
_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) |
5878
|
|
|
|
|
|
|
{ |
5879
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A, |
5880
|
|
|
|
|
|
|
(__v16si) __C, |
5881
|
|
|
|
|
|
|
(__v16sf) __W, |
5882
|
|
|
|
|
|
|
(__mmask16) __U); |
5883
|
|
|
|
|
|
|
} |
5884
|
|
|
|
|
|
|
|
5885
|
|
|
|
|
|
|
extern __inline __m512 |
5886
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5887
|
|
|
|
|
|
|
_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C) |
5888
|
|
|
|
|
|
|
{ |
5889
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A, |
5890
|
|
|
|
|
|
|
(__v16si) __C, |
5891
|
|
|
|
|
|
|
(__v16sf) |
5892
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
5893
|
|
|
|
|
|
|
(__mmask16) __U); |
5894
|
|
|
|
|
|
|
} |
5895
|
|
|
|
|
|
|
|
5896
|
|
|
|
|
|
|
extern __inline __m512i |
5897
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5898
|
|
|
|
|
|
|
_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B) |
5899
|
|
|
|
|
|
|
{ |
5900
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I |
5901
|
|
|
|
|
|
|
/* idx */ , |
5902
|
|
|
|
|
|
|
(__v8di) __A, |
5903
|
|
|
|
|
|
|
(__v8di) __B, |
5904
|
|
|
|
|
|
|
(__mmask8) -1); |
5905
|
|
|
|
|
|
|
} |
5906
|
|
|
|
|
|
|
|
5907
|
|
|
|
|
|
|
extern __inline __m512i |
5908
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5909
|
|
|
|
|
|
|
_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I, |
5910
|
|
|
|
|
|
|
__m512i __B) |
5911
|
|
|
|
|
|
|
{ |
5912
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I |
5913
|
|
|
|
|
|
|
/* idx */ , |
5914
|
|
|
|
|
|
|
(__v8di) __A, |
5915
|
|
|
|
|
|
|
(__v8di) __B, |
5916
|
|
|
|
|
|
|
(__mmask8) __U); |
5917
|
|
|
|
|
|
|
} |
5918
|
|
|
|
|
|
|
|
5919
|
|
|
|
|
|
|
extern __inline __m512i |
5920
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5921
|
|
|
|
|
|
|
_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I, |
5922
|
|
|
|
|
|
|
__mmask8 __U, __m512i __B) |
5923
|
|
|
|
|
|
|
{ |
5924
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A, |
5925
|
|
|
|
|
|
|
(__v8di) __I |
5926
|
|
|
|
|
|
|
/* idx */ , |
5927
|
|
|
|
|
|
|
(__v8di) __B, |
5928
|
|
|
|
|
|
|
(__mmask8) __U); |
5929
|
|
|
|
|
|
|
} |
5930
|
|
|
|
|
|
|
|
5931
|
|
|
|
|
|
|
extern __inline __m512i |
5932
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5933
|
|
|
|
|
|
|
_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A, |
5934
|
|
|
|
|
|
|
__m512i __I, __m512i __B) |
5935
|
|
|
|
|
|
|
{ |
5936
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I |
5937
|
|
|
|
|
|
|
/* idx */ , |
5938
|
|
|
|
|
|
|
(__v8di) __A, |
5939
|
|
|
|
|
|
|
(__v8di) __B, |
5940
|
|
|
|
|
|
|
(__mmask8) __U); |
5941
|
|
|
|
|
|
|
} |
5942
|
|
|
|
|
|
|
|
5943
|
|
|
|
|
|
|
extern __inline __m512i |
5944
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5945
|
|
|
|
|
|
|
_mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B) |
5946
|
|
|
|
|
|
|
{ |
5947
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I |
5948
|
|
|
|
|
|
|
/* idx */ , |
5949
|
|
|
|
|
|
|
(__v16si) __A, |
5950
|
|
|
|
|
|
|
(__v16si) __B, |
5951
|
|
|
|
|
|
|
(__mmask16) -1); |
5952
|
|
|
|
|
|
|
} |
5953
|
|
|
|
|
|
|
|
5954
|
|
|
|
|
|
|
extern __inline __m512i |
5955
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5956
|
|
|
|
|
|
|
_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U, |
5957
|
|
|
|
|
|
|
__m512i __I, __m512i __B) |
5958
|
|
|
|
|
|
|
{ |
5959
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I |
5960
|
|
|
|
|
|
|
/* idx */ , |
5961
|
|
|
|
|
|
|
(__v16si) __A, |
5962
|
|
|
|
|
|
|
(__v16si) __B, |
5963
|
|
|
|
|
|
|
(__mmask16) __U); |
5964
|
|
|
|
|
|
|
} |
5965
|
|
|
|
|
|
|
|
5966
|
|
|
|
|
|
|
extern __inline __m512i |
5967
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5968
|
|
|
|
|
|
|
_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I, |
5969
|
|
|
|
|
|
|
__mmask16 __U, __m512i __B) |
5970
|
|
|
|
|
|
|
{ |
5971
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A, |
5972
|
|
|
|
|
|
|
(__v16si) __I |
5973
|
|
|
|
|
|
|
/* idx */ , |
5974
|
|
|
|
|
|
|
(__v16si) __B, |
5975
|
|
|
|
|
|
|
(__mmask16) __U); |
5976
|
|
|
|
|
|
|
} |
5977
|
|
|
|
|
|
|
|
5978
|
|
|
|
|
|
|
extern __inline __m512i |
5979
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5980
|
|
|
|
|
|
|
_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A, |
5981
|
|
|
|
|
|
|
__m512i __I, __m512i __B) |
5982
|
|
|
|
|
|
|
{ |
5983
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I |
5984
|
|
|
|
|
|
|
/* idx */ , |
5985
|
|
|
|
|
|
|
(__v16si) __A, |
5986
|
|
|
|
|
|
|
(__v16si) __B, |
5987
|
|
|
|
|
|
|
(__mmask16) __U); |
5988
|
|
|
|
|
|
|
} |
5989
|
|
|
|
|
|
|
|
5990
|
|
|
|
|
|
|
extern __inline __m512d |
5991
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
5992
|
|
|
|
|
|
|
_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B) |
5993
|
|
|
|
|
|
|
{ |
5994
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I |
5995
|
|
|
|
|
|
|
/* idx */ , |
5996
|
|
|
|
|
|
|
(__v8df) __A, |
5997
|
|
|
|
|
|
|
(__v8df) __B, |
5998
|
|
|
|
|
|
|
(__mmask8) -1); |
5999
|
|
|
|
|
|
|
} |
6000
|
|
|
|
|
|
|
|
6001
|
|
|
|
|
|
|
extern __inline __m512d |
6002
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6003
|
|
|
|
|
|
|
_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, |
6004
|
|
|
|
|
|
|
__m512d __B) |
6005
|
|
|
|
|
|
|
{ |
6006
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I |
6007
|
|
|
|
|
|
|
/* idx */ , |
6008
|
|
|
|
|
|
|
(__v8df) __A, |
6009
|
|
|
|
|
|
|
(__v8df) __B, |
6010
|
|
|
|
|
|
|
(__mmask8) __U); |
6011
|
|
|
|
|
|
|
} |
6012
|
|
|
|
|
|
|
|
6013
|
|
|
|
|
|
|
extern __inline __m512d |
6014
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6015
|
|
|
|
|
|
|
_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U, |
6016
|
|
|
|
|
|
|
__m512d __B) |
6017
|
|
|
|
|
|
|
{ |
6018
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A, |
6019
|
|
|
|
|
|
|
(__v8di) __I |
6020
|
|
|
|
|
|
|
/* idx */ , |
6021
|
|
|
|
|
|
|
(__v8df) __B, |
6022
|
|
|
|
|
|
|
(__mmask8) __U); |
6023
|
|
|
|
|
|
|
} |
6024
|
|
|
|
|
|
|
|
6025
|
|
|
|
|
|
|
extern __inline __m512d |
6026
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6027
|
|
|
|
|
|
|
_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I, |
6028
|
|
|
|
|
|
|
__m512d __B) |
6029
|
|
|
|
|
|
|
{ |
6030
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I |
6031
|
|
|
|
|
|
|
/* idx */ , |
6032
|
|
|
|
|
|
|
(__v8df) __A, |
6033
|
|
|
|
|
|
|
(__v8df) __B, |
6034
|
|
|
|
|
|
|
(__mmask8) __U); |
6035
|
|
|
|
|
|
|
} |
6036
|
|
|
|
|
|
|
|
6037
|
|
|
|
|
|
|
extern __inline __m512 |
6038
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6039
|
|
|
|
|
|
|
_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B) |
6040
|
|
|
|
|
|
|
{ |
6041
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I |
6042
|
|
|
|
|
|
|
/* idx */ , |
6043
|
|
|
|
|
|
|
(__v16sf) __A, |
6044
|
|
|
|
|
|
|
(__v16sf) __B, |
6045
|
|
|
|
|
|
|
(__mmask16) -1); |
6046
|
|
|
|
|
|
|
} |
6047
|
|
|
|
|
|
|
|
6048
|
|
|
|
|
|
|
extern __inline __m512 |
6049
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6050
|
|
|
|
|
|
|
_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B) |
6051
|
|
|
|
|
|
|
{ |
6052
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I |
6053
|
|
|
|
|
|
|
/* idx */ , |
6054
|
|
|
|
|
|
|
(__v16sf) __A, |
6055
|
|
|
|
|
|
|
(__v16sf) __B, |
6056
|
|
|
|
|
|
|
(__mmask16) __U); |
6057
|
|
|
|
|
|
|
} |
6058
|
|
|
|
|
|
|
|
6059
|
|
|
|
|
|
|
extern __inline __m512 |
6060
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6061
|
|
|
|
|
|
|
_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U, |
6062
|
|
|
|
|
|
|
__m512 __B) |
6063
|
|
|
|
|
|
|
{ |
6064
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A, |
6065
|
|
|
|
|
|
|
(__v16si) __I |
6066
|
|
|
|
|
|
|
/* idx */ , |
6067
|
|
|
|
|
|
|
(__v16sf) __B, |
6068
|
|
|
|
|
|
|
(__mmask16) __U); |
6069
|
|
|
|
|
|
|
} |
6070
|
|
|
|
|
|
|
|
6071
|
|
|
|
|
|
|
extern __inline __m512 |
6072
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6073
|
|
|
|
|
|
|
_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I, |
6074
|
|
|
|
|
|
|
__m512 __B) |
6075
|
|
|
|
|
|
|
{ |
6076
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I |
6077
|
|
|
|
|
|
|
/* idx */ , |
6078
|
|
|
|
|
|
|
(__v16sf) __A, |
6079
|
|
|
|
|
|
|
(__v16sf) __B, |
6080
|
|
|
|
|
|
|
(__mmask16) __U); |
6081
|
|
|
|
|
|
|
} |
6082
|
|
|
|
|
|
|
|
6083
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
6084
|
|
|
|
|
|
|
extern __inline __m512d |
6085
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6086
|
|
|
|
|
|
|
_mm512_permute_pd (__m512d __X, const int __C) |
6087
|
|
|
|
|
|
|
{ |
6088
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C, |
6089
|
|
|
|
|
|
|
(__v8df) |
6090
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
6091
|
|
|
|
|
|
|
(__mmask8) -1); |
6092
|
|
|
|
|
|
|
} |
6093
|
|
|
|
|
|
|
|
6094
|
|
|
|
|
|
|
extern __inline __m512d |
6095
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6096
|
|
|
|
|
|
|
_mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C) |
6097
|
|
|
|
|
|
|
{ |
6098
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C, |
6099
|
|
|
|
|
|
|
(__v8df) __W, |
6100
|
|
|
|
|
|
|
(__mmask8) __U); |
6101
|
|
|
|
|
|
|
} |
6102
|
|
|
|
|
|
|
|
6103
|
|
|
|
|
|
|
extern __inline __m512d |
6104
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6105
|
|
|
|
|
|
|
_mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C) |
6106
|
|
|
|
|
|
|
{ |
6107
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C, |
6108
|
|
|
|
|
|
|
(__v8df) |
6109
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
6110
|
|
|
|
|
|
|
(__mmask8) __U); |
6111
|
|
|
|
|
|
|
} |
6112
|
|
|
|
|
|
|
|
6113
|
|
|
|
|
|
|
extern __inline __m512 |
6114
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6115
|
|
|
|
|
|
|
_mm512_permute_ps (__m512 __X, const int __C) |
6116
|
|
|
|
|
|
|
{ |
6117
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C, |
6118
|
|
|
|
|
|
|
(__v16sf) |
6119
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
6120
|
|
|
|
|
|
|
(__mmask16) -1); |
6121
|
|
|
|
|
|
|
} |
6122
|
|
|
|
|
|
|
|
6123
|
|
|
|
|
|
|
extern __inline __m512 |
6124
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6125
|
|
|
|
|
|
|
_mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C) |
6126
|
|
|
|
|
|
|
{ |
6127
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C, |
6128
|
|
|
|
|
|
|
(__v16sf) __W, |
6129
|
|
|
|
|
|
|
(__mmask16) __U); |
6130
|
|
|
|
|
|
|
} |
6131
|
|
|
|
|
|
|
|
6132
|
|
|
|
|
|
|
extern __inline __m512 |
6133
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6134
|
|
|
|
|
|
|
_mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C) |
6135
|
|
|
|
|
|
|
{ |
6136
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C, |
6137
|
|
|
|
|
|
|
(__v16sf) |
6138
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
6139
|
|
|
|
|
|
|
(__mmask16) __U); |
6140
|
|
|
|
|
|
|
} |
6141
|
|
|
|
|
|
|
#else |
6142
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: same builtin call as
   the inline function, with an undefined third operand and an all-ones
   mask.  */
#define _mm512_permute_pd(X, C) \
  ((__m512d) \
   __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), \
                                     (int)(C), \
                                     (__v8df)(__m512d)_mm512_undefined_pd(), \
                                     (__mmask8)(-1)))
6146
|
|
|
|
|
|
|
|
6147
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: W is passed as the
   third operand and U as the mask.  */
#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d) \
   __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), \
                                     (int)(C), \
                                     (__v8df)(__m512d)(W), \
                                     (__mmask8)(U)))
6151
|
|
|
|
|
|
|
|
6152
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: a zero vector is passed
   as the third operand and U as the mask.  */
#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d) \
   __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), \
                                     (int)(C), \
                                     (__v8df)(__m512d)_mm512_setzero_pd(), \
                                     (__mmask8)(U)))
6156
|
|
|
|
|
|
|
|
6157
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: same builtin call as
   the inline function, with an undefined third operand and an all-ones
   mask.  */
#define _mm512_permute_ps(X, C) \
  ((__m512) \
   __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), \
                                     (int)(C), \
                                     (__v16sf)(__m512)_mm512_undefined_ps(), \
                                     (__mmask16)(-1)))
6161
|
|
|
|
|
|
|
|
6162
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: W is passed as the
   third operand and U as the mask.  */
#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512) \
   __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), \
                                     (int)(C), \
                                     (__v16sf)(__m512)(W), \
                                     (__mmask16)(U)))
6166
|
|
|
|
|
|
|
|
6167
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: a zero vector is passed
   as the third operand and U as the mask.  */
#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512) \
   __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), \
                                     (int)(C), \
                                     (__v16sf)(__m512)_mm512_setzero_ps(), \
                                     (__mmask16)(U)))
6171
|
|
|
|
|
|
|
#endif |
6172
|
|
|
|
|
|
|
|
6173
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
6174
|
|
|
|
|
|
|
extern __inline __m512i |
6175
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6176
|
|
|
|
|
|
|
_mm512_permutex_epi64 (__m512i __X, const int __I) |
6177
|
|
|
|
|
|
|
{ |
6178
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I, |
6179
|
|
|
|
|
|
|
(__v8di) |
6180
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
6181
|
|
|
|
|
|
|
(__mmask8) (-1)); |
6182
|
|
|
|
|
|
|
} |
6183
|
|
|
|
|
|
|
|
6184
|
|
|
|
|
|
|
extern __inline __m512i |
6185
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6186
|
|
|
|
|
|
|
_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M, |
6187
|
|
|
|
|
|
|
__m512i __X, const int __I) |
6188
|
|
|
|
|
|
|
{ |
6189
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I, |
6190
|
|
|
|
|
|
|
(__v8di) __W, |
6191
|
|
|
|
|
|
|
(__mmask8) __M); |
6192
|
|
|
|
|
|
|
} |
6193
|
|
|
|
|
|
|
|
6194
|
|
|
|
|
|
|
extern __inline __m512i |
6195
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6196
|
|
|
|
|
|
|
_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I) |
6197
|
|
|
|
|
|
|
{ |
6198
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I, |
6199
|
|
|
|
|
|
|
(__v8di) |
6200
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
6201
|
|
|
|
|
|
|
(__mmask8) __M); |
6202
|
|
|
|
|
|
|
} |
6203
|
|
|
|
|
|
|
|
6204
|
|
|
|
|
|
|
extern __inline __m512d |
6205
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6206
|
|
|
|
|
|
|
_mm512_permutex_pd (__m512d __X, const int __M) |
6207
|
|
|
|
|
|
|
{ |
6208
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M, |
6209
|
|
|
|
|
|
|
(__v8df) |
6210
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
6211
|
|
|
|
|
|
|
(__mmask8) -1); |
6212
|
|
|
|
|
|
|
} |
6213
|
|
|
|
|
|
|
|
6214
|
|
|
|
|
|
|
extern __inline __m512d |
6215
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6216
|
|
|
|
|
|
|
_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M) |
6217
|
|
|
|
|
|
|
{ |
6218
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M, |
6219
|
|
|
|
|
|
|
(__v8df) __W, |
6220
|
|
|
|
|
|
|
(__mmask8) __U); |
6221
|
|
|
|
|
|
|
} |
6222
|
|
|
|
|
|
|
|
6223
|
|
|
|
|
|
|
extern __inline __m512d |
6224
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6225
|
|
|
|
|
|
|
_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M) |
6226
|
|
|
|
|
|
|
{ |
6227
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M, |
6228
|
|
|
|
|
|
|
(__v8df) |
6229
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
6230
|
|
|
|
|
|
|
(__mmask8) __U); |
6231
|
|
|
|
|
|
|
} |
6232
|
|
|
|
|
|
|
#else |
6233
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: undefined third operand
   and an all-ones mask.  M is the immediate, not a write mask.  */
#define _mm512_permutex_pd(X, M) \
  ((__m512d) \
   __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), \
                                  (int)(M), \
                                  (__v8df)(__m512d)_mm512_undefined_pd(), \
                                  (__mmask8)-1))
6237
|
|
|
|
|
|
|
|
6238
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: W is passed as the
   third operand and U as the mask; M is the immediate.  */
#define _mm512_mask_permutex_pd(W, U, X, M) \
  ((__m512d) \
   __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), \
                                  (int)(M), \
                                  (__v8df)(__m512d)(W), \
                                  (__mmask8)(U)))
6241
|
|
|
|
|
|
|
|
6242
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: a zero vector is passed
   as the third operand and U as the mask; M is the immediate.  */
#define _mm512_maskz_permutex_pd(U, X, M) \
  ((__m512d) \
   __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), \
                                  (int)(M), \
                                  (__v8df)(__m512d)_mm512_setzero_pd(), \
                                  (__mmask8)(U)))
6246
|
|
|
|
|
|
|
|
6247
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: undefined third operand
   and an all-ones mask.  */
#define _mm512_permutex_epi64(X, I) \
  ((__m512i) \
   __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), (int)(I), \
                                  (__v8di)(__m512i) \
                                  (_mm512_undefined_si512 ()), \
                                  (__mmask8)(-1)))
6253
|
|
|
|
|
|
|
|
6254
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: a zero vector is passed
   as the third operand and M as the mask.  */
#define _mm512_maskz_permutex_epi64(M, X, I) \
  ((__m512i) \
   __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), (int)(I), \
                                  (__v8di)(__m512i) \
                                  (_mm512_setzero_si512 ()), \
                                  (__mmask8)(M)))
6260
|
|
|
|
|
|
|
|
6261
|
|
|
|
|
|
|
/* Macro form used when __OPTIMIZE__ is not defined: W is passed as the
   third operand and M as the mask.  */
#define _mm512_mask_permutex_epi64(W, M, X, I) \
  ((__m512i) \
   __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), (int)(I), \
                                  (__v8di)(__m512i)(W), \
                                  (__mmask8)(M)))
6266
|
|
|
|
|
|
|
#endif |
6267
|
|
|
|
|
|
|
|
6268
|
|
|
|
|
|
|
extern __inline __m512i |
6269
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6270
|
|
|
|
|
|
|
_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y) |
6271
|
|
|
|
|
|
|
{ |
6272
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, |
6273
|
|
|
|
|
|
|
(__v8di) __X, |
6274
|
|
|
|
|
|
|
(__v8di) |
6275
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
6276
|
|
|
|
|
|
|
__M); |
6277
|
|
|
|
|
|
|
} |
6278
|
|
|
|
|
|
|
|
6279
|
|
|
|
|
|
|
extern __inline __m512i |
6280
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6281
|
|
|
|
|
|
|
_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y) |
6282
|
|
|
|
|
|
|
{ |
6283
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, |
6284
|
|
|
|
|
|
|
(__v8di) __X, |
6285
|
|
|
|
|
|
|
(__v8di) |
6286
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
6287
|
|
|
|
|
|
|
(__mmask8) -1); |
6288
|
|
|
|
|
|
|
} |
6289
|
|
|
|
|
|
|
|
6290
|
|
|
|
|
|
|
extern __inline __m512i |
6291
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6292
|
|
|
|
|
|
|
_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X, |
6293
|
|
|
|
|
|
|
__m512i __Y) |
6294
|
|
|
|
|
|
|
{ |
6295
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, |
6296
|
|
|
|
|
|
|
(__v8di) __X, |
6297
|
|
|
|
|
|
|
(__v8di) __W, |
6298
|
|
|
|
|
|
|
__M); |
6299
|
|
|
|
|
|
|
} |
6300
|
|
|
|
|
|
|
|
6301
|
|
|
|
|
|
|
extern __inline __m512i |
6302
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6303
|
|
|
|
|
|
|
_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y) |
6304
|
|
|
|
|
|
|
{ |
6305
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, |
6306
|
|
|
|
|
|
|
(__v16si) __X, |
6307
|
|
|
|
|
|
|
(__v16si) |
6308
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
6309
|
|
|
|
|
|
|
__M); |
6310
|
|
|
|
|
|
|
} |
6311
|
|
|
|
|
|
|
|
6312
|
|
|
|
|
|
|
extern __inline __m512i |
6313
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6314
|
|
|
|
|
|
|
_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y) |
6315
|
|
|
|
|
|
|
{ |
6316
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, |
6317
|
|
|
|
|
|
|
(__v16si) __X, |
6318
|
|
|
|
|
|
|
(__v16si) |
6319
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
6320
|
|
|
|
|
|
|
(__mmask16) -1); |
6321
|
|
|
|
|
|
|
} |
6322
|
|
|
|
|
|
|
|
6323
|
|
|
|
|
|
|
extern __inline __m512i |
6324
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6325
|
|
|
|
|
|
|
_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X, |
6326
|
|
|
|
|
|
|
__m512i __Y) |
6327
|
|
|
|
|
|
|
{ |
6328
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, |
6329
|
|
|
|
|
|
|
(__v16si) __X, |
6330
|
|
|
|
|
|
|
(__v16si) __W, |
6331
|
|
|
|
|
|
|
__M); |
6332
|
|
|
|
|
|
|
} |
6333
|
|
|
|
|
|
|
|
6334
|
|
|
|
|
|
|
extern __inline __m512d |
6335
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6336
|
|
|
|
|
|
|
_mm512_permutexvar_pd (__m512i __X, __m512d __Y) |
6337
|
|
|
|
|
|
|
{ |
6338
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, |
6339
|
|
|
|
|
|
|
(__v8di) __X, |
6340
|
|
|
|
|
|
|
(__v8df) |
6341
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
6342
|
|
|
|
|
|
|
(__mmask8) -1); |
6343
|
|
|
|
|
|
|
} |
6344
|
|
|
|
|
|
|
|
6345
|
|
|
|
|
|
|
extern __inline __m512d |
6346
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6347
|
|
|
|
|
|
|
_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) |
6348
|
|
|
|
|
|
|
{ |
6349
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, |
6350
|
|
|
|
|
|
|
(__v8di) __X, |
6351
|
|
|
|
|
|
|
(__v8df) __W, |
6352
|
|
|
|
|
|
|
(__mmask8) __U); |
6353
|
|
|
|
|
|
|
} |
6354
|
|
|
|
|
|
|
|
6355
|
|
|
|
|
|
|
extern __inline __m512d |
6356
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6357
|
|
|
|
|
|
|
_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y) |
6358
|
|
|
|
|
|
|
{ |
6359
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, |
6360
|
|
|
|
|
|
|
(__v8di) __X, |
6361
|
|
|
|
|
|
|
(__v8df) |
6362
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
6363
|
|
|
|
|
|
|
(__mmask8) __U); |
6364
|
|
|
|
|
|
|
} |
6365
|
|
|
|
|
|
|
|
6366
|
|
|
|
|
|
|
extern __inline __m512 |
6367
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6368
|
|
|
|
|
|
|
_mm512_permutexvar_ps (__m512i __X, __m512 __Y) |
6369
|
|
|
|
|
|
|
{ |
6370
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, |
6371
|
|
|
|
|
|
|
(__v16si) __X, |
6372
|
|
|
|
|
|
|
(__v16sf) |
6373
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
6374
|
|
|
|
|
|
|
(__mmask16) -1); |
6375
|
|
|
|
|
|
|
} |
6376
|
|
|
|
|
|
|
|
6377
|
|
|
|
|
|
|
extern __inline __m512 |
6378
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6379
|
|
|
|
|
|
|
_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) |
6380
|
|
|
|
|
|
|
{ |
6381
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, |
6382
|
|
|
|
|
|
|
(__v16si) __X, |
6383
|
|
|
|
|
|
|
(__v16sf) __W, |
6384
|
|
|
|
|
|
|
(__mmask16) __U); |
6385
|
|
|
|
|
|
|
} |
6386
|
|
|
|
|
|
|
|
6387
|
|
|
|
|
|
|
extern __inline __m512 |
6388
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6389
|
|
|
|
|
|
|
_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y) |
6390
|
|
|
|
|
|
|
{ |
6391
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, |
6392
|
|
|
|
|
|
|
(__v16si) __X, |
6393
|
|
|
|
|
|
|
(__v16sf) |
6394
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
6395
|
|
|
|
|
|
|
(__mmask16) __U); |
6396
|
|
|
|
|
|
|
} |
6397
|
|
|
|
|
|
|
|
6398
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
6399
|
|
|
|
|
|
|
extern __inline __m512 |
6400
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6401
|
|
|
|
|
|
|
_mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm) |
6402
|
|
|
|
|
|
|
{ |
6403
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M, |
6404
|
|
|
|
|
|
|
(__v16sf) __V, __imm, |
6405
|
|
|
|
|
|
|
(__v16sf) |
6406
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
6407
|
|
|
|
|
|
|
(__mmask16) -1); |
6408
|
|
|
|
|
|
|
} |
6409
|
|
|
|
|
|
|
|
6410
|
|
|
|
|
|
|
extern __inline __m512 |
6411
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6412
|
|
|
|
|
|
|
_mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M, |
6413
|
|
|
|
|
|
|
__m512 __V, const int __imm) |
6414
|
|
|
|
|
|
|
{ |
6415
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M, |
6416
|
|
|
|
|
|
|
(__v16sf) __V, __imm, |
6417
|
|
|
|
|
|
|
(__v16sf) __W, |
6418
|
|
|
|
|
|
|
(__mmask16) __U); |
6419
|
|
|
|
|
|
|
} |
6420
|
|
|
|
|
|
|
|
6421
|
|
|
|
|
|
|
extern __inline __m512 |
6422
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6423
|
|
|
|
|
|
|
_mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm) |
6424
|
|
|
|
|
|
|
{ |
6425
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M, |
6426
|
|
|
|
|
|
|
(__v16sf) __V, __imm, |
6427
|
|
|
|
|
|
|
(__v16sf) |
6428
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
6429
|
|
|
|
|
|
|
(__mmask16) __U); |
6430
|
|
|
|
|
|
|
} |
6431
|
|
|
|
|
|
|
|
6432
|
|
|
|
|
|
|
extern __inline __m512d |
6433
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6434
|
|
|
|
|
|
|
_mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm) |
6435
|
|
|
|
|
|
|
{ |
6436
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M, |
6437
|
|
|
|
|
|
|
(__v8df) __V, __imm, |
6438
|
|
|
|
|
|
|
(__v8df) |
6439
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
6440
|
|
|
|
|
|
|
(__mmask8) -1); |
6441
|
|
|
|
|
|
|
} |
6442
|
|
|
|
|
|
|
|
6443
|
|
|
|
|
|
|
extern __inline __m512d |
6444
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6445
|
|
|
|
|
|
|
_mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M, |
6446
|
|
|
|
|
|
|
__m512d __V, const int __imm) |
6447
|
|
|
|
|
|
|
{ |
6448
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M, |
6449
|
|
|
|
|
|
|
(__v8df) __V, __imm, |
6450
|
|
|
|
|
|
|
(__v8df) __W, |
6451
|
|
|
|
|
|
|
(__mmask8) __U); |
6452
|
|
|
|
|
|
|
} |
6453
|
|
|
|
|
|
|
|
6454
|
|
|
|
|
|
|
extern __inline __m512d |
6455
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6456
|
|
|
|
|
|
|
_mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V, |
6457
|
|
|
|
|
|
|
const int __imm) |
6458
|
|
|
|
|
|
|
{ |
6459
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M, |
6460
|
|
|
|
|
|
|
(__v8df) __V, __imm, |
6461
|
|
|
|
|
|
|
(__v8df) |
6462
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
6463
|
|
|
|
|
|
|
(__mmask8) __U); |
6464
|
|
|
|
|
|
|
} |
6465
|
|
|
|
|
|
|
|
6466
|
|
|
|
|
|
|
extern __inline __m512d |
6467
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6468
|
|
|
|
|
|
|
_mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C, |
6469
|
|
|
|
|
|
|
const int __imm, const int __R) |
6470
|
|
|
|
|
|
|
{ |
6471
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A, |
6472
|
|
|
|
|
|
|
(__v8df) __B, |
6473
|
|
|
|
|
|
|
(__v8di) __C, |
6474
|
|
|
|
|
|
|
__imm, |
6475
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
6476
|
|
|
|
|
|
|
} |
6477
|
|
|
|
|
|
|
|
6478
|
|
|
|
|
|
|
extern __inline __m512d |
6479
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6480
|
|
|
|
|
|
|
_mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B, |
6481
|
|
|
|
|
|
|
__m512i __C, const int __imm, const int __R) |
6482
|
|
|
|
|
|
|
{ |
6483
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A, |
6484
|
|
|
|
|
|
|
(__v8df) __B, |
6485
|
|
|
|
|
|
|
(__v8di) __C, |
6486
|
|
|
|
|
|
|
__imm, |
6487
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
6488
|
|
|
|
|
|
|
} |
6489
|
|
|
|
|
|
|
|
6490
|
|
|
|
|
|
|
extern __inline __m512d |
6491
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6492
|
|
|
|
|
|
|
_mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B, |
6493
|
|
|
|
|
|
|
__m512i __C, const int __imm, const int __R) |
6494
|
|
|
|
|
|
|
{ |
6495
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A, |
6496
|
|
|
|
|
|
|
(__v8df) __B, |
6497
|
|
|
|
|
|
|
(__v8di) __C, |
6498
|
|
|
|
|
|
|
__imm, |
6499
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
6500
|
|
|
|
|
|
|
} |
6501
|
|
|
|
|
|
|
|
6502
|
|
|
|
|
|
|
extern __inline __m512 |
6503
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6504
|
|
|
|
|
|
|
_mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C, |
6505
|
|
|
|
|
|
|
const int __imm, const int __R) |
6506
|
|
|
|
|
|
|
{ |
6507
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A, |
6508
|
|
|
|
|
|
|
(__v16sf) __B, |
6509
|
|
|
|
|
|
|
(__v16si) __C, |
6510
|
|
|
|
|
|
|
__imm, |
6511
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
6512
|
|
|
|
|
|
|
} |
6513
|
|
|
|
|
|
|
|
6514
|
|
|
|
|
|
|
extern __inline __m512 |
6515
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6516
|
|
|
|
|
|
|
_mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B, |
6517
|
|
|
|
|
|
|
__m512i __C, const int __imm, const int __R) |
6518
|
|
|
|
|
|
|
{ |
6519
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A, |
6520
|
|
|
|
|
|
|
(__v16sf) __B, |
6521
|
|
|
|
|
|
|
(__v16si) __C, |
6522
|
|
|
|
|
|
|
__imm, |
6523
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
6524
|
|
|
|
|
|
|
} |
6525
|
|
|
|
|
|
|
|
6526
|
|
|
|
|
|
|
extern __inline __m512 |
6527
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6528
|
|
|
|
|
|
|
_mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B, |
6529
|
|
|
|
|
|
|
__m512i __C, const int __imm, const int __R) |
6530
|
|
|
|
|
|
|
{ |
6531
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A, |
6532
|
|
|
|
|
|
|
(__v16sf) __B, |
6533
|
|
|
|
|
|
|
(__v16si) __C, |
6534
|
|
|
|
|
|
|
__imm, |
6535
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
6536
|
|
|
|
|
|
|
} |
6537
|
|
|
|
|
|
|
|
6538
|
|
|
|
|
|
|
extern __inline __m128d |
6539
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6540
|
|
|
|
|
|
|
_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C, |
6541
|
|
|
|
|
|
|
const int __imm, const int __R) |
6542
|
|
|
|
|
|
|
{ |
6543
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A, |
6544
|
|
|
|
|
|
|
(__v2df) __B, |
6545
|
|
|
|
|
|
|
(__v2di) __C, __imm, |
6546
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
6547
|
|
|
|
|
|
|
} |
6548
|
|
|
|
|
|
|
|
6549
|
|
|
|
|
|
|
extern __inline __m128d |
6550
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6551
|
|
|
|
|
|
|
_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B, |
6552
|
|
|
|
|
|
|
__m128i __C, const int __imm, const int __R) |
6553
|
|
|
|
|
|
|
{ |
6554
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A, |
6555
|
|
|
|
|
|
|
(__v2df) __B, |
6556
|
|
|
|
|
|
|
(__v2di) __C, __imm, |
6557
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
6558
|
|
|
|
|
|
|
} |
6559
|
|
|
|
|
|
|
|
6560
|
|
|
|
|
|
|
extern __inline __m128d |
6561
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6562
|
|
|
|
|
|
|
_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B, |
6563
|
|
|
|
|
|
|
__m128i __C, const int __imm, const int __R) |
6564
|
|
|
|
|
|
|
{ |
6565
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A, |
6566
|
|
|
|
|
|
|
(__v2df) __B, |
6567
|
|
|
|
|
|
|
(__v2di) __C, |
6568
|
|
|
|
|
|
|
__imm, |
6569
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
6570
|
|
|
|
|
|
|
} |
6571
|
|
|
|
|
|
|
|
6572
|
|
|
|
|
|
|
extern __inline __m128 |
6573
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6574
|
|
|
|
|
|
|
_mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C, |
6575
|
|
|
|
|
|
|
const int __imm, const int __R) |
6576
|
|
|
|
|
|
|
{ |
6577
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A, |
6578
|
|
|
|
|
|
|
(__v4sf) __B, |
6579
|
|
|
|
|
|
|
(__v4si) __C, __imm, |
6580
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
6581
|
|
|
|
|
|
|
} |
6582
|
|
|
|
|
|
|
|
6583
|
|
|
|
|
|
|
extern __inline __m128 |
6584
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6585
|
|
|
|
|
|
|
_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B, |
6586
|
|
|
|
|
|
|
__m128i __C, const int __imm, const int __R) |
6587
|
|
|
|
|
|
|
{ |
6588
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A, |
6589
|
|
|
|
|
|
|
(__v4sf) __B, |
6590
|
|
|
|
|
|
|
(__v4si) __C, __imm, |
6591
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
6592
|
|
|
|
|
|
|
} |
6593
|
|
|
|
|
|
|
|
6594
|
|
|
|
|
|
|
extern __inline __m128 |
6595
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6596
|
|
|
|
|
|
|
_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B, |
6597
|
|
|
|
|
|
|
__m128i __C, const int __imm, const int __R) |
6598
|
|
|
|
|
|
|
{ |
6599
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A, |
6600
|
|
|
|
|
|
|
(__v4sf) __B, |
6601
|
|
|
|
|
|
|
(__v4si) __C, __imm, |
6602
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
6603
|
|
|
|
|
|
|
} |
6604
|
|
|
|
|
|
|
|
6605
|
|
|
|
|
|
|
#else |
6606
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_shufpd512_mask with
   _mm512_undefined_pd () as the fourth argument and an all-ones
   __mmask8.  */
#define _mm512_shuffle_pd(A, B, IMM)                                    \
  ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(A),        \
                                           (__v8df)(__m512d)(B),        \
                                           (int)(IMM),                  \
                                           (__v8df)(__m512d)_mm512_undefined_pd(), \
                                           (__mmask8)-1))
6611
|
|
|
|
|
|
|
|
6612
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_shufpd512_mask with W as the
   fourth argument and K as the mask.  */
#define _mm512_mask_shuffle_pd(W, K, A, B, IMM)                         \
  ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(A),        \
                                           (__v8df)(__m512d)(B),        \
                                           (int)(IMM),                  \
                                           (__v8df)(__m512d)(W),        \
                                           (__mmask8)(K)))
6617
|
|
|
|
|
|
|
|
6618
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_shufpd512_mask with
   _mm512_setzero_pd () as the fourth argument and K as the mask.  */
#define _mm512_maskz_shuffle_pd(K, A, B, IMM)                           \
  ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(A),        \
                                           (__v8df)(__m512d)(B),        \
                                           (int)(IMM),                  \
                                           (__v8df)(__m512d)_mm512_setzero_pd(), \
                                           (__mmask8)(K)))
6623
|
|
|
|
|
|
|
|
6624
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_shufps512_mask with
   _mm512_undefined_ps () as the fourth argument and an all-ones
   __mmask16.  */
#define _mm512_shuffle_ps(A, B, IMM)                                    \
  ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(A),         \
                                          (__v16sf)(__m512)(B),         \
                                          (int)(IMM),                   \
                                          (__v16sf)(__m512)_mm512_undefined_ps(), \
                                          (__mmask16)-1))
6629
|
|
|
|
|
|
|
|
6630
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_shufps512_mask with W as the
   fourth argument and K as the mask.  */
#define _mm512_mask_shuffle_ps(W, K, A, B, IMM)                         \
  ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(A),         \
                                          (__v16sf)(__m512)(B),         \
                                          (int)(IMM),                   \
                                          (__v16sf)(__m512)(W),         \
                                          (__mmask16)(K)))
6635
|
|
|
|
|
|
|
|
6636
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_shufps512_mask with
   _mm512_setzero_ps () as the fourth argument and K as the mask.  */
#define _mm512_maskz_shuffle_ps(K, A, B, IMM)                           \
  ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(A),         \
                                          (__v16sf)(__m512)(B),         \
                                          (int)(IMM),                   \
                                          (__v16sf)(__m512)_mm512_setzero_ps(), \
                                          (__mmask16)(K)))
6641
|
|
|
|
|
|
|
|
6642
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_fixupimmpd512_mask with an
   all-ones __mmask8 and RND as the final argument.  */
#define _mm512_fixupimm_round_pd(A, B, T, IMM, RND)                     \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(A),    \
                                               (__v8df)(__m512d)(B),    \
                                               (__v8di)(__m512i)(T),    \
                                               (int)(IMM),              \
                                               (__mmask8)(-1), (RND)))
6646
|
|
|
|
|
|
|
|
6647
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_fixupimmpd512_mask with K as
   the mask and RND as the final argument.  */
#define _mm512_mask_fixupimm_round_pd(A, K, B, T, IMM, RND)             \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(A),    \
                                               (__v8df)(__m512d)(B),    \
                                               (__v8di)(__m512i)(T),    \
                                               (int)(IMM),              \
                                               (__mmask8)(K), (RND)))
6651
|
|
|
|
|
|
|
|
6652
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_fixupimmpd512_maskz with K as
   the mask and RND as the final argument.  */
#define _mm512_maskz_fixupimm_round_pd(K, A, B, T, IMM, RND)            \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(A),   \
                                                (__v8df)(__m512d)(B),   \
                                                (__v8di)(__m512i)(T),   \
                                                (int)(IMM),             \
                                                (__mmask8)(K), (RND)))
6656
|
|
|
|
|
|
|
|
6657
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_fixupimmps512_mask with an
   all-ones __mmask16 and RND as the final argument.  */
#define _mm512_fixupimm_round_ps(A, B, T, IMM, RND)                     \
  ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(A),     \
                                              (__v16sf)(__m512)(B),     \
                                              (__v16si)(__m512i)(T),    \
                                              (int)(IMM),               \
                                              (__mmask16)(-1), (RND)))
6661
|
|
|
|
|
|
|
|
6662
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_fixupimmps512_mask with K as
   the mask and RND as the final argument.  */
#define _mm512_mask_fixupimm_round_ps(A, K, B, T, IMM, RND)             \
  ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(A),     \
                                              (__v16sf)(__m512)(B),     \
                                              (__v16si)(__m512i)(T),    \
                                              (int)(IMM),               \
                                              (__mmask16)(K), (RND)))
6666
|
|
|
|
|
|
|
|
6667
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_fixupimmps512_maskz with K as
   the mask and RND as the final argument.  */
#define _mm512_maskz_fixupimm_round_ps(K, A, B, T, IMM, RND)            \
  ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(A),    \
                                               (__v16sf)(__m512)(B),    \
                                               (__v16si)(__m512i)(T),   \
                                               (int)(IMM),              \
                                               (__mmask16)(K), (RND)))
6671
|
|
|
|
|
|
|
|
6672
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_fixupimmsd_mask with an
   all-ones __mmask8 and RND as the final argument.  */
#define _mm_fixupimm_round_sd(A, B, T, IMM, RND)                        \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(A),       \
                                            (__v2df)(__m128d)(B),       \
                                            (__v2di)(__m128i)(T),       \
                                            (int)(IMM),                 \
                                            (__mmask8)(-1), (RND)))
6676
|
|
|
|
|
|
|
|
6677
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_fixupimmsd_mask with K as the
   mask and RND as the final argument.  */
#define _mm_mask_fixupimm_round_sd(A, K, B, T, IMM, RND)                \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(A),       \
                                            (__v2df)(__m128d)(B),       \
                                            (__v2di)(__m128i)(T),       \
                                            (int)(IMM),                 \
                                            (__mmask8)(K), (RND)))
6681
|
|
|
|
|
|
|
|
6682
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_fixupimmsd_maskz with K as the
   mask and RND as the final argument.  */
#define _mm_maskz_fixupimm_round_sd(K, A, B, T, IMM, RND)               \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(A),      \
                                             (__v2df)(__m128d)(B),      \
                                             (__v2di)(__m128i)(T),      \
                                             (int)(IMM),                \
                                             (__mmask8)(K), (RND)))
6686
|
|
|
|
|
|
|
|
6687
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_fixupimmss_mask with an
   all-ones __mmask8 and RND as the final argument.  */
#define _mm_fixupimm_round_ss(A, B, T, IMM, RND)                        \
  ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(A),         \
                                           (__v4sf)(__m128)(B),         \
                                           (__v4si)(__m128i)(T),        \
                                           (int)(IMM),                  \
                                           (__mmask8)(-1), (RND)))
6691
|
|
|
|
|
|
|
|
6692
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_fixupimmss_mask with K as the
   mask and RND as the final argument.  */
#define _mm_mask_fixupimm_round_ss(A, K, B, T, IMM, RND)                \
  ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(A),         \
                                           (__v4sf)(__m128)(B),         \
                                           (__v4si)(__m128i)(T),        \
                                           (int)(IMM),                  \
                                           (__mmask8)(K), (RND)))
6696
|
|
|
|
|
|
|
|
6697
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_fixupimmss_maskz with K as the
   mask and RND as the final argument.  */
#define _mm_maskz_fixupimm_round_ss(K, A, B, T, IMM, RND)               \
  ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(A),        \
                                            (__v4sf)(__m128)(B),        \
                                            (__v4si)(__m128i)(T),       \
                                            (int)(IMM),                 \
                                            (__mmask8)(K), (RND)))
6701
|
|
|
|
|
|
|
#endif |
6702
|
|
|
|
|
|
|
|
6703
|
|
|
|
|
|
|
extern __inline __m512 |
6704
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6705
|
|
|
|
|
|
|
_mm512_movehdup_ps (__m512 __A) |
6706
|
|
|
|
|
|
|
{ |
6707
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, |
6708
|
|
|
|
|
|
|
(__v16sf) |
6709
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
6710
|
|
|
|
|
|
|
(__mmask16) -1); |
6711
|
|
|
|
|
|
|
} |
6712
|
|
|
|
|
|
|
|
6713
|
|
|
|
|
|
|
extern __inline __m512 |
6714
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6715
|
|
|
|
|
|
|
_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) |
6716
|
|
|
|
|
|
|
{ |
6717
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, |
6718
|
|
|
|
|
|
|
(__v16sf) __W, |
6719
|
|
|
|
|
|
|
(__mmask16) __U); |
6720
|
|
|
|
|
|
|
} |
6721
|
|
|
|
|
|
|
|
6722
|
|
|
|
|
|
|
extern __inline __m512 |
6723
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6724
|
|
|
|
|
|
|
_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) |
6725
|
|
|
|
|
|
|
{ |
6726
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, |
6727
|
|
|
|
|
|
|
(__v16sf) |
6728
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
6729
|
|
|
|
|
|
|
(__mmask16) __U); |
6730
|
|
|
|
|
|
|
} |
6731
|
|
|
|
|
|
|
|
6732
|
|
|
|
|
|
|
extern __inline __m512 |
6733
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6734
|
|
|
|
|
|
|
_mm512_moveldup_ps (__m512 __A) |
6735
|
|
|
|
|
|
|
{ |
6736
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, |
6737
|
|
|
|
|
|
|
(__v16sf) |
6738
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
6739
|
|
|
|
|
|
|
(__mmask16) -1); |
6740
|
|
|
|
|
|
|
} |
6741
|
|
|
|
|
|
|
|
6742
|
|
|
|
|
|
|
extern __inline __m512 |
6743
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6744
|
|
|
|
|
|
|
_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) |
6745
|
|
|
|
|
|
|
{ |
6746
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, |
6747
|
|
|
|
|
|
|
(__v16sf) __W, |
6748
|
|
|
|
|
|
|
(__mmask16) __U); |
6749
|
|
|
|
|
|
|
} |
6750
|
|
|
|
|
|
|
|
6751
|
|
|
|
|
|
|
extern __inline __m512 |
6752
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6753
|
|
|
|
|
|
|
_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) |
6754
|
|
|
|
|
|
|
{ |
6755
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, |
6756
|
|
|
|
|
|
|
(__v16sf) |
6757
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
6758
|
|
|
|
|
|
|
(__mmask16) __U); |
6759
|
|
|
|
|
|
|
} |
6760
|
|
|
|
|
|
|
|
6761
|
|
|
|
|
|
|
extern __inline __m512i |
6762
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6763
|
|
|
|
|
|
|
_mm512_or_si512 (__m512i __A, __m512i __B) |
6764
|
|
|
|
|
|
|
{ |
6765
|
|
|
|
|
|
|
return (__m512i) ((__v16su) __A | (__v16su) __B); |
6766
|
|
|
|
|
|
|
} |
6767
|
|
|
|
|
|
|
|
6768
|
|
|
|
|
|
|
extern __inline __m512i |
6769
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6770
|
|
|
|
|
|
|
_mm512_or_epi32 (__m512i __A, __m512i __B) |
6771
|
|
|
|
|
|
|
{ |
6772
|
|
|
|
|
|
|
return (__m512i) ((__v16su) __A | (__v16su) __B); |
6773
|
|
|
|
|
|
|
} |
6774
|
|
|
|
|
|
|
|
6775
|
|
|
|
|
|
|
extern __inline __m512i |
6776
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6777
|
|
|
|
|
|
|
_mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) |
6778
|
|
|
|
|
|
|
{ |
6779
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A, |
6780
|
|
|
|
|
|
|
(__v16si) __B, |
6781
|
|
|
|
|
|
|
(__v16si) __W, |
6782
|
|
|
|
|
|
|
(__mmask16) __U); |
6783
|
|
|
|
|
|
|
} |
6784
|
|
|
|
|
|
|
|
6785
|
|
|
|
|
|
|
extern __inline __m512i |
6786
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6787
|
|
|
|
|
|
|
_mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B) |
6788
|
|
|
|
|
|
|
{ |
6789
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A, |
6790
|
|
|
|
|
|
|
(__v16si) __B, |
6791
|
|
|
|
|
|
|
(__v16si) |
6792
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
6793
|
|
|
|
|
|
|
(__mmask16) __U); |
6794
|
|
|
|
|
|
|
} |
6795
|
|
|
|
|
|
|
|
6796
|
|
|
|
|
|
|
extern __inline __m512i |
6797
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6798
|
|
|
|
|
|
|
_mm512_or_epi64 (__m512i __A, __m512i __B) |
6799
|
|
|
|
|
|
|
{ |
6800
|
|
|
|
|
|
|
return (__m512i) ((__v8du) __A | (__v8du) __B); |
6801
|
|
|
|
|
|
|
} |
6802
|
|
|
|
|
|
|
|
6803
|
|
|
|
|
|
|
extern __inline __m512i |
6804
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6805
|
|
|
|
|
|
|
_mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) |
6806
|
|
|
|
|
|
|
{ |
6807
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A, |
6808
|
|
|
|
|
|
|
(__v8di) __B, |
6809
|
|
|
|
|
|
|
(__v8di) __W, |
6810
|
|
|
|
|
|
|
(__mmask8) __U); |
6811
|
|
|
|
|
|
|
} |
6812
|
|
|
|
|
|
|
|
6813
|
|
|
|
|
|
|
extern __inline __m512i |
6814
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6815
|
|
|
|
|
|
|
_mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B) |
6816
|
|
|
|
|
|
|
{ |
6817
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A, |
6818
|
|
|
|
|
|
|
(__v8di) __B, |
6819
|
|
|
|
|
|
|
(__v8di) |
6820
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
6821
|
|
|
|
|
|
|
(__mmask8) __U); |
6822
|
|
|
|
|
|
|
} |
6823
|
|
|
|
|
|
|
|
6824
|
|
|
|
|
|
|
extern __inline __m512i |
6825
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6826
|
|
|
|
|
|
|
_mm512_xor_si512 (__m512i __A, __m512i __B) |
6827
|
|
|
|
|
|
|
{ |
6828
|
0
|
|
|
|
|
|
return (__m512i) ((__v16su) __A ^ (__v16su) __B); |
6829
|
|
|
|
|
|
|
} |
6830
|
|
|
|
|
|
|
|
6831
|
|
|
|
|
|
|
extern __inline __m512i |
6832
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6833
|
|
|
|
|
|
|
_mm512_xor_epi32 (__m512i __A, __m512i __B) |
6834
|
|
|
|
|
|
|
{ |
6835
|
|
|
|
|
|
|
return (__m512i) ((__v16su) __A ^ (__v16su) __B); |
6836
|
|
|
|
|
|
|
} |
6837
|
|
|
|
|
|
|
|
6838
|
|
|
|
|
|
|
extern __inline __m512i |
6839
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6840
|
|
|
|
|
|
|
_mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) |
6841
|
|
|
|
|
|
|
{ |
6842
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A, |
6843
|
|
|
|
|
|
|
(__v16si) __B, |
6844
|
|
|
|
|
|
|
(__v16si) __W, |
6845
|
|
|
|
|
|
|
(__mmask16) __U); |
6846
|
|
|
|
|
|
|
} |
6847
|
|
|
|
|
|
|
|
6848
|
|
|
|
|
|
|
extern __inline __m512i |
6849
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6850
|
|
|
|
|
|
|
_mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B) |
6851
|
|
|
|
|
|
|
{ |
6852
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A, |
6853
|
|
|
|
|
|
|
(__v16si) __B, |
6854
|
|
|
|
|
|
|
(__v16si) |
6855
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
6856
|
|
|
|
|
|
|
(__mmask16) __U); |
6857
|
|
|
|
|
|
|
} |
6858
|
|
|
|
|
|
|
|
6859
|
|
|
|
|
|
|
extern __inline __m512i |
6860
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6861
|
|
|
|
|
|
|
_mm512_xor_epi64 (__m512i __A, __m512i __B) |
6862
|
|
|
|
|
|
|
{ |
6863
|
|
|
|
|
|
|
return (__m512i) ((__v8du) __A ^ (__v8du) __B); |
6864
|
|
|
|
|
|
|
} |
6865
|
|
|
|
|
|
|
|
6866
|
|
|
|
|
|
|
extern __inline __m512i |
6867
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6868
|
|
|
|
|
|
|
_mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) |
6869
|
|
|
|
|
|
|
{ |
6870
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A, |
6871
|
|
|
|
|
|
|
(__v8di) __B, |
6872
|
|
|
|
|
|
|
(__v8di) __W, |
6873
|
|
|
|
|
|
|
(__mmask8) __U); |
6874
|
|
|
|
|
|
|
} |
6875
|
|
|
|
|
|
|
|
6876
|
|
|
|
|
|
|
extern __inline __m512i |
6877
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6878
|
|
|
|
|
|
|
_mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B) |
6879
|
|
|
|
|
|
|
{ |
6880
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A, |
6881
|
|
|
|
|
|
|
(__v8di) __B, |
6882
|
|
|
|
|
|
|
(__v8di) |
6883
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
6884
|
|
|
|
|
|
|
(__mmask8) __U); |
6885
|
|
|
|
|
|
|
} |
6886
|
|
|
|
|
|
|
|
6887
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
6888
|
|
|
|
|
|
|
extern __inline __m512i |
6889
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6890
|
|
|
|
|
|
|
_mm512_rol_epi32 (__m512i __A, const int __B) |
6891
|
|
|
|
|
|
|
{ |
6892
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B, |
6893
|
|
|
|
|
|
|
(__v16si) |
6894
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
6895
|
|
|
|
|
|
|
(__mmask16) -1); |
6896
|
|
|
|
|
|
|
} |
6897
|
|
|
|
|
|
|
|
6898
|
|
|
|
|
|
|
extern __inline __m512i |
6899
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6900
|
|
|
|
|
|
|
_mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B) |
6901
|
|
|
|
|
|
|
{ |
6902
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B, |
6903
|
|
|
|
|
|
|
(__v16si) __W, |
6904
|
|
|
|
|
|
|
(__mmask16) __U); |
6905
|
|
|
|
|
|
|
} |
6906
|
|
|
|
|
|
|
|
6907
|
|
|
|
|
|
|
extern __inline __m512i |
6908
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6909
|
|
|
|
|
|
|
_mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B) |
6910
|
|
|
|
|
|
|
{ |
6911
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B, |
6912
|
|
|
|
|
|
|
(__v16si) |
6913
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
6914
|
|
|
|
|
|
|
(__mmask16) __U); |
6915
|
|
|
|
|
|
|
} |
6916
|
|
|
|
|
|
|
|
6917
|
|
|
|
|
|
|
extern __inline __m512i |
6918
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6919
|
|
|
|
|
|
|
_mm512_ror_epi32 (__m512i __A, int __B) |
6920
|
|
|
|
|
|
|
{ |
6921
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B, |
6922
|
|
|
|
|
|
|
(__v16si) |
6923
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
6924
|
|
|
|
|
|
|
(__mmask16) -1); |
6925
|
|
|
|
|
|
|
} |
6926
|
|
|
|
|
|
|
|
6927
|
|
|
|
|
|
|
extern __inline __m512i |
6928
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6929
|
|
|
|
|
|
|
_mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B) |
6930
|
|
|
|
|
|
|
{ |
6931
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B, |
6932
|
|
|
|
|
|
|
(__v16si) __W, |
6933
|
|
|
|
|
|
|
(__mmask16) __U); |
6934
|
|
|
|
|
|
|
} |
6935
|
|
|
|
|
|
|
|
6936
|
|
|
|
|
|
|
extern __inline __m512i |
6937
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6938
|
|
|
|
|
|
|
_mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B) |
6939
|
|
|
|
|
|
|
{ |
6940
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B, |
6941
|
|
|
|
|
|
|
(__v16si) |
6942
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
6943
|
|
|
|
|
|
|
(__mmask16) __U); |
6944
|
|
|
|
|
|
|
} |
6945
|
|
|
|
|
|
|
|
6946
|
|
|
|
|
|
|
extern __inline __m512i |
6947
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6948
|
|
|
|
|
|
|
_mm512_rol_epi64 (__m512i __A, const int __B) |
6949
|
|
|
|
|
|
|
{ |
6950
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B, |
6951
|
|
|
|
|
|
|
(__v8di) |
6952
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
6953
|
|
|
|
|
|
|
(__mmask8) -1); |
6954
|
|
|
|
|
|
|
} |
6955
|
|
|
|
|
|
|
|
6956
|
|
|
|
|
|
|
extern __inline __m512i |
6957
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6958
|
|
|
|
|
|
|
_mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B) |
6959
|
|
|
|
|
|
|
{ |
6960
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B, |
6961
|
|
|
|
|
|
|
(__v8di) __W, |
6962
|
|
|
|
|
|
|
(__mmask8) __U); |
6963
|
|
|
|
|
|
|
} |
6964
|
|
|
|
|
|
|
|
6965
|
|
|
|
|
|
|
extern __inline __m512i |
6966
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6967
|
|
|
|
|
|
|
_mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B) |
6968
|
|
|
|
|
|
|
{ |
6969
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B, |
6970
|
|
|
|
|
|
|
(__v8di) |
6971
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
6972
|
|
|
|
|
|
|
(__mmask8) __U); |
6973
|
|
|
|
|
|
|
} |
6974
|
|
|
|
|
|
|
|
6975
|
|
|
|
|
|
|
extern __inline __m512i |
6976
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6977
|
|
|
|
|
|
|
_mm512_ror_epi64 (__m512i __A, int __B) |
6978
|
|
|
|
|
|
|
{ |
6979
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B, |
6980
|
|
|
|
|
|
|
(__v8di) |
6981
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
6982
|
|
|
|
|
|
|
(__mmask8) -1); |
6983
|
|
|
|
|
|
|
} |
6984
|
|
|
|
|
|
|
|
6985
|
|
|
|
|
|
|
extern __inline __m512i |
6986
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6987
|
|
|
|
|
|
|
_mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B) |
6988
|
|
|
|
|
|
|
{ |
6989
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B, |
6990
|
|
|
|
|
|
|
(__v8di) __W, |
6991
|
|
|
|
|
|
|
(__mmask8) __U); |
6992
|
|
|
|
|
|
|
} |
6993
|
|
|
|
|
|
|
|
6994
|
|
|
|
|
|
|
extern __inline __m512i |
6995
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
6996
|
|
|
|
|
|
|
_mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B) |
6997
|
|
|
|
|
|
|
{ |
6998
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B, |
6999
|
|
|
|
|
|
|
(__v8di) |
7000
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
7001
|
|
|
|
|
|
|
(__mmask8) __U); |
7002
|
|
|
|
|
|
|
} |
7003
|
|
|
|
|
|
|
|
7004
|
|
|
|
|
|
|
#else |
7005
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_prold512_mask with
   _mm512_undefined_si512 () as the third argument and an all-ones
   __mmask16.  */
#define _mm512_rol_epi32(V, N)                                          \
  ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(V),        \
                                          (int)(N),                     \
                                          (__v16si)_mm512_undefined_si512 (), \
                                          (__mmask16)(-1)))
7010
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_prold512_mask with W as the
   third argument and K as the mask.  */
#define _mm512_mask_rol_epi32(W, K, V, N)                               \
  ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(V),        \
                                          (int)(N),                     \
                                          (__v16si)(__m512i)(W),        \
                                          (__mmask16)(K)))
7015
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_prold512_mask with
   _mm512_setzero_si512 () as the third argument and K as the mask.  */
#define _mm512_maskz_rol_epi32(K, V, N)                                 \
  ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(V),        \
                                          (int)(N),                     \
                                          (__v16si)_mm512_setzero_si512 (), \
                                          (__mmask16)(K)))
7020
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_prord512_mask with
   _mm512_undefined_si512 () as the third argument and an all-ones
   __mmask16.  */
#define _mm512_ror_epi32(V, N)                                          \
  ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(V),        \
                                          (int)(N),                     \
                                          (__v16si)_mm512_undefined_si512 (), \
                                          (__mmask16)(-1)))
7025
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_prord512_mask with W as the
   third argument and K as the mask.  */
#define _mm512_mask_ror_epi32(W, K, V, N)                               \
  ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(V),        \
                                          (int)(N),                     \
                                          (__v16si)(__m512i)(W),        \
                                          (__mmask16)(K)))
7030
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_prord512_mask with
   _mm512_setzero_si512 () as the third argument and K as the mask.  */
#define _mm512_maskz_ror_epi32(K, V, N)                                 \
  ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(V),        \
                                          (int)(N),                     \
                                          (__v16si)_mm512_setzero_si512 (), \
                                          (__mmask16)(K)))
7035
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_prolq512_mask with
   _mm512_undefined_si512 () as the third argument and an all-ones
   __mmask8.  */
#define _mm512_rol_epi64(V, N)                                          \
  ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(V),         \
                                          (int)(N),                     \
                                          (__v8di)_mm512_undefined_si512 (), \
                                          (__mmask8)(-1)))
7040
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_prolq512_mask with W as the
   third argument and K as the mask.  */
#define _mm512_mask_rol_epi64(W, K, V, N)                               \
  ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(V),         \
                                          (int)(N),                     \
                                          (__v8di)(__m512i)(W),         \
                                          (__mmask8)(K)))
7045
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_prolq512_mask with
   _mm512_setzero_si512 () as the third argument and K as the mask.  */
#define _mm512_maskz_rol_epi64(K, V, N)                                 \
  ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(V),         \
                                          (int)(N),                     \
                                          (__v8di)_mm512_setzero_si512 (), \
                                          (__mmask8)(K)))
7050
|
|
|
|
|
|
|
|
7051
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_prorq512_mask with
   _mm512_undefined_si512 () as the third argument and an all-ones
   __mmask8.  */
#define _mm512_ror_epi64(V, N)                                          \
  ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(V),         \
                                          (int)(N),                     \
                                          (__v8di)_mm512_undefined_si512 (), \
                                          (__mmask8)(-1)))
7056
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_prorq512_mask with W as the
   third argument and K as the mask.  */
#define _mm512_mask_ror_epi64(W, K, V, N)                               \
  ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(V),         \
                                          (int)(N),                     \
                                          (__v8di)(__m512i)(W),         \
                                          (__mmask8)(K)))
7061
|
|
|
|
|
|
|
/* Macro form: expands to __builtin_ia32_prorq512_mask with
   _mm512_setzero_si512 () as the third argument and K as the mask.  */
#define _mm512_maskz_ror_epi64(K, V, N)                                 \
  ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(V),         \
                                          (int)(N),                     \
                                          (__v8di)_mm512_setzero_si512 (), \
                                          (__mmask8)(K)))
7066
|
|
|
|
|
|
|
#endif |
7067
|
|
|
|
|
|
|
|
7068
|
|
|
|
|
|
|
extern __inline __m512i |
7069
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7070
|
|
|
|
|
|
|
_mm512_and_si512 (__m512i __A, __m512i __B) |
7071
|
|
|
|
|
|
|
{ |
7072
|
|
|
|
|
|
|
return (__m512i) ((__v16su) __A & (__v16su) __B); |
7073
|
|
|
|
|
|
|
} |
7074
|
|
|
|
|
|
|
|
7075
|
|
|
|
|
|
|
extern __inline __m512i |
7076
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7077
|
|
|
|
|
|
|
_mm512_and_epi32 (__m512i __A, __m512i __B) |
7078
|
|
|
|
|
|
|
{ |
7079
|
|
|
|
|
|
|
return (__m512i) ((__v16su) __A & (__v16su) __B); |
7080
|
|
|
|
|
|
|
} |
7081
|
|
|
|
|
|
|
|
7082
|
|
|
|
|
|
|
extern __inline __m512i |
7083
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7084
|
|
|
|
|
|
|
_mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) |
7085
|
|
|
|
|
|
|
{ |
7086
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A, |
7087
|
|
|
|
|
|
|
(__v16si) __B, |
7088
|
|
|
|
|
|
|
(__v16si) __W, |
7089
|
|
|
|
|
|
|
(__mmask16) __U); |
7090
|
|
|
|
|
|
|
} |
7091
|
|
|
|
|
|
|
|
7092
|
|
|
|
|
|
|
extern __inline __m512i |
7093
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7094
|
|
|
|
|
|
|
_mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B) |
7095
|
|
|
|
|
|
|
{ |
7096
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A, |
7097
|
|
|
|
|
|
|
(__v16si) __B, |
7098
|
|
|
|
|
|
|
(__v16si) |
7099
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
7100
|
|
|
|
|
|
|
(__mmask16) __U); |
7101
|
|
|
|
|
|
|
} |
7102
|
|
|
|
|
|
|
|
7103
|
|
|
|
|
|
|
extern __inline __m512i |
7104
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7105
|
|
|
|
|
|
|
_mm512_and_epi64 (__m512i __A, __m512i __B) |
7106
|
|
|
|
|
|
|
{ |
7107
|
|
|
|
|
|
|
return (__m512i) ((__v8du) __A & (__v8du) __B); |
7108
|
|
|
|
|
|
|
} |
7109
|
|
|
|
|
|
|
|
7110
|
|
|
|
|
|
|
extern __inline __m512i |
7111
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7112
|
|
|
|
|
|
|
_mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) |
7113
|
|
|
|
|
|
|
{ |
7114
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A, |
7115
|
|
|
|
|
|
|
(__v8di) __B, |
7116
|
|
|
|
|
|
|
(__v8di) __W, __U); |
7117
|
|
|
|
|
|
|
} |
7118
|
|
|
|
|
|
|
|
7119
|
|
|
|
|
|
|
extern __inline __m512i |
7120
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7121
|
|
|
|
|
|
|
_mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B) |
7122
|
|
|
|
|
|
|
{ |
7123
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A, |
7124
|
|
|
|
|
|
|
(__v8di) __B, |
7125
|
|
|
|
|
|
|
(__v8di) |
7126
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
7127
|
|
|
|
|
|
|
__U); |
7128
|
|
|
|
|
|
|
} |
7129
|
|
|
|
|
|
|
|
7130
|
|
|
|
|
|
|
extern __inline __m512i |
7131
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7132
|
|
|
|
|
|
|
_mm512_andnot_si512 (__m512i __A, __m512i __B) |
7133
|
|
|
|
|
|
|
{ |
7134
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, |
7135
|
|
|
|
|
|
|
(__v16si) __B, |
7136
|
|
|
|
|
|
|
(__v16si) |
7137
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
7138
|
|
|
|
|
|
|
(__mmask16) -1); |
7139
|
|
|
|
|
|
|
} |
7140
|
|
|
|
|
|
|
|
7141
|
|
|
|
|
|
|
extern __inline __m512i |
7142
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7143
|
|
|
|
|
|
|
_mm512_andnot_epi32 (__m512i __A, __m512i __B) |
7144
|
|
|
|
|
|
|
{ |
7145
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, |
7146
|
|
|
|
|
|
|
(__v16si) __B, |
7147
|
|
|
|
|
|
|
(__v16si) |
7148
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
7149
|
|
|
|
|
|
|
(__mmask16) -1); |
7150
|
|
|
|
|
|
|
} |
7151
|
|
|
|
|
|
|
|
7152
|
|
|
|
|
|
|
extern __inline __m512i |
7153
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7154
|
|
|
|
|
|
|
_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) |
7155
|
|
|
|
|
|
|
{ |
7156
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, |
7157
|
|
|
|
|
|
|
(__v16si) __B, |
7158
|
|
|
|
|
|
|
(__v16si) __W, |
7159
|
|
|
|
|
|
|
(__mmask16) __U); |
7160
|
|
|
|
|
|
|
} |
7161
|
|
|
|
|
|
|
|
7162
|
|
|
|
|
|
|
extern __inline __m512i |
7163
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7164
|
|
|
|
|
|
|
_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B) |
7165
|
|
|
|
|
|
|
{ |
7166
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, |
7167
|
|
|
|
|
|
|
(__v16si) __B, |
7168
|
|
|
|
|
|
|
(__v16si) |
7169
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
7170
|
|
|
|
|
|
|
(__mmask16) __U); |
7171
|
|
|
|
|
|
|
} |
7172
|
|
|
|
|
|
|
|
7173
|
|
|
|
|
|
|
extern __inline __m512i |
7174
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7175
|
|
|
|
|
|
|
_mm512_andnot_epi64 (__m512i __A, __m512i __B) |
7176
|
|
|
|
|
|
|
{ |
7177
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, |
7178
|
|
|
|
|
|
|
(__v8di) __B, |
7179
|
|
|
|
|
|
|
(__v8di) |
7180
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
7181
|
|
|
|
|
|
|
(__mmask8) -1); |
7182
|
|
|
|
|
|
|
} |
7183
|
|
|
|
|
|
|
|
7184
|
|
|
|
|
|
|
extern __inline __m512i |
7185
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7186
|
|
|
|
|
|
|
_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) |
7187
|
|
|
|
|
|
|
{ |
7188
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, |
7189
|
|
|
|
|
|
|
(__v8di) __B, |
7190
|
|
|
|
|
|
|
(__v8di) __W, __U); |
7191
|
|
|
|
|
|
|
} |
7192
|
|
|
|
|
|
|
|
7193
|
|
|
|
|
|
|
extern __inline __m512i |
7194
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7195
|
|
|
|
|
|
|
_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B) |
7196
|
|
|
|
|
|
|
{ |
7197
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, |
7198
|
|
|
|
|
|
|
(__v8di) __B, |
7199
|
|
|
|
|
|
|
(__v8di) |
7200
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
7201
|
|
|
|
|
|
|
__U); |
7202
|
|
|
|
|
|
|
} |
7203
|
|
|
|
|
|
|
|
7204
|
|
|
|
|
|
|
extern __inline __mmask16 |
7205
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7206
|
|
|
|
|
|
|
_mm512_test_epi32_mask (__m512i __A, __m512i __B) |
7207
|
|
|
|
|
|
|
{ |
7208
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, |
7209
|
|
|
|
|
|
|
(__v16si) __B, |
7210
|
|
|
|
|
|
|
(__mmask16) -1); |
7211
|
|
|
|
|
|
|
} |
7212
|
|
|
|
|
|
|
|
7213
|
|
|
|
|
|
|
extern __inline __mmask16 |
7214
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7215
|
|
|
|
|
|
|
_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) |
7216
|
|
|
|
|
|
|
{ |
7217
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, |
7218
|
|
|
|
|
|
|
(__v16si) __B, __U); |
7219
|
|
|
|
|
|
|
} |
7220
|
|
|
|
|
|
|
|
7221
|
|
|
|
|
|
|
extern __inline __mmask8 |
7222
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7223
|
|
|
|
|
|
|
_mm512_test_epi64_mask (__m512i __A, __m512i __B) |
7224
|
|
|
|
|
|
|
{ |
7225
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, |
7226
|
|
|
|
|
|
|
(__v8di) __B, |
7227
|
|
|
|
|
|
|
(__mmask8) -1); |
7228
|
|
|
|
|
|
|
} |
7229
|
|
|
|
|
|
|
|
7230
|
|
|
|
|
|
|
extern __inline __mmask8 |
7231
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7232
|
|
|
|
|
|
|
_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) |
7233
|
|
|
|
|
|
|
{ |
7234
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U); |
7235
|
|
|
|
|
|
|
} |
7236
|
|
|
|
|
|
|
|
7237
|
|
|
|
|
|
|
extern __inline __mmask16 |
7238
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7239
|
|
|
|
|
|
|
_mm512_testn_epi32_mask (__m512i __A, __m512i __B) |
7240
|
|
|
|
|
|
|
{ |
7241
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A, |
7242
|
|
|
|
|
|
|
(__v16si) __B, |
7243
|
|
|
|
|
|
|
(__mmask16) -1); |
7244
|
|
|
|
|
|
|
} |
7245
|
|
|
|
|
|
|
|
7246
|
|
|
|
|
|
|
extern __inline __mmask16 |
7247
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7248
|
|
|
|
|
|
|
_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) |
7249
|
|
|
|
|
|
|
{ |
7250
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A, |
7251
|
|
|
|
|
|
|
(__v16si) __B, __U); |
7252
|
|
|
|
|
|
|
} |
7253
|
|
|
|
|
|
|
|
7254
|
|
|
|
|
|
|
extern __inline __mmask8 |
7255
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7256
|
|
|
|
|
|
|
_mm512_testn_epi64_mask (__m512i __A, __m512i __B) |
7257
|
|
|
|
|
|
|
{ |
7258
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A, |
7259
|
|
|
|
|
|
|
(__v8di) __B, |
7260
|
|
|
|
|
|
|
(__mmask8) -1); |
7261
|
|
|
|
|
|
|
} |
7262
|
|
|
|
|
|
|
|
7263
|
|
|
|
|
|
|
extern __inline __mmask8 |
7264
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7265
|
|
|
|
|
|
|
_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) |
7266
|
|
|
|
|
|
|
{ |
7267
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A, |
7268
|
|
|
|
|
|
|
(__v8di) __B, __U); |
7269
|
|
|
|
|
|
|
} |
7270
|
|
|
|
|
|
|
|
7271
|
|
|
|
|
|
|
extern __inline __m512i |
7272
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7273
|
|
|
|
|
|
|
_mm512_unpackhi_epi32 (__m512i __A, __m512i __B) |
7274
|
|
|
|
|
|
|
{ |
7275
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A, |
7276
|
|
|
|
|
|
|
(__v16si) __B, |
7277
|
|
|
|
|
|
|
(__v16si) |
7278
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
7279
|
|
|
|
|
|
|
(__mmask16) -1); |
7280
|
|
|
|
|
|
|
} |
7281
|
|
|
|
|
|
|
|
7282
|
|
|
|
|
|
|
extern __inline __m512i |
7283
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7284
|
|
|
|
|
|
|
_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A, |
7285
|
|
|
|
|
|
|
__m512i __B) |
7286
|
|
|
|
|
|
|
{ |
7287
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A, |
7288
|
|
|
|
|
|
|
(__v16si) __B, |
7289
|
|
|
|
|
|
|
(__v16si) __W, |
7290
|
|
|
|
|
|
|
(__mmask16) __U); |
7291
|
|
|
|
|
|
|
} |
7292
|
|
|
|
|
|
|
|
7293
|
|
|
|
|
|
|
extern __inline __m512i |
7294
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7295
|
|
|
|
|
|
|
_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B) |
7296
|
|
|
|
|
|
|
{ |
7297
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A, |
7298
|
|
|
|
|
|
|
(__v16si) __B, |
7299
|
|
|
|
|
|
|
(__v16si) |
7300
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
7301
|
|
|
|
|
|
|
(__mmask16) __U); |
7302
|
|
|
|
|
|
|
} |
7303
|
|
|
|
|
|
|
|
7304
|
|
|
|
|
|
|
extern __inline __m512i |
7305
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7306
|
|
|
|
|
|
|
_mm512_unpackhi_epi64 (__m512i __A, __m512i __B) |
7307
|
|
|
|
|
|
|
{ |
7308
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A, |
7309
|
|
|
|
|
|
|
(__v8di) __B, |
7310
|
|
|
|
|
|
|
(__v8di) |
7311
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
7312
|
|
|
|
|
|
|
(__mmask8) -1); |
7313
|
|
|
|
|
|
|
} |
7314
|
|
|
|
|
|
|
|
7315
|
|
|
|
|
|
|
extern __inline __m512i |
7316
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7317
|
|
|
|
|
|
|
_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) |
7318
|
|
|
|
|
|
|
{ |
7319
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A, |
7320
|
|
|
|
|
|
|
(__v8di) __B, |
7321
|
|
|
|
|
|
|
(__v8di) __W, |
7322
|
|
|
|
|
|
|
(__mmask8) __U); |
7323
|
|
|
|
|
|
|
} |
7324
|
|
|
|
|
|
|
|
7325
|
|
|
|
|
|
|
extern __inline __m512i |
7326
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7327
|
|
|
|
|
|
|
_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B) |
7328
|
|
|
|
|
|
|
{ |
7329
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A, |
7330
|
|
|
|
|
|
|
(__v8di) __B, |
7331
|
|
|
|
|
|
|
(__v8di) |
7332
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
7333
|
|
|
|
|
|
|
(__mmask8) __U); |
7334
|
|
|
|
|
|
|
} |
7335
|
|
|
|
|
|
|
|
7336
|
|
|
|
|
|
|
extern __inline __m512i |
7337
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7338
|
|
|
|
|
|
|
_mm512_unpacklo_epi32 (__m512i __A, __m512i __B) |
7339
|
|
|
|
|
|
|
{ |
7340
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A, |
7341
|
|
|
|
|
|
|
(__v16si) __B, |
7342
|
|
|
|
|
|
|
(__v16si) |
7343
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
7344
|
|
|
|
|
|
|
(__mmask16) -1); |
7345
|
|
|
|
|
|
|
} |
7346
|
|
|
|
|
|
|
|
7347
|
|
|
|
|
|
|
extern __inline __m512i |
7348
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7349
|
|
|
|
|
|
|
_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A, |
7350
|
|
|
|
|
|
|
__m512i __B) |
7351
|
|
|
|
|
|
|
{ |
7352
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A, |
7353
|
|
|
|
|
|
|
(__v16si) __B, |
7354
|
|
|
|
|
|
|
(__v16si) __W, |
7355
|
|
|
|
|
|
|
(__mmask16) __U); |
7356
|
|
|
|
|
|
|
} |
7357
|
|
|
|
|
|
|
|
7358
|
|
|
|
|
|
|
extern __inline __m512i |
7359
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7360
|
|
|
|
|
|
|
_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B) |
7361
|
|
|
|
|
|
|
{ |
7362
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A, |
7363
|
|
|
|
|
|
|
(__v16si) __B, |
7364
|
|
|
|
|
|
|
(__v16si) |
7365
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
7366
|
|
|
|
|
|
|
(__mmask16) __U); |
7367
|
|
|
|
|
|
|
} |
7368
|
|
|
|
|
|
|
|
7369
|
|
|
|
|
|
|
extern __inline __m512i |
7370
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7371
|
|
|
|
|
|
|
_mm512_unpacklo_epi64 (__m512i __A, __m512i __B) |
7372
|
|
|
|
|
|
|
{ |
7373
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A, |
7374
|
|
|
|
|
|
|
(__v8di) __B, |
7375
|
|
|
|
|
|
|
(__v8di) |
7376
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
7377
|
|
|
|
|
|
|
(__mmask8) -1); |
7378
|
|
|
|
|
|
|
} |
7379
|
|
|
|
|
|
|
|
7380
|
|
|
|
|
|
|
extern __inline __m512i |
7381
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7382
|
|
|
|
|
|
|
_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) |
7383
|
|
|
|
|
|
|
{ |
7384
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A, |
7385
|
|
|
|
|
|
|
(__v8di) __B, |
7386
|
|
|
|
|
|
|
(__v8di) __W, |
7387
|
|
|
|
|
|
|
(__mmask8) __U); |
7388
|
|
|
|
|
|
|
} |
7389
|
|
|
|
|
|
|
|
7390
|
|
|
|
|
|
|
extern __inline __m512i |
7391
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7392
|
|
|
|
|
|
|
_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) |
7393
|
|
|
|
|
|
|
{ |
7394
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A, |
7395
|
|
|
|
|
|
|
(__v8di) __B, |
7396
|
|
|
|
|
|
|
(__v8di) |
7397
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
7398
|
|
|
|
|
|
|
(__mmask8) __U); |
7399
|
|
|
|
|
|
|
} |
7400
|
|
|
|
|
|
|
|
7401
|
|
|
|
|
|
|
/* Scalar single-precision to 64-bit integer conversions taking an
   explicit rounding operand.  Only provided when __x86_64__ is defined.
   With __OPTIMIZE__ they are always-inline functions; otherwise they
   are macros expanding to the same builtins (presumably because the
   builtins need the rounding operand as a compile-time constant --
   confirm against the GCC builtin documentation).  */
#ifdef __x86_64__
#ifdef __OPTIMIZE__
/* Result of __builtin_ia32_vcvtss2usi64 on __A, as unsigned long long.  */
extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_u64 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (unsigned long long) __builtin_ia32_vcvtss2usi64 (__src, __R);
}

/* Result of __builtin_ia32_vcvtss2si64 on __A, as long long.  */
extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_si64 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (long long) __builtin_ia32_vcvtss2si64 (__src, __R);
}

/* Same builtin call as _mm_cvt_roundss_si64.  */
extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_i64 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (long long) __builtin_ia32_vcvtss2si64 (__src, __R);
}

/* Result of __builtin_ia32_vcvttss2usi64 on __A, as unsigned long long.  */
extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (unsigned long long) __builtin_ia32_vcvttss2usi64 (__src, __R);
}

/* Result of __builtin_ia32_vcvttss2si64 on __A, as long long.  */
extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (long long) __builtin_ia32_vcvttss2si64 (__src, __R);
}

/* Same builtin call as _mm_cvtt_roundss_i64.  */
extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (long long) __builtin_ia32_vcvttss2si64 (__src, __R);
}
#else
#define _mm_cvt_roundss_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvtss2usi64 (A, B))

#define _mm_cvt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 (A, B))

#define _mm_cvt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 (A, B))

#define _mm_cvtt_roundss_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvttss2usi64 (A, B))

#define _mm_cvtt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 (A, B))

#define _mm_cvtt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 (A, B))
#endif
#endif
7464
|
|
|
|
|
|
|
|
7465
|
|
|
|
|
|
|
/* Scalar single-precision to 32-bit integer conversions taking an
   explicit rounding operand.  With __OPTIMIZE__ they are always-inline
   functions; otherwise they are macros expanding to the same
   builtins.  */
#ifdef __OPTIMIZE__
/* Result of __builtin_ia32_vcvtss2usi32 on __A, as unsigned.  */
extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_u32 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (unsigned) __builtin_ia32_vcvtss2usi32 (__src, __R);
}

/* Result of __builtin_ia32_vcvtss2si32 on __A, as int.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_si32 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (int) __builtin_ia32_vcvtss2si32 (__src, __R);
}

/* Same builtin call as _mm_cvt_roundss_si32.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_i32 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (int) __builtin_ia32_vcvtss2si32 (__src, __R);
}

/* Result of __builtin_ia32_vcvttss2usi32 on __A, as unsigned.  */
extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_u32 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (unsigned) __builtin_ia32_vcvttss2usi32 (__src, __R);
}

/* Result of __builtin_ia32_vcvttss2si32 on __A, as int.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_i32 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (int) __builtin_ia32_vcvttss2si32 (__src, __R);
}

/* Same builtin call as _mm_cvtt_roundss_i32.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_si32 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (int) __builtin_ia32_vcvttss2si32 (__src, __R);
}
#else
#define _mm_cvt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtss2usi32 (A, B))

#define _mm_cvt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 (A, B))

#define _mm_cvt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 (A, B))

#define _mm_cvtt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttss2usi32 (A, B))

#define _mm_cvtt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 (A, B))

#define _mm_cvtt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 (A, B))
#endif
7526
|
|
|
|
|
|
|
|
7527
|
|
|
|
|
|
|
/* Scalar double-precision to 64-bit integer conversions taking an
   explicit rounding operand.  Only provided when __x86_64__ is defined.
   With __OPTIMIZE__ they are always-inline functions; otherwise they
   are macros expanding to the same builtins.  */
#ifdef __x86_64__
#ifdef __OPTIMIZE__
/* Result of __builtin_ia32_vcvtsd2usi64 on __A, as unsigned long long.  */
extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_u64 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 (__src, __R);
}

/* Result of __builtin_ia32_vcvtsd2si64 on __A, as long long.  */
extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_si64 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (long long) __builtin_ia32_vcvtsd2si64 (__src, __R);
}

/* Same builtin call as _mm_cvt_roundsd_si64.  */
extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_i64 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (long long) __builtin_ia32_vcvtsd2si64 (__src, __R);
}

/* Result of __builtin_ia32_vcvttsd2usi64 on __A, as unsigned long long.  */
extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 (__src, __R);
}

/* Result of __builtin_ia32_vcvttsd2si64 on __A, as long long.  */
extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (long long) __builtin_ia32_vcvttsd2si64 (__src, __R);
}

/* Same builtin call as _mm_cvtt_roundsd_si64.  */
extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (long long) __builtin_ia32_vcvttsd2si64 (__src, __R);
}
#else
#define _mm_cvt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvtsd2usi64 (A, B))

#define _mm_cvt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 (A, B))

#define _mm_cvt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 (A, B))

#define _mm_cvtt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvttsd2usi64 (A, B))

#define _mm_cvtt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 (A, B))

#define _mm_cvtt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 (A, B))
#endif
#endif
7590
|
|
|
|
|
|
|
|
7591
|
|
|
|
|
|
|
/* Scalar double-precision to 32-bit integer conversions taking an
   explicit rounding operand.  With __OPTIMIZE__ they are always-inline
   functions; otherwise they are macros expanding to the same
   builtins.  */
#ifdef __OPTIMIZE__
/* Result of __builtin_ia32_vcvtsd2usi32 on __A, as unsigned.  */
extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_u32 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (unsigned) __builtin_ia32_vcvtsd2usi32 (__src, __R);
}

/* Result of __builtin_ia32_vcvtsd2si32 on __A, as int.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_si32 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (int) __builtin_ia32_vcvtsd2si32 (__src, __R);
}

/* Same builtin call as _mm_cvt_roundsd_si32.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_i32 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (int) __builtin_ia32_vcvtsd2si32 (__src, __R);
}

/* Result of __builtin_ia32_vcvttsd2usi32 on __A, as unsigned.  */
extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (unsigned) __builtin_ia32_vcvttsd2usi32 (__src, __R);
}

/* Result of __builtin_ia32_vcvttsd2si32 on __A, as int.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (int) __builtin_ia32_vcvttsd2si32 (__src, __R);
}

/* Same builtin call as _mm_cvtt_roundsd_i32.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (int) __builtin_ia32_vcvttsd2si32 (__src, __R);
}
#else
#define _mm_cvt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtsd2usi32 (A, B))

#define _mm_cvt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 (A, B))

#define _mm_cvt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 (A, B))

#define _mm_cvtt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttsd2usi32 (A, B))

#define _mm_cvtt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 (A, B))

#define _mm_cvtt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 (A, B))
#endif
7652
|
|
|
|
|
|
|
|
7653
|
|
|
|
|
|
|
extern __inline __m512d |
7654
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7655
|
|
|
|
|
|
|
_mm512_movedup_pd (__m512d __A) |
7656
|
|
|
|
|
|
|
{ |
7657
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, |
7658
|
|
|
|
|
|
|
(__v8df) |
7659
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
7660
|
|
|
|
|
|
|
(__mmask8) -1); |
7661
|
|
|
|
|
|
|
} |
7662
|
|
|
|
|
|
|
|
7663
|
|
|
|
|
|
|
extern __inline __m512d |
7664
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7665
|
|
|
|
|
|
|
_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A) |
7666
|
|
|
|
|
|
|
{ |
7667
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, |
7668
|
|
|
|
|
|
|
(__v8df) __W, |
7669
|
|
|
|
|
|
|
(__mmask8) __U); |
7670
|
|
|
|
|
|
|
} |
7671
|
|
|
|
|
|
|
|
7672
|
|
|
|
|
|
|
extern __inline __m512d |
7673
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7674
|
|
|
|
|
|
|
_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A) |
7675
|
|
|
|
|
|
|
{ |
7676
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, |
7677
|
|
|
|
|
|
|
(__v8df) |
7678
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
7679
|
|
|
|
|
|
|
(__mmask8) __U); |
7680
|
|
|
|
|
|
|
} |
7681
|
|
|
|
|
|
|
|
7682
|
|
|
|
|
|
|
extern __inline __m512d |
7683
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7684
|
|
|
|
|
|
|
_mm512_unpacklo_pd (__m512d __A, __m512d __B) |
7685
|
|
|
|
|
|
|
{ |
7686
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A, |
7687
|
|
|
|
|
|
|
(__v8df) __B, |
7688
|
|
|
|
|
|
|
(__v8df) |
7689
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
7690
|
|
|
|
|
|
|
(__mmask8) -1); |
7691
|
|
|
|
|
|
|
} |
7692
|
|
|
|
|
|
|
|
7693
|
|
|
|
|
|
|
extern __inline __m512d |
7694
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7695
|
|
|
|
|
|
|
_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) |
7696
|
|
|
|
|
|
|
{ |
7697
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A, |
7698
|
|
|
|
|
|
|
(__v8df) __B, |
7699
|
|
|
|
|
|
|
(__v8df) __W, |
7700
|
|
|
|
|
|
|
(__mmask8) __U); |
7701
|
|
|
|
|
|
|
} |
7702
|
|
|
|
|
|
|
|
7703
|
|
|
|
|
|
|
extern __inline __m512d |
7704
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7705
|
|
|
|
|
|
|
_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B) |
7706
|
|
|
|
|
|
|
{ |
7707
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A, |
7708
|
|
|
|
|
|
|
(__v8df) __B, |
7709
|
|
|
|
|
|
|
(__v8df) |
7710
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
7711
|
|
|
|
|
|
|
(__mmask8) __U); |
7712
|
|
|
|
|
|
|
} |
7713
|
|
|
|
|
|
|
|
7714
|
|
|
|
|
|
|
extern __inline __m512d |
7715
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7716
|
|
|
|
|
|
|
_mm512_unpackhi_pd (__m512d __A, __m512d __B) |
7717
|
|
|
|
|
|
|
{ |
7718
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A, |
7719
|
|
|
|
|
|
|
(__v8df) __B, |
7720
|
|
|
|
|
|
|
(__v8df) |
7721
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
7722
|
|
|
|
|
|
|
(__mmask8) -1); |
7723
|
|
|
|
|
|
|
} |
7724
|
|
|
|
|
|
|
|
7725
|
|
|
|
|
|
|
extern __inline __m512d |
7726
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7727
|
|
|
|
|
|
|
_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) |
7728
|
|
|
|
|
|
|
{ |
7729
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A, |
7730
|
|
|
|
|
|
|
(__v8df) __B, |
7731
|
|
|
|
|
|
|
(__v8df) __W, |
7732
|
|
|
|
|
|
|
(__mmask8) __U); |
7733
|
|
|
|
|
|
|
} |
7734
|
|
|
|
|
|
|
|
7735
|
|
|
|
|
|
|
extern __inline __m512d |
7736
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7737
|
|
|
|
|
|
|
_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B) |
7738
|
|
|
|
|
|
|
{ |
7739
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A, |
7740
|
|
|
|
|
|
|
(__v8df) __B, |
7741
|
|
|
|
|
|
|
(__v8df) |
7742
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
7743
|
|
|
|
|
|
|
(__mmask8) __U); |
7744
|
|
|
|
|
|
|
} |
7745
|
|
|
|
|
|
|
|
7746
|
|
|
|
|
|
|
extern __inline __m512 |
7747
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7748
|
|
|
|
|
|
|
_mm512_unpackhi_ps (__m512 __A, __m512 __B) |
7749
|
|
|
|
|
|
|
{ |
7750
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A, |
7751
|
|
|
|
|
|
|
(__v16sf) __B, |
7752
|
|
|
|
|
|
|
(__v16sf) |
7753
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
7754
|
|
|
|
|
|
|
(__mmask16) -1); |
7755
|
|
|
|
|
|
|
} |
7756
|
|
|
|
|
|
|
|
7757
|
|
|
|
|
|
|
extern __inline __m512 |
7758
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7759
|
|
|
|
|
|
|
_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) |
7760
|
|
|
|
|
|
|
{ |
7761
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A, |
7762
|
|
|
|
|
|
|
(__v16sf) __B, |
7763
|
|
|
|
|
|
|
(__v16sf) __W, |
7764
|
|
|
|
|
|
|
(__mmask16) __U); |
7765
|
|
|
|
|
|
|
} |
7766
|
|
|
|
|
|
|
|
7767
|
|
|
|
|
|
|
extern __inline __m512 |
7768
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7769
|
|
|
|
|
|
|
_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B) |
7770
|
|
|
|
|
|
|
{ |
7771
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A, |
7772
|
|
|
|
|
|
|
(__v16sf) __B, |
7773
|
|
|
|
|
|
|
(__v16sf) |
7774
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
7775
|
|
|
|
|
|
|
(__mmask16) __U); |
7776
|
|
|
|
|
|
|
} |
7777
|
|
|
|
|
|
|
|
7778
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
7779
|
|
|
|
|
|
|
extern __inline __m512d |
7780
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7781
|
|
|
|
|
|
|
_mm512_cvt_roundps_pd (__m256 __A, const int __R) |
7782
|
|
|
|
|
|
|
{ |
7783
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, |
7784
|
|
|
|
|
|
|
(__v8df) |
7785
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
7786
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
7787
|
|
|
|
|
|
|
} |
7788
|
|
|
|
|
|
|
|
7789
|
|
|
|
|
|
|
extern __inline __m512d |
7790
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7791
|
|
|
|
|
|
|
_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A, |
7792
|
|
|
|
|
|
|
const int __R) |
7793
|
|
|
|
|
|
|
{ |
7794
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, |
7795
|
|
|
|
|
|
|
(__v8df) __W, |
7796
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
7797
|
|
|
|
|
|
|
} |
7798
|
|
|
|
|
|
|
|
7799
|
|
|
|
|
|
|
extern __inline __m512d |
7800
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7801
|
|
|
|
|
|
|
_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R) |
7802
|
|
|
|
|
|
|
{ |
7803
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, |
7804
|
|
|
|
|
|
|
(__v8df) |
7805
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
7806
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
7807
|
|
|
|
|
|
|
} |
7808
|
|
|
|
|
|
|
|
7809
|
|
|
|
|
|
|
extern __inline __m512 |
7810
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7811
|
|
|
|
|
|
|
_mm512_cvt_roundph_ps (__m256i __A, const int __R) |
7812
|
|
|
|
|
|
|
{ |
7813
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, |
7814
|
|
|
|
|
|
|
(__v16sf) |
7815
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
7816
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
7817
|
|
|
|
|
|
|
} |
7818
|
|
|
|
|
|
|
|
7819
|
|
|
|
|
|
|
extern __inline __m512 |
7820
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7821
|
|
|
|
|
|
|
_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A, |
7822
|
|
|
|
|
|
|
const int __R) |
7823
|
|
|
|
|
|
|
{ |
7824
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, |
7825
|
|
|
|
|
|
|
(__v16sf) __W, |
7826
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
7827
|
|
|
|
|
|
|
} |
7828
|
|
|
|
|
|
|
|
7829
|
|
|
|
|
|
|
extern __inline __m512 |
7830
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7831
|
|
|
|
|
|
|
_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R) |
7832
|
|
|
|
|
|
|
{ |
7833
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, |
7834
|
|
|
|
|
|
|
(__v16sf) |
7835
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
7836
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
7837
|
|
|
|
|
|
|
} |
7838
|
|
|
|
|
|
|
|
7839
|
|
|
|
|
|
|
extern __inline __m256i |
7840
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7841
|
|
|
|
|
|
|
_mm512_cvt_roundps_ph (__m512 __A, const int __I) |
7842
|
|
|
|
|
|
|
{ |
7843
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, |
7844
|
|
|
|
|
|
|
__I, |
7845
|
|
|
|
|
|
|
(__v16hi) |
7846
|
|
|
|
|
|
|
_mm256_undefined_si256 (), |
7847
|
|
|
|
|
|
|
-1); |
7848
|
|
|
|
|
|
|
} |
7849
|
|
|
|
|
|
|
|
7850
|
|
|
|
|
|
|
extern __inline __m256i |
7851
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7852
|
|
|
|
|
|
|
_mm512_cvtps_ph (__m512 __A, const int __I) |
7853
|
|
|
|
|
|
|
{ |
7854
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, |
7855
|
|
|
|
|
|
|
__I, |
7856
|
|
|
|
|
|
|
(__v16hi) |
7857
|
|
|
|
|
|
|
_mm256_undefined_si256 (), |
7858
|
|
|
|
|
|
|
-1); |
7859
|
|
|
|
|
|
|
} |
7860
|
|
|
|
|
|
|
|
7861
|
|
|
|
|
|
|
extern __inline __m256i |
7862
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7863
|
|
|
|
|
|
|
_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A, |
7864
|
|
|
|
|
|
|
const int __I) |
7865
|
|
|
|
|
|
|
{ |
7866
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, |
7867
|
|
|
|
|
|
|
__I, |
7868
|
|
|
|
|
|
|
(__v16hi) __U, |
7869
|
|
|
|
|
|
|
(__mmask16) __W); |
7870
|
|
|
|
|
|
|
} |
7871
|
|
|
|
|
|
|
|
7872
|
|
|
|
|
|
|
extern __inline __m256i |
7873
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7874
|
|
|
|
|
|
|
_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I) |
7875
|
|
|
|
|
|
|
{ |
7876
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, |
7877
|
|
|
|
|
|
|
__I, |
7878
|
|
|
|
|
|
|
(__v16hi) __U, |
7879
|
|
|
|
|
|
|
(__mmask16) __W); |
7880
|
|
|
|
|
|
|
} |
7881
|
|
|
|
|
|
|
|
7882
|
|
|
|
|
|
|
extern __inline __m256i |
7883
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7884
|
|
|
|
|
|
|
_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I) |
7885
|
|
|
|
|
|
|
{ |
7886
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, |
7887
|
|
|
|
|
|
|
__I, |
7888
|
|
|
|
|
|
|
(__v16hi) |
7889
|
|
|
|
|
|
|
_mm256_setzero_si256 (), |
7890
|
|
|
|
|
|
|
(__mmask16) __W); |
7891
|
|
|
|
|
|
|
} |
7892
|
|
|
|
|
|
|
|
7893
|
|
|
|
|
|
|
extern __inline __m256i |
7894
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7895
|
|
|
|
|
|
|
_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I) |
7896
|
|
|
|
|
|
|
{ |
7897
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, |
7898
|
|
|
|
|
|
|
__I, |
7899
|
|
|
|
|
|
|
(__v16hi) |
7900
|
|
|
|
|
|
|
_mm256_setzero_si256 (), |
7901
|
|
|
|
|
|
|
(__mmask16) __W); |
7902
|
|
|
|
|
|
|
} |
7903
|
|
|
|
|
|
|
#else |
7904
|
|
|
|
|
|
|
/* Macro forms of the cvt_roundps_pd intrinsics, used when __OPTIMIZE__
   is not defined.  Each expansion is wrapped in parentheses and every
   argument is parenthesized and cast as in the inline definitions, so
   the macro result can be used like a function call inside a larger
   expression (e.g. followed by a postfix operator).  */
#define _mm512_cvt_roundps_pd(A, B)                                     \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__m256)(A),      \
                                              (__v8df)                  \
                                              _mm512_undefined_pd (),   \
                                              (__mmask8) -1, (B)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, B)                          \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__m256)(A),      \
                                              (__v8df)(__m512d)(W),     \
                                              (__mmask8)(U), (B)))

#define _mm512_maskz_cvt_roundps_pd(U, A, B)                            \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__m256)(A),      \
                                              (__v8df)                  \
                                              _mm512_setzero_pd (),     \
                                              (__mmask8)(U), (B)))
7912
|
|
|
|
|
|
|
|
7913
|
|
|
|
|
|
|
/* Macro forms of the cvt_roundph_ps intrinsics, used when __OPTIMIZE__
   is not defined.  Each expansion is wrapped in parentheses and every
   argument is parenthesized, so the macro result behaves like a
   function call inside a larger expression.  */
#define _mm512_cvt_roundph_ps(A, B)                                     \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(__m256i)(A),    \
                                              (__v16sf)                 \
                                              _mm512_undefined_ps (),   \
                                              (__mmask16) -1, (B)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, B)                          \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(__m256i)(A),    \
                                              (__v16sf)(__m512)(W),     \
                                              (__mmask16)(U), (B)))

#define _mm512_maskz_cvt_roundph_ps(U, A, B)                            \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(__m256i)(A),    \
                                              (__v16sf)                 \
                                              _mm512_setzero_ps (),     \
                                              (__mmask16)(U), (B)))
7921
|
|
|
|
|
|
|
|
7922
|
|
|
|
|
|
|
/* Macro forms of the ps -> ph conversion intrinsics, used when
   __OPTIMIZE__ is not defined.  The vector argument A is parenthesized
   before being cast so that a compound argument expression is cast as a
   whole rather than only its first operand.  In the mask forms U is the
   __m256i vector operand and W is the __mmask16 mask, mirroring the
   inline definitions.  */
#define _mm512_cvt_roundps_ph(A, I)                                     \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A),    \
    (int) (I), (__v16hi)_mm256_undefined_si256 (), (__mmask16) -1))
#define _mm512_cvtps_ph(A, I)                                           \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A),    \
    (int) (I), (__v16hi)_mm256_undefined_si256 (), (__mmask16) -1))
#define _mm512_mask_cvt_roundps_ph(U, W, A, I)                          \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A),    \
    (int) (I), (__v16hi)(__m256i)(U), (__mmask16) (W)))
#define _mm512_mask_cvtps_ph(U, W, A, I)                                \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A),    \
    (int) (I), (__v16hi)(__m256i)(U), (__mmask16) (W)))
#define _mm512_maskz_cvt_roundps_ph(W, A, I)                            \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A),    \
    (int) (I), (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
#define _mm512_maskz_cvtps_ph(W, A, I)                                  \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A),    \
    (int) (I), (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
7940
|
|
|
|
|
|
|
#endif |
7941
|
|
|
|
|
|
|
|
7942
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
7943
|
|
|
|
|
|
|
extern __inline __m256 |
7944
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7945
|
|
|
|
|
|
|
_mm512_cvt_roundpd_ps (__m512d __A, const int __R) |
7946
|
|
|
|
|
|
|
{ |
7947
|
|
|
|
|
|
|
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, |
7948
|
|
|
|
|
|
|
(__v8sf) |
7949
|
|
|
|
|
|
|
_mm256_undefined_ps (), |
7950
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
7951
|
|
|
|
|
|
|
} |
7952
|
|
|
|
|
|
|
|
7953
|
|
|
|
|
|
|
extern __inline __m256 |
7954
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7955
|
|
|
|
|
|
|
_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A, |
7956
|
|
|
|
|
|
|
const int __R) |
7957
|
|
|
|
|
|
|
{ |
7958
|
|
|
|
|
|
|
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, |
7959
|
|
|
|
|
|
|
(__v8sf) __W, |
7960
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
7961
|
|
|
|
|
|
|
} |
7962
|
|
|
|
|
|
|
|
7963
|
|
|
|
|
|
|
extern __inline __m256 |
7964
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7965
|
|
|
|
|
|
|
_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R) |
7966
|
|
|
|
|
|
|
{ |
7967
|
|
|
|
|
|
|
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, |
7968
|
|
|
|
|
|
|
(__v8sf) |
7969
|
|
|
|
|
|
|
_mm256_setzero_ps (), |
7970
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
7971
|
|
|
|
|
|
|
} |
7972
|
|
|
|
|
|
|
|
7973
|
|
|
|
|
|
|
extern __inline __m128 |
7974
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7975
|
|
|
|
|
|
|
_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R) |
7976
|
|
|
|
|
|
|
{ |
7977
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A, |
7978
|
|
|
|
|
|
|
(__v2df) __B, |
7979
|
|
|
|
|
|
|
__R); |
7980
|
|
|
|
|
|
|
} |
7981
|
|
|
|
|
|
|
|
7982
|
|
|
|
|
|
|
extern __inline __m128d |
7983
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
7984
|
|
|
|
|
|
|
_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R) |
7985
|
|
|
|
|
|
|
{ |
7986
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A, |
7987
|
|
|
|
|
|
|
(__v4sf) __B, |
7988
|
|
|
|
|
|
|
__R); |
7989
|
|
|
|
|
|
|
} |
7990
|
|
|
|
|
|
|
#else |
7991
|
|
|
|
|
|
|
/* Macro forms of the cvt_roundpd_ps intrinsics, used when __OPTIMIZE__
   is not defined.  Each expansion is wrapped in parentheses and every
   argument is parenthesized and cast as in the inline definitions, so
   the macro result behaves like a function call inside a larger
   expression.  */
#define _mm512_cvt_roundpd_ps(A, B)                                     \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d)(A),      \
                                             (__v8sf)                   \
                                             _mm256_undefined_ps (),    \
                                             (__mmask8) -1, (B)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, B)                          \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d)(A),      \
                                             (__v8sf)(__m256)(W),       \
                                             (__mmask8)(U), (B)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, B)                            \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d)(A),      \
                                             (__v8sf)                   \
                                             _mm256_setzero_ps (),      \
                                             (__mmask8)(U), (B)))
7999
|
|
|
|
|
|
|
|
8000
|
|
|
|
|
|
|
/* Macro forms of the scalar sd <-> ss rounding conversions, used when
   __OPTIMIZE__ is not defined.  Expansions are wrapped in parentheses
   and arguments are parenthesized and cast as in the inline
   definitions.  */
#define _mm_cvt_roundsd_ss(A, B, C)                                     \
  ((__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf)(__m128)(A),         \
                                           (__v2df)(__m128d)(B), (C)))

#define _mm_cvt_roundss_sd(A, B, C)                                     \
  ((__m128d) __builtin_ia32_cvtss2sd_round ((__v2df)(__m128d)(A),       \
                                            (__v4sf)(__m128)(B), (C)))
8005
|
|
|
|
|
|
|
#endif |
8006
|
|
|
|
|
|
|
|
8007
|
|
|
|
|
|
|
extern __inline void |
8008
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8009
|
|
|
|
|
|
|
_mm512_stream_si512 (__m512i * __P, __m512i __A) |
8010
|
|
|
|
|
|
|
{ |
8011
|
|
|
|
|
|
|
__builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A); |
8012
|
|
|
|
|
|
|
} |
8013
|
|
|
|
|
|
|
|
8014
|
|
|
|
|
|
|
extern __inline void |
8015
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8016
|
|
|
|
|
|
|
_mm512_stream_ps (float *__P, __m512 __A) |
8017
|
|
|
|
|
|
|
{ |
8018
|
|
|
|
|
|
|
__builtin_ia32_movntps512 (__P, (__v16sf) __A); |
8019
|
|
|
|
|
|
|
} |
8020
|
|
|
|
|
|
|
|
8021
|
|
|
|
|
|
|
extern __inline void |
8022
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8023
|
|
|
|
|
|
|
_mm512_stream_pd (double *__P, __m512d __A) |
8024
|
|
|
|
|
|
|
{ |
8025
|
|
|
|
|
|
|
__builtin_ia32_movntpd512 (__P, (__v8df) __A); |
8026
|
|
|
|
|
|
|
} |
8027
|
|
|
|
|
|
|
|
8028
|
|
|
|
|
|
|
extern __inline __m512i |
8029
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8030
|
|
|
|
|
|
|
_mm512_stream_load_si512 (void *__P) |
8031
|
|
|
|
|
|
|
{ |
8032
|
0
|
|
|
|
|
|
return __builtin_ia32_movntdqa512 ((__v8di *)__P); |
8033
|
|
|
|
|
|
|
} |
8034
|
|
|
|
|
|
|
|
8035
|
|
|
|
|
|
|
/* Constants for mantissa extraction: normalization interval selectors.
   The getmant intrinsics in this file place this value in the low bits
   of their immediate, as (sign << 2) | norm.  */
typedef enum
{
  _MM_MANT_NORM_1_2 = 0,      /* interval [1, 2) */
  _MM_MANT_NORM_p5_2 = 1,     /* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1 = 2,     /* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5 = 3   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
8043
|
|
|
|
|
|
|
|
8044
|
|
|
|
|
|
|
/* Sign-control selectors for mantissa extraction.  The getmant
   intrinsics in this file shift this value left by two and OR it with
   the normalization selector to form their immediate.  */
typedef enum
{
  _MM_MANT_SIGN_src = 0,    /* sign = sign(SRC) */
  _MM_MANT_SIGN_zero = 1,   /* sign = 0 */
  _MM_MANT_SIGN_nan = 2     /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
8050
|
|
|
|
|
|
|
|
8051
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
8052
|
|
|
|
|
|
|
extern __inline __m128 |
8053
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8054
|
|
|
|
|
|
|
_mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R) |
8055
|
|
|
|
|
|
|
{ |
8056
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A, |
8057
|
|
|
|
|
|
|
(__v4sf) __B, |
8058
|
|
|
|
|
|
|
__R); |
8059
|
|
|
|
|
|
|
} |
8060
|
|
|
|
|
|
|
|
8061
|
|
|
|
|
|
|
extern __inline __m128d |
8062
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8063
|
|
|
|
|
|
|
_mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R) |
8064
|
|
|
|
|
|
|
{ |
8065
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A, |
8066
|
|
|
|
|
|
|
(__v2df) __B, |
8067
|
|
|
|
|
|
|
__R); |
8068
|
|
|
|
|
|
|
} |
8069
|
|
|
|
|
|
|
|
8070
|
|
|
|
|
|
|
extern __inline __m512 |
8071
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8072
|
|
|
|
|
|
|
_mm512_getexp_round_ps (__m512 __A, const int __R) |
8073
|
|
|
|
|
|
|
{ |
8074
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, |
8075
|
|
|
|
|
|
|
(__v16sf) |
8076
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
8077
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
8078
|
|
|
|
|
|
|
} |
8079
|
|
|
|
|
|
|
|
8080
|
|
|
|
|
|
|
extern __inline __m512 |
8081
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8082
|
|
|
|
|
|
|
_mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A, |
8083
|
|
|
|
|
|
|
const int __R) |
8084
|
|
|
|
|
|
|
{ |
8085
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, |
8086
|
|
|
|
|
|
|
(__v16sf) __W, |
8087
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
8088
|
|
|
|
|
|
|
} |
8089
|
|
|
|
|
|
|
|
8090
|
|
|
|
|
|
|
extern __inline __m512 |
8091
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8092
|
|
|
|
|
|
|
_mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R) |
8093
|
|
|
|
|
|
|
{ |
8094
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, |
8095
|
|
|
|
|
|
|
(__v16sf) |
8096
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
8097
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
8098
|
|
|
|
|
|
|
} |
8099
|
|
|
|
|
|
|
|
8100
|
|
|
|
|
|
|
extern __inline __m512d |
8101
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8102
|
|
|
|
|
|
|
_mm512_getexp_round_pd (__m512d __A, const int __R) |
8103
|
|
|
|
|
|
|
{ |
8104
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, |
8105
|
|
|
|
|
|
|
(__v8df) |
8106
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
8107
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
8108
|
|
|
|
|
|
|
} |
8109
|
|
|
|
|
|
|
|
8110
|
|
|
|
|
|
|
extern __inline __m512d |
8111
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8112
|
|
|
|
|
|
|
_mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A, |
8113
|
|
|
|
|
|
|
const int __R) |
8114
|
|
|
|
|
|
|
{ |
8115
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, |
8116
|
|
|
|
|
|
|
(__v8df) __W, |
8117
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
8118
|
|
|
|
|
|
|
} |
8119
|
|
|
|
|
|
|
|
8120
|
|
|
|
|
|
|
extern __inline __m512d |
8121
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8122
|
|
|
|
|
|
|
_mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R) |
8123
|
|
|
|
|
|
|
{ |
8124
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, |
8125
|
|
|
|
|
|
|
(__v8df) |
8126
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
8127
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
8128
|
|
|
|
|
|
|
} |
8129
|
|
|
|
|
|
|
|
8130
|
|
|
|
|
|
|
extern __inline __m512d |
8131
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8132
|
|
|
|
|
|
|
_mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B, |
8133
|
|
|
|
|
|
|
_MM_MANTISSA_SIGN_ENUM __C, const int __R) |
8134
|
|
|
|
|
|
|
{ |
8135
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, |
8136
|
|
|
|
|
|
|
(__C << 2) | __B, |
8137
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
8138
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
8139
|
|
|
|
|
|
|
} |
8140
|
|
|
|
|
|
|
|
8141
|
|
|
|
|
|
|
extern __inline __m512d |
8142
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8143
|
|
|
|
|
|
|
_mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A, |
8144
|
|
|
|
|
|
|
_MM_MANTISSA_NORM_ENUM __B, |
8145
|
|
|
|
|
|
|
_MM_MANTISSA_SIGN_ENUM __C, const int __R) |
8146
|
|
|
|
|
|
|
{ |
8147
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, |
8148
|
|
|
|
|
|
|
(__C << 2) | __B, |
8149
|
|
|
|
|
|
|
(__v8df) __W, __U, |
8150
|
|
|
|
|
|
|
__R); |
8151
|
|
|
|
|
|
|
} |
8152
|
|
|
|
|
|
|
|
8153
|
|
|
|
|
|
|
extern __inline __m512d |
8154
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8155
|
|
|
|
|
|
|
_mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A, |
8156
|
|
|
|
|
|
|
_MM_MANTISSA_NORM_ENUM __B, |
8157
|
|
|
|
|
|
|
_MM_MANTISSA_SIGN_ENUM __C, const int __R) |
8158
|
|
|
|
|
|
|
{ |
8159
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, |
8160
|
|
|
|
|
|
|
(__C << 2) | __B, |
8161
|
|
|
|
|
|
|
(__v8df) |
8162
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
8163
|
|
|
|
|
|
|
__U, __R); |
8164
|
|
|
|
|
|
|
} |
8165
|
|
|
|
|
|
|
|
8166
|
|
|
|
|
|
|
extern __inline __m512 |
8167
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8168
|
|
|
|
|
|
|
_mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B, |
8169
|
|
|
|
|
|
|
_MM_MANTISSA_SIGN_ENUM __C, const int __R) |
8170
|
|
|
|
|
|
|
{ |
8171
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, |
8172
|
|
|
|
|
|
|
(__C << 2) | __B, |
8173
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
8174
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
8175
|
|
|
|
|
|
|
} |
8176
|
|
|
|
|
|
|
|
8177
|
|
|
|
|
|
|
extern __inline __m512 |
8178
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8179
|
|
|
|
|
|
|
_mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A, |
8180
|
|
|
|
|
|
|
_MM_MANTISSA_NORM_ENUM __B, |
8181
|
|
|
|
|
|
|
_MM_MANTISSA_SIGN_ENUM __C, const int __R) |
8182
|
|
|
|
|
|
|
{ |
8183
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, |
8184
|
|
|
|
|
|
|
(__C << 2) | __B, |
8185
|
|
|
|
|
|
|
(__v16sf) __W, __U, |
8186
|
|
|
|
|
|
|
__R); |
8187
|
|
|
|
|
|
|
} |
8188
|
|
|
|
|
|
|
|
8189
|
|
|
|
|
|
|
extern __inline __m512 |
8190
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8191
|
|
|
|
|
|
|
_mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A, |
8192
|
|
|
|
|
|
|
_MM_MANTISSA_NORM_ENUM __B, |
8193
|
|
|
|
|
|
|
_MM_MANTISSA_SIGN_ENUM __C, const int __R) |
8194
|
|
|
|
|
|
|
{ |
8195
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, |
8196
|
|
|
|
|
|
|
(__C << 2) | __B, |
8197
|
|
|
|
|
|
|
(__v16sf) |
8198
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
8199
|
|
|
|
|
|
|
__U, __R); |
8200
|
|
|
|
|
|
|
} |
8201
|
|
|
|
|
|
|
|
8202
|
|
|
|
|
|
|
extern __inline __m128d |
8203
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8204
|
|
|
|
|
|
|
_mm_getmant_round_sd (__m128d __A, __m128d __B, |
8205
|
|
|
|
|
|
|
_MM_MANTISSA_NORM_ENUM __C, |
8206
|
|
|
|
|
|
|
_MM_MANTISSA_SIGN_ENUM __D, const int __R) |
8207
|
|
|
|
|
|
|
{ |
8208
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A, |
8209
|
|
|
|
|
|
|
(__v2df) __B, |
8210
|
|
|
|
|
|
|
(__D << 2) | __C, |
8211
|
|
|
|
|
|
|
__R); |
8212
|
|
|
|
|
|
|
} |
8213
|
|
|
|
|
|
|
|
8214
|
|
|
|
|
|
|
extern __inline __m128 |
8215
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8216
|
|
|
|
|
|
|
_mm_getmant_round_ss (__m128 __A, __m128 __B, |
8217
|
|
|
|
|
|
|
_MM_MANTISSA_NORM_ENUM __C, |
8218
|
|
|
|
|
|
|
_MM_MANTISSA_SIGN_ENUM __D, const int __R) |
8219
|
|
|
|
|
|
|
{ |
8220
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A, |
8221
|
|
|
|
|
|
|
(__v4sf) __B, |
8222
|
|
|
|
|
|
|
(__D << 2) | __C, |
8223
|
|
|
|
|
|
|
__R); |
8224
|
|
|
|
|
|
|
} |
8225
|
|
|
|
|
|
|
|
8226
|
|
|
|
|
|
|
#else |
8227
|
|
|
|
|
|
|
/* Macro forms of the 512-bit double-precision getmant intrinsics, used
   when __OPTIMIZE__ is not defined.  V is the input vector, N and S are
   combined into the immediate as (S << 2) | N, and R is passed last.  */
#define _mm512_getmant_round_pd(V, N, S, R)                             \
  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(V),     \
    (int)(((S)<<2) | (N)),                                              \
    (__v8df)(__m512d)_mm512_undefined_pd(),                             \
    (__mmask8)-1, (R)))

#define _mm512_mask_getmant_round_pd(W, U, V, N, S, R)                  \
  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(V),     \
    (int)(((S)<<2) | (N)),                                              \
    (__v8df)(__m512d)(W),                                               \
    (__mmask8)(U), (R)))

#define _mm512_maskz_getmant_round_pd(U, V, N, S, R)                    \
  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(V),     \
    (int)(((S)<<2) | (N)),                                              \
    (__v8df)(__m512d)_mm512_setzero_pd(),                               \
    (__mmask8)(U), (R)))
8247
|
|
|
|
|
|
|
/* Macro forms of the 512-bit single-precision getmant intrinsics, used
   when __OPTIMIZE__ is not defined.  V is the input vector, N and S are
   combined into the immediate as (S << 2) | N, and R is passed last.  */
#define _mm512_getmant_round_ps(V, N, S, R)                             \
  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(V),      \
    (int)(((S)<<2) | (N)),                                              \
    (__v16sf)(__m512)_mm512_undefined_ps(),                             \
    (__mmask16)-1, (R)))

#define _mm512_mask_getmant_round_ps(W, U, V, N, S, R)                  \
  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(V),      \
    (int)(((S)<<2) | (N)),                                              \
    (__v16sf)(__m512)(W),                                               \
    (__mmask16)(U), (R)))

#define _mm512_maskz_getmant_round_ps(U, V, N, S, R)                    \
  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(V),      \
    (int)(((S)<<2) | (N)),                                              \
    (__v16sf)(__m512)_mm512_setzero_ps(),                               \
    (__mmask16)(U), (R)))
8267
|
|
|
|
|
|
|
/* Macro forms of the scalar getmant intrinsics, used when __OPTIMIZE__
   is not defined.  P and Q are the two vector operands; N and S are
   combined into the immediate as (S << 2) | N.  */
#define _mm_getmant_round_sd(P, Q, N, S, R)                             \
  ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(P),       \
    (__v2df)(__m128d)(Q),                                               \
    (int)(((S)<<2) | (N)),                                              \
    (R)))

#define _mm_getmant_round_ss(P, Q, N, S, R)                             \
  ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(P),         \
    (__v4sf)(__m128)(Q),                                                \
    (int)(((S)<<2) | (N)),                                              \
    (R)))
8278
|
|
|
|
|
|
|
|
8279
|
|
|
|
|
|
|
/* Macro forms of the scalar getexp intrinsics, used when __OPTIMIZE__ is
   not defined.  P and Q are the two vector operands; R is passed through
   as the last builtin argument.  */
#define _mm_getexp_round_ss(P, Q, R)                                    \
  ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(P),        \
                                            (__v4sf)(__m128)(Q), R))

#define _mm_getexp_round_sd(P, Q, R)                                    \
  ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(P),      \
                                             (__v2df)(__m128d)(Q), R))
8284
|
|
|
|
|
|
|
|
8285
|
|
|
|
|
|
|
/* Macro forms of the 512-bit single-precision getexp intrinsics, used
   when __OPTIMIZE__ is not defined.  V is the input vector, W the vector
   operand of the mask form, U the mask and R the last builtin argument.  */
#define _mm512_getexp_round_ps(V, R)                                    \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(V),        \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, R))

#define _mm512_mask_getexp_round_ps(W, U, V, R)                         \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(V),        \
                                           (__v16sf)(__m512)(W),        \
                                           (__mmask16)(U), R))

#define _mm512_maskz_getexp_round_ps(U, V, R)                           \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(V),        \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), R))
8296
|
|
|
|
|
|
|
|
8297
|
|
|
|
|
|
|
/* Macro forms of the 512-bit double-precision getexp intrinsics, used
   when __OPTIMIZE__ is not defined.  V is the input vector, W the vector
   operand of the mask form, U the mask and R the last builtin argument.  */
#define _mm512_getexp_round_pd(V, R)                                    \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(V),       \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, R))

#define _mm512_mask_getexp_round_pd(W, U, V, R)                         \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(V),       \
                                            (__v8df)(__m512d)(W),       \
                                            (__mmask8)(U), R))

#define _mm512_maskz_getexp_round_pd(U, V, R)                           \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(V),       \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), R))
8308
|
|
|
|
|
|
|
#endif |
8309
|
|
|
|
|
|
|
|
8310
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
8311
|
|
|
|
|
|
|
extern __inline __m512 |
8312
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8313
|
|
|
|
|
|
|
_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R) |
8314
|
|
|
|
|
|
|
{ |
8315
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm, |
8316
|
|
|
|
|
|
|
(__v16sf) |
8317
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
8318
|
|
|
|
|
|
|
-1, __R); |
8319
|
|
|
|
|
|
|
} |
8320
|
|
|
|
|
|
|
|
8321
|
|
|
|
|
|
|
extern __inline __m512 |
8322
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8323
|
|
|
|
|
|
|
_mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C, |
8324
|
|
|
|
|
|
|
const int __imm, const int __R) |
8325
|
|
|
|
|
|
|
{ |
8326
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm, |
8327
|
|
|
|
|
|
|
(__v16sf) __A, |
8328
|
|
|
|
|
|
|
(__mmask16) __B, __R); |
8329
|
|
|
|
|
|
|
} |
8330
|
|
|
|
|
|
|
|
8331
|
|
|
|
|
|
|
extern __inline __m512 |
8332
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8333
|
|
|
|
|
|
|
_mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B, |
8334
|
|
|
|
|
|
|
const int __imm, const int __R) |
8335
|
|
|
|
|
|
|
{ |
8336
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B, |
8337
|
|
|
|
|
|
|
__imm, |
8338
|
|
|
|
|
|
|
(__v16sf) |
8339
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
8340
|
|
|
|
|
|
|
(__mmask16) __A, __R); |
8341
|
|
|
|
|
|
|
} |
8342
|
|
|
|
|
|
|
|
8343
|
|
|
|
|
|
|
extern __inline __m512d |
8344
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8345
|
|
|
|
|
|
|
_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R) |
8346
|
|
|
|
|
|
|
{ |
8347
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm, |
8348
|
|
|
|
|
|
|
(__v8df) |
8349
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
8350
|
|
|
|
|
|
|
-1, __R); |
8351
|
|
|
|
|
|
|
} |
8352
|
|
|
|
|
|
|
|
8353
|
|
|
|
|
|
|
extern __inline __m512d |
8354
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8355
|
|
|
|
|
|
|
_mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B, |
8356
|
|
|
|
|
|
|
__m512d __C, const int __imm, const int __R) |
8357
|
|
|
|
|
|
|
{ |
8358
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm, |
8359
|
|
|
|
|
|
|
(__v8df) __A, |
8360
|
|
|
|
|
|
|
(__mmask8) __B, __R); |
8361
|
|
|
|
|
|
|
} |
8362
|
|
|
|
|
|
|
|
8363
|
|
|
|
|
|
|
extern __inline __m512d |
8364
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8365
|
|
|
|
|
|
|
_mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B, |
8366
|
|
|
|
|
|
|
const int __imm, const int __R) |
8367
|
|
|
|
|
|
|
{ |
8368
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B, |
8369
|
|
|
|
|
|
|
__imm, |
8370
|
|
|
|
|
|
|
(__v8df) |
8371
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
8372
|
|
|
|
|
|
|
(__mmask8) __A, __R); |
8373
|
|
|
|
|
|
|
} |
8374
|
|
|
|
|
|
|
|
8375
|
|
|
|
|
|
|
extern __inline __m128 |
8376
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8377
|
|
|
|
|
|
|
_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R) |
8378
|
|
|
|
|
|
|
{ |
8379
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A, |
8380
|
|
|
|
|
|
|
(__v4sf) __B, __imm, __R); |
8381
|
|
|
|
|
|
|
} |
8382
|
|
|
|
|
|
|
|
8383
|
|
|
|
|
|
|
extern __inline __m128d |
8384
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8385
|
|
|
|
|
|
|
_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm, |
8386
|
|
|
|
|
|
|
const int __R) |
8387
|
|
|
|
|
|
|
{ |
8388
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A, |
8389
|
|
|
|
|
|
|
(__v2df) __B, __imm, __R); |
8390
|
|
|
|
|
|
|
} |
8391
|
|
|
|
|
|
|
|
8392
|
|
|
|
|
|
|
#else |
8393
|
|
|
|
|
|
|
/* Macro forms of the 512-bit single-precision roundscale intrinsics, used
   when __OPTIMIZE__ is not defined.  V is the input vector, IMM the
   immediate, W the vector operand of the mask form, U the mask and R the
   last builtin argument.  */
#define _mm512_roundscale_round_ps(V, IMM, R)                           \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(V),       \
                                            (int)(IMM),                 \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)(-1), R))
#define _mm512_mask_roundscale_round_ps(W, U, V, IMM, R)                \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(V),       \
                                            (int)(IMM),                 \
                                            (__v16sf)(__m512)(W),       \
                                            (__mmask16)(U), R))
#define _mm512_maskz_roundscale_round_ps(U, V, IMM, R)                  \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(V),       \
                                            (int)(IMM),                 \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), R))
8406
|
|
|
|
|
|
|
/* Macro forms of the 512-bit double-precision roundscale intrinsics, used
   when __OPTIMIZE__ is not defined.  V is the input vector, IMM the
   immediate, W the vector operand of the mask form, U the mask and R the
   last builtin argument.  */
#define _mm512_roundscale_round_pd(V, IMM, R)                           \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(V),      \
                                             (int)(IMM),                \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)(-1), R))
#define _mm512_mask_roundscale_round_pd(W, U, V, IMM, R)                \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(V),      \
                                             (int)(IMM),                \
                                             (__v8df)(__m512d)(W),      \
                                             (__mmask8)(U), R))
#define _mm512_maskz_roundscale_round_pd(U, V, IMM, R)                  \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(V),      \
                                             (int)(IMM),                \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), R))
8419
|
|
|
|
|
|
|
#define _mm_roundscale_round_ss(A, B, C, R) \ |
8420
|
|
|
|
|
|
|
((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \ |
8421
|
|
|
|
|
|
|
(__v4sf)(__m128)(B), (int)(C), R)) |
8422
|
|
|
|
|
|
|
#define _mm_roundscale_round_sd(A, B, C, R) \ |
8423
|
|
|
|
|
|
|
((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \ |
8424
|
|
|
|
|
|
|
(__v2df)(__m128d)(B), (int)(C), R)) |
8425
|
|
|
|
|
|
|
#endif |
8426
|
|
|
|
|
|
|
|
8427
|
|
|
|
|
|
|
extern __inline __m512 |
8428
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8429
|
|
|
|
|
|
|
_mm512_floor_ps (__m512 __A) |
8430
|
|
|
|
|
|
|
{ |
8431
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, |
8432
|
|
|
|
|
|
|
_MM_FROUND_FLOOR, |
8433
|
|
|
|
|
|
|
(__v16sf) __A, -1, |
8434
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
8435
|
|
|
|
|
|
|
} |
8436
|
|
|
|
|
|
|
|
8437
|
|
|
|
|
|
|
extern __inline __m512d |
8438
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8439
|
|
|
|
|
|
|
_mm512_floor_pd (__m512d __A) |
8440
|
|
|
|
|
|
|
{ |
8441
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, |
8442
|
|
|
|
|
|
|
_MM_FROUND_FLOOR, |
8443
|
|
|
|
|
|
|
(__v8df) __A, -1, |
8444
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
8445
|
|
|
|
|
|
|
} |
8446
|
|
|
|
|
|
|
|
8447
|
|
|
|
|
|
|
extern __inline __m512 |
8448
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8449
|
|
|
|
|
|
|
_mm512_ceil_ps (__m512 __A) |
8450
|
|
|
|
|
|
|
{ |
8451
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, |
8452
|
|
|
|
|
|
|
_MM_FROUND_CEIL, |
8453
|
|
|
|
|
|
|
(__v16sf) __A, -1, |
8454
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
8455
|
|
|
|
|
|
|
} |
8456
|
|
|
|
|
|
|
|
8457
|
|
|
|
|
|
|
extern __inline __m512d |
8458
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8459
|
|
|
|
|
|
|
_mm512_ceil_pd (__m512d __A) |
8460
|
|
|
|
|
|
|
{ |
8461
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, |
8462
|
|
|
|
|
|
|
_MM_FROUND_CEIL, |
8463
|
|
|
|
|
|
|
(__v8df) __A, -1, |
8464
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
8465
|
|
|
|
|
|
|
} |
8466
|
|
|
|
|
|
|
|
8467
|
|
|
|
|
|
|
extern __inline __m512 |
8468
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8469
|
|
|
|
|
|
|
_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A) |
8470
|
|
|
|
|
|
|
{ |
8471
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, |
8472
|
|
|
|
|
|
|
_MM_FROUND_FLOOR, |
8473
|
|
|
|
|
|
|
(__v16sf) __W, __U, |
8474
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
8475
|
|
|
|
|
|
|
} |
8476
|
|
|
|
|
|
|
|
8477
|
|
|
|
|
|
|
extern __inline __m512d |
8478
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8479
|
|
|
|
|
|
|
_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A) |
8480
|
|
|
|
|
|
|
{ |
8481
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, |
8482
|
|
|
|
|
|
|
_MM_FROUND_FLOOR, |
8483
|
|
|
|
|
|
|
(__v8df) __W, __U, |
8484
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
8485
|
|
|
|
|
|
|
} |
8486
|
|
|
|
|
|
|
|
8487
|
|
|
|
|
|
|
extern __inline __m512 |
8488
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8489
|
|
|
|
|
|
|
_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A) |
8490
|
|
|
|
|
|
|
{ |
8491
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, |
8492
|
|
|
|
|
|
|
_MM_FROUND_CEIL, |
8493
|
|
|
|
|
|
|
(__v16sf) __W, __U, |
8494
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
8495
|
|
|
|
|
|
|
} |
8496
|
|
|
|
|
|
|
|
8497
|
|
|
|
|
|
|
extern __inline __m512d |
8498
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8499
|
|
|
|
|
|
|
_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A) |
8500
|
|
|
|
|
|
|
{ |
8501
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, |
8502
|
|
|
|
|
|
|
_MM_FROUND_CEIL, |
8503
|
|
|
|
|
|
|
(__v8df) __W, __U, |
8504
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
8505
|
|
|
|
|
|
|
} |
8506
|
|
|
|
|
|
|
|
8507
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
8508
|
|
|
|
|
|
|
extern __inline __m512i |
8509
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8510
|
|
|
|
|
|
|
_mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm) |
8511
|
|
|
|
|
|
|
{ |
8512
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A, |
8513
|
|
|
|
|
|
|
(__v16si) __B, __imm, |
8514
|
|
|
|
|
|
|
(__v16si) |
8515
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
8516
|
|
|
|
|
|
|
(__mmask16) -1); |
8517
|
|
|
|
|
|
|
} |
8518
|
|
|
|
|
|
|
|
8519
|
|
|
|
|
|
|
extern __inline __m512i |
8520
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8521
|
|
|
|
|
|
|
_mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A, |
8522
|
|
|
|
|
|
|
__m512i __B, const int __imm) |
8523
|
|
|
|
|
|
|
{ |
8524
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A, |
8525
|
|
|
|
|
|
|
(__v16si) __B, __imm, |
8526
|
|
|
|
|
|
|
(__v16si) __W, |
8527
|
|
|
|
|
|
|
(__mmask16) __U); |
8528
|
|
|
|
|
|
|
} |
8529
|
|
|
|
|
|
|
|
8530
|
|
|
|
|
|
|
extern __inline __m512i |
8531
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8532
|
|
|
|
|
|
|
_mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B, |
8533
|
|
|
|
|
|
|
const int __imm) |
8534
|
|
|
|
|
|
|
{ |
8535
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A, |
8536
|
|
|
|
|
|
|
(__v16si) __B, __imm, |
8537
|
|
|
|
|
|
|
(__v16si) |
8538
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
8539
|
|
|
|
|
|
|
(__mmask16) __U); |
8540
|
|
|
|
|
|
|
} |
8541
|
|
|
|
|
|
|
|
8542
|
|
|
|
|
|
|
extern __inline __m512i |
8543
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8544
|
|
|
|
|
|
|
_mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm) |
8545
|
|
|
|
|
|
|
{ |
8546
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A, |
8547
|
|
|
|
|
|
|
(__v8di) __B, __imm, |
8548
|
|
|
|
|
|
|
(__v8di) |
8549
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
8550
|
|
|
|
|
|
|
(__mmask8) -1); |
8551
|
|
|
|
|
|
|
} |
8552
|
|
|
|
|
|
|
|
8553
|
|
|
|
|
|
|
extern __inline __m512i |
8554
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8555
|
|
|
|
|
|
|
_mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A, |
8556
|
|
|
|
|
|
|
__m512i __B, const int __imm) |
8557
|
|
|
|
|
|
|
{ |
8558
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A, |
8559
|
|
|
|
|
|
|
(__v8di) __B, __imm, |
8560
|
|
|
|
|
|
|
(__v8di) __W, |
8561
|
|
|
|
|
|
|
(__mmask8) __U); |
8562
|
|
|
|
|
|
|
} |
8563
|
|
|
|
|
|
|
|
8564
|
|
|
|
|
|
|
extern __inline __m512i |
8565
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8566
|
|
|
|
|
|
|
_mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B, |
8567
|
|
|
|
|
|
|
const int __imm) |
8568
|
|
|
|
|
|
|
{ |
8569
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A, |
8570
|
|
|
|
|
|
|
(__v8di) __B, __imm, |
8571
|
|
|
|
|
|
|
(__v8di) |
8572
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
8573
|
|
|
|
|
|
|
(__mmask8) __U); |
8574
|
|
|
|
|
|
|
} |
8575
|
|
|
|
|
|
|
#else |
8576
|
|
|
|
|
|
|
/* Macro forms used when __OPTIMIZE__ is not defined.  Each expands to a
   call of the same builtin as the inline function of the same name in
   the #ifdef branch: X and Y are the vector operands, C the immediate,
   W the fourth builtin operand and U the mask.  */

#define _mm512_alignr_epi32(X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ((__v16si) (__m512i) (X), \
                                            (__v16si) (__m512i) (Y), \
                                            (int) (C), \
                                            (__v16si) _mm512_undefined_si512 (), \
                                            (__mmask16) -1))

#define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ((__v16si) (__m512i) (X), \
                                            (__v16si) (__m512i) (Y), \
                                            (int) (C), \
                                            (__v16si) (__m512i) (W), \
                                            (__mmask16) (U)))

#define _mm512_maskz_alignr_epi32(U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ((__v16si) (__m512i) (X), \
                                            (__v16si) (__m512i) (Y), \
                                            (int) (C), \
                                            (__v16si) _mm512_setzero_si512 (), \
                                            (__mmask16) (U)))

#define _mm512_alignr_epi64(X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ((__v8di) (__m512i) (X), \
                                            (__v8di) (__m512i) (Y), \
                                            (int) (C), \
                                            (__v8di) _mm512_undefined_si512 (), \
                                            (__mmask8) -1))

#define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ((__v8di) (__m512i) (X), \
                                            (__v8di) (__m512i) (Y), \
                                            (int) (C), \
                                            (__v8di) (__m512i) (W), \
                                            (__mmask8) (U)))

#define _mm512_maskz_alignr_epi64(U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ((__v8di) (__m512i) (X), \
                                            (__v8di) (__m512i) (Y), \
                                            (int) (C), \
                                            (__v8di) _mm512_setzero_si512 (), \
                                            (__mmask8) (U)))
8604
|
|
|
|
|
|
|
#endif |
8605
|
|
|
|
|
|
|
|
8606
|
|
|
|
|
|
|
extern __inline __mmask16 |
8607
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8608
|
|
|
|
|
|
|
_mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B) |
8609
|
|
|
|
|
|
|
{ |
8610
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A, |
8611
|
|
|
|
|
|
|
(__v16si) __B, |
8612
|
|
|
|
|
|
|
(__mmask16) -1); |
8613
|
|
|
|
|
|
|
} |
8614
|
|
|
|
|
|
|
|
8615
|
|
|
|
|
|
|
extern __inline __mmask16 |
8616
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8617
|
|
|
|
|
|
|
_mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) |
8618
|
|
|
|
|
|
|
{ |
8619
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A, |
8620
|
|
|
|
|
|
|
(__v16si) __B, __U); |
8621
|
|
|
|
|
|
|
} |
8622
|
|
|
|
|
|
|
|
8623
|
|
|
|
|
|
|
extern __inline __mmask8 |
8624
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8625
|
|
|
|
|
|
|
_mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) |
8626
|
|
|
|
|
|
|
{ |
8627
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A, |
8628
|
|
|
|
|
|
|
(__v8di) __B, __U); |
8629
|
|
|
|
|
|
|
} |
8630
|
|
|
|
|
|
|
|
8631
|
|
|
|
|
|
|
extern __inline __mmask8 |
8632
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8633
|
|
|
|
|
|
|
_mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B) |
8634
|
|
|
|
|
|
|
{ |
8635
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A, |
8636
|
|
|
|
|
|
|
(__v8di) __B, |
8637
|
|
|
|
|
|
|
(__mmask8) -1); |
8638
|
|
|
|
|
|
|
} |
8639
|
|
|
|
|
|
|
|
8640
|
|
|
|
|
|
|
extern __inline __mmask16 |
8641
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8642
|
|
|
|
|
|
|
_mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B) |
8643
|
|
|
|
|
|
|
{ |
8644
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A, |
8645
|
|
|
|
|
|
|
(__v16si) __B, |
8646
|
|
|
|
|
|
|
(__mmask16) -1); |
8647
|
|
|
|
|
|
|
} |
8648
|
|
|
|
|
|
|
|
8649
|
|
|
|
|
|
|
extern __inline __mmask16 |
8650
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8651
|
|
|
|
|
|
|
_mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) |
8652
|
|
|
|
|
|
|
{ |
8653
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A, |
8654
|
|
|
|
|
|
|
(__v16si) __B, __U); |
8655
|
|
|
|
|
|
|
} |
8656
|
|
|
|
|
|
|
|
8657
|
|
|
|
|
|
|
extern __inline __mmask8 |
8658
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8659
|
|
|
|
|
|
|
_mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) |
8660
|
|
|
|
|
|
|
{ |
8661
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A, |
8662
|
|
|
|
|
|
|
(__v8di) __B, __U); |
8663
|
|
|
|
|
|
|
} |
8664
|
|
|
|
|
|
|
|
8665
|
|
|
|
|
|
|
extern __inline __mmask8 |
8666
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8667
|
|
|
|
|
|
|
_mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B) |
8668
|
|
|
|
|
|
|
{ |
8669
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A, |
8670
|
|
|
|
|
|
|
(__v8di) __B, |
8671
|
|
|
|
|
|
|
(__mmask8) -1); |
8672
|
|
|
|
|
|
|
} |
8673
|
|
|
|
|
|
|
|
8674
|
|
|
|
|
|
|
extern __inline __mmask16 |
8675
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8676
|
|
|
|
|
|
|
_mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y) |
8677
|
|
|
|
|
|
|
{ |
8678
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, |
8679
|
|
|
|
|
|
|
(__v16si) __Y, 5, |
8680
|
|
|
|
|
|
|
(__mmask16) -1); |
8681
|
|
|
|
|
|
|
} |
8682
|
|
|
|
|
|
|
|
8683
|
|
|
|
|
|
|
extern __inline __mmask16 |
8684
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8685
|
|
|
|
|
|
|
_mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y) |
8686
|
|
|
|
|
|
|
{ |
8687
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, |
8688
|
|
|
|
|
|
|
(__v16si) __Y, 5, |
8689
|
|
|
|
|
|
|
(__mmask16) __M); |
8690
|
|
|
|
|
|
|
} |
8691
|
|
|
|
|
|
|
|
8692
|
|
|
|
|
|
|
extern __inline __mmask16 |
8693
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8694
|
|
|
|
|
|
|
_mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y) |
8695
|
|
|
|
|
|
|
{ |
8696
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, |
8697
|
|
|
|
|
|
|
(__v16si) __Y, 5, |
8698
|
|
|
|
|
|
|
(__mmask16) __M); |
8699
|
|
|
|
|
|
|
} |
8700
|
|
|
|
|
|
|
|
8701
|
|
|
|
|
|
|
extern __inline __mmask16 |
8702
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8703
|
|
|
|
|
|
|
_mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y) |
8704
|
|
|
|
|
|
|
{ |
8705
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, |
8706
|
|
|
|
|
|
|
(__v16si) __Y, 5, |
8707
|
|
|
|
|
|
|
(__mmask16) -1); |
8708
|
|
|
|
|
|
|
} |
8709
|
|
|
|
|
|
|
|
8710
|
|
|
|
|
|
|
extern __inline __mmask8 |
8711
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8712
|
|
|
|
|
|
|
_mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y) |
8713
|
|
|
|
|
|
|
{ |
8714
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, |
8715
|
|
|
|
|
|
|
(__v8di) __Y, 5, |
8716
|
|
|
|
|
|
|
(__mmask8) __M); |
8717
|
|
|
|
|
|
|
} |
8718
|
|
|
|
|
|
|
|
8719
|
|
|
|
|
|
|
extern __inline __mmask8 |
8720
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8721
|
|
|
|
|
|
|
_mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y) |
8722
|
|
|
|
|
|
|
{ |
8723
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, |
8724
|
|
|
|
|
|
|
(__v8di) __Y, 5, |
8725
|
|
|
|
|
|
|
(__mmask8) -1); |
8726
|
|
|
|
|
|
|
} |
8727
|
|
|
|
|
|
|
|
8728
|
|
|
|
|
|
|
extern __inline __mmask8 |
8729
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8730
|
|
|
|
|
|
|
_mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y) |
8731
|
|
|
|
|
|
|
{ |
8732
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, |
8733
|
|
|
|
|
|
|
(__v8di) __Y, 5, |
8734
|
|
|
|
|
|
|
(__mmask8) __M); |
8735
|
|
|
|
|
|
|
} |
8736
|
|
|
|
|
|
|
|
8737
|
|
|
|
|
|
|
extern __inline __mmask8 |
8738
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8739
|
|
|
|
|
|
|
_mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y) |
8740
|
|
|
|
|
|
|
{ |
8741
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, |
8742
|
|
|
|
|
|
|
(__v8di) __Y, 5, |
8743
|
|
|
|
|
|
|
(__mmask8) -1); |
8744
|
|
|
|
|
|
|
} |
8745
|
|
|
|
|
|
|
|
8746
|
|
|
|
|
|
|
extern __inline __mmask16 |
8747
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8748
|
|
|
|
|
|
|
_mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y) |
8749
|
|
|
|
|
|
|
{ |
8750
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, |
8751
|
|
|
|
|
|
|
(__v16si) __Y, 2, |
8752
|
|
|
|
|
|
|
(__mmask16) __M); |
8753
|
|
|
|
|
|
|
} |
8754
|
|
|
|
|
|
|
|
8755
|
|
|
|
|
|
|
extern __inline __mmask16 |
8756
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8757
|
|
|
|
|
|
|
_mm512_cmple_epi32_mask (__m512i __X, __m512i __Y) |
8758
|
|
|
|
|
|
|
{ |
8759
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, |
8760
|
|
|
|
|
|
|
(__v16si) __Y, 2, |
8761
|
|
|
|
|
|
|
(__mmask16) -1); |
8762
|
|
|
|
|
|
|
} |
8763
|
|
|
|
|
|
|
|
8764
|
|
|
|
|
|
|
extern __inline __mmask16 |
8765
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8766
|
|
|
|
|
|
|
_mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y) |
8767
|
|
|
|
|
|
|
{ |
8768
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, |
8769
|
|
|
|
|
|
|
(__v16si) __Y, 2, |
8770
|
|
|
|
|
|
|
(__mmask16) __M); |
8771
|
|
|
|
|
|
|
} |
8772
|
|
|
|
|
|
|
|
8773
|
|
|
|
|
|
|
extern __inline __mmask16 |
8774
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8775
|
|
|
|
|
|
|
_mm512_cmple_epu32_mask (__m512i __X, __m512i __Y) |
8776
|
|
|
|
|
|
|
{ |
8777
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, |
8778
|
|
|
|
|
|
|
(__v16si) __Y, 2, |
8779
|
|
|
|
|
|
|
(__mmask16) -1); |
8780
|
|
|
|
|
|
|
} |
8781
|
|
|
|
|
|
|
|
8782
|
|
|
|
|
|
|
extern __inline __mmask8 |
8783
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8784
|
|
|
|
|
|
|
_mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y) |
8785
|
|
|
|
|
|
|
{ |
8786
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, |
8787
|
|
|
|
|
|
|
(__v8di) __Y, 2, |
8788
|
|
|
|
|
|
|
(__mmask8) __M); |
8789
|
|
|
|
|
|
|
} |
8790
|
|
|
|
|
|
|
|
8791
|
|
|
|
|
|
|
extern __inline __mmask8 |
8792
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8793
|
|
|
|
|
|
|
_mm512_cmple_epi64_mask (__m512i __X, __m512i __Y) |
8794
|
|
|
|
|
|
|
{ |
8795
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, |
8796
|
|
|
|
|
|
|
(__v8di) __Y, 2, |
8797
|
|
|
|
|
|
|
(__mmask8) -1); |
8798
|
|
|
|
|
|
|
} |
8799
|
|
|
|
|
|
|
|
8800
|
|
|
|
|
|
|
extern __inline __mmask8 |
8801
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8802
|
|
|
|
|
|
|
_mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y) |
8803
|
|
|
|
|
|
|
{ |
8804
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, |
8805
|
|
|
|
|
|
|
(__v8di) __Y, 2, |
8806
|
|
|
|
|
|
|
(__mmask8) __M); |
8807
|
|
|
|
|
|
|
} |
8808
|
|
|
|
|
|
|
|
8809
|
|
|
|
|
|
|
extern __inline __mmask8 |
8810
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8811
|
|
|
|
|
|
|
_mm512_cmple_epu64_mask (__m512i __X, __m512i __Y) |
8812
|
|
|
|
|
|
|
{ |
8813
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, |
8814
|
|
|
|
|
|
|
(__v8di) __Y, 2, |
8815
|
|
|
|
|
|
|
(__mmask8) -1); |
8816
|
|
|
|
|
|
|
} |
8817
|
|
|
|
|
|
|
|
8818
|
|
|
|
|
|
|
extern __inline __mmask16 |
8819
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8820
|
|
|
|
|
|
|
_mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y) |
8821
|
|
|
|
|
|
|
{ |
8822
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, |
8823
|
|
|
|
|
|
|
(__v16si) __Y, 1, |
8824
|
|
|
|
|
|
|
(__mmask16) __M); |
8825
|
|
|
|
|
|
|
} |
8826
|
|
|
|
|
|
|
|
8827
|
|
|
|
|
|
|
extern __inline __mmask16 |
8828
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8829
|
|
|
|
|
|
|
_mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y) |
8830
|
|
|
|
|
|
|
{ |
8831
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, |
8832
|
|
|
|
|
|
|
(__v16si) __Y, 1, |
8833
|
|
|
|
|
|
|
(__mmask16) -1); |
8834
|
|
|
|
|
|
|
} |
8835
|
|
|
|
|
|
|
|
8836
|
|
|
|
|
|
|
extern __inline __mmask16 |
8837
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8838
|
|
|
|
|
|
|
_mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y) |
8839
|
|
|
|
|
|
|
{ |
8840
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, |
8841
|
|
|
|
|
|
|
(__v16si) __Y, 1, |
8842
|
|
|
|
|
|
|
(__mmask16) __M); |
8843
|
|
|
|
|
|
|
} |
8844
|
|
|
|
|
|
|
|
8845
|
|
|
|
|
|
|
extern __inline __mmask16 |
8846
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8847
|
|
|
|
|
|
|
_mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y) |
8848
|
|
|
|
|
|
|
{ |
8849
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, |
8850
|
|
|
|
|
|
|
(__v16si) __Y, 1, |
8851
|
|
|
|
|
|
|
(__mmask16) -1); |
8852
|
|
|
|
|
|
|
} |
8853
|
|
|
|
|
|
|
|
8854
|
|
|
|
|
|
|
extern __inline __mmask8 |
8855
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8856
|
|
|
|
|
|
|
_mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y) |
8857
|
|
|
|
|
|
|
{ |
8858
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, |
8859
|
|
|
|
|
|
|
(__v8di) __Y, 1, |
8860
|
|
|
|
|
|
|
(__mmask8) __M); |
8861
|
|
|
|
|
|
|
} |
8862
|
|
|
|
|
|
|
|
8863
|
|
|
|
|
|
|
extern __inline __mmask8 |
8864
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8865
|
|
|
|
|
|
|
_mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y) |
8866
|
|
|
|
|
|
|
{ |
8867
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, |
8868
|
|
|
|
|
|
|
(__v8di) __Y, 1, |
8869
|
|
|
|
|
|
|
(__mmask8) -1); |
8870
|
|
|
|
|
|
|
} |
8871
|
|
|
|
|
|
|
|
8872
|
|
|
|
|
|
|
extern __inline __mmask8 |
8873
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8874
|
|
|
|
|
|
|
_mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y) |
8875
|
|
|
|
|
|
|
{ |
8876
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, |
8877
|
|
|
|
|
|
|
(__v8di) __Y, 1, |
8878
|
|
|
|
|
|
|
(__mmask8) __M); |
8879
|
|
|
|
|
|
|
} |
8880
|
|
|
|
|
|
|
|
8881
|
|
|
|
|
|
|
extern __inline __mmask8 |
8882
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8883
|
|
|
|
|
|
|
_mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y) |
8884
|
|
|
|
|
|
|
{ |
8885
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, |
8886
|
|
|
|
|
|
|
(__v8di) __Y, 1, |
8887
|
|
|
|
|
|
|
(__mmask8) -1); |
8888
|
|
|
|
|
|
|
} |
8889
|
|
|
|
|
|
|
|
8890
|
|
|
|
|
|
|
extern __inline __mmask16 |
8891
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8892
|
|
|
|
|
|
|
_mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y) |
8893
|
|
|
|
|
|
|
{ |
8894
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, |
8895
|
|
|
|
|
|
|
(__v16si) __Y, 4, |
8896
|
|
|
|
|
|
|
(__mmask16) -1); |
8897
|
|
|
|
|
|
|
} |
8898
|
|
|
|
|
|
|
|
8899
|
|
|
|
|
|
|
extern __inline __mmask16 |
8900
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8901
|
|
|
|
|
|
|
_mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y) |
8902
|
|
|
|
|
|
|
{ |
8903
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, |
8904
|
|
|
|
|
|
|
(__v16si) __Y, 4, |
8905
|
|
|
|
|
|
|
(__mmask16) __M); |
8906
|
|
|
|
|
|
|
} |
8907
|
|
|
|
|
|
|
|
8908
|
|
|
|
|
|
|
extern __inline __mmask16 |
8909
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8910
|
|
|
|
|
|
|
_mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y) |
8911
|
|
|
|
|
|
|
{ |
8912
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, |
8913
|
|
|
|
|
|
|
(__v16si) __Y, 4, |
8914
|
|
|
|
|
|
|
(__mmask16) __M); |
8915
|
|
|
|
|
|
|
} |
8916
|
|
|
|
|
|
|
|
8917
|
|
|
|
|
|
|
extern __inline __mmask16 |
8918
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8919
|
|
|
|
|
|
|
_mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y) |
8920
|
|
|
|
|
|
|
{ |
8921
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, |
8922
|
|
|
|
|
|
|
(__v16si) __Y, 4, |
8923
|
|
|
|
|
|
|
(__mmask16) -1); |
8924
|
|
|
|
|
|
|
} |
8925
|
|
|
|
|
|
|
|
8926
|
|
|
|
|
|
|
extern __inline __mmask8 |
8927
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8928
|
|
|
|
|
|
|
_mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y) |
8929
|
|
|
|
|
|
|
{ |
8930
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, |
8931
|
|
|
|
|
|
|
(__v8di) __Y, 4, |
8932
|
|
|
|
|
|
|
(__mmask8) __M); |
8933
|
|
|
|
|
|
|
} |
8934
|
|
|
|
|
|
|
|
8935
|
|
|
|
|
|
|
extern __inline __mmask8 |
8936
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8937
|
|
|
|
|
|
|
_mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y) |
8938
|
|
|
|
|
|
|
{ |
8939
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, |
8940
|
|
|
|
|
|
|
(__v8di) __Y, 4, |
8941
|
|
|
|
|
|
|
(__mmask8) -1); |
8942
|
|
|
|
|
|
|
} |
8943
|
|
|
|
|
|
|
|
8944
|
|
|
|
|
|
|
extern __inline __mmask8 |
8945
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8946
|
|
|
|
|
|
|
_mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y) |
8947
|
|
|
|
|
|
|
{ |
8948
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, |
8949
|
|
|
|
|
|
|
(__v8di) __Y, 4, |
8950
|
|
|
|
|
|
|
(__mmask8) __M); |
8951
|
|
|
|
|
|
|
} |
8952
|
|
|
|
|
|
|
|
8953
|
|
|
|
|
|
|
extern __inline __mmask8 |
8954
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8955
|
|
|
|
|
|
|
_mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y) |
8956
|
|
|
|
|
|
|
{ |
8957
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, |
8958
|
|
|
|
|
|
|
(__v8di) __Y, 4, |
8959
|
|
|
|
|
|
|
(__mmask8) -1); |
8960
|
|
|
|
|
|
|
} |
8961
|
|
|
|
|
|
|
|
8962
|
|
|
|
|
|
|
/* Predicate codes for the integer compare wrappers in this header.  The
   literal predicates 1, 2, 4 and 5 used by the cmplt, cmple, cmpneq and
   cmpge wrappers earlier in the file equal _MM_CMPINT_LT, _MM_CMPINT_LE,
   _MM_CMPINT_NE and _MM_CMPINT_GE respectively.  */
#define _MM_CMPINT_EQ       0x0
#define _MM_CMPINT_LT       0x1
#define _MM_CMPINT_LE       0x2
#define _MM_CMPINT_UNUSED   0x3
#define _MM_CMPINT_NE       0x4
/* NLT and GE are two names for the same code.  */
#define _MM_CMPINT_NLT      0x5
#define _MM_CMPINT_GE       0x5
/* NLE and GT are two names for the same code.  */
#define _MM_CMPINT_NLE      0x6
#define _MM_CMPINT_GT       0x6
8971
|
|
|
|
|
|
|
|
8972
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
8973
|
|
|
|
|
|
|
extern __inline __mmask8 |
8974
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8975
|
|
|
|
|
|
|
_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P) |
8976
|
|
|
|
|
|
|
{ |
8977
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, |
8978
|
|
|
|
|
|
|
(__v8di) __Y, __P, |
8979
|
|
|
|
|
|
|
(__mmask8) -1); |
8980
|
|
|
|
|
|
|
} |
8981
|
|
|
|
|
|
|
|
8982
|
|
|
|
|
|
|
extern __inline __mmask16 |
8983
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8984
|
|
|
|
|
|
|
_mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P) |
8985
|
|
|
|
|
|
|
{ |
8986
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, |
8987
|
|
|
|
|
|
|
(__v16si) __Y, __P, |
8988
|
|
|
|
|
|
|
(__mmask16) -1); |
8989
|
|
|
|
|
|
|
} |
8990
|
|
|
|
|
|
|
|
8991
|
|
|
|
|
|
|
extern __inline __mmask8 |
8992
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
8993
|
|
|
|
|
|
|
_mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P) |
8994
|
|
|
|
|
|
|
{ |
8995
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, |
8996
|
|
|
|
|
|
|
(__v8di) __Y, __P, |
8997
|
|
|
|
|
|
|
(__mmask8) -1); |
8998
|
|
|
|
|
|
|
} |
8999
|
|
|
|
|
|
|
|
9000
|
|
|
|
|
|
|
extern __inline __mmask16 |
9001
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9002
|
|
|
|
|
|
|
_mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P) |
9003
|
|
|
|
|
|
|
{ |
9004
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, |
9005
|
|
|
|
|
|
|
(__v16si) __Y, __P, |
9006
|
|
|
|
|
|
|
(__mmask16) -1); |
9007
|
|
|
|
|
|
|
} |
9008
|
|
|
|
|
|
|
|
9009
|
|
|
|
|
|
|
extern __inline __mmask8 |
9010
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9011
|
|
|
|
|
|
|
_mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P, |
9012
|
|
|
|
|
|
|
const int __R) |
9013
|
|
|
|
|
|
|
{ |
9014
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, |
9015
|
|
|
|
|
|
|
(__v8df) __Y, __P, |
9016
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
9017
|
|
|
|
|
|
|
} |
9018
|
|
|
|
|
|
|
|
9019
|
|
|
|
|
|
|
extern __inline __mmask16 |
9020
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9021
|
|
|
|
|
|
|
_mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R) |
9022
|
|
|
|
|
|
|
{ |
9023
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, |
9024
|
|
|
|
|
|
|
(__v16sf) __Y, __P, |
9025
|
|
|
|
|
|
|
(__mmask16) -1, __R); |
9026
|
|
|
|
|
|
|
} |
9027
|
|
|
|
|
|
|
|
9028
|
|
|
|
|
|
|
extern __inline __mmask8 |
9029
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9030
|
|
|
|
|
|
|
_mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y, |
9031
|
|
|
|
|
|
|
const int __P) |
9032
|
|
|
|
|
|
|
{ |
9033
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, |
9034
|
|
|
|
|
|
|
(__v8di) __Y, __P, |
9035
|
|
|
|
|
|
|
(__mmask8) __U); |
9036
|
|
|
|
|
|
|
} |
9037
|
|
|
|
|
|
|
|
9038
|
|
|
|
|
|
|
extern __inline __mmask16 |
9039
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9040
|
|
|
|
|
|
|
_mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y, |
9041
|
|
|
|
|
|
|
const int __P) |
9042
|
|
|
|
|
|
|
{ |
9043
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, |
9044
|
|
|
|
|
|
|
(__v16si) __Y, __P, |
9045
|
|
|
|
|
|
|
(__mmask16) __U); |
9046
|
|
|
|
|
|
|
} |
9047
|
|
|
|
|
|
|
|
9048
|
|
|
|
|
|
|
extern __inline __mmask8 |
9049
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9050
|
|
|
|
|
|
|
_mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y, |
9051
|
|
|
|
|
|
|
const int __P) |
9052
|
|
|
|
|
|
|
{ |
9053
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, |
9054
|
|
|
|
|
|
|
(__v8di) __Y, __P, |
9055
|
|
|
|
|
|
|
(__mmask8) __U); |
9056
|
|
|
|
|
|
|
} |
9057
|
|
|
|
|
|
|
|
9058
|
|
|
|
|
|
|
extern __inline __mmask16 |
9059
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9060
|
|
|
|
|
|
|
_mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y, |
9061
|
|
|
|
|
|
|
const int __P) |
9062
|
|
|
|
|
|
|
{ |
9063
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, |
9064
|
|
|
|
|
|
|
(__v16si) __Y, __P, |
9065
|
|
|
|
|
|
|
(__mmask16) __U); |
9066
|
|
|
|
|
|
|
} |
9067
|
|
|
|
|
|
|
|
9068
|
|
|
|
|
|
|
extern __inline __mmask8 |
9069
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9070
|
|
|
|
|
|
|
_mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, |
9071
|
|
|
|
|
|
|
const int __P, const int __R) |
9072
|
|
|
|
|
|
|
{ |
9073
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, |
9074
|
|
|
|
|
|
|
(__v8df) __Y, __P, |
9075
|
|
|
|
|
|
|
(__mmask8) __U, __R); |
9076
|
|
|
|
|
|
|
} |
9077
|
|
|
|
|
|
|
|
9078
|
|
|
|
|
|
|
extern __inline __mmask16 |
9079
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9080
|
|
|
|
|
|
|
_mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, |
9081
|
|
|
|
|
|
|
const int __P, const int __R) |
9082
|
|
|
|
|
|
|
{ |
9083
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, |
9084
|
|
|
|
|
|
|
(__v16sf) __Y, __P, |
9085
|
|
|
|
|
|
|
(__mmask16) __U, __R); |
9086
|
|
|
|
|
|
|
} |
9087
|
|
|
|
|
|
|
|
9088
|
|
|
|
|
|
|
extern __inline __mmask8 |
9089
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9090
|
|
|
|
|
|
|
_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R) |
9091
|
|
|
|
|
|
|
{ |
9092
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, |
9093
|
|
|
|
|
|
|
(__v2df) __Y, __P, |
9094
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
9095
|
|
|
|
|
|
|
} |
9096
|
|
|
|
|
|
|
|
9097
|
|
|
|
|
|
|
extern __inline __mmask8 |
9098
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9099
|
|
|
|
|
|
|
_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, |
9100
|
|
|
|
|
|
|
const int __P, const int __R) |
9101
|
|
|
|
|
|
|
{ |
9102
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, |
9103
|
|
|
|
|
|
|
(__v2df) __Y, __P, |
9104
|
|
|
|
|
|
|
(__mmask8) __M, __R); |
9105
|
|
|
|
|
|
|
} |
9106
|
|
|
|
|
|
|
|
9107
|
|
|
|
|
|
|
extern __inline __mmask8 |
9108
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9109
|
|
|
|
|
|
|
_mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R) |
9110
|
|
|
|
|
|
|
{ |
9111
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, |
9112
|
|
|
|
|
|
|
(__v4sf) __Y, __P, |
9113
|
|
|
|
|
|
|
(__mmask8) -1, __R); |
9114
|
|
|
|
|
|
|
} |
9115
|
|
|
|
|
|
|
|
9116
|
|
|
|
|
|
|
extern __inline __mmask8 |
9117
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9118
|
|
|
|
|
|
|
_mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, |
9119
|
|
|
|
|
|
|
const int __P, const int __R) |
9120
|
|
|
|
|
|
|
{ |
9121
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, |
9122
|
|
|
|
|
|
|
(__v4sf) __Y, __P, |
9123
|
|
|
|
|
|
|
(__mmask8) __M, __R); |
9124
|
|
|
|
|
|
|
} |
9125
|
|
|
|
|
|
|
|
9126
|
|
|
|
|
|
|
#else |
9127
|
|
|
|
|
|
|
/* Macro form (the #else branch of the surrounding conditional): expands to
   __builtin_ia32_cmpq512_mask on X and Y viewed as __v8di, with predicate P
   and an all-ones __mmask8 operand; the result is cast to __mmask8.  */
#define _mm512_cmp_epi64_mask(X, Y, P)                                  \
  ((__mmask8)                                                           \
   __builtin_ia32_cmpq512_mask ((__v8di) (__m512i) (X),                 \
                                (__v8di) (__m512i) (Y),                 \
                                (int) (P), (__mmask8) -1))
9131
|
|
|
|
|
|
|
|
9132
|
|
|
|
|
|
|
/* Macro form (the #else branch of the surrounding conditional): expands to
   __builtin_ia32_cmpd512_mask on X and Y viewed as __v16si, with predicate
   P and an all-ones __mmask16 operand.  The result is cast to __mmask16:
   the operands carry 16 elements, so a __mmask8 cast would discard the
   upper eight result bits.  */
#define _mm512_cmp_epi32_mask(X, Y, P)                                  \
  ((__mmask16)                                                          \
   __builtin_ia32_cmpd512_mask ((__v16si) (__m512i) (X),                \
                                (__v16si) (__m512i) (Y),                \
                                (int) (P), (__mmask16) -1))
9136
|
|
|
|
|
|
|
|
9137
|
|
|
|
|
|
|
/* Macro form (the #else branch of the surrounding conditional): expands to
   __builtin_ia32_ucmpq512_mask on X and Y viewed as __v8di, with predicate
   P and an all-ones __mmask8 operand; the result is cast to __mmask8.  */
#define _mm512_cmp_epu64_mask(X, Y, P)                                  \
  ((__mmask8)                                                           \
   __builtin_ia32_ucmpq512_mask ((__v8di) (__m512i) (X),                \
                                 (__v8di) (__m512i) (Y),                \
                                 (int) (P), (__mmask8) -1))
9141
|
|
|
|
|
|
|
|
9142
|
|
|
|
|
|
|
/* Macro form (the #else branch of the surrounding conditional): expands to
   __builtin_ia32_ucmpd512_mask on X and Y viewed as __v16si, with predicate
   P and an all-ones __mmask16 operand.  The result is cast to __mmask16:
   the operands carry 16 elements, so a __mmask8 cast would discard the
   upper eight result bits.  */
#define _mm512_cmp_epu32_mask(X, Y, P)                                  \
  ((__mmask16)                                                          \
   __builtin_ia32_ucmpd512_mask ((__v16si) (__m512i) (X),               \
                                 (__v16si) (__m512i) (Y),               \
                                 (int) (P), (__mmask16) -1))
9146
|
|
|
|
|
|
|
|
9147
|
|
|
|
|
|
|
/* Macro form (the #else branch of the surrounding conditional): expands to
   __builtin_ia32_cmppd512_mask on X and Y viewed as __v8df, with predicate
   P, an all-ones __mmask8 operand and control value R.  */
#define _mm512_cmp_round_pd_mask(X, Y, P, R)                            \
  ((__mmask8)                                                           \
   __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X),                \
                                 (__v8df) (__m512d) (Y),                \
                                 (int) (P), (__mmask8) -1, (R)))
9151
|
|
|
|
|
|
|
|
9152
|
|
|
|
|
|
|
/* Macro form (the #else branch of the surrounding conditional): expands to
   __builtin_ia32_cmpps512_mask on X and Y viewed as __v16sf, with predicate
   P, an all-ones __mmask16 operand and control value R.  */
#define _mm512_cmp_round_ps_mask(X, Y, P, R)                            \
  ((__mmask16)                                                          \
   __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X),                \
                                 (__v16sf) (__m512) (Y),                \
                                 (int) (P), (__mmask16) -1, (R)))
9156
|
|
|
|
|
|
|
|
9157
|
|
|
|
|
|
|
/* Macro form (the #else branch of the surrounding conditional): expands to
   __builtin_ia32_cmpq512_mask on X and Y viewed as __v8di, with predicate P
   and mask M.  M is parenthesized before the __mmask8 cast so that an
   argument such as `m >> 8' is cast as a whole rather than having the cast
   bind to `m' alone.  */
#define _mm512_mask_cmp_epi64_mask(M, X, Y, P)                          \
  ((__mmask8)                                                           \
   __builtin_ia32_cmpq512_mask ((__v8di) (__m512i) (X),                 \
                                (__v8di) (__m512i) (Y),                 \
                                (int) (P), (__mmask8) (M)))
9161
|
|
|
|
|
|
|
|
9162
|
|
|
|
|
|
|
/* Macro form (the #else branch of the surrounding conditional): expands to
   __builtin_ia32_cmpd512_mask on X and Y viewed as __v16si, with predicate
   P and mask M.  The result is cast to __mmask16 (16 elements; a __mmask8
   cast would drop the upper eight bits), and M is parenthesized so the
   __mmask16 cast applies to the whole argument expression.  */
#define _mm512_mask_cmp_epi32_mask(M, X, Y, P)                          \
  ((__mmask16)                                                          \
   __builtin_ia32_cmpd512_mask ((__v16si) (__m512i) (X),                \
                                (__v16si) (__m512i) (Y),                \
                                (int) (P), (__mmask16) (M)))
9166
|
|
|
|
|
|
|
|
9167
|
|
|
|
|
|
|
/* Macro form (the #else branch of the surrounding conditional): expands to
   __builtin_ia32_ucmpq512_mask on X and Y viewed as __v8di, with predicate
   P and mask M.  M is parenthesized before the __mmask8 cast so that an
   argument such as `m >> 8' is cast as a whole rather than having the cast
   bind to `m' alone.  */
#define _mm512_mask_cmp_epu64_mask(M, X, Y, P)                          \
  ((__mmask8)                                                           \
   __builtin_ia32_ucmpq512_mask ((__v8di) (__m512i) (X),                \
                                 (__v8di) (__m512i) (Y),                \
                                 (int) (P), (__mmask8) (M)))
9171
|
|
|
|
|
|
|
|
9172
|
|
|
|
|
|
|
/* Macro form (the #else branch of the surrounding conditional): expands to
   __builtin_ia32_ucmpd512_mask on X and Y viewed as __v16si, with predicate
   P and mask M.  The result is cast to __mmask16 (16 elements; a __mmask8
   cast would drop the upper eight bits), and M is parenthesized so the
   __mmask16 cast applies to the whole argument expression.  */
#define _mm512_mask_cmp_epu32_mask(M, X, Y, P)                          \
  ((__mmask16)                                                          \
   __builtin_ia32_ucmpd512_mask ((__v16si) (__m512i) (X),               \
                                 (__v16si) (__m512i) (Y),               \
                                 (int) (P), (__mmask16) (M)))
9176
|
|
|
|
|
|
|
|
9177
|
|
|
|
|
|
|
/* Macro form (the #else branch of the surrounding conditional): expands to
   __builtin_ia32_cmppd512_mask on X and Y viewed as __v8df, with predicate
   P, mask M and control value R.  M is parenthesized so the __mmask8 cast
   applies to the whole argument expression.  */
#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R)                    \
  ((__mmask8)                                                           \
   __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X),                \
                                 (__v8df) (__m512d) (Y),                \
                                 (int) (P), (__mmask8) (M), (R)))
9181
|
|
|
|
|
|
|
|
9182
|
|
|
|
|
|
|
/* Macro form (the #else branch of the surrounding conditional): expands to
   __builtin_ia32_cmpps512_mask on X and Y viewed as __v16sf, with predicate
   P, mask M and control value R.  M is parenthesized so the __mmask16 cast
   applies to the whole argument expression.  */
#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R)                    \
  ((__mmask16)                                                          \
   __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X),                \
                                 (__v16sf) (__m512) (Y),                \
                                 (int) (P), (__mmask16) (M), (R)))
9186
|
|
|
|
|
|
|
|
9187
|
|
|
|
|
|
|
/* Macro form (the #else branch of the surrounding conditional): expands to
   __builtin_ia32_cmpsd_mask on X and Y viewed as __v2df, with predicate P,
   an all-ones __mmask8 operand and control value R.  */
#define _mm_cmp_round_sd_mask(X, Y, P, R)                               \
  ((__mmask8)                                                           \
   __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X),                   \
                              (__v2df) (__m128d) (Y),                   \
                              (int) (P), (__mmask8) -1, (R)))
9191
|
|
|
|
|
|
|
|
9192
|
|
|
|
|
|
|
/* Macro form (the #else branch of the surrounding conditional): expands to
   __builtin_ia32_cmpsd_mask on X and Y viewed as __v2df, with predicate P,
   mask M (passed without a cast) and control value R.  */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R)                       \
  ((__mmask8)                                                           \
   __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X),                   \
                              (__v2df) (__m128d) (Y),                   \
                              (int) (P), (M), (R)))
9196
|
|
|
|
|
|
|
|
9197
|
|
|
|
|
|
|
/* Macro form (the #else branch of the surrounding conditional): expands to
   __builtin_ia32_cmpss_mask on X and Y viewed as __v4sf, with predicate P,
   an all-ones __mmask8 operand and control value R.  */
#define _mm_cmp_round_ss_mask(X, Y, P, R)                               \
  ((__mmask8)                                                           \
   __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X),                    \
                              (__v4sf) (__m128) (Y),                    \
                              (int) (P), (__mmask8) -1, (R)))
9201
|
|
|
|
|
|
|
|
9202
|
|
|
|
|
|
|
/* Macro form (the #else branch of the surrounding conditional): expands to
   __builtin_ia32_cmpss_mask on X and Y viewed as __v4sf, with predicate P,
   mask M (passed without a cast) and control value R.  */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R)                       \
  ((__mmask8)                                                           \
   __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X),                    \
                              (__v4sf) (__m128) (Y),                    \
                              (int) (P), (M), (R)))
9206
|
|
|
|
|
|
|
#endif |
9207
|
|
|
|
|
|
|
|
9208
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
9209
|
|
|
|
|
|
|
extern __inline __m512 |
9210
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9211
|
|
|
|
|
|
|
_mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale) |
9212
|
|
|
|
|
|
|
{ |
9213
|
|
|
|
|
|
|
__m512 v1_old = _mm512_undefined_ps (); |
9214
|
|
|
|
|
|
|
__mmask16 mask = 0xFFFF; |
9215
|
|
|
|
|
|
|
|
9216
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old, |
9217
|
|
|
|
|
|
|
__addr, |
9218
|
|
|
|
|
|
|
(__v16si) __index, |
9219
|
|
|
|
|
|
|
mask, __scale); |
9220
|
|
|
|
|
|
|
} |
9221
|
|
|
|
|
|
|
|
9222
|
|
|
|
|
|
|
extern __inline __m512 |
9223
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9224
|
|
|
|
|
|
|
_mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask, |
9225
|
|
|
|
|
|
|
__m512i __index, float const *__addr, int __scale) |
9226
|
|
|
|
|
|
|
{ |
9227
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old, |
9228
|
|
|
|
|
|
|
__addr, |
9229
|
|
|
|
|
|
|
(__v16si) __index, |
9230
|
|
|
|
|
|
|
__mask, __scale); |
9231
|
|
|
|
|
|
|
} |
9232
|
|
|
|
|
|
|
|
9233
|
|
|
|
|
|
|
extern __inline __m512d |
9234
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9235
|
|
|
|
|
|
|
_mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale) |
9236
|
|
|
|
|
|
|
{ |
9237
|
|
|
|
|
|
|
__m512d v1_old = _mm512_undefined_pd (); |
9238
|
|
|
|
|
|
|
__mmask8 mask = 0xFF; |
9239
|
|
|
|
|
|
|
|
9240
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old, |
9241
|
|
|
|
|
|
|
__addr, |
9242
|
|
|
|
|
|
|
(__v8si) __index, mask, |
9243
|
|
|
|
|
|
|
__scale); |
9244
|
|
|
|
|
|
|
} |
9245
|
|
|
|
|
|
|
|
9246
|
|
|
|
|
|
|
extern __inline __m512d |
9247
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9248
|
|
|
|
|
|
|
_mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask, |
9249
|
|
|
|
|
|
|
__m256i __index, double const *__addr, int __scale) |
9250
|
|
|
|
|
|
|
{ |
9251
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old, |
9252
|
|
|
|
|
|
|
__addr, |
9253
|
|
|
|
|
|
|
(__v8si) __index, |
9254
|
|
|
|
|
|
|
__mask, __scale); |
9255
|
|
|
|
|
|
|
} |
9256
|
|
|
|
|
|
|
|
9257
|
|
|
|
|
|
|
extern __inline __m256 |
9258
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9259
|
|
|
|
|
|
|
_mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale) |
9260
|
|
|
|
|
|
|
{ |
9261
|
|
|
|
|
|
|
__m256 v1_old = _mm256_undefined_ps (); |
9262
|
|
|
|
|
|
|
__mmask8 mask = 0xFF; |
9263
|
|
|
|
|
|
|
|
9264
|
|
|
|
|
|
|
return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old, |
9265
|
|
|
|
|
|
|
__addr, |
9266
|
|
|
|
|
|
|
(__v8di) __index, mask, |
9267
|
|
|
|
|
|
|
__scale); |
9268
|
|
|
|
|
|
|
} |
9269
|
|
|
|
|
|
|
|
9270
|
|
|
|
|
|
|
extern __inline __m256 |
9271
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9272
|
|
|
|
|
|
|
_mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask, |
9273
|
|
|
|
|
|
|
__m512i __index, float const *__addr, int __scale) |
9274
|
|
|
|
|
|
|
{ |
9275
|
|
|
|
|
|
|
return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old, |
9276
|
|
|
|
|
|
|
__addr, |
9277
|
|
|
|
|
|
|
(__v8di) __index, |
9278
|
|
|
|
|
|
|
__mask, __scale); |
9279
|
|
|
|
|
|
|
} |
9280
|
|
|
|
|
|
|
|
9281
|
|
|
|
|
|
|
extern __inline __m512d |
9282
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9283
|
|
|
|
|
|
|
_mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale) |
9284
|
|
|
|
|
|
|
{ |
9285
|
|
|
|
|
|
|
__m512d v1_old = _mm512_undefined_pd (); |
9286
|
|
|
|
|
|
|
__mmask8 mask = 0xFF; |
9287
|
|
|
|
|
|
|
|
9288
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old, |
9289
|
|
|
|
|
|
|
__addr, |
9290
|
|
|
|
|
|
|
(__v8di) __index, mask, |
9291
|
|
|
|
|
|
|
__scale); |
9292
|
|
|
|
|
|
|
} |
9293
|
|
|
|
|
|
|
|
9294
|
|
|
|
|
|
|
extern __inline __m512d |
9295
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9296
|
|
|
|
|
|
|
_mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask, |
9297
|
|
|
|
|
|
|
__m512i __index, double const *__addr, int __scale) |
9298
|
|
|
|
|
|
|
{ |
9299
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old, |
9300
|
|
|
|
|
|
|
__addr, |
9301
|
|
|
|
|
|
|
(__v8di) __index, |
9302
|
|
|
|
|
|
|
__mask, __scale); |
9303
|
|
|
|
|
|
|
} |
9304
|
|
|
|
|
|
|
|
9305
|
|
|
|
|
|
|
extern __inline __m512i |
9306
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9307
|
|
|
|
|
|
|
_mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale) |
9308
|
|
|
|
|
|
|
{ |
9309
|
|
|
|
|
|
|
__m512i v1_old = _mm512_undefined_si512 (); |
9310
|
|
|
|
|
|
|
__mmask16 mask = 0xFFFF; |
9311
|
|
|
|
|
|
|
|
9312
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old, |
9313
|
|
|
|
|
|
|
__addr, |
9314
|
|
|
|
|
|
|
(__v16si) __index, |
9315
|
|
|
|
|
|
|
mask, __scale); |
9316
|
|
|
|
|
|
|
} |
9317
|
|
|
|
|
|
|
|
9318
|
|
|
|
|
|
|
extern __inline __m512i |
9319
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9320
|
|
|
|
|
|
|
_mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask, |
9321
|
|
|
|
|
|
|
__m512i __index, int const *__addr, int __scale) |
9322
|
|
|
|
|
|
|
{ |
9323
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old, |
9324
|
|
|
|
|
|
|
__addr, |
9325
|
|
|
|
|
|
|
(__v16si) __index, |
9326
|
|
|
|
|
|
|
__mask, __scale); |
9327
|
|
|
|
|
|
|
} |
9328
|
|
|
|
|
|
|
|
9329
|
|
|
|
|
|
|
extern __inline __m512i |
9330
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9331
|
|
|
|
|
|
|
_mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale) |
9332
|
|
|
|
|
|
|
{ |
9333
|
|
|
|
|
|
|
__m512i v1_old = _mm512_undefined_si512 (); |
9334
|
|
|
|
|
|
|
__mmask8 mask = 0xFF; |
9335
|
|
|
|
|
|
|
|
9336
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old, |
9337
|
|
|
|
|
|
|
__addr, |
9338
|
|
|
|
|
|
|
(__v8si) __index, mask, |
9339
|
|
|
|
|
|
|
__scale); |
9340
|
|
|
|
|
|
|
} |
9341
|
|
|
|
|
|
|
|
9342
|
|
|
|
|
|
|
extern __inline __m512i |
9343
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9344
|
|
|
|
|
|
|
_mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask, |
9345
|
|
|
|
|
|
|
__m256i __index, long long const *__addr, |
9346
|
|
|
|
|
|
|
int __scale) |
9347
|
|
|
|
|
|
|
{ |
9348
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old, |
9349
|
|
|
|
|
|
|
__addr, |
9350
|
|
|
|
|
|
|
(__v8si) __index, |
9351
|
|
|
|
|
|
|
__mask, __scale); |
9352
|
|
|
|
|
|
|
} |
9353
|
|
|
|
|
|
|
|
9354
|
|
|
|
|
|
|
extern __inline __m256i |
9355
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9356
|
|
|
|
|
|
|
_mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale) |
9357
|
|
|
|
|
|
|
{ |
9358
|
|
|
|
|
|
|
__m256i v1_old = _mm256_undefined_si256 (); |
9359
|
|
|
|
|
|
|
__mmask8 mask = 0xFF; |
9360
|
|
|
|
|
|
|
|
9361
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old, |
9362
|
|
|
|
|
|
|
__addr, |
9363
|
|
|
|
|
|
|
(__v8di) __index, |
9364
|
|
|
|
|
|
|
mask, __scale); |
9365
|
|
|
|
|
|
|
} |
9366
|
|
|
|
|
|
|
|
9367
|
|
|
|
|
|
|
extern __inline __m256i |
9368
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9369
|
|
|
|
|
|
|
_mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask, |
9370
|
|
|
|
|
|
|
__m512i __index, int const *__addr, int __scale) |
9371
|
|
|
|
|
|
|
{ |
9372
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old, |
9373
|
|
|
|
|
|
|
__addr, |
9374
|
|
|
|
|
|
|
(__v8di) __index, |
9375
|
|
|
|
|
|
|
__mask, __scale); |
9376
|
|
|
|
|
|
|
} |
9377
|
|
|
|
|
|
|
|
9378
|
|
|
|
|
|
|
extern __inline __m512i |
9379
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9380
|
|
|
|
|
|
|
_mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale) |
9381
|
|
|
|
|
|
|
{ |
9382
|
|
|
|
|
|
|
__m512i v1_old = _mm512_undefined_si512 (); |
9383
|
|
|
|
|
|
|
__mmask8 mask = 0xFF; |
9384
|
|
|
|
|
|
|
|
9385
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old, |
9386
|
|
|
|
|
|
|
__addr, |
9387
|
|
|
|
|
|
|
(__v8di) __index, mask, |
9388
|
|
|
|
|
|
|
__scale); |
9389
|
|
|
|
|
|
|
} |
9390
|
|
|
|
|
|
|
|
9391
|
|
|
|
|
|
|
extern __inline __m512i |
9392
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9393
|
|
|
|
|
|
|
_mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask, |
9394
|
|
|
|
|
|
|
__m512i __index, long long const *__addr, |
9395
|
|
|
|
|
|
|
int __scale) |
9396
|
|
|
|
|
|
|
{ |
9397
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old, |
9398
|
|
|
|
|
|
|
__addr, |
9399
|
|
|
|
|
|
|
(__v8di) __index, |
9400
|
|
|
|
|
|
|
__mask, __scale); |
9401
|
|
|
|
|
|
|
} |
9402
|
|
|
|
|
|
|
|
9403
|
|
|
|
|
|
|
extern __inline void |
9404
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9405
|
|
|
|
|
|
|
_mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1, int __scale) |
9406
|
|
|
|
|
|
|
{ |
9407
|
|
|
|
|
|
|
__builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF, |
9408
|
|
|
|
|
|
|
(__v16si) __index, (__v16sf) __v1, __scale); |
9409
|
|
|
|
|
|
|
} |
9410
|
|
|
|
|
|
|
|
9411
|
|
|
|
|
|
|
extern __inline void |
9412
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9413
|
|
|
|
|
|
|
_mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask, |
9414
|
|
|
|
|
|
|
__m512i __index, __m512 __v1, int __scale) |
9415
|
|
|
|
|
|
|
{ |
9416
|
|
|
|
|
|
|
__builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index, |
9417
|
|
|
|
|
|
|
(__v16sf) __v1, __scale); |
9418
|
|
|
|
|
|
|
} |
9419
|
|
|
|
|
|
|
|
9420
|
|
|
|
|
|
|
extern __inline void |
9421
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9422
|
|
|
|
|
|
|
_mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1, |
9423
|
|
|
|
|
|
|
int __scale) |
9424
|
|
|
|
|
|
|
{ |
9425
|
|
|
|
|
|
|
__builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF, |
9426
|
|
|
|
|
|
|
(__v8si) __index, (__v8df) __v1, __scale); |
9427
|
|
|
|
|
|
|
} |
9428
|
|
|
|
|
|
|
|
9429
|
|
|
|
|
|
|
extern __inline void |
9430
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9431
|
|
|
|
|
|
|
_mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask, |
9432
|
|
|
|
|
|
|
__m256i __index, __m512d __v1, int __scale) |
9433
|
|
|
|
|
|
|
{ |
9434
|
|
|
|
|
|
|
__builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index, |
9435
|
|
|
|
|
|
|
(__v8df) __v1, __scale); |
9436
|
|
|
|
|
|
|
} |
9437
|
|
|
|
|
|
|
|
9438
|
|
|
|
|
|
|
extern __inline void |
9439
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9440
|
|
|
|
|
|
|
_mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1, int __scale) |
9441
|
|
|
|
|
|
|
{ |
9442
|
|
|
|
|
|
|
__builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF, |
9443
|
|
|
|
|
|
|
(__v8di) __index, (__v8sf) __v1, __scale); |
9444
|
|
|
|
|
|
|
} |
9445
|
|
|
|
|
|
|
|
9446
|
|
|
|
|
|
|
extern __inline void |
9447
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9448
|
|
|
|
|
|
|
_mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask, |
9449
|
|
|
|
|
|
|
__m512i __index, __m256 __v1, int __scale) |
9450
|
|
|
|
|
|
|
{ |
9451
|
|
|
|
|
|
|
__builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index, |
9452
|
|
|
|
|
|
|
(__v8sf) __v1, __scale); |
9453
|
|
|
|
|
|
|
} |
9454
|
|
|
|
|
|
|
|
9455
|
|
|
|
|
|
|
extern __inline void |
9456
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9457
|
|
|
|
|
|
|
_mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1, |
9458
|
|
|
|
|
|
|
int __scale) |
9459
|
|
|
|
|
|
|
{ |
9460
|
|
|
|
|
|
|
__builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF, |
9461
|
|
|
|
|
|
|
(__v8di) __index, (__v8df) __v1, __scale); |
9462
|
|
|
|
|
|
|
} |
9463
|
|
|
|
|
|
|
|
9464
|
|
|
|
|
|
|
extern __inline void |
9465
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9466
|
|
|
|
|
|
|
_mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask, |
9467
|
|
|
|
|
|
|
__m512i __index, __m512d __v1, int __scale) |
9468
|
|
|
|
|
|
|
{ |
9469
|
|
|
|
|
|
|
__builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index, |
9470
|
|
|
|
|
|
|
(__v8df) __v1, __scale); |
9471
|
|
|
|
|
|
|
} |
9472
|
|
|
|
|
|
|
|
9473
|
|
|
|
|
|
|
extern __inline void |
9474
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9475
|
|
|
|
|
|
|
_mm512_i32scatter_epi32 (int *__addr, __m512i __index, |
9476
|
|
|
|
|
|
|
__m512i __v1, int __scale) |
9477
|
|
|
|
|
|
|
{ |
9478
|
|
|
|
|
|
|
__builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF, |
9479
|
|
|
|
|
|
|
(__v16si) __index, (__v16si) __v1, __scale); |
9480
|
|
|
|
|
|
|
} |
9481
|
|
|
|
|
|
|
|
9482
|
|
|
|
|
|
|
extern __inline void |
9483
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9484
|
|
|
|
|
|
|
_mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask, |
9485
|
|
|
|
|
|
|
__m512i __index, __m512i __v1, int __scale) |
9486
|
|
|
|
|
|
|
{ |
9487
|
|
|
|
|
|
|
__builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index, |
9488
|
|
|
|
|
|
|
(__v16si) __v1, __scale); |
9489
|
|
|
|
|
|
|
} |
9490
|
|
|
|
|
|
|
|
9491
|
|
|
|
|
|
|
extern __inline void |
9492
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9493
|
|
|
|
|
|
|
_mm512_i32scatter_epi64 (long long *__addr, __m256i __index, |
9494
|
|
|
|
|
|
|
__m512i __v1, int __scale) |
9495
|
|
|
|
|
|
|
{ |
9496
|
|
|
|
|
|
|
__builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF, |
9497
|
|
|
|
|
|
|
(__v8si) __index, (__v8di) __v1, __scale); |
9498
|
|
|
|
|
|
|
} |
9499
|
|
|
|
|
|
|
|
9500
|
|
|
|
|
|
|
extern __inline void |
9501
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9502
|
|
|
|
|
|
|
_mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask, |
9503
|
|
|
|
|
|
|
__m256i __index, __m512i __v1, int __scale) |
9504
|
|
|
|
|
|
|
{ |
9505
|
|
|
|
|
|
|
__builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index, |
9506
|
|
|
|
|
|
|
(__v8di) __v1, __scale); |
9507
|
|
|
|
|
|
|
} |
9508
|
|
|
|
|
|
|
|
9509
|
|
|
|
|
|
|
extern __inline void |
9510
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9511
|
|
|
|
|
|
|
_mm512_i64scatter_epi32 (int *__addr, __m512i __index, |
9512
|
|
|
|
|
|
|
__m256i __v1, int __scale) |
9513
|
|
|
|
|
|
|
{ |
9514
|
|
|
|
|
|
|
__builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF, |
9515
|
|
|
|
|
|
|
(__v8di) __index, (__v8si) __v1, __scale); |
9516
|
|
|
|
|
|
|
} |
9517
|
|
|
|
|
|
|
|
9518
|
|
|
|
|
|
|
extern __inline void |
9519
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9520
|
|
|
|
|
|
|
_mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask, |
9521
|
|
|
|
|
|
|
__m512i __index, __m256i __v1, int __scale) |
9522
|
|
|
|
|
|
|
{ |
9523
|
|
|
|
|
|
|
__builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index, |
9524
|
|
|
|
|
|
|
(__v8si) __v1, __scale); |
9525
|
|
|
|
|
|
|
} |
9526
|
|
|
|
|
|
|
|
9527
|
|
|
|
|
|
|
extern __inline void |
9528
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9529
|
|
|
|
|
|
|
_mm512_i64scatter_epi64 (long long *__addr, __m512i __index, |
9530
|
|
|
|
|
|
|
__m512i __v1, int __scale) |
9531
|
|
|
|
|
|
|
{ |
9532
|
|
|
|
|
|
|
__builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF, |
9533
|
|
|
|
|
|
|
(__v8di) __index, (__v8di) __v1, __scale); |
9534
|
|
|
|
|
|
|
} |
9535
|
|
|
|
|
|
|
|
9536
|
|
|
|
|
|
|
extern __inline void |
9537
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9538
|
|
|
|
|
|
|
_mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask, |
9539
|
|
|
|
|
|
|
__m512i __index, __m512i __v1, int __scale) |
9540
|
|
|
|
|
|
|
{ |
9541
|
|
|
|
|
|
|
__builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index, |
9542
|
|
|
|
|
|
|
(__v8di) __v1, __scale); |
9543
|
|
|
|
|
|
|
} |
9544
|
|
|
|
|
|
|
#else |
9545
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to
   __builtin_ia32_gathersiv16sf with _mm512_undefined_ps () as the first
   operand and a mask of 0xFFFF.  Each argument is parenthesized before it
   is cast, so that e.g. ADDR = `p + n' is converted as a whole instead of
   the cast binding to `p' alone; the expansion itself is parenthesized so
   it behaves as a single primary expression.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE)                         \
  ((__m512) __builtin_ia32_gathersiv16sf (                              \
     (__v16sf) _mm512_undefined_ps (),                                  \
     (float const *) (ADDR),                                            \
     (__v16si) (__m512i) (INDEX),                                       \
     (__mmask16) 0xFFFF, (int) (SCALE)))
9550
|
|
|
|
|
|
|
|
9551
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to
   __builtin_ia32_gathersiv16sf with V1OLD as the first operand and MASK
   cast to __mmask16.  Each argument is parenthesized before it is cast so
   the cast applies to the whole argument expression; the expansion itself
   is parenthesized so it behaves as a single primary expression.  */
#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512) __builtin_ia32_gathersiv16sf (                              \
     (__v16sf) (__m512) (V1OLD),                                        \
     (float const *) (ADDR),                                            \
     (__v16si) (__m512i) (INDEX),                                       \
     (__mmask16) (MASK), (int) (SCALE)))
9556
|
|
|
|
|
|
|
|
9557
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to
   __builtin_ia32_gathersiv8df with _mm512_undefined_pd () as the first
   operand and a mask of 0xFF.  Each argument is parenthesized before it is
   cast so the cast applies to the whole argument expression; the expansion
   itself is parenthesized so it behaves as a single primary expression.  */
#define _mm512_i32gather_pd(INDEX, ADDR, SCALE)                         \
  ((__m512d) __builtin_ia32_gathersiv8df (                              \
     (__v8df) _mm512_undefined_pd (),                                   \
     (double const *) (ADDR),                                           \
     (__v8si) (__m256i) (INDEX),                                        \
     (__mmask8) 0xFF, (int) (SCALE)))
9562
|
|
|
|
|
|
|
|
9563
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to
   __builtin_ia32_gathersiv8df with V1OLD as the first operand and MASK
   cast to __mmask8.  Each argument is parenthesized before it is cast so
   the cast applies to the whole argument expression; the expansion itself
   is parenthesized so it behaves as a single primary expression.  */
#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512d) __builtin_ia32_gathersiv8df (                              \
     (__v8df) (__m512d) (V1OLD),                                        \
     (double const *) (ADDR),                                           \
     (__v8si) (__m256i) (INDEX),                                        \
     (__mmask8) (MASK), (int) (SCALE)))
9568
|
|
|
|
|
|
|
|
9569
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to
   __builtin_ia32_gatherdiv16sf with _mm256_undefined_ps () as the first
   operand and a mask of 0xFF; the result is a __m256.  Each argument is
   parenthesized before it is cast so the cast applies to the whole
   argument expression; the expansion itself is parenthesized so it behaves
   as a single primary expression.  */
#define _mm512_i64gather_ps(INDEX, ADDR, SCALE)                         \
  ((__m256) __builtin_ia32_gatherdiv16sf (                              \
     (__v8sf) _mm256_undefined_ps (),                                   \
     (float const *) (ADDR),                                            \
     (__v8di) (__m512i) (INDEX),                                        \
     (__mmask8) 0xFF, (int) (SCALE)))
9574
|
|
|
|
|
|
|
|
9575
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to
   __builtin_ia32_gatherdiv16sf with V1OLD (a __m256) as the first operand
   and MASK cast to __mmask8.  Each argument is parenthesized before it is
   cast so the cast applies to the whole argument expression; the expansion
   itself is parenthesized so it behaves as a single primary expression.  */
#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m256) __builtin_ia32_gatherdiv16sf (                              \
     (__v8sf) (__m256) (V1OLD),                                         \
     (float const *) (ADDR),                                            \
     (__v8di) (__m512i) (INDEX),                                        \
     (__mmask8) (MASK), (int) (SCALE)))
9580
|
|
|
|
|
|
|
|
9581
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to
   __builtin_ia32_gatherdiv8df with _mm512_undefined_pd () as the first
   operand and a mask of 0xFF.  Each argument is parenthesized before it is
   cast so the cast applies to the whole argument expression; the expansion
   itself is parenthesized so it behaves as a single primary expression.  */
#define _mm512_i64gather_pd(INDEX, ADDR, SCALE)                         \
  ((__m512d) __builtin_ia32_gatherdiv8df (                              \
     (__v8df) _mm512_undefined_pd (),                                   \
     (double const *) (ADDR),                                           \
     (__v8di) (__m512i) (INDEX),                                        \
     (__mmask8) 0xFF, (int) (SCALE)))
9586
|
|
|
|
|
|
|
|
9587
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to
   __builtin_ia32_gatherdiv8df with V1OLD as the first operand and MASK
   cast to __mmask8.  Each argument is parenthesized before it is cast so
   the cast applies to the whole argument expression; the expansion itself
   is parenthesized so it behaves as a single primary expression.  */
#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512d) __builtin_ia32_gatherdiv8df (                              \
     (__v8df) (__m512d) (V1OLD),                                        \
     (double const *) (ADDR),                                           \
     (__v8di) (__m512i) (INDEX),                                        \
     (__mmask8) (MASK), (int) (SCALE)))
9592
|
|
|
|
|
|
|
|
9593
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to
   __builtin_ia32_gathersiv16si with _mm512_undefined_si512 () as the first
   operand and a mask of 0xFFFF.  Each argument is parenthesized before it
   is cast so the cast applies to the whole argument expression; the
   expansion itself is parenthesized so it behaves as a single primary
   expression.  */
#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gathersiv16si (                             \
     (__v16si) _mm512_undefined_si512 (),                               \
     (int const *) (ADDR),                                              \
     (__v16si) (__m512i) (INDEX),                                       \
     (__mmask16) 0xFFFF, (int) (SCALE)))
9598
|
|
|
|
|
|
|
|
9599
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to
   __builtin_ia32_gathersiv16si with V1OLD as the first operand and MASK
   cast to __mmask16.  Each argument is parenthesized before it is cast so
   the cast applies to the whole argument expression; the expansion itself
   is parenthesized so it behaves as a single primary expression.  */
#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gathersiv16si (                             \
     (__v16si) (__m512i) (V1OLD),                                       \
     (int const *) (ADDR),                                              \
     (__v16si) (__m512i) (INDEX),                                       \
     (__mmask16) (MASK), (int) (SCALE)))
9604
|
|
|
|
|
|
|
|
9605
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to
   __builtin_ia32_gathersiv8di with _mm512_undefined_si512 () as the first
   operand and a mask of 0xFF.  Each argument is parenthesized before it is
   cast so the cast applies to the whole argument expression; the expansion
   itself is parenthesized so it behaves as a single primary expression.  */
#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gathersiv8di (                              \
     (__v8di) _mm512_undefined_si512 (),                                \
     (long long const *) (ADDR),                                        \
     (__v8si) (__m256i) (INDEX),                                        \
     (__mmask8) 0xFF, (int) (SCALE)))
9610
|
|
|
|
|
|
|
|
9611
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to
   __builtin_ia32_gathersiv8di with V1OLD as the first operand and MASK
   cast to __mmask8.  Each argument is parenthesized before it is cast so
   the cast applies to the whole argument expression; the expansion itself
   is parenthesized so it behaves as a single primary expression.  */
#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gathersiv8di (                              \
     (__v8di) (__m512i) (V1OLD),                                        \
     (long long const *) (ADDR),                                        \
     (__v8si) (__m256i) (INDEX),                                        \
     (__mmask8) (MASK), (int) (SCALE)))
9616
|
|
|
|
|
|
|
|
9617
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to
   __builtin_ia32_gatherdiv16si with _mm256_undefined_si256 () as the first
   operand and a mask of 0xFF; the result is a __m256i.  Each argument is
   parenthesized before it is cast so the cast applies to the whole
   argument expression; the expansion itself is parenthesized so it behaves
   as a single primary expression.  */
#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE)                      \
  ((__m256i) __builtin_ia32_gatherdiv16si (                             \
     (__v8si) _mm256_undefined_si256 (),                                \
     (int const *) (ADDR),                                              \
     (__v8di) (__m512i) (INDEX),                                        \
     (__mmask8) 0xFF, (int) (SCALE)))
9622
|
|
|
|
|
|
|
|
9623
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to
   __builtin_ia32_gatherdiv16si with V1OLD (a __m256i) as the first operand
   and MASK cast to __mmask8.  Each argument is parenthesized before it is
   cast so the cast applies to the whole argument expression; the expansion
   itself is parenthesized so it behaves as a single primary expression.  */
#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m256i) __builtin_ia32_gatherdiv16si (                             \
     (__v8si) (__m256i) (V1OLD),                                        \
     (int const *) (ADDR),                                              \
     (__v8di) (__m512i) (INDEX),                                        \
     (__mmask8) (MASK), (int) (SCALE)))
9628
|
|
|
|
|
|
|
|
9629
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to
   __builtin_ia32_gatherdiv8di with _mm512_undefined_si512 () as the first
   operand and a mask of 0xFF.  Each argument is parenthesized before it is
   cast so the cast applies to the whole argument expression; the expansion
   itself is parenthesized so it behaves as a single primary expression.  */
#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gatherdiv8di (                              \
     (__v8di) _mm512_undefined_si512 (),                                \
     (long long const *) (ADDR),                                        \
     (__v8di) (__m512i) (INDEX),                                        \
     (__mmask8) 0xFF, (int) (SCALE)))
9634
|
|
|
|
|
|
|
|
9635
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to
   __builtin_ia32_gatherdiv8di with V1OLD as the first operand and MASK
   cast to __mmask8.  Each argument is parenthesized before it is cast so
   the cast applies to the whole argument expression; the expansion itself
   is parenthesized so it behaves as a single primary expression.  */
#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gatherdiv8di (                              \
     (__v8di) (__m512i) (V1OLD),                                        \
     (long long const *) (ADDR),                                        \
     (__v8di) (__m512i) (INDEX),                                        \
     (__mmask8) (MASK), (int) (SCALE)))
9640
|
|
|
|
|
|
|
|
9641
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to a call of
   __builtin_ia32_scattersiv16sf with a mask of 0xFFFF.  Each argument is
   parenthesized before it is cast, so that e.g. ADDR = `p + n' is
   converted as a whole instead of the cast binding to `p' alone.  */
#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scattersiv16sf ((float *) (ADDR), (__mmask16) 0xFFFF,  \
                                 (__v16si) (__m512i) (INDEX),           \
                                 (__v16sf) (__m512) (V1), (int) (SCALE))
9645
|
|
|
|
|
|
|
|
9646
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to a call of
   __builtin_ia32_scattersiv16sf with MASK cast to __mmask16.  Each
   argument is parenthesized before it is cast so the cast applies to the
   whole argument expression.  */
#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scattersiv16sf ((float *) (ADDR), (__mmask16) (MASK),  \
                                 (__v16si) (__m512i) (INDEX),           \
                                 (__v16sf) (__m512) (V1), (int) (SCALE))
9650
|
|
|
|
|
|
|
|
9651
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to a call of
   __builtin_ia32_scattersiv8df with a mask of 0xFF.  Each argument is
   parenthesized before it is cast so the cast applies to the whole
   argument expression.  */
#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scattersiv8df ((double *) (ADDR), (__mmask8) 0xFF,     \
                                (__v8si) (__m256i) (INDEX),             \
                                (__v8df) (__m512d) (V1), (int) (SCALE))
9655
|
|
|
|
|
|
|
|
9656
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to a call of
   __builtin_ia32_scattersiv8df with MASK cast to __mmask8.  Each argument
   is parenthesized before it is cast so the cast applies to the whole
   argument expression.  */
#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scattersiv8df ((double *) (ADDR), (__mmask8) (MASK),   \
                                (__v8si) (__m256i) (INDEX),             \
                                (__v8df) (__m512d) (V1), (int) (SCALE))
9660
|
|
|
|
|
|
|
|
9661
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to a call of
   __builtin_ia32_scatterdiv16sf with a mask of 0xFF and V1 viewed as
   __v8sf.  Each argument is parenthesized before it is cast so the cast
   applies to the whole argument expression.  */
#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scatterdiv16sf ((float *) (ADDR), (__mmask8) 0xFF,     \
                                 (__v8di) (__m512i) (INDEX),            \
                                 (__v8sf) (__m256) (V1), (int) (SCALE))
9665
|
|
|
|
|
|
|
|
9666
|
|
|
|
|
|
|
/* Macro form (#else branch of `#ifdef __OPTIMIZE__'): expands to a call of
   __builtin_ia32_scatterdiv16sf with V1 viewed as __v8sf.  MASK is cast to
   __mmask8, the mask type used by the unmasked macro's constant and by the
   other 64-bit-index scatters here, rather than __mmask16.  Each argument
   is parenthesized before it is cast so the cast applies to the whole
   argument expression.  */
#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scatterdiv16sf ((float *) (ADDR), (__mmask8) (MASK),   \
                                 (__v8di) (__m512i) (INDEX),            \
                                 (__v8sf) (__m256) (V1), (int) (SCALE))
9670
|
|
|
|
|
|
|
|
9671
|
|
|
|
|
|
|
/* Macro form of _mm512_i64scatter_pd: expands to a call to
   __builtin_ia32_scatterdiv8df with a constant 8-bit mask of 0xFF.
   Every macro argument is parenthesized before it is cast, so that a
   compound expression is converted as a whole.  */
#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8df ((double *) (ADDR), (__mmask8) 0xFF, \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9675
|
|
|
|
|
|
|
|
9676
|
|
|
|
|
|
|
/* Macro form of _mm512_mask_i64scatter_pd: expands to a call to
   __builtin_ia32_scatterdiv8df, passing MASK as a __mmask8.
   Every macro argument is parenthesized before it is cast, so that a
   compound expression is converted as a whole.  */
#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8df ((double *) (ADDR), (__mmask8) (MASK), \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
9680
|
|
|
|
|
|
|
|
9681
|
|
|
|
|
|
|
/* Macro form of _mm512_i32scatter_epi32: expands to a call to
   __builtin_ia32_scattersiv16si with a constant 16-bit mask of 0xFFFF.
   Every macro argument is parenthesized before it is cast, so that a
   compound expression is converted as a whole.  */
#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16si ((int *) (ADDR), (__mmask16) 0xFFFF, \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
9685
|
|
|
|
|
|
|
|
9686
|
|
|
|
|
|
|
/* Macro form of _mm512_mask_i32scatter_epi32: expands to a call to
   __builtin_ia32_scattersiv16si, passing MASK as a __mmask16.
   Every macro argument is parenthesized before it is cast, so that a
   compound expression is converted as a whole.  */
#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16si ((int *) (ADDR), (__mmask16) (MASK), \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
9690
|
|
|
|
|
|
|
|
9691
|
|
|
|
|
|
|
/* Macro form of _mm512_i32scatter_epi64: expands to a call to
   __builtin_ia32_scattersiv8di with a constant 8-bit mask of 0xFF.
   Every macro argument is parenthesized before it is cast, so that a
   compound expression is converted as a whole.  */
#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8di ((long long *) (ADDR), (__mmask8) 0xFF, \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9695
|
|
|
|
|
|
|
|
9696
|
|
|
|
|
|
|
/* Macro form of _mm512_mask_i32scatter_epi64: expands to a call to
   __builtin_ia32_scattersiv8di, passing MASK as a __mmask8.
   Every macro argument is parenthesized before it is cast, so that a
   compound expression is converted as a whole.  */
#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8di ((long long *) (ADDR), (__mmask8) (MASK), \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9700
|
|
|
|
|
|
|
|
9701
|
|
|
|
|
|
|
/* Macro form of _mm512_i64scatter_epi32: expands to a call to
   __builtin_ia32_scatterdiv16si with a constant 8-bit mask of 0xFF.
   Every macro argument is parenthesized before it is cast, so that a
   compound expression is converted as a whole.  */
#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16si ((int *) (ADDR), (__mmask8) 0xFF, \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
9705
|
|
|
|
|
|
|
|
9706
|
|
|
|
|
|
|
/* Macro form of _mm512_mask_i64scatter_epi32: expands to a call to
   __builtin_ia32_scatterdiv16si, passing MASK as a __mmask8.
   Every macro argument is parenthesized before it is cast, so that a
   compound expression is converted as a whole.  */
#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16si ((int *) (ADDR), (__mmask8) (MASK), \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
9710
|
|
|
|
|
|
|
|
9711
|
|
|
|
|
|
|
/* Macro form of _mm512_i64scatter_epi64: expands to a call to
   __builtin_ia32_scatterdiv8di with a constant 8-bit mask of 0xFF.
   Every macro argument is parenthesized before it is cast, so that a
   compound expression is converted as a whole.  */
#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8di ((long long *) (ADDR), (__mmask8) 0xFF, \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9715
|
|
|
|
|
|
|
|
9716
|
|
|
|
|
|
|
/* Macro form of _mm512_mask_i64scatter_epi64: expands to a call to
   __builtin_ia32_scatterdiv8di, passing MASK as a __mmask8.
   Every macro argument is parenthesized before it is cast, so that a
   compound expression is converted as a whole.  */
#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8di ((long long *) (ADDR), (__mmask8) (MASK), \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9720
|
|
|
|
|
|
|
#endif |
9721
|
|
|
|
|
|
|
|
9722
|
|
|
|
|
|
|
extern __inline __m512d |
9723
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9724
|
|
|
|
|
|
|
_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A) |
9725
|
|
|
|
|
|
|
{ |
9726
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, |
9727
|
|
|
|
|
|
|
(__v8df) __W, |
9728
|
|
|
|
|
|
|
(__mmask8) __U); |
9729
|
|
|
|
|
|
|
} |
9730
|
|
|
|
|
|
|
|
9731
|
|
|
|
|
|
|
extern __inline __m512d |
9732
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9733
|
|
|
|
|
|
|
_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A) |
9734
|
|
|
|
|
|
|
{ |
9735
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, |
9736
|
|
|
|
|
|
|
(__v8df) |
9737
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
9738
|
|
|
|
|
|
|
(__mmask8) __U); |
9739
|
|
|
|
|
|
|
} |
9740
|
|
|
|
|
|
|
|
9741
|
|
|
|
|
|
|
extern __inline void |
9742
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9743
|
|
|
|
|
|
|
_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A) |
9744
|
|
|
|
|
|
|
{ |
9745
|
|
|
|
|
|
|
__builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A, |
9746
|
|
|
|
|
|
|
(__mmask8) __U); |
9747
|
|
|
|
|
|
|
} |
9748
|
|
|
|
|
|
|
|
9749
|
|
|
|
|
|
|
extern __inline __m512 |
9750
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9751
|
|
|
|
|
|
|
_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A) |
9752
|
|
|
|
|
|
|
{ |
9753
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, |
9754
|
|
|
|
|
|
|
(__v16sf) __W, |
9755
|
|
|
|
|
|
|
(__mmask16) __U); |
9756
|
|
|
|
|
|
|
} |
9757
|
|
|
|
|
|
|
|
9758
|
|
|
|
|
|
|
extern __inline __m512 |
9759
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9760
|
|
|
|
|
|
|
_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A) |
9761
|
|
|
|
|
|
|
{ |
9762
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, |
9763
|
|
|
|
|
|
|
(__v16sf) |
9764
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
9765
|
|
|
|
|
|
|
(__mmask16) __U); |
9766
|
|
|
|
|
|
|
} |
9767
|
|
|
|
|
|
|
|
9768
|
|
|
|
|
|
|
extern __inline void |
9769
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9770
|
|
|
|
|
|
|
_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A) |
9771
|
|
|
|
|
|
|
{ |
9772
|
|
|
|
|
|
|
__builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A, |
9773
|
|
|
|
|
|
|
(__mmask16) __U); |
9774
|
|
|
|
|
|
|
} |
9775
|
|
|
|
|
|
|
|
9776
|
|
|
|
|
|
|
extern __inline __m512i |
9777
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9778
|
|
|
|
|
|
|
_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A) |
9779
|
|
|
|
|
|
|
{ |
9780
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, |
9781
|
|
|
|
|
|
|
(__v8di) __W, |
9782
|
|
|
|
|
|
|
(__mmask8) __U); |
9783
|
|
|
|
|
|
|
} |
9784
|
|
|
|
|
|
|
|
9785
|
|
|
|
|
|
|
extern __inline __m512i |
9786
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9787
|
|
|
|
|
|
|
_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A) |
9788
|
|
|
|
|
|
|
{ |
9789
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, |
9790
|
|
|
|
|
|
|
(__v8di) |
9791
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
9792
|
|
|
|
|
|
|
(__mmask8) __U); |
9793
|
|
|
|
|
|
|
} |
9794
|
|
|
|
|
|
|
|
9795
|
|
|
|
|
|
|
extern __inline void |
9796
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9797
|
|
|
|
|
|
|
_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A) |
9798
|
|
|
|
|
|
|
{ |
9799
|
|
|
|
|
|
|
__builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A, |
9800
|
|
|
|
|
|
|
(__mmask8) __U); |
9801
|
|
|
|
|
|
|
} |
9802
|
|
|
|
|
|
|
|
9803
|
|
|
|
|
|
|
extern __inline __m512i |
9804
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9805
|
|
|
|
|
|
|
_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A) |
9806
|
|
|
|
|
|
|
{ |
9807
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, |
9808
|
|
|
|
|
|
|
(__v16si) __W, |
9809
|
|
|
|
|
|
|
(__mmask16) __U); |
9810
|
|
|
|
|
|
|
} |
9811
|
|
|
|
|
|
|
|
9812
|
|
|
|
|
|
|
extern __inline __m512i |
9813
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9814
|
|
|
|
|
|
|
_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) |
9815
|
|
|
|
|
|
|
{ |
9816
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, |
9817
|
|
|
|
|
|
|
(__v16si) |
9818
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
9819
|
|
|
|
|
|
|
(__mmask16) __U); |
9820
|
|
|
|
|
|
|
} |
9821
|
|
|
|
|
|
|
|
9822
|
|
|
|
|
|
|
extern __inline void |
9823
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9824
|
|
|
|
|
|
|
_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A) |
9825
|
|
|
|
|
|
|
{ |
9826
|
|
|
|
|
|
|
__builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A, |
9827
|
|
|
|
|
|
|
(__mmask16) __U); |
9828
|
|
|
|
|
|
|
} |
9829
|
|
|
|
|
|
|
|
9830
|
|
|
|
|
|
|
extern __inline __m512d |
9831
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9832
|
|
|
|
|
|
|
_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) |
9833
|
|
|
|
|
|
|
{ |
9834
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, |
9835
|
|
|
|
|
|
|
(__v8df) __W, |
9836
|
|
|
|
|
|
|
(__mmask8) __U); |
9837
|
|
|
|
|
|
|
} |
9838
|
|
|
|
|
|
|
|
9839
|
|
|
|
|
|
|
extern __inline __m512d |
9840
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9841
|
|
|
|
|
|
|
_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A) |
9842
|
|
|
|
|
|
|
{ |
9843
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A, |
9844
|
|
|
|
|
|
|
(__v8df) |
9845
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
9846
|
|
|
|
|
|
|
(__mmask8) __U); |
9847
|
|
|
|
|
|
|
} |
9848
|
|
|
|
|
|
|
|
9849
|
|
|
|
|
|
|
extern __inline __m512d |
9850
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9851
|
|
|
|
|
|
|
_mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P) |
9852
|
|
|
|
|
|
|
{ |
9853
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P, |
9854
|
|
|
|
|
|
|
(__v8df) __W, |
9855
|
|
|
|
|
|
|
(__mmask8) __U); |
9856
|
|
|
|
|
|
|
} |
9857
|
|
|
|
|
|
|
|
9858
|
|
|
|
|
|
|
extern __inline __m512d |
9859
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9860
|
|
|
|
|
|
|
_mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P) |
9861
|
|
|
|
|
|
|
{ |
9862
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P, |
9863
|
|
|
|
|
|
|
(__v8df) |
9864
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
9865
|
|
|
|
|
|
|
(__mmask8) __U); |
9866
|
|
|
|
|
|
|
} |
9867
|
|
|
|
|
|
|
|
9868
|
|
|
|
|
|
|
extern __inline __m512 |
9869
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9870
|
|
|
|
|
|
|
_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A) |
9871
|
|
|
|
|
|
|
{ |
9872
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, |
9873
|
|
|
|
|
|
|
(__v16sf) __W, |
9874
|
|
|
|
|
|
|
(__mmask16) __U); |
9875
|
|
|
|
|
|
|
} |
9876
|
|
|
|
|
|
|
|
9877
|
|
|
|
|
|
|
extern __inline __m512 |
9878
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9879
|
|
|
|
|
|
|
_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A) |
9880
|
|
|
|
|
|
|
{ |
9881
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A, |
9882
|
|
|
|
|
|
|
(__v16sf) |
9883
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
9884
|
|
|
|
|
|
|
(__mmask16) __U); |
9885
|
|
|
|
|
|
|
} |
9886
|
|
|
|
|
|
|
|
9887
|
|
|
|
|
|
|
extern __inline __m512 |
9888
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9889
|
|
|
|
|
|
|
_mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P) |
9890
|
|
|
|
|
|
|
{ |
9891
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P, |
9892
|
|
|
|
|
|
|
(__v16sf) __W, |
9893
|
|
|
|
|
|
|
(__mmask16) __U); |
9894
|
|
|
|
|
|
|
} |
9895
|
|
|
|
|
|
|
|
9896
|
|
|
|
|
|
|
extern __inline __m512 |
9897
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9898
|
|
|
|
|
|
|
_mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P) |
9899
|
|
|
|
|
|
|
{ |
9900
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P, |
9901
|
|
|
|
|
|
|
(__v16sf) |
9902
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
9903
|
|
|
|
|
|
|
(__mmask16) __U); |
9904
|
|
|
|
|
|
|
} |
9905
|
|
|
|
|
|
|
|
9906
|
|
|
|
|
|
|
extern __inline __m512i |
9907
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9908
|
|
|
|
|
|
|
_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A) |
9909
|
|
|
|
|
|
|
{ |
9910
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, |
9911
|
|
|
|
|
|
|
(__v8di) __W, |
9912
|
|
|
|
|
|
|
(__mmask8) __U); |
9913
|
|
|
|
|
|
|
} |
9914
|
|
|
|
|
|
|
|
9915
|
|
|
|
|
|
|
extern __inline __m512i |
9916
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9917
|
|
|
|
|
|
|
_mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A) |
9918
|
|
|
|
|
|
|
{ |
9919
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A, |
9920
|
|
|
|
|
|
|
(__v8di) |
9921
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
9922
|
|
|
|
|
|
|
(__mmask8) __U); |
9923
|
|
|
|
|
|
|
} |
9924
|
|
|
|
|
|
|
|
9925
|
|
|
|
|
|
|
extern __inline __m512i |
9926
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9927
|
|
|
|
|
|
|
_mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) |
9928
|
|
|
|
|
|
|
{ |
9929
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P, |
9930
|
|
|
|
|
|
|
(__v8di) __W, |
9931
|
|
|
|
|
|
|
(__mmask8) __U); |
9932
|
|
|
|
|
|
|
} |
9933
|
|
|
|
|
|
|
|
9934
|
|
|
|
|
|
|
extern __inline __m512i |
9935
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9936
|
|
|
|
|
|
|
_mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) |
9937
|
|
|
|
|
|
|
{ |
9938
|
|
|
|
|
|
|
return (__m512i) |
9939
|
|
|
|
|
|
|
__builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P, |
9940
|
|
|
|
|
|
|
(__v8di) |
9941
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
9942
|
|
|
|
|
|
|
(__mmask8) __U); |
9943
|
|
|
|
|
|
|
} |
9944
|
|
|
|
|
|
|
|
9945
|
|
|
|
|
|
|
extern __inline __m512i |
9946
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9947
|
|
|
|
|
|
|
_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A) |
9948
|
|
|
|
|
|
|
{ |
9949
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, |
9950
|
|
|
|
|
|
|
(__v16si) __W, |
9951
|
|
|
|
|
|
|
(__mmask16) __U); |
9952
|
|
|
|
|
|
|
} |
9953
|
|
|
|
|
|
|
|
9954
|
|
|
|
|
|
|
extern __inline __m512i |
9955
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9956
|
|
|
|
|
|
|
_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) |
9957
|
|
|
|
|
|
|
{ |
9958
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A, |
9959
|
|
|
|
|
|
|
(__v16si) |
9960
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
9961
|
|
|
|
|
|
|
(__mmask16) __U); |
9962
|
|
|
|
|
|
|
} |
9963
|
|
|
|
|
|
|
|
9964
|
|
|
|
|
|
|
extern __inline __m512i |
9965
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9966
|
|
|
|
|
|
|
_mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) |
9967
|
|
|
|
|
|
|
{ |
9968
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P, |
9969
|
|
|
|
|
|
|
(__v16si) __W, |
9970
|
|
|
|
|
|
|
(__mmask16) __U); |
9971
|
|
|
|
|
|
|
} |
9972
|
|
|
|
|
|
|
|
9973
|
|
|
|
|
|
|
extern __inline __m512i |
9974
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9975
|
|
|
|
|
|
|
_mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P) |
9976
|
|
|
|
|
|
|
{ |
9977
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P, |
9978
|
|
|
|
|
|
|
(__v16si) |
9979
|
|
|
|
|
|
|
_mm512_setzero_si512 |
9980
|
|
|
|
|
|
|
(), (__mmask16) __U); |
9981
|
|
|
|
|
|
|
} |
9982
|
|
|
|
|
|
|
|
9983
|
|
|
|
|
|
|
/* Mask arithmetic operations */ |
9984
|
|
|
|
|
|
|
extern __inline __mmask16 |
9985
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9986
|
|
|
|
|
|
|
_mm512_kand (__mmask16 __A, __mmask16 __B) |
9987
|
|
|
|
|
|
|
{ |
9988
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B); |
9989
|
|
|
|
|
|
|
} |
9990
|
|
|
|
|
|
|
|
9991
|
|
|
|
|
|
|
extern __inline __mmask16 |
9992
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
9993
|
|
|
|
|
|
|
_mm512_kandn (__mmask16 __A, __mmask16 __B) |
9994
|
|
|
|
|
|
|
{ |
9995
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B); |
9996
|
|
|
|
|
|
|
} |
9997
|
|
|
|
|
|
|
|
9998
|
|
|
|
|
|
|
extern __inline __mmask16 |
9999
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10000
|
|
|
|
|
|
|
_mm512_kor (__mmask16 __A, __mmask16 __B) |
10001
|
|
|
|
|
|
|
{ |
10002
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B); |
10003
|
|
|
|
|
|
|
} |
10004
|
|
|
|
|
|
|
|
10005
|
|
|
|
|
|
|
extern __inline int |
10006
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10007
|
|
|
|
|
|
|
_mm512_kortestz (__mmask16 __A, __mmask16 __B) |
10008
|
|
|
|
|
|
|
{ |
10009
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A, |
10010
|
|
|
|
|
|
|
(__mmask16) __B); |
10011
|
|
|
|
|
|
|
} |
10012
|
|
|
|
|
|
|
|
10013
|
|
|
|
|
|
|
extern __inline int |
10014
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10015
|
|
|
|
|
|
|
_mm512_kortestc (__mmask16 __A, __mmask16 __B) |
10016
|
|
|
|
|
|
|
{ |
10017
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A, |
10018
|
|
|
|
|
|
|
(__mmask16) __B); |
10019
|
|
|
|
|
|
|
} |
10020
|
|
|
|
|
|
|
|
10021
|
|
|
|
|
|
|
extern __inline __mmask16 |
10022
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10023
|
|
|
|
|
|
|
_mm512_kxnor (__mmask16 __A, __mmask16 __B) |
10024
|
|
|
|
|
|
|
{ |
10025
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B); |
10026
|
|
|
|
|
|
|
} |
10027
|
|
|
|
|
|
|
|
10028
|
|
|
|
|
|
|
extern __inline __mmask16 |
10029
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10030
|
|
|
|
|
|
|
_mm512_kxor (__mmask16 __A, __mmask16 __B) |
10031
|
|
|
|
|
|
|
{ |
10032
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B); |
10033
|
|
|
|
|
|
|
} |
10034
|
|
|
|
|
|
|
|
10035
|
|
|
|
|
|
|
extern __inline __mmask16 |
10036
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10037
|
|
|
|
|
|
|
_mm512_knot (__mmask16 __A) |
10038
|
|
|
|
|
|
|
{ |
10039
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A); |
10040
|
|
|
|
|
|
|
} |
10041
|
|
|
|
|
|
|
|
10042
|
|
|
|
|
|
|
extern __inline __mmask16 |
10043
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10044
|
|
|
|
|
|
|
_mm512_kunpackb (__mmask16 __A, __mmask16 __B) |
10045
|
|
|
|
|
|
|
{ |
10046
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); |
10047
|
|
|
|
|
|
|
} |
10048
|
|
|
|
|
|
|
|
10049
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
10050
|
|
|
|
|
|
|
extern __inline __m512i |
10051
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10052
|
|
|
|
|
|
|
_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D, |
10053
|
|
|
|
|
|
|
const int __imm) |
10054
|
|
|
|
|
|
|
{ |
10055
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C, |
10056
|
|
|
|
|
|
|
(__v4si) __D, |
10057
|
|
|
|
|
|
|
__imm, |
10058
|
|
|
|
|
|
|
(__v16si) |
10059
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
10060
|
|
|
|
|
|
|
__B); |
10061
|
|
|
|
|
|
|
} |
10062
|
|
|
|
|
|
|
|
10063
|
|
|
|
|
|
|
extern __inline __m512 |
10064
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10065
|
|
|
|
|
|
|
_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D, |
10066
|
|
|
|
|
|
|
const int __imm) |
10067
|
|
|
|
|
|
|
{ |
10068
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C, |
10069
|
|
|
|
|
|
|
(__v4sf) __D, |
10070
|
|
|
|
|
|
|
__imm, |
10071
|
|
|
|
|
|
|
(__v16sf) |
10072
|
|
|
|
|
|
|
_mm512_setzero_ps (), __B); |
10073
|
|
|
|
|
|
|
} |
10074
|
|
|
|
|
|
|
|
10075
|
|
|
|
|
|
|
extern __inline __m512i |
10076
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10077
|
|
|
|
|
|
|
_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C, |
10078
|
|
|
|
|
|
|
__m128i __D, const int __imm) |
10079
|
|
|
|
|
|
|
{ |
10080
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C, |
10081
|
|
|
|
|
|
|
(__v4si) __D, |
10082
|
|
|
|
|
|
|
__imm, |
10083
|
|
|
|
|
|
|
(__v16si) __A, |
10084
|
|
|
|
|
|
|
__B); |
10085
|
|
|
|
|
|
|
} |
10086
|
|
|
|
|
|
|
|
10087
|
|
|
|
|
|
|
extern __inline __m512 |
10088
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10089
|
|
|
|
|
|
|
_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C, |
10090
|
|
|
|
|
|
|
__m128 __D, const int __imm) |
10091
|
|
|
|
|
|
|
{ |
10092
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C, |
10093
|
|
|
|
|
|
|
(__v4sf) __D, |
10094
|
|
|
|
|
|
|
__imm, |
10095
|
|
|
|
|
|
|
(__v16sf) __A, __B); |
10096
|
|
|
|
|
|
|
} |
10097
|
|
|
|
|
|
|
#else |
10098
|
|
|
|
|
|
|
/* Macro form of _mm512_maskz_insertf32x4.  The mask A is cast to
   __mmask16, matching the __mmask16 parameter of the inline-function
   form; casting it to __mmask8 would discard mask bits 8..15.  */
#define _mm512_maskz_insertf32x4(A, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
    (__mmask16)(A)))
10102
|
|
|
|
|
|
|
|
10103
|
|
|
|
|
|
|
/* Macro form of _mm512_maskz_inserti32x4.  The mask A is cast to
   __mmask16, matching the __mmask16 parameter of the inline-function
   form; casting it to __mmask8 would discard mask bits 8..15.  */
#define _mm512_maskz_inserti32x4(A, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
    (__mmask16)(A)))
10107
|
|
|
|
|
|
|
|
10108
|
|
|
|
|
|
|
/* Macro form of _mm512_mask_insertf32x4.  The mask B is cast to
   __mmask16, matching the __mmask16 parameter of the inline-function
   form; casting it to __mmask8 would discard mask bits 8..15.  */
#define _mm512_mask_insertf32x4(A, B, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
    (__mmask16)(B)))
10112
|
|
|
|
|
|
|
|
10113
|
|
|
|
|
|
|
/* Macro form of _mm512_mask_inserti32x4.  The mask B is cast to
   __mmask16, matching the __mmask16 parameter of the inline-function
   form; casting it to __mmask8 would discard mask bits 8..15.  */
#define _mm512_mask_inserti32x4(A, B, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
    (__mmask16)(B)))
10117
|
|
|
|
|
|
|
#endif |
10118
|
|
|
|
|
|
|
|
10119
|
|
|
|
|
|
|
extern __inline __m512i |
10120
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10121
|
|
|
|
|
|
|
_mm512_max_epi64 (__m512i __A, __m512i __B) |
10122
|
|
|
|
|
|
|
{ |
10123
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, |
10124
|
|
|
|
|
|
|
(__v8di) __B, |
10125
|
|
|
|
|
|
|
(__v8di) |
10126
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
10127
|
|
|
|
|
|
|
(__mmask8) -1); |
10128
|
|
|
|
|
|
|
} |
10129
|
|
|
|
|
|
|
|
10130
|
|
|
|
|
|
|
extern __inline __m512i |
10131
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10132
|
|
|
|
|
|
|
_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) |
10133
|
|
|
|
|
|
|
{ |
10134
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, |
10135
|
|
|
|
|
|
|
(__v8di) __B, |
10136
|
|
|
|
|
|
|
(__v8di) |
10137
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
10138
|
|
|
|
|
|
|
__M); |
10139
|
|
|
|
|
|
|
} |
10140
|
|
|
|
|
|
|
|
10141
|
|
|
|
|
|
|
extern __inline __m512i |
10142
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10143
|
|
|
|
|
|
|
_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) |
10144
|
|
|
|
|
|
|
{ |
10145
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, |
10146
|
|
|
|
|
|
|
(__v8di) __B, |
10147
|
|
|
|
|
|
|
(__v8di) __W, __M); |
10148
|
|
|
|
|
|
|
} |
10149
|
|
|
|
|
|
|
|
10150
|
|
|
|
|
|
|
extern __inline __m512i |
10151
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10152
|
|
|
|
|
|
|
_mm512_min_epi64 (__m512i __A, __m512i __B) |
10153
|
|
|
|
|
|
|
{ |
10154
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, |
10155
|
|
|
|
|
|
|
(__v8di) __B, |
10156
|
|
|
|
|
|
|
(__v8di) |
10157
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
10158
|
|
|
|
|
|
|
(__mmask8) -1); |
10159
|
|
|
|
|
|
|
} |
10160
|
|
|
|
|
|
|
|
10161
|
|
|
|
|
|
|
extern __inline __m512i |
10162
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10163
|
|
|
|
|
|
|
_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) |
10164
|
|
|
|
|
|
|
{ |
10165
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, |
10166
|
|
|
|
|
|
|
(__v8di) __B, |
10167
|
|
|
|
|
|
|
(__v8di) __W, __M); |
10168
|
|
|
|
|
|
|
} |
10169
|
|
|
|
|
|
|
|
10170
|
|
|
|
|
|
|
extern __inline __m512i |
10171
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10172
|
|
|
|
|
|
|
_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) |
10173
|
|
|
|
|
|
|
{ |
10174
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, |
10175
|
|
|
|
|
|
|
(__v8di) __B, |
10176
|
|
|
|
|
|
|
(__v8di) |
10177
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
10178
|
|
|
|
|
|
|
__M); |
10179
|
|
|
|
|
|
|
} |
10180
|
|
|
|
|
|
|
|
10181
|
|
|
|
|
|
|
extern __inline __m512i |
10182
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10183
|
|
|
|
|
|
|
_mm512_max_epu64 (__m512i __A, __m512i __B) |
10184
|
|
|
|
|
|
|
{ |
10185
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, |
10186
|
|
|
|
|
|
|
(__v8di) __B, |
10187
|
|
|
|
|
|
|
(__v8di) |
10188
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
10189
|
|
|
|
|
|
|
(__mmask8) -1); |
10190
|
|
|
|
|
|
|
} |
10191
|
|
|
|
|
|
|
|
10192
|
|
|
|
|
|
|
extern __inline __m512i |
10193
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10194
|
|
|
|
|
|
|
_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) |
10195
|
|
|
|
|
|
|
{ |
10196
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, |
10197
|
|
|
|
|
|
|
(__v8di) __B, |
10198
|
|
|
|
|
|
|
(__v8di) |
10199
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
10200
|
|
|
|
|
|
|
__M); |
10201
|
|
|
|
|
|
|
} |
10202
|
|
|
|
|
|
|
|
10203
|
|
|
|
|
|
|
extern __inline __m512i |
10204
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10205
|
|
|
|
|
|
|
_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) |
10206
|
|
|
|
|
|
|
{ |
10207
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, |
10208
|
|
|
|
|
|
|
(__v8di) __B, |
10209
|
|
|
|
|
|
|
(__v8di) __W, __M); |
10210
|
|
|
|
|
|
|
} |
10211
|
|
|
|
|
|
|
|
10212
|
|
|
|
|
|
|
extern __inline __m512i |
10213
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10214
|
|
|
|
|
|
|
_mm512_min_epu64 (__m512i __A, __m512i __B) |
10215
|
|
|
|
|
|
|
{ |
10216
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, |
10217
|
|
|
|
|
|
|
(__v8di) __B, |
10218
|
|
|
|
|
|
|
(__v8di) |
10219
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
10220
|
|
|
|
|
|
|
(__mmask8) -1); |
10221
|
|
|
|
|
|
|
} |
10222
|
|
|
|
|
|
|
|
10223
|
|
|
|
|
|
|
extern __inline __m512i |
10224
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10225
|
|
|
|
|
|
|
_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) |
10226
|
|
|
|
|
|
|
{ |
10227
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, |
10228
|
|
|
|
|
|
|
(__v8di) __B, |
10229
|
|
|
|
|
|
|
(__v8di) __W, __M); |
10230
|
|
|
|
|
|
|
} |
10231
|
|
|
|
|
|
|
|
10232
|
|
|
|
|
|
|
extern __inline __m512i |
10233
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10234
|
|
|
|
|
|
|
_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B) |
10235
|
|
|
|
|
|
|
{ |
10236
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, |
10237
|
|
|
|
|
|
|
(__v8di) __B, |
10238
|
|
|
|
|
|
|
(__v8di) |
10239
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
10240
|
|
|
|
|
|
|
__M); |
10241
|
|
|
|
|
|
|
} |
10242
|
|
|
|
|
|
|
|
10243
|
|
|
|
|
|
|
extern __inline __m512i |
10244
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10245
|
|
|
|
|
|
|
_mm512_max_epi32 (__m512i __A, __m512i __B) |
10246
|
|
|
|
|
|
|
{ |
10247
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, |
10248
|
|
|
|
|
|
|
(__v16si) __B, |
10249
|
|
|
|
|
|
|
(__v16si) |
10250
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
10251
|
|
|
|
|
|
|
(__mmask16) -1); |
10252
|
|
|
|
|
|
|
} |
10253
|
|
|
|
|
|
|
|
10254
|
|
|
|
|
|
|
extern __inline __m512i |
10255
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10256
|
|
|
|
|
|
|
_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) |
10257
|
|
|
|
|
|
|
{ |
10258
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, |
10259
|
|
|
|
|
|
|
(__v16si) __B, |
10260
|
|
|
|
|
|
|
(__v16si) |
10261
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
10262
|
|
|
|
|
|
|
__M); |
10263
|
|
|
|
|
|
|
} |
10264
|
|
|
|
|
|
|
|
10265
|
|
|
|
|
|
|
extern __inline __m512i |
10266
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10267
|
|
|
|
|
|
|
_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) |
10268
|
|
|
|
|
|
|
{ |
10269
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, |
10270
|
|
|
|
|
|
|
(__v16si) __B, |
10271
|
|
|
|
|
|
|
(__v16si) __W, __M); |
10272
|
|
|
|
|
|
|
} |
10273
|
|
|
|
|
|
|
|
10274
|
|
|
|
|
|
|
extern __inline __m512i |
10275
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10276
|
|
|
|
|
|
|
_mm512_min_epi32 (__m512i __A, __m512i __B) |
10277
|
|
|
|
|
|
|
{ |
10278
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, |
10279
|
|
|
|
|
|
|
(__v16si) __B, |
10280
|
|
|
|
|
|
|
(__v16si) |
10281
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
10282
|
|
|
|
|
|
|
(__mmask16) -1); |
10283
|
|
|
|
|
|
|
} |
10284
|
|
|
|
|
|
|
|
10285
|
|
|
|
|
|
|
extern __inline __m512i |
10286
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10287
|
|
|
|
|
|
|
_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) |
10288
|
|
|
|
|
|
|
{ |
10289
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, |
10290
|
|
|
|
|
|
|
(__v16si) __B, |
10291
|
|
|
|
|
|
|
(__v16si) |
10292
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
10293
|
|
|
|
|
|
|
__M); |
10294
|
|
|
|
|
|
|
} |
10295
|
|
|
|
|
|
|
|
10296
|
|
|
|
|
|
|
extern __inline __m512i |
10297
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10298
|
|
|
|
|
|
|
_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) |
10299
|
|
|
|
|
|
|
{ |
10300
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, |
10301
|
|
|
|
|
|
|
(__v16si) __B, |
10302
|
|
|
|
|
|
|
(__v16si) __W, __M); |
10303
|
|
|
|
|
|
|
} |
10304
|
|
|
|
|
|
|
|
10305
|
|
|
|
|
|
|
extern __inline __m512i |
10306
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10307
|
|
|
|
|
|
|
_mm512_max_epu32 (__m512i __A, __m512i __B) |
10308
|
|
|
|
|
|
|
{ |
10309
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, |
10310
|
|
|
|
|
|
|
(__v16si) __B, |
10311
|
|
|
|
|
|
|
(__v16si) |
10312
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
10313
|
|
|
|
|
|
|
(__mmask16) -1); |
10314
|
|
|
|
|
|
|
} |
10315
|
|
|
|
|
|
|
|
10316
|
|
|
|
|
|
|
extern __inline __m512i |
10317
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10318
|
|
|
|
|
|
|
_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) |
10319
|
|
|
|
|
|
|
{ |
10320
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, |
10321
|
|
|
|
|
|
|
(__v16si) __B, |
10322
|
|
|
|
|
|
|
(__v16si) |
10323
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
10324
|
|
|
|
|
|
|
__M); |
10325
|
|
|
|
|
|
|
} |
10326
|
|
|
|
|
|
|
|
10327
|
|
|
|
|
|
|
extern __inline __m512i |
10328
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10329
|
|
|
|
|
|
|
_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) |
10330
|
|
|
|
|
|
|
{ |
10331
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, |
10332
|
|
|
|
|
|
|
(__v16si) __B, |
10333
|
|
|
|
|
|
|
(__v16si) __W, __M); |
10334
|
|
|
|
|
|
|
} |
10335
|
|
|
|
|
|
|
|
10336
|
|
|
|
|
|
|
extern __inline __m512i |
10337
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10338
|
|
|
|
|
|
|
_mm512_min_epu32 (__m512i __A, __m512i __B) |
10339
|
|
|
|
|
|
|
{ |
10340
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, |
10341
|
|
|
|
|
|
|
(__v16si) __B, |
10342
|
|
|
|
|
|
|
(__v16si) |
10343
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
10344
|
|
|
|
|
|
|
(__mmask16) -1); |
10345
|
|
|
|
|
|
|
} |
10346
|
|
|
|
|
|
|
|
10347
|
|
|
|
|
|
|
extern __inline __m512i |
10348
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10349
|
|
|
|
|
|
|
_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) |
10350
|
|
|
|
|
|
|
{ |
10351
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, |
10352
|
|
|
|
|
|
|
(__v16si) __B, |
10353
|
|
|
|
|
|
|
(__v16si) |
10354
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
10355
|
|
|
|
|
|
|
__M); |
10356
|
|
|
|
|
|
|
} |
10357
|
|
|
|
|
|
|
|
10358
|
|
|
|
|
|
|
extern __inline __m512i |
10359
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10360
|
|
|
|
|
|
|
_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) |
10361
|
|
|
|
|
|
|
{ |
10362
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, |
10363
|
|
|
|
|
|
|
(__v16si) __B, |
10364
|
|
|
|
|
|
|
(__v16si) __W, __M); |
10365
|
|
|
|
|
|
|
} |
10366
|
|
|
|
|
|
|
|
10367
|
|
|
|
|
|
|
extern __inline __m512 |
10368
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10369
|
|
|
|
|
|
|
_mm512_unpacklo_ps (__m512 __A, __m512 __B) |
10370
|
|
|
|
|
|
|
{ |
10371
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A, |
10372
|
|
|
|
|
|
|
(__v16sf) __B, |
10373
|
|
|
|
|
|
|
(__v16sf) |
10374
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
10375
|
|
|
|
|
|
|
(__mmask16) -1); |
10376
|
|
|
|
|
|
|
} |
10377
|
|
|
|
|
|
|
|
10378
|
|
|
|
|
|
|
extern __inline __m512 |
10379
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10380
|
|
|
|
|
|
|
_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) |
10381
|
|
|
|
|
|
|
{ |
10382
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A, |
10383
|
|
|
|
|
|
|
(__v16sf) __B, |
10384
|
|
|
|
|
|
|
(__v16sf) __W, |
10385
|
|
|
|
|
|
|
(__mmask16) __U); |
10386
|
|
|
|
|
|
|
} |
10387
|
|
|
|
|
|
|
|
10388
|
|
|
|
|
|
|
extern __inline __m512 |
10389
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10390
|
|
|
|
|
|
|
_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B) |
10391
|
|
|
|
|
|
|
{ |
10392
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A, |
10393
|
|
|
|
|
|
|
(__v16sf) __B, |
10394
|
|
|
|
|
|
|
(__v16sf) |
10395
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
10396
|
|
|
|
|
|
|
(__mmask16) __U); |
10397
|
|
|
|
|
|
|
} |
10398
|
|
|
|
|
|
|
|
10399
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__
/* Inline forms, compiled only when __OPTIMIZE__ is defined.  Each one
   casts both vector operands to the builtin's vector type and forwards
   them, together with __R, to the *_round builtin of the same
   operation.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
					       (__v2df) __B,
					       __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
					      (__v4sf) __B,
					      __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
					       (__v2df) __B,
					       __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
					      (__v4sf) __B,
					      __R);
}

#else
/* Macro forms, used when __OPTIMIZE__ is not defined.  Every expansion
   must name the same builtin as the inline function of that name in the
   branch above, so that max/min mean the same thing at every
   optimization level.  */
#define _mm_max_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_maxsd_round(A, B, C)

#define _mm_max_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_maxss_round(A, B, C)

#define _mm_min_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_minsd_round(A, B, C)

#define _mm_min_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_minss_round(A, B, C)
#endif
10449
|
|
|
|
|
|
|
|
10450
|
|
|
|
|
|
|
extern __inline __m512d |
10451
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10452
|
|
|
|
|
|
|
_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W) |
10453
|
|
|
|
|
|
|
{ |
10454
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A, |
10455
|
|
|
|
|
|
|
(__v8df) __W, |
10456
|
|
|
|
|
|
|
(__mmask8) __U); |
10457
|
|
|
|
|
|
|
} |
10458
|
|
|
|
|
|
|
|
10459
|
|
|
|
|
|
|
extern __inline __m512 |
10460
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10461
|
|
|
|
|
|
|
_mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W) |
10462
|
|
|
|
|
|
|
{ |
10463
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A, |
10464
|
|
|
|
|
|
|
(__v16sf) __W, |
10465
|
|
|
|
|
|
|
(__mmask16) __U); |
10466
|
|
|
|
|
|
|
} |
10467
|
|
|
|
|
|
|
|
10468
|
|
|
|
|
|
|
extern __inline __m512i |
10469
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10470
|
|
|
|
|
|
|
_mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W) |
10471
|
|
|
|
|
|
|
{ |
10472
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A, |
10473
|
|
|
|
|
|
|
(__v8di) __W, |
10474
|
|
|
|
|
|
|
(__mmask8) __U); |
10475
|
|
|
|
|
|
|
} |
10476
|
|
|
|
|
|
|
|
10477
|
|
|
|
|
|
|
extern __inline __m512i |
10478
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10479
|
|
|
|
|
|
|
_mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W) |
10480
|
|
|
|
|
|
|
{ |
10481
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A, |
10482
|
|
|
|
|
|
|
(__v16si) __W, |
10483
|
|
|
|
|
|
|
(__mmask16) __U); |
10484
|
|
|
|
|
|
|
} |
10485
|
|
|
|
|
|
|
|
10486
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__
/* Inline forms, compiled only when __OPTIMIZE__ is defined.  All eight
   call __builtin_ia32_vfmaddsd3_round / __builtin_ia32_vfmaddss3_round;
   the fmsub, fnmadd and fnmsub flavours differ only in which of the
   second and third operands is negated before the call.  __R is passed
   straight through.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  __v2df __op1 = (__v2df) __W;
  __v2df __op2 = (__v2df) __A;
  __v2df __op3 = (__v2df) __B;

  return (__m128d) __builtin_ia32_vfmaddsd3_round (__op1, __op2, __op3, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  __v4sf __op1 = (__v4sf) __W;
  __v4sf __op2 = (__v4sf) __A;
  __v4sf __op3 = (__v4sf) __B;

  return (__m128) __builtin_ia32_vfmaddss3_round (__op1, __op2, __op3, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  __v2df __op1 = (__v2df) __W;
  __v2df __op2 = (__v2df) __A;
  /* Third operand negated.  */
  __v2df __op3 = -(__v2df) __B;

  return (__m128d) __builtin_ia32_vfmaddsd3_round (__op1, __op2, __op3, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  __v4sf __op1 = (__v4sf) __W;
  __v4sf __op2 = (__v4sf) __A;
  /* Third operand negated.  */
  __v4sf __op3 = -(__v4sf) __B;

  return (__m128) __builtin_ia32_vfmaddss3_round (__op1, __op2, __op3, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  __v2df __op1 = (__v2df) __W;
  /* Second operand negated.  */
  __v2df __op2 = -(__v2df) __A;
  __v2df __op3 = (__v2df) __B;

  return (__m128d) __builtin_ia32_vfmaddsd3_round (__op1, __op2, __op3, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  __v4sf __op1 = (__v4sf) __W;
  /* Second operand negated.  */
  __v4sf __op2 = -(__v4sf) __A;
  __v4sf __op3 = (__v4sf) __B;

  return (__m128) __builtin_ia32_vfmaddss3_round (__op1, __op2, __op3, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  __v2df __op1 = (__v2df) __W;
  /* Second and third operands both negated.  */
  __v2df __op2 = -(__v2df) __A;
  __v2df __op3 = -(__v2df) __B;

  return (__m128d) __builtin_ia32_vfmaddsd3_round (__op1, __op2, __op3, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  __v4sf __op1 = (__v4sf) __W;
  /* Second and third operands both negated.  */
  __v4sf __op2 = -(__v4sf) __A;
  __v4sf __op3 = -(__v4sf) __B;

  return (__m128) __builtin_ia32_vfmaddss3_round (__op1, __op2, __op3, __R);
}
#else
/* Macro forms, used when __OPTIMIZE__ is not defined.  They expand to the
   same builtins as the inline functions above, with the same operands
   negated, but without the explicit __v2df / __v4sf argument casts.  */
#define _mm_fmadd_round_sd(A, B, C, R)                       \
  (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R)

#define _mm_fmadd_round_ss(A, B, C, R)                       \
  (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R)

#define _mm_fmsub_round_sd(A, B, C, R)                       \
  (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R)

#define _mm_fmsub_round_ss(A, B, C, R)                       \
  (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R)

#define _mm_fnmadd_round_sd(A, B, C, R)                      \
  (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R)

#define _mm_fnmadd_round_ss(A, B, C, R)                      \
  (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R)

#define _mm_fnmsub_round_sd(A, B, C, R)                      \
  (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R)

#define _mm_fnmsub_round_ss(A, B, C, R)                      \
  (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R)
#endif
10591
|
|
|
|
|
|
|
|
10592
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__
/* Inline forms, compiled only when __OPTIMIZE__ is defined.  Each returns
   the int produced by __builtin_ia32_vcomiss / __builtin_ia32_vcomisd for
   the two vector operands, with __P and __R passed straight through.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
{
  __v4sf __lhs = (__v4sf) __A;
  __v4sf __rhs = (__v4sf) __B;

  return __builtin_ia32_vcomiss (__lhs, __rhs, __P, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
{
  __v2df __lhs = (__v2df) __A;
  __v2df __rhs = (__v2df) __B;

  return __builtin_ia32_vcomisd (__lhs, __rhs, __P, __R);
}
#else
/* Macro forms, used when __OPTIMIZE__ is not defined; the four arguments
   are handed to the same builtins unchanged.  */
#define _mm_comi_round_ss(A, B, C, D)  \
  __builtin_ia32_vcomiss(A, B, C, D)
#define _mm_comi_round_sd(A, B, C, D)  \
  __builtin_ia32_vcomisd(A, B, C, D)
#endif
10612
|
|
|
|
|
|
|
|
10613
|
|
|
|
|
|
|
extern __inline __m512d |
10614
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10615
|
|
|
|
|
|
|
_mm512_sqrt_pd (__m512d __A) |
10616
|
|
|
|
|
|
|
{ |
10617
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, |
10618
|
|
|
|
|
|
|
(__v8df) |
10619
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
10620
|
|
|
|
|
|
|
(__mmask8) -1, |
10621
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10622
|
|
|
|
|
|
|
} |
10623
|
|
|
|
|
|
|
|
10624
|
|
|
|
|
|
|
extern __inline __m512d |
10625
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10626
|
|
|
|
|
|
|
_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) |
10627
|
|
|
|
|
|
|
{ |
10628
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, |
10629
|
|
|
|
|
|
|
(__v8df) __W, |
10630
|
|
|
|
|
|
|
(__mmask8) __U, |
10631
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10632
|
|
|
|
|
|
|
} |
10633
|
|
|
|
|
|
|
|
10634
|
|
|
|
|
|
|
extern __inline __m512d |
10635
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10636
|
|
|
|
|
|
|
_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) |
10637
|
|
|
|
|
|
|
{ |
10638
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, |
10639
|
|
|
|
|
|
|
(__v8df) |
10640
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
10641
|
|
|
|
|
|
|
(__mmask8) __U, |
10642
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10643
|
|
|
|
|
|
|
} |
10644
|
|
|
|
|
|
|
|
10645
|
|
|
|
|
|
|
extern __inline __m512 |
10646
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10647
|
|
|
|
|
|
|
_mm512_sqrt_ps (__m512 __A) |
10648
|
|
|
|
|
|
|
{ |
10649
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, |
10650
|
|
|
|
|
|
|
(__v16sf) |
10651
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
10652
|
|
|
|
|
|
|
(__mmask16) -1, |
10653
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10654
|
|
|
|
|
|
|
} |
10655
|
|
|
|
|
|
|
|
10656
|
|
|
|
|
|
|
extern __inline __m512 |
10657
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10658
|
|
|
|
|
|
|
_mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A) |
10659
|
|
|
|
|
|
|
{ |
10660
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, |
10661
|
|
|
|
|
|
|
(__v16sf) __W, |
10662
|
|
|
|
|
|
|
(__mmask16) __U, |
10663
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10664
|
|
|
|
|
|
|
} |
10665
|
|
|
|
|
|
|
|
10666
|
|
|
|
|
|
|
extern __inline __m512 |
10667
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10668
|
|
|
|
|
|
|
_mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A) |
10669
|
|
|
|
|
|
|
{ |
10670
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, |
10671
|
|
|
|
|
|
|
(__v16sf) |
10672
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
10673
|
|
|
|
|
|
|
(__mmask16) __U, |
10674
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10675
|
|
|
|
|
|
|
} |
10676
|
|
|
|
|
|
|
|
10677
|
|
|
|
|
|
|
extern __inline __m512d |
10678
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10679
|
|
|
|
|
|
|
_mm512_add_pd (__m512d __A, __m512d __B) |
10680
|
|
|
|
|
|
|
{ |
10681
|
|
|
|
|
|
|
return (__m512d) ((__v8df)__A + (__v8df)__B); |
10682
|
|
|
|
|
|
|
} |
10683
|
|
|
|
|
|
|
|
10684
|
|
|
|
|
|
|
extern __inline __m512d |
10685
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10686
|
|
|
|
|
|
|
_mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) |
10687
|
|
|
|
|
|
|
{ |
10688
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, |
10689
|
|
|
|
|
|
|
(__v8df) __B, |
10690
|
|
|
|
|
|
|
(__v8df) __W, |
10691
|
|
|
|
|
|
|
(__mmask8) __U, |
10692
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10693
|
|
|
|
|
|
|
} |
10694
|
|
|
|
|
|
|
|
10695
|
|
|
|
|
|
|
extern __inline __m512d |
10696
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10697
|
|
|
|
|
|
|
_mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B) |
10698
|
|
|
|
|
|
|
{ |
10699
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, |
10700
|
|
|
|
|
|
|
(__v8df) __B, |
10701
|
|
|
|
|
|
|
(__v8df) |
10702
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
10703
|
|
|
|
|
|
|
(__mmask8) __U, |
10704
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10705
|
|
|
|
|
|
|
} |
10706
|
|
|
|
|
|
|
|
10707
|
|
|
|
|
|
|
extern __inline __m512 |
10708
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10709
|
|
|
|
|
|
|
_mm512_add_ps (__m512 __A, __m512 __B) |
10710
|
|
|
|
|
|
|
{ |
10711
|
|
|
|
|
|
|
return (__m512) ((__v16sf)__A + (__v16sf)__B); |
10712
|
|
|
|
|
|
|
} |
10713
|
|
|
|
|
|
|
|
10714
|
|
|
|
|
|
|
extern __inline __m512 |
10715
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10716
|
|
|
|
|
|
|
_mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) |
10717
|
|
|
|
|
|
|
{ |
10718
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, |
10719
|
|
|
|
|
|
|
(__v16sf) __B, |
10720
|
|
|
|
|
|
|
(__v16sf) __W, |
10721
|
|
|
|
|
|
|
(__mmask16) __U, |
10722
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10723
|
|
|
|
|
|
|
} |
10724
|
|
|
|
|
|
|
|
10725
|
|
|
|
|
|
|
extern __inline __m512 |
10726
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10727
|
|
|
|
|
|
|
_mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B) |
10728
|
|
|
|
|
|
|
{ |
10729
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, |
10730
|
|
|
|
|
|
|
(__v16sf) __B, |
10731
|
|
|
|
|
|
|
(__v16sf) |
10732
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
10733
|
|
|
|
|
|
|
(__mmask16) __U, |
10734
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10735
|
|
|
|
|
|
|
} |
10736
|
|
|
|
|
|
|
|
10737
|
|
|
|
|
|
|
extern __inline __m512d |
10738
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10739
|
|
|
|
|
|
|
_mm512_sub_pd (__m512d __A, __m512d __B) |
10740
|
|
|
|
|
|
|
{ |
10741
|
|
|
|
|
|
|
return (__m512d) ((__v8df)__A - (__v8df)__B); |
10742
|
|
|
|
|
|
|
} |
10743
|
|
|
|
|
|
|
|
10744
|
|
|
|
|
|
|
extern __inline __m512d |
10745
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10746
|
|
|
|
|
|
|
_mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) |
10747
|
|
|
|
|
|
|
{ |
10748
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, |
10749
|
|
|
|
|
|
|
(__v8df) __B, |
10750
|
|
|
|
|
|
|
(__v8df) __W, |
10751
|
|
|
|
|
|
|
(__mmask8) __U, |
10752
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10753
|
|
|
|
|
|
|
} |
10754
|
|
|
|
|
|
|
|
10755
|
|
|
|
|
|
|
extern __inline __m512d |
10756
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10757
|
|
|
|
|
|
|
_mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B) |
10758
|
|
|
|
|
|
|
{ |
10759
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, |
10760
|
|
|
|
|
|
|
(__v8df) __B, |
10761
|
|
|
|
|
|
|
(__v8df) |
10762
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
10763
|
|
|
|
|
|
|
(__mmask8) __U, |
10764
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10765
|
|
|
|
|
|
|
} |
10766
|
|
|
|
|
|
|
|
10767
|
|
|
|
|
|
|
extern __inline __m512 |
10768
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10769
|
|
|
|
|
|
|
_mm512_sub_ps (__m512 __A, __m512 __B) |
10770
|
|
|
|
|
|
|
{ |
10771
|
|
|
|
|
|
|
return (__m512) ((__v16sf)__A - (__v16sf)__B); |
10772
|
|
|
|
|
|
|
} |
10773
|
|
|
|
|
|
|
|
10774
|
|
|
|
|
|
|
extern __inline __m512 |
10775
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10776
|
|
|
|
|
|
|
_mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) |
10777
|
|
|
|
|
|
|
{ |
10778
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, |
10779
|
|
|
|
|
|
|
(__v16sf) __B, |
10780
|
|
|
|
|
|
|
(__v16sf) __W, |
10781
|
|
|
|
|
|
|
(__mmask16) __U, |
10782
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10783
|
|
|
|
|
|
|
} |
10784
|
|
|
|
|
|
|
|
10785
|
|
|
|
|
|
|
extern __inline __m512 |
10786
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10787
|
|
|
|
|
|
|
_mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B) |
10788
|
|
|
|
|
|
|
{ |
10789
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, |
10790
|
|
|
|
|
|
|
(__v16sf) __B, |
10791
|
|
|
|
|
|
|
(__v16sf) |
10792
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
10793
|
|
|
|
|
|
|
(__mmask16) __U, |
10794
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10795
|
|
|
|
|
|
|
} |
10796
|
|
|
|
|
|
|
|
10797
|
|
|
|
|
|
|
extern __inline __m512d |
10798
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10799
|
|
|
|
|
|
|
_mm512_mul_pd (__m512d __A, __m512d __B) |
10800
|
|
|
|
|
|
|
{ |
10801
|
|
|
|
|
|
|
return (__m512d) ((__v8df)__A * (__v8df)__B); |
10802
|
|
|
|
|
|
|
} |
10803
|
|
|
|
|
|
|
|
10804
|
|
|
|
|
|
|
extern __inline __m512d |
10805
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10806
|
|
|
|
|
|
|
_mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) |
10807
|
|
|
|
|
|
|
{ |
10808
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, |
10809
|
|
|
|
|
|
|
(__v8df) __B, |
10810
|
|
|
|
|
|
|
(__v8df) __W, |
10811
|
|
|
|
|
|
|
(__mmask8) __U, |
10812
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10813
|
|
|
|
|
|
|
} |
10814
|
|
|
|
|
|
|
|
10815
|
|
|
|
|
|
|
extern __inline __m512d |
10816
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10817
|
|
|
|
|
|
|
_mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B) |
10818
|
|
|
|
|
|
|
{ |
10819
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, |
10820
|
|
|
|
|
|
|
(__v8df) __B, |
10821
|
|
|
|
|
|
|
(__v8df) |
10822
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
10823
|
|
|
|
|
|
|
(__mmask8) __U, |
10824
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10825
|
|
|
|
|
|
|
} |
10826
|
|
|
|
|
|
|
|
10827
|
|
|
|
|
|
|
extern __inline __m512 |
10828
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10829
|
|
|
|
|
|
|
_mm512_mul_ps (__m512 __A, __m512 __B) |
10830
|
|
|
|
|
|
|
{ |
10831
|
|
|
|
|
|
|
return (__m512) ((__v16sf)__A * (__v16sf)__B); |
10832
|
|
|
|
|
|
|
} |
10833
|
|
|
|
|
|
|
|
10834
|
|
|
|
|
|
|
extern __inline __m512 |
10835
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10836
|
|
|
|
|
|
|
_mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) |
10837
|
|
|
|
|
|
|
{ |
10838
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, |
10839
|
|
|
|
|
|
|
(__v16sf) __B, |
10840
|
|
|
|
|
|
|
(__v16sf) __W, |
10841
|
|
|
|
|
|
|
(__mmask16) __U, |
10842
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10843
|
|
|
|
|
|
|
} |
10844
|
|
|
|
|
|
|
|
10845
|
|
|
|
|
|
|
extern __inline __m512 |
10846
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10847
|
|
|
|
|
|
|
_mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B) |
10848
|
|
|
|
|
|
|
{ |
10849
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, |
10850
|
|
|
|
|
|
|
(__v16sf) __B, |
10851
|
|
|
|
|
|
|
(__v16sf) |
10852
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
10853
|
|
|
|
|
|
|
(__mmask16) __U, |
10854
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10855
|
|
|
|
|
|
|
} |
10856
|
|
|
|
|
|
|
|
10857
|
|
|
|
|
|
|
extern __inline __m512d |
10858
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10859
|
|
|
|
|
|
|
_mm512_div_pd (__m512d __M, __m512d __V) |
10860
|
|
|
|
|
|
|
{ |
10861
|
|
|
|
|
|
|
return (__m512d) ((__v8df)__M / (__v8df)__V); |
10862
|
|
|
|
|
|
|
} |
10863
|
|
|
|
|
|
|
|
10864
|
|
|
|
|
|
|
extern __inline __m512d |
10865
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10866
|
|
|
|
|
|
|
_mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V) |
10867
|
|
|
|
|
|
|
{ |
10868
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, |
10869
|
|
|
|
|
|
|
(__v8df) __V, |
10870
|
|
|
|
|
|
|
(__v8df) __W, |
10871
|
|
|
|
|
|
|
(__mmask8) __U, |
10872
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10873
|
|
|
|
|
|
|
} |
10874
|
|
|
|
|
|
|
|
10875
|
|
|
|
|
|
|
extern __inline __m512d |
10876
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10877
|
|
|
|
|
|
|
_mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V) |
10878
|
|
|
|
|
|
|
{ |
10879
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, |
10880
|
|
|
|
|
|
|
(__v8df) __V, |
10881
|
|
|
|
|
|
|
(__v8df) |
10882
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
10883
|
|
|
|
|
|
|
(__mmask8) __U, |
10884
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10885
|
|
|
|
|
|
|
} |
10886
|
|
|
|
|
|
|
|
10887
|
|
|
|
|
|
|
extern __inline __m512 |
10888
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10889
|
|
|
|
|
|
|
_mm512_div_ps (__m512 __A, __m512 __B) |
10890
|
|
|
|
|
|
|
{ |
10891
|
|
|
|
|
|
|
return (__m512) ((__v16sf)__A / (__v16sf)__B); |
10892
|
|
|
|
|
|
|
} |
10893
|
|
|
|
|
|
|
|
10894
|
|
|
|
|
|
|
extern __inline __m512 |
10895
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10896
|
|
|
|
|
|
|
_mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) |
10897
|
|
|
|
|
|
|
{ |
10898
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, |
10899
|
|
|
|
|
|
|
(__v16sf) __B, |
10900
|
|
|
|
|
|
|
(__v16sf) __W, |
10901
|
|
|
|
|
|
|
(__mmask16) __U, |
10902
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10903
|
|
|
|
|
|
|
} |
10904
|
|
|
|
|
|
|
|
10905
|
|
|
|
|
|
|
extern __inline __m512 |
10906
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10907
|
|
|
|
|
|
|
_mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B) |
10908
|
|
|
|
|
|
|
{ |
10909
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, |
10910
|
|
|
|
|
|
|
(__v16sf) __B, |
10911
|
|
|
|
|
|
|
(__v16sf) |
10912
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
10913
|
|
|
|
|
|
|
(__mmask16) __U, |
10914
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10915
|
|
|
|
|
|
|
} |
10916
|
|
|
|
|
|
|
|
10917
|
|
|
|
|
|
|
extern __inline __m512d |
10918
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10919
|
|
|
|
|
|
|
_mm512_max_pd (__m512d __A, __m512d __B) |
10920
|
|
|
|
|
|
|
{ |
10921
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, |
10922
|
|
|
|
|
|
|
(__v8df) __B, |
10923
|
|
|
|
|
|
|
(__v8df) |
10924
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
10925
|
|
|
|
|
|
|
(__mmask8) -1, |
10926
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10927
|
|
|
|
|
|
|
} |
10928
|
|
|
|
|
|
|
|
10929
|
|
|
|
|
|
|
extern __inline __m512d |
10930
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10931
|
|
|
|
|
|
|
_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) |
10932
|
|
|
|
|
|
|
{ |
10933
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, |
10934
|
|
|
|
|
|
|
(__v8df) __B, |
10935
|
|
|
|
|
|
|
(__v8df) __W, |
10936
|
|
|
|
|
|
|
(__mmask8) __U, |
10937
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10938
|
|
|
|
|
|
|
} |
10939
|
|
|
|
|
|
|
|
10940
|
|
|
|
|
|
|
extern __inline __m512d |
10941
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10942
|
|
|
|
|
|
|
_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) |
10943
|
|
|
|
|
|
|
{ |
10944
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, |
10945
|
|
|
|
|
|
|
(__v8df) __B, |
10946
|
|
|
|
|
|
|
(__v8df) |
10947
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
10948
|
|
|
|
|
|
|
(__mmask8) __U, |
10949
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10950
|
|
|
|
|
|
|
} |
10951
|
|
|
|
|
|
|
|
10952
|
|
|
|
|
|
|
extern __inline __m512 |
10953
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10954
|
|
|
|
|
|
|
_mm512_max_ps (__m512 __A, __m512 __B) |
10955
|
|
|
|
|
|
|
{ |
10956
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, |
10957
|
|
|
|
|
|
|
(__v16sf) __B, |
10958
|
|
|
|
|
|
|
(__v16sf) |
10959
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
10960
|
|
|
|
|
|
|
(__mmask16) -1, |
10961
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10962
|
|
|
|
|
|
|
} |
10963
|
|
|
|
|
|
|
|
10964
|
|
|
|
|
|
|
extern __inline __m512 |
10965
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10966
|
|
|
|
|
|
|
_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) |
10967
|
|
|
|
|
|
|
{ |
10968
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, |
10969
|
|
|
|
|
|
|
(__v16sf) __B, |
10970
|
|
|
|
|
|
|
(__v16sf) __W, |
10971
|
|
|
|
|
|
|
(__mmask16) __U, |
10972
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10973
|
|
|
|
|
|
|
} |
10974
|
|
|
|
|
|
|
|
10975
|
|
|
|
|
|
|
extern __inline __m512 |
10976
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10977
|
|
|
|
|
|
|
_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) |
10978
|
|
|
|
|
|
|
{ |
10979
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, |
10980
|
|
|
|
|
|
|
(__v16sf) __B, |
10981
|
|
|
|
|
|
|
(__v16sf) |
10982
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
10983
|
|
|
|
|
|
|
(__mmask16) __U, |
10984
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10985
|
|
|
|
|
|
|
} |
10986
|
|
|
|
|
|
|
|
10987
|
|
|
|
|
|
|
extern __inline __m512d |
10988
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
10989
|
|
|
|
|
|
|
_mm512_min_pd (__m512d __A, __m512d __B) |
10990
|
|
|
|
|
|
|
{ |
10991
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, |
10992
|
|
|
|
|
|
|
(__v8df) __B, |
10993
|
|
|
|
|
|
|
(__v8df) |
10994
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
10995
|
|
|
|
|
|
|
(__mmask8) -1, |
10996
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
10997
|
|
|
|
|
|
|
} |
10998
|
|
|
|
|
|
|
|
10999
|
|
|
|
|
|
|
extern __inline __m512d |
11000
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11001
|
|
|
|
|
|
|
_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) |
11002
|
|
|
|
|
|
|
{ |
11003
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, |
11004
|
|
|
|
|
|
|
(__v8df) __B, |
11005
|
|
|
|
|
|
|
(__v8df) __W, |
11006
|
|
|
|
|
|
|
(__mmask8) __U, |
11007
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11008
|
|
|
|
|
|
|
} |
11009
|
|
|
|
|
|
|
|
11010
|
|
|
|
|
|
|
extern __inline __m512d |
11011
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11012
|
|
|
|
|
|
|
_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) |
11013
|
|
|
|
|
|
|
{ |
11014
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, |
11015
|
|
|
|
|
|
|
(__v8df) __B, |
11016
|
|
|
|
|
|
|
(__v8df) |
11017
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
11018
|
|
|
|
|
|
|
(__mmask8) __U, |
11019
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11020
|
|
|
|
|
|
|
} |
11021
|
|
|
|
|
|
|
|
11022
|
|
|
|
|
|
|
extern __inline __m512 |
11023
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11024
|
|
|
|
|
|
|
_mm512_min_ps (__m512 __A, __m512 __B) |
11025
|
|
|
|
|
|
|
{ |
11026
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, |
11027
|
|
|
|
|
|
|
(__v16sf) __B, |
11028
|
|
|
|
|
|
|
(__v16sf) |
11029
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
11030
|
|
|
|
|
|
|
(__mmask16) -1, |
11031
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11032
|
|
|
|
|
|
|
} |
11033
|
|
|
|
|
|
|
|
11034
|
|
|
|
|
|
|
extern __inline __m512 |
11035
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11036
|
|
|
|
|
|
|
_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) |
11037
|
|
|
|
|
|
|
{ |
11038
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, |
11039
|
|
|
|
|
|
|
(__v16sf) __B, |
11040
|
|
|
|
|
|
|
(__v16sf) __W, |
11041
|
|
|
|
|
|
|
(__mmask16) __U, |
11042
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11043
|
|
|
|
|
|
|
} |
11044
|
|
|
|
|
|
|
|
11045
|
|
|
|
|
|
|
extern __inline __m512 |
11046
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11047
|
|
|
|
|
|
|
_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B) |
11048
|
|
|
|
|
|
|
{ |
11049
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, |
11050
|
|
|
|
|
|
|
(__v16sf) __B, |
11051
|
|
|
|
|
|
|
(__v16sf) |
11052
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
11053
|
|
|
|
|
|
|
(__mmask16) __U, |
11054
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11055
|
|
|
|
|
|
|
} |
11056
|
|
|
|
|
|
|
|
11057
|
|
|
|
|
|
|
extern __inline __m512d |
11058
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11059
|
|
|
|
|
|
|
_mm512_scalef_pd (__m512d __A, __m512d __B) |
11060
|
|
|
|
|
|
|
{ |
11061
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, |
11062
|
|
|
|
|
|
|
(__v8df) __B, |
11063
|
|
|
|
|
|
|
(__v8df) |
11064
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
11065
|
|
|
|
|
|
|
(__mmask8) -1, |
11066
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11067
|
|
|
|
|
|
|
} |
11068
|
|
|
|
|
|
|
|
11069
|
|
|
|
|
|
|
extern __inline __m512d |
11070
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11071
|
|
|
|
|
|
|
_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) |
11072
|
|
|
|
|
|
|
{ |
11073
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, |
11074
|
|
|
|
|
|
|
(__v8df) __B, |
11075
|
|
|
|
|
|
|
(__v8df) __W, |
11076
|
|
|
|
|
|
|
(__mmask8) __U, |
11077
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11078
|
|
|
|
|
|
|
} |
11079
|
|
|
|
|
|
|
|
11080
|
|
|
|
|
|
|
extern __inline __m512d |
11081
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11082
|
|
|
|
|
|
|
_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B) |
11083
|
|
|
|
|
|
|
{ |
11084
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, |
11085
|
|
|
|
|
|
|
(__v8df) __B, |
11086
|
|
|
|
|
|
|
(__v8df) |
11087
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
11088
|
|
|
|
|
|
|
(__mmask8) __U, |
11089
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11090
|
|
|
|
|
|
|
} |
11091
|
|
|
|
|
|
|
|
11092
|
|
|
|
|
|
|
extern __inline __m512 |
11093
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11094
|
|
|
|
|
|
|
_mm512_scalef_ps (__m512 __A, __m512 __B) |
11095
|
|
|
|
|
|
|
{ |
11096
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, |
11097
|
|
|
|
|
|
|
(__v16sf) __B, |
11098
|
|
|
|
|
|
|
(__v16sf) |
11099
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
11100
|
|
|
|
|
|
|
(__mmask16) -1, |
11101
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11102
|
|
|
|
|
|
|
} |
11103
|
|
|
|
|
|
|
|
11104
|
|
|
|
|
|
|
extern __inline __m512 |
11105
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11106
|
|
|
|
|
|
|
_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) |
11107
|
|
|
|
|
|
|
{ |
11108
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, |
11109
|
|
|
|
|
|
|
(__v16sf) __B, |
11110
|
|
|
|
|
|
|
(__v16sf) __W, |
11111
|
|
|
|
|
|
|
(__mmask16) __U, |
11112
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11113
|
|
|
|
|
|
|
} |
11114
|
|
|
|
|
|
|
|
11115
|
|
|
|
|
|
|
extern __inline __m512 |
11116
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11117
|
|
|
|
|
|
|
_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B) |
11118
|
|
|
|
|
|
|
{ |
11119
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, |
11120
|
|
|
|
|
|
|
(__v16sf) __B, |
11121
|
|
|
|
|
|
|
(__v16sf) |
11122
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
11123
|
|
|
|
|
|
|
(__mmask16) __U, |
11124
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11125
|
|
|
|
|
|
|
} |
11126
|
|
|
|
|
|
|
|
11127
|
|
|
|
|
|
|
extern __inline __m128d |
11128
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11129
|
|
|
|
|
|
|
_mm_scalef_sd (__m128d __A, __m128d __B) |
11130
|
|
|
|
|
|
|
{ |
11131
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A, |
11132
|
|
|
|
|
|
|
(__v2df) __B, |
11133
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11134
|
|
|
|
|
|
|
} |
11135
|
|
|
|
|
|
|
|
11136
|
|
|
|
|
|
|
extern __inline __m128 |
11137
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11138
|
|
|
|
|
|
|
_mm_scalef_ss (__m128 __A, __m128 __B) |
11139
|
|
|
|
|
|
|
{ |
11140
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A, |
11141
|
|
|
|
|
|
|
(__v4sf) __B, |
11142
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11143
|
|
|
|
|
|
|
} |
11144
|
|
|
|
|
|
|
|
11145
|
|
|
|
|
|
|
extern __inline __m512d |
11146
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11147
|
|
|
|
|
|
|
_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C) |
11148
|
|
|
|
|
|
|
{ |
11149
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, |
11150
|
|
|
|
|
|
|
(__v8df) __B, |
11151
|
|
|
|
|
|
|
(__v8df) __C, |
11152
|
|
|
|
|
|
|
(__mmask8) -1, |
11153
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11154
|
|
|
|
|
|
|
} |
11155
|
|
|
|
|
|
|
|
11156
|
|
|
|
|
|
|
extern __inline __m512d |
11157
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11158
|
|
|
|
|
|
|
_mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) |
11159
|
|
|
|
|
|
|
{ |
11160
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, |
11161
|
|
|
|
|
|
|
(__v8df) __B, |
11162
|
|
|
|
|
|
|
(__v8df) __C, |
11163
|
|
|
|
|
|
|
(__mmask8) __U, |
11164
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11165
|
|
|
|
|
|
|
} |
11166
|
|
|
|
|
|
|
|
11167
|
|
|
|
|
|
|
extern __inline __m512d |
11168
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11169
|
|
|
|
|
|
|
_mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) |
11170
|
|
|
|
|
|
|
{ |
11171
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, |
11172
|
|
|
|
|
|
|
(__v8df) __B, |
11173
|
|
|
|
|
|
|
(__v8df) __C, |
11174
|
|
|
|
|
|
|
(__mmask8) __U, |
11175
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11176
|
|
|
|
|
|
|
} |
11177
|
|
|
|
|
|
|
|
11178
|
|
|
|
|
|
|
extern __inline __m512d |
11179
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11180
|
|
|
|
|
|
|
_mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) |
11181
|
|
|
|
|
|
|
{ |
11182
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, |
11183
|
|
|
|
|
|
|
(__v8df) __B, |
11184
|
|
|
|
|
|
|
(__v8df) __C, |
11185
|
|
|
|
|
|
|
(__mmask8) __U, |
11186
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11187
|
|
|
|
|
|
|
} |
11188
|
|
|
|
|
|
|
|
11189
|
|
|
|
|
|
|
extern __inline __m512 |
11190
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11191
|
|
|
|
|
|
|
_mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C) |
11192
|
|
|
|
|
|
|
{ |
11193
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, |
11194
|
|
|
|
|
|
|
(__v16sf) __B, |
11195
|
|
|
|
|
|
|
(__v16sf) __C, |
11196
|
|
|
|
|
|
|
(__mmask16) -1, |
11197
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11198
|
|
|
|
|
|
|
} |
11199
|
|
|
|
|
|
|
|
11200
|
|
|
|
|
|
|
extern __inline __m512 |
11201
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11202
|
|
|
|
|
|
|
_mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) |
11203
|
|
|
|
|
|
|
{ |
11204
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, |
11205
|
|
|
|
|
|
|
(__v16sf) __B, |
11206
|
|
|
|
|
|
|
(__v16sf) __C, |
11207
|
|
|
|
|
|
|
(__mmask16) __U, |
11208
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11209
|
|
|
|
|
|
|
} |
11210
|
|
|
|
|
|
|
|
11211
|
|
|
|
|
|
|
extern __inline __m512 |
11212
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11213
|
|
|
|
|
|
|
_mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) |
11214
|
|
|
|
|
|
|
{ |
11215
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, |
11216
|
|
|
|
|
|
|
(__v16sf) __B, |
11217
|
|
|
|
|
|
|
(__v16sf) __C, |
11218
|
|
|
|
|
|
|
(__mmask16) __U, |
11219
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11220
|
|
|
|
|
|
|
} |
11221
|
|
|
|
|
|
|
|
11222
|
|
|
|
|
|
|
extern __inline __m512 |
11223
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11224
|
|
|
|
|
|
|
_mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) |
11225
|
|
|
|
|
|
|
{ |
11226
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, |
11227
|
|
|
|
|
|
|
(__v16sf) __B, |
11228
|
|
|
|
|
|
|
(__v16sf) __C, |
11229
|
|
|
|
|
|
|
(__mmask16) __U, |
11230
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11231
|
|
|
|
|
|
|
} |
11232
|
|
|
|
|
|
|
|
11233
|
|
|
|
|
|
|
extern __inline __m512d |
11234
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11235
|
|
|
|
|
|
|
_mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C) |
11236
|
|
|
|
|
|
|
{ |
11237
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, |
11238
|
|
|
|
|
|
|
(__v8df) __B, |
11239
|
|
|
|
|
|
|
-(__v8df) __C, |
11240
|
|
|
|
|
|
|
(__mmask8) -1, |
11241
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11242
|
|
|
|
|
|
|
} |
11243
|
|
|
|
|
|
|
|
11244
|
|
|
|
|
|
|
extern __inline __m512d |
11245
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11246
|
|
|
|
|
|
|
_mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) |
11247
|
|
|
|
|
|
|
{ |
11248
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, |
11249
|
|
|
|
|
|
|
(__v8df) __B, |
11250
|
|
|
|
|
|
|
-(__v8df) __C, |
11251
|
|
|
|
|
|
|
(__mmask8) __U, |
11252
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11253
|
|
|
|
|
|
|
} |
11254
|
|
|
|
|
|
|
|
11255
|
|
|
|
|
|
|
extern __inline __m512d |
11256
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11257
|
|
|
|
|
|
|
_mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) |
11258
|
|
|
|
|
|
|
{ |
11259
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, |
11260
|
|
|
|
|
|
|
(__v8df) __B, |
11261
|
|
|
|
|
|
|
(__v8df) __C, |
11262
|
|
|
|
|
|
|
(__mmask8) __U, |
11263
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11264
|
|
|
|
|
|
|
} |
11265
|
|
|
|
|
|
|
|
11266
|
|
|
|
|
|
|
extern __inline __m512d |
11267
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11268
|
|
|
|
|
|
|
_mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) |
11269
|
|
|
|
|
|
|
{ |
11270
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, |
11271
|
|
|
|
|
|
|
(__v8df) __B, |
11272
|
|
|
|
|
|
|
-(__v8df) __C, |
11273
|
|
|
|
|
|
|
(__mmask8) __U, |
11274
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11275
|
|
|
|
|
|
|
} |
11276
|
|
|
|
|
|
|
|
11277
|
|
|
|
|
|
|
extern __inline __m512 |
11278
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11279
|
|
|
|
|
|
|
_mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C) |
11280
|
|
|
|
|
|
|
{ |
11281
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, |
11282
|
|
|
|
|
|
|
(__v16sf) __B, |
11283
|
|
|
|
|
|
|
-(__v16sf) __C, |
11284
|
|
|
|
|
|
|
(__mmask16) -1, |
11285
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11286
|
|
|
|
|
|
|
} |
11287
|
|
|
|
|
|
|
|
11288
|
|
|
|
|
|
|
extern __inline __m512 |
11289
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11290
|
|
|
|
|
|
|
_mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) |
11291
|
|
|
|
|
|
|
{ |
11292
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, |
11293
|
|
|
|
|
|
|
(__v16sf) __B, |
11294
|
|
|
|
|
|
|
-(__v16sf) __C, |
11295
|
|
|
|
|
|
|
(__mmask16) __U, |
11296
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11297
|
|
|
|
|
|
|
} |
11298
|
|
|
|
|
|
|
|
11299
|
|
|
|
|
|
|
extern __inline __m512 |
11300
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11301
|
|
|
|
|
|
|
_mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) |
11302
|
|
|
|
|
|
|
{ |
11303
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, |
11304
|
|
|
|
|
|
|
(__v16sf) __B, |
11305
|
|
|
|
|
|
|
(__v16sf) __C, |
11306
|
|
|
|
|
|
|
(__mmask16) __U, |
11307
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11308
|
|
|
|
|
|
|
} |
11309
|
|
|
|
|
|
|
|
11310
|
|
|
|
|
|
|
extern __inline __m512 |
11311
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11312
|
|
|
|
|
|
|
_mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) |
11313
|
|
|
|
|
|
|
{ |
11314
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, |
11315
|
|
|
|
|
|
|
(__v16sf) __B, |
11316
|
|
|
|
|
|
|
-(__v16sf) __C, |
11317
|
|
|
|
|
|
|
(__mmask16) __U, |
11318
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11319
|
|
|
|
|
|
|
} |
11320
|
|
|
|
|
|
|
|
11321
|
|
|
|
|
|
|
extern __inline __m512d |
11322
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11323
|
|
|
|
|
|
|
_mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C) |
11324
|
|
|
|
|
|
|
{ |
11325
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, |
11326
|
|
|
|
|
|
|
(__v8df) __B, |
11327
|
|
|
|
|
|
|
(__v8df) __C, |
11328
|
|
|
|
|
|
|
(__mmask8) -1, |
11329
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11330
|
|
|
|
|
|
|
} |
11331
|
|
|
|
|
|
|
|
11332
|
|
|
|
|
|
|
extern __inline __m512d |
11333
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11334
|
|
|
|
|
|
|
_mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) |
11335
|
|
|
|
|
|
|
{ |
11336
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, |
11337
|
|
|
|
|
|
|
(__v8df) __B, |
11338
|
|
|
|
|
|
|
(__v8df) __C, |
11339
|
|
|
|
|
|
|
(__mmask8) __U, |
11340
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11341
|
|
|
|
|
|
|
} |
11342
|
|
|
|
|
|
|
|
11343
|
|
|
|
|
|
|
extern __inline __m512d |
11344
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11345
|
|
|
|
|
|
|
_mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) |
11346
|
|
|
|
|
|
|
{ |
11347
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, |
11348
|
|
|
|
|
|
|
(__v8df) __B, |
11349
|
|
|
|
|
|
|
(__v8df) __C, |
11350
|
|
|
|
|
|
|
(__mmask8) __U, |
11351
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11352
|
|
|
|
|
|
|
} |
11353
|
|
|
|
|
|
|
|
11354
|
|
|
|
|
|
|
extern __inline __m512d |
11355
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11356
|
|
|
|
|
|
|
_mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) |
11357
|
|
|
|
|
|
|
{ |
11358
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, |
11359
|
|
|
|
|
|
|
(__v8df) __B, |
11360
|
|
|
|
|
|
|
(__v8df) __C, |
11361
|
|
|
|
|
|
|
(__mmask8) __U, |
11362
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11363
|
|
|
|
|
|
|
} |
11364
|
|
|
|
|
|
|
|
11365
|
|
|
|
|
|
|
extern __inline __m512 |
11366
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11367
|
|
|
|
|
|
|
_mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C) |
11368
|
|
|
|
|
|
|
{ |
11369
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, |
11370
|
|
|
|
|
|
|
(__v16sf) __B, |
11371
|
|
|
|
|
|
|
(__v16sf) __C, |
11372
|
|
|
|
|
|
|
(__mmask16) -1, |
11373
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11374
|
|
|
|
|
|
|
} |
11375
|
|
|
|
|
|
|
|
11376
|
|
|
|
|
|
|
extern __inline __m512 |
11377
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11378
|
|
|
|
|
|
|
_mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) |
11379
|
|
|
|
|
|
|
{ |
11380
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, |
11381
|
|
|
|
|
|
|
(__v16sf) __B, |
11382
|
|
|
|
|
|
|
(__v16sf) __C, |
11383
|
|
|
|
|
|
|
(__mmask16) __U, |
11384
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11385
|
|
|
|
|
|
|
} |
11386
|
|
|
|
|
|
|
|
11387
|
|
|
|
|
|
|
extern __inline __m512 |
11388
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11389
|
|
|
|
|
|
|
_mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) |
11390
|
|
|
|
|
|
|
{ |
11391
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, |
11392
|
|
|
|
|
|
|
(__v16sf) __B, |
11393
|
|
|
|
|
|
|
(__v16sf) __C, |
11394
|
|
|
|
|
|
|
(__mmask16) __U, |
11395
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11396
|
|
|
|
|
|
|
} |
11397
|
|
|
|
|
|
|
|
11398
|
|
|
|
|
|
|
extern __inline __m512 |
11399
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11400
|
|
|
|
|
|
|
_mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) |
11401
|
|
|
|
|
|
|
{ |
11402
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, |
11403
|
|
|
|
|
|
|
(__v16sf) __B, |
11404
|
|
|
|
|
|
|
(__v16sf) __C, |
11405
|
|
|
|
|
|
|
(__mmask16) __U, |
11406
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11407
|
|
|
|
|
|
|
} |
11408
|
|
|
|
|
|
|
|
11409
|
|
|
|
|
|
|
extern __inline __m512d |
11410
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11411
|
|
|
|
|
|
|
_mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C) |
11412
|
|
|
|
|
|
|
{ |
11413
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, |
11414
|
|
|
|
|
|
|
(__v8df) __B, |
11415
|
|
|
|
|
|
|
-(__v8df) __C, |
11416
|
|
|
|
|
|
|
(__mmask8) -1, |
11417
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11418
|
|
|
|
|
|
|
} |
11419
|
|
|
|
|
|
|
|
11420
|
|
|
|
|
|
|
extern __inline __m512d |
11421
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11422
|
|
|
|
|
|
|
_mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) |
11423
|
|
|
|
|
|
|
{ |
11424
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, |
11425
|
|
|
|
|
|
|
(__v8df) __B, |
11426
|
|
|
|
|
|
|
-(__v8df) __C, |
11427
|
|
|
|
|
|
|
(__mmask8) __U, |
11428
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11429
|
|
|
|
|
|
|
} |
11430
|
|
|
|
|
|
|
|
11431
|
|
|
|
|
|
|
extern __inline __m512d |
11432
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11433
|
|
|
|
|
|
|
_mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) |
11434
|
|
|
|
|
|
|
{ |
11435
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, |
11436
|
|
|
|
|
|
|
(__v8df) __B, |
11437
|
|
|
|
|
|
|
(__v8df) __C, |
11438
|
|
|
|
|
|
|
(__mmask8) __U, |
11439
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11440
|
|
|
|
|
|
|
} |
11441
|
|
|
|
|
|
|
|
11442
|
|
|
|
|
|
|
extern __inline __m512d |
11443
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11444
|
|
|
|
|
|
|
_mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) |
11445
|
|
|
|
|
|
|
{ |
11446
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, |
11447
|
|
|
|
|
|
|
(__v8df) __B, |
11448
|
|
|
|
|
|
|
-(__v8df) __C, |
11449
|
|
|
|
|
|
|
(__mmask8) __U, |
11450
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11451
|
|
|
|
|
|
|
} |
11452
|
|
|
|
|
|
|
|
11453
|
|
|
|
|
|
|
extern __inline __m512 |
11454
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11455
|
|
|
|
|
|
|
_mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C) |
11456
|
|
|
|
|
|
|
{ |
11457
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, |
11458
|
|
|
|
|
|
|
(__v16sf) __B, |
11459
|
|
|
|
|
|
|
-(__v16sf) __C, |
11460
|
|
|
|
|
|
|
(__mmask16) -1, |
11461
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11462
|
|
|
|
|
|
|
} |
11463
|
|
|
|
|
|
|
|
11464
|
|
|
|
|
|
|
extern __inline __m512 |
11465
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11466
|
|
|
|
|
|
|
_mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) |
11467
|
|
|
|
|
|
|
{ |
11468
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, |
11469
|
|
|
|
|
|
|
(__v16sf) __B, |
11470
|
|
|
|
|
|
|
-(__v16sf) __C, |
11471
|
|
|
|
|
|
|
(__mmask16) __U, |
11472
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11473
|
|
|
|
|
|
|
} |
11474
|
|
|
|
|
|
|
|
11475
|
|
|
|
|
|
|
extern __inline __m512 |
11476
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11477
|
|
|
|
|
|
|
_mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) |
11478
|
|
|
|
|
|
|
{ |
11479
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, |
11480
|
|
|
|
|
|
|
(__v16sf) __B, |
11481
|
|
|
|
|
|
|
(__v16sf) __C, |
11482
|
|
|
|
|
|
|
(__mmask16) __U, |
11483
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11484
|
|
|
|
|
|
|
} |
11485
|
|
|
|
|
|
|
|
11486
|
|
|
|
|
|
|
extern __inline __m512 |
11487
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11488
|
|
|
|
|
|
|
_mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) |
11489
|
|
|
|
|
|
|
{ |
11490
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, |
11491
|
|
|
|
|
|
|
(__v16sf) __B, |
11492
|
|
|
|
|
|
|
-(__v16sf) __C, |
11493
|
|
|
|
|
|
|
(__mmask16) __U, |
11494
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11495
|
|
|
|
|
|
|
} |
11496
|
|
|
|
|
|
|
|
11497
|
|
|
|
|
|
|
extern __inline __m512d |
11498
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11499
|
|
|
|
|
|
|
_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C) |
11500
|
|
|
|
|
|
|
{ |
11501
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, |
11502
|
|
|
|
|
|
|
(__v8df) __B, |
11503
|
|
|
|
|
|
|
(__v8df) __C, |
11504
|
|
|
|
|
|
|
(__mmask8) -1, |
11505
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11506
|
|
|
|
|
|
|
} |
11507
|
|
|
|
|
|
|
|
11508
|
|
|
|
|
|
|
extern __inline __m512d |
11509
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11510
|
|
|
|
|
|
|
_mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) |
11511
|
|
|
|
|
|
|
{ |
11512
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, |
11513
|
|
|
|
|
|
|
(__v8df) __B, |
11514
|
|
|
|
|
|
|
(__v8df) __C, |
11515
|
|
|
|
|
|
|
(__mmask8) __U, |
11516
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11517
|
|
|
|
|
|
|
} |
11518
|
|
|
|
|
|
|
|
11519
|
|
|
|
|
|
|
extern __inline __m512d |
11520
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11521
|
|
|
|
|
|
|
_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) |
11522
|
|
|
|
|
|
|
{ |
11523
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, |
11524
|
|
|
|
|
|
|
(__v8df) __B, |
11525
|
|
|
|
|
|
|
(__v8df) __C, |
11526
|
|
|
|
|
|
|
(__mmask8) __U, |
11527
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11528
|
|
|
|
|
|
|
} |
11529
|
|
|
|
|
|
|
|
11530
|
|
|
|
|
|
|
extern __inline __m512d |
11531
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11532
|
|
|
|
|
|
|
_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) |
11533
|
|
|
|
|
|
|
{ |
11534
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, |
11535
|
|
|
|
|
|
|
(__v8df) __B, |
11536
|
|
|
|
|
|
|
(__v8df) __C, |
11537
|
|
|
|
|
|
|
(__mmask8) __U, |
11538
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11539
|
|
|
|
|
|
|
} |
11540
|
|
|
|
|
|
|
|
11541
|
|
|
|
|
|
|
extern __inline __m512 |
11542
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11543
|
|
|
|
|
|
|
_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C) |
11544
|
|
|
|
|
|
|
{ |
11545
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, |
11546
|
|
|
|
|
|
|
(__v16sf) __B, |
11547
|
|
|
|
|
|
|
(__v16sf) __C, |
11548
|
|
|
|
|
|
|
(__mmask16) -1, |
11549
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11550
|
|
|
|
|
|
|
} |
11551
|
|
|
|
|
|
|
|
11552
|
|
|
|
|
|
|
extern __inline __m512 |
11553
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11554
|
|
|
|
|
|
|
_mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) |
11555
|
|
|
|
|
|
|
{ |
11556
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, |
11557
|
|
|
|
|
|
|
(__v16sf) __B, |
11558
|
|
|
|
|
|
|
(__v16sf) __C, |
11559
|
|
|
|
|
|
|
(__mmask16) __U, |
11560
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11561
|
|
|
|
|
|
|
} |
11562
|
|
|
|
|
|
|
|
11563
|
|
|
|
|
|
|
extern __inline __m512 |
11564
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11565
|
|
|
|
|
|
|
_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) |
11566
|
|
|
|
|
|
|
{ |
11567
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, |
11568
|
|
|
|
|
|
|
(__v16sf) __B, |
11569
|
|
|
|
|
|
|
(__v16sf) __C, |
11570
|
|
|
|
|
|
|
(__mmask16) __U, |
11571
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11572
|
|
|
|
|
|
|
} |
11573
|
|
|
|
|
|
|
|
11574
|
|
|
|
|
|
|
extern __inline __m512 |
11575
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11576
|
|
|
|
|
|
|
_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) |
11577
|
|
|
|
|
|
|
{ |
11578
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, |
11579
|
|
|
|
|
|
|
(__v16sf) __B, |
11580
|
|
|
|
|
|
|
(__v16sf) __C, |
11581
|
|
|
|
|
|
|
(__mmask16) __U, |
11582
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11583
|
|
|
|
|
|
|
} |
11584
|
|
|
|
|
|
|
|
11585
|
|
|
|
|
|
|
extern __inline __m512d |
11586
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11587
|
|
|
|
|
|
|
_mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C) |
11588
|
|
|
|
|
|
|
{ |
11589
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, |
11590
|
|
|
|
|
|
|
(__v8df) __B, |
11591
|
|
|
|
|
|
|
-(__v8df) __C, |
11592
|
|
|
|
|
|
|
(__mmask8) -1, |
11593
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11594
|
|
|
|
|
|
|
} |
11595
|
|
|
|
|
|
|
|
11596
|
|
|
|
|
|
|
extern __inline __m512d |
11597
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11598
|
|
|
|
|
|
|
_mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) |
11599
|
|
|
|
|
|
|
{ |
11600
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, |
11601
|
|
|
|
|
|
|
(__v8df) __B, |
11602
|
|
|
|
|
|
|
(__v8df) __C, |
11603
|
|
|
|
|
|
|
(__mmask8) __U, |
11604
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11605
|
|
|
|
|
|
|
} |
11606
|
|
|
|
|
|
|
|
11607
|
|
|
|
|
|
|
extern __inline __m512d |
11608
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11609
|
|
|
|
|
|
|
_mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) |
11610
|
|
|
|
|
|
|
{ |
11611
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A, |
11612
|
|
|
|
|
|
|
(__v8df) __B, |
11613
|
|
|
|
|
|
|
(__v8df) __C, |
11614
|
|
|
|
|
|
|
(__mmask8) __U, |
11615
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11616
|
|
|
|
|
|
|
} |
11617
|
|
|
|
|
|
|
|
11618
|
|
|
|
|
|
|
extern __inline __m512d |
11619
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11620
|
|
|
|
|
|
|
_mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) |
11621
|
|
|
|
|
|
|
{ |
11622
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, |
11623
|
|
|
|
|
|
|
(__v8df) __B, |
11624
|
|
|
|
|
|
|
-(__v8df) __C, |
11625
|
|
|
|
|
|
|
(__mmask8) __U, |
11626
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11627
|
|
|
|
|
|
|
} |
11628
|
|
|
|
|
|
|
|
11629
|
|
|
|
|
|
|
extern __inline __m512 |
11630
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11631
|
|
|
|
|
|
|
_mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C) |
11632
|
|
|
|
|
|
|
{ |
11633
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, |
11634
|
|
|
|
|
|
|
(__v16sf) __B, |
11635
|
|
|
|
|
|
|
-(__v16sf) __C, |
11636
|
|
|
|
|
|
|
(__mmask16) -1, |
11637
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11638
|
|
|
|
|
|
|
} |
11639
|
|
|
|
|
|
|
|
11640
|
|
|
|
|
|
|
extern __inline __m512 |
11641
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11642
|
|
|
|
|
|
|
_mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) |
11643
|
|
|
|
|
|
|
{ |
11644
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, |
11645
|
|
|
|
|
|
|
(__v16sf) __B, |
11646
|
|
|
|
|
|
|
(__v16sf) __C, |
11647
|
|
|
|
|
|
|
(__mmask16) __U, |
11648
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11649
|
|
|
|
|
|
|
} |
11650
|
|
|
|
|
|
|
|
11651
|
|
|
|
|
|
|
extern __inline __m512 |
11652
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11653
|
|
|
|
|
|
|
_mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) |
11654
|
|
|
|
|
|
|
{ |
11655
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A, |
11656
|
|
|
|
|
|
|
(__v16sf) __B, |
11657
|
|
|
|
|
|
|
(__v16sf) __C, |
11658
|
|
|
|
|
|
|
(__mmask16) __U, |
11659
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11660
|
|
|
|
|
|
|
} |
11661
|
|
|
|
|
|
|
|
11662
|
|
|
|
|
|
|
extern __inline __m512 |
11663
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11664
|
|
|
|
|
|
|
_mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) |
11665
|
|
|
|
|
|
|
{ |
11666
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, |
11667
|
|
|
|
|
|
|
(__v16sf) __B, |
11668
|
|
|
|
|
|
|
-(__v16sf) __C, |
11669
|
|
|
|
|
|
|
(__mmask16) __U, |
11670
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11671
|
|
|
|
|
|
|
} |
11672
|
|
|
|
|
|
|
|
11673
|
|
|
|
|
|
|
extern __inline __m256i |
11674
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11675
|
|
|
|
|
|
|
_mm512_cvttpd_epi32 (__m512d __A) |
11676
|
|
|
|
|
|
|
{ |
11677
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, |
11678
|
|
|
|
|
|
|
(__v8si) |
11679
|
|
|
|
|
|
|
_mm256_undefined_si256 (), |
11680
|
|
|
|
|
|
|
(__mmask8) -1, |
11681
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11682
|
|
|
|
|
|
|
} |
11683
|
|
|
|
|
|
|
|
11684
|
|
|
|
|
|
|
extern __inline __m256i |
11685
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11686
|
|
|
|
|
|
|
_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) |
11687
|
|
|
|
|
|
|
{ |
11688
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, |
11689
|
|
|
|
|
|
|
(__v8si) __W, |
11690
|
|
|
|
|
|
|
(__mmask8) __U, |
11691
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11692
|
|
|
|
|
|
|
} |
11693
|
|
|
|
|
|
|
|
11694
|
|
|
|
|
|
|
extern __inline __m256i |
11695
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11696
|
|
|
|
|
|
|
_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A) |
11697
|
|
|
|
|
|
|
{ |
11698
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, |
11699
|
|
|
|
|
|
|
(__v8si) |
11700
|
|
|
|
|
|
|
_mm256_setzero_si256 (), |
11701
|
|
|
|
|
|
|
(__mmask8) __U, |
11702
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11703
|
|
|
|
|
|
|
} |
11704
|
|
|
|
|
|
|
|
11705
|
|
|
|
|
|
|
extern __inline __m256i |
11706
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11707
|
|
|
|
|
|
|
_mm512_cvttpd_epu32 (__m512d __A) |
11708
|
|
|
|
|
|
|
{ |
11709
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, |
11710
|
|
|
|
|
|
|
(__v8si) |
11711
|
|
|
|
|
|
|
_mm256_undefined_si256 (), |
11712
|
|
|
|
|
|
|
(__mmask8) -1, |
11713
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11714
|
|
|
|
|
|
|
} |
11715
|
|
|
|
|
|
|
|
11716
|
|
|
|
|
|
|
extern __inline __m256i |
11717
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11718
|
|
|
|
|
|
|
_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) |
11719
|
|
|
|
|
|
|
{ |
11720
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, |
11721
|
|
|
|
|
|
|
(__v8si) __W, |
11722
|
|
|
|
|
|
|
(__mmask8) __U, |
11723
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11724
|
|
|
|
|
|
|
} |
11725
|
|
|
|
|
|
|
|
11726
|
|
|
|
|
|
|
extern __inline __m256i |
11727
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11728
|
|
|
|
|
|
|
_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A) |
11729
|
|
|
|
|
|
|
{ |
11730
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, |
11731
|
|
|
|
|
|
|
(__v8si) |
11732
|
|
|
|
|
|
|
_mm256_setzero_si256 (), |
11733
|
|
|
|
|
|
|
(__mmask8) __U, |
11734
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11735
|
|
|
|
|
|
|
} |
11736
|
|
|
|
|
|
|
|
11737
|
|
|
|
|
|
|
extern __inline __m256i |
11738
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11739
|
|
|
|
|
|
|
_mm512_cvtpd_epi32 (__m512d __A) |
11740
|
|
|
|
|
|
|
{ |
11741
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, |
11742
|
|
|
|
|
|
|
(__v8si) |
11743
|
|
|
|
|
|
|
_mm256_undefined_si256 (), |
11744
|
|
|
|
|
|
|
(__mmask8) -1, |
11745
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11746
|
|
|
|
|
|
|
} |
11747
|
|
|
|
|
|
|
|
11748
|
|
|
|
|
|
|
extern __inline __m256i |
11749
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11750
|
|
|
|
|
|
|
_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) |
11751
|
|
|
|
|
|
|
{ |
11752
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, |
11753
|
|
|
|
|
|
|
(__v8si) __W, |
11754
|
|
|
|
|
|
|
(__mmask8) __U, |
11755
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11756
|
|
|
|
|
|
|
} |
11757
|
|
|
|
|
|
|
|
11758
|
|
|
|
|
|
|
extern __inline __m256i |
11759
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11760
|
|
|
|
|
|
|
_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A) |
11761
|
|
|
|
|
|
|
{ |
11762
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, |
11763
|
|
|
|
|
|
|
(__v8si) |
11764
|
|
|
|
|
|
|
_mm256_setzero_si256 (), |
11765
|
|
|
|
|
|
|
(__mmask8) __U, |
11766
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11767
|
|
|
|
|
|
|
} |
11768
|
|
|
|
|
|
|
|
11769
|
|
|
|
|
|
|
extern __inline __m256i |
11770
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11771
|
|
|
|
|
|
|
_mm512_cvtpd_epu32 (__m512d __A) |
11772
|
|
|
|
|
|
|
{ |
11773
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, |
11774
|
|
|
|
|
|
|
(__v8si) |
11775
|
|
|
|
|
|
|
_mm256_undefined_si256 (), |
11776
|
|
|
|
|
|
|
(__mmask8) -1, |
11777
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11778
|
|
|
|
|
|
|
} |
11779
|
|
|
|
|
|
|
|
11780
|
|
|
|
|
|
|
extern __inline __m256i |
11781
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11782
|
|
|
|
|
|
|
_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) |
11783
|
|
|
|
|
|
|
{ |
11784
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, |
11785
|
|
|
|
|
|
|
(__v8si) __W, |
11786
|
|
|
|
|
|
|
(__mmask8) __U, |
11787
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11788
|
|
|
|
|
|
|
} |
11789
|
|
|
|
|
|
|
|
11790
|
|
|
|
|
|
|
extern __inline __m256i |
11791
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11792
|
|
|
|
|
|
|
_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A) |
11793
|
|
|
|
|
|
|
{ |
11794
|
|
|
|
|
|
|
return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, |
11795
|
|
|
|
|
|
|
(__v8si) |
11796
|
|
|
|
|
|
|
_mm256_setzero_si256 (), |
11797
|
|
|
|
|
|
|
(__mmask8) __U, |
11798
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11799
|
|
|
|
|
|
|
} |
11800
|
|
|
|
|
|
|
|
11801
|
|
|
|
|
|
|
extern __inline __m512i |
11802
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11803
|
|
|
|
|
|
|
_mm512_cvttps_epi32 (__m512 __A) |
11804
|
|
|
|
|
|
|
{ |
11805
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, |
11806
|
|
|
|
|
|
|
(__v16si) |
11807
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
11808
|
|
|
|
|
|
|
(__mmask16) -1, |
11809
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11810
|
|
|
|
|
|
|
} |
11811
|
|
|
|
|
|
|
|
11812
|
|
|
|
|
|
|
extern __inline __m512i |
11813
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11814
|
|
|
|
|
|
|
_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) |
11815
|
|
|
|
|
|
|
{ |
11816
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, |
11817
|
|
|
|
|
|
|
(__v16si) __W, |
11818
|
|
|
|
|
|
|
(__mmask16) __U, |
11819
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11820
|
|
|
|
|
|
|
} |
11821
|
|
|
|
|
|
|
|
11822
|
|
|
|
|
|
|
extern __inline __m512i |
11823
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11824
|
|
|
|
|
|
|
_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A) |
11825
|
|
|
|
|
|
|
{ |
11826
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, |
11827
|
|
|
|
|
|
|
(__v16si) |
11828
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
11829
|
|
|
|
|
|
|
(__mmask16) __U, |
11830
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11831
|
|
|
|
|
|
|
} |
11832
|
|
|
|
|
|
|
|
11833
|
|
|
|
|
|
|
extern __inline __m512i |
11834
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11835
|
|
|
|
|
|
|
_mm512_cvttps_epu32 (__m512 __A) |
11836
|
|
|
|
|
|
|
{ |
11837
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, |
11838
|
|
|
|
|
|
|
(__v16si) |
11839
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
11840
|
|
|
|
|
|
|
(__mmask16) -1, |
11841
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11842
|
|
|
|
|
|
|
} |
11843
|
|
|
|
|
|
|
|
11844
|
|
|
|
|
|
|
extern __inline __m512i |
11845
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11846
|
|
|
|
|
|
|
_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) |
11847
|
|
|
|
|
|
|
{ |
11848
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, |
11849
|
|
|
|
|
|
|
(__v16si) __W, |
11850
|
|
|
|
|
|
|
(__mmask16) __U, |
11851
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11852
|
|
|
|
|
|
|
} |
11853
|
|
|
|
|
|
|
|
11854
|
|
|
|
|
|
|
extern __inline __m512i |
11855
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11856
|
|
|
|
|
|
|
_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A) |
11857
|
|
|
|
|
|
|
{ |
11858
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, |
11859
|
|
|
|
|
|
|
(__v16si) |
11860
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
11861
|
|
|
|
|
|
|
(__mmask16) __U, |
11862
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11863
|
|
|
|
|
|
|
} |
11864
|
|
|
|
|
|
|
|
11865
|
|
|
|
|
|
|
extern __inline __m512i |
11866
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11867
|
|
|
|
|
|
|
_mm512_cvtps_epi32 (__m512 __A) |
11868
|
|
|
|
|
|
|
{ |
11869
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, |
11870
|
|
|
|
|
|
|
(__v16si) |
11871
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
11872
|
|
|
|
|
|
|
(__mmask16) -1, |
11873
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11874
|
|
|
|
|
|
|
} |
11875
|
|
|
|
|
|
|
|
11876
|
|
|
|
|
|
|
extern __inline __m512i |
11877
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11878
|
|
|
|
|
|
|
_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) |
11879
|
|
|
|
|
|
|
{ |
11880
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, |
11881
|
|
|
|
|
|
|
(__v16si) __W, |
11882
|
|
|
|
|
|
|
(__mmask16) __U, |
11883
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11884
|
|
|
|
|
|
|
} |
11885
|
|
|
|
|
|
|
|
11886
|
|
|
|
|
|
|
extern __inline __m512i |
11887
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11888
|
|
|
|
|
|
|
_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A) |
11889
|
|
|
|
|
|
|
{ |
11890
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, |
11891
|
|
|
|
|
|
|
(__v16si) |
11892
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
11893
|
|
|
|
|
|
|
(__mmask16) __U, |
11894
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11895
|
|
|
|
|
|
|
} |
11896
|
|
|
|
|
|
|
|
11897
|
|
|
|
|
|
|
extern __inline __m512i |
11898
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11899
|
|
|
|
|
|
|
_mm512_cvtps_epu32 (__m512 __A) |
11900
|
|
|
|
|
|
|
{ |
11901
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, |
11902
|
|
|
|
|
|
|
(__v16si) |
11903
|
|
|
|
|
|
|
_mm512_undefined_si512 (), |
11904
|
|
|
|
|
|
|
(__mmask16) -1, |
11905
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11906
|
|
|
|
|
|
|
} |
11907
|
|
|
|
|
|
|
|
11908
|
|
|
|
|
|
|
extern __inline __m512i |
11909
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11910
|
|
|
|
|
|
|
_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) |
11911
|
|
|
|
|
|
|
{ |
11912
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, |
11913
|
|
|
|
|
|
|
(__v16si) __W, |
11914
|
|
|
|
|
|
|
(__mmask16) __U, |
11915
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11916
|
|
|
|
|
|
|
} |
11917
|
|
|
|
|
|
|
|
11918
|
|
|
|
|
|
|
extern __inline __m512i |
11919
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11920
|
|
|
|
|
|
|
_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A) |
11921
|
|
|
|
|
|
|
{ |
11922
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, |
11923
|
|
|
|
|
|
|
(__v16si) |
11924
|
|
|
|
|
|
|
_mm512_setzero_si512 (), |
11925
|
|
|
|
|
|
|
(__mmask16) __U, |
11926
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11927
|
|
|
|
|
|
|
} |
11928
|
|
|
|
|
|
|
|
11929
|
|
|
|
|
|
|
#ifdef __x86_64__ |
11930
|
|
|
|
|
|
|
extern __inline __m128 |
11931
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11932
|
|
|
|
|
|
|
_mm_cvtu64_ss (__m128 __A, unsigned long long __B) |
11933
|
|
|
|
|
|
|
{ |
11934
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, |
11935
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11936
|
|
|
|
|
|
|
} |
11937
|
|
|
|
|
|
|
|
11938
|
|
|
|
|
|
|
extern __inline __m128d |
11939
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11940
|
|
|
|
|
|
|
_mm_cvtu64_sd (__m128d __A, unsigned long long __B) |
11941
|
|
|
|
|
|
|
{ |
11942
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, |
11943
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11944
|
|
|
|
|
|
|
} |
11945
|
|
|
|
|
|
|
#endif |
11946
|
|
|
|
|
|
|
|
11947
|
|
|
|
|
|
|
extern __inline __m128 |
11948
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11949
|
|
|
|
|
|
|
_mm_cvtu32_ss (__m128 __A, unsigned __B) |
11950
|
|
|
|
|
|
|
{ |
11951
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, |
11952
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11953
|
|
|
|
|
|
|
} |
11954
|
|
|
|
|
|
|
|
11955
|
|
|
|
|
|
|
extern __inline __m512 |
11956
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11957
|
|
|
|
|
|
|
_mm512_cvtepi32_ps (__m512i __A) |
11958
|
|
|
|
|
|
|
{ |
11959
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, |
11960
|
|
|
|
|
|
|
(__v16sf) |
11961
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
11962
|
|
|
|
|
|
|
(__mmask16) -1, |
11963
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11964
|
|
|
|
|
|
|
} |
11965
|
|
|
|
|
|
|
|
11966
|
|
|
|
|
|
|
extern __inline __m512 |
11967
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11968
|
|
|
|
|
|
|
_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A) |
11969
|
|
|
|
|
|
|
{ |
11970
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, |
11971
|
|
|
|
|
|
|
(__v16sf) __W, |
11972
|
|
|
|
|
|
|
(__mmask16) __U, |
11973
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11974
|
|
|
|
|
|
|
} |
11975
|
|
|
|
|
|
|
|
11976
|
|
|
|
|
|
|
extern __inline __m512 |
11977
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11978
|
|
|
|
|
|
|
_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A) |
11979
|
|
|
|
|
|
|
{ |
11980
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, |
11981
|
|
|
|
|
|
|
(__v16sf) |
11982
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
11983
|
|
|
|
|
|
|
(__mmask16) __U, |
11984
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11985
|
|
|
|
|
|
|
} |
11986
|
|
|
|
|
|
|
|
11987
|
|
|
|
|
|
|
extern __inline __m512 |
11988
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
11989
|
|
|
|
|
|
|
_mm512_cvtepu32_ps (__m512i __A) |
11990
|
|
|
|
|
|
|
{ |
11991
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, |
11992
|
|
|
|
|
|
|
(__v16sf) |
11993
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
11994
|
|
|
|
|
|
|
(__mmask16) -1, |
11995
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
11996
|
|
|
|
|
|
|
} |
11997
|
|
|
|
|
|
|
|
11998
|
|
|
|
|
|
|
extern __inline __m512 |
11999
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12000
|
|
|
|
|
|
|
_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A) |
12001
|
|
|
|
|
|
|
{ |
12002
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, |
12003
|
|
|
|
|
|
|
(__v16sf) __W, |
12004
|
|
|
|
|
|
|
(__mmask16) __U, |
12005
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12006
|
|
|
|
|
|
|
} |
12007
|
|
|
|
|
|
|
|
12008
|
|
|
|
|
|
|
extern __inline __m512 |
12009
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12010
|
|
|
|
|
|
|
_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A) |
12011
|
|
|
|
|
|
|
{ |
12012
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, |
12013
|
|
|
|
|
|
|
(__v16sf) |
12014
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
12015
|
|
|
|
|
|
|
(__mmask16) __U, |
12016
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12017
|
|
|
|
|
|
|
} |
12018
|
|
|
|
|
|
|
|
12019
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
12020
|
|
|
|
|
|
|
extern __inline __m512d |
12021
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12022
|
|
|
|
|
|
|
_mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm) |
12023
|
|
|
|
|
|
|
{ |
12024
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A, |
12025
|
|
|
|
|
|
|
(__v8df) __B, |
12026
|
|
|
|
|
|
|
(__v8di) __C, |
12027
|
|
|
|
|
|
|
__imm, |
12028
|
|
|
|
|
|
|
(__mmask8) -1, |
12029
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12030
|
|
|
|
|
|
|
} |
12031
|
|
|
|
|
|
|
|
12032
|
|
|
|
|
|
|
extern __inline __m512d |
12033
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12034
|
|
|
|
|
|
|
_mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B, |
12035
|
|
|
|
|
|
|
__m512i __C, const int __imm) |
12036
|
|
|
|
|
|
|
{ |
12037
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A, |
12038
|
|
|
|
|
|
|
(__v8df) __B, |
12039
|
|
|
|
|
|
|
(__v8di) __C, |
12040
|
|
|
|
|
|
|
__imm, |
12041
|
|
|
|
|
|
|
(__mmask8) __U, |
12042
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12043
|
|
|
|
|
|
|
} |
12044
|
|
|
|
|
|
|
|
12045
|
|
|
|
|
|
|
extern __inline __m512d |
12046
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12047
|
|
|
|
|
|
|
_mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B, |
12048
|
|
|
|
|
|
|
__m512i __C, const int __imm) |
12049
|
|
|
|
|
|
|
{ |
12050
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A, |
12051
|
|
|
|
|
|
|
(__v8df) __B, |
12052
|
|
|
|
|
|
|
(__v8di) __C, |
12053
|
|
|
|
|
|
|
__imm, |
12054
|
|
|
|
|
|
|
(__mmask8) __U, |
12055
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12056
|
|
|
|
|
|
|
} |
12057
|
|
|
|
|
|
|
|
12058
|
|
|
|
|
|
|
extern __inline __m512 |
12059
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12060
|
|
|
|
|
|
|
_mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm) |
12061
|
|
|
|
|
|
|
{ |
12062
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A, |
12063
|
|
|
|
|
|
|
(__v16sf) __B, |
12064
|
|
|
|
|
|
|
(__v16si) __C, |
12065
|
|
|
|
|
|
|
__imm, |
12066
|
|
|
|
|
|
|
(__mmask16) -1, |
12067
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12068
|
|
|
|
|
|
|
} |
12069
|
|
|
|
|
|
|
|
12070
|
|
|
|
|
|
|
extern __inline __m512 |
12071
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12072
|
|
|
|
|
|
|
_mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B, |
12073
|
|
|
|
|
|
|
__m512i __C, const int __imm) |
12074
|
|
|
|
|
|
|
{ |
12075
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A, |
12076
|
|
|
|
|
|
|
(__v16sf) __B, |
12077
|
|
|
|
|
|
|
(__v16si) __C, |
12078
|
|
|
|
|
|
|
__imm, |
12079
|
|
|
|
|
|
|
(__mmask16) __U, |
12080
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12081
|
|
|
|
|
|
|
} |
12082
|
|
|
|
|
|
|
|
12083
|
|
|
|
|
|
|
extern __inline __m512 |
12084
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12085
|
|
|
|
|
|
|
_mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B, |
12086
|
|
|
|
|
|
|
__m512i __C, const int __imm) |
12087
|
|
|
|
|
|
|
{ |
12088
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A, |
12089
|
|
|
|
|
|
|
(__v16sf) __B, |
12090
|
|
|
|
|
|
|
(__v16si) __C, |
12091
|
|
|
|
|
|
|
__imm, |
12092
|
|
|
|
|
|
|
(__mmask16) __U, |
12093
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12094
|
|
|
|
|
|
|
} |
12095
|
|
|
|
|
|
|
|
12096
|
|
|
|
|
|
|
extern __inline __m128d |
12097
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12098
|
|
|
|
|
|
|
_mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm) |
12099
|
|
|
|
|
|
|
{ |
12100
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A, |
12101
|
|
|
|
|
|
|
(__v2df) __B, |
12102
|
|
|
|
|
|
|
(__v2di) __C, __imm, |
12103
|
|
|
|
|
|
|
(__mmask8) -1, |
12104
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12105
|
|
|
|
|
|
|
} |
12106
|
|
|
|
|
|
|
|
12107
|
|
|
|
|
|
|
extern __inline __m128d |
12108
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12109
|
|
|
|
|
|
|
_mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B, |
12110
|
|
|
|
|
|
|
__m128i __C, const int __imm) |
12111
|
|
|
|
|
|
|
{ |
12112
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A, |
12113
|
|
|
|
|
|
|
(__v2df) __B, |
12114
|
|
|
|
|
|
|
(__v2di) __C, __imm, |
12115
|
|
|
|
|
|
|
(__mmask8) __U, |
12116
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12117
|
|
|
|
|
|
|
} |
12118
|
|
|
|
|
|
|
|
12119
|
|
|
|
|
|
|
extern __inline __m128d |
12120
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12121
|
|
|
|
|
|
|
_mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B, |
12122
|
|
|
|
|
|
|
__m128i __C, const int __imm) |
12123
|
|
|
|
|
|
|
{ |
12124
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A, |
12125
|
|
|
|
|
|
|
(__v2df) __B, |
12126
|
|
|
|
|
|
|
(__v2di) __C, |
12127
|
|
|
|
|
|
|
__imm, |
12128
|
|
|
|
|
|
|
(__mmask8) __U, |
12129
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12130
|
|
|
|
|
|
|
} |
12131
|
|
|
|
|
|
|
|
12132
|
|
|
|
|
|
|
extern __inline __m128 |
12133
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12134
|
|
|
|
|
|
|
_mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm) |
12135
|
|
|
|
|
|
|
{ |
12136
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A, |
12137
|
|
|
|
|
|
|
(__v4sf) __B, |
12138
|
|
|
|
|
|
|
(__v4si) __C, __imm, |
12139
|
|
|
|
|
|
|
(__mmask8) -1, |
12140
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12141
|
|
|
|
|
|
|
} |
12142
|
|
|
|
|
|
|
|
12143
|
|
|
|
|
|
|
extern __inline __m128 |
12144
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12145
|
|
|
|
|
|
|
_mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B, |
12146
|
|
|
|
|
|
|
__m128i __C, const int __imm) |
12147
|
|
|
|
|
|
|
{ |
12148
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A, |
12149
|
|
|
|
|
|
|
(__v4sf) __B, |
12150
|
|
|
|
|
|
|
(__v4si) __C, __imm, |
12151
|
|
|
|
|
|
|
(__mmask8) __U, |
12152
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12153
|
|
|
|
|
|
|
} |
12154
|
|
|
|
|
|
|
|
12155
|
|
|
|
|
|
|
extern __inline __m128 |
12156
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12157
|
|
|
|
|
|
|
_mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B, |
12158
|
|
|
|
|
|
|
__m128i __C, const int __imm) |
12159
|
|
|
|
|
|
|
{ |
12160
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A, |
12161
|
|
|
|
|
|
|
(__v4sf) __B, |
12162
|
|
|
|
|
|
|
(__v4si) __C, __imm, |
12163
|
|
|
|
|
|
|
(__mmask8) __U, |
12164
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12165
|
|
|
|
|
|
|
} |
12166
|
|
|
|
|
|
|
#else |
12167
|
|
|
|
|
|
|
/* Macro form, used when __OPTIMIZE__ is not defined: expands directly to
   __builtin_ia32_fixupimmpd512_mask with every mask bit set.  */
#define _mm512_fixupimm_pd(X, Y, Z, C)                                  \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask (                         \
      (__v8df)(__m512d)(X),                                             \
      (__v8df)(__m512d)(Y),                                             \
      (__v8di)(__m512i)(Z),                                             \
      (int)(C),                                                         \
      (__mmask8)(-1),                                                   \
      _MM_FROUND_CUR_DIRECTION))
12171
|
|
|
|
|
|
|
|
12172
|
|
|
|
|
|
|
/* Macro form, used when __OPTIMIZE__ is not defined: expands directly to
   __builtin_ia32_fixupimmpd512_mask with U as the mask operand.  */
#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C)                          \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask (                         \
      (__v8df)(__m512d)(X),                                             \
      (__v8df)(__m512d)(Y),                                             \
      (__v8di)(__m512i)(Z),                                             \
      (int)(C),                                                         \
      (__mmask8)(U),                                                    \
      _MM_FROUND_CUR_DIRECTION))
12176
|
|
|
|
|
|
|
|
12177
|
|
|
|
|
|
|
/* Macro form, used when __OPTIMIZE__ is not defined: expands directly to
   __builtin_ia32_fixupimmpd512_maskz with U as the mask operand.  */
#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C)                         \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz (                        \
      (__v8df)(__m512d)(X),                                             \
      (__v8df)(__m512d)(Y),                                             \
      (__v8di)(__m512i)(Z),                                             \
      (int)(C),                                                         \
      (__mmask8)(U),                                                    \
      _MM_FROUND_CUR_DIRECTION))
12181
|
|
|
|
|
|
|
|
12182
|
|
|
|
|
|
|
/* Macro form, used when __OPTIMIZE__ is not defined: expands directly to
   __builtin_ia32_fixupimmps512_mask with every mask bit set.  */
#define _mm512_fixupimm_ps(X, Y, Z, C)                                  \
  ((__m512)__builtin_ia32_fixupimmps512_mask (                          \
      (__v16sf)(__m512)(X),                                             \
      (__v16sf)(__m512)(Y),                                             \
      (__v16si)(__m512i)(Z),                                            \
      (int)(C),                                                         \
      (__mmask16)(-1),                                                  \
      _MM_FROUND_CUR_DIRECTION))
12186
|
|
|
|
|
|
|
|
12187
|
|
|
|
|
|
|
/* Macro form, used when __OPTIMIZE__ is not defined: expands directly to
   __builtin_ia32_fixupimmps512_mask with U as the mask operand.  */
#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C)                          \
  ((__m512)__builtin_ia32_fixupimmps512_mask (                          \
      (__v16sf)(__m512)(X),                                             \
      (__v16sf)(__m512)(Y),                                             \
      (__v16si)(__m512i)(Z),                                            \
      (int)(C),                                                         \
      (__mmask16)(U),                                                   \
      _MM_FROUND_CUR_DIRECTION))
12191
|
|
|
|
|
|
|
|
12192
|
|
|
|
|
|
|
/* Macro form, used when __OPTIMIZE__ is not defined: expands directly to
   __builtin_ia32_fixupimmps512_maskz with U as the mask operand.  */
#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C)                         \
  ((__m512)__builtin_ia32_fixupimmps512_maskz (                         \
      (__v16sf)(__m512)(X),                                             \
      (__v16sf)(__m512)(Y),                                             \
      (__v16si)(__m512i)(Z),                                            \
      (int)(C),                                                         \
      (__mmask16)(U),                                                   \
      _MM_FROUND_CUR_DIRECTION))
12196
|
|
|
|
|
|
|
|
12197
|
|
|
|
|
|
|
/* Macro form, used when __OPTIMIZE__ is not defined: expands directly to
   __builtin_ia32_fixupimmsd_mask with every mask bit set.  */
#define _mm_fixupimm_sd(X, Y, Z, C)                                     \
  ((__m128d)__builtin_ia32_fixupimmsd_mask (                            \
      (__v2df)(__m128d)(X),                                             \
      (__v2df)(__m128d)(Y),                                             \
      (__v2di)(__m128i)(Z),                                             \
      (int)(C),                                                         \
      (__mmask8)(-1),                                                   \
      _MM_FROUND_CUR_DIRECTION))
12201
|
|
|
|
|
|
|
|
12202
|
|
|
|
|
|
|
/* Macro form, used when __OPTIMIZE__ is not defined: expands directly to
   __builtin_ia32_fixupimmsd_mask with U as the mask operand.  */
#define _mm_mask_fixupimm_sd(X, U, Y, Z, C)                             \
  ((__m128d)__builtin_ia32_fixupimmsd_mask (                            \
      (__v2df)(__m128d)(X),                                             \
      (__v2df)(__m128d)(Y),                                             \
      (__v2di)(__m128i)(Z),                                             \
      (int)(C),                                                         \
      (__mmask8)(U),                                                    \
      _MM_FROUND_CUR_DIRECTION))
12206
|
|
|
|
|
|
|
|
12207
|
|
|
|
|
|
|
/* Macro form, used when __OPTIMIZE__ is not defined: expands directly to
   __builtin_ia32_fixupimmsd_maskz with U as the mask operand.  */
#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C)                            \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz (                           \
      (__v2df)(__m128d)(X),                                             \
      (__v2df)(__m128d)(Y),                                             \
      (__v2di)(__m128i)(Z),                                             \
      (int)(C),                                                         \
      (__mmask8)(U),                                                    \
      _MM_FROUND_CUR_DIRECTION))
12211
|
|
|
|
|
|
|
|
12212
|
|
|
|
|
|
|
/* Macro form, used when __OPTIMIZE__ is not defined: expands directly to
   __builtin_ia32_fixupimmss_mask with every mask bit set.  */
#define _mm_fixupimm_ss(X, Y, Z, C)                                     \
  ((__m128)__builtin_ia32_fixupimmss_mask (                             \
      (__v4sf)(__m128)(X),                                              \
      (__v4sf)(__m128)(Y),                                              \
      (__v4si)(__m128i)(Z),                                             \
      (int)(C),                                                         \
      (__mmask8)(-1),                                                   \
      _MM_FROUND_CUR_DIRECTION))
12216
|
|
|
|
|
|
|
|
12217
|
|
|
|
|
|
|
/* Macro form, used when __OPTIMIZE__ is not defined: expands directly to
   __builtin_ia32_fixupimmss_mask with U as the mask operand.  */
#define _mm_mask_fixupimm_ss(X, U, Y, Z, C)                             \
  ((__m128)__builtin_ia32_fixupimmss_mask (                             \
      (__v4sf)(__m128)(X),                                              \
      (__v4sf)(__m128)(Y),                                              \
      (__v4si)(__m128i)(Z),                                             \
      (int)(C),                                                         \
      (__mmask8)(U),                                                    \
      _MM_FROUND_CUR_DIRECTION))
12221
|
|
|
|
|
|
|
|
12222
|
|
|
|
|
|
|
/* Macro form, used when __OPTIMIZE__ is not defined: expands directly to
   __builtin_ia32_fixupimmss_maskz with U as the mask operand.  */
#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C)                            \
  ((__m128)__builtin_ia32_fixupimmss_maskz (                            \
      (__v4sf)(__m128)(X),                                              \
      (__v4sf)(__m128)(Y),                                              \
      (__v4si)(__m128i)(Z),                                             \
      (int)(C),                                                         \
      (__mmask8)(U),                                                    \
      _MM_FROUND_CUR_DIRECTION))
12226
|
|
|
|
|
|
|
#endif |
12227
|
|
|
|
|
|
|
|
12228
|
|
|
|
|
|
|
#ifdef __x86_64__ |
12229
|
|
|
|
|
|
|
extern __inline unsigned long long |
12230
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12231
|
|
|
|
|
|
|
_mm_cvtss_u64 (__m128 __A) |
12232
|
|
|
|
|
|
|
{ |
12233
|
|
|
|
|
|
|
return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) |
12234
|
|
|
|
|
|
|
__A, |
12235
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12236
|
|
|
|
|
|
|
} |
12237
|
|
|
|
|
|
|
|
12238
|
|
|
|
|
|
|
extern __inline unsigned long long |
12239
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12240
|
|
|
|
|
|
|
_mm_cvttss_u64 (__m128 __A) |
12241
|
|
|
|
|
|
|
{ |
12242
|
|
|
|
|
|
|
return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) |
12243
|
|
|
|
|
|
|
__A, |
12244
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12245
|
|
|
|
|
|
|
} |
12246
|
|
|
|
|
|
|
|
12247
|
|
|
|
|
|
|
extern __inline long long |
12248
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12249
|
|
|
|
|
|
|
_mm_cvttss_i64 (__m128 __A) |
12250
|
|
|
|
|
|
|
{ |
12251
|
|
|
|
|
|
|
return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, |
12252
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12253
|
|
|
|
|
|
|
} |
12254
|
|
|
|
|
|
|
#endif /* __x86_64__ */ |
12255
|
|
|
|
|
|
|
|
12256
|
|
|
|
|
|
|
extern __inline unsigned |
12257
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12258
|
|
|
|
|
|
|
_mm_cvtss_u32 (__m128 __A) |
12259
|
|
|
|
|
|
|
{ |
12260
|
|
|
|
|
|
|
return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, |
12261
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12262
|
|
|
|
|
|
|
} |
12263
|
|
|
|
|
|
|
|
12264
|
|
|
|
|
|
|
extern __inline unsigned |
12265
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12266
|
|
|
|
|
|
|
_mm_cvttss_u32 (__m128 __A) |
12267
|
|
|
|
|
|
|
{ |
12268
|
|
|
|
|
|
|
return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, |
12269
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12270
|
|
|
|
|
|
|
} |
12271
|
|
|
|
|
|
|
|
12272
|
|
|
|
|
|
|
extern __inline int |
12273
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12274
|
|
|
|
|
|
|
_mm_cvttss_i32 (__m128 __A) |
12275
|
|
|
|
|
|
|
{ |
12276
|
|
|
|
|
|
|
return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, |
12277
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12278
|
|
|
|
|
|
|
} |
12279
|
|
|
|
|
|
|
|
12280
|
|
|
|
|
|
|
#ifdef __x86_64__ |
12281
|
|
|
|
|
|
|
extern __inline unsigned long long |
12282
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12283
|
|
|
|
|
|
|
_mm_cvtsd_u64 (__m128d __A) |
12284
|
|
|
|
|
|
|
{ |
12285
|
|
|
|
|
|
|
return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) |
12286
|
|
|
|
|
|
|
__A, |
12287
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12288
|
|
|
|
|
|
|
} |
12289
|
|
|
|
|
|
|
|
12290
|
|
|
|
|
|
|
extern __inline unsigned long long |
12291
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12292
|
|
|
|
|
|
|
_mm_cvttsd_u64 (__m128d __A) |
12293
|
|
|
|
|
|
|
{ |
12294
|
|
|
|
|
|
|
return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) |
12295
|
|
|
|
|
|
|
__A, |
12296
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12297
|
|
|
|
|
|
|
} |
12298
|
|
|
|
|
|
|
|
12299
|
|
|
|
|
|
|
extern __inline long long |
12300
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12301
|
|
|
|
|
|
|
_mm_cvttsd_i64 (__m128d __A) |
12302
|
|
|
|
|
|
|
{ |
12303
|
|
|
|
|
|
|
return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, |
12304
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12305
|
|
|
|
|
|
|
} |
12306
|
|
|
|
|
|
|
#endif /* __x86_64__ */ |
12307
|
|
|
|
|
|
|
|
12308
|
|
|
|
|
|
|
extern __inline unsigned |
12309
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12310
|
|
|
|
|
|
|
_mm_cvtsd_u32 (__m128d __A) |
12311
|
|
|
|
|
|
|
{ |
12312
|
|
|
|
|
|
|
return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, |
12313
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12314
|
|
|
|
|
|
|
} |
12315
|
|
|
|
|
|
|
|
12316
|
|
|
|
|
|
|
extern __inline unsigned |
12317
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12318
|
|
|
|
|
|
|
_mm_cvttsd_u32 (__m128d __A) |
12319
|
|
|
|
|
|
|
{ |
12320
|
|
|
|
|
|
|
return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, |
12321
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12322
|
|
|
|
|
|
|
} |
12323
|
|
|
|
|
|
|
|
12324
|
|
|
|
|
|
|
extern __inline int |
12325
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12326
|
|
|
|
|
|
|
_mm_cvttsd_i32 (__m128d __A) |
12327
|
|
|
|
|
|
|
{ |
12328
|
|
|
|
|
|
|
return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, |
12329
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12330
|
|
|
|
|
|
|
} |
12331
|
|
|
|
|
|
|
|
12332
|
|
|
|
|
|
|
extern __inline __m512d |
12333
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12334
|
|
|
|
|
|
|
_mm512_cvtps_pd (__m256 __A) |
12335
|
|
|
|
|
|
|
{ |
12336
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, |
12337
|
|
|
|
|
|
|
(__v8df) |
12338
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
12339
|
|
|
|
|
|
|
(__mmask8) -1, |
12340
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12341
|
|
|
|
|
|
|
} |
12342
|
|
|
|
|
|
|
|
12343
|
|
|
|
|
|
|
extern __inline __m512d |
12344
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12345
|
|
|
|
|
|
|
_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A) |
12346
|
|
|
|
|
|
|
{ |
12347
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, |
12348
|
|
|
|
|
|
|
(__v8df) __W, |
12349
|
|
|
|
|
|
|
(__mmask8) __U, |
12350
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12351
|
|
|
|
|
|
|
} |
12352
|
|
|
|
|
|
|
|
12353
|
|
|
|
|
|
|
extern __inline __m512d |
12354
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12355
|
|
|
|
|
|
|
_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A) |
12356
|
|
|
|
|
|
|
{ |
12357
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, |
12358
|
|
|
|
|
|
|
(__v8df) |
12359
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
12360
|
|
|
|
|
|
|
(__mmask8) __U, |
12361
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12362
|
|
|
|
|
|
|
} |
12363
|
|
|
|
|
|
|
|
12364
|
|
|
|
|
|
|
extern __inline __m512 |
12365
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12366
|
|
|
|
|
|
|
_mm512_cvtph_ps (__m256i __A) |
12367
|
|
|
|
|
|
|
{ |
12368
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, |
12369
|
|
|
|
|
|
|
(__v16sf) |
12370
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
12371
|
|
|
|
|
|
|
(__mmask16) -1, |
12372
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12373
|
|
|
|
|
|
|
} |
12374
|
|
|
|
|
|
|
|
12375
|
|
|
|
|
|
|
extern __inline __m512 |
12376
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12377
|
|
|
|
|
|
|
_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A) |
12378
|
|
|
|
|
|
|
{ |
12379
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, |
12380
|
|
|
|
|
|
|
(__v16sf) __W, |
12381
|
|
|
|
|
|
|
(__mmask16) __U, |
12382
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12383
|
|
|
|
|
|
|
} |
12384
|
|
|
|
|
|
|
|
12385
|
|
|
|
|
|
|
extern __inline __m512 |
12386
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12387
|
|
|
|
|
|
|
_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) |
12388
|
|
|
|
|
|
|
{ |
12389
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, |
12390
|
|
|
|
|
|
|
(__v16sf) |
12391
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
12392
|
|
|
|
|
|
|
(__mmask16) __U, |
12393
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12394
|
|
|
|
|
|
|
} |
12395
|
|
|
|
|
|
|
|
12396
|
|
|
|
|
|
|
extern __inline __m256 |
12397
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12398
|
|
|
|
|
|
|
_mm512_cvtpd_ps (__m512d __A) |
12399
|
|
|
|
|
|
|
{ |
12400
|
|
|
|
|
|
|
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, |
12401
|
|
|
|
|
|
|
(__v8sf) |
12402
|
|
|
|
|
|
|
_mm256_undefined_ps (), |
12403
|
|
|
|
|
|
|
(__mmask8) -1, |
12404
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12405
|
|
|
|
|
|
|
} |
12406
|
|
|
|
|
|
|
|
12407
|
|
|
|
|
|
|
extern __inline __m256 |
12408
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12409
|
|
|
|
|
|
|
_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) |
12410
|
|
|
|
|
|
|
{ |
12411
|
|
|
|
|
|
|
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, |
12412
|
|
|
|
|
|
|
(__v8sf) __W, |
12413
|
|
|
|
|
|
|
(__mmask8) __U, |
12414
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12415
|
|
|
|
|
|
|
} |
12416
|
|
|
|
|
|
|
|
12417
|
|
|
|
|
|
|
extern __inline __m256 |
12418
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12419
|
|
|
|
|
|
|
_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) |
12420
|
|
|
|
|
|
|
{ |
12421
|
|
|
|
|
|
|
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, |
12422
|
|
|
|
|
|
|
(__v8sf) |
12423
|
|
|
|
|
|
|
_mm256_setzero_ps (), |
12424
|
|
|
|
|
|
|
(__mmask8) __U, |
12425
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12426
|
|
|
|
|
|
|
} |
12427
|
|
|
|
|
|
|
|
12428
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
12429
|
|
|
|
|
|
|
extern __inline __m512 |
12430
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12431
|
|
|
|
|
|
|
_mm512_getexp_ps (__m512 __A) |
12432
|
|
|
|
|
|
|
{ |
12433
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, |
12434
|
|
|
|
|
|
|
(__v16sf) |
12435
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
12436
|
|
|
|
|
|
|
(__mmask16) -1, |
12437
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12438
|
|
|
|
|
|
|
} |
12439
|
|
|
|
|
|
|
|
12440
|
|
|
|
|
|
|
extern __inline __m512 |
12441
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12442
|
|
|
|
|
|
|
_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A) |
12443
|
|
|
|
|
|
|
{ |
12444
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, |
12445
|
|
|
|
|
|
|
(__v16sf) __W, |
12446
|
|
|
|
|
|
|
(__mmask16) __U, |
12447
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12448
|
|
|
|
|
|
|
} |
12449
|
|
|
|
|
|
|
|
12450
|
|
|
|
|
|
|
extern __inline __m512 |
12451
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12452
|
|
|
|
|
|
|
_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) |
12453
|
|
|
|
|
|
|
{ |
12454
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, |
12455
|
|
|
|
|
|
|
(__v16sf) |
12456
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
12457
|
|
|
|
|
|
|
(__mmask16) __U, |
12458
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12459
|
|
|
|
|
|
|
} |
12460
|
|
|
|
|
|
|
|
12461
|
|
|
|
|
|
|
extern __inline __m512d |
12462
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12463
|
|
|
|
|
|
|
_mm512_getexp_pd (__m512d __A) |
12464
|
|
|
|
|
|
|
{ |
12465
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, |
12466
|
|
|
|
|
|
|
(__v8df) |
12467
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
12468
|
|
|
|
|
|
|
(__mmask8) -1, |
12469
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12470
|
|
|
|
|
|
|
} |
12471
|
|
|
|
|
|
|
|
12472
|
|
|
|
|
|
|
extern __inline __m512d |
12473
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12474
|
|
|
|
|
|
|
_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A) |
12475
|
|
|
|
|
|
|
{ |
12476
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, |
12477
|
|
|
|
|
|
|
(__v8df) __W, |
12478
|
|
|
|
|
|
|
(__mmask8) __U, |
12479
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12480
|
|
|
|
|
|
|
} |
12481
|
|
|
|
|
|
|
|
12482
|
|
|
|
|
|
|
extern __inline __m512d |
12483
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12484
|
|
|
|
|
|
|
_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A) |
12485
|
|
|
|
|
|
|
{ |
12486
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, |
12487
|
|
|
|
|
|
|
(__v8df) |
12488
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
12489
|
|
|
|
|
|
|
(__mmask8) __U, |
12490
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12491
|
|
|
|
|
|
|
} |
12492
|
|
|
|
|
|
|
|
12493
|
|
|
|
|
|
|
extern __inline __m128 |
12494
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12495
|
|
|
|
|
|
|
_mm_getexp_ss (__m128 __A, __m128 __B) |
12496
|
|
|
|
|
|
|
{ |
12497
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A, |
12498
|
|
|
|
|
|
|
(__v4sf) __B, |
12499
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12500
|
|
|
|
|
|
|
} |
12501
|
|
|
|
|
|
|
|
12502
|
|
|
|
|
|
|
extern __inline __m128d |
12503
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12504
|
|
|
|
|
|
|
_mm_getexp_sd (__m128d __A, __m128d __B) |
12505
|
|
|
|
|
|
|
{ |
12506
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A, |
12507
|
|
|
|
|
|
|
(__v2df) __B, |
12508
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12509
|
|
|
|
|
|
|
} |
12510
|
|
|
|
|
|
|
|
12511
|
|
|
|
|
|
|
extern __inline __m512d |
12512
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12513
|
|
|
|
|
|
|
_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B, |
12514
|
|
|
|
|
|
|
_MM_MANTISSA_SIGN_ENUM __C) |
12515
|
|
|
|
|
|
|
{ |
12516
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, |
12517
|
|
|
|
|
|
|
(__C << 2) | __B, |
12518
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
12519
|
|
|
|
|
|
|
(__mmask8) -1, |
12520
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12521
|
|
|
|
|
|
|
} |
12522
|
|
|
|
|
|
|
|
12523
|
|
|
|
|
|
|
extern __inline __m512d |
12524
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12525
|
|
|
|
|
|
|
_mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A, |
12526
|
|
|
|
|
|
|
_MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) |
12527
|
|
|
|
|
|
|
{ |
12528
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, |
12529
|
|
|
|
|
|
|
(__C << 2) | __B, |
12530
|
|
|
|
|
|
|
(__v8df) __W, __U, |
12531
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12532
|
|
|
|
|
|
|
} |
12533
|
|
|
|
|
|
|
|
12534
|
|
|
|
|
|
|
extern __inline __m512d |
12535
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12536
|
|
|
|
|
|
|
_mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A, |
12537
|
|
|
|
|
|
|
_MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) |
12538
|
|
|
|
|
|
|
{ |
12539
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, |
12540
|
|
|
|
|
|
|
(__C << 2) | __B, |
12541
|
|
|
|
|
|
|
(__v8df) |
12542
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
12543
|
|
|
|
|
|
|
__U, |
12544
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12545
|
|
|
|
|
|
|
} |
12546
|
|
|
|
|
|
|
|
12547
|
|
|
|
|
|
|
extern __inline __m512 |
12548
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12549
|
|
|
|
|
|
|
_mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B, |
12550
|
|
|
|
|
|
|
_MM_MANTISSA_SIGN_ENUM __C) |
12551
|
|
|
|
|
|
|
{ |
12552
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, |
12553
|
|
|
|
|
|
|
(__C << 2) | __B, |
12554
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
12555
|
|
|
|
|
|
|
(__mmask16) -1, |
12556
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12557
|
|
|
|
|
|
|
} |
12558
|
|
|
|
|
|
|
|
12559
|
|
|
|
|
|
|
extern __inline __m512 |
12560
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12561
|
|
|
|
|
|
|
_mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A, |
12562
|
|
|
|
|
|
|
_MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) |
12563
|
|
|
|
|
|
|
{ |
12564
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, |
12565
|
|
|
|
|
|
|
(__C << 2) | __B, |
12566
|
|
|
|
|
|
|
(__v16sf) __W, __U, |
12567
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12568
|
|
|
|
|
|
|
} |
12569
|
|
|
|
|
|
|
|
12570
|
|
|
|
|
|
|
extern __inline __m512 |
12571
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12572
|
|
|
|
|
|
|
_mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A, |
12573
|
|
|
|
|
|
|
_MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) |
12574
|
|
|
|
|
|
|
{ |
12575
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, |
12576
|
|
|
|
|
|
|
(__C << 2) | __B, |
12577
|
|
|
|
|
|
|
(__v16sf) |
12578
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
12579
|
|
|
|
|
|
|
__U, |
12580
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12581
|
|
|
|
|
|
|
} |
12582
|
|
|
|
|
|
|
|
12583
|
|
|
|
|
|
|
extern __inline __m128d |
12584
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12585
|
|
|
|
|
|
|
_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C, |
12586
|
|
|
|
|
|
|
_MM_MANTISSA_SIGN_ENUM __D) |
12587
|
|
|
|
|
|
|
{ |
12588
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A, |
12589
|
|
|
|
|
|
|
(__v2df) __B, |
12590
|
|
|
|
|
|
|
(__D << 2) | __C, |
12591
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12592
|
|
|
|
|
|
|
} |
12593
|
|
|
|
|
|
|
|
12594
|
|
|
|
|
|
|
extern __inline __m128 |
12595
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12596
|
|
|
|
|
|
|
_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C, |
12597
|
|
|
|
|
|
|
_MM_MANTISSA_SIGN_ENUM __D) |
12598
|
|
|
|
|
|
|
{ |
12599
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A, |
12600
|
|
|
|
|
|
|
(__v4sf) __B, |
12601
|
|
|
|
|
|
|
(__D << 2) | __C, |
12602
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12603
|
|
|
|
|
|
|
} |
12604
|
|
|
|
|
|
|
|
12605
|
|
|
|
|
|
|
#else |
12606
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): unmasked getmant on X
   with immediate ((C) << 2) | (B), an undefined pass-through vector and an
   all-ones mask.  */
#define _mm512_getmant_pd(X, B, C)                                      \
  ((__m512d) __builtin_ia32_getmantpd512_mask (                         \
      (__v8df)(__m512d)(X),                                             \
      (int)(((C)<<2) | (B)),                                            \
      (__v8df)_mm512_undefined_pd(),                                    \
      (__mmask8)-1,                                                     \
      _MM_FROUND_CUR_DIRECTION))
12612
|
|
|
|
|
|
|
|
12613
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): getmant on X with
   immediate ((C) << 2) | (B), W as the pass-through vector and U as the
   mask.  */
#define _mm512_mask_getmant_pd(W, U, X, B, C)                           \
  ((__m512d) __builtin_ia32_getmantpd512_mask (                         \
      (__v8df)(__m512d)(X),                                             \
      (int)(((C)<<2) | (B)),                                            \
      (__v8df)(__m512d)(W),                                             \
      (__mmask8)(U),                                                    \
      _MM_FROUND_CUR_DIRECTION))
12619
|
|
|
|
|
|
|
|
12620
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): getmant on X with
   immediate ((C) << 2) | (B), a zero pass-through vector and U as the
   mask.  */
#define _mm512_maskz_getmant_pd(U, X, B, C)                             \
  ((__m512d) __builtin_ia32_getmantpd512_mask (                         \
      (__v8df)(__m512d)(X),                                             \
      (int)(((C)<<2) | (B)),                                            \
      (__v8df)_mm512_setzero_pd(),                                      \
      (__mmask8)(U),                                                    \
      _MM_FROUND_CUR_DIRECTION))
12626
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): unmasked getmant on X
   with immediate ((C) << 2) | (B), an undefined pass-through vector and an
   all-ones mask.  */
#define _mm512_getmant_ps(X, B, C)                                      \
  ((__m512) __builtin_ia32_getmantps512_mask (                          \
      (__v16sf)(__m512)(X),                                             \
      (int)(((C)<<2) | (B)),                                            \
      (__v16sf)_mm512_undefined_ps(),                                   \
      (__mmask16)-1,                                                    \
      _MM_FROUND_CUR_DIRECTION))
12632
|
|
|
|
|
|
|
|
12633
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): getmant on X with
   immediate ((C) << 2) | (B), W as the pass-through vector and U as the
   mask.  */
#define _mm512_mask_getmant_ps(W, U, X, B, C)                           \
  ((__m512) __builtin_ia32_getmantps512_mask (                          \
      (__v16sf)(__m512)(X),                                             \
      (int)(((C)<<2) | (B)),                                            \
      (__v16sf)(__m512)(W),                                             \
      (__mmask16)(U),                                                   \
      _MM_FROUND_CUR_DIRECTION))
12639
|
|
|
|
|
|
|
|
12640
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): getmant on X with
   immediate ((C) << 2) | (B), a zero pass-through vector and U as the
   mask.  */
#define _mm512_maskz_getmant_ps(U, X, B, C)                             \
  ((__m512) __builtin_ia32_getmantps512_mask (                          \
      (__v16sf)(__m512)(X),                                             \
      (int)(((C)<<2) | (B)),                                            \
      (__v16sf)_mm512_setzero_ps(),                                     \
      (__mmask16)(U),                                                   \
      _MM_FROUND_CUR_DIRECTION))
12646
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X and Y to
   __builtin_ia32_getmantsd_round with immediate ((D) << 2) | (C).  */
#define _mm_getmant_sd(X, Y, C, D)                                      \
  ((__m128d) __builtin_ia32_getmantsd_round (                           \
      (__v2df)(__m128d)(X),                                             \
      (__v2df)(__m128d)(Y),                                             \
      (int)(((D)<<2) | (C)),                                            \
      _MM_FROUND_CUR_DIRECTION))
12651
|
|
|
|
|
|
|
|
12652
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): forwards X and Y to
   __builtin_ia32_getmantss_round with immediate ((D) << 2) | (C).  */
#define _mm_getmant_ss(X, Y, C, D)                                      \
  ((__m128) __builtin_ia32_getmantss_round (                            \
      (__v4sf)(__m128)(X),                                              \
      (__v4sf)(__m128)(Y),                                              \
      (int)(((D)<<2) | (C)),                                            \
      _MM_FROUND_CUR_DIRECTION))
12657
|
|
|
|
|
|
|
|
12658
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): forwards A and B to
   __builtin_ia32_getexpss128_round with _MM_FROUND_CUR_DIRECTION.

   This must name the same builtin as the inline-function form of
   _mm_getexp_ss; the (vector, vector, rounding) argument list is the
   signature of the "_round" builtin, not of a "_mask" one.  */
#define _mm_getexp_ss(A, B)                                             \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf)(__m128)(A),      \
                                              (__v4sf)(__m128)(B),      \
                                              _MM_FROUND_CUR_DIRECTION))
12661
|
|
|
|
|
|
|
|
12662
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): forwards A and B to
   __builtin_ia32_getexpsd128_round with _MM_FROUND_CUR_DIRECTION.

   This must name the same builtin as the inline-function form of
   _mm_getexp_sd; the (vector, vector, rounding) argument list is the
   signature of the "_round" builtin, not of a "_mask" one.  */
#define _mm_getexp_sd(A, B)                                             \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df)(__m128d)(A),    \
                                               (__v2df)(__m128d)(B),    \
                                               _MM_FROUND_CUR_DIRECTION))
12665
|
|
|
|
|
|
|
|
12666
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): unmasked getexp on A
   with an undefined pass-through vector and an all-ones mask.  */
#define _mm512_getexp_ps(A)                                             \
  ((__m512) __builtin_ia32_getexpps512_mask (                           \
      (__v16sf)(__m512)(A),                                             \
      (__v16sf)_mm512_undefined_ps(),                                   \
      (__mmask16)-1,                                                    \
      _MM_FROUND_CUR_DIRECTION))
12669
|
|
|
|
|
|
|
|
12670
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): getexp on A with W as
   the pass-through vector and U as the mask.  */
#define _mm512_mask_getexp_ps(W, U, A)                                  \
  ((__m512) __builtin_ia32_getexpps512_mask (                           \
      (__v16sf)(__m512)(A),                                             \
      (__v16sf)(__m512)(W),                                             \
      (__mmask16)(U),                                                   \
      _MM_FROUND_CUR_DIRECTION))
12673
|
|
|
|
|
|
|
|
12674
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): getexp on A with a
   zero pass-through vector and U as the mask.  */
#define _mm512_maskz_getexp_ps(U, A)                                    \
  ((__m512) __builtin_ia32_getexpps512_mask (                           \
      (__v16sf)(__m512)(A),                                             \
      (__v16sf)_mm512_setzero_ps(),                                     \
      (__mmask16)(U),                                                   \
      _MM_FROUND_CUR_DIRECTION))
12677
|
|
|
|
|
|
|
|
12678
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): unmasked getexp on A
   with an undefined pass-through vector and an all-ones mask.  */
#define _mm512_getexp_pd(A)                                             \
  ((__m512d) __builtin_ia32_getexppd512_mask (                          \
      (__v8df)(__m512d)(A),                                             \
      (__v8df)_mm512_undefined_pd(),                                    \
      (__mmask8)-1,                                                     \
      _MM_FROUND_CUR_DIRECTION))
12681
|
|
|
|
|
|
|
|
12682
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): getexp on A with W as
   the pass-through vector and U as the mask.  */
#define _mm512_mask_getexp_pd(W, U, A)                                  \
  ((__m512d) __builtin_ia32_getexppd512_mask (                          \
      (__v8df)(__m512d)(A),                                             \
      (__v8df)(__m512d)(W),                                             \
      (__mmask8)(U),                                                    \
      _MM_FROUND_CUR_DIRECTION))
12685
|
|
|
|
|
|
|
|
12686
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): getexp on A with a
   zero pass-through vector and U as the mask.  */
#define _mm512_maskz_getexp_pd(U, A)                                    \
  ((__m512d) __builtin_ia32_getexppd512_mask (                          \
      (__v8df)(__m512d)(A),                                             \
      (__v8df)_mm512_setzero_pd(),                                      \
      (__mmask8)(U),                                                    \
      _MM_FROUND_CUR_DIRECTION))
12689
|
|
|
|
|
|
|
#endif |
12690
|
|
|
|
|
|
|
|
12691
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
12692
|
|
|
|
|
|
|
extern __inline __m512 |
12693
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12694
|
|
|
|
|
|
|
_mm512_roundscale_ps (__m512 __A, const int __imm) |
12695
|
|
|
|
|
|
|
{ |
12696
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm, |
12697
|
|
|
|
|
|
|
(__v16sf) |
12698
|
|
|
|
|
|
|
_mm512_undefined_ps (), |
12699
|
|
|
|
|
|
|
-1, |
12700
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12701
|
|
|
|
|
|
|
} |
12702
|
|
|
|
|
|
|
|
12703
|
|
|
|
|
|
|
extern __inline __m512 |
12704
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12705
|
|
|
|
|
|
|
_mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C, |
12706
|
|
|
|
|
|
|
const int __imm) |
12707
|
|
|
|
|
|
|
{ |
12708
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm, |
12709
|
|
|
|
|
|
|
(__v16sf) __A, |
12710
|
|
|
|
|
|
|
(__mmask16) __B, |
12711
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12712
|
|
|
|
|
|
|
} |
12713
|
|
|
|
|
|
|
|
12714
|
|
|
|
|
|
|
extern __inline __m512 |
12715
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12716
|
|
|
|
|
|
|
_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm) |
12717
|
|
|
|
|
|
|
{ |
12718
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B, |
12719
|
|
|
|
|
|
|
__imm, |
12720
|
|
|
|
|
|
|
(__v16sf) |
12721
|
|
|
|
|
|
|
_mm512_setzero_ps (), |
12722
|
|
|
|
|
|
|
(__mmask16) __A, |
12723
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12724
|
|
|
|
|
|
|
} |
12725
|
|
|
|
|
|
|
|
12726
|
|
|
|
|
|
|
extern __inline __m512d |
12727
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12728
|
|
|
|
|
|
|
_mm512_roundscale_pd (__m512d __A, const int __imm) |
12729
|
|
|
|
|
|
|
{ |
12730
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm, |
12731
|
|
|
|
|
|
|
(__v8df) |
12732
|
|
|
|
|
|
|
_mm512_undefined_pd (), |
12733
|
|
|
|
|
|
|
-1, |
12734
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12735
|
|
|
|
|
|
|
} |
12736
|
|
|
|
|
|
|
|
12737
|
|
|
|
|
|
|
extern __inline __m512d |
12738
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12739
|
|
|
|
|
|
|
_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C, |
12740
|
|
|
|
|
|
|
const int __imm) |
12741
|
|
|
|
|
|
|
{ |
12742
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm, |
12743
|
|
|
|
|
|
|
(__v8df) __A, |
12744
|
|
|
|
|
|
|
(__mmask8) __B, |
12745
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12746
|
|
|
|
|
|
|
} |
12747
|
|
|
|
|
|
|
|
12748
|
|
|
|
|
|
|
extern __inline __m512d |
12749
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12750
|
|
|
|
|
|
|
_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm) |
12751
|
|
|
|
|
|
|
{ |
12752
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B, |
12753
|
|
|
|
|
|
|
__imm, |
12754
|
|
|
|
|
|
|
(__v8df) |
12755
|
|
|
|
|
|
|
_mm512_setzero_pd (), |
12756
|
|
|
|
|
|
|
(__mmask8) __A, |
12757
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12758
|
|
|
|
|
|
|
} |
12759
|
|
|
|
|
|
|
|
12760
|
|
|
|
|
|
|
extern __inline __m128 |
12761
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12762
|
|
|
|
|
|
|
_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm) |
12763
|
|
|
|
|
|
|
{ |
12764
|
|
|
|
|
|
|
return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A, |
12765
|
|
|
|
|
|
|
(__v4sf) __B, __imm, |
12766
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12767
|
|
|
|
|
|
|
} |
12768
|
|
|
|
|
|
|
|
12769
|
|
|
|
|
|
|
extern __inline __m128d |
12770
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12771
|
|
|
|
|
|
|
_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm) |
12772
|
|
|
|
|
|
|
{ |
12773
|
|
|
|
|
|
|
return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A, |
12774
|
|
|
|
|
|
|
(__v2df) __B, __imm, |
12775
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12776
|
|
|
|
|
|
|
} |
12777
|
|
|
|
|
|
|
|
12778
|
|
|
|
|
|
|
#else |
12779
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): unmasked roundscale
   of A with immediate B, an undefined pass-through vector and an all-ones
   mask.  */
#define _mm512_roundscale_ps(A, B)                                      \
  ((__m512) __builtin_ia32_rndscaleps_mask (                            \
      (__v16sf)(__m512)(A),                                             \
      (int)(B),                                                         \
      (__v16sf)_mm512_undefined_ps(),                                   \
      (__mmask16)(-1),                                                  \
      _MM_FROUND_CUR_DIRECTION))
12782
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): roundscale of C with
   immediate D, A as the pass-through vector and B as the mask.  */
#define _mm512_mask_roundscale_ps(A, B, C, D)                           \
  ((__m512) __builtin_ia32_rndscaleps_mask (                            \
      (__v16sf)(__m512)(C),                                             \
      (int)(D),                                                         \
      (__v16sf)(__m512)(A),                                             \
      (__mmask16)(B),                                                   \
      _MM_FROUND_CUR_DIRECTION))
12787
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): roundscale of B with
   immediate C, a zero pass-through vector and A as the mask.  */
#define _mm512_maskz_roundscale_ps(A, B, C)                             \
  ((__m512) __builtin_ia32_rndscaleps_mask (                            \
      (__v16sf)(__m512)(B),                                             \
      (int)(C),                                                         \
      (__v16sf)_mm512_setzero_ps(),                                     \
      (__mmask16)(A),                                                   \
      _MM_FROUND_CUR_DIRECTION))
12792
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): unmasked roundscale
   of A with immediate B, an undefined pass-through vector and an all-ones
   mask.  */
#define _mm512_roundscale_pd(A, B)                                      \
  ((__m512d) __builtin_ia32_rndscalepd_mask (                           \
      (__v8df)(__m512d)(A),                                             \
      (int)(B),                                                         \
      (__v8df)_mm512_undefined_pd(),                                    \
      (__mmask8)(-1),                                                   \
      _MM_FROUND_CUR_DIRECTION))
12795
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): roundscale of C with
   immediate D, A as the pass-through vector and B as the mask.  */
#define _mm512_mask_roundscale_pd(A, B, C, D)                           \
  ((__m512d) __builtin_ia32_rndscalepd_mask (                           \
      (__v8df)(__m512d)(C),                                             \
      (int)(D),                                                         \
      (__v8df)(__m512d)(A),                                             \
      (__mmask8)(B),                                                    \
      _MM_FROUND_CUR_DIRECTION))
12800
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): roundscale of B with
   immediate C, a zero pass-through vector and A as the mask.  */
#define _mm512_maskz_roundscale_pd(A, B, C)                             \
  ((__m512d) __builtin_ia32_rndscalepd_mask (                           \
      (__v8df)(__m512d)(B),                                             \
      (int)(C),                                                         \
      (__v8df)_mm512_setzero_pd(),                                      \
      (__mmask8)(A),                                                    \
      _MM_FROUND_CUR_DIRECTION))
12805
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): forwards A and B to
   __builtin_ia32_rndscaless_round with immediate C.  */
#define _mm_roundscale_ss(A, B, C)                                      \
  ((__m128) __builtin_ia32_rndscaless_round (                           \
      (__v4sf)(__m128)(A),                                              \
      (__v4sf)(__m128)(B),                                              \
      (int)(C),                                                         \
      _MM_FROUND_CUR_DIRECTION))
12808
|
|
|
|
|
|
|
/* Macro form (used when __OPTIMIZE__ is not defined): forwards A and B to
   __builtin_ia32_rndscalesd_round with immediate C.  */
#define _mm_roundscale_sd(A, B, C)                                      \
  ((__m128d) __builtin_ia32_rndscalesd_round (                          \
      (__v2df)(__m128d)(A),                                             \
      (__v2df)(__m128d)(B),                                             \
      (int)(C),                                                         \
      _MM_FROUND_CUR_DIRECTION))
12811
|
|
|
|
|
|
|
#endif |
12812
|
|
|
|
|
|
|
|
12813
|
|
|
|
|
|
|
#ifdef __OPTIMIZE__ |
12814
|
|
|
|
|
|
|
extern __inline __mmask8 |
12815
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12816
|
|
|
|
|
|
|
_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P) |
12817
|
|
|
|
|
|
|
{ |
12818
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, |
12819
|
|
|
|
|
|
|
(__v8df) __Y, __P, |
12820
|
|
|
|
|
|
|
(__mmask8) -1, |
12821
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12822
|
|
|
|
|
|
|
} |
12823
|
|
|
|
|
|
|
|
12824
|
|
|
|
|
|
|
extern __inline __mmask16 |
12825
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12826
|
|
|
|
|
|
|
_mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P) |
12827
|
|
|
|
|
|
|
{ |
12828
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, |
12829
|
|
|
|
|
|
|
(__v16sf) __Y, __P, |
12830
|
|
|
|
|
|
|
(__mmask16) -1, |
12831
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12832
|
|
|
|
|
|
|
} |
12833
|
|
|
|
|
|
|
|
12834
|
|
|
|
|
|
|
extern __inline __mmask16 |
12835
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12836
|
|
|
|
|
|
|
_mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P) |
12837
|
|
|
|
|
|
|
{ |
12838
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, |
12839
|
|
|
|
|
|
|
(__v16sf) __Y, __P, |
12840
|
|
|
|
|
|
|
(__mmask16) __U, |
12841
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12842
|
|
|
|
|
|
|
} |
12843
|
|
|
|
|
|
|
|
12844
|
|
|
|
|
|
|
extern __inline __mmask8 |
12845
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12846
|
|
|
|
|
|
|
_mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P) |
12847
|
|
|
|
|
|
|
{ |
12848
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, |
12849
|
|
|
|
|
|
|
(__v8df) __Y, __P, |
12850
|
|
|
|
|
|
|
(__mmask8) __U, |
12851
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12852
|
|
|
|
|
|
|
} |
12853
|
|
|
|
|
|
|
|
12854
|
|
|
|
|
|
|
extern __inline __mmask8 |
12855
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12856
|
|
|
|
|
|
|
_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P) |
12857
|
|
|
|
|
|
|
{ |
12858
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, |
12859
|
|
|
|
|
|
|
(__v2df) __Y, __P, |
12860
|
|
|
|
|
|
|
(__mmask8) -1, |
12861
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12862
|
|
|
|
|
|
|
} |
12863
|
|
|
|
|
|
|
|
12864
|
|
|
|
|
|
|
extern __inline __mmask8 |
12865
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12866
|
|
|
|
|
|
|
_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P) |
12867
|
|
|
|
|
|
|
{ |
12868
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, |
12869
|
|
|
|
|
|
|
(__v2df) __Y, __P, |
12870
|
|
|
|
|
|
|
(__mmask8) __M, |
12871
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12872
|
|
|
|
|
|
|
} |
12873
|
|
|
|
|
|
|
|
12874
|
|
|
|
|
|
|
extern __inline __mmask8 |
12875
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12876
|
|
|
|
|
|
|
_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P) |
12877
|
|
|
|
|
|
|
{ |
12878
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, |
12879
|
|
|
|
|
|
|
(__v4sf) __Y, __P, |
12880
|
|
|
|
|
|
|
(__mmask8) -1, |
12881
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12882
|
|
|
|
|
|
|
} |
12883
|
|
|
|
|
|
|
|
12884
|
|
|
|
|
|
|
extern __inline __mmask8 |
12885
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12886
|
|
|
|
|
|
|
_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P) |
12887
|
|
|
|
|
|
|
{ |
12888
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, |
12889
|
|
|
|
|
|
|
(__v4sf) __Y, __P, |
12890
|
|
|
|
|
|
|
(__mmask8) __M, |
12891
|
|
|
|
|
|
|
_MM_FROUND_CUR_DIRECTION); |
12892
|
|
|
|
|
|
|
} |
12893
|
|
|
|
|
|
|
|
12894
|
|
|
|
|
|
|
#else |
12895
|
|
|
|
|
|
|
/* Macro form (the #else branch of the enclosing conditional): expands to
   the cmppd512 mask builtin with an all-ones mask and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_cmp_pd_mask(X, Y, P)                                     \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X),     \
                                            (__v8df) (__m512d) (Y),     \
                                            (int) (P),                  \
                                            (__mmask8) -1,              \
                                            _MM_FROUND_CUR_DIRECTION))
12899
|
|
|
|
|
|
|
|
12900
|
|
|
|
|
|
|
/* Macro form (the #else branch of the enclosing conditional): expands to
   the cmpps512 mask builtin with an all-ones mask and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_cmp_ps_mask(X, Y, P)                                     \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X),    \
                                             (__v16sf) (__m512) (Y),    \
                                             (int) (P),                 \
                                             (__mmask16) -1,            \
                                             _MM_FROUND_CUR_DIRECTION))
12904
|
|
|
|
|
|
|
|
12905
|
|
|
|
|
|
|
/* Macro form of the masked 512-bit double compare: expands to the
   cmppd512 mask builtin with M as the mask operand and
   _MM_FROUND_CUR_DIRECTION as the final argument.

   M is parenthesized before the cast so that a compound argument (for
   example a conditional expression) is converted as a whole; a cast
   binds tighter than binary and ternary operators and would otherwise
   apply only to the first operand.  */
#define _mm512_mask_cmp_pd_mask(M, X, Y, P)                             \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X),     \
                                            (__v8df) (__m512d) (Y),     \
                                            (int) (P),                  \
                                            (__mmask8) (M),             \
                                            _MM_FROUND_CUR_DIRECTION))
12909
|
|
|
|
|
|
|
|
12910
|
|
|
|
|
|
|
/* Macro form of the masked 512-bit float compare: expands to the
   cmpps512 mask builtin with M as the mask operand and
   _MM_FROUND_CUR_DIRECTION as the final argument.

   M is parenthesized before the cast so that a compound argument (for
   example a conditional expression) is converted as a whole; a cast
   binds tighter than binary and ternary operators and would otherwise
   apply only to the first operand.  */
#define _mm512_mask_cmp_ps_mask(M, X, Y, P)                             \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X),    \
                                             (__v16sf) (__m512) (Y),    \
                                             (int) (P),                 \
                                             (__mmask16) (M),           \
                                             _MM_FROUND_CUR_DIRECTION))
12914
|
|
|
|
|
|
|
|
12915
|
|
|
|
|
|
|
/* Macro form (the #else branch of the enclosing conditional): expands to
   the cmpsd mask builtin with an all-ones mask and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm_cmp_sd_mask(X, Y, P)                                        \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X),        \
                                         (__v2df) (__m128d) (Y),        \
                                         (int) (P),                     \
                                         (__mmask8) -1,                 \
                                         _MM_FROUND_CUR_DIRECTION))
12919
|
|
|
|
|
|
|
|
12920
|
|
|
|
|
|
|
/* Macro form of the masked scalar double compare: expands to the cmpsd
   mask builtin with M as the mask operand and _MM_FROUND_CUR_DIRECTION
   as the final argument.

   M is parenthesized and cast to __mmask8, matching what the inline
   function of the same name does with its __M parameter, so both forms
   hand the builtin a mask of the same type.  */
#define _mm_mask_cmp_sd_mask(M, X, Y, P)                                \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X),        \
                                         (__v2df) (__m128d) (Y),        \
                                         (int) (P),                     \
                                         (__mmask8) (M),                \
                                         _MM_FROUND_CUR_DIRECTION))
12924
|
|
|
|
|
|
|
|
12925
|
|
|
|
|
|
|
/* Macro form (the #else branch of the enclosing conditional): expands to
   the cmpss mask builtin with an all-ones mask and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm_cmp_ss_mask(X, Y, P)                                        \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X),         \
                                         (__v4sf) (__m128) (Y),         \
                                         (int) (P),                     \
                                         (__mmask8) -1,                 \
                                         _MM_FROUND_CUR_DIRECTION))
12929
|
|
|
|
|
|
|
|
12930
|
|
|
|
|
|
|
/* Macro form of the masked scalar float compare: expands to the cmpss
   mask builtin with M as the mask operand and _MM_FROUND_CUR_DIRECTION
   as the final argument.

   M is parenthesized and cast to __mmask8, matching what the inline
   function of the same name does with its __M parameter, so both forms
   hand the builtin a mask of the same type.  */
#define _mm_mask_cmp_ss_mask(M, X, Y, P)                                \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X),         \
                                         (__v4sf) (__m128) (Y),         \
                                         (int) (P),                     \
                                         (__mmask8) (M),                \
                                         _MM_FROUND_CUR_DIRECTION))
12934
|
|
|
|
|
|
|
#endif |
12935
|
|
|
|
|
|
|
|
12936
|
|
|
|
|
|
|
extern __inline __mmask16 |
12937
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12938
|
|
|
|
|
|
|
_mm512_kmov (__mmask16 __A) |
12939
|
|
|
|
|
|
|
{ |
12940
|
|
|
|
|
|
|
return __builtin_ia32_kmov16 (__A); |
12941
|
|
|
|
|
|
|
} |
12942
|
|
|
|
|
|
|
|
12943
|
|
|
|
|
|
|
extern __inline __m512 |
12944
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12945
|
|
|
|
|
|
|
_mm512_castpd_ps (__m512d __A) |
12946
|
|
|
|
|
|
|
{ |
12947
|
|
|
|
|
|
|
return (__m512) (__A); |
12948
|
|
|
|
|
|
|
} |
12949
|
|
|
|
|
|
|
|
12950
|
|
|
|
|
|
|
extern __inline __m512i |
12951
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12952
|
|
|
|
|
|
|
_mm512_castpd_si512 (__m512d __A) |
12953
|
|
|
|
|
|
|
{ |
12954
|
|
|
|
|
|
|
return (__m512i) (__A); |
12955
|
|
|
|
|
|
|
} |
12956
|
|
|
|
|
|
|
|
12957
|
|
|
|
|
|
|
extern __inline __m512d |
12958
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12959
|
|
|
|
|
|
|
_mm512_castps_pd (__m512 __A) |
12960
|
|
|
|
|
|
|
{ |
12961
|
|
|
|
|
|
|
return (__m512d) (__A); |
12962
|
|
|
|
|
|
|
} |
12963
|
|
|
|
|
|
|
|
12964
|
|
|
|
|
|
|
extern __inline __m512i |
12965
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12966
|
|
|
|
|
|
|
_mm512_castps_si512 (__m512 __A) |
12967
|
|
|
|
|
|
|
{ |
12968
|
|
|
|
|
|
|
return (__m512i) (__A); |
12969
|
|
|
|
|
|
|
} |
12970
|
|
|
|
|
|
|
|
12971
|
|
|
|
|
|
|
extern __inline __m512 |
12972
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12973
|
|
|
|
|
|
|
_mm512_castsi512_ps (__m512i __A) |
12974
|
|
|
|
|
|
|
{ |
12975
|
|
|
|
|
|
|
return (__m512) (__A); |
12976
|
|
|
|
|
|
|
} |
12977
|
|
|
|
|
|
|
|
12978
|
|
|
|
|
|
|
extern __inline __m512d |
12979
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12980
|
|
|
|
|
|
|
_mm512_castsi512_pd (__m512i __A) |
12981
|
|
|
|
|
|
|
{ |
12982
|
|
|
|
|
|
|
return (__m512d) (__A); |
12983
|
|
|
|
|
|
|
} |
12984
|
|
|
|
|
|
|
|
12985
|
|
|
|
|
|
|
extern __inline __m128d |
12986
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12987
|
|
|
|
|
|
|
_mm512_castpd512_pd128 (__m512d __A) |
12988
|
|
|
|
|
|
|
{ |
12989
|
|
|
|
|
|
|
return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0); |
12990
|
|
|
|
|
|
|
} |
12991
|
|
|
|
|
|
|
|
12992
|
|
|
|
|
|
|
extern __inline __m128 |
12993
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
12994
|
|
|
|
|
|
|
_mm512_castps512_ps128 (__m512 __A) |
12995
|
|
|
|
|
|
|
{ |
12996
|
|
|
|
|
|
|
return _mm512_extractf32x4_ps(__A, 0); |
12997
|
|
|
|
|
|
|
} |
12998
|
|
|
|
|
|
|
|
12999
|
|
|
|
|
|
|
extern __inline __m128i |
13000
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
13001
|
|
|
|
|
|
|
_mm512_castsi512_si128 (__m512i __A) |
13002
|
|
|
|
|
|
|
{ |
13003
|
|
|
|
|
|
|
return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0); |
13004
|
|
|
|
|
|
|
} |
13005
|
|
|
|
|
|
|
|
13006
|
|
|
|
|
|
|
extern __inline __m256d |
13007
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
13008
|
|
|
|
|
|
|
_mm512_castpd512_pd256 (__m512d __A) |
13009
|
|
|
|
|
|
|
{ |
13010
|
|
|
|
|
|
|
return _mm512_extractf64x4_pd(__A, 0); |
13011
|
|
|
|
|
|
|
} |
13012
|
|
|
|
|
|
|
|
13013
|
|
|
|
|
|
|
extern __inline __m256 |
13014
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
13015
|
|
|
|
|
|
|
_mm512_castps512_ps256 (__m512 __A) |
13016
|
|
|
|
|
|
|
{ |
13017
|
|
|
|
|
|
|
return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0); |
13018
|
|
|
|
|
|
|
} |
13019
|
|
|
|
|
|
|
|
13020
|
|
|
|
|
|
|
extern __inline __m256i |
13021
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
13022
|
|
|
|
|
|
|
_mm512_castsi512_si256 (__m512i __A) |
13023
|
|
|
|
|
|
|
{ |
13024
|
|
|
|
|
|
|
return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0); |
13025
|
|
|
|
|
|
|
} |
13026
|
|
|
|
|
|
|
|
13027
|
|
|
|
|
|
|
extern __inline __m512d |
13028
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
13029
|
|
|
|
|
|
|
_mm512_castpd128_pd512 (__m128d __A) |
13030
|
|
|
|
|
|
|
{ |
13031
|
|
|
|
|
|
|
return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A); |
13032
|
|
|
|
|
|
|
} |
13033
|
|
|
|
|
|
|
|
13034
|
|
|
|
|
|
|
extern __inline __m512 |
13035
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
13036
|
|
|
|
|
|
|
_mm512_castps128_ps512 (__m128 __A) |
13037
|
|
|
|
|
|
|
{ |
13038
|
|
|
|
|
|
|
return (__m512) __builtin_ia32_ps512_ps((__m128)__A); |
13039
|
|
|
|
|
|
|
} |
13040
|
|
|
|
|
|
|
|
13041
|
|
|
|
|
|
|
extern __inline __m512i |
13042
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
13043
|
|
|
|
|
|
|
_mm512_castsi128_si512 (__m128i __A) |
13044
|
|
|
|
|
|
|
{ |
13045
|
|
|
|
|
|
|
return (__m512i) __builtin_ia32_si512_si((__v4si)__A); |
13046
|
|
|
|
|
|
|
} |
13047
|
|
|
|
|
|
|
|
13048
|
|
|
|
|
|
|
extern __inline __m512d |
13049
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
13050
|
|
|
|
|
|
|
_mm512_castpd256_pd512 (__m256d __A) |
13051
|
|
|
|
|
|
|
{ |
13052
|
|
|
|
|
|
|
return __builtin_ia32_pd512_256pd (__A); |
13053
|
|
|
|
|
|
|
} |
13054
|
|
|
|
|
|
|
|
13055
|
|
|
|
|
|
|
extern __inline __m512 |
13056
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
13057
|
|
|
|
|
|
|
_mm512_castps256_ps512 (__m256 __A) |
13058
|
|
|
|
|
|
|
{ |
13059
|
|
|
|
|
|
|
return __builtin_ia32_ps512_256ps (__A); |
13060
|
|
|
|
|
|
|
} |
13061
|
|
|
|
|
|
|
|
13062
|
|
|
|
|
|
|
extern __inline __m512i |
13063
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
13064
|
|
|
|
|
|
|
_mm512_castsi256_si512 (__m256i __A) |
13065
|
|
|
|
|
|
|
{ |
13066
|
|
|
|
|
|
|
return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A); |
13067
|
|
|
|
|
|
|
} |
13068
|
|
|
|
|
|
|
|
13069
|
|
|
|
|
|
|
extern __inline __mmask16 |
13070
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
13071
|
|
|
|
|
|
|
_mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B) |
13072
|
|
|
|
|
|
|
{ |
13073
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A, |
13074
|
|
|
|
|
|
|
(__v16si) __B, 0, |
13075
|
|
|
|
|
|
|
(__mmask16) -1); |
13076
|
|
|
|
|
|
|
} |
13077
|
|
|
|
|
|
|
|
13078
|
|
|
|
|
|
|
extern __inline __mmask16 |
13079
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
13080
|
|
|
|
|
|
|
_mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B) |
13081
|
|
|
|
|
|
|
{ |
13082
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A, |
13083
|
|
|
|
|
|
|
(__v16si) __B, 0, __U); |
13084
|
|
|
|
|
|
|
} |
13085
|
|
|
|
|
|
|
|
13086
|
|
|
|
|
|
|
extern __inline __mmask8 |
13087
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
13088
|
|
|
|
|
|
|
_mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B) |
13089
|
|
|
|
|
|
|
{ |
13090
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A, |
13091
|
|
|
|
|
|
|
(__v8di) __B, 0, __U); |
13092
|
|
|
|
|
|
|
} |
13093
|
|
|
|
|
|
|
|
13094
|
|
|
|
|
|
|
extern __inline __mmask8 |
13095
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
13096
|
|
|
|
|
|
|
_mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B) |
13097
|
|
|
|
|
|
|
{ |
13098
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A, |
13099
|
|
|
|
|
|
|
(__v8di) __B, 0, |
13100
|
|
|
|
|
|
|
(__mmask8) -1); |
13101
|
|
|
|
|
|
|
} |
13102
|
|
|
|
|
|
|
|
13103
|
|
|
|
|
|
|
extern __inline __mmask16 |
13104
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
13105
|
|
|
|
|
|
|
_mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B) |
13106
|
|
|
|
|
|
|
{ |
13107
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A, |
13108
|
|
|
|
|
|
|
(__v16si) __B, 6, |
13109
|
|
|
|
|
|
|
(__mmask16) -1); |
13110
|
|
|
|
|
|
|
} |
13111
|
|
|
|
|
|
|
|
13112
|
|
|
|
|
|
|
extern __inline __mmask16 |
13113
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
13114
|
|
|
|
|
|
|
_mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B) |
13115
|
|
|
|
|
|
|
{ |
13116
|
|
|
|
|
|
|
return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A, |
13117
|
|
|
|
|
|
|
(__v16si) __B, 6, __U); |
13118
|
|
|
|
|
|
|
} |
13119
|
|
|
|
|
|
|
|
13120
|
|
|
|
|
|
|
extern __inline __mmask8 |
13121
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
13122
|
|
|
|
|
|
|
_mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B) |
13123
|
|
|
|
|
|
|
{ |
13124
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A, |
13125
|
|
|
|
|
|
|
(__v8di) __B, 6, __U); |
13126
|
|
|
|
|
|
|
} |
13127
|
|
|
|
|
|
|
|
13128
|
|
|
|
|
|
|
extern __inline __mmask8 |
13129
|
|
|
|
|
|
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
13130
|
|
|
|
|
|
|
_mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B) |
13131
|
|
|
|
|
|
|
{ |
13132
|
|
|
|
|
|
|
return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A, |
13133
|
|
|
|
|
|
|
(__v8di) __B, 6, |
13134
|
|
|
|
|
|
|
(__mmask8) -1); |
13135
|
|
|
|
|
|
|
} |
13136
|
|
|
|
|
|
|
|
13137
|
|
|
|
|
|
|
#ifdef __DISABLE_AVX512F__ |
13138
|
|
|
|
|
|
|
#undef __DISABLE_AVX512F__ |
13139
|
|
|
|
|
|
|
#pragma GCC pop_options |
13140
|
|
|
|
|
|
|
#endif /* __DISABLE_AVX512F__ */ |
13141
|
|
|
|
|
|
|
|
13142
|
|
|
|
|
|
|
#endif /* _AVX512FINTRIN_H_INCLUDED */ |