Lines Matching +full:16 +full:- +full:bit

1 /*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------===
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *===-----------------------------------------------------------------------===
20 __target__("avx2,no-evex512"), __min_vector_width__(256)))
23 __target__("avx2,no-evex512"), __min_vector_width__(128)))
27 /// four unsigned 8-bit integers from the 256-bit integer vectors \a X and
31 /// vectors, and another eight using the upper half. These 16-bit values
32 /// are returned in the lower and upper halves of the 256-bit result,
38 /// difference, and sums these four values to form one 16-bit result. The
39 /// intrinsic computes 16 of these results with different sets of input
43 /// bytes from \a Y; the starting bit position for these four bytes is
45 /// sets of four bytes from \a X; the starting bit position for the first
46 /// set of four bytes is specified by \a M[2] times 32. These bit positions
47 /// are all relative to the 128-bit lane for each set of eight operations.
56 /// temp0 := ABS(X[Xbase+7:Xbase] - Y[Ybase+7:Ybase])
57 /// temp1 := ABS(X[Xbase+15:Xbase+8] - Y[Ybase+15:Ybase+8])
58 /// temp2 := ABS(X[Xbase+23:Xbase+16] - Y[Ybase+23:Ybase+16])
59 /// temp3 := ABS(X[Xbase+31:Xbase+24] - Y[Ybase+31:Ybase+24])
62 /// r := r + 16
76 /// A 256-bit integer vector containing one of the inputs.
78 /// A 256-bit integer vector containing one of the inputs.
82 /// \returns A 256-bit vector of [16 x i16] containing the result.
87 /// Computes the absolute value of each signed byte in the 256-bit integer
96 /// A 256-bit integer vector.
97 /// \returns A 256-bit integer vector containing the result.
104 /// Computes the absolute value of each signed 16-bit element in the 256-bit
105 /// vector of [16 x i16] in \a __a and returns each value in the
113 /// A 256-bit vector of [16 x i16].
114 /// \returns A 256-bit vector of [16 x i16] containing the result.
121 /// Computes the absolute value of each signed 32-bit element in the 256-bit
130 /// A 256-bit vector of [8 x i32].
131 /// \returns A 256-bit vector of [8 x i32] containing the result.
138 /// Converts the elements of two 256-bit vectors of [16 x i16] to 8-bit
139 /// integers using signed saturation, and returns the 256-bit result.
143 /// j := i*16
157 /// A 256-bit vector of [16 x i16] used to generate result[63:0] and
160 /// A 256-bit vector of [16 x i16] used to generate result[127:64] and
162 /// \returns A 256-bit integer vector containing the result.
169 /// Converts the elements of two 256-bit vectors of [8 x i32] to 16-bit
170 /// integers using signed saturation, and returns the resulting 256-bit
171 /// vector of [16 x i16].
176 /// k := i*16
189 /// A 256-bit vector of [8 x i32] used to generate result[63:0] and
192 /// A 256-bit vector of [8 x i32] used to generate result[127:64] and
194 /// \returns A 256-bit vector of [16 x i16] containing the result.
201 /// Converts elements from two 256-bit vectors of [16 x i16] to 8-bit integers
202 /// using unsigned saturation, and returns the 256-bit result.
206 /// j := i*16
220 /// A 256-bit vector of [16 x i16] used to generate result[63:0] and
223 /// A 256-bit vector of [16 x i16] used to generate result[127:64] and
225 /// \returns A 256-bit integer vector containing the result.
232 /// Converts elements from two 256-bit vectors of [8 x i32] to 16-bit integers
233 /// using unsigned saturation, and returns the resulting 256-bit vector of
234 /// [16 x i16].
239 /// k := i*16
252 /// A 256-bit vector of [8 x i32] used to generate result[63:0] and
255 /// A 256-bit vector of [8 x i32] used to generate result[127:64] and
257 /// \returns A 256-bit vector of [16 x i16] containing the result.
264 /// Adds 8-bit integers from corresponding bytes of two 256-bit integer
266 /// byte of the 256-bit integer vector result (overflow is ignored).
273 /// A 256-bit integer vector containing one of the source operands.
275 /// A 256-bit integer vector containing one of the source operands.
276 /// \returns A 256-bit integer vector containing the sums.
283 /// Adds 16-bit integers from corresponding elements of two 256-bit vectors of
284 /// [16 x i16] and returns the lower 16 bits of each sum in the
285 /// corresponding element of the [16 x i16] result (overflow is ignored).
292 /// A 256-bit vector of [16 x i16] containing one of the source operands.
294 /// A 256-bit vector of [16 x i16] containing one of the source operands.
295 /// \returns A 256-bit vector of [16 x i16] containing the sums.
302 /// Adds 32-bit integers from corresponding elements of two 256-bit vectors of
311 /// A 256-bit vector of [8 x i32] containing one of the source operands.
313 /// A 256-bit vector of [8 x i32] containing one of the source operands.
314 /// \returns A 256-bit vector of [8 x i32] containing the sums.
321 /// Adds 64-bit integers from corresponding elements of two 256-bit vectors of
330 /// A 256-bit vector of [4 x i64] containing one of the source operands.
332 /// A 256-bit vector of [4 x i64] containing one of the source operands.
333 /// \returns A 256-bit vector of [4 x i64] containing the sums.
340 /// Adds 8-bit integers from corresponding bytes of two 256-bit integer
342 /// corresponding byte of the 256-bit integer vector result.
349 /// A 256-bit integer vector containing one of the source operands.
351 /// A 256-bit integer vector containing one of the source operands.
352 /// \returns A 256-bit integer vector containing the sums.
359 /// Adds 16-bit integers from corresponding elements of two 256-bit vectors of
360 /// [16 x i16] using signed saturation, and returns the [16 x i16] result.
367 /// A 256-bit vector of [16 x i16] containing one of the source operands.
369 /// A 256-bit vector of [16 x i16] containing one of the source operands.
370 /// \returns A 256-bit vector of [16 x i16] containing the sums.
377 /// Adds 8-bit integers from corresponding bytes of two 256-bit integer
379 /// corresponding byte of the 256-bit integer vector result.
386 /// A 256-bit integer vector containing one of the source operands.
388 /// A 256-bit integer vector containing one of the source operands.
389 /// \returns A 256-bit integer vector containing the sums.
396 /// Adds 16-bit integers from corresponding elements of two 256-bit vectors of
397 /// [16 x i16] using unsigned saturation, and returns the [16 x i16] result.
404 /// A 256-bit vector of [16 x i16] containing one of the source operands.
406 /// A 256-bit vector of [16 x i16] containing one of the source operands.
407 /// \returns A 256-bit vector of [16 x i16] containing the sums.
414 /// Uses the lower half of the 256-bit vector \a a as the upper half of a
415 /// temporary 256-bit value, and the lower half of the 256-bit vector \a b
416 /// as the lower half of the temporary value. Right-shifts the temporary
417 /// value by \a n bytes, and uses the lower 16 bytes of the shifted value
418 /// as the lower 16 bytes of the result. Uses the upper halves of \a a and
420 /// the lower 16 bytes of the shifted value as the upper 16 bytes of the
432 /// A 256-bit integer vector containing source values.
434 /// A 256-bit integer vector containing source values.
437 /// \returns A 256-bit integer vector containing the result.
442 /// Computes the bitwise AND of the 256-bit integer vectors in \a __a and
450 /// A 256-bit integer vector.
452 /// A 256-bit integer vector.
453 /// \returns A 256-bit integer vector containing the result.
460 /// Computes the bitwise AND of the 256-bit integer vector in \a __b with
461 /// the bitwise NOT of the 256-bit integer vector in \a __a.
468 /// A 256-bit integer vector.
470 /// A 256-bit integer vector.
471 /// \returns A 256-bit integer vector containing the result.
479 /// 256-bit integer vectors in \a __a and \a __b and returns each
480 /// average in the corresponding byte of the 256-bit result.
494 /// A 256-bit integer vector.
496 /// A 256-bit integer vector.
497 /// \returns A 256-bit integer vector containing the result.
504 /// Computes the averages of the corresponding unsigned 16-bit integers in
505 /// the two 256-bit vectors of [16 x i16] in \a __a and \a __b and returns
506 /// each average in the corresponding element of the 256-bit result.
510 /// j := i*16
520 /// A 256-bit vector of [16 x i16].
522 /// A 256-bit vector of [16 x i16].
523 /// \returns A 256-bit vector of [16 x i16] containing the result.
530 /// Merges 8-bit integer values from either of the two 256-bit vectors
531 /// \a __V1 or \a __V2, as specified by the 256-bit mask \a __M and returns
532 /// the resulting 256-bit integer vector.
550 /// A 256-bit integer vector containing source values.
552 /// A 256-bit integer vector containing source values.
554 /// A 256-bit integer vector, with bit [7] of each byte specifying the
555 /// source for each corresponding byte of the result. When the mask bit
558 /// \returns A 256-bit integer vector containing the result.
566 /// Merges 16-bit integer values from either of the two 256-bit vectors
568 /// and returns the resulting 256-bit vector of [16 x i16].
572 /// j := i*16
592 /// A 256-bit vector of [16 x i16] containing source values.
594 /// A 256-bit vector of [16 x i16] containing source values.
596 /// An immediate 8-bit integer operand, with bits [7:0] specifying the
597 /// source for each element of the result. The position of the mask bit
598 /// corresponds to the index of a copied value. When a mask bit is 0, the
602 /// \returns A 256-bit vector of [16 x i16] containing the result.
607 /// Compares corresponding bytes in the 256-bit integer vectors in \a __a and
609 /// bytes of the 256-bit result.
623 /// A 256-bit integer vector containing one of the inputs.
625 /// A 256-bit integer vector containing one of the inputs.
626 /// \returns A 256-bit integer vector containing the result.
633 /// Compares corresponding elements in the 256-bit vectors of [16 x i16] in
635 /// corresponding elements of the 256-bit result.
639 /// j := i*16
649 /// A 256-bit vector of [16 x i16] containing one of the inputs.
651 /// A 256-bit vector of [16 x i16] containing one of the inputs.
652 /// \returns A 256-bit vector of [16 x i16] containing the result.
659 /// Compares corresponding elements in the 256-bit vectors of [8 x i32] in
661 /// corresponding elements of the 256-bit result.
675 /// A 256-bit vector of [8 x i32] containing one of the inputs.
677 /// A 256-bit vector of [8 x i32] containing one of the inputs.
678 /// \returns A 256-bit vector of [8 x i32] containing the result.
685 /// Compares corresponding elements in the 256-bit vectors of [4 x i64] in
687 /// corresponding elements of the 256-bit result.
701 /// A 256-bit vector of [4 x i64] containing one of the inputs.
703 /// A 256-bit vector of [4 x i64] containing one of the inputs.
704 /// \returns A 256-bit vector of [4 x i64] containing the result.
711 /// Compares corresponding signed bytes in the 256-bit integer vectors in
712 /// \a __a and \a __b for greater-than and returns the outcomes in the
713 /// corresponding bytes of the 256-bit result.
727 /// A 256-bit integer vector containing one of the inputs.
729 /// A 256-bit integer vector containing one of the inputs.
730 /// \returns A 256-bit integer vector containing the result.
739 /// Compares corresponding signed elements in the 256-bit vectors of
740 /// [16 x i16] in \a __a and \a __b for greater-than and returns the
741 /// outcomes in the corresponding elements of the 256-bit result.
745 /// j := i*16
755 /// A 256-bit vector of [16 x i16] containing one of the inputs.
757 /// A 256-bit vector of [16 x i16] containing one of the inputs.
758 /// \returns A 256-bit vector of [16 x i16] containing the result.
765 /// Compares corresponding signed elements in the 256-bit vectors of
766 /// [8 x i32] in \a __a and \a __b for greater-than and returns the
767 /// outcomes in the corresponding elements of the 256-bit result.
781 /// A 256-bit vector of [8 x i32] containing one of the inputs.
783 /// A 256-bit vector of [8 x i32] containing one of the inputs.
784 /// \returns A 256-bit vector of [8 x i32] containing the result.
791 /// Compares corresponding signed elements in the 256-bit vectors of
792 /// [4 x i64] in \a __a and \a __b for greater-than and returns the
793 /// outcomes in the corresponding elements of the 256-bit result.
807 /// A 256-bit vector of [4 x i64] containing one of the inputs.
809 /// A 256-bit vector of [4 x i64] containing one of the inputs.
810 /// \returns A 256-bit vector of [4 x i64] containing the result.
817 /// Horizontally adds the adjacent pairs of 16-bit integers from two 256-bit
818 /// vectors of [16 x i16] and returns the lower 16 bits of each sum in an
819 /// element of the [16 x i16] result (overflow is ignored). Sums from
820 /// \a __a are returned in the lower 64 bits of each 128-bit half of the
822 /// 128-bit half of the result.
827 /// result[j+15:j] := __a[j+15:j] + __a[j+31:j+16]
828 /// result[j+31:j+16] := __a[j+47:j+32] + __a[j+63:j+48]
831 /// result[j+79:j+64] := __b[j+15:j] + __b[j+31:j+16]
843 /// A 256-bit vector of [16 x i16] containing one of the source operands.
845 /// A 256-bit vector of [16 x i16] containing one of the source operands.
846 /// \returns A 256-bit vector of [16 x i16] containing the sums.
853 /// Horizontally adds the adjacent pairs of 32-bit integers from two 256-bit
856 /// are returned in the lower 64 bits of each 128-bit half of the result;
857 /// sums from \a __b are returned in the upper 64 bits of each 128-bit half
875 /// A 256-bit vector of [8 x i32] containing one of the source operands.
877 /// A 256-bit vector of [8 x i32] containing one of the source operands.
878 /// \returns A 256-bit vector of [8 x i32] containing the sums.
885 /// Horizontally adds the adjacent pairs of 16-bit integers from two 256-bit
886 /// vectors of [16 x i16] using signed saturation and returns each sum in
887 /// an element of the [16 x i16] result. Sums from \a __a are returned in
888 /// the lower 64 bits of each 128-bit half of the result; sums from \a __b
889 /// are returned in the upper 64 bits of each 128-bit half of the result.
894 /// result[j+15:j] := SATURATE16(__a[j+15:j] + __a[j+31:j+16])
895 /// result[j+31:j+16] := SATURATE16(__a[j+47:j+32] + __a[j+63:j+48])
898 /// result[j+79:j+64] := SATURATE16(__b[j+15:j] + __b[j+31:j+16])
910 /// A 256-bit vector of [16 x i16] containing one of the source operands.
912 /// A 256-bit vector of [16 x i16] containing one of the source operands.
913 /// \returns A 256-bit vector of [16 x i16] containing the sums.
920 /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit
921 /// vectors of [16 x i16] and returns the lower 16 bits of each difference
922 /// in an element of the [16 x i16] result (overflow is ignored).
924 /// 128-bit half of the result; differences from \a __b are returned in the
925 /// upper 64 bits of each 128-bit half of the result.
930 /// result[j+15:j] := __a[j+15:j] - __a[j+31:j+16]
931 /// result[j+31:j+16] := __a[j+47:j+32] - __a[j+63:j+48]
932 /// result[j+47:j+32] := __a[j+79:j+64] - __a[j+95:j+80]
933 /// result[j+63:j+48] := __a[j+111:j+96] - __a[j+127:j+112]
934 /// result[j+79:j+64] := __b[j+15:j] - __b[j+31:j+16]
935 /// result[j+95:j+80] := __b[j+47:j+32] - __b[j+63:j+48]
936 /// result[j+111:j+96] := __b[j+79:j+64] - __b[j+95:j+80]
937 /// result[j+127:j+112] := __b[j+111:j+96] - __b[j+127:j+112]
946 /// A 256-bit vector of [16 x i16] containing one of the source operands.
948 /// A 256-bit vector of [16 x i16] containing one of the source operands.
949 /// \returns A 256-bit vector of [16 x i16] containing the differences.
956 /// Horizontally subtracts adjacent pairs of 32-bit integers from two 256-bit
959 /// from \a __a are returned in the lower 64 bits of each 128-bit half of
961 /// of each 128-bit half of the result.
966 /// result[j+31:j] := __a[j+31:j] - __a[j+63:j+32]
967 /// result[j+63:j+32] := __a[j+95:j+64] - __a[j+127:j+96]
968 /// result[j+95:j+64] := __b[j+31:j] - __b[j+63:j+32]
969 /// result[j+127:j+96] := __b[j+95:j+64] - __b[j+127:j+96]
978 /// A 256-bit vector of [8 x i32] containing one of the source operands.
980 /// A 256-bit vector of [8 x i32] containing one of the source operands.
981 /// \returns A 256-bit vector of [8 x i32] containing the differences.
988 /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit
989 /// vectors of [16 x i16] using signed saturation and returns each difference in
990 /// an element of the [16 x i16] result. Differences from \a __a are
991 /// returned in the lower 64 bits of each 128-bit half of the result;
993 /// 128-bit half of the result.
998 /// result[j+15:j] := SATURATE16(__a[j+15:j] - __a[j+31:j+16])
999 /// result[j+31:j+16] := SATURATE16(__a[j+47:j+32] - __a[j+63:j+48])
1000 /// result[j+47:j+32] := SATURATE16(__a[j+79:j+64] - __a[j+95:j+80])
1001 /// result[j+63:j+48] := SATURATE16(__a[j+111:j+96] - __a[j+127:j+112])
1002 /// result[j+79:j+64] := SATURATE16(__b[j+15:j] - __b[j+31:j+16])
1003 /// result[j+95:j+80] := SATURATE16(__b[j+47:j+32] - __b[j+63:j+48])
1004 /// result[j+111:j+96] := SATURATE16(__b[j+79:j+64] - __b[j+95:j+80])
1005 /// result[j+127:j+112] := SATURATE16(__b[j+111:j+96] - __b[j+127:j+112])
1014 /// A 256-bit vector of [16 x i16] containing one of the source operands.
1016 /// A 256-bit vector of [16 x i16] containing one of the source operands.
1017 /// \returns A 256-bit vector of [16 x i16] containing the differences.
1024 /// Multiplies each unsigned byte from the 256-bit integer vector in \a __a
1025 /// with the corresponding signed byte from the 256-bit integer vector in
1026 /// \a __b, forming signed 16-bit intermediate products. Adds adjacent
1027 /// pairs of those products using signed saturation to form 16-bit sums
1028 /// returned as elements of the [16 x i16] result.
1032 /// j := i*16
1044 /// A 256-bit vector containing one of the source operands.
1046 /// A 256-bit vector containing one of the source operands.
1047 /// \returns A 256-bit vector of [16 x i16] containing the result.
1054 /// Multiplies corresponding 16-bit elements of two 256-bit vectors of
1055 /// [16 x i16], forming 32-bit intermediate products, and adds pairs of
1056 /// those products to form 32-bit sums returned as elements of the
1059 /// There is only one wraparound case: when all four of the 16-bit sources
1066 /// temp2 := __a[j+31:j+16] * __b[j+31:j+16]
1076 /// A 256-bit vector of [16 x i16] containing one of the source operands.
1078 /// A 256-bit vector of [16 x i16] containing one of the source operands.
1079 /// \returns A 256-bit vector of [8 x i32] containing the result.
1086 /// Compares the corresponding signed bytes in the two 256-bit integer vectors
1088 /// corresponding byte of the 256-bit result.
1095 /// A 256-bit integer vector.
1097 /// A 256-bit integer vector.
1098 /// \returns A 256-bit integer vector containing the result.
1105 /// Compares the corresponding signed 16-bit integers in the two 256-bit
1106 /// vectors of [16 x i16] in \a __a and \a __b and returns the larger of
1107 /// each pair in the corresponding element of the 256-bit result.
1114 /// A 256-bit vector of [16 x i16].
1116 /// A 256-bit vector of [16 x i16].
1117 /// \returns A 256-bit vector of [16 x i16] containing the result.
1124 /// Compares the corresponding signed 32-bit integers in the two 256-bit
1126 /// each pair in the corresponding element of the 256-bit result.
1133 /// A 256-bit vector of [8 x i32].
1135 /// A 256-bit vector of [8 x i32].
1136 /// \returns A 256-bit vector of [8 x i32] containing the result.
1143 /// Compares the corresponding unsigned bytes in the two 256-bit integer
1145 /// the corresponding byte of the 256-bit result.
1152 /// A 256-bit integer vector.
1154 /// A 256-bit integer vector.
1155 /// \returns A 256-bit integer vector containing the result.
1162 /// Compares the corresponding unsigned 16-bit integers in the two 256-bit
1163 /// vectors of [16 x i16] in \a __a and \a __b and returns the larger of
1164 /// each pair in the corresponding element of the 256-bit result.
1171 /// A 256-bit vector of [16 x i16].
1173 /// A 256-bit vector of [16 x i16].
1174 /// \returns A 256-bit vector of [16 x i16] containing the result.
1181 /// Compares the corresponding unsigned 32-bit integers in the two 256-bit
1183 /// each pair in the corresponding element of the 256-bit result.
1190 /// A 256-bit vector of [8 x i32].
1192 /// A 256-bit vector of [8 x i32].
1193 /// \returns A 256-bit vector of [8 x i32] containing the result.
1200 /// Compares the corresponding signed bytes in the two 256-bit integer vectors
1202 /// corresponding byte of the 256-bit result.
1209 /// A 256-bit integer vector.
1211 /// A 256-bit integer vector.
1212 /// \returns A 256-bit integer vector containing the result.
1219 /// Compares the corresponding signed 16-bit integers in the two 256-bit
1220 /// vectors of [16 x i16] in \a __a and \a __b and returns the smaller of
1221 /// each pair in the corresponding element of the 256-bit result.
1228 /// A 256-bit vector of [16 x i16].
1230 /// A 256-bit vector of [16 x i16].
1231 /// \returns A 256-bit vector of [16 x i16] containing the result.
1238 /// Compares the corresponding signed 32-bit integers in the two 256-bit
1240 /// each pair in the corresponding element of the 256-bit result.
1247 /// A 256-bit vector of [8 x i32].
1249 /// A 256-bit vector of [8 x i32].
1250 /// \returns A 256-bit vector of [8 x i32] containing the result.
1257 /// Compares the corresponding unsigned bytes in the two 256-bit integer
1259 /// the corresponding byte of the 256-bit result.
1266 /// A 256-bit integer vector.
1268 /// A 256-bit integer vector.
1269 /// \returns A 256-bit integer vector containing the result.
1276 /// Compares the corresponding unsigned 16-bit integers in the two 256-bit
1277 /// vectors of [16 x i16] in \a __a and \a __b and returns the smaller of
1278 /// each pair in the corresponding element of the 256-bit result.
1285 /// A 256-bit vector of [16 x i16].
1287 /// A 256-bit vector of [16 x i16].
1288 /// \returns A 256-bit vector of [16 x i16] containing the result.
1295 /// Compares the corresponding unsigned 32-bit integers in the two 256-bit
1297 /// each pair in the corresponding element of the 256-bit result.
1304 /// A 256-bit vector of [8 x i32].
1306 /// A 256-bit vector of [8 x i32].
1307 /// \returns A 256-bit vector of [8 x i32] containing the result.
1314 /// Creates a 32-bit integer mask from the most significant bit of each byte
1315 /// in the 256-bit integer vector in \a __a and returns the result.
1329 /// A 256-bit integer vector containing the source bytes.
1330 /// \returns The 32-bit integer mask.
1337 /// Sign-extends bytes from the 128-bit integer vector in \a __V and returns
1338 /// the 16-bit values in the corresponding elements of a 256-bit vector
1339 /// of [16 x i16].
1344 /// k := i*16
1354 /// A 128-bit integer vector containing the source bytes.
1355 /// \returns A 256-bit vector of [16 x i16] containing the sign-extended
1365 /// Sign-extends bytes from the lower half of the 128-bit integer vector in
1366 /// \a __V and returns the 32-bit values in the corresponding elements of a
1367 /// 256-bit vector of [8 x i32].
1382 /// A 128-bit integer vector containing the source bytes.
1383 /// \returns A 256-bit vector of [8 x i32] containing the sign-extended
1393 /// Sign-extends the first four bytes from the 128-bit integer vector in
1394 /// \a __V and returns the 64-bit values in the corresponding elements of a
1395 /// 256-bit vector of [4 x i64].
1400 /// result[191:128] := SignExtend(__V[23:16])
1409 /// A 128-bit integer vector containing the source bytes.
1410 /// \returns A 256-bit vector of [4 x i64] containing the sign-extended
1420 /// Sign-extends 16-bit elements from the 128-bit vector of [8 x i16] in
1421 /// \a __V and returns the 32-bit values in the corresponding elements of a
1422 /// 256-bit vector of [8 x i32].
1426 /// j := i*16
1437 /// A 128-bit vector of [8 x i16] containing the source values.
1438 /// \returns A 256-bit vector of [8 x i32] containing the sign-extended
1446 /// Sign-extends 16-bit elements from the lower half of the 128-bit vector of
1447 /// [8 x i16] in \a __V and returns the 64-bit values in the corresponding
1448 /// elements of a 256-bit vector of [4 x i64].
1452 /// result[127:64] := SignExtend(__V[31:16])
1462 /// A 128-bit vector of [8 x i16] containing the source values.
1463 /// \returns A 256-bit vector of [4 x i64] containing the sign-extended
1471 /// Sign-extends 32-bit elements from the 128-bit vector of [4 x i32] in
1472 /// \a __V and returns the 64-bit values in the corresponding elements of a
1473 /// 256-bit vector of [4 x i64].
1487 /// A 128-bit vector of [4 x i32] containing the source values.
1488 /// \returns A 256-bit vector of [4 x i64] containing the sign-extended
1496 /// Zero-extends bytes from the 128-bit integer vector in \a __V and returns
1497 /// the 16-bit values in the corresponding elements of a 256-bit vector
1498 /// of [16 x i16].
1503 /// k := i*16
1513 /// A 128-bit integer vector containing the source bytes.
1514 /// \returns A 256-bit vector of [16 x i16] containing the zero-extended
1522 /// Zero-extends bytes from the lower half of the 128-bit integer vector in
1523 /// \a __V and returns the 32-bit values in the corresponding elements of a
1524 /// 256-bit vector of [8 x i32].
1539 /// A 128-bit integer vector containing the source bytes.
1540 /// \returns A 256-bit vector of [8 x i32] containing the zero-extended
1548 /// Zero-extends the first four bytes from the 128-bit integer vector in
1549 /// \a __V and returns the 64-bit values in the corresponding elements of a
1550 /// 256-bit vector of [4 x i64].
1555 /// result[191:128] := ZeroExtend(__V[23:16])
1564 /// A 128-bit integer vector containing the source bytes.
1565 /// \returns A 256-bit vector of [4 x i64] containing the zero-extended
1573 /// Zero-extends 16-bit elements from the 128-bit vector of [8 x i16] in
1574 /// \a __V and returns the 32-bit values in the corresponding elements of a
1575 /// 256-bit vector of [8 x i32].
1579 /// j := i*16
1590 /// A 128-bit vector of [8 x i16] containing the source values.
1591 /// \returns A 256-bit vector of [8 x i32] containing the zero-extended
1599 /// Zero-extends 16-bit elements from the lower half of the 128-bit vector of
1600 /// [8 x i16] in \a __V and returns the 64-bit values in the corresponding
1601 /// elements of a 256-bit vector of [4 x i64].
1605 /// result[127:64] := ZeroExtend(__V[31:16])
1615 /// A 128-bit vector of [8 x i16] containing the source values.
1616 /// \returns A 256-bit vector of [4 x i64] containing the zero-extended
1624 /// Zero-extends 32-bit elements from the 128-bit vector of [4 x i32] in
1625 /// \a __V and returns the 64-bit values in the corresponding elements of a
1626 /// 256-bit vector of [4 x i64].
1640 /// A 128-bit vector of [4 x i32] containing the source values.
1641 /// \returns A 256-bit vector of [4 x i64] containing the zero-extended
1649 /// Multiplies signed 32-bit integers from even-numbered elements of two
1650 /// 256-bit vectors of [8 x i32] and returns the 64-bit products in the
1665 /// A 256-bit vector of [8 x i32] containing one of the source operands.
1667 /// A 256-bit vector of [8 x i32] containing one of the source operands.
1668 /// \returns A 256-bit vector of [4 x i64] containing the products.
1675 /// Multiplies signed 16-bit integer elements of two 256-bit vectors of
1676 /// [16 x i16], truncates the 32-bit results to the most significant 18
1677 /// bits, rounds by adding 1, and returns bits [16:1] of each rounded
1678 /// product in the [16 x i16] result.
1682 /// j := i*16
1684 /// result[j+15:j] := temp[16:1]
1692 /// A 256-bit vector of [16 x i16] containing one of the source operands.
1694 /// A 256-bit vector of [16 x i16] containing one of the source operands.
1695 /// \returns A 256-bit vector of [16 x i16] containing the rounded products.
1702 /// Multiplies unsigned 16-bit integer elements of two 256-bit vectors of
1703 /// [16 x i16], and returns the upper 16 bits of each 32-bit product in the
1704 /// [16 x i16] result.
1711 /// A 256-bit vector of [16 x i16] containing one of the source operands.
1713 /// A 256-bit vector of [16 x i16] containing one of the source operands.
1714 /// \returns A 256-bit vector of [16 x i16] containing the products.
1721 /// Multiplies signed 16-bit integer elements of two 256-bit vectors of
1722 /// [16 x i16], and returns the upper 16 bits of each 32-bit product in the
1723 /// [16 x i16] result.
1730 /// A 256-bit vector of [16 x i16] containing one of the source operands.
1732 /// A 256-bit vector of [16 x i16] containing one of the source operands.
1733 /// \returns A 256-bit vector of [16 x i16] containing the products.
1740 /// Multiplies signed 16-bit integer elements of two 256-bit vectors of
1741 /// [16 x i16], and returns the lower 16 bits of each 32-bit product in the
1742 /// [16 x i16] result.
1749 /// A 256-bit vector of [16 x i16] containing one of the source operands.
1751 /// A 256-bit vector of [16 x i16] containing one of the source operands.
1752 /// \returns A 256-bit vector of [16 x i16] containing the products.
1759 /// Multiplies signed 32-bit integer elements of two 256-bit vectors of
1760 /// [8 x i32], and returns the lower 32 bits of each 64-bit product in the
1768 /// A 256-bit vector of [8 x i32] containing one of the source operands.
1770 /// A 256-bit vector of [8 x i32] containing one of the source operands.
1771 /// \returns A 256-bit vector of [8 x i32] containing the products.
1778 /// Multiplies unsigned 32-bit integers from even-numbered elements of two
1779 /// 256-bit vectors of [8 x i32] and returns the 64-bit products in the
1794 /// A 256-bit vector of [8 x i32] containing one of the source operands.
1796 /// A 256-bit vector of [8 x i32] containing one of the source operands.
1797 /// \returns A 256-bit vector of [4 x i64] containing the products.
1804 /// Computes the bitwise OR of the 256-bit integer vectors in \a __a and
1812 /// A 256-bit integer vector.
1814 /// A 256-bit integer vector.
1815 /// \returns A 256-bit integer vector containing the result.
1823 /// unsigned 8-bit integers from the 256-bit integer vectors \a __a and
1827 /// eight bytes from \a __b. The zero-extended SAD value is returned in the
1828 /// corresponding 64-bit element of the result.
1832 /// and sums these eight values to form one 16-bit result. This operation
1838 /// temp0 := ABS(__a[j+7:j] - __b[j+7:j])
1839 /// temp1 := ABS(__a[j+15:j+8] - __b[j+15:j+8])
1840 /// temp2 := ABS(__a[j+23:j+16] - __b[j+23:j+16])
1841 /// temp3 := ABS(__a[j+31:j+24] - __b[j+31:j+24])
1842 /// temp4 := ABS(__a[j+39:j+32] - __b[j+39:j+32])
1843 /// temp5 := ABS(__a[j+47:j+40] - __b[j+47:j+40])
1844 /// temp6 := ABS(__a[j+55:j+48] - __b[j+55:j+48])
1845 /// temp7 := ABS(__a[j+63:j+56] - __b[j+63:j+56])
1848 /// result[j+63:j+16] := 0
1857 /// A 256-bit integer vector.
1859 /// A 256-bit integer vector.
1860 /// \returns A 256-bit integer vector containing the result.
1867 /// Shuffles 8-bit integers in the 256-bit integer vector \a __a according
1868 /// to control information in the 256-bit integer vector \a __b, and
1869 /// returns the 256-bit result. In effect there are two separate 128-bit
1892 /// A 256-bit integer vector containing source values.
1894 /// A 256-bit integer vector containing control information to determine
1895 /// what goes into the corresponding byte of the result. If bit 7 of the
1897 /// control byte specify the index (within the same 128-bit half) of \a __a
1899 /// \returns A 256-bit integer vector containing the result.
1906 /// Shuffles 32-bit integers from the 256-bit vector of [8 x i32] in \a a
1908 /// returns the 256-bit result. In effect there are two parallel 128-bit
1929 /// A 256-bit vector of [8 x i32] containing source values.
1931 /// An immediate 8-bit value specifying which elements to copy from \a a.
1935 /// \returns A 256-bit vector of [8 x i32] containing the result.
1939 /// Shuffles 16-bit integers from the 256-bit vector of [16 x i16] in \a a
1941 /// returns the 256-bit result. The upper 64 bits of each 128-bit half
1942 /// are shuffled in parallel; the lower 64 bits of each 128-bit half are
1949 /// j := i * 16 + 64
1950 /// k := (imm >> i*2)[1:0] * 16 + 64
1965 /// A 256-bit vector of [16 x i16] containing source values.
1967 /// An immediate 8-bit value specifying which elements to copy from \a a.
1971 /// \returns A 256-bit vector of [16 x i16] containing the result.
1975 /// Shuffles 16-bit integers from the 256-bit vector of [16 x i16] in \a a
1977 /// returns the 256-bit [16 x i16] result. The lower 64 bits of each
1978 /// 128-bit half are shuffled; the upper 64 bits of each 128-bit half are
1985 /// j := i * 16
1986 /// k := (imm >> i*2)[1:0] * 16
2001 /// A 256-bit vector of [16 x i16] to use as a source of data for the
2004 /// An immediate 8-bit value specifying which elements to copy from \a a.
2008 /// \returns A 256-bit vector of [16 x i16] containing the result.
2012 /// Sets each byte of the result to the corresponding byte of the 256-bit
2014 /// on whether the corresponding byte of the 256-bit integer vector in
2023 /// A 256-bit integer vector.
2025 /// A 256-bit integer vector.
2026 /// \returns A 256-bit integer vector containing the result.
2034 /// 256-bit vector of [16 x i16] in \a __a, the negative of that element,
2035 /// or zero, depending on whether the corresponding element of the 256-bit
2036 /// vector of [16 x i16] in \a __b is greater than zero, less than zero, or
2044 /// A 256-bit vector of [16 x i16].
2046 /// A 256-bit vector of [16 x i16].
2047 /// \returns A 256-bit vector of [16 x i16] containing the result.
2055 /// 256-bit vector of [8 x i32] in \a __a, the negative of that element, or
2056 /// zero, depending on whether the corresponding element of the 256-bit
2065 /// A 256-bit vector of [8 x i32].
2067 /// A 256-bit vector of [8 x i32].
2068 /// \returns A 256-bit vector of [8 x i32] containing the result.
2075 /// Shifts each 128-bit half of the 256-bit integer vector \a a left by
2088 /// A 256-bit integer vector to be shifted.
2091 /// \returns A 256-bit integer vector containing the result.
2095 /// Shifts each 128-bit half of the 256-bit integer vector \a a left by
2108 /// A 256-bit integer vector to be shifted.
2111 /// \returns A 256-bit integer vector containing the result.
2115 /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a
2124 /// A 256-bit vector of [16 x i16] to be shifted.
2127 /// \returns A 256-bit vector of [16 x i16] containing the result.
2134 /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a
2144 /// A 256-bit vector of [16 x i16] to be shifted.
2146 /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
2148 /// \returns A 256-bit vector of [16 x i16] containing the result.
2155 /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a
2164 /// A 256-bit vector of [8 x i32] to be shifted.
2167 /// \returns A 256-bit vector of [8 x i32] containing the result.
2174 /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a
2184 /// A 256-bit vector of [8 x i32] to be shifted.
2186 /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
2188 /// \returns A 256-bit vector of [8 x i32] containing the result.
2195 /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __a
2204 /// A 256-bit vector of [4 x i64] to be shifted.
2207 /// \returns A 256-bit vector of [4 x i64] containing the result.
2214 /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __a
2224 /// A 256-bit vector of [4 x i64] to be shifted.
2226 /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
2228 /// \returns A 256-bit vector of [4 x i64] containing the result.
2235 /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a
2238 /// 0 or -1 according to the corresponding input sign bit.
2245 /// A 256-bit vector of [16 x i16] to be shifted.
2248 /// \returns A 256-bit vector of [16 x i16] containing the result.
2255 /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a
2258 /// than 15, each element of the result is either 0 or -1 according to the
2259 /// corresponding input sign bit.
2266 /// A 256-bit vector of [16 x i16] to be shifted.
2268 /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
2270 /// \returns A 256-bit vector of [16 x i16] containing the result.
2277 /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a
2280 /// 0 or -1 according to the corresponding input sign bit.
2287 /// A 256-bit vector of [8 x i32] to be shifted.
2290 /// \returns A 256-bit vector of [8 x i32] containing the result.
2297 /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a
2300 /// than 31, each element of the result is either 0 or -1 according to the
2301 /// corresponding input sign bit.
2308 /// A 256-bit vector of [8 x i32] to be shifted.
2310 /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
2312 /// \returns A 256-bit vector of [8 x i32] containing the result.
2319 /// Shifts each 128-bit half of the 256-bit integer vector in \a a right by
2332 /// A 256-bit integer vector to be shifted.
2335 /// \returns A 256-bit integer vector containing the result.
2339 /// Shifts each 128-bit half of the 256-bit integer vector in \a a right by
2352 /// A 256-bit integer vector to be shifted.
2355 /// \returns A 256-bit integer vector containing the result.
2359 /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a
2368 /// A 256-bit vector of [16 x i16] to be shifted.
2371 /// \returns A 256-bit vector of [16 x i16] containing the result.
2378 /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a
2388 /// A 256-bit vector of [16 x i16] to be shifted.
2390 /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
2392 /// \returns A 256-bit vector of [16 x i16] containing the result.
2399 /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a
2408 /// A 256-bit vector of [8 x i32] to be shifted.
2411 /// \returns A 256-bit vector of [8 x i32] containing the result.
2418 /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a
2428 /// A 256-bit vector of [8 x i32] to be shifted.
2430 /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
2432 /// \returns A 256-bit vector of [8 x i32] containing the result.
2439 /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __a
2448 /// A 256-bit vector of [4 x i64] to be shifted.
2451 /// \returns A 256-bit vector of [4 x i64] containing the result.
2458 /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __a
2468 /// A 256-bit vector of [4 x i64] to be shifted.
2470 /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
2472 /// \returns A 256-bit vector of [4 x i64] containing the result.
2479 /// Subtracts 8-bit integers from corresponding bytes of two 256-bit integer
2481 /// corresponding byte of the 256-bit integer vector result (overflow is
2487 /// result[j+7:j] := __a[j+7:j] - __b[j+7:j]
2496 /// A 256-bit integer vector containing the minuends.
2498 /// A 256-bit integer vector containing the subtrahends.
2499 /// \returns A 256-bit integer vector containing the differences.
2503 return (__m256i)((__v32qu)__a - (__v32qu)__b); in _mm256_sub_epi8()
2506 /// Subtracts 16-bit integers from corresponding elements of two 256-bit
2507 /// vectors of [16 x i16]. Returns the lower 16 bits of each difference in
2508 /// the corresponding element of the [16 x i16] result (overflow is
2513 /// j := i*16
2514 /// result[j+15:j] := __a[j+15:j] - __b[j+15:j]
2523 /// A 256-bit vector of [16 x i16] containing the minuends.
2525 /// A 256-bit vector of [16 x i16] containing the subtrahends.
2526 /// \returns A 256-bit vector of [16 x i16] containing the differences.
2530 return (__m256i)((__v16hu)__a - (__v16hu)__b); in _mm256_sub_epi16()
2533 /// Subtracts 32-bit integers from corresponding elements of two 256-bit
2540 /// result[j+31:j] := __a[j+31:j] - __b[j+31:j]
2549 /// A 256-bit vector of [8 x i32] containing the minuends.
2551 /// A 256-bit vector of [8 x i32] containing the subtrahends.
2552 /// \returns A 256-bit vector of [8 x i32] containing the differences.
2556 return (__m256i)((__v8su)__a - (__v8su)__b); in _mm256_sub_epi32()
2559 /// Subtracts 64-bit integers from corresponding elements of two 256-bit
2566 /// result[j+63:j] := __a[j+63:j] - __b[j+63:j]
2575 /// A 256-bit vector of [4 x i64] containing the minuends.
2577 /// A 256-bit vector of [4 x i64] containing the subtrahends.
2578 /// \returns A 256-bit vector of [4 x i64] containing the differences.
2582 return (__m256i)((__v4du)__a - (__v4du)__b); in _mm256_sub_epi64()
2585 /// Subtracts 8-bit integers from corresponding bytes of two 256-bit integer
2587 /// corresponding byte of the 256-bit integer vector result.
2592 /// result[j+7:j] := SATURATE8(__a[j+7:j] - __b[j+7:j])
2601 /// A 256-bit integer vector containing the minuends.
2603 /// A 256-bit integer vector containing the subtrahends.
2604 /// \returns A 256-bit integer vector containing the differences.
2611 /// Subtracts 16-bit integers from corresponding elements of two 256-bit
2612 /// vectors of [16 x i16] using signed saturation, and returns each
2613 /// difference in the corresponding element of the [16 x i16] result.
2617 /// j := i*16
2618 /// result[j+15:j] := SATURATE16(__a[j+15:j] - __b[j+15:j])
2627 /// A 256-bit vector of [16 x i16] containing the minuends.
2629 /// A 256-bit vector of [16 x i16] containing the subtrahends.
2630 /// \returns A 256-bit vector of [16 x i16] containing the differences.
2637 /// Subtracts 8-bit integers from corresponding bytes of two 256-bit integer
2639 /// corresponding byte of the 256-bit integer vector result. For each byte,
2640 /// computes <c> result = __a - __b </c>.
2645 /// result[j+7:j] := SATURATE8U(__a[j+7:j] - __b[j+7:j])
2654 /// A 256-bit integer vector containing the minuends.
2656 /// A 256-bit integer vector containing the subtrahends.
2657 /// \returns A 256-bit integer vector containing the differences.
2664 /// Subtracts 16-bit integers from corresponding elements of two 256-bit
2665 /// vectors of [16 x i16] using unsigned saturation, and returns each
2666 /// difference in the corresponding element of the [16 x i16] result.
2670 /// j := i*16
2671 /// result[j+15:j] := SATURATE16U(__a[j+15:j] - __b[j+15:j])
2680 /// A 256-bit vector of [16 x i16] containing the minuends.
2682 /// A 256-bit vector of [16 x i16] containing the subtrahends.
2683 /// \returns A 256-bit vector of [16 x i16] containing the differences.
2690 /// Unpacks and interleaves 8-bit integers from parts of the 256-bit integer
2691 /// vectors in \a __a and \a __b to form the 256-bit result. Specifically,
2692 /// uses the upper 64 bits of each 128-bit half of \a __a and \a __b as
2698 /// result[23:16] := __a[79:72]
2712 /// A 256-bit integer vector used as the source for the even-numbered bytes
2715 /// A 256-bit integer vector used as the source for the odd-numbered bytes
2717 /// \returns A 256-bit integer vector containing the result.
2724 /// Unpacks and interleaves 16-bit integers from parts of the 256-bit vectors
2725 /// of [16 x i16] in \a __a and \a __b to return the resulting 256-bit
2726 /// vector of [16 x i16]. Specifically, uses the upper 64 bits of each
2727 /// 128-bit half of \a __a and \a __b as input; other bits in these
2732 /// result[31:16] := __b[79:64]
2747 /// A 256-bit vector of [16 x i16] used as the source for the even-numbered
2750 /// A 256-bit vector of [16 x i16] used as the source for the odd-numbered
2752 /// \returns A 256-bit vector of [16 x i16] containing the result.
2756 …levector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14,… in _mm256_unpackhi_epi16()
2759 /// Unpacks and interleaves 32-bit integers from parts of the 256-bit vectors
2760 /// of [8 x i32] in \a __a and \a __b to return the resulting 256-bit vector
2761 /// of [8 x i32]. Specifically, uses the upper 64 bits of each 128-bit half
2781 /// A 256-bit vector of [8 x i32] used as the source for the even-numbered
2784 /// A 256-bit vector of [8 x i32] used as the source for the odd-numbered
2786 /// \returns A 256-bit vector of [8 x i32] containing the result.
2793 /// Unpacks and interleaves 64-bit integers from parts of the 256-bit vectors
2794 /// of [4 x i64] in \a __a and \a __b to return the resulting 256-bit vector
2795 /// of [4 x i64]. Specifically, uses the upper 64 bits of each 128-bit half
2811 /// A 256-bit vector of [4 x i64] used as the source for the even-numbered
2814 /// A 256-bit vector of [4 x i64] used as the source for the odd-numbered
2816 /// \returns A 256-bit vector of [4 x i64] containing the result.
2823 /// Unpacks and interleaves 8-bit integers from parts of the 256-bit integer
2824 /// vectors in \a __a and \a __b to form the 256-bit result. Specifically,
2825 /// uses the lower 64 bits of each 128-bit half of \a __a and \a __b as
2831 /// result[23:16] := __a[15:8]
2845 /// A 256-bit integer vector used as the source for the even-numbered bytes
2848 /// A 256-bit integer vector used as the source for the odd-numbered bytes
2850 /// \returns A 256-bit integer vector containing the result.
2854 …0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, … in _mm256_unpacklo_epi8()
2857 /// Unpacks and interleaves 16-bit integers from parts of the 256-bit vectors
2858 /// of [16 x i16] in \a __a and \a __b to return the resulting 256-bit
2859 /// vector of [16 x i16]. Specifically, uses the lower 64 bits of each
2860 /// 128-bit half of \a __a and \a __b as input; other bits in these
2865 /// result[31:16] := __b[15:0]
2866 /// result[47:32] := __a[31:16]
2867 /// result[63:48] := __b[31:16]
2880 /// A 256-bit vector of [16 x i16] used as the source for the even-numbered
2883 /// A 256-bit vector of [16 x i16] used as the source for the odd-numbered
2885 /// \returns A 256-bit vector of [16 x i16] containing the result.
2889 …fflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 1… in _mm256_unpacklo_epi16()
2892 /// Unpacks and interleaves 32-bit integers from parts of the 256-bit vectors
2893 /// of [8 x i32] in \a __a and \a __b to return the resulting 256-bit vector
2894 /// of [8 x i32]. Specifically, uses the lower 64 bits of each 128-bit half
2914 /// A 256-bit vector of [8 x i32] used as the source for the even-numbered
2917 /// A 256-bit vector of [8 x i32] used as the source for the odd-numbered
2919 /// \returns A 256-bit vector of [8 x i32] containing the result.
2926 /// Unpacks and interleaves 64-bit integers from parts of the 256-bit vectors
2927 /// of [4 x i64] in \a __a and \a __b to return the resulting 256-bit vector
2928 /// of [4 x i64]. Specifically, uses the lower 64 bits of each 128-bit half
2944 /// A 256-bit vector of [4 x i64] used as the source for the even-numbered
2947 /// A 256-bit vector of [4 x i64] used as the source for the odd-numbered
2949 /// \returns A 256-bit vector of [4 x i64] containing the result.
2956 /// Computes the bitwise XOR of the 256-bit integer vectors in \a __a and
2964 /// A 256-bit integer vector.
2966 /// A 256-bit integer vector.
2967 /// \returns A 256-bit integer vector containing the result.
2974 /// Loads the 256-bit integer vector from memory \a __V using a non-temporal
2975 /// memory hint and returns the vector. \a __V must be aligned on a 32-byte
2983 /// A pointer to the 32-byte aligned memory containing the vector to load.
2984 /// \returns A 256-bit integer vector loaded from memory.
2992 /// Broadcasts the 32-bit floating-point value from the low element of the
2993 /// 128-bit vector of [4 x float] in \a __X to all elements of the result's
2994 /// 128-bit vector of [4 x float].
3001 /// A 128-bit vector of [4 x float] whose low element will be broadcast.
3002 /// \returns A 128-bit vector of [4 x float] containing the result.
3009 /// Broadcasts the 64-bit floating-point value from the low element of the
3010 /// 128-bit vector of [2 x double] in \a __a to both elements of the
3011 /// result's 128-bit vector of [2 x double].
3018 /// A 128-bit vector of [2 x double] whose low element will be broadcast.
3019 /// \returns A 128-bit vector of [2 x double] containing the result.
3026 /// Broadcasts the 32-bit floating-point value from the low element of the
3027 /// 128-bit vector of [4 x float] in \a __X to all elements of the
3028 /// result's 256-bit vector of [8 x float].
3035 /// A 128-bit vector of [4 x float] whose low element will be broadcast.
3036 /// \returns A 256-bit vector of [8 x float] containing the result.
3043 /// Broadcasts the 64-bit floating-point value from the low element of the
3044 /// 128-bit vector of [2 x double] in \a __X to all elements of the
3045 /// result's 256-bit vector of [4 x double].
3052 /// A 128-bit vector of [2 x double] whose low element will be broadcast.
3053 /// \returns A 256-bit vector of [4 x double] containing the result.
3060 /// Broadcasts the 128-bit integer data from \a __X to both the lower and
3061 /// upper halves of the 256-bit result.
3068 /// A 128-bit integer vector to be broadcast.
3069 /// \returns A 256-bit integer vector containing the result.
3078 /// Merges 32-bit integer elements from either of the two 128-bit vectors of
3079 /// [4 x i32] in \a V1 or \a V2 to the result's 128-bit vector of [4 x i32],
3102 /// A 128-bit vector of [4 x i32] containing source values.
3104 /// A 128-bit vector of [4 x i32] containing source values.
3106 /// An immediate 8-bit integer operand, with bits [3:0] specifying the
3107 /// source for each element of the result. The position of the mask bit
3108 /// corresponds to the index of a copied value. When a mask bit is 0, the
3110 /// \returns A 128-bit vector of [4 x i32] containing the result.
3115 /// Merges 32-bit integer elements from either of the two 256-bit vectors of
3116 /// [8 x i32] in \a V1 or \a V2 to return a 256-bit vector of [8 x i32],
3139 /// A 256-bit vector of [8 x i32] containing source values.
3141 /// A 256-bit vector of [8 x i32] containing source values.
3143 /// An immediate 8-bit integer operand, with bits [7:0] specifying the
3144 /// source for each element of the result. The position of the mask bit
3145 /// corresponds to the index of a copied value. When a mask bit is 0, the
3147 /// \returns A 256-bit vector of [8 x i32] containing the result.
3152 /// Broadcasts the low byte from the 128-bit integer vector in \a __X to all
3153 /// bytes of the 256-bit result.
3160 /// A 128-bit integer vector whose low byte will be broadcast.
3161 /// \returns A 256-bit integer vector containing the result.
3168 /// Broadcasts the low element from the 128-bit vector of [8 x i16] in \a __X
3169 /// to all elements of the result's 256-bit vector of [16 x i16].
3176 /// A 128-bit vector of [8 x i16] whose low element will be broadcast.
3177 /// \returns A 256-bit vector of [16 x i16] containing the result.
3184 /// Broadcasts the low element from the 128-bit vector of [4 x i32] in \a __X
3185 /// to all elements of the result's 256-bit vector of [8 x i32].
3192 /// A 128-bit vector of [4 x i32] whose low element will be broadcast.
3193 /// \returns A 256-bit vector of [8 x i32] containing the result.
3200 /// Broadcasts the low element from the 128-bit vector of [2 x i64] in \a __X
3201 /// to all elements of the result's 256-bit vector of [4 x i64].
3208 /// A 128-bit vector of [2 x i64] whose low element will be broadcast.
3209 /// \returns A 256-bit vector of [4 x i64] containing the result.
3216 /// Broadcasts the low byte from the 128-bit integer vector in \a __X to all
3217 /// bytes of the 128-bit result.
3224 /// A 128-bit integer vector whose low byte will be broadcast.
3225 /// \returns A 128-bit integer vector containing the result.
3232 /// Broadcasts the low element from the 128-bit vector of [8 x i16] in
3233 /// \a __X to all elements of the result's 128-bit vector of [8 x i16].
3240 /// A 128-bit vector of [8 x i16] whose low element will be broadcast.
3241 /// \returns A 128-bit vector of [8 x i16] containing the result.
3248 /// Broadcasts the low element from the 128-bit vector of [4 x i32] in \a __X
3256 /// A 128-bit vector of [4 x i32] whose low element will be broadcast.
3257 /// \returns A 128-bit vector of [4 x i32] containing the result.
3264 /// Broadcasts the low element from the 128-bit vector of [2 x i64] in \a __X
3265 /// to both elements of the result's 128-bit vector of [2 x i64].
3272 /// A 128-bit vector of [2 x i64] whose low element will be broadcast.
3273 /// \returns A 128-bit vector of [2 x i64] containing the result.
3280 /// Sets the result's 256-bit vector of [8 x i32] to copies of elements of the
3281 /// 256-bit vector of [8 x i32] in \a __a as specified by indexes in the
3282 /// elements of the 256-bit vector of [8 x i32] in \a __b.
3297 /// A 256-bit vector of [8 x i32] containing the source values.
3299 /// A 256-bit vector of [8 x i32] containing indexes of values to use from
3301 /// \returns A 256-bit vector of [8 x i32] containing the result.
3308 /// Sets the result's 256-bit vector of [4 x double] to copies of elements of
3309 /// the 256-bit vector of [4 x double] in \a V as specified by the
3329 /// A 256-bit vector of [4 x double] containing the source values.
3331 /// An immediate 8-bit value specifying which elements to copy from \a V.
3334 /// \returns A 256-bit vector of [4 x double] containing the result.
3338 /// Sets the result's 256-bit vector of [8 x float] to copies of elements of
3339 /// the 256-bit vector of [8 x float] in \a __a as specified by indexes in
3340 /// the elements of the 256-bit vector of [8 x i32] in \a __b.
3355 /// A 256-bit vector of [8 x float] containing the source values.
3357 /// A 256-bit vector of [8 x i32] containing indexes of values to use from
3359 /// \returns A 256-bit vector of [8 x float] containing the result.
3366 /// Sets the result's 256-bit vector of [4 x i64] to copies of elements
3367 /// of the 256-bit vector of [4 x i64] in \a V as specified by the
3387 /// A 256-bit vector of [4 x i64] containing the source values.
3389 /// An immediate 8-bit value specifying which elements to copy from \a V.
3392 /// \returns A 256-bit vector of [4 x i64] containing the result.
3396 /// Sets each half of the 256-bit result either to zero or to one of the
3397 /// four possible 128-bit halves of the 256-bit vectors \a V1 and \a V2,
3426 /// A 256-bit integer vector containing source values.
3428 /// A 256-bit integer vector containing source values.
3432 /// Within each 4-bit control value, if bit 3 is 1, the result is zero,
3438 /// \returns A 256-bit integer vector containing the result.
3442 /// Extracts half of the 256-bit vector \a V to the 128-bit result. If bit 0
3455 /// A 256-bit integer vector containing the source values.
3458 /// \returns A 128-bit integer vector containing the result.
3462 /// Copies the 256-bit vector \a V1 to the result, then overwrites half of the
3463 /// result with the 128-bit vector \a V2. If bit 0 of the immediate \a M
3476 /// A 256-bit integer vector containing a source value.
3478 /// A 128-bit integer vector containing a source value.
3481 /// \returns A 256-bit integer vector containing the result.
3486 /// Conditionally loads eight 32-bit integer elements from memory \a __X, if
3487 /// the most significant bit of the corresponding element in the mask
3489 /// Returns the 256-bit [8 x i32] result.
3509 /// A 256-bit vector of [8 x i32] containing the mask bits.
3510 /// \returns A 256-bit vector of [8 x i32] containing the loaded or zeroed
3518 /// Conditionally loads four 64-bit integer elements from memory \a __X, if
3519 /// the most significant bit of the corresponding element in the mask
3521 /// Returns the 256-bit [4 x i64] result.
3541 /// A 256-bit vector of [4 x i64] containing the mask bits.
3542 /// \returns A 256-bit vector of [4 x i64] containing the loaded or zeroed
3550 /// Conditionally loads four 32-bit integer elements from memory \a __X, if
3551 /// the most significant bit of the corresponding element in the mask
3553 /// Returns the 128-bit [4 x i32] result.
3573 /// A 128-bit vector of [4 x i32] containing the mask bits.
3574 /// \returns A 128-bit vector of [4 x i32] containing the loaded or zeroed
3582 /// Conditionally loads two 64-bit integer elements from memory \a __X, if
3583 /// the most significant bit of the corresponding element in the mask
3585 /// Returns the 128-bit [2 x i64] result.
3605 /// A 128-bit vector of [2 x i64] containing the mask bits.
3606 /// \returns A 128-bit vector of [2 x i64] containing the loaded or zeroed
3614 /// Conditionally stores eight 32-bit integer elements from the 256-bit vector
3615 /// of [8 x i32] in \a __Y to memory \a __X, if the most significant bit of
3635 /// A 256-bit vector of [8 x i32] containing the mask bits.
3637 /// A 256-bit vector of [8 x i32] containing the values to store.
3644 /// Conditionally stores four 64-bit integer elements from the 256-bit vector
3645 /// of [4 x i64] in \a __Y to memory \a __X, if the most significant bit of
3665 /// A 256-bit vector of [4 x i64] containing the mask bits.
3667 /// A 256-bit vector of [4 x i64] containing the values to store.
3674 /// Conditionally stores four 32-bit integer elements from the 128-bit vector
3675 /// of [4 x i32] in \a __Y to memory \a __X, if the most significant bit of
3695 /// A 128-bit vector of [4 x i32] containing the mask bits.
3697 /// A 128-bit vector of [4 x i32] containing the values to store.
3704 /// Conditionally stores two 64-bit integer elements from the 128-bit vector
3705 /// of [2 x i64] in \a __Y to memory \a __X, if the most significant bit of
3725 /// A 128-bit vector of [2 x i64] containing the mask bits.
3727 /// A 128-bit vector of [2 x i64] containing the values to store.
3734 /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __X
3736 /// 256-bit vector of [8 x i32] in \a __Y, shifting in zero bits, and
3745 /// A 256-bit vector of [8 x i32] to be shifted.
3747 /// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
3749 /// \returns A 256-bit vector of [8 x i32] containing the result.
3756 /// Shifts each 32-bit element of the 128-bit vector of [4 x i32] in \a __X
3758 /// 128-bit vector of [4 x i32] in \a __Y, shifting in zero bits, and
3767 /// A 128-bit vector of [4 x i32] to be shifted.
3769 /// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
3771 /// \returns A 128-bit vector of [4 x i32] containing the result.
3778 /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __X
3780 /// 256-bit vector of [4 x i64] in \a __Y, shifting in zero bits, and
3789 /// A 256-bit vector of [4 x i64] to be shifted.
3791 /// A 256-bit vector of [4 x i64] containing the unsigned shift counts (in
3793 /// \returns A 256-bit vector of [4 x i64] containing the result.
3800 /// Shifts each 64-bit element of the 128-bit vector of [2 x i64] in \a __X
3802 /// 128-bit vector of [2 x i64] in \a __Y, shifting in zero bits, and
3811 /// A 128-bit vector of [2 x i64] to be shifted.
3813 /// A 128-bit vector of [2 x i64] containing the unsigned shift counts (in
3815 /// \returns A 128-bit vector of [2 x i64] containing the result.
3822 /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __X
3824 /// 256-bit vector of [8 x i32] in \a __Y, shifting in sign bits, and
3826 /// 31, the result for that element is 0 or -1 according to the sign bit
3834 /// A 256-bit vector of [8 x i32] to be shifted.
3836 /// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
3838 /// \returns A 256-bit vector of [8 x i32] containing the result.
3845 /// Shifts each 32-bit element of the 128-bit vector of [4 x i32] in \a __X
3847 /// 128-bit vector of [4 x i32] in \a __Y, shifting in sign bits, and
3849 /// 31, the result for that element is 0 or -1 according to the sign bit
3857 /// A 128-bit vector of [4 x i32] to be shifted.
3859 /// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
3861 /// \returns A 128-bit vector of [4 x i32] containing the result.
3868 /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __X
3870 /// 256-bit vector of [8 x i32] in \a __Y, shifting in zero bits, and
3879 /// A 256-bit vector of [8 x i32] to be shifted.
3881 /// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
3883 /// \returns A 256-bit vector of [8 x i32] containing the result.
3890 /// Shifts each 32-bit element of the 128-bit vector of [4 x i32] in \a __X
3892 /// 128-bit vector of [4 x i32] in \a __Y, shifting in zero bits, and
3901 /// A 128-bit vector of [4 x i32] to be shifted.
3903 /// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
3905 /// \returns A 128-bit vector of [4 x i32] containing the result.
3912 /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __X
3914 /// 256-bit vector of [4 x i64] in \a __Y, shifting in zero bits, and
3923 /// A 256-bit vector of [4 x i64] to be shifted.
3925 /// A 256-bit vector of [4 x i64] containing the unsigned shift counts (in
3927 /// \returns A 256-bit vector of [4 x i64] containing the result.
3934 /// Shifts each 64-bit element of the 128-bit vector of [2 x i64] in \a __X
3936 /// 128-bit vector of [2 x i64] in \a __Y, shifting in zero bits, and
3945 /// A 128-bit vector of [2 x i64] to be shifted.
3947 /// A 128-bit vector of [2 x i64] containing the unsigned shift counts (in
3949 /// \returns A 128-bit vector of [2 x i64] containing the result.
3956 /// Conditionally gathers two 64-bit floating-point values, either from the
3957 /// 128-bit vector of [2 x double] in \a a, or from memory \a m using scaled
3958 /// indexes from the 128-bit vector of [4 x i32] in \a i. The 128-bit vector
3983 /// A 128-bit vector of [2 x double] used as the source when a mask bit is
3988 /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. Only
3991 /// A 128-bit vector of [2 x double] containing the mask. The most
3992 /// significant bit of each element in the mask vector represents the mask
3993 /// bits. If a mask bit is zero, the corresponding value from vector \a a
3998 /// \returns A 128-bit vector of [2 x double] containing the gathered values.
4005 /// Conditionally gathers four 64-bit floating-point values, either from the
4006 /// 256-bit vector of [4 x double] in \a a, or from memory \a m using scaled
4007 /// indexes from the 128-bit vector of [4 x i32] in \a i. The 256-bit vector
4032 /// A 256-bit vector of [4 x double] used as the source when a mask bit is
4037 /// A 128-bit vector of [4 x i32] containing signed indexes into \a m.
4039 /// A 256-bit vector of [4 x double] containing the mask. The most
4040 /// significant bit of each element in the mask vector represents the mask
4041 /// bits. If a mask bit is zero, the corresponding value from vector \a a
4046 /// \returns A 256-bit vector of [4 x double] containing the gathered values.
4053 /// Conditionally gathers two 64-bit floating-point values, either from the
4054 /// 128-bit vector of [2 x double] in \a a, or from memory \a m using scaled
4055 /// indexes from the 128-bit vector of [2 x i64] in \a i. The 128-bit vector
4080 /// A 128-bit vector of [2 x double] used as the source when a mask bit is
4085 /// A 128-bit vector of [2 x i64] containing signed indexes into \a m.
4087 /// A 128-bit vector of [2 x double] containing the mask. The most
4088 /// significant bit of each element in the mask vector represents the mask
4089 /// bits. If a mask bit is zero, the corresponding value from vector \a a
4094 /// \returns A 128-bit vector of [2 x double] containing the gathered values.
4101 /// Conditionally gathers four 64-bit floating-point values, either from the
4102 /// 256-bit vector of [4 x double] in \a a, or from memory \a m using scaled
4103 /// indexes from the 256-bit vector of [4 x i64] in \a i. The 256-bit vector
4128 /// A 256-bit vector of [4 x double] used as the source when a mask bit is
4133 /// A 256-bit vector of [4 x i64] containing signed indexes into \a m.
4135 /// A 256-bit vector of [4 x double] containing the mask. The most
4136 /// significant bit of each element in the mask vector represents the mask
4137 /// bits. If a mask bit is zero, the corresponding value from vector \a a
4142 /// \returns A 256-bit vector of [4 x double] containing the gathered values.
4149 /// Conditionally gathers four 32-bit floating-point values, either from the
4150 /// 128-bit vector of [4 x float] in \a a, or from memory \a m using scaled
4151 /// indexes from the 128-bit vector of [4 x i32] in \a i. The 128-bit vector
4176 /// A 128-bit vector of [4 x float] used as the source when a mask bit is
4181 /// A 128-bit vector of [4 x i32] containing signed indexes into \a m.
4183 /// A 128-bit vector of [4 x float] containing the mask. The most
4184 /// significant bit of each element in the mask vector represents the mask
4185 /// bits. If a mask bit is zero, the corresponding value from vector \a a
4190 /// \returns A 128-bit vector of [4 x float] containing the gathered values.
4197 /// Conditionally gathers eight 32-bit floating-point values, either from the
4198 /// 256-bit vector of [8 x float] in \a a, or from memory \a m using scaled
4199 /// indexes from the 256-bit vector of [8 x i32] in \a i. The 256-bit vector
4224 /// A 256-bit vector of [8 x float] used as the source when a mask bit is
4229 /// A 256-bit vector of [8 x i32] containing signed indexes into \a m.
4231 /// A 256-bit vector of [8 x float] containing the mask. The most
4232 /// significant bit of each element in the mask vector represents the mask
4233 /// bits. If a mask bit is zero, the corresponding value from vector \a a
4238 /// \returns A 256-bit vector of [8 x float] containing the gathered values.
4245 /// Conditionally gathers two 32-bit floating-point values, either from the
4246 /// 128-bit vector of [4 x float] in \a a, or from memory \a m using scaled
4247 /// indexes from the 128-bit vector of [2 x i64] in \a i. The 128-bit vector
4274 /// A 128-bit vector of [4 x float] used as the source when a mask bit is
4279 /// A 128-bit vector of [2 x i64] containing signed indexes into \a m.
4281 /// A 128-bit vector of [4 x float] containing the mask. The most
4282 /// significant bit of each element in the mask vector represents the mask
4283 /// bits. If a mask bit is zero, the corresponding value from vector \a a
4289 /// \returns A 128-bit vector of [4 x float] containing the gathered values.
4296 /// Conditionally gathers four 32-bit floating-point values, either from the
4297 /// 128-bit vector of [4 x float] in \a a, or from memory \a m using scaled
4298 /// indexes from the 256-bit vector of [4 x i64] in \a i. The 128-bit vector
4323 /// A 128-bit vector of [4 x float] used as the source when a mask bit is
4328 /// A 256-bit vector of [4 x i64] containing signed indexes into \a m.
4330 /// A 128-bit vector of [4 x float] containing the mask. The most
4331 /// significant bit of each element in the mask vector represents the mask
4332 /// bits. If a mask bit is zero, the corresponding value from vector \a a
4337 /// \returns A 128-bit vector of [4 x float] containing the gathered values.
4344 /// Conditionally gathers four 32-bit integer values, either from the
4345 /// 128-bit vector of [4 x i32] in \a a, or from memory \a m using scaled
4346 /// indexes from the 128-bit vector of [4 x i32] in \a i. The 128-bit vector
4371 /// A 128-bit vector of [4 x i32] used as the source when a mask bit is
4376 /// A 128-bit vector of [4 x i32] containing signed indexes into \a m.
4378 /// A 128-bit vector of [4 x i32] containing the mask. The most significant
4379 /// bit of each element in the mask vector represents the mask bits. If a
4380 /// mask bit is zero, the corresponding value from vector \a a is gathered;
4385 /// \returns A 128-bit vector of [4 x i32] containing the gathered values.
4392 /// Conditionally gathers eight 32-bit integer values, either from the
4393 /// 256-bit vector of [8 x i32] in \a a, or from memory \a m using scaled
4394 /// indexes from the 256-bit vector of [8 x i32] in \a i. The 256-bit vector
4419 /// A 256-bit vector of [8 x i32] used as the source when a mask bit is
4424 /// A 256-bit vector of [8 x i32] containing signed indexes into \a m.
4426 /// A 256-bit vector of [8 x i32] containing the mask. The most significant
4427 /// bit of each element in the mask vector represents the mask bits. If a
4428 /// mask bit is zero, the corresponding value from vector \a a is gathered;
4433 /// \returns A 256-bit vector of [8 x i32] containing the gathered values.
4440 /// Conditionally gathers two 32-bit integer values, either from the
4441 /// 128-bit vector of [4 x i32] in \a a, or from memory \a m using scaled
4442 /// indexes from the 128-bit vector of [2 x i64] in \a i. The 128-bit vector
4469 /// A 128-bit vector of [4 x i32] used as the source when a mask bit is
4474 /// A 128-bit vector of [2 x i64] containing signed indexes into \a m.
4476 /// A 128-bit vector of [4 x i32] containing the mask. The most significant
4477 /// bit of each element in the mask vector represents the mask bits. If a
4478 /// mask bit is zero, the corresponding value from vector \a a is gathered;
4484 /// \returns A 128-bit vector of [4 x i32] containing the gathered values.
4491 /// Conditionally gathers four 32-bit integer values, either from the
4492 /// 128-bit vector of [4 x i32] in \a a, or from memory \a m using scaled
4493 /// indexes from the 256-bit vector of [4 x i64] in \a i. The 128-bit vector
4518 /// A 128-bit vector of [4 x i32] used as the source when a mask bit is
4523 /// A 256-bit vector of [4 x i64] containing signed indexes into \a m.
4525 /// A 128-bit vector of [4 x i32] containing the mask. The most significant
4526 /// bit of each element in the mask vector represents the mask bits. If a
4527 /// mask bit is zero, the corresponding value from vector \a a is gathered;
4532 /// \returns A 128-bit vector of [4 x i32] containing the gathered values.
4539 /// Conditionally gathers two 64-bit integer values, either from the
4540 /// 128-bit vector of [2 x i64] in \a a, or from memory \a m using scaled
4541 /// indexes from the 128-bit vector of [4 x i32] in \a i. The 128-bit vector
4566 /// A 128-bit vector of [2 x i64] used as the source when a mask bit is
4571 /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. Only
4574 /// A 128-bit vector of [2 x i64] containing the mask. The most significant
4575 /// bit of each element in the mask vector represents the mask bits. If a
4576 /// mask bit is zero, the corresponding value from vector \a a is gathered;
4581 /// \returns A 128-bit vector of [2 x i64] containing the gathered values.
4588 /// Conditionally gathers four 64-bit integer values, either from the
4589 /// 256-bit vector of [4 x i64] in \a a, or from memory \a m using scaled
4590 /// indexes from the 128-bit vector of [4 x i32] in \a i. The 256-bit vector
4615 /// A 256-bit vector of [4 x i64] used as the source when a mask bit is
4620 /// A 128-bit vector of [4 x i32] containing signed indexes into \a m.
4622 /// A 256-bit vector of [4 x i64] containing the mask. The most significant
4623 /// bit of each element in the mask vector represents the mask bits. If a
4624 /// mask bit is zero, the corresponding value from vector \a a is gathered;
4629 /// \returns A 256-bit vector of [4 x i64] containing the gathered values.
4636 /// Conditionally gathers two 64-bit integer values, either from the
4637 /// 128-bit vector of [2 x i64] in \a a, or from memory \a m using scaled
4638 /// indexes from the 128-bit vector of [2 x i64] in \a i. The 128-bit vector
4663 /// A 128-bit vector of [2 x i64] used as the source when a mask bit is
4668 /// A 128-bit vector of [2 x i64] containing signed indexes into \a m.
4670 /// A 128-bit vector of [2 x i64] containing the mask. The most significant
4671 /// bit of each element in the mask vector represents the mask bits. If a
4672 /// mask bit is zero, the corresponding value from vector \a a is gathered;
4677 /// \returns A 128-bit vector of [2 x i64] containing the gathered values.
4684 /// Conditionally gathers four 64-bit integer values, either from the
4685 /// 256-bit vector of [4 x i64] in \a a, or from memory \a m using scaled
4686 /// indexes from the 256-bit vector of [4 x i64] in \a i. The 256-bit vector
4711 /// A 256-bit vector of [4 x i64] used as the source when a mask bit is
4716 /// A 256-bit vector of [4 x i64] containing signed indexes into \a m.
4718 /// A 256-bit vector of [4 x i64] containing the mask. The most significant
4719 /// bit of each element in the mask vector represents the mask bits. If a
4720 /// mask bit is zero, the corresponding value from vector \a a is gathered;
4725 /// \returns A 256-bit vector of [4 x i64] containing the gathered values.
4732 /// Gathers two 64-bit floating-point values from memory \a m using scaled
4733 /// indexes from the 128-bit vector of [4 x i32] in \a i.
4754 /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. Only
4759 /// \returns A 128-bit vector of [2 x double] containing the gathered values.
4768 /// Gathers four 64-bit floating-point values from memory \a m using scaled
4769 /// indexes from the 128-bit vector of [4 x i32] in \a i.
4790 /// A 128-bit vector of [4 x i32] containing signed indexes into \a m.
4794 /// \returns A 256-bit vector of [4 x double] containing the gathered values.
4804 /// Gathers two 64-bit floating-point values from memory \a m using scaled
4805 /// indexes from the 128-bit vector of [2 x i64] in \a i.
4826 /// A 128-bit vector of [2 x i64] containing signed indexes into \a m.
4830 /// \returns A 128-bit vector of [2 x double] containing the gathered values.
4839 /// Gathers four 64-bit floating-point values from memory \a m using scaled
4840 /// indexes from the 256-bit vector of [4 x i64] in \a i.
4861 /// A 256-bit vector of [4 x i64] containing signed indexes into \a m.
4865 /// \returns A 256-bit vector of [4 x double] containing the gathered values.
4875 /// Gathers four 32-bit floating-point values from memory \a m using scaled
4876 /// indexes from the 128-bit vector of [4 x i32] in \a i.
4897 /// A 128-bit vector of [4 x i32] containing signed indexes into \a m.
4901 /// \returns A 128-bit vector of [4 x float] containing the gathered values.
4910 /// Gathers eight 32-bit floating-point values from memory \a m using scaled
4911 /// indexes from the 256-bit vector of [8 x i32] in \a i.
4932 /// A 256-bit vector of [8 x i32] containing signed indexes into \a m.
4936 /// \returns A 256-bit vector of [8 x float] containing the gathered values.
4946 /// Gathers two 32-bit floating-point values from memory \a m using scaled
4947 /// indexes from the 128-bit vector of [2 x i64] in \a i. The upper two
4970 /// A 128-bit vector of [2 x i64] containing signed indexes into \a m.
4974 /// \returns A 128-bit vector of [4 x float] containing the gathered values.
4983 /// Gathers four 32-bit floating-point values from memory \a m using scaled
4984 /// indexes from the 256-bit vector of [4 x i64] in \a i.
5005 /// A 256-bit vector of [4 x i64] containing signed indexes into \a m.
5009 /// \returns A 128-bit vector of [4 x float] containing the gathered values.
5018 /// Gathers four 32-bit integer values from memory \a m using scaled
5019 /// indexes from the 128-bit vector of [4 x i32] in \a i.
5040 /// A 128-bit vector of [4 x i32] containing signed indexes into \a m.
5044 /// \returns A 128-bit vector of [4 x i32] containing the gathered values.
5048 (__v4si)_mm_set1_epi32(-1), (s)))
5050 /// Gathers eight 32-bit integer values from memory \a m using scaled
5051 /// indexes from the 256-bit vector of [8 x i32] in \a i.
5072 /// A 256-bit vector of [8 x i32] containing signed indexes into \a m.
5076 /// \returns A 256-bit vector of [8 x i32] containing the gathered values.
5080 (__v8si)_mm256_set1_epi32(-1), (s)))
5082 /// Gathers two 32-bit integer values from memory \a m using scaled indexes
5083 /// from the 128-bit vector of [2 x i64] in \a i. The upper two elements
5106 /// A 128-bit vector of [2 x i64] containing signed indexes into \a m.
5110 /// \returns A 128-bit vector of [4 x i32] containing the gathered values.
5114 (__v4si)_mm_set1_epi32(-1), (s)))
5116 /// Gathers four 32-bit integer values from memory \a m using scaled indexes
5117 /// from the 256-bit vector of [4 x i64] in \a i.
5138 /// A 256-bit vector of [4 x i64] containing signed indexes into \a m.
5142 /// \returns A 128-bit vector of [4 x i32] containing the gathered values.
5146 (__v4si)_mm_set1_epi32(-1), (s)))
5148 /// Gathers two 64-bit integer values from memory \a m using scaled indexes
5149 /// from the 128-bit vector of [4 x i32] in \a i.
5170 /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. Only
5175 /// \returns A 128-bit vector of [2 x i64] containing the gathered values.
5180 (__v2di)_mm_set1_epi64x(-1), (s)))
5182 /// Gathers four 64-bit integer values from memory \a m using scaled indexes
5183 /// from the 128-bit vector of [4 x i32] in \a i.
5204 /// A 128-bit vector of [4 x i32] containing signed indexes into \a m.
5208 /// \returns A 256-bit vector of [4 x i64] containing the gathered values.
5213 (__v4di)_mm256_set1_epi64x(-1), (s)))
5215 /// Gathers two 64-bit integer values from memory \a m using scaled indexes
5216 /// from the 128-bit vector of [2 x i64] in \a i.
5237 /// A 128-bit vector of [2 x i64] containing signed indexes into \a m.
5241 /// \returns A 128-bit vector of [2 x i64] containing the gathered values.
5246 (__v2di)_mm_set1_epi64x(-1), (s)))
5248 /// Gathers four 64-bit integer values from memory \a m using scaled indexes
5249 /// from the 256-bit vector of [4 x i64] in \a i.
5270 /// A 256-bit vector of [4 x i64] containing signed indexes into \a m.
5274 /// \returns A 256-bit vector of [4 x i64] containing the gathered values.
5279 (__v4di)_mm256_set1_epi64x(-1), (s)))