avx2intrin.h - OpenGrok cross reference for /llvm-project/clang/lib/Headers/avx2intrin.h

Lines Matching full:bit
36 ///    four unsigned 8-bit integers from the 256-bit integer vectors \a X and
40 ///    vectors, and another eight using the upper half. These 16-bit values
41 ///    are returned in the lower and upper halves of the 256-bit result,
47 ///    difference, and sums these four values to form one 16-bit result. The
52 ///    bytes from \a Y; the starting bit position for these four bytes is
54 ///    sets of four bytes from \a X; the starting bit position for the first
55 ///    set of four bytes is specified by \a M[2] times 32. These bit positions
56 ///    are all relative to the 128-bit lane for each set of eight operations.
85 ///    A 256-bit integer vector containing one of the inputs.
87 ///    A 256-bit integer vector containing one of the inputs.
91 /// \returns A 256-bit vector of [16 x i16] containing the result.
96 /// Computes the absolute value of each signed byte in the 256-bit integer
105 ///    A 256-bit integer vector.
106 /// \returns A 256-bit integer vector containing the result.
113 /// Computes the absolute value of each signed 16-bit element in the 256-bit
122 ///    A 256-bit vector of [16 x i16].
123 /// \returns A 256-bit vector of [16 x i16] containing the result.
130 /// Computes the absolute value of each signed 32-bit element in the 256-bit
139 ///    A 256-bit vector of [8 x i32].
140 /// \returns A 256-bit vector of [8 x i32] containing the result.
147 /// Converts the elements of two 256-bit vectors of [16 x i16] to 8-bit
148 ///    integers using signed saturation, and returns the 256-bit result.
166 ///    A 256-bit vector of [16 x i16] used to generate result[63:0] and
169 ///    A 256-bit vector of [16 x i16] used to generate result[127:64] and
171 /// \returns A 256-bit integer vector containing the result.
178 /// Converts the elements of two 256-bit vectors of [8 x i32] to 16-bit
179 ///    integers using signed saturation, and returns the resulting 256-bit
198 ///    A 256-bit vector of [8 x i32] used to generate result[63:0] and
201 ///    A 256-bit vector of [8 x i32] used to generate result[127:64] and
203 /// \returns A 256-bit vector of [16 x i16] containing the result.
210 /// Converts elements from two 256-bit vectors of [16 x i16] to 8-bit integers
211 ///    using unsigned saturation, and returns the 256-bit result.
229 ///    A 256-bit vector of [16 x i16] used to generate result[63:0] and
232 ///    A 256-bit vector of [16 x i16] used to generate result[127:64] and
234 /// \returns A 256-bit integer vector containing the result.
241 /// Converts elements from two 256-bit vectors of [8 x i32] to 16-bit integers
242 ///    using unsigned saturation, and returns the resulting 256-bit vector of
261 ///    A 256-bit vector of [8 x i32] used to generate result[63:0] and
264 ///    A 256-bit vector of [8 x i32] used to generate result[127:64] and
266 /// \returns A 256-bit vector of [16 x i16] containing the result.
273 /// Adds 8-bit integers from corresponding bytes of two 256-bit integer
275 ///    byte of the 256-bit integer vector result (overflow is ignored).
282 ///    A 256-bit integer vector containing one of the source operands.
284 ///    A 256-bit integer vector containing one of the source operands.
285 /// \returns A 256-bit integer vector containing the sums.
292 /// Adds 16-bit integers from corresponding elements of two 256-bit vectors of
301 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
303 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
304 /// \returns A 256-bit vector of [16 x i16] containing the sums.
311 /// Adds 32-bit integers from corresponding elements of two 256-bit vectors of
320 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
322 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
323 /// \returns A 256-bit vector of [8 x i32] containing the sums.
330 /// Adds 64-bit integers from corresponding elements of two 256-bit vectors of
339 ///    A 256-bit vector of [4 x i64] containing one of the source operands.
341 ///    A 256-bit vector of [4 x i64] containing one of the source operands.
342 /// \returns A 256-bit vector of [4 x i64] containing the sums.
349 /// Adds 8-bit integers from corresponding bytes of two 256-bit integer
351 ///    corresponding byte of the 256-bit integer vector result.
358 ///    A 256-bit integer vector containing one of the source operands.
360 ///    A 256-bit integer vector containing one of the source operands.
361 /// \returns A 256-bit integer vector containing the sums.
368 /// Adds 16-bit integers from corresponding elements of two 256-bit vectors of
376 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
378 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
379 /// \returns A 256-bit vector of [16 x i16] containing the sums.
386 /// Adds 8-bit integers from corresponding bytes of two 256-bit integer
388 ///    corresponding byte of the 256-bit integer vector result.
395 ///    A 256-bit integer vector containing one of the source operands.
397 ///    A 256-bit integer vector containing one of the source operands.
398 /// \returns A 256-bit integer vector containing the sums.
405 /// Adds 16-bit integers from corresponding elements of two 256-bit vectors of
413 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
415 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
416 /// \returns A 256-bit vector of [16 x i16] containing the sums.
423 /// Uses the lower half of the 256-bit vector \a a as the upper half of a
424 ///    temporary 256-bit value, and the lower half of the 256-bit vector \a b
441 ///    A 256-bit integer vector containing source values.
443 ///    A 256-bit integer vector containing source values.
446 /// \returns A 256-bit integer vector containing the result.
451 /// Computes the bitwise AND of the 256-bit integer vectors in \a __a and
459 ///    A 256-bit integer vector.
461 ///    A 256-bit integer vector.
462 /// \returns A 256-bit integer vector containing the result.
469 /// Computes the bitwise AND of the 256-bit integer vector in \a __b with
470 ///    the bitwise NOT of the 256-bit integer vector in \a __a.
477 ///    A 256-bit integer vector.
479 ///    A 256-bit integer vector.
480 /// \returns A 256-bit integer vector containing the result.
488 ///    256-bit integer vectors in \a __a and \a __b and returns each
489 ///    average in the corresponding byte of the 256-bit result.
503 ///    A 256-bit integer vector.
505 ///    A 256-bit integer vector.
506 /// \returns A 256-bit integer vector containing the result.
513 /// Computes the averages of the corresponding unsigned 16-bit integers in
514 ///    the two 256-bit vectors of [16 x i16] in \a __a and \a __b and returns
515 ///    each average in the corresponding element of the 256-bit result.
529 ///    A 256-bit vector of [16 x i16].
531 ///    A 256-bit vector of [16 x i16].
532 /// \returns A 256-bit vector of [16 x i16] containing the result.
539 /// Merges 8-bit integer values from either of the two 256-bit vectors
540 ///    \a __V1 or \a __V2, as specified by the 256-bit mask \a __M and returns
541 ///    the resulting 256-bit integer vector.
559 ///    A 256-bit integer vector containing source values.
561 ///    A 256-bit integer vector containing source values.
563 ///    A 256-bit integer vector, with bit [7] of each byte specifying the
564 ///    source for each corresponding byte of the result. When the mask bit
567 /// \returns A 256-bit integer vector containing the result.
575 /// Merges 16-bit integer values from either of the two 256-bit vectors
577 ///    and returns the resulting 256-bit vector of [16 x i16].
601 ///    A 256-bit vector of [16 x i16] containing source values.
603 ///    A 256-bit vector of [16 x i16] containing source values.
605 ///    An immediate 8-bit integer operand, with bits [7:0] specifying the
606 ///    source for each element of the result. The position of the mask bit
607 ///    corresponds to the index of a copied value. When a mask bit is 0, the
611 /// \returns A 256-bit vector of [16 x i16] containing the result.
616 /// Compares corresponding bytes in the 256-bit integer vectors in \a __a and
618 ///    bytes of the 256-bit result.
632 ///    A 256-bit integer vector containing one of the inputs.
634 ///    A 256-bit integer vector containing one of the inputs.
635 /// \returns A 256-bit integer vector containing the result.
642 /// Compares corresponding elements in the 256-bit vectors of [16 x i16] in
644 ///    corresponding elements of the 256-bit result.
658 ///    A 256-bit vector of [16 x i16] containing one of the inputs.
660 ///    A 256-bit vector of [16 x i16] containing one of the inputs.
661 /// \returns A 256-bit vector of [16 x i16] containing the result.
668 /// Compares corresponding elements in the 256-bit vectors of [8 x i32] in
670 ///    corresponding elements of the 256-bit result.
684 ///    A 256-bit vector of [8 x i32] containing one of the inputs.
686 ///    A 256-bit vector of [8 x i32] containing one of the inputs.
687 /// \returns A 256-bit vector of [8 x i32] containing the result.
694 /// Compares corresponding elements in the 256-bit vectors of [4 x i64] in
696 ///    corresponding elements of the 256-bit result.
710 ///    A 256-bit vector of [4 x i64] containing one of the inputs.
712 ///    A 256-bit vector of [4 x i64] containing one of the inputs.
713 /// \returns A 256-bit vector of [4 x i64] containing the result.
720 /// Compares corresponding signed bytes in the 256-bit integer vectors in
722 ///    corresponding bytes of the 256-bit result.
736 ///    A 256-bit integer vector containing one of the inputs.
738 ///    A 256-bit integer vector containing one of the inputs.
739 /// \returns A 256-bit integer vector containing the result.
748 /// Compares corresponding signed elements in the 256-bit vectors of
750 ///    outcomes in the corresponding elements of the 256-bit result.
764 ///    A 256-bit vector of [16 x i16] containing one of the inputs.
766 ///    A 256-bit vector of [16 x i16] containing one of the inputs.
767 /// \returns A 256-bit vector of [16 x i16] containing the result.
774 /// Compares corresponding signed elements in the 256-bit vectors of
776 ///    outcomes in the corresponding elements of the 256-bit result.
790 ///    A 256-bit vector of [8 x i32] containing one of the inputs.
792 ///    A 256-bit vector of [8 x i32] containing one of the inputs.
793 /// \returns A 256-bit vector of [8 x i32] containing the result.
800 /// Compares corresponding signed elements in the 256-bit vectors of
802 ///    outcomes in the corresponding elements of the 256-bit result.
816 ///    A 256-bit vector of [4 x i64] containing one of the inputs.
818 ///    A 256-bit vector of [4 x i64] containing one of the inputs.
819 /// \returns A 256-bit vector of [4 x i64] containing the result.
826 /// Horizontally adds the adjacent pairs of 16-bit integers from two 256-bit
829 ///    \a __a are returned in the lower 64 bits of each 128-bit half of the
831 ///    128-bit half of the result.
852 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
854 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
855 /// \returns A 256-bit vector of [16 x i16] containing the sums.
862 /// Horizontally adds the adjacent pairs of 32-bit integers from two 256-bit
865 ///    are returned in the lower 64 bits of each 128-bit half of the result;
866 ///    sums from \a __b are returned in the upper 64 bits of each 128-bit half
884 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
886 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
887 /// \returns A 256-bit vector of [8 x i32] containing the sums.
894 /// Horizontally adds the adjacent pairs of 16-bit integers from two 256-bit
897 ///    the lower 64 bits of each 128-bit half of the result; sums from \a __b
898 ///    are returned in the upper 64 bits of each 128-bit half of the result.
919 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
921 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
922 /// \returns A 256-bit vector of [16 x i16] containing the sums.
929 /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit
933 ///    128-bit half of the result; differences from \a __b are returned in the
934 ///    upper 64 bits of each 128-bit half of the result.
955 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
957 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
958 /// \returns A 256-bit vector of [16 x i16] containing the differences.
965 /// Horizontally subtracts adjacent pairs of 32-bit integers from two 256-bit
968 ///    from \a __a are returned in the lower 64 bits of each 128-bit half of
970 ///    of each 128-bit half of the result.
987 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
989 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
990 /// \returns A 256-bit vector of [8 x i32] containing the differences.
997 /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit
1000 ///    returned in the lower 64 bits of each 128-bit half of the result;
1002 ///    128-bit half of the result.
1023 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
1025 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
1026 /// \returns A 256-bit vector of [16 x i16] containing the differences.
1033 /// Multiplies each unsigned byte from the 256-bit integer vector in \a __a
1034 ///    with the corresponding signed byte from the 256-bit integer vector in
1035 ///    \a __b, forming signed 16-bit intermediate products. Adds adjacent
1036 ///    pairs of those products using signed saturation to form 16-bit sums
1053 ///    A 256-bit vector containing one of the source operands.
1055 ///    A 256-bit vector containing one of the source operands.
1056 /// \returns A 256-bit vector of [16 x i16] containing the result.
1063 /// Multiplies corresponding 16-bit elements of two 256-bit vectors of
1064 ///    [16 x i16], forming 32-bit intermediate products, and adds pairs of
1065 ///    those products to form 32-bit sums returned as elements of the
1068 ///    There is only one wraparound case: when all four of the 16-bit sources
1085 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
1087 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
1088 /// \returns A 256-bit vector of [8 x i32] containing the result.
1095 /// Compares the corresponding signed bytes in the two 256-bit integer vectors
1097 ///     corresponding byte of the 256-bit result.
1104 ///    A 256-bit integer vector.
1106 ///    A 256-bit integer vector.
1107 /// \returns A 256-bit integer vector containing the result.
1114 /// Compares the corresponding signed 16-bit integers in the two 256-bit
1116 ///    each pair in the corresponding element of the 256-bit result.
1123 ///    A 256-bit vector of [16 x i16].
1125 ///    A 256-bit vector of [16 x i16].
1126 /// \returns A 256-bit vector of [16 x i16] containing the result.
1133 /// Compares the corresponding signed 32-bit integers in the two 256-bit
1135 ///    each pair in the corresponding element of the 256-bit result.
1142 ///    A 256-bit vector of [8 x i32].
1144 ///    A 256-bit vector of [8 x i32].
1145 /// \returns A 256-bit vector of [8 x i32] containing the result.
1152 /// Compares the corresponding unsigned bytes in the two 256-bit integer
1154 ///     the corresponding byte of the 256-bit result.
1161 ///    A 256-bit integer vector.
1163 ///    A 256-bit integer vector.
1164 /// \returns A 256-bit integer vector containing the result.
1171 /// Compares the corresponding unsigned 16-bit integers in the two 256-bit
1173 ///    each pair in the corresponding element of the 256-bit result.
1180 ///    A 256-bit vector of [16 x i16].
1182 ///    A 256-bit vector of [16 x i16].
1183 /// \returns A 256-bit vector of [16 x i16] containing the result.
1190 /// Compares the corresponding unsigned 32-bit integers in the two 256-bit
1192 ///    each pair in the corresponding element of the 256-bit result.
1199 ///    A 256-bit vector of [8 x i32].
1201 ///    A 256-bit vector of [8 x i32].
1202 /// \returns A 256-bit vector of [8 x i32] containing the result.
1209 /// Compares the corresponding signed bytes in the two 256-bit integer vectors
1211 ///     corresponding byte of the 256-bit result.
1218 ///    A 256-bit integer vector.
1220 ///    A 256-bit integer vector.
1221 /// \returns A 256-bit integer vector containing the result.
1228 /// Compares the corresponding signed 16-bit integers in the two 256-bit
1230 ///    each pair in the corresponding element of the 256-bit result.
1237 ///    A 256-bit vector of [16 x i16].
1239 ///    A 256-bit vector of [16 x i16].
1240 /// \returns A 256-bit vector of [16 x i16] containing the result.
1247 /// Compares the corresponding signed 32-bit integers in the two 256-bit
1249 ///    each pair in the corresponding element of the 256-bit result.
1256 ///    A 256-bit vector of [8 x i32].
1258 ///    A 256-bit vector of [8 x i32].
1259 /// \returns A 256-bit vector of [8 x i32] containing the result.
1266 /// Compares the corresponding unsigned bytes in the two 256-bit integer
1268 ///     the corresponding byte of the 256-bit result.
1275 ///    A 256-bit integer vector.
1277 ///    A 256-bit integer vector.
1278 /// \returns A 256-bit integer vector containing the result.
1285 /// Compares the corresponding unsigned 16-bit integers in the two 256-bit
1287 ///    each pair in the corresponding element of the 256-bit result.
1294 ///    A 256-bit vector of [16 x i16].
1296 ///    A 256-bit vector of [16 x i16].
1297 /// \returns A 256-bit vector of [16 x i16] containing the result.
1304 /// Compares the corresponding unsigned 32-bit integers in the two 256-bit
1306 ///    each pair in the corresponding element of the 256-bit result.
1313 ///    A 256-bit vector of [8 x i32].
1315 ///    A 256-bit vector of [8 x i32].
1316 /// \returns A 256-bit vector of [8 x i32] containing the result.
1323 /// Creates a 32-bit integer mask from the most significant bit of each byte
1324 ///    in the 256-bit integer vector in \a __a and returns the result.
1338 ///    A 256-bit integer vector containing the source bytes.
1339 /// \returns The 32-bit integer mask.
1346 /// Sign-extends bytes from the 128-bit integer vector in \a __V and returns
1347 ///    the 16-bit values in the corresponding elements of a 256-bit vector
1363 ///    A 128-bit integer vector containing the source bytes.
1364 /// \returns A 256-bit vector of [16 x i16] containing the sign-extended
1374 /// Sign-extends bytes from the lower half of the 128-bit integer vector in
1375 ///    \a __V and returns the 32-bit values in the corresponding elements of a
1376 ///    256-bit vector of [8 x i32].
1391 ///    A 128-bit integer vector containing the source bytes.
1392 /// \returns A 256-bit vector of [8 x i32] containing the sign-extended
1402 /// Sign-extends the first four bytes from the 128-bit integer vector in
1403 ///    \a __V and returns the 64-bit values in the corresponding elements of a
1404 ///    256-bit vector of [4 x i64].
1418 ///    A 128-bit integer vector containing the source bytes.
1419 /// \returns A 256-bit vector of [4 x i64] containing the sign-extended
1429 /// Sign-extends 16-bit elements from the 128-bit vector of [8 x i16] in
1430 ///    \a __V and returns the 32-bit values in the corresponding elements of a
1431 ///    256-bit vector of [8 x i32].
1446 ///    A 128-bit vector of [8 x i16] containing the source values.
1447 /// \returns A 256-bit vector of [8 x i32] containing the sign-extended
1455 /// Sign-extends 16-bit elements from the lower half of the 128-bit vector of
1456 ///    [8 x i16] in \a __V and returns the 64-bit values in the corresponding
1457 ///    elements of a 256-bit vector of [4 x i64].
1471 ///    A 128-bit vector of [8 x i16] containing the source values.
1472 /// \returns A 256-bit vector of [4 x i64] containing the sign-extended
1480 /// Sign-extends 32-bit elements from the 128-bit vector of [4 x i32] in
1481 ///    \a __V and returns the 64-bit values in the corresponding elements of a
1482 ///    256-bit vector of [4 x i64].
1496 ///    A 128-bit vector of [4 x i32] containing the source values.
1497 /// \returns A 256-bit vector of [4 x i64] containing the sign-extended
1505 /// Zero-extends bytes from the 128-bit integer vector in \a __V and returns
1506 ///    the 16-bit values in the corresponding elements of a 256-bit vector
1522 ///    A 128-bit integer vector containing the source bytes.
1523 /// \returns A 256-bit vector of [16 x i16] containing the zero-extended
1531 /// Zero-extends bytes from the lower half of the 128-bit integer vector in
1532 ///    \a __V and returns the 32-bit values in the corresponding elements of a
1533 ///    256-bit vector of [8 x i32].
1548 ///    A 128-bit integer vector containing the source bytes.
1549 /// \returns A 256-bit vector of [8 x i32] containing the zero-extended
1557 /// Zero-extends the first four bytes from the 128-bit integer vector in
1558 ///    \a __V and returns the 64-bit values in the corresponding elements of a
1559 ///    256-bit vector of [4 x i64].
1573 ///    A 128-bit integer vector containing the source bytes.
1574 /// \returns A 256-bit vector of [4 x i64] containing the zero-extended
1582 /// Zero-extends 16-bit elements from the 128-bit vector of [8 x i16] in
1583 ///    \a __V and returns the 32-bit values in the corresponding elements of a
1584 ///    256-bit vector of [8 x i32].
1599 ///    A 128-bit vector of [8 x i16] containing the source values.
1600 /// \returns A 256-bit vector of [8 x i32] containing the zero-extended
1608 /// Zero-extends 16-bit elements from the lower half of the 128-bit vector of
1609 ///    [8 x i16] in \a __V and returns the 64-bit values in the corresponding
1610 ///    elements of a 256-bit vector of [4 x i64].
1624 ///    A 128-bit vector of [8 x i16] containing the source values.
1625 /// \returns A 256-bit vector of [4 x i64] containing the zero-extended
1633 /// Zero-extends 32-bit elements from the 128-bit vector of [4 x i32] in
1634 ///    \a __V and returns the 64-bit values in the corresponding elements of a
1635 ///    256-bit vector of [4 x i64].
1649 ///    A 128-bit vector of [4 x i32] containing the source values.
1650 /// \returns A 256-bit vector of [4 x i64] containing the zero-extended
1658 /// Multiplies signed 32-bit integers from even-numbered elements of two
1659 ///    256-bit vectors of [8 x i32] and returns the 64-bit products in the
1674 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
1676 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
1677 /// \returns A 256-bit vector of [4 x i64] containing the products.
1684 /// Multiplies signed 16-bit integer elements of two 256-bit vectors of
1685 ///    [16 x i16], truncates the 32-bit results to the most significant 18
1701 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
1703 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
1704 /// \returns A 256-bit vector of [16 x i16] containing the rounded products.
1711 /// Multiplies unsigned 16-bit integer elements of two 256-bit vectors of
1712 ///    [16 x i16], and returns the upper 16 bits of each 32-bit product in the
1720 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
1722 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
1723 /// \returns A 256-bit vector of [16 x i16] containing the products.
1730 /// Multiplies signed 16-bit integer elements of two 256-bit vectors of
1731 ///    [16 x i16], and returns the upper 16 bits of each 32-bit product in the
1739 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
1741 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
1742 /// \returns A 256-bit vector of [16 x i16] containing the products.
1749 /// Multiplies signed 16-bit integer elements of two 256-bit vectors of
1750 ///    [16 x i16], and returns the lower 16 bits of each 32-bit product in the
1758 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
1760 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
1761 /// \returns A 256-bit vector of [16 x i16] containing the products.
1768 /// Multiplies signed 32-bit integer elements of two 256-bit vectors of
1769 ///    [8 x i32], and returns the lower 32 bits of each 64-bit product in the
1777 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
1779 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
1780 /// \returns A 256-bit vector of [8 x i32] containing the products.
1787 /// Multiplies unsigned 32-bit integers from even-numbered elements of two
1788 ///    256-bit vectors of [8 x i32] and returns the 64-bit products in the
1803 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
1805 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
1806 /// \returns A 256-bit vector of [4 x i64] containing the products.
1813 /// Computes the bitwise OR of the 256-bit integer vectors in \a __a and
1821 ///    A 256-bit integer vector.
1823 ///    A 256-bit integer vector.
1824 /// \returns A 256-bit integer vector containing the result.
1832 ///    unsigned 8-bit integers from the 256-bit integer vectors \a __a and
1837 ///    corresponding 64-bit element of the result.
1841 ///    and sums these eight values to form one 16-bit result. This operation
1866 ///    A 256-bit integer vector.
1868 ///    A 256-bit integer vector.
1869 /// \returns A 256-bit integer vector containing the result.
1876 /// Shuffles 8-bit integers in the 256-bit integer vector \a __a according
1877 ///    to control information in the 256-bit integer vector \a __b, and
1878 ///    returns the 256-bit result. In effect there are two separate 128-bit
1901 ///    A 256-bit integer vector containing source values.
1903 ///    A 256-bit integer vector containing control information to determine
1904 ///    what goes into the corresponding byte of the result. If bit 7 of the
1906 ///    control byte specify the index (within the same 128-bit half) of \a __a
1908 /// \returns A 256-bit integer vector containing the result.
1915 /// Shuffles 32-bit integers from the 256-bit vector of [8 x i32] in \a a
1917 ///    returns the 256-bit result. In effect there are two parallel 128-bit
1938 ///    A 256-bit vector of [8 x i32] containing source values.
1940 ///    An immediate 8-bit value specifying which elements to copy from \a a.
1944 /// \returns A 256-bit vector of [8 x i32] containing the result.
1948 /// Shuffles 16-bit integers from the 256-bit vector of [16 x i16] in \a a
1950 ///    returns the 256-bit result. The upper 64 bits of each 128-bit half
1951 ///    are shuffled in parallel; the lower 64 bits of each 128-bit half are
1974 ///    A 256-bit vector of [16 x i16] containing source values.
1976 ///    An immediate 8-bit value specifying which elements to copy from \a a.
1980 /// \returns A 256-bit vector of [16 x i16] containing the result.
1984 /// Shuffles 16-bit integers from the 256-bit vector of [16 x i16] \a a
1986 ///    returns the 256-bit [16 x i16] result. The lower 64 bits of each
1987 ///    128-bit half are shuffled; the upper 64 bits of each 128-bit half are
2010 ///    A 256-bit vector of [16 x i16] to use as a source of data for the
2013 ///    An immediate 8-bit value specifying which elements to copy from \a a.
2017 /// \returns A 256-bit vector of [16 x i16] containing the result.
2021 /// Sets each byte of the result to the corresponding byte of the 256-bit
2023 ///    on whether the corresponding byte of the 256-bit integer vector in
2032 ///    A 256-bit integer vector.
2034 ///    A 256-bit integer vector.
2035 /// \returns A 256-bit integer vector containing the result.
2043 ///    256-bit vector of [16 x i16] in \a __a, the negative of that element,
2044 ///    or zero, depending on whether the corresponding element of the 256-bit
2053 ///    A 256-bit vector of [16 x i16].
2055 ///    A 256-bit vector of [16 x i16].
2056 /// \returns A 256-bit vector of [16 x i16] containing the result.
2064 ///    256-bit vector of [8 x i32] in \a __a, the negative of that element, or
2065 ///    zero, depending on whether the corresponding element of the 256-bit
2074 ///    A 256-bit vector of [8 x i32].
2076 ///    A 256-bit vector of [8 x i32].
2077 /// \returns A 256-bit vector of [8 x i32] containing the result.
2084 /// Shifts each 128-bit half of the 256-bit integer vector \a a left by
2097 ///    A 256-bit integer vector to be shifted.
2100 /// \returns A 256-bit integer vector containing the result.
2104 /// Shifts each 128-bit half of the 256-bit integer vector \a a left by
2117 ///    A 256-bit integer vector to be shifted.
2120 /// \returns A 256-bit integer vector containing the result.
2124 /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a
2133 ///    A 256-bit vector of [16 x i16] to be shifted.
2136 /// \returns A 256-bit vector of [16 x i16] containing the result.
2143 /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a
2153 ///    A 256-bit vector of [16 x i16] to be shifted.
2155 ///    A 128-bit vector of [2 x i64] whose lower element gives the unsigned
2157 /// \returns A 256-bit vector of [16 x i16] containing the result.
2164 /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a
2173 ///    A 256-bit vector of [8 x i32] to be shifted.
2176 /// \returns A 256-bit vector of [8 x i32] containing the result.
2183 /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a
2193 ///    A 256-bit vector of [8 x i32] to be shifted.
2195 ///    A 128-bit vector of [2 x i64] whose lower element gives the unsigned
2197 /// \returns A 256-bit vector of [8 x i32] containing the result.
2204 /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __a
2213 ///    A 256-bit vector of [4 x i64] to be shifted.
2216 /// \returns A 256-bit vector of [4 x i64] containing the result.
2223 /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __a
2233 ///    A 256-bit vector of [4 x i64] to be shifted.
2235 ///    A 128-bit vector of [2 x i64] whose lower element gives the unsigned
2237 /// \returns A 256-bit vector of [4 x i64] containing the result.
2244 /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a
2247 ///    0 or -1 according to the corresponding input sign bit.
2254 ///    A 256-bit vector of [16 x i16] to be shifted.
2257 /// \returns A 256-bit vector of [16 x i16] containing the result.
2264 /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a
2268 ///    corresponding input sign bit.
2275 ///    A 256-bit vector of [16 x i16] to be shifted.
2277 ///    A 128-bit vector of [2 x i64] whose lower element gives the unsigned
2279 /// \returns A 256-bit vector of [16 x i16] containing the result.
2286 /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a
2289 ///    0 or -1 according to the corresponding input sign bit.
2296 ///    A 256-bit vector of [8 x i32] to be shifted.
2299 /// \returns A 256-bit vector of [8 x i32] containing the result.
2306 /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a
2310 ///    corresponding input sign bit.
2317 ///    A 256-bit vector of [8 x i32] to be shifted.
2319 ///    A 128-bit vector of [2 x i64] whose lower element gives the unsigned
2321 /// \returns A 256-bit vector of [8 x i32] containing the result.
2328 /// Shifts each 128-bit half of the 256-bit integer vector in \a a right by
2341 ///    A 256-bit integer vector to be shifted.
2344 /// \returns A 256-bit integer vector containing the result.
2348 /// Shifts each 128-bit half of the 256-bit integer vector in \a a right by
2361 ///    A 256-bit integer vector to be shifted.
2364 /// \returns A 256-bit integer vector containing the result.
2368 /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a
2377 ///    A 256-bit vector of [16 x i16] to be shifted.
2380 /// \returns A 256-bit vector of [16 x i16] containing the result.
2387 /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a
2397 ///    A 256-bit vector of [16 x i16] to be shifted.
2399 ///    A 128-bit vector of [2 x i64] whose lower element gives the unsigned
2401 /// \returns A 256-bit vector of [16 x i16] containing the result.
2408 /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a
2417 ///    A 256-bit vector of [8 x i32] to be shifted.
2420 /// \returns A 256-bit vector of [8 x i32] containing the result.
2427 /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a
2437 ///    A 256-bit vector of [8 x i32] to be shifted.
2439 ///    A 128-bit vector of [2 x i64] whose lower element gives the unsigned
2441 /// \returns A 256-bit vector of [8 x i32] containing the result.
2448 /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __a
2457 ///    A 256-bit vector of [4 x i64] to be shifted.
2460 /// \returns A 256-bit vector of [4 x i64] containing the result.
2467 /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __a
2477 ///    A 256-bit vector of [4 x i64] to be shifted.
2479 ///    A 128-bit vector of [2 x i64] whose lower element gives the unsigned
2481 /// \returns A 256-bit vector of [4 x i64] containing the result.
2488 /// Subtracts 8-bit integers from corresponding bytes of two 256-bit integer
2490 ///    corresponding byte of the 256-bit integer vector result (overflow is
2505 ///    A 256-bit integer vector containing the minuends.
2507 ///    A 256-bit integer vector containing the subtrahends.
2508 /// \returns A 256-bit integer vector containing the differences.
2515 /// Subtracts 16-bit integers from corresponding elements of two 256-bit
2532 ///    A 256-bit vector of [16 x i16] containing the minuends.
2534 ///    A 256-bit vector of [16 x i16] containing the subtrahends.
2535 /// \returns A 256-bit vector of [16 x i16] containing the differences.
2542 /// Subtracts 32-bit integers from corresponding elements of two 256-bit
2558 ///    A 256-bit vector of [8 x i32] containing the minuends.
2560 ///    A 256-bit vector of [8 x i32] containing the subtrahends.
2561 /// \returns A 256-bit vector of [8 x i32] containing the differences.
2568 /// Subtracts 64-bit integers from corresponding elements of two 256-bit
2584 ///    A 256-bit vector of [4 x i64] containing the minuends.
2586 ///    A 256-bit vector of [4 x i64] containing the subtrahends.
2587 /// \returns A 256-bit vector of [4 x i64] containing the differences.
2594 /// Subtracts 8-bit integers from corresponding bytes of two 256-bit integer
2596 ///    corresponding byte of the 256-bit integer vector result.
2610 ///    A 256-bit integer vector containing the minuends.
2612 ///    A 256-bit integer vector containing the subtrahends.
2613 /// \returns A 256-bit integer vector containing the differences.
2620 /// Subtracts 16-bit integers from corresponding elements of two 256-bit
2636 ///    A 256-bit vector of [16 x i16] containing the minuends.
2638 ///    A 256-bit vector of [16 x i16] containing the subtrahends.
2639 /// \returns A 256-bit vector of [16 x i16] containing the differences.
2646 /// Subtracts 8-bit integers from corresponding bytes of two 256-bit integer
2648 ///    corresponding byte of the 256-bit integer vector result. For each byte,
2663 ///    A 256-bit integer vector containing the minuends.
2665 ///    A 256-bit integer vector containing the subtrahends.
2666 /// \returns A 256-bit integer vector containing the differences.
2673 /// Subtracts 16-bit integers from corresponding elements of two 256-bit
2689 ///    A 256-bit vector of [16 x i16] containing the minuends.
2691 ///    A 256-bit vector of [16 x i16] containing the subtrahends.
2692 /// \returns A 256-bit vector of [16 x i16] containing the differences.
2699 /// Unpacks and interleaves 8-bit integers from parts of the 256-bit integer
2700 ///    vectors in \a __a and \a __b to form the 256-bit result. Specifically,
2701 ///    uses the upper 64 bits of each 128-bit half of \a __a and \a __b as
2721 ///    A 256-bit integer vector used as the source for the even-numbered bytes
2724 ///    A 256-bit integer vector used as the source for the odd-numbered bytes
2726 /// \returns A 256-bit integer vector containing the result.
2733 /// Unpacks and interleaves 16-bit integers from parts of the 256-bit vectors
2734 ///    of [16 x i16] in \a __a and \a __b to return the resulting 256-bit
2736 ///    128-bit half of \a __a and \a __b as input; other bits in these
2756 ///    A 256-bit vector of [16 x i16] used as the source for the even-numbered
2759 ///    A 256-bit vector of [16 x i16] used as the source for the odd-numbered
2761 /// \returns A 256-bit vector of [16 x i16] containing the result.
2768 /// Unpacks and interleaves 32-bit integers from parts of the 256-bit vectors
2769 ///    of [8 x i32] in \a __a and \a __b to return the resulting 256-bit vector
2770 ///    of [8 x i32]. Specifically, uses the upper 64 bits of each 128-bit half
2790 ///    A 256-bit vector of [8 x i32] used as the source for the even-numbered
2793 ///    A 256-bit vector of [8 x i32] used as the source for the odd-numbered
2795 /// \returns A 256-bit vector of [8 x i32] containing the result.
2802 /// Unpacks and interleaves 64-bit integers from parts of the 256-bit vectors
2803 ///    of [4 x i64] in \a __a and \a __b to return the resulting 256-bit vector
2804 ///    of [4 x i64]. Specifically, uses the upper 64 bits of each 128-bit half
2820 ///    A 256-bit vector of [4 x i64] used as the source for the even-numbered
2823 ///    A 256-bit vector of [4 x i64] used as the source for the odd-numbered
2825 /// \returns A 256-bit vector of [4 x i64] containing the result.
2832 /// Unpacks and interleaves 8-bit integers from parts of the 256-bit integer
2833 ///    vectors in \a __a and \a __b to form the 256-bit result. Specifically,
2834 ///    uses the lower 64 bits of each 128-bit half of \a __a and \a __b as
2854 ///    A 256-bit integer vector used as the source for the even-numbered bytes
2857 ///    A 256-bit integer vector used as the source for the odd-numbered bytes
2859 /// \returns A 256-bit integer vector containing the result.
2866 /// Unpacks and interleaves 16-bit integers from parts of the 256-bit vectors
2867 ///    of [16 x i16] in \a __a and \a __b to return the resulting 256-bit
2869 ///    128-bit half of \a __a and \a __b as input; other bits in these
2889 ///    A 256-bit vector of [16 x i16] used as the source for the even-numbered
2892 ///    A 256-bit vector of [16 x i16] used as the source for the odd-numbered
2894 /// \returns A 256-bit vector of [16 x i16] containing the result.
2901 /// Unpacks and interleaves 32-bit integers from parts of the 256-bit vectors
2902 ///    of [8 x i32] in \a __a and \a __b to return the resulting 256-bit vector
2903 ///    of [8 x i32]. Specifically, uses the lower 64 bits of each 128-bit half
2923 ///    A 256-bit vector of [8 x i32] used as the source for the even-numbered
2926 ///    A 256-bit vector of [8 x i32] used as the source for the odd-numbered
2928 /// \returns A 256-bit vector of [8 x i32] containing the result.
2935 /// Unpacks and interleaves 64-bit integers from parts of the 256-bit vectors
2936 ///    of [4 x i64] in \a __a and \a __b to return the resulting 256-bit vector
2937 ///    of [4 x i64]. Specifically, uses the lower 64 bits of each 128-bit half
2953 ///    A 256-bit vector of [4 x i64] used as the source for the even-numbered
2956 ///    A 256-bit vector of [4 x i64] used as the source for the odd-numbered
2958 /// \returns A 256-bit vector of [4 x i64] containing the result.
2965 /// Computes the bitwise XOR of the 256-bit integer vectors in \a __a and
2973 ///    A 256-bit integer vector.
2975 ///    A 256-bit integer vector.
2976 /// \returns A 256-bit integer vector containing the result.
2983 /// Loads the 256-bit integer vector from memory \a __V using a non-temporal
2993 /// \returns A 256-bit integer vector loaded from memory.
3001 /// Broadcasts the 32-bit floating-point value from the low element of the
3002 ///    128-bit vector of [4 x float] in \a __X to all elements of the result's
3003 ///    128-bit vector of [4 x float].
3010 ///    A 128-bit vector of [4 x float] whose low element will be broadcast.
3011 /// \returns A 128-bit vector of [4 x float] containing the result.
3018 /// Broadcasts the 64-bit floating-point value from the low element of the
3019 ///    128-bit vector of [2 x double] in \a __a to both elements of the
3020 ///    result's 128-bit vector of [2 x double].
3027 ///    A 128-bit vector of [2 x double] whose low element will be broadcast.
3028 /// \returns A 128-bit vector of [2 x double] containing the result.
3035 /// Broadcasts the 32-bit floating-point value from the low element of the
3036 ///    128-bit vector of [4 x float] in \a __X to all elements of the
3037 ///    result's 256-bit vector of [8 x float].
3044 ///    A 128-bit vector of [4 x float] whose low element will be broadcast.
3045 /// \returns A 256-bit vector of [8 x float] containing the result.
3052 /// Broadcasts the 64-bit floating-point value from the low element of the
3053 ///    128-bit vector of [2 x double] in \a __X to all elements of the
3054 ///    result's 256-bit vector of [4 x double].
3061 ///    A 128-bit vector of [2 x double] whose low element will be broadcast.
3062 /// \returns A 256-bit vector of [4 x double] containing the result.
3069 /// Broadcasts the 128-bit integer data from \a __X to both the lower and
3070 ///    upper halves of the 256-bit result.
3077 ///    A 128-bit integer vector to be broadcast.
3078 /// \returns A 256-bit integer vector containing the result.
3087 /// Merges 32-bit integer elements from either of the two 128-bit vectors of
3088 ///    [4 x i32] in \a V1 or \a V2 to the result's 128-bit vector of [4 x i32],
3111 ///    A 128-bit vector of [4 x i32] containing source values.
3113 ///    A 128-bit vector of [4 x i32] containing source values.
3115 ///    An immediate 8-bit integer operand, with bits [3:0] specifying the
3116 ///    source for each element of the result. The position of the mask bit
3117 ///    corresponds to the index of a copied value. When a mask bit is 0, the
3119 /// \returns A 128-bit vector of [4 x i32] containing the result.
3124 /// Merges 32-bit integer elements from either of the two 256-bit vectors of
3125 ///    [8 x i32] in \a V1 or \a V2 to return a 256-bit vector of [8 x i32],
3148 ///    A 256-bit vector of [8 x i32] containing source values.
3150 ///    A 256-bit vector of [8 x i32] containing source values.
3152 ///    An immediate 8-bit integer operand, with bits [7:0] specifying the
3153 ///    source for each element of the result. The position of the mask bit
3154 ///    corresponds to the index of a copied value. When a mask bit is 0, the
3156 /// \returns A 256-bit vector of [8 x i32] containing the result.
3161 /// Broadcasts the low byte from the 128-bit integer vector in \a __X to all
3162 ///    bytes of the 256-bit result.
3169 ///    A 128-bit integer vector whose low byte will be broadcast.
3170 /// \returns A 256-bit integer vector containing the result.
3177 /// Broadcasts the low element from the 128-bit vector of [8 x i16] in \a __X
3178 ///    to all elements of the result's 256-bit vector of [16 x i16].
3185 ///    A 128-bit vector of [8 x i16] whose low element will be broadcast.
3186 /// \returns A 256-bit vector of [16 x i16] containing the result.
3193 /// Broadcasts the low element from the 128-bit vector of [4 x i32] in \a __X
3194 ///    to all elements of the result's 256-bit vector of [8 x i32].
3201 ///    A 128-bit vector of [4 x i32] whose low element will be broadcast.
3202 /// \returns A 256-bit vector of [8 x i32] containing the result.
3209 /// Broadcasts the low element from the 128-bit vector of [2 x i64] in \a __X
3210 ///    to all elements of the result's 256-bit vector of [4 x i64].
3217 ///    A 128-bit vector of [2 x i64] whose low element will be broadcast.
3218 /// \returns A 256-bit vector of [4 x i64] containing the result.
3225 /// Broadcasts the low byte from the 128-bit integer vector in \a __X to all
3226 ///    bytes of the 128-bit result.
3233 ///    A 128-bit integer vector whose low byte will be broadcast.
3234 /// \returns A 128-bit integer vector containing the result.
3241 /// Broadcasts the low element from the 128-bit vector of [8 x i16] in
3242 ///    \a __X to all elements of the result's 128-bit vector of [8 x i16].
3249 ///    A 128-bit vector of [8 x i16] whose low element will be broadcast.
3250 /// \returns A 128-bit vector of [8 x i16] containing the result.
3257 /// Broadcasts the low element from the 128-bit vector of [4 x i32] in \a __X
3265 ///    A 128-bit vector of [4 x i32] whose low element will be broadcast.
3266 /// \returns A 128-bit vector of [4 x i32] containing the result.
3273 /// Broadcasts the low element from the 128-bit vector of [2 x i64] in \a __X
3274 ///    to both elements of the result's 128-bit vector of [2 x i64].
3281 ///    A 128-bit vector of [2 x i64] whose low element will be broadcast.
3282 /// \returns A 128-bit vector of [2 x i64] containing the result.
3289 /// Sets the result's 256-bit vector of [8 x i32] to copies of elements of the
3290 ///    256-bit vector of [8 x i32] in \a __a as specified by indexes in the
3291 ///    elements of the 256-bit vector of [8 x i32] in \a __b.
3306 ///    A 256-bit vector of [8 x i32] containing the source values.
3308 ///    A 256-bit vector of [8 x i32] containing indexes of values to use from
3310 /// \returns A 256-bit vector of [8 x i32] containing the result.
3317 /// Sets the result's 256-bit vector of [4 x double] to copies of elements of
3318 ///    the 256-bit vector of [4 x double] in \a V as specified by the
3338 ///    A 256-bit vector of [4 x double] containing the source values.
3340 ///    An immediate 8-bit value specifying which elements to copy from \a V.
3343 /// \returns A 256-bit vector of [4 x double] containing the result.
3347 /// Sets the result's 256-bit vector of [8 x float] to copies of elements of
3348 ///    the 256-bit vector of [8 x float] in \a __a as specified by indexes in
3349 ///    the elements of the 256-bit vector of [8 x i32] in \a __b.
3364 ///    A 256-bit vector of [8 x float] containing the source values.
3366 ///    A 256-bit vector of [8 x i32] containing indexes of values to use from
3368 /// \returns A 256-bit vector of [8 x float] containing the result.
3375 /// Sets the result's 256-bit vector of [4 x i64] to copies of elements
3376 ///    of the 256-bit vector of [4 x i64] in \a V as specified by the
3396 ///    A 256-bit vector of [4 x i64] containing the source values.
3398 ///    An immediate 8-bit value specifying which elements to copy from \a V.
3401 /// \returns A 256-bit vector of [4 x i64] containing the result.
3405 /// Sets each half of the 256-bit result either to zero or to one of the
3406 ///    four possible 128-bit halves of the 256-bit vectors \a V1 and \a V2,
3435 ///    A 256-bit integer vector containing source values.
3437 ///    A 256-bit integer vector containing source values.
3441 ///    Within each 4-bit control value, if bit 3 is 1, the result is zero,
3447 /// \returns A 256-bit integer vector containing the result.
3451 /// Extracts half of the 256-bit vector \a V to the 128-bit result. If bit 0
3464 ///    A 256-bit integer vector containing the source values.
3467 /// \returns A 128-bit integer vector containing the result.
3471 /// Copies the 256-bit vector \a V1 to the result, then overwrites half of the
3472 ///    result with the 128-bit vector \a V2. If bit 0 of the immediate \a M
3485 ///    A 256-bit integer vector containing a source value.
3487 ///    A 128-bit integer vector containing a source value.
3490 /// \returns A 256-bit integer vector containing the result.
3495 /// Conditionally loads eight 32-bit integer elements from memory \a __X, if
3496 ///    the most significant bit of the corresponding element in the mask
3498 ///    Returns the 256-bit [8 x i32] result.
3518 ///    A 256-bit vector of [8 x i32] containing the mask bits.
3519 /// \returns A 256-bit vector of [8 x i32] containing the loaded or zeroed
3527 /// Conditionally loads four 64-bit integer elements from memory \a __X, if
3528 ///    the most significant bit of the corresponding element in the mask
3530 ///    Returns the 256-bit [4 x i64] result.
3550 ///    A 256-bit vector of [4 x i64] containing the mask bits.
3551 /// \returns A 256-bit vector of [4 x i64] containing the loaded or zeroed
3559 /// Conditionally loads four 32-bit integer elements from memory \a __X, if
3560 ///    the most significant bit of the corresponding element in the mask
3562 ///    Returns the 128-bit [4 x i32] result.
3582 ///    A 128-bit vector of [4 x i32] containing the mask bits.
3583 /// \returns A 128-bit vector of [4 x i32] containing the loaded or zeroed
3591 /// Conditionally loads two 64-bit integer elements from memory \a __X, if
3592 ///    the most significant bit of the corresponding element in the mask
3594 ///    Returns the 128-bit [2 x i64] result.
3614 ///    A 128-bit vector of [2 x i64] containing the mask bits.
3615 /// \returns A 128-bit vector of [2 x i64] containing the loaded or zeroed
3623 /// Conditionally stores eight 32-bit integer elements from the 256-bit vector
3624 ///    of [8 x i32] in \a __Y to memory \a __X, if the most significant bit of
3644 ///    A 256-bit vector of [8 x i32] containing the mask bits.
3646 ///    A 256-bit vector of [8 x i32] containing the values to store.
3653 /// Conditionally stores four 64-bit integer elements from the 256-bit vector
3654 ///    of [4 x i64] in \a __Y to memory \a __X, if the most significant bit of
3674 ///    A 256-bit vector of [4 x i64] containing the mask bits.
3676 ///    A 256-bit vector of [4 x i64] containing the values to store.
3683 /// Conditionally stores four 32-bit integer elements from the 128-bit vector
3684 ///    of [4 x i32] in \a __Y to memory \a __X, if the most significant bit of
3704 ///    A 128-bit vector of [4 x i32] containing the mask bits.
3706 ///    A 128-bit vector of [4 x i32] containing the values to store.
3713 /// Conditionally stores two 64-bit integer elements from the 128-bit vector
3714 ///    of [2 x i64] in \a __Y to memory \a __X, if the most significant bit of
3734 ///    A 128-bit vector of [2 x i64] containing the mask bits.
3736 ///    A 128-bit vector of [2 x i64] containing the values to store.
3743 /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __X
3745 ///    256-bit vector of [8 x i32] in \a __Y, shifting in zero bits, and
3754 ///    A 256-bit vector of [8 x i32] to be shifted.
3756 ///    A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
3758 /// \returns A 256-bit vector of [8 x i32] containing the result.
3765 /// Shifts each 32-bit element of the 128-bit vector of [4 x i32] in \a __X
3767 ///    128-bit vector of [4 x i32] in \a __Y, shifting in zero bits, and
3776 ///    A 128-bit vector of [4 x i32] to be shifted.
3778 ///    A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
3780 /// \returns A 128-bit vector of [4 x i32] containing the result.
3787 /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __X
3789 ///    256-bit vector of [4 x i64] in \a __Y, shifting in zero bits, and
3798 ///    A 256-bit vector of [4 x i64] to be shifted.
3800 ///    A 256-bit vector of [4 x i64] containing the unsigned shift counts (in
3802 /// \returns A 256-bit vector of [4 x i64] containing the result.
3809 /// Shifts each 64-bit element of the 128-bit vector of [2 x i64] in \a __X
3811 ///    128-bit vector of [2 x i64] in \a __Y, shifting in zero bits, and
3820 ///    A 128-bit vector of [2 x i64] to be shifted.
3822 ///    A 128-bit vector of [2 x i64] containing the unsigned shift counts (in
3824 /// \returns A 128-bit vector of [2 x i64] containing the result.
3831 /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __X
3833 ///    256-bit vector of [8 x i32] in \a __Y, shifting in sign bits, and
3835 ///    31, the result for that element is 0 or -1 according to the sign bit
3843 ///    A 256-bit vector of [8 x i32] to be shifted.
3845 ///    A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
3847 /// \returns A 256-bit vector of [8 x i32] containing the result.
3854 /// Shifts each 32-bit element of the 128-bit vector of [4 x i32] in \a __X
3856 ///    128-bit vector of [4 x i32] in \a __Y, shifting in sign bits, and
3858 ///    31, the result for that element is 0 or -1 according to the sign bit
3866 ///    A 128-bit vector of [4 x i32] to be shifted.
3868 ///    A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
3870 /// \returns A 128-bit vector of [4 x i32] containing the result.
3877 /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __X
3879 ///    256-bit vector of [8 x i32] in \a __Y, shifting in zero bits, and
3888 ///    A 256-bit vector of [8 x i32] to be shifted.
3890 ///    A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
3892 /// \returns A 256-bit vector of [8 x i32] containing the result.
3899 /// Shifts each 32-bit element of the 128-bit vector of [4 x i32] in \a __X
3901 ///    128-bit vector of [4 x i32] in \a __Y, shifting in zero bits, and
3910 ///    A 128-bit vector of [4 x i32] to be shifted.
3912 ///    A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
3914 /// \returns A 128-bit vector of [4 x i32] containing the result.
3921 /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __X
3923 ///    256-bit vector of [4 x i64] in \a __Y, shifting in zero bits, and
3932 ///    A 256-bit vector of [4 x i64] to be shifted.
3934 ///    A 256-bit vector of [4 x i64] containing the unsigned shift counts (in
3936 /// \returns A 256-bit vector of [4 x i64] containing the result.
3943 /// Shifts each 64-bit element of the 128-bit vector of [2 x i64] in \a __X
3945 ///    128-bit vector of [2 x i64] in \a __Y, shifting in zero bits, and
3954 ///    A 128-bit vector of [2 x i64] to be shifted.
3956 ///    A 128-bit vector of [2 x i64] containing the unsigned shift counts (in
3958 /// \returns A 128-bit vector of [2 x i64] containing the result.
3965 /// Conditionally gathers two 64-bit floating-point values, either from the
3966 ///    128-bit vector of [2 x double] in \a a, or from memory \a m using scaled
3967 ///    indexes from the 128-bit vector of [4 x i32] in \a i. The 128-bit vector
3992 ///    A 128-bit vector of [2 x double] used as the source when a mask bit is
3997 ///    A 128-bit vector of [4 x i32] containing signed indexes into \a m. Only
4000 ///    A 128-bit vector of [2 x double] containing the mask. The most
4001 ///    significant bit of each element in the mask vector represents the mask
4002 ///    bits. If a mask bit is zero, the corresponding value from vector \a a
4007 /// \returns A 128-bit vector of [2 x double] containing the gathered values.
4014 /// Conditionally gathers four 64-bit floating-point values, either from the
4015 ///    256-bit vector of [4 x double] in \a a, or from memory \a m using scaled
4016 ///    indexes from the 128-bit vector of [4 x i32] in \a i. The 256-bit vector
4041 ///    A 256-bit vector of [4 x double] used as the source when a mask bit is
4046 ///    A 128-bit vector of [4 x i32] containing signed indexes into \a m.
4048 ///    A 256-bit vector of [4 x double] containing the mask. The most
4049 ///    significant bit of each element in the mask vector represents the mask
4050 ///    bits. If a mask bit is zero, the corresponding value from vector \a a
4055 /// \returns A 256-bit vector of [4 x double] containing the gathered values.
4062 /// Conditionally gathers two 64-bit floating-point values, either from the
4063 ///    128-bit vector of [2 x double] in \a a, or from memory \a m using scaled
4064 ///    indexes from the 128-bit vector of [2 x i64] in \a i. The 128-bit vector
4089 ///    A 128-bit vector of [2 x double] used as the source when a mask bit is
4094 ///    A 128-bit vector of [2 x i64] containing signed indexes into \a m.
4096 ///    A 128-bit vector of [2 x double] containing the mask. The most
4097 ///    significant bit of each element in the mask vector represents the mask
4098 ///    bits. If a mask bit is zero, the corresponding value from vector \a a
4103 /// \returns A 128-bit vector of [2 x double] containing the gathered values.
4110 /// Conditionally gathers four 64-bit floating-point values, either from the
4111 ///    256-bit vector of [4 x double] in \a a, or from memory \a m using scaled
4112 ///    indexes from the 256-bit vector of [4 x i64] in \a i. The 256-bit vector
4137 ///    A 256-bit vector of [4 x double] used as the source when a mask bit is
4142 ///    A 256-bit vector of [4 x i64] containing signed indexes into \a m.
4144 ///    A 256-bit vector of [4 x double] containing the mask. The most
4145 ///    significant bit of each element in the mask vector represents the mask
4146 ///    bits. If a mask bit is zero, the corresponding value from vector \a a
4151 /// \returns A 256-bit vector of [4 x double] containing the gathered values.
4158 /// Conditionally gathers four 32-bit floating-point values, either from the
4159 ///    128-bit vector of [4 x float] in \a a, or from memory \a m using scaled
4160 ///    indexes from the 128-bit vector of [4 x i32] in \a i. The 128-bit vector
4185 ///    A 128-bit vector of [4 x float] used as the source when a mask bit is
4190 ///    A 128-bit vector of [4 x i32] containing signed indexes into \a m.
4192 ///    A 128-bit vector of [4 x float] containing the mask. The most
4193 ///    significant bit of each element in the mask vector represents the mask
4194 ///    bits. If a mask bit is zero, the corresponding value from vector \a a
4199 /// \returns A 128-bit vector of [4 x float] containing the gathered values.
4206 /// Conditionally gathers eight 32-bit floating-point values, either from the
4207 ///    256-bit vector of [8 x float] in \a a, or from memory \a m using scaled
4208 ///    indexes from the 256-bit vector of [8 x i32] in \a i. The 256-bit vector
4233 ///    A 256-bit vector of [8 x float] used as the source when a mask bit is
4238 ///    A 256-bit vector of [8 x i32] containing signed indexes into \a m.
4240 ///    A 256-bit vector of [8 x float] containing the mask. The most
4241 ///    significant bit of each element in the mask vector represents the mask
4242 ///    bits. If a mask bit is zero, the corresponding value from vector \a a
4247 /// \returns A 256-bit vector of [8 x float] containing the gathered values.
4254 /// Conditionally gathers two 32-bit floating-point values, either from the
4255 ///    128-bit vector of [4 x float] in \a a, or from memory \a m using scaled
4256 ///    indexes from the 128-bit vector of [2 x i64] in \a i. The 128-bit vector
4283 ///    A 128-bit vector of [4 x float] used as the source when a mask bit is
4288 ///    A 128-bit vector of [2 x i64] containing signed indexes into \a m.
4290 ///    A 128-bit vector of [4 x float] containing the mask. The most
4291 ///    significant bit of each element in the mask vector represents the mask
4292 ///    bits. If a mask bit is zero, the corresponding value from vector \a a
4298 /// \returns A 128-bit vector of [4 x float] containing the gathered values.
4305 /// Conditionally gathers four 32-bit floating-point values, either from the
4306 ///    128-bit vector of [4 x float] in \a a, or from memory \a m using scaled
4307 ///    indexes from the 256-bit vector of [4 x i64] in \a i. The 128-bit vector
4332 ///    A 128-bit vector of [4 x float] used as the source when a mask bit is
4337 ///    A 256-bit vector of [4 x i64] containing signed indexes into \a m.
4339 ///    A 128-bit vector of [4 x float] containing the mask. The most
4340 ///    significant bit of each element in the mask vector represents the mask
4341 ///    bits. If a mask bit is zero, the corresponding value from vector \a a
4346 /// \returns A 128-bit vector of [4 x float] containing the gathered values.
4353 /// Conditionally gathers four 32-bit integer values, either from the
4354 ///    128-bit vector of [4 x i32] in \a a, or from memory \a m using scaled
4355 ///    indexes from the 128-bit vector of [4 x i32] in \a i. The 128-bit vector
4380 ///    A 128-bit vector of [4 x i32] used as the source when a mask bit is
4385 ///    A 128-bit vector of [4 x i32] containing signed indexes into \a m.
4387 ///    A 128-bit vector of [4 x i32] containing the mask. The most significant
4388 ///    bit of each element in the mask vector represents the mask bits. If a
4389 ///    mask bit is zero, the corresponding value from vector \a a is gathered;
4394 /// \returns A 128-bit vector of [4 x i32] containing the gathered values.
4401 /// Conditionally gathers eight 32-bit integer values, either from the
4402 ///    256-bit vector of [8 x i32] in \a a, or from memory \a m using scaled
4403 ///    indexes from the 256-bit vector of [8 x i32] in \a i. The 256-bit vector
4428 ///    A 256-bit vector of [8 x i32] used as the source when a mask bit is
4433 ///    A 256-bit vector of [8 x i32] containing signed indexes into \a m.
4435 ///    A 256-bit vector of [8 x i32] containing the mask. The most significant
4436 ///    bit of each element in the mask vector represents the mask bits. If a
4437 ///    mask bit is zero, the corresponding value from vector \a a is gathered;
4442 /// \returns A 256-bit vector of [8 x i32] containing the gathered values.
4449 /// Conditionally gathers two 32-bit integer values, either from the
4450 ///    128-bit vector of [4 x i32] in \a a, or from memory \a m using scaled
4451 ///    indexes from the 128-bit vector of [2 x i64] in \a i. The 128-bit vector
4478 ///    A 128-bit vector of [4 x i32] used as the source when a mask bit is
4483 ///    A 128-bit vector of [2 x i64] containing signed indexes into \a m.
4485 ///    A 128-bit vector of [4 x i32] containing the mask. The most significant
4486 ///    bit of each element in the mask vector represents the mask bits. If a
4487 ///    mask bit is zero, the corresponding value from vector \a a is gathered;
4493 /// \returns A 128-bit vector of [4 x i32] containing the gathered values.
4500 /// Conditionally gathers four 32-bit integer values, either from the
4501 ///    128-bit vector of [4 x i32] in \a a, or from memory \a m using scaled
4502 ///    indexes from the 256-bit vector of [4 x i64] in \a i. The 128-bit vector
4527 ///    A 128-bit vector of [4 x i32] used as the source when a mask bit is
4532 ///    A 256-bit vector of [4 x i64] containing signed indexes into \a m.
4534 ///    A 128-bit vector of [4 x i32] containing the mask. The most significant
4535 ///    bit of each element in the mask vector represents the mask bits. If a
4536 ///    mask bit is zero, the corresponding value from vector \a a is gathered;
4541 /// \returns A 128-bit vector of [4 x i32] containing the gathered values.
4548 /// Conditionally gathers two 64-bit integer values, either from the
4549 ///    128-bit vector of [2 x i64] in \a a, or from memory \a m using scaled
4550 ///    indexes from the 128-bit vector of [4 x i32] in \a i. The 128-bit vector
4575 ///    A 128-bit vector of [2 x i64] used as the source when a mask bit is
4580 ///    A 128-bit vector of [4 x i32] containing signed indexes into \a m. Only
4583 ///    A 128-bit vector of [2 x i64] containing the mask. The most significant
4584 ///    bit of each element in the mask vector represents the mask bits. If a
4585 ///    mask bit is zero, the corresponding value from vector \a a is gathered;
4590 /// \returns A 128-bit vector of [2 x i64] containing the gathered values.
4597 /// Conditionally gathers four 64-bit integer values, either from the
4598 ///    256-bit vector of [4 x i64] in \a a, or from memory \a m using scaled
4599 ///    indexes from the 128-bit vector of [4 x i32] in \a i. The 256-bit vector
4624 ///    A 256-bit vector of [4 x i64] used as the source when a mask bit is
4629 ///    A 128-bit vector of [4 x i32] containing signed indexes into \a m.
4631 ///    A 256-bit vector of [4 x i64] containing the mask. The most significant
4632 ///    bit of each element in the mask vector represents the mask bits. If a
4633 ///    mask bit is zero, the corresponding value from vector \a a is gathered;
4638 /// \returns A 256-bit vector of [4 x i64] containing the gathered values.
4645 /// Conditionally gathers two 64-bit integer values, either from the
4646 ///    128-bit vector of [2 x i64] in \a a, or from memory \a m using scaled
4647 ///    indexes from the 128-bit vector of [2 x i64] in \a i. The 128-bit vector
4672 ///    A 128-bit vector of [2 x i64] used as the source when a mask bit is
4677 ///    A 128-bit vector of [2 x i64] containing signed indexes into \a m.
4679 ///    A 128-bit vector of [2 x i64] containing the mask. The most significant
4680 ///    bit of each element in the mask vector represents the mask bits. If a
4681 ///    mask bit is zero, the corresponding value from vector \a a is gathered;
4686 /// \returns A 128-bit vector of [2 x i64] containing the gathered values.
4693 /// Conditionally gathers four 64-bit integer values, either from the
4694 ///    256-bit vector of [4 x i64] in \a a, or from memory \a m using scaled
4695 ///    indexes from the 256-bit vector of [4 x i64] in \a i. The 256-bit vector
4720 ///    A 256-bit vector of [4 x i64] used as the source when a mask bit is
4725 ///    A 256-bit vector of [4 x i64] containing signed indexes into \a m.
4727 ///    A 256-bit vector of [4 x i64] containing the mask. The most significant
4728 ///    bit of each element in the mask vector represents the mask bits. If a
4729 ///    mask bit is zero, the corresponding value from vector \a a is gathered;
4734 /// \returns A 256-bit vector of [4 x i64] containing the gathered values.
4741 /// Gathers two 64-bit floating-point values from memory \a m using scaled
4742 ///    indexes from the 128-bit vector of [4 x i32] in \a i.
4763 ///    A 128-bit vector of [4 x i32] containing signed indexes into \a m. Only
4768 /// \returns A 128-bit vector of [2 x double] containing the gathered values.
4777 /// Gathers four 64-bit floating-point values from memory \a m using scaled
4778 ///    indexes from the 128-bit vector of [4 x i32] in \a i.
4799 ///    A 128-bit vector of [4 x i32] containing signed indexes into \a m.
4803 /// \returns A 256-bit vector of [4 x double] containing the gathered values.
4813 /// Gathers two 64-bit floating-point values from memory \a m using scaled
4814 ///    indexes from the 128-bit vector of [2 x i64] in \a i.
4835 ///    A 128-bit vector of [2 x i64] containing signed indexes into \a m.
4839 /// \returns A 128-bit vector of [2 x double] containing the gathered values.
4848 /// Gathers four 64-bit floating-point values from memory \a m using scaled
4849 ///    indexes from the 256-bit vector of [4 x i64] in \a i.
4870 ///    A 256-bit vector of [4 x i64] containing signed indexes into \a m.
4874 /// \returns A 256-bit vector of [4 x double] containing the gathered values.
4884 /// Gathers four 32-bit floating-point values from memory \a m using scaled
4885 ///    indexes from the 128-bit vector of [4 x i32] in \a i.
4906 ///    A 128-bit vector of [4 x i32] containing signed indexes into \a m.
4910 /// \returns A 128-bit vector of [4 x float] containing the gathered values.
4919 /// Gathers eight 32-bit floating-point values from memory \a m using scaled
4920 ///    indexes from the 256-bit vector of [8 x i32] in \a i.
4941 ///    A 256-bit vector of [8 x i32] containing signed indexes into \a m.
4945 /// \returns A 256-bit vector of [8 x float] containing the gathered values.
4955 /// Gathers two 32-bit floating-point values from memory \a m using scaled
4956 ///    indexes from the 128-bit vector of [2 x i64] in \a i. The upper two
4979 ///    A 128-bit vector of [2 x i64] containing signed indexes into \a m.
4983 /// \returns A 128-bit vector of [4 x float] containing the gathered values.
4992 /// Gathers four 32-bit floating-point values from memory \a m using scaled
4993 ///    indexes from the 256-bit vector of [4 x i64] in \a i.
5014 ///    A 256-bit vector of [4 x i64] containing signed indexes into \a m.
5018 /// \returns A 128-bit vector of [4 x float] containing the gathered values.
5027 /// Gathers four 32-bit integer values from memory \a m using scaled
5028 ///    indexes from the 128-bit vector of [4 x i32] in \a i.
5049 ///    A 128-bit vector of [4 x i32] containing signed indexes into \a m.
5053 /// \returns A 128-bit vector of [4 x i32] containing the gathered values.
5059 /// Gathers eight 32-bit integer values from memory \a m using scaled
5060 ///    indexes from the 256-bit vector of [8 x i32] in \a i.
5081 ///    A 256-bit vector of [8 x i32] containing signed indexes into \a m.
5085 /// \returns A 256-bit vector of [8 x i32] containing the gathered values.
5091 /// Gathers two 32-bit integer values from memory \a m using scaled indexes
5092 ///    from the 128-bit vector of [2 x i64] in \a i. The upper two elements
5115 ///    A 128-bit vector of [2 x i64] containing signed indexes into \a m.
5119 /// \returns A 128-bit vector of [4 x i32] containing the gathered values.
5125 /// Gathers four 32-bit integer values from memory \a m using scaled indexes
5126 ///    from the 256-bit vector of [4 x i64] in \a i.
5147 ///    A 256-bit vector of [4 x i64] containing signed indexes into \a m.
5151 /// \returns A 128-bit vector of [4 x i32] containing the gathered values.
5157 /// Gathers two 64-bit integer values from memory \a m using scaled indexes
5158 ///    from the 128-bit vector of [4 x i32] in \a i.
5179 ///    A 128-bit vector of [4 x i32] containing signed indexes into \a m. Only
5184 /// \returns A 128-bit vector of [2 x i64] containing the gathered values.
5191 /// Gathers four 64-bit integer values from memory \a m using scaled indexes
5192 ///    from the 128-bit vector of [4 x i32] in \a i.
5213 ///    A 128-bit vector of [4 x i32] containing signed indexes into \a m.
5217 /// \returns A 256-bit vector of [4 x i64] containing the gathered values.
5224 /// Gathers two 64-bit integer values from memory \a m using scaled indexes
5225 ///    from the 128-bit vector of [2 x i64] in \a i.
5246 ///    A 128-bit vector of [2 x i64] containing signed indexes into \a m.
5250 /// \returns A 128-bit vector of [2 x i64] containing the gathered values.
5257 /// Gathers four 64-bit integer values from memory \a m using scaled indexes
5258 ///    from the 256-bit vector of [4 x i64] in \a i.
5279 ///    A 256-bit vector of [4 x i64] containing signed indexes into \a m.
5283 /// \returns A 256-bit vector of [4 x i64] containing the gathered values.