1 /*===---- mmintrin.h - MMX intrinsics --------------------------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10 #ifndef __MMINTRIN_H 11 #define __MMINTRIN_H 12 13 #if !defined(__i386__) && !defined(__x86_64__) 14 #error "This header is only meant to be used on x86 and x64 architecture" 15 #endif 16 17 typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8))); 18 19 typedef long long __v1di __attribute__((__vector_size__(8))); 20 typedef int __v2si __attribute__((__vector_size__(8))); 21 typedef short __v4hi __attribute__((__vector_size__(8))); 22 typedef char __v8qi __attribute__((__vector_size__(8))); 23 24 /* Unsigned types */ 25 typedef unsigned long long __v1du __attribute__ ((__vector_size__ (8))); 26 typedef unsigned int __v2su __attribute__ ((__vector_size__ (8))); 27 typedef unsigned short __v4hu __attribute__((__vector_size__(8))); 28 typedef unsigned char __v8qu __attribute__((__vector_size__(8))); 29 30 /* We need an explicitly signed variant for char. Note that this shouldn't 31 * appear in the interface though. */ 32 typedef signed char __v8qs __attribute__((__vector_size__(8))); 33 34 /* SSE/SSE2 types */ 35 typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16))); 36 typedef long long __v2di __attribute__ ((__vector_size__ (16))); 37 typedef int __v4si __attribute__((__vector_size__(16))); 38 typedef short __v8hi __attribute__((__vector_size__(16))); 39 typedef char __v16qi __attribute__((__vector_size__(16))); 40 41 /* Define the default attributes for the functions in this file. */ 42 #if defined(__EVEX512__) && !defined(__AVX10_1_512__) 43 #define __DEFAULT_FN_ATTRS_SSE2 \ 44 __attribute__((__always_inline__, __nodebug__, \ 45 __target__("sse2,no-evex512"), __min_vector_width__(128))) 46 #else 47 #define __DEFAULT_FN_ATTRS_SSE2 \ 48 __attribute__((__always_inline__, __nodebug__, __target__("sse2"), \ 49 __min_vector_width__(128))) 50 #endif 51 52 #if defined(__cplusplus) && (__cplusplus >= 201103L) 53 #define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2 constexpr 54 #else 55 #define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2 56 #endif 57 58 #define __trunc64(x) \ 59 (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0) 60 #define __anyext128(x) \ 61 (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ 62 1, -1, -1) 63 64 /// Clears the MMX state by setting the state of the x87 stack registers 65 /// to empty. 66 /// 67 /// \headerfile <x86intrin.h> 68 /// 69 /// This intrinsic corresponds to the <c> EMMS </c> instruction. 70 /// 71 static __inline__ void __attribute__((__always_inline__, __nodebug__, 72 __target__("mmx,no-evex512"))) 73 _mm_empty(void) { 74 __builtin_ia32_emms(); 75 } 76 77 /// Constructs a 64-bit integer vector, setting the lower 32 bits to the 78 /// value of the 32-bit integer parameter and setting the upper 32 bits to 0. 79 /// 80 /// \headerfile <x86intrin.h> 81 /// 82 /// This intrinsic corresponds to the <c> MOVD </c> instruction. 83 /// 84 /// \param __i 85 /// A 32-bit integer value. 86 /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the 87 /// parameter. The upper 32 bits are set to 0. 88 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 89 _mm_cvtsi32_si64(int __i) 90 { 91 return __extension__ (__m64)(__v2si){__i, 0}; 92 } 93 94 /// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit 95 /// signed integer. 96 /// 97 /// \headerfile <x86intrin.h> 98 /// 99 /// This intrinsic corresponds to the <c> MOVD </c> instruction. 100 /// 101 /// \param __m 102 /// A 64-bit integer vector. 103 /// \returns A 32-bit signed integer value containing the lower 32 bits of the 104 /// parameter. 105 static __inline__ int __DEFAULT_FN_ATTRS_SSE2 106 _mm_cvtsi64_si32(__m64 __m) 107 { 108 return ((__v2si)__m)[0]; 109 } 110 111 /// Casts a 64-bit signed integer value into a 64-bit integer vector. 112 /// 113 /// \headerfile <x86intrin.h> 114 /// 115 /// This intrinsic corresponds to the <c> MOVQ </c> instruction. 116 /// 117 /// \param __i 118 /// A 64-bit signed integer. 119 /// \returns A 64-bit integer vector containing the same bitwise pattern as the 120 /// parameter. 121 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 122 _mm_cvtsi64_m64(long long __i) 123 { 124 return (__m64)__i; 125 } 126 127 /// Casts a 64-bit integer vector into a 64-bit signed integer value. 128 /// 129 /// \headerfile <x86intrin.h> 130 /// 131 /// This intrinsic corresponds to the <c> MOVQ </c> instruction. 132 /// 133 /// \param __m 134 /// A 64-bit integer vector. 135 /// \returns A 64-bit signed integer containing the same bitwise pattern as the 136 /// parameter. 137 static __inline__ long long __DEFAULT_FN_ATTRS_SSE2 138 _mm_cvtm64_si64(__m64 __m) 139 { 140 return (long long)__m; 141 } 142 143 /// Converts, with saturation, 16-bit signed integers from both 64-bit integer 144 /// vector parameters of [4 x i16] into 8-bit signed integer values, and 145 /// constructs a 64-bit integer vector of [8 x i8] as the result. 146 /// 147 /// Positive values greater than 0x7F are saturated to 0x7F. Negative values 148 /// less than 0x80 are saturated to 0x80. 149 /// 150 /// \headerfile <x86intrin.h> 151 /// 152 /// This intrinsic corresponds to the <c> PACKSSWB </c> instruction. 153 /// 154 /// \param __m1 155 /// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are 156 /// written to the lower 32 bits of the result. 157 /// \param __m2 158 /// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are 159 /// written to the upper 32 bits of the result. 160 /// \returns A 64-bit integer vector of [8 x i8] containing the converted 161 /// values. 162 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 163 _mm_packs_pi16(__m64 __m1, __m64 __m2) 164 { 165 return __trunc64(__builtin_ia32_packsswb128( 166 (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){})); 167 } 168 169 /// Converts, with saturation, 32-bit signed integers from both 64-bit integer 170 /// vector parameters of [2 x i32] into 16-bit signed integer values, and 171 /// constructs a 64-bit integer vector of [4 x i16] as the result. 172 /// 173 /// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative 174 /// values less than 0x8000 are saturated to 0x8000. 175 /// 176 /// \headerfile <x86intrin.h> 177 /// 178 /// This intrinsic corresponds to the <c> PACKSSDW </c> instruction. 179 /// 180 /// \param __m1 181 /// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are 182 /// written to the lower 32 bits of the result. 183 /// \param __m2 184 /// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are 185 /// written to the upper 32 bits of the result. 186 /// \returns A 64-bit integer vector of [4 x i16] containing the converted 187 /// values. 188 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 189 _mm_packs_pi32(__m64 __m1, __m64 __m2) 190 { 191 return __trunc64(__builtin_ia32_packssdw128( 192 (__v4si)__builtin_shufflevector(__m1, __m2, 0, 1), (__v4si){})); 193 } 194 195 /// Converts, with saturation, 16-bit signed integers from both 64-bit integer 196 /// vector parameters of [4 x i16] into 8-bit unsigned integer values, and 197 /// constructs a 64-bit integer vector of [8 x i8] as the result. 198 /// 199 /// Values greater than 0xFF are saturated to 0xFF. Values less than 0 are 200 /// saturated to 0. 201 /// 202 /// \headerfile <x86intrin.h> 203 /// 204 /// This intrinsic corresponds to the <c> PACKUSWB </c> instruction. 205 /// 206 /// \param __m1 207 /// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are 208 /// written to the lower 32 bits of the result. 209 /// \param __m2 210 /// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are 211 /// written to the upper 32 bits of the result. 212 /// \returns A 64-bit integer vector of [8 x i8] containing the converted 213 /// values. 214 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 215 _mm_packs_pu16(__m64 __m1, __m64 __m2) 216 { 217 return __trunc64(__builtin_ia32_packuswb128( 218 (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){})); 219 } 220 221 /// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] 222 /// and interleaves them into a 64-bit integer vector of [8 x i8]. 223 /// 224 /// \headerfile <x86intrin.h> 225 /// 226 /// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction. 227 /// 228 /// \param __m1 229 /// A 64-bit integer vector of [8 x i8]. \n 230 /// Bits [39:32] are written to bits [7:0] of the result. \n 231 /// Bits [47:40] are written to bits [23:16] of the result. \n 232 /// Bits [55:48] are written to bits [39:32] of the result. \n 233 /// Bits [63:56] are written to bits [55:48] of the result. 234 /// \param __m2 235 /// A 64-bit integer vector of [8 x i8]. 236 /// Bits [39:32] are written to bits [15:8] of the result. \n 237 /// Bits [47:40] are written to bits [31:24] of the result. \n 238 /// Bits [55:48] are written to bits [47:40] of the result. \n 239 /// Bits [63:56] are written to bits [63:56] of the result. 240 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved 241 /// values. 242 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 243 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) 244 { 245 return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 246 4, 12, 5, 13, 6, 14, 7, 15); 247 } 248 249 /// Unpacks the upper 32 bits from two 64-bit integer vectors of 250 /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 251 /// 252 /// \headerfile <x86intrin.h> 253 /// 254 /// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction. 255 /// 256 /// \param __m1 257 /// A 64-bit integer vector of [4 x i16]. 258 /// Bits [47:32] are written to bits [15:0] of the result. \n 259 /// Bits [63:48] are written to bits [47:32] of the result. 260 /// \param __m2 261 /// A 64-bit integer vector of [4 x i16]. 262 /// Bits [47:32] are written to bits [31:16] of the result. \n 263 /// Bits [63:48] are written to bits [63:48] of the result. 264 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved 265 /// values. 266 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 267 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) 268 { 269 return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 270 2, 6, 3, 7); 271 } 272 273 /// Unpacks the upper 32 bits from two 64-bit integer vectors of 274 /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 275 /// 276 /// \headerfile <x86intrin.h> 277 /// 278 /// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction. 279 /// 280 /// \param __m1 281 /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to 282 /// the lower 32 bits of the result. 283 /// \param __m2 284 /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to 285 /// the upper 32 bits of the result. 286 /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved 287 /// values. 288 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 289 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) 290 { 291 return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3); 292 } 293 294 /// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] 295 /// and interleaves them into a 64-bit integer vector of [8 x i8]. 296 /// 297 /// \headerfile <x86intrin.h> 298 /// 299 /// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction. 300 /// 301 /// \param __m1 302 /// A 64-bit integer vector of [8 x i8]. 303 /// Bits [7:0] are written to bits [7:0] of the result. \n 304 /// Bits [15:8] are written to bits [23:16] of the result. \n 305 /// Bits [23:16] are written to bits [39:32] of the result. \n 306 /// Bits [31:24] are written to bits [55:48] of the result. 307 /// \param __m2 308 /// A 64-bit integer vector of [8 x i8]. 309 /// Bits [7:0] are written to bits [15:8] of the result. \n 310 /// Bits [15:8] are written to bits [31:24] of the result. \n 311 /// Bits [23:16] are written to bits [47:40] of the result. \n 312 /// Bits [31:24] are written to bits [63:56] of the result. 313 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved 314 /// values. 315 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 316 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) 317 { 318 return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 319 0, 8, 1, 9, 2, 10, 3, 11); 320 } 321 322 /// Unpacks the lower 32 bits from two 64-bit integer vectors of 323 /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 324 /// 325 /// \headerfile <x86intrin.h> 326 /// 327 /// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction. 328 /// 329 /// \param __m1 330 /// A 64-bit integer vector of [4 x i16]. 331 /// Bits [15:0] are written to bits [15:0] of the result. \n 332 /// Bits [31:16] are written to bits [47:32] of the result. 333 /// \param __m2 334 /// A 64-bit integer vector of [4 x i16]. 335 /// Bits [15:0] are written to bits [31:16] of the result. \n 336 /// Bits [31:16] are written to bits [63:48] of the result. 337 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved 338 /// values. 339 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 340 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) 341 { 342 return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 343 0, 4, 1, 5); 344 } 345 346 /// Unpacks the lower 32 bits from two 64-bit integer vectors of 347 /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 348 /// 349 /// \headerfile <x86intrin.h> 350 /// 351 /// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction. 352 /// 353 /// \param __m1 354 /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to 355 /// the lower 32 bits of the result. 356 /// \param __m2 357 /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to 358 /// the upper 32 bits of the result. 359 /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved 360 /// values. 361 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 362 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) 363 { 364 return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2); 365 } 366 367 /// Adds each 8-bit integer element of the first 64-bit integer vector 368 /// of [8 x i8] to the corresponding 8-bit integer element of the second 369 /// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are 370 /// packed into a 64-bit integer vector of [8 x i8]. 371 /// 372 /// \headerfile <x86intrin.h> 373 /// 374 /// This intrinsic corresponds to the <c> PADDB </c> instruction. 375 /// 376 /// \param __m1 377 /// A 64-bit integer vector of [8 x i8]. 378 /// \param __m2 379 /// A 64-bit integer vector of [8 x i8]. 380 /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both 381 /// parameters. 382 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 383 _mm_add_pi8(__m64 __m1, __m64 __m2) 384 { 385 return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2)); 386 } 387 388 /// Adds each 16-bit integer element of the first 64-bit integer vector 389 /// of [4 x i16] to the corresponding 16-bit integer element of the second 390 /// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are 391 /// packed into a 64-bit integer vector of [4 x i16]. 392 /// 393 /// \headerfile <x86intrin.h> 394 /// 395 /// This intrinsic corresponds to the <c> PADDW </c> instruction. 396 /// 397 /// \param __m1 398 /// A 64-bit integer vector of [4 x i16]. 399 /// \param __m2 400 /// A 64-bit integer vector of [4 x i16]. 401 /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both 402 /// parameters. 403 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 404 _mm_add_pi16(__m64 __m1, __m64 __m2) 405 { 406 return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2)); 407 } 408 409 /// Adds each 32-bit integer element of the first 64-bit integer vector 410 /// of [2 x i32] to the corresponding 32-bit integer element of the second 411 /// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are 412 /// packed into a 64-bit integer vector of [2 x i32]. 413 /// 414 /// \headerfile <x86intrin.h> 415 /// 416 /// This intrinsic corresponds to the <c> PADDD </c> instruction. 417 /// 418 /// \param __m1 419 /// A 64-bit integer vector of [2 x i32]. 420 /// \param __m2 421 /// A 64-bit integer vector of [2 x i32]. 422 /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both 423 /// parameters. 424 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 425 _mm_add_pi32(__m64 __m1, __m64 __m2) 426 { 427 return (__m64)(((__v2su)__m1) + ((__v2su)__m2)); 428 } 429 430 /// Adds, with saturation, each 8-bit signed integer element of the first 431 /// 64-bit integer vector of [8 x i8] to the corresponding 8-bit signed 432 /// integer element of the second 64-bit integer vector of [8 x i8]. 433 /// 434 /// Positive sums greater than 0x7F are saturated to 0x7F. Negative sums 435 /// less than 0x80 are saturated to 0x80. The results are packed into a 436 /// 64-bit integer vector of [8 x i8]. 437 /// 438 /// \headerfile <x86intrin.h> 439 /// 440 /// This intrinsic corresponds to the <c> PADDSB </c> instruction. 441 /// 442 /// \param __m1 443 /// A 64-bit integer vector of [8 x i8]. 444 /// \param __m2 445 /// A 64-bit integer vector of [8 x i8]. 446 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums 447 /// of both parameters. 448 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 449 _mm_adds_pi8(__m64 __m1, __m64 __m2) 450 { 451 return (__m64)__builtin_elementwise_add_sat((__v8qs)__m1, (__v8qs)__m2); 452 } 453 454 /// Adds, with saturation, each 16-bit signed integer element of the first 455 /// 64-bit integer vector of [4 x i16] to the corresponding 16-bit signed 456 /// integer element of the second 64-bit integer vector of [4 x i16]. 457 /// 458 /// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums 459 /// less than 0x8000 are saturated to 0x8000. The results are packed into a 460 /// 64-bit integer vector of [4 x i16]. 461 /// 462 /// \headerfile <x86intrin.h> 463 /// 464 /// This intrinsic corresponds to the <c> PADDSW </c> instruction. 465 /// 466 /// \param __m1 467 /// A 64-bit integer vector of [4 x i16]. 468 /// \param __m2 469 /// A 64-bit integer vector of [4 x i16]. 470 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums 471 /// of both parameters. 472 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 473 _mm_adds_pi16(__m64 __m1, __m64 __m2) 474 { 475 return (__m64)__builtin_elementwise_add_sat((__v4hi)__m1, (__v4hi)__m2); 476 } 477 478 /// Adds, with saturation, each 8-bit unsigned integer element of the first 479 /// 64-bit integer vector of [8 x i8] to the corresponding 8-bit unsigned 480 /// integer element of the second 64-bit integer vector of [8 x i8]. 481 /// 482 /// Sums greater than 0xFF are saturated to 0xFF. The results are packed 483 /// into a 64-bit integer vector of [8 x i8]. 484 /// 485 /// \headerfile <x86intrin.h> 486 /// 487 /// This intrinsic corresponds to the <c> PADDUSB </c> instruction. 488 /// 489 /// \param __m1 490 /// A 64-bit integer vector of [8 x i8]. 491 /// \param __m2 492 /// A 64-bit integer vector of [8 x i8]. 493 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated 494 /// unsigned sums of both parameters. 495 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 496 _mm_adds_pu8(__m64 __m1, __m64 __m2) 497 { 498 return (__m64)__builtin_elementwise_add_sat((__v8qu)__m1, (__v8qu)__m2); 499 } 500 501 /// Adds, with saturation, each 16-bit unsigned integer element of the first 502 /// 64-bit integer vector of [4 x i16] to the corresponding 16-bit unsigned 503 /// integer element of the second 64-bit integer vector of [4 x i16]. 504 /// 505 /// Sums greater than 0xFFFF are saturated to 0xFFFF. The results are packed 506 /// into a 64-bit integer vector of [4 x i16]. 507 /// 508 /// \headerfile <x86intrin.h> 509 /// 510 /// This intrinsic corresponds to the <c> PADDUSW </c> instruction. 511 /// 512 /// \param __m1 513 /// A 64-bit integer vector of [4 x i16]. 514 /// \param __m2 515 /// A 64-bit integer vector of [4 x i16]. 516 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated 517 /// unsigned sums of both parameters. 518 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 519 _mm_adds_pu16(__m64 __m1, __m64 __m2) 520 { 521 return (__m64)__builtin_elementwise_add_sat((__v4hu)__m1, (__v4hu)__m2); 522 } 523 524 /// Subtracts each 8-bit integer element of the second 64-bit integer 525 /// vector of [8 x i8] from the corresponding 8-bit integer element of the 526 /// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results 527 /// are packed into a 64-bit integer vector of [8 x i8]. 528 /// 529 /// \headerfile <x86intrin.h> 530 /// 531 /// This intrinsic corresponds to the <c> PSUBB </c> instruction. 532 /// 533 /// \param __m1 534 /// A 64-bit integer vector of [8 x i8] containing the minuends. 535 /// \param __m2 536 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. 537 /// \returns A 64-bit integer vector of [8 x i8] containing the differences of 538 /// both parameters. 539 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 540 _mm_sub_pi8(__m64 __m1, __m64 __m2) 541 { 542 return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2)); 543 } 544 545 /// Subtracts each 16-bit integer element of the second 64-bit integer 546 /// vector of [4 x i16] from the corresponding 16-bit integer element of the 547 /// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the 548 /// results are packed into a 64-bit integer vector of [4 x i16]. 549 /// 550 /// \headerfile <x86intrin.h> 551 /// 552 /// This intrinsic corresponds to the <c> PSUBW </c> instruction. 553 /// 554 /// \param __m1 555 /// A 64-bit integer vector of [4 x i16] containing the minuends. 556 /// \param __m2 557 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. 558 /// \returns A 64-bit integer vector of [4 x i16] containing the differences of 559 /// both parameters. 560 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 561 _mm_sub_pi16(__m64 __m1, __m64 __m2) 562 { 563 return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2)); 564 } 565 566 /// Subtracts each 32-bit integer element of the second 64-bit integer 567 /// vector of [2 x i32] from the corresponding 32-bit integer element of the 568 /// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the 569 /// results are packed into a 64-bit integer vector of [2 x i32]. 570 /// 571 /// \headerfile <x86intrin.h> 572 /// 573 /// This intrinsic corresponds to the <c> PSUBD </c> instruction. 574 /// 575 /// \param __m1 576 /// A 64-bit integer vector of [2 x i32] containing the minuends. 577 /// \param __m2 578 /// A 64-bit integer vector of [2 x i32] containing the subtrahends. 579 /// \returns A 64-bit integer vector of [2 x i32] containing the differences of 580 /// both parameters. 581 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 582 _mm_sub_pi32(__m64 __m1, __m64 __m2) 583 { 584 return (__m64)(((__v2su)__m1) - ((__v2su)__m2)); 585 } 586 587 /// Subtracts, with saturation, each 8-bit signed integer element of the second 588 /// 64-bit integer vector of [8 x i8] from the corresponding 8-bit signed 589 /// integer element of the first 64-bit integer vector of [8 x i8]. 590 /// 591 /// Positive results greater than 0x7F are saturated to 0x7F. Negative 592 /// results less than 0x80 are saturated to 0x80. The results are packed 593 /// into a 64-bit integer vector of [8 x i8]. 594 /// 595 /// \headerfile <x86intrin.h> 596 /// 597 /// This intrinsic corresponds to the <c> PSUBSB </c> instruction. 598 /// 599 /// \param __m1 600 /// A 64-bit integer vector of [8 x i8] containing the minuends. 601 /// \param __m2 602 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. 603 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated 604 /// differences of both parameters. 605 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 606 _mm_subs_pi8(__m64 __m1, __m64 __m2) 607 { 608 return (__m64)__builtin_elementwise_sub_sat((__v8qs)__m1, (__v8qs)__m2); 609 } 610 611 /// Subtracts, with saturation, each 16-bit signed integer element of the 612 /// second 64-bit integer vector of [4 x i16] from the corresponding 16-bit 613 /// signed integer element of the first 64-bit integer vector of [4 x i16]. 614 /// 615 /// Positive results greater than 0x7FFF are saturated to 0x7FFF. Negative 616 /// results less than 0x8000 are saturated to 0x8000. The results are packed 617 /// into a 64-bit integer vector of [4 x i16]. 618 /// 619 /// \headerfile <x86intrin.h> 620 /// 621 /// This intrinsic corresponds to the <c> PSUBSW </c> instruction. 622 /// 623 /// \param __m1 624 /// A 64-bit integer vector of [4 x i16] containing the minuends. 625 /// \param __m2 626 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. 627 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated 628 /// differences of both parameters. 629 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 630 _mm_subs_pi16(__m64 __m1, __m64 __m2) 631 { 632 return (__m64)__builtin_elementwise_sub_sat((__v4hi)__m1, (__v4hi)__m2); 633 } 634 635 /// Subtracts each 8-bit unsigned integer element of the second 64-bit 636 /// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer 637 /// element of the first 64-bit integer vector of [8 x i8]. 638 /// 639 /// If an element of the first vector is less than the corresponding element 640 /// of the second vector, the result is saturated to 0. The results are 641 /// packed into a 64-bit integer vector of [8 x i8]. 642 /// 643 /// \headerfile <x86intrin.h> 644 /// 645 /// This intrinsic corresponds to the <c> PSUBUSB </c> instruction. 646 /// 647 /// \param __m1 648 /// A 64-bit integer vector of [8 x i8] containing the minuends. 649 /// \param __m2 650 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. 651 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated 652 /// differences of both parameters. 653 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 654 _mm_subs_pu8(__m64 __m1, __m64 __m2) 655 { 656 return (__m64)__builtin_elementwise_sub_sat((__v8qu)__m1, (__v8qu)__m2); 657 } 658 659 /// Subtracts each 16-bit unsigned integer element of the second 64-bit 660 /// integer vector of [4 x i16] from the corresponding 16-bit unsigned 661 /// integer element of the first 64-bit integer vector of [4 x i16]. 662 /// 663 /// If an element of the first vector is less than the corresponding element 664 /// of the second vector, the result is saturated to 0. The results are 665 /// packed into a 64-bit integer vector of [4 x i16]. 666 /// 667 /// \headerfile <x86intrin.h> 668 /// 669 /// This intrinsic corresponds to the <c> PSUBUSW </c> instruction. 670 /// 671 /// \param __m1 672 /// A 64-bit integer vector of [4 x i16] containing the minuends. 673 /// \param __m2 674 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. 675 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated 676 /// differences of both parameters. 677 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 678 _mm_subs_pu16(__m64 __m1, __m64 __m2) 679 { 680 return (__m64)__builtin_elementwise_sub_sat((__v4hu)__m1, (__v4hu)__m2); 681 } 682 683 /// Multiplies each 16-bit signed integer element of the first 64-bit 684 /// integer vector of [4 x i16] by the corresponding 16-bit signed integer 685 /// element of the second 64-bit integer vector of [4 x i16] and get four 686 /// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums. 687 /// The lower 32 bits of these two sums are packed into a 64-bit integer 688 /// vector of [2 x i32]. 689 /// 690 /// For example, bits [15:0] of both parameters are multiplied, bits [31:16] 691 /// of both parameters are multiplied, and the sum of both results is written 692 /// to bits [31:0] of the result. 693 /// 694 /// \headerfile <x86intrin.h> 695 /// 696 /// This intrinsic corresponds to the <c> PMADDWD </c> instruction. 697 /// 698 /// \param __m1 699 /// A 64-bit integer vector of [4 x i16]. 700 /// \param __m2 701 /// A 64-bit integer vector of [4 x i16]. 702 /// \returns A 64-bit integer vector of [2 x i32] containing the sums of 703 /// products of both parameters. 704 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 705 _mm_madd_pi16(__m64 __m1, __m64 __m2) 706 { 707 return __trunc64(__builtin_ia32_pmaddwd128((__v8hi)__anyext128(__m1), 708 (__v8hi)__anyext128(__m2))); 709 } 710 711 /// Multiplies each 16-bit signed integer element of the first 64-bit 712 /// integer vector of [4 x i16] by the corresponding 16-bit signed integer 713 /// element of the second 64-bit integer vector of [4 x i16]. Packs the upper 714 /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. 715 /// 716 /// \headerfile <x86intrin.h> 717 /// 718 /// This intrinsic corresponds to the <c> PMULHW </c> instruction. 719 /// 720 /// \param __m1 721 /// A 64-bit integer vector of [4 x i16]. 722 /// \param __m2 723 /// A 64-bit integer vector of [4 x i16]. 724 /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits 725 /// of the products of both parameters. 726 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 727 _mm_mulhi_pi16(__m64 __m1, __m64 __m2) 728 { 729 return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__anyext128(__m1), 730 (__v8hi)__anyext128(__m2))); 731 } 732 733 /// Multiplies each 16-bit signed integer element of the first 64-bit 734 /// integer vector of [4 x i16] by the corresponding 16-bit signed integer 735 /// element of the second 64-bit integer vector of [4 x i16]. Packs the lower 736 /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. 737 /// 738 /// \headerfile <x86intrin.h> 739 /// 740 /// This intrinsic corresponds to the <c> PMULLW </c> instruction. 741 /// 742 /// \param __m1 743 /// A 64-bit integer vector of [4 x i16]. 744 /// \param __m2 745 /// A 64-bit integer vector of [4 x i16]. 746 /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits 747 /// of the products of both parameters. 748 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 749 _mm_mullo_pi16(__m64 __m1, __m64 __m2) 750 { 751 return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2)); 752 } 753 754 /// Left-shifts each 16-bit signed integer element of the first 755 /// parameter, which is a 64-bit integer vector of [4 x i16], by the number 756 /// of bits specified by the second parameter, which is a 64-bit integer. The 757 /// lower 16 bits of the results are packed into a 64-bit integer vector of 758 /// [4 x i16]. 759 /// 760 /// \headerfile <x86intrin.h> 761 /// 762 /// This intrinsic corresponds to the <c> PSLLW </c> instruction. 763 /// 764 /// \param __m 765 /// A 64-bit integer vector of [4 x i16]. 766 /// \param __count 767 /// A 64-bit integer vector interpreted as a single 64-bit integer. 768 /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted 769 /// values. If \a __count is greater or equal to 16, the result is set to all 770 /// 0. 771 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 772 _mm_sll_pi16(__m64 __m, __m64 __count) 773 { 774 return __trunc64(__builtin_ia32_psllw128((__v8hi)__anyext128(__m), 775 (__v8hi)__anyext128(__count))); 776 } 777 778 /// Left-shifts each 16-bit signed integer element of a 64-bit integer 779 /// vector of [4 x i16] by the number of bits specified by a 32-bit integer. 780 /// The lower 16 bits of the results are packed into a 64-bit integer vector 781 /// of [4 x i16]. 782 /// 783 /// \headerfile <x86intrin.h> 784 /// 785 /// This intrinsic corresponds to the <c> PSLLW </c> instruction. 786 /// 787 /// \param __m 788 /// A 64-bit integer vector of [4 x i16]. 789 /// \param __count 790 /// A 32-bit integer value. 791 /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted 792 /// values. If \a __count is greater or equal to 16, the result is set to all 793 /// 0. 794 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 795 _mm_slli_pi16(__m64 __m, int __count) 796 { 797 return __trunc64(__builtin_ia32_psllwi128((__v8hi)__anyext128(__m), 798 __count)); 799 } 800 801 /// Left-shifts each 32-bit signed integer element of the first 802 /// parameter, which is a 64-bit integer vector of [2 x i32], by the number 803 /// of bits specified by the second parameter, which is a 64-bit integer. The 804 /// lower 32 bits of the results are packed into a 64-bit integer vector of 805 /// [2 x i32]. 806 /// 807 /// \headerfile <x86intrin.h> 808 /// 809 /// This intrinsic corresponds to the <c> PSLLD </c> instruction. 810 /// 811 /// \param __m 812 /// A 64-bit integer vector of [2 x i32]. 813 /// \param __count 814 /// A 64-bit integer vector interpreted as a single 64-bit integer. 815 /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted 816 /// values. If \a __count is greater or equal to 32, the result is set to all 817 /// 0. 818 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 819 _mm_sll_pi32(__m64 __m, __m64 __count) 820 { 821 return __trunc64(__builtin_ia32_pslld128((__v4si)__anyext128(__m), 822 (__v4si)__anyext128(__count))); 823 } 824 825 /// Left-shifts each 32-bit signed integer element of a 64-bit integer 826 /// vector of [2 x i32] by the number of bits specified by a 32-bit integer. 827 /// The lower 32 bits of the results are packed into a 64-bit integer vector 828 /// of [2 x i32]. 829 /// 830 /// \headerfile <x86intrin.h> 831 /// 832 /// This intrinsic corresponds to the <c> PSLLD </c> instruction. 833 /// 834 /// \param __m 835 /// A 64-bit integer vector of [2 x i32]. 836 /// \param __count 837 /// A 32-bit integer value. 838 /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted 839 /// values. If \a __count is greater or equal to 32, the result is set to all 840 /// 0. 841 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 842 _mm_slli_pi32(__m64 __m, int __count) 843 { 844 return __trunc64(__builtin_ia32_pslldi128((__v4si)__anyext128(__m), 845 __count)); 846 } 847 848 /// Left-shifts the first 64-bit integer parameter by the number of bits 849 /// specified by the second 64-bit integer parameter. The lower 64 bits of 850 /// result are returned. 851 /// 852 /// \headerfile <x86intrin.h> 853 /// 854 /// This intrinsic corresponds to the <c> PSLLQ </c> instruction. 855 /// 856 /// \param __m 857 /// A 64-bit integer vector interpreted as a single 64-bit integer. 858 /// \param __count 859 /// A 64-bit integer vector interpreted as a single 64-bit integer. 860 /// \returns A 64-bit integer vector containing the left-shifted value. If 861 /// \a __count is greater or equal to 64, the result is set to 0. 862 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 863 _mm_sll_si64(__m64 __m, __m64 __count) 864 { 865 return __trunc64(__builtin_ia32_psllq128((__v2di)__anyext128(__m), 866 (__v2di)__anyext128(__count))); 867 } 868 869 /// Left-shifts the first parameter, which is a 64-bit integer, by the 870 /// number of bits specified by the second parameter, which is a 32-bit 871 /// integer. The lower 64 bits of result are returned. 872 /// 873 /// \headerfile <x86intrin.h> 874 /// 875 /// This intrinsic corresponds to the <c> PSLLQ </c> instruction. 876 /// 877 /// \param __m 878 /// A 64-bit integer vector interpreted as a single 64-bit integer. 879 /// \param __count 880 /// A 32-bit integer value. 881 /// \returns A 64-bit integer vector containing the left-shifted value. If 882 /// \a __count is greater or equal to 64, the result is set to 0. 883 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 884 _mm_slli_si64(__m64 __m, int __count) 885 { 886 return __trunc64(__builtin_ia32_psllqi128((__v2di)__anyext128(__m), 887 __count)); 888 } 889 890 /// Right-shifts each 16-bit integer element of the first parameter, 891 /// which is a 64-bit integer vector of [4 x i16], by the number of bits 892 /// specified by the second parameter, which is a 64-bit integer. 893 /// 894 /// High-order bits are filled with the sign bit of the initial value of each 895 /// 16-bit element. The 16-bit results are packed into a 64-bit integer 896 /// vector of [4 x i16]. 897 /// 898 /// \headerfile <x86intrin.h> 899 /// 900 /// This intrinsic corresponds to the <c> PSRAW </c> instruction. 901 /// 902 /// \param __m 903 /// A 64-bit integer vector of [4 x i16]. 904 /// \param __count 905 /// A 64-bit integer vector interpreted as a single 64-bit integer. 906 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 907 /// values. 908 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 909 _mm_sra_pi16(__m64 __m, __m64 __count) 910 { 911 return __trunc64(__builtin_ia32_psraw128((__v8hi)__anyext128(__m), 912 (__v8hi)__anyext128(__count))); 913 } 914 915 /// Right-shifts each 16-bit integer element of a 64-bit integer vector 916 /// of [4 x i16] by the number of bits specified by a 32-bit integer. 917 /// 918 /// High-order bits are filled with the sign bit of the initial value of each 919 /// 16-bit element. The 16-bit results are packed into a 64-bit integer 920 /// vector of [4 x i16]. 921 /// 922 /// \headerfile <x86intrin.h> 923 /// 924 /// This intrinsic corresponds to the <c> PSRAW </c> instruction. 925 /// 926 /// \param __m 927 /// A 64-bit integer vector of [4 x i16]. 928 /// \param __count 929 /// A 32-bit integer value. 930 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 931 /// values. 932 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 933 _mm_srai_pi16(__m64 __m, int __count) 934 { 935 return __trunc64(__builtin_ia32_psrawi128((__v8hi)__anyext128(__m), 936 __count)); 937 } 938 939 /// Right-shifts each 32-bit integer element of the first parameter, 940 /// which is a 64-bit integer vector of [2 x i32], by the number of bits 941 /// specified by the second parameter, which is a 64-bit integer. 942 /// 943 /// High-order bits are filled with the sign bit of the initial value of each 944 /// 32-bit element. The 32-bit results are packed into a 64-bit integer 945 /// vector of [2 x i32]. 946 /// 947 /// \headerfile <x86intrin.h> 948 /// 949 /// This intrinsic corresponds to the <c> PSRAD </c> instruction. 950 /// 951 /// \param __m 952 /// A 64-bit integer vector of [2 x i32]. 953 /// \param __count 954 /// A 64-bit integer vector interpreted as a single 64-bit integer. 955 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 956 /// values. 957 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 958 _mm_sra_pi32(__m64 __m, __m64 __count) 959 { 960 return __trunc64(__builtin_ia32_psrad128((__v4si)__anyext128(__m), 961 (__v4si)__anyext128(__count))); 962 } 963 964 /// Right-shifts each 32-bit integer element of a 64-bit integer vector 965 /// of [2 x i32] by the number of bits specified by a 32-bit integer. 966 /// 967 /// High-order bits are filled with the sign bit of the initial value of each 968 /// 32-bit element. The 32-bit results are packed into a 64-bit integer 969 /// vector of [2 x i32]. 970 /// 971 /// \headerfile <x86intrin.h> 972 /// 973 /// This intrinsic corresponds to the <c> PSRAD </c> instruction. 974 /// 975 /// \param __m 976 /// A 64-bit integer vector of [2 x i32]. 977 /// \param __count 978 /// A 32-bit integer value. 979 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 980 /// values. 981 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 982 _mm_srai_pi32(__m64 __m, int __count) 983 { 984 return __trunc64(__builtin_ia32_psradi128((__v4si)__anyext128(__m), 985 __count)); 986 } 987 988 /// Right-shifts each 16-bit integer element of the first parameter, 989 /// which is a 64-bit integer vector of [4 x i16], by the number of bits 990 /// specified by the second parameter, which is a 64-bit integer. 991 /// 992 /// High-order bits are cleared. The 16-bit results are packed into a 64-bit 993 /// integer vector of [4 x i16]. 994 /// 995 /// \headerfile <x86intrin.h> 996 /// 997 /// This intrinsic corresponds to the <c> PSRLW </c> instruction. 998 /// 999 /// \param __m 1000 /// A 64-bit integer vector of [4 x i16]. 1001 /// \param __count 1002 /// A 64-bit integer vector interpreted as a single 64-bit integer. 1003 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 1004 /// values. 1005 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 1006 _mm_srl_pi16(__m64 __m, __m64 __count) 1007 { 1008 return __trunc64(__builtin_ia32_psrlw128((__v8hi)__anyext128(__m), 1009 (__v8hi)__anyext128(__count))); 1010 } 1011 1012 /// Right-shifts each 16-bit integer element of a 64-bit integer vector 1013 /// of [4 x i16] by the number of bits specified by a 32-bit integer. 1014 /// 1015 /// High-order bits are cleared. The 16-bit results are packed into a 64-bit 1016 /// integer vector of [4 x i16]. 1017 /// 1018 /// \headerfile <x86intrin.h> 1019 /// 1020 /// This intrinsic corresponds to the <c> PSRLW </c> instruction. 1021 /// 1022 /// \param __m 1023 /// A 64-bit integer vector of [4 x i16]. 1024 /// \param __count 1025 /// A 32-bit integer value. 1026 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 1027 /// values. 1028 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 1029 _mm_srli_pi16(__m64 __m, int __count) 1030 { 1031 return __trunc64(__builtin_ia32_psrlwi128((__v8hi)__anyext128(__m), 1032 __count)); 1033 } 1034 1035 /// Right-shifts each 32-bit integer element of the first parameter, 1036 /// which is a 64-bit integer vector of [2 x i32], by the number of bits 1037 /// specified by the second parameter, which is a 64-bit integer. 1038 /// 1039 /// High-order bits are cleared. The 32-bit results are packed into a 64-bit 1040 /// integer vector of [2 x i32]. 1041 /// 1042 /// \headerfile <x86intrin.h> 1043 /// 1044 /// This intrinsic corresponds to the <c> PSRLD </c> instruction. 1045 /// 1046 /// \param __m 1047 /// A 64-bit integer vector of [2 x i32]. 1048 /// \param __count 1049 /// A 64-bit integer vector interpreted as a single 64-bit integer. 1050 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 1051 /// values. 1052 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 1053 _mm_srl_pi32(__m64 __m, __m64 __count) 1054 { 1055 return __trunc64(__builtin_ia32_psrld128((__v4si)__anyext128(__m), 1056 (__v4si)__anyext128(__count))); 1057 } 1058 1059 /// Right-shifts each 32-bit integer element of a 64-bit integer vector 1060 /// of [2 x i32] by the number of bits specified by a 32-bit integer. 1061 /// 1062 /// High-order bits are cleared. The 32-bit results are packed into a 64-bit 1063 /// integer vector of [2 x i32]. 1064 /// 1065 /// \headerfile <x86intrin.h> 1066 /// 1067 /// This intrinsic corresponds to the <c> PSRLD </c> instruction. 1068 /// 1069 /// \param __m 1070 /// A 64-bit integer vector of [2 x i32]. 1071 /// \param __count 1072 /// A 32-bit integer value. 1073 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 1074 /// values. 1075 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 1076 _mm_srli_pi32(__m64 __m, int __count) 1077 { 1078 return __trunc64(__builtin_ia32_psrldi128((__v4si)__anyext128(__m), 1079 __count)); 1080 } 1081 1082 /// Right-shifts the first 64-bit integer parameter by the number of bits 1083 /// specified by the second 64-bit integer parameter. 1084 /// 1085 /// High-order bits are cleared. 1086 /// 1087 /// \headerfile <x86intrin.h> 1088 /// 1089 /// This intrinsic corresponds to the <c> PSRLQ </c> instruction. 1090 /// 1091 /// \param __m 1092 /// A 64-bit integer vector interpreted as a single 64-bit integer. 1093 /// \param __count 1094 /// A 64-bit integer vector interpreted as a single 64-bit integer. 1095 /// \returns A 64-bit integer vector containing the right-shifted value. 1096 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 1097 _mm_srl_si64(__m64 __m, __m64 __count) 1098 { 1099 return __trunc64(__builtin_ia32_psrlq128((__v2di)__anyext128(__m), 1100 (__v2di)__anyext128(__count))); 1101 } 1102 1103 /// Right-shifts the first parameter, which is a 64-bit integer, by the 1104 /// number of bits specified by the second parameter, which is a 32-bit 1105 /// integer. 1106 /// 1107 /// High-order bits are cleared. 1108 /// 1109 /// \headerfile <x86intrin.h> 1110 /// 1111 /// This intrinsic corresponds to the <c> PSRLQ </c> instruction. 1112 /// 1113 /// \param __m 1114 /// A 64-bit integer vector interpreted as a single 64-bit integer. 1115 /// \param __count 1116 /// A 32-bit integer value. 1117 /// \returns A 64-bit integer vector containing the right-shifted value. 1118 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 1119 _mm_srli_si64(__m64 __m, int __count) 1120 { 1121 return __trunc64(__builtin_ia32_psrlqi128((__v2di)__anyext128(__m), 1122 __count)); 1123 } 1124 1125 /// Performs a bitwise AND of two 64-bit integer vectors. 1126 /// 1127 /// \headerfile <x86intrin.h> 1128 /// 1129 /// This intrinsic corresponds to the <c> PAND </c> instruction. 1130 /// 1131 /// \param __m1 1132 /// A 64-bit integer vector. 1133 /// \param __m2 1134 /// A 64-bit integer vector. 1135 /// \returns A 64-bit integer vector containing the bitwise AND of both 1136 /// parameters. 1137 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 1138 _mm_and_si64(__m64 __m1, __m64 __m2) 1139 { 1140 return (__m64)(((__v1du)__m1) & ((__v1du)__m2)); 1141 } 1142 1143 /// Performs a bitwise NOT of the first 64-bit integer vector, and then 1144 /// performs a bitwise AND of the intermediate result and the second 64-bit 1145 /// integer vector. 1146 /// 1147 /// \headerfile <x86intrin.h> 1148 /// 1149 /// This intrinsic corresponds to the <c> PANDN </c> instruction. 1150 /// 1151 /// \param __m1 1152 /// A 64-bit integer vector. The one's complement of this parameter is used 1153 /// in the bitwise AND. 1154 /// \param __m2 1155 /// A 64-bit integer vector. 1156 /// \returns A 64-bit integer vector containing the bitwise AND of the second 1157 /// parameter and the one's complement of the first parameter. 1158 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 1159 _mm_andnot_si64(__m64 __m1, __m64 __m2) 1160 { 1161 return (__m64)(~((__v1du)__m1) & ((__v1du)__m2)); 1162 } 1163 1164 /// Performs a bitwise OR of two 64-bit integer vectors. 1165 /// 1166 /// \headerfile <x86intrin.h> 1167 /// 1168 /// This intrinsic corresponds to the <c> POR </c> instruction. 1169 /// 1170 /// \param __m1 1171 /// A 64-bit integer vector. 1172 /// \param __m2 1173 /// A 64-bit integer vector. 1174 /// \returns A 64-bit integer vector containing the bitwise OR of both 1175 /// parameters. 1176 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 1177 _mm_or_si64(__m64 __m1, __m64 __m2) 1178 { 1179 return (__m64)(((__v1du)__m1) | ((__v1du)__m2)); 1180 } 1181 1182 /// Performs a bitwise exclusive OR of two 64-bit integer vectors. 1183 /// 1184 /// \headerfile <x86intrin.h> 1185 /// 1186 /// This intrinsic corresponds to the <c> PXOR </c> instruction. 1187 /// 1188 /// \param __m1 1189 /// A 64-bit integer vector. 1190 /// \param __m2 1191 /// A 64-bit integer vector. 1192 /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both 1193 /// parameters. 1194 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 1195 _mm_xor_si64(__m64 __m1, __m64 __m2) 1196 { 1197 return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2)); 1198 } 1199 1200 /// Compares the 8-bit integer elements of two 64-bit integer vectors of 1201 /// [8 x i8] to determine if the element of the first vector is equal to the 1202 /// corresponding element of the second vector. 1203 /// 1204 /// Each comparison returns 0 for false, 0xFF for true. 1205 /// 1206 /// \headerfile <x86intrin.h> 1207 /// 1208 /// This intrinsic corresponds to the <c> PCMPEQB </c> instruction. 1209 /// 1210 /// \param __m1 1211 /// A 64-bit integer vector of [8 x i8]. 1212 /// \param __m2 1213 /// A 64-bit integer vector of [8 x i8]. 1214 /// \returns A 64-bit integer vector of [8 x i8] containing the comparison 1215 /// results. 1216 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 1217 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) 1218 { 1219 return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2)); 1220 } 1221 1222 /// Compares the 16-bit integer elements of two 64-bit integer vectors of 1223 /// [4 x i16] to determine if the element of the first vector is equal to the 1224 /// corresponding element of the second vector. 1225 /// 1226 /// Each comparison returns 0 for false, 0xFFFF for true. 1227 /// 1228 /// \headerfile <x86intrin.h> 1229 /// 1230 /// This intrinsic corresponds to the <c> PCMPEQW </c> instruction. 1231 /// 1232 /// \param __m1 1233 /// A 64-bit integer vector of [4 x i16]. 1234 /// \param __m2 1235 /// A 64-bit integer vector of [4 x i16]. 1236 /// \returns A 64-bit integer vector of [4 x i16] containing the comparison 1237 /// results. 1238 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 1239 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) 1240 { 1241 return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2)); 1242 } 1243 1244 /// Compares the 32-bit integer elements of two 64-bit integer vectors of 1245 /// [2 x i32] to determine if the element of the first vector is equal to the 1246 /// corresponding element of the second vector. 1247 /// 1248 /// Each comparison returns 0 for false, 0xFFFFFFFF for true. 1249 /// 1250 /// \headerfile <x86intrin.h> 1251 /// 1252 /// This intrinsic corresponds to the <c> PCMPEQD </c> instruction. 1253 /// 1254 /// \param __m1 1255 /// A 64-bit integer vector of [2 x i32]. 1256 /// \param __m2 1257 /// A 64-bit integer vector of [2 x i32]. 1258 /// \returns A 64-bit integer vector of [2 x i32] containing the comparison 1259 /// results. 1260 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 1261 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) 1262 { 1263 return (__m64)(((__v2si)__m1) == ((__v2si)__m2)); 1264 } 1265 1266 /// Compares the 8-bit integer elements of two 64-bit integer vectors of 1267 /// [8 x i8] to determine if the element of the first vector is greater than 1268 /// the corresponding element of the second vector. 1269 /// 1270 /// Each comparison returns 0 for false, 0xFF for true. 1271 /// 1272 /// \headerfile <x86intrin.h> 1273 /// 1274 /// This intrinsic corresponds to the <c> PCMPGTB </c> instruction. 1275 /// 1276 /// \param __m1 1277 /// A 64-bit integer vector of [8 x i8]. 1278 /// \param __m2 1279 /// A 64-bit integer vector of [8 x i8]. 1280 /// \returns A 64-bit integer vector of [8 x i8] containing the comparison 1281 /// results. 1282 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 1283 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) 1284 { 1285 /* This function always performs a signed comparison, but __v8qi is a char 1286 which may be signed or unsigned, so use __v8qs. */ 1287 return (__m64)((__v8qs)__m1 > (__v8qs)__m2); 1288 } 1289 1290 /// Compares the 16-bit integer elements of two 64-bit integer vectors of 1291 /// [4 x i16] to determine if the element of the first vector is greater than 1292 /// the corresponding element of the second vector. 1293 /// 1294 /// Each comparison returns 0 for false, 0xFFFF for true. 1295 /// 1296 /// \headerfile <x86intrin.h> 1297 /// 1298 /// This intrinsic corresponds to the <c> PCMPGTW </c> instruction. 1299 /// 1300 /// \param __m1 1301 /// A 64-bit integer vector of [4 x i16]. 1302 /// \param __m2 1303 /// A 64-bit integer vector of [4 x i16]. 1304 /// \returns A 64-bit integer vector of [4 x i16] containing the comparison 1305 /// results. 1306 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 1307 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) 1308 { 1309 return (__m64)((__v4hi)__m1 > (__v4hi)__m2); 1310 } 1311 1312 /// Compares the 32-bit integer elements of two 64-bit integer vectors of 1313 /// [2 x i32] to determine if the element of the first vector is greater than 1314 /// the corresponding element of the second vector. 1315 /// 1316 /// Each comparison returns 0 for false, 0xFFFFFFFF for true. 1317 /// 1318 /// \headerfile <x86intrin.h> 1319 /// 1320 /// This intrinsic corresponds to the <c> PCMPGTD </c> instruction. 1321 /// 1322 /// \param __m1 1323 /// A 64-bit integer vector of [2 x i32]. 1324 /// \param __m2 1325 /// A 64-bit integer vector of [2 x i32]. 1326 /// \returns A 64-bit integer vector of [2 x i32] containing the comparison 1327 /// results. 1328 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 1329 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) 1330 { 1331 return (__m64)((__v2si)__m1 > (__v2si)__m2); 1332 } 1333 1334 /// Constructs a 64-bit integer vector initialized to zero. 1335 /// 1336 /// \headerfile <x86intrin.h> 1337 /// 1338 /// This intrinsic corresponds to the <c> PXOR </c> instruction. 1339 /// 1340 /// \returns An initialized 64-bit integer vector with all elements set to zero. 1341 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR 1342 _mm_setzero_si64(void) { 1343 return __extension__(__m64){0LL}; 1344 } 1345 1346 /// Constructs a 64-bit integer vector initialized with the specified 1347 /// 32-bit integer values. 1348 /// 1349 /// \headerfile <x86intrin.h> 1350 /// 1351 /// This intrinsic is a utility function and does not correspond to a specific 1352 /// instruction. 1353 /// 1354 /// \param __i1 1355 /// A 32-bit integer value used to initialize the upper 32 bits of the 1356 /// result. 1357 /// \param __i0 1358 /// A 32-bit integer value used to initialize the lower 32 bits of the 1359 /// result. 1360 /// \returns An initialized 64-bit integer vector. 1361 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR 1362 _mm_set_pi32(int __i1, int __i0) { 1363 return __extension__(__m64)(__v2si){__i0, __i1}; 1364 } 1365 1366 /// Constructs a 64-bit integer vector initialized with the specified 1367 /// 16-bit integer values. 1368 /// 1369 /// \headerfile <x86intrin.h> 1370 /// 1371 /// This intrinsic is a utility function and does not correspond to a specific 1372 /// instruction. 1373 /// 1374 /// \param __s3 1375 /// A 16-bit integer value used to initialize bits [63:48] of the result. 1376 /// \param __s2 1377 /// A 16-bit integer value used to initialize bits [47:32] of the result. 1378 /// \param __s1 1379 /// A 16-bit integer value used to initialize bits [31:16] of the result. 1380 /// \param __s0 1381 /// A 16-bit integer value used to initialize bits [15:0] of the result. 1382 /// \returns An initialized 64-bit integer vector. 1383 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR 1384 _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) { 1385 return __extension__(__m64)(__v4hi){__s0, __s1, __s2, __s3}; 1386 } 1387 1388 /// Constructs a 64-bit integer vector initialized with the specified 1389 /// 8-bit integer values. 1390 /// 1391 /// \headerfile <x86intrin.h> 1392 /// 1393 /// This intrinsic is a utility function and does not correspond to a specific 1394 /// instruction. 1395 /// 1396 /// \param __b7 1397 /// An 8-bit integer value used to initialize bits [63:56] of the result. 1398 /// \param __b6 1399 /// An 8-bit integer value used to initialize bits [55:48] of the result. 1400 /// \param __b5 1401 /// An 8-bit integer value used to initialize bits [47:40] of the result. 1402 /// \param __b4 1403 /// An 8-bit integer value used to initialize bits [39:32] of the result. 1404 /// \param __b3 1405 /// An 8-bit integer value used to initialize bits [31:24] of the result. 1406 /// \param __b2 1407 /// An 8-bit integer value used to initialize bits [23:16] of the result. 1408 /// \param __b1 1409 /// An 8-bit integer value used to initialize bits [15:8] of the result. 1410 /// \param __b0 1411 /// An 8-bit integer value used to initialize bits [7:0] of the result. 1412 /// \returns An initialized 64-bit integer vector. 1413 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR 1414 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, 1415 char __b1, char __b0) { 1416 return __extension__(__m64)(__v8qi){__b0, __b1, __b2, __b3, 1417 __b4, __b5, __b6, __b7}; 1418 } 1419 1420 /// Constructs a 64-bit integer vector of [2 x i32], with each of the 1421 /// 32-bit integer vector elements set to the specified 32-bit integer 1422 /// value. 1423 /// 1424 /// \headerfile <x86intrin.h> 1425 /// 1426 /// This intrinsic is a utility function and does not correspond to a specific 1427 /// instruction. 1428 /// 1429 /// \param __i 1430 /// A 32-bit integer value used to initialize each vector element of the 1431 /// result. 1432 /// \returns An initialized 64-bit integer vector of [2 x i32]. 1433 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR 1434 _mm_set1_pi32(int __i) { 1435 return _mm_set_pi32(__i, __i); 1436 } 1437 1438 /// Constructs a 64-bit integer vector of [4 x i16], with each of the 1439 /// 16-bit integer vector elements set to the specified 16-bit integer 1440 /// value. 1441 /// 1442 /// \headerfile <x86intrin.h> 1443 /// 1444 /// This intrinsic is a utility function and does not correspond to a specific 1445 /// instruction. 1446 /// 1447 /// \param __w 1448 /// A 16-bit integer value used to initialize each vector element of the 1449 /// result. 1450 /// \returns An initialized 64-bit integer vector of [4 x i16]. 1451 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR 1452 _mm_set1_pi16(short __w) { 1453 return _mm_set_pi16(__w, __w, __w, __w); 1454 } 1455 1456 /// Constructs a 64-bit integer vector of [8 x i8], with each of the 1457 /// 8-bit integer vector elements set to the specified 8-bit integer value. 1458 /// 1459 /// \headerfile <x86intrin.h> 1460 /// 1461 /// This intrinsic is a utility function and does not correspond to a specific 1462 /// instruction. 1463 /// 1464 /// \param __b 1465 /// An 8-bit integer value used to initialize each vector element of the 1466 /// result. 1467 /// \returns An initialized 64-bit integer vector of [8 x i8]. 1468 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR 1469 _mm_set1_pi8(char __b) { 1470 return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); 1471 } 1472 1473 /// Constructs a 64-bit integer vector, initialized in reverse order with 1474 /// the specified 32-bit integer values. 1475 /// 1476 /// \headerfile <x86intrin.h> 1477 /// 1478 /// This intrinsic is a utility function and does not correspond to a specific 1479 /// instruction. 1480 /// 1481 /// \param __i0 1482 /// A 32-bit integer value used to initialize the lower 32 bits of the 1483 /// result. 1484 /// \param __i1 1485 /// A 32-bit integer value used to initialize the upper 32 bits of the 1486 /// result. 1487 /// \returns An initialized 64-bit integer vector. 1488 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR 1489 _mm_setr_pi32(int __i0, int __i1) { 1490 return _mm_set_pi32(__i1, __i0); 1491 } 1492 1493 /// Constructs a 64-bit integer vector, initialized in reverse order with 1494 /// the specified 16-bit integer values. 1495 /// 1496 /// \headerfile <x86intrin.h> 1497 /// 1498 /// This intrinsic is a utility function and does not correspond to a specific 1499 /// instruction. 1500 /// 1501 /// \param __w0 1502 /// A 16-bit integer value used to initialize bits [15:0] of the result. 1503 /// \param __w1 1504 /// A 16-bit integer value used to initialize bits [31:16] of the result. 1505 /// \param __w2 1506 /// A 16-bit integer value used to initialize bits [47:32] of the result. 1507 /// \param __w3 1508 /// A 16-bit integer value used to initialize bits [63:48] of the result. 1509 /// \returns An initialized 64-bit integer vector. 1510 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR 1511 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) { 1512 return _mm_set_pi16(__w3, __w2, __w1, __w0); 1513 } 1514 1515 /// Constructs a 64-bit integer vector, initialized in reverse order with 1516 /// the specified 8-bit integer values. 1517 /// 1518 /// \headerfile <x86intrin.h> 1519 /// 1520 /// This intrinsic is a utility function and does not correspond to a specific 1521 /// instruction. 1522 /// 1523 /// \param __b0 1524 /// An 8-bit integer value used to initialize bits [7:0] of the result. 1525 /// \param __b1 1526 /// An 8-bit integer value used to initialize bits [15:8] of the result. 1527 /// \param __b2 1528 /// An 8-bit integer value used to initialize bits [23:16] of the result. 1529 /// \param __b3 1530 /// An 8-bit integer value used to initialize bits [31:24] of the result. 1531 /// \param __b4 1532 /// An 8-bit integer value used to initialize bits [39:32] of the result. 1533 /// \param __b5 1534 /// An 8-bit integer value used to initialize bits [47:40] of the result. 1535 /// \param __b6 1536 /// An 8-bit integer value used to initialize bits [55:48] of the result. 1537 /// \param __b7 1538 /// An 8-bit integer value used to initialize bits [63:56] of the result. 1539 /// \returns An initialized 64-bit integer vector. 1540 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR 1541 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, 1542 char __b6, char __b7) { 1543 return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 1544 } 1545 1546 #undef __anyext128 1547 #undef __trunc64 1548 #undef __DEFAULT_FN_ATTRS_SSE2 1549 1550 /* Aliases for compatibility. */ 1551 #define _m_empty _mm_empty 1552 #define _m_from_int _mm_cvtsi32_si64 1553 #define _m_from_int64 _mm_cvtsi64_m64 1554 #define _m_to_int _mm_cvtsi64_si32 1555 #define _m_to_int64 _mm_cvtm64_si64 1556 #define _m_packsswb _mm_packs_pi16 1557 #define _m_packssdw _mm_packs_pi32 1558 #define _m_packuswb _mm_packs_pu16 1559 #define _m_punpckhbw _mm_unpackhi_pi8 1560 #define _m_punpckhwd _mm_unpackhi_pi16 1561 #define _m_punpckhdq _mm_unpackhi_pi32 1562 #define _m_punpcklbw _mm_unpacklo_pi8 1563 #define _m_punpcklwd _mm_unpacklo_pi16 1564 #define _m_punpckldq _mm_unpacklo_pi32 1565 #define _m_paddb _mm_add_pi8 1566 #define _m_paddw _mm_add_pi16 1567 #define _m_paddd _mm_add_pi32 1568 #define _m_paddsb _mm_adds_pi8 1569 #define _m_paddsw _mm_adds_pi16 1570 #define _m_paddusb _mm_adds_pu8 1571 #define _m_paddusw _mm_adds_pu16 1572 #define _m_psubb _mm_sub_pi8 1573 #define _m_psubw _mm_sub_pi16 1574 #define _m_psubd _mm_sub_pi32 1575 #define _m_psubsb _mm_subs_pi8 1576 #define _m_psubsw _mm_subs_pi16 1577 #define _m_psubusb _mm_subs_pu8 1578 #define _m_psubusw _mm_subs_pu16 1579 #define _m_pmaddwd _mm_madd_pi16 1580 #define _m_pmulhw _mm_mulhi_pi16 1581 #define _m_pmullw _mm_mullo_pi16 1582 #define _m_psllw _mm_sll_pi16 1583 #define _m_psllwi _mm_slli_pi16 1584 #define _m_pslld _mm_sll_pi32 1585 #define _m_pslldi _mm_slli_pi32 1586 #define _m_psllq _mm_sll_si64 1587 #define _m_psllqi _mm_slli_si64 1588 #define _m_psraw _mm_sra_pi16 1589 #define _m_psrawi _mm_srai_pi16 1590 #define _m_psrad _mm_sra_pi32 1591 #define _m_psradi _mm_srai_pi32 1592 #define _m_psrlw _mm_srl_pi16 1593 #define _m_psrlwi _mm_srli_pi16 1594 #define _m_psrld _mm_srl_pi32 1595 #define _m_psrldi _mm_srli_pi32 1596 #define _m_psrlq _mm_srl_si64 1597 #define _m_psrlqi _mm_srli_si64 1598 #define _m_pand _mm_and_si64 1599 #define _m_pandn _mm_andnot_si64 1600 #define _m_por _mm_or_si64 1601 #define _m_pxor _mm_xor_si64 1602 #define _m_pcmpeqb _mm_cmpeq_pi8 1603 #define _m_pcmpeqw _mm_cmpeq_pi16 1604 #define _m_pcmpeqd _mm_cmpeq_pi32 1605 #define _m_pcmpgtb _mm_cmpgt_pi8 1606 #define _m_pcmpgtw _mm_cmpgt_pi16 1607 #define _m_pcmpgtd _mm_cmpgt_pi32 1608 1609 #endif /* __MMINTRIN_H */ 1610 1611