1 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx 2 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx 3 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx 4 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx 5 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx 6 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx 7 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx 8 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx 9 10 11 #include <immintrin.h> 12 #include "builtin_test_helpers.h" 13 14 __m64 test_mm_abs_pi8(__m64 a) { 15 // CHECK-LABEL: test_mm_abs_pi8 16 // CHECK: call <8 x i8> @llvm.abs.v8i8( 17 return _mm_abs_pi8(a); 18 } 19 20 __m64 test_mm_abs_pi16(__m64 a) { 21 // CHECK-LABEL: test_mm_abs_pi16 22 // CHECK: call <4 x i16> @llvm.abs.v4i16( 23 return _mm_abs_pi16(a); 24 } 25 26 __m64 test_mm_abs_pi32(__m64 a) { 27 // CHECK-LABEL: test_mm_abs_pi32 28 // CHECK: call <2 x i32> @llvm.abs.v2i32( 29 return _mm_abs_pi32(a); 30 } 31 32 __m64 test_mm_add_pi8(__m64 a, __m64 b) { 33 // CHECK-LABEL: test_mm_add_pi8 34 // CHECK: add <8 x i8> {{%.*}}, {{%.*}} 35 return _mm_add_pi8(a, b); 36 } 37 38 __m64 test_mm_add_pi16(__m64 a, __m64 b) { 39 // CHECK-LABEL: test_mm_add_pi16 40 // CHECK: add <4 x i16> {{%.*}}, {{%.*}} 41 return _mm_add_pi16(a, b); 42 } 43 44 __m64 test_mm_add_pi32(__m64 a, __m64 b) { 45 // CHECK-LABEL: test_mm_add_pi32 46 // CHECK: add <2 x i32> {{%.*}}, {{%.*}} 47 return _mm_add_pi32(a, b); 48 } 49 50 __m64 test_mm_add_si64(__m64 a, __m64 b) { 51 // CHECK-LABEL: test_mm_add_si64 52 // CHECK: add i64 {{%.*}}, {{%.*}} 53 return _mm_add_si64(a, b); 54 } 55 56 __m64 test_mm_adds_pi8(__m64 a, __m64 b) { 57 // CHECK-LABEL: test_mm_adds_pi8 58 // CHECK: call <8 x i8> @llvm.sadd.sat.v8i8( 59 return _mm_adds_pi8(a, b); 60 } 61 62 __m64 test_mm_adds_pi16(__m64 a, __m64 b) { 63 // CHECK-LABEL: test_mm_adds_pi16 64 // CHECK: call <4 x i16> @llvm.sadd.sat.v4i16( 65 return _mm_adds_pi16(a, b); 66 } 67 68 __m64 test_mm_adds_pu8(__m64 a, __m64 b) { 69 // CHECK-LABEL: test_mm_adds_pu8 70 // CHECK: call <8 x i8> @llvm.uadd.sat.v8i8( 71 return _mm_adds_pu8(a, b); 72 } 73 74 __m64 test_mm_adds_pu16(__m64 a, __m64 b) { 75 // CHECK-LABEL: test_mm_adds_pu16 76 // CHECK: call <4 x i16> @llvm.uadd.sat.v4i16( 77 return _mm_adds_pu16(a, b); 78 } 79 80 __m64 test_mm_alignr_pi8(__m64 a, __m64 b) { 81 // CHECK-LABEL: test_mm_alignr_pi8 82 // CHECK: shufflevector <16 x i8> {{%.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17> 83 return _mm_alignr_pi8(a, b, 2); 84 } 85 86 __m64 test_mm_and_si64(__m64 a, __m64 b) { 87 // CHECK-LABEL: test_mm_and_si64 88 // CHECK: and <1 x i64> {{%.*}}, {{%.*}} 89 return _mm_and_si64(a, b); 90 } 91 92 __m64 test_mm_andnot_si64(__m64 a, __m64 b) { 93 // CHECK-LABEL: test_mm_andnot_si64 94 // CHECK: [[TMP:%.*]] = xor <1 x i64> {{%.*}}, splat (i64 -1) 95 // CHECK: and <1 x i64> [[TMP]], {{%.*}} 96 return _mm_andnot_si64(a, b); 97 } 98 99 __m64 test_mm_avg_pu8(__m64 a, __m64 b) { 100 // CHECK-LABEL: test_mm_avg_pu8 101 // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b( 102 return _mm_avg_pu8(a, b); 103 } 104 105 __m64 test_mm_avg_pu16(__m64 a, __m64 b) { 106 // CHECK-LABEL: test_mm_avg_pu16 107 // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w( 108 return _mm_avg_pu16(a, b); 109 } 110 111 __m64 test_mm_cmpeq_pi8(__m64 a, __m64 b) { 112 // CHECK-LABEL: test_mm_cmpeq_pi8 113 // CHECK: [[CMP:%.*]] = icmp eq <8 x i8> {{%.*}}, {{%.*}} 114 // CHECK-NEXT: {{%.*}} = sext <8 x i1> [[CMP]] to <8 x i8> 115 return _mm_cmpeq_pi8(a, b); 116 } 117 118 __m64 test_mm_cmpeq_pi16(__m64 a, __m64 b) { 119 // CHECK-LABEL: test_mm_cmpeq_pi16 120 // CHECK: [[CMP:%.*]] = icmp eq <4 x i16> {{%.*}}, {{%.*}} 121 // CHECK-NEXT: {{%.*}} = sext <4 x i1> [[CMP]] to <4 x i16> 122 return _mm_cmpeq_pi16(a, b); 123 } 124 125 __m64 test_mm_cmpeq_pi32(__m64 a, __m64 b) { 126 // CHECK-LABEL: test_mm_cmpeq_pi32 127 // CHECK: [[CMP:%.*]] = icmp eq <2 x i32> {{%.*}}, {{%.*}} 128 // CHECK-NEXT: {{%.*}} = sext <2 x i1> [[CMP]] to <2 x i32> 129 return _mm_cmpeq_pi32(a, b); 130 } 131 132 __m64 test_mm_cmpgt_pi8(__m64 a, __m64 b) { 133 // CHECK-LABEL: test_mm_cmpgt_pi8 134 // CHECK: [[CMP:%.*]] = icmp sgt <8 x i8> {{%.*}}, {{%.*}} 135 // CHECK-NEXT: {{%.*}} = sext <8 x i1> [[CMP]] to <8 x i8> 136 return _mm_cmpgt_pi8(a, b); 137 } 138 139 __m64 test_mm_cmpgt_pi16(__m64 a, __m64 b) { 140 // CHECK-LABEL: test_mm_cmpgt_pi16 141 // CHECK: [[CMP:%.*]] = icmp sgt <4 x i16> {{%.*}}, {{%.*}} 142 // CHECK-NEXT: {{%.*}} = sext <4 x i1> [[CMP]] to <4 x i16> 143 return _mm_cmpgt_pi16(a, b); 144 } 145 146 __m64 test_mm_cmpgt_pi32(__m64 a, __m64 b) { 147 // CHECK-LABEL: test_mm_cmpgt_pi32 148 // CHECK: [[CMP:%.*]] = icmp sgt <2 x i32> {{%.*}}, {{%.*}} 149 // CHECK-NEXT: {{%.*}} = sext <2 x i1> [[CMP]] to <2 x i32> 150 return _mm_cmpgt_pi32(a, b); 151 } 152 153 __m128 test_mm_cvt_pi2ps(__m128 a, __m64 b) { 154 // CHECK-LABEL: test_mm_cvt_pi2ps 155 // CHECK: sitofp <4 x i32> {{%.*}} to <4 x float> 156 return _mm_cvt_pi2ps(a, b); 157 } 158 159 __m64 test_mm_cvt_ps2pi(__m128 a) { 160 // CHECK-LABEL: test_mm_cvt_ps2pi 161 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq( 162 return _mm_cvt_ps2pi(a); 163 } 164 165 __m64 test_mm_cvtpd_pi32(__m128d a) { 166 // CHECK-LABEL: test_mm_cvtpd_pi32 167 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtpd2dq( 168 return _mm_cvtpd_pi32(a); 169 } 170 171 __m128 test_mm_cvtpi16_ps(__m64 a) { 172 // CHECK-LABEL: test_mm_cvtpi16_ps 173 // CHECK: sitofp <4 x i16> {{%.*}} to <4 x float> 174 return _mm_cvtpi16_ps(a); 175 } 176 177 __m128d test_mm_cvtpi32_pd(__m64 a) { 178 // CHECK-LABEL: test_mm_cvtpi32_pd 179 // CHECK: sitofp <2 x i32> {{%.*}} to <2 x double> 180 return _mm_cvtpi32_pd(a); 181 } 182 183 __m128 test_mm_cvtpi32_ps(__m128 a, __m64 b) { 184 // CHECK-LABEL: test_mm_cvtpi32_ps 185 // CHECK: sitofp <4 x i32> {{%.*}} to <4 x float> 186 return _mm_cvtpi32_ps(a, b); 187 } 188 189 __m128 test_mm_cvtpi32x2_ps(__m64 a, __m64 b) { 190 // CHECK-LABEL: test_mm_cvtpi32x2_ps 191 // CHECK: sitofp <4 x i32> {{%.*}} to <4 x float> 192 return _mm_cvtpi32x2_ps(a, b); 193 } 194 195 __m64 test_mm_cvtps_pi16(__m128 a) { 196 // CHECK-LABEL: test_mm_cvtps_pi16 197 // CHECK: [[TMP0:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> {{%.*}}) 198 // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP0]], 199 return _mm_cvtps_pi16(a); 200 } 201 202 __m64 test_mm_cvtps_pi32(__m128 a) { 203 // CHECK-LABEL: test_mm_cvtps_pi32 204 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq( 205 return _mm_cvtps_pi32(a); 206 } 207 208 __m64 test_mm_cvtsi32_si64(int a) { 209 // CHECK-LABEL: test_mm_cvtsi32_si64 210 // CHECK: insertelement <2 x i32> 211 return _mm_cvtsi32_si64(a); 212 } 213 214 int test_mm_cvtsi64_si32(__m64 a) { 215 // CHECK-LABEL: test_mm_cvtsi64_si32 216 // CHECK: extractelement <2 x i32> 217 return _mm_cvtsi64_si32(a); 218 } 219 220 __m64 test_mm_cvttpd_pi32(__m128d a) { 221 // CHECK-LABEL: test_mm_cvttpd_pi32 222 // CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq( 223 return _mm_cvttpd_pi32(a); 224 } 225 226 __m64 test_mm_cvttps_pi32(__m128 a) { 227 // CHECK-LABEL: test_mm_cvttps_pi32 228 // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq( 229 return _mm_cvttps_pi32(a); 230 } 231 232 int test_mm_extract_pi16(__m64 a) { 233 // CHECK-LABEL: test_mm_extract_pi16 234 // CHECK: extractelement <4 x i16> {{%.*}}, i64 2 235 return _mm_extract_pi16(a, 2); 236 } 237 238 __m64 test_m_from_int(int a) { 239 // CHECK-LABEL: test_m_from_int 240 // CHECK: insertelement <2 x i32> 241 return _m_from_int(a); 242 } 243 244 __m64 test_m_from_int64(long long a) { 245 // CHECK-LABEL: test_m_from_int64 246 return _m_from_int64(a); 247 } 248 249 __m64 test_mm_hadd_pi16(__m64 a, __m64 b) { 250 // CHECK-LABEL: test_mm_hadd_pi16 251 // CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.w.128( 252 return _mm_hadd_pi16(a, b); 253 } 254 255 __m64 test_mm_hadd_pi32(__m64 a, __m64 b) { 256 // CHECK-LABEL: test_mm_hadd_pi32 257 // CHECK: call <4 x i32> @llvm.x86.ssse3.phadd.d.128( 258 return _mm_hadd_pi32(a, b); 259 } 260 261 __m64 test_mm_hadds_pi16(__m64 a, __m64 b) { 262 // CHECK-LABEL: test_mm_hadds_pi16 263 // CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.sw.128( 264 return _mm_hadds_pi16(a, b); 265 } 266 267 __m64 test_mm_hsub_pi16(__m64 a, __m64 b) { 268 // CHECK-LABEL: test_mm_hsub_pi16 269 // CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.w.128( 270 return _mm_hsub_pi16(a, b); 271 } 272 273 __m64 test_mm_hsub_pi32(__m64 a, __m64 b) { 274 // CHECK-LABEL: test_mm_hsub_pi32 275 // CHECK: call <4 x i32> @llvm.x86.ssse3.phsub.d.128( 276 return _mm_hsub_pi32(a, b); 277 } 278 279 __m64 test_mm_hsubs_pi16(__m64 a, __m64 b) { 280 // CHECK-LABEL: test_mm_hsubs_pi16 281 // CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.sw.128( 282 return _mm_hsubs_pi16(a, b); 283 } 284 285 __m64 test_mm_insert_pi16(__m64 a, int d) { 286 // CHECK-LABEL: test_mm_insert_pi16 287 // CHECK: insertelement <4 x i16> 288 return _mm_insert_pi16(a, d, 2); 289 } 290 291 __m64 test_mm_madd_pi16(__m64 a, __m64 b) { 292 // CHECK-LABEL: test_mm_madd_pi16 293 // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd( 294 return _mm_madd_pi16(a, b); 295 } 296 297 __m64 test_mm_maddubs_pi16(__m64 a, __m64 b) { 298 // CHECK-LABEL: test_mm_maddubs_pi16 299 // CHECK: call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128( 300 return _mm_maddubs_pi16(a, b); 301 } 302 303 void test_mm_maskmove_si64(__m64 d, __m64 n, char *p) { 304 // CHECK-LABEL: test_mm_maskmove_si64 305 // CHECK: call void @llvm.x86.sse2.maskmov.dqu( 306 _mm_maskmove_si64(d, n, p); 307 } 308 309 __m64 test_mm_max_pi16(__m64 a, __m64 b) { 310 // CHECK-LABEL: test_mm_max_pi16 311 // CHECK: call <4 x i16> @llvm.smax.v4i16( 312 return _mm_max_pi16(a, b); 313 } 314 315 __m64 test_mm_max_pu8(__m64 a, __m64 b) { 316 // CHECK-LABEL: test_mm_max_pu8 317 // CHECK: call <8 x i8> @llvm.umax.v8i8( 318 return _mm_max_pu8(a, b); 319 } 320 321 __m64 test_mm_min_pi16(__m64 a, __m64 b) { 322 // CHECK-LABEL: test_mm_min_pi16 323 // CHECK: call <4 x i16> @llvm.smin.v4i16( 324 return _mm_min_pi16(a, b); 325 } 326 327 __m64 test_mm_min_pu8(__m64 a, __m64 b) { 328 // CHECK-LABEL: test_mm_min_pu8 329 // CHECK: call <8 x i8> @llvm.umin.v8i8( 330 return _mm_min_pu8(a, b); 331 } 332 333 int test_mm_movemask_pi8(__m64 a) { 334 // CHECK-LABEL: test_mm_movemask_pi8 335 // CHECK: call {{.*}}i32 @llvm.x86.sse2.pmovmskb.128( 336 return _mm_movemask_pi8(a); 337 } 338 339 __m64 test_mm_mul_su32(__m64 a, __m64 b) { 340 // CHECK-LABEL: test_mm_mul_su32 341 // CHECK: and <2 x i64> {{%.*}}, splat (i64 4294967295) 342 // CHECK: and <2 x i64> {{%.*}}, splat (i64 4294967295) 343 // CHECK: mul <2 x i64> %{{.*}}, %{{.*}} 344 return _mm_mul_su32(a, b); 345 } 346 347 __m64 test_mm_mulhi_pi16(__m64 a, __m64 b) { 348 // CHECK-LABEL: test_mm_mulhi_pi16 349 // CHECK: call <8 x i16> @llvm.x86.sse2.pmulh.w( 350 return _mm_mulhi_pi16(a, b); 351 } 352 353 __m64 test_mm_mulhi_pu16(__m64 a, __m64 b) { 354 // CHECK-LABEL: test_mm_mulhi_pu16 355 // CHECK: call <8 x i16> @llvm.x86.sse2.pmulhu.w( 356 return _mm_mulhi_pu16(a, b); 357 } 358 359 __m64 test_mm_mulhrs_pi16(__m64 a, __m64 b) { 360 // CHECK-LABEL: test_mm_mulhrs_pi16 361 // CHECK: call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128( 362 return _mm_mulhrs_pi16(a, b); 363 } 364 365 __m64 test_mm_mullo_pi16(__m64 a, __m64 b) { 366 // CHECK-LABEL: test_mm_mullo_pi16 367 // CHECK: mul <4 x i16> {{%.*}}, {{%.*}} 368 return _mm_mullo_pi16(a, b); 369 } 370 371 __m64 test_mm_or_si64(__m64 a, __m64 b) { 372 // CHECK-LABEL: test_mm_or_si64 373 // CHECK: or <1 x i64> {{%.*}}, {{%.*}} 374 return _mm_or_si64(a, b); 375 } 376 377 __m64 test_mm_packs_pi16(__m64 a, __m64 b) { 378 // CHECK-LABEL: test_mm_packs_pi16 379 // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128( 380 return _mm_packs_pi16(a, b); 381 } 382 383 __m64 test_mm_packs_pi32(__m64 a, __m64 b) { 384 // CHECK-LABEL: test_mm_packs_pi32 385 // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128( 386 return _mm_packs_pi32(a, b); 387 } 388 389 __m64 test_mm_packs_pu16(__m64 a, __m64 b) { 390 // CHECK-LABEL: test_mm_packs_pu16 391 // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128( 392 return _mm_packs_pu16(a, b); 393 } 394 395 __m64 test_mm_sad_pu8(__m64 a, __m64 b) { 396 // CHECK-LABEL: test_mm_sad_pu8 397 // CHECK: call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> 398 return _mm_sad_pu8(a, b); 399 } 400 401 __m64 test_mm_set_pi8(char a, char b, char c, char d, char e, char f, char g, char h) { 402 // CHECK-LABEL: test_mm_set_pi8 403 // CHECK: insertelement <8 x i8> 404 // CHECK: insertelement <8 x i8> 405 // CHECK: insertelement <8 x i8> 406 // CHECK: insertelement <8 x i8> 407 // CHECK: insertelement <8 x i8> 408 // CHECK: insertelement <8 x i8> 409 // CHECK: insertelement <8 x i8> 410 // CHECK: insertelement <8 x i8> 411 return _mm_set_pi8(a, b, c, d, e, f, g, h); 412 } 413 TEST_CONSTEXPR(match_v8qi(_mm_set_pi8(0, -1, 2, -3, 4, -5, 6, -7), -7, 6, -5, 4, -3, 2, -1, 0)); 414 415 __m64 test_mm_set_pi16(short a, short b, short c, short d) { 416 // CHECK-LABEL: test_mm_set_pi16 417 // CHECK: insertelement <4 x i16> 418 // CHECK: insertelement <4 x i16> 419 // CHECK: insertelement <4 x i16> 420 // CHECK: insertelement <4 x i16> 421 return _mm_set_pi16(a, b, c, d); 422 } 423 TEST_CONSTEXPR(match_v4hi(_mm_set_pi16(101, 102, -103, -104), -104, -103, 102, 101)); 424 425 __m64 test_mm_set_pi32(int a, int b) { 426 // CHECK-LABEL: test_mm_set_pi32 427 // CHECK: insertelement <2 x i32> 428 // CHECK: insertelement <2 x i32> 429 return _mm_set_pi32(a, b); 430 } 431 TEST_CONSTEXPR(match_v2si(_mm_set_pi32(5000, -1500), -1500, 5000)); 432 433 __m64 test_mm_setr_pi8(char a, char b, char c, char d, char e, char f, char g, char h) { 434 // CHECK-LABEL: test_mm_setr_pi8 435 // CHECK: insertelement <8 x i8> 436 // CHECK: insertelement <8 x i8> 437 // CHECK: insertelement <8 x i8> 438 // CHECK: insertelement <8 x i8> 439 // CHECK: insertelement <8 x i8> 440 // CHECK: insertelement <8 x i8> 441 // CHECK: insertelement <8 x i8> 442 // CHECK: insertelement <8 x i8> 443 return _mm_setr_pi8(a, b, c, d, e, f, g, h); 444 } 445 TEST_CONSTEXPR(match_v8qi(_mm_setr_pi8(0, -1, 2, -3, 4, -5, 6, -7), 0, -1, 2, -3, 4, -5, 6, -7)); 446 447 __m64 test_mm_setr_pi16(short a, short b, short c, short d) { 448 // CHECK-LABEL: test_mm_setr_pi16 449 // CHECK: insertelement <4 x i16> 450 // CHECK: insertelement <4 x i16> 451 // CHECK: insertelement <4 x i16> 452 // CHECK: insertelement <4 x i16> 453 return _mm_setr_pi16(a, b, c, d); 454 } 455 TEST_CONSTEXPR(match_v4hi(_mm_setr_pi16(101, 102, -103, -104), 101, 102, -103, -104)); 456 457 __m64 test_mm_setr_pi32(int a, int b) { 458 // CHECK-LABEL: test_mm_setr_pi32 459 // CHECK: insertelement <2 x i32> 460 // CHECK: insertelement <2 x i32> 461 return _mm_setr_pi32(a, b); 462 } 463 TEST_CONSTEXPR(match_v2si(_mm_setr_pi32(5000, -1500), 5000, -1500)); 464 465 __m64 test_mm_setzero_si64() { 466 // CHECK-LABEL: test_mm_setzero_si64 467 // CHECK: zeroinitializer 468 return _mm_setzero_si64(); 469 } 470 TEST_CONSTEXPR(match_m64(_mm_setzero_si64(), 0ULL)); 471 472 __m64 test_mm_set1_pi8(char a) { 473 // CHECK-LABEL: test_mm_set1_pi8 474 // CHECK: insertelement <8 x i8> 475 // CHECK: insertelement <8 x i8> 476 // CHECK: insertelement <8 x i8> 477 // CHECK: insertelement <8 x i8> 478 // CHECK: insertelement <8 x i8> 479 // CHECK: insertelement <8 x i8> 480 // CHECK: insertelement <8 x i8> 481 // CHECK: insertelement <8 x i8> 482 return _mm_set1_pi8(a); 483 } 484 TEST_CONSTEXPR(match_v8qi(_mm_set1_pi8(99), 99, 99, 99, 99, 99, 99, 99, 99)); 485 486 __m64 test_mm_set1_pi16(short a) { 487 // CHECK-LABEL: test_mm_set1_pi16 488 // CHECK: insertelement <4 x i16> 489 // CHECK: insertelement <4 x i16> 490 // CHECK: insertelement <4 x i16> 491 // CHECK: insertelement <4 x i16> 492 return _mm_set1_pi16(a); 493 } 494 TEST_CONSTEXPR(match_v4hi(_mm_set1_pi16(-128), -128, -128, -128, -128)); 495 496 __m64 test_mm_set1_pi32(int a) { 497 // CHECK-LABEL: test_mm_set1_pi32 498 // CHECK: insertelement <2 x i32> 499 // CHECK: insertelement <2 x i32> 500 return _mm_set1_pi32(a); 501 } 502 TEST_CONSTEXPR(match_v2si(_mm_set1_pi32(55), 55, 55)); 503 504 __m64 test_mm_shuffle_pi8(__m64 a, __m64 b) { 505 // CHECK-LABEL: test_mm_shuffle_pi8 506 // CHECK: call <16 x i8> @llvm.x86.ssse3.pshuf.b.128( 507 return _mm_shuffle_pi8(a, b); 508 } 509 510 __m64 test_mm_shuffle_pi16(__m64 a) { 511 // CHECK-LABEL: test_mm_shuffle_pi16 512 // CHECK: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 3, i32 0, i32 0, i32 0> 513 return _mm_shuffle_pi16(a, 3); 514 } 515 516 __m64 test_mm_sign_pi8(__m64 a, __m64 b) { 517 // CHECK-LABEL: test_mm_sign_pi8 518 // CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128( 519 return _mm_sign_pi8(a, b); 520 } 521 522 __m64 test_mm_sign_pi16(__m64 a, __m64 b) { 523 // CHECK-LABEL: test_mm_sign_pi16 524 // CHECK: call <8 x i16> @llvm.x86.ssse3.psign.w.128( 525 return _mm_sign_pi16(a, b); 526 } 527 528 __m64 test_mm_sign_pi32(__m64 a, __m64 b) { 529 // CHECK-LABEL: test_mm_sign_pi32 530 // CHECK: call <4 x i32> @llvm.x86.ssse3.psign.d.128( 531 return _mm_sign_pi32(a, b); 532 } 533 534 __m64 test_mm_sll_pi16(__m64 a, __m64 b) { 535 // CHECK-LABEL: test_mm_sll_pi16 536 // CHECK: call <8 x i16> @llvm.x86.sse2.psll.w( 537 return _mm_sll_pi16(a, b); 538 } 539 540 __m64 test_mm_sll_pi32(__m64 a, __m64 b) { 541 // CHECK-LABEL: test_mm_sll_pi32 542 // CHECK: call <4 x i32> @llvm.x86.sse2.psll.d( 543 return _mm_sll_pi32(a, b); 544 } 545 546 __m64 test_mm_sll_si64(__m64 a, __m64 b) { 547 // CHECK-LABEL: test_mm_sll_si64 548 // CHECK: call <2 x i64> @llvm.x86.sse2.psll.q( 549 return _mm_sll_si64(a, b); 550 } 551 552 __m64 test_mm_slli_pi16(__m64 a) { 553 // CHECK-LABEL: test_mm_slli_pi16 554 // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w( 555 return _mm_slli_pi16(a, 3); 556 } 557 558 __m64 test_mm_slli_pi32(__m64 a) { 559 // CHECK-LABEL: test_mm_slli_pi32 560 // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d( 561 return _mm_slli_pi32(a, 3); 562 } 563 564 __m64 test_mm_slli_si64(__m64 a) { 565 // CHECK-LABEL: test_mm_slli_si64 566 // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q( 567 return _mm_slli_si64(a, 3); 568 } 569 570 __m64 test_mm_sra_pi16(__m64 a, __m64 b) { 571 // CHECK-LABEL: test_mm_sra_pi16 572 // CHECK: call <8 x i16> @llvm.x86.sse2.psra.w( 573 return _mm_sra_pi16(a, b); 574 } 575 576 __m64 test_mm_sra_pi32(__m64 a, __m64 b) { 577 // CHECK-LABEL: test_mm_sra_pi32 578 // CHECK: call <4 x i32> @llvm.x86.sse2.psra.d( 579 return _mm_sra_pi32(a, b); 580 } 581 582 __m64 test_mm_srai_pi16(__m64 a) { 583 // CHECK-LABEL: test_mm_srai_pi16 584 // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w( 585 return _mm_srai_pi16(a, 3); 586 } 587 588 __m64 test_mm_srai_pi32(__m64 a) { 589 // CHECK-LABEL: test_mm_srai_pi32 590 // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d( 591 return _mm_srai_pi32(a, 3); 592 } 593 594 __m64 test_mm_srl_pi16(__m64 a, __m64 b) { 595 // CHECK-LABEL: test_mm_srl_pi16 596 // CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w( 597 return _mm_srl_pi16(a, b); 598 } 599 600 __m64 test_mm_srl_pi32(__m64 a, __m64 b) { 601 // CHECK-LABEL: test_mm_srl_pi32 602 // CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d( 603 return _mm_srl_pi32(a, b); 604 } 605 606 __m64 test_mm_srl_si64(__m64 a, __m64 b) { 607 // CHECK-LABEL: test_mm_srl_si64 608 // CHECK: call <2 x i64> @llvm.x86.sse2.psrl.q( 609 return _mm_srl_si64(a, b); 610 } 611 612 __m64 test_mm_srli_pi16(__m64 a) { 613 // CHECK-LABEL: test_mm_srli_pi16 614 // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w( 615 return _mm_srli_pi16(a, 3); 616 } 617 618 __m64 test_mm_srli_pi32(__m64 a) { 619 // CHECK-LABEL: test_mm_srli_pi32 620 // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d( 621 return _mm_srli_pi32(a, 3); 622 } 623 624 __m64 test_mm_srli_si64(__m64 a) { 625 // CHECK-LABEL: test_mm_srli_si64 626 // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q( 627 return _mm_srli_si64(a, 3); 628 } 629 630 void test_mm_stream_pi(__m64 *p, __m64 a) { 631 // CHECK-LABEL: test_mm_stream_pi 632 // CHECK: store <1 x i64> {{%.*}}, ptr {{%.*}}, align 8, !nontemporal 633 _mm_stream_pi(p, a); 634 } 635 636 void test_mm_stream_pi_void(void *p, __m64 a) { 637 // CHECK-LABEL: test_mm_stream_pi_void 638 // CHECK: store <1 x i64> {{%.*}}, ptr {{%.*}}, align 8, !nontemporal 639 _mm_stream_pi(p, a); 640 } 641 642 __m64 test_mm_sub_pi8(__m64 a, __m64 b) { 643 // CHECK-LABEL: test_mm_sub_pi8 644 // CHECK: sub <8 x i8> {{%.*}}, {{%.*}} 645 return _mm_sub_pi8(a, b); 646 } 647 648 __m64 test_mm_sub_pi16(__m64 a, __m64 b) { 649 // CHECK-LABEL: test_mm_sub_pi16 650 // CHECK: sub <4 x i16> {{%.*}}, {{%.*}} 651 return _mm_sub_pi16(a, b); 652 } 653 654 __m64 test_mm_sub_pi32(__m64 a, __m64 b) { 655 // CHECK-LABEL: test_mm_sub_pi32 656 // CHECK: sub <2 x i32> {{%.*}}, {{%.*}} 657 return _mm_sub_pi32(a, b); 658 } 659 660 __m64 test_mm_sub_si64(__m64 a, __m64 b) { 661 // CHECK-LABEL: test_mm_sub_si64 662 // CHECK: sub i64 {{%.*}}, {{%.*}} 663 return _mm_sub_si64(a, b); 664 } 665 666 __m64 test_mm_subs_pi8(__m64 a, __m64 b) { 667 // CHECK-LABEL: test_mm_subs_pi8 668 // CHECK: call <8 x i8> @llvm.ssub.sat.v8i8( 669 return _mm_subs_pi8(a, b); 670 } 671 672 __m64 test_mm_subs_pi16(__m64 a, __m64 b) { 673 // CHECK-LABEL: test_mm_subs_pi16 674 // CHECK: call <4 x i16> @llvm.ssub.sat.v4i16( 675 return _mm_subs_pi16(a, b); 676 } 677 678 __m64 test_mm_subs_pu8(__m64 a, __m64 b) { 679 // CHECK-LABEL: test_mm_subs_pu8 680 // CHECK: call <8 x i8> @llvm.usub.sat.v8i8( 681 return _mm_subs_pu8(a, b); 682 } 683 684 __m64 test_mm_subs_pu16(__m64 a, __m64 b) { 685 // CHECK-LABEL: test_mm_subs_pu16 686 // CHECK: call <4 x i16> @llvm.usub.sat.v4i16( 687 return _mm_subs_pu16(a, b); 688 } 689 690 int test_m_to_int(__m64 a) { 691 // CHECK-LABEL: test_m_to_int 692 // CHECK: extractelement <2 x i32> 693 return _m_to_int(a); 694 } 695 696 long long test_m_to_int64(__m64 a) { 697 // CHECK-LABEL: test_m_to_int64 698 return _m_to_int64(a); 699 } 700 701 __m64 test_mm_unpackhi_pi8(__m64 a, __m64 b) { 702 // CHECK-LABEL: test_mm_unpackhi_pi8 703 // CHECK: shufflevector <8 x i8> {{%.*}}, <8 x i8> {{%.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 704 return _mm_unpackhi_pi8(a, b); 705 } 706 707 __m64 test_mm_unpackhi_pi16(__m64 a, __m64 b) { 708 // CHECK-LABEL: test_mm_unpackhi_pi16 709 // CHECK: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 710 return _mm_unpackhi_pi16(a, b); 711 } 712 713 __m64 test_mm_unpackhi_pi32(__m64 a, __m64 b) { 714 // CHECK-LABEL: test_mm_unpackhi_pi32 715 // CHECK: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 1, i32 3> 716 return _mm_unpackhi_pi32(a, b); 717 } 718 719 __m64 test_mm_unpacklo_pi8(__m64 a, __m64 b) { 720 // CHECK-LABEL: test_mm_unpacklo_pi8 721 // CHECK: shufflevector <8 x i8> {{%.*}}, <8 x i8> {{%.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 722 return _mm_unpacklo_pi8(a, b); 723 } 724 725 __m64 test_mm_unpacklo_pi16(__m64 a, __m64 b) { 726 // CHECK-LABEL: test_mm_unpacklo_pi16 727 // CHECK: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 728 return _mm_unpacklo_pi16(a, b); 729 } 730 731 __m64 test_mm_unpacklo_pi32(__m64 a, __m64 b) { 732 // CHECK-LABEL: test_mm_unpacklo_pi32 733 // CHECK: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 0, i32 2> 734 return _mm_unpacklo_pi32(a, b); 735 } 736 737 __m64 test_mm_xor_si64(__m64 a, __m64 b) { 738 // CHECK-LABEL: test_mm_xor_si64 739 // CHECK: xor <1 x i64> {{%.*}}, {{%.*}} 740 return _mm_xor_si64(a, b); 741 } 742