1 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 2 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 3 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 4 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 5 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 6 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 7 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 8 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 9 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 10 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s 
-triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 11 12 13 #include <immintrin.h> 14 #include "builtin_test_helpers.h" 15 16 // NOTE: This should match the tests in llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll 17 18 __m128i test_mm_add_epi8(__m128i A, __m128i B) { 19 // CHECK-LABEL: test_mm_add_epi8 20 // CHECK: add <16 x i8> 21 return _mm_add_epi8(A, B); 22 } 23 24 __m128i test_mm_add_epi16(__m128i A, __m128i B) { 25 // CHECK-LABEL: test_mm_add_epi16 26 // CHECK: add <8 x i16> 27 return _mm_add_epi16(A, B); 28 } 29 30 __m128i test_mm_add_epi32(__m128i A, __m128i B) { 31 // CHECK-LABEL: test_mm_add_epi32 32 // CHECK: add <4 x i32> 33 return _mm_add_epi32(A, B); 34 } 35 TEST_CONSTEXPR(match_v4si(_mm_add_epi32((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-10, +8, +6, -4}), -9, +6, +9, -8)); 36 37 __m128i test_mm_add_epi64(__m128i A, __m128i B) { 38 // CHECK-LABEL: test_mm_add_epi64 39 // CHECK: add <2 x i64> 40 return _mm_add_epi64(A, B); 41 } 42 TEST_CONSTEXPR(match_v2di(_mm_add_epi64((__m128i)(__v2di){+5, -3}, (__m128i)(__v2di){-9, +8}), -4, +5)); 43 44 __m128d test_mm_add_pd(__m128d A, __m128d B) { 45 // CHECK-LABEL: test_mm_add_pd 46 // CHECK: fadd <2 x double> 47 return _mm_add_pd(A, B); 48 } 49 TEST_CONSTEXPR(match_m128d(_mm_add_pd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +6.0, -8.0)); 50 51 __m128d test_mm_add_sd(__m128d A, __m128d B) { 52 // CHECK-LABEL: test_mm_add_sd 53 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 54 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 55 // CHECK: fadd double 56 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 57 return _mm_add_sd(A, B); 58 } 59 TEST_CONSTEXPR(match_m128d(_mm_add_sd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +6.0, -3.0)); 60 61 __m128i test_mm_adds_epi8(__m128i A, __m128i B) { 62 // CHECK-LABEL: test_mm_adds_epi8 63 // CHECK: call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %{{.*}}, <16 x 
i8> %{{.*}}) 64 return _mm_adds_epi8(A, B); 65 } 66 67 __m128i test_mm_adds_epi16(__m128i A, __m128i B) { 68 // CHECK-LABEL: test_mm_adds_epi16 69 // CHECK: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 70 return _mm_adds_epi16(A, B); 71 } 72 73 __m128i test_mm_adds_epu8(__m128i A, __m128i B) { 74 // CHECK-LABEL: test_mm_adds_epu8 75 // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 76 // CHECK: call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 77 return _mm_adds_epu8(A, B); 78 } 79 80 __m128i test_mm_adds_epu16(__m128i A, __m128i B) { 81 // CHECK-LABEL: test_mm_adds_epu16 82 // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 83 // CHECK: call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 84 return _mm_adds_epu16(A, B); 85 } 86 87 __m128d test_mm_and_pd(__m128d A, __m128d B) { 88 // CHECK-LABEL: test_mm_and_pd 89 // CHECK: and <2 x i64> 90 return _mm_and_pd(A, B); 91 } 92 TEST_CONSTEXPR(match_m128d(_mm_and_pd((__m128d){+1.0, -3.0}, (__m128d){+0.0, -0.0}), +0.0, -0.0)); 93 94 __m128i test_mm_and_si128(__m128i A, __m128i B) { 95 // CHECK-LABEL: test_mm_and_si128 96 // CHECK: and <2 x i64> 97 return _mm_and_si128(A, B); 98 } 99 100 __m128d test_mm_andnot_pd(__m128d A, __m128d B) { 101 // CHECK-LABEL: test_mm_andnot_pd 102 // CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1) 103 // CHECK: and <2 x i64> 104 return _mm_andnot_pd(A, B); 105 } 106 TEST_CONSTEXPR(match_m128d(_mm_andnot_pd((__m128d){+1.0, -3.0}, (__m128d){+0.0, -0.0}), +0.0, -0.0)); 107 108 __m128i test_mm_andnot_si128(__m128i A, __m128i B) { 109 // CHECK-LABEL: test_mm_andnot_si128 110 // CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1) 111 // CHECK: and <2 x i64> 112 return _mm_andnot_si128(A, B); 113 } 114 115 __m128i test_mm_avg_epu8(__m128i A, __m128i B) { 116 // CHECK-LABEL: test_mm_avg_epu8 117 // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> 
%{{.*}}, <16 x i8> %{{.*}}) 118 return _mm_avg_epu8(A, B); 119 } 120 121 __m128i test_mm_avg_epu16(__m128i A, __m128i B) { 122 // CHECK-LABEL: test_mm_avg_epu16 123 // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 124 return _mm_avg_epu16(A, B); 125 } 126 127 __m128i test_mm_bslli_si128(__m128i A) { 128 // CHECK-LABEL: test_mm_bslli_si128 129 // CHECK: shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26> 130 return _mm_bslli_si128(A, 5); 131 } 132 133 __m128i test_mm_bsrli_si128(__m128i A) { 134 // CHECK-LABEL: test_mm_bsrli_si128 135 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20> 136 return _mm_bsrli_si128(A, 5); 137 } 138 139 __m128 test_mm_castpd_ps(__m128d A) { 140 // CHECK-LABEL: test_mm_castpd_ps 141 return _mm_castpd_ps(A); 142 } 143 TEST_CONSTEXPR(match_m128(_mm_castpd_ps((__m128d){-1.0, +2.0}), +0.0f, -1.875f, +0.0f, +2.0f)); 144 145 __m128i test_mm_castpd_si128(__m128d A) { 146 // CHECK-LABEL: test_mm_castpd_si128 147 return _mm_castpd_si128(A); 148 } 149 TEST_CONSTEXPR(match_m128i(_mm_castpd_si128((__m128d){-1.0, +2.0}), 0xBFF0000000000000ULL, 0x4000000000000000ULL)); 150 151 __m128d test_mm_castps_pd(__m128 A) { 152 // CHECK-LABEL: test_mm_castps_pd 153 return _mm_castps_pd(A); 154 } 155 TEST_CONSTEXPR(match_m128d(_mm_castps_pd((__m128){0.0f, -1.0f, 0.0f, 4.0f}), -0.0078125, 512.0)); 156 157 __m128i test_mm_castps_si128(__m128 A) { 158 // CHECK-LABEL: test_mm_castps_si128 159 return _mm_castps_si128(A); 160 } 161 TEST_CONSTEXPR(match_m128i(_mm_castps_si128((__m128){1.0f, -2.0f, -4.0f, 8.0f}), 0xC00000003F800000ULL, 0x41000000c0800000ULL)); 162 163 __m128d test_mm_castsi128_pd(__m128i A) { 164 // CHECK-LABEL: 
test_mm_castsi128_pd 165 return _mm_castsi128_pd(A); 166 } 167 TEST_CONSTEXPR(match_m128d(_mm_castsi128_pd((__m128i)(__v2du){0x4070000000000000ULL, 0xC000000000000000ULL}), 256.0, -2.0)); 168 169 __m128 test_mm_castsi128_ps(__m128i A) { 170 // CHECK-LABEL: test_mm_castsi128_ps 171 return _mm_castsi128_ps(A); 172 } 173 TEST_CONSTEXPR(match_m128(_mm_castsi128_ps((__m128i)(__v2du){0x42000000c1800000ULL, 0x43000000c2800000ULL}), -16.0f, 32.0f, -64.0f, 128.0f)); 174 175 void test_mm_clflush(void* A) { 176 // CHECK-LABEL: test_mm_clflush 177 // CHECK: call void @llvm.x86.sse2.clflush(ptr %{{.*}}) 178 _mm_clflush(A); 179 } 180 181 __m128d test_mm_cmp_pd_eq_oq(__m128d a, __m128d b) { 182 // CHECK-LABEL: test_mm_cmp_pd_eq_oq 183 // CHECK: fcmp oeq <2 x double> %{{.*}}, %{{.*}} 184 return _mm_cmp_pd(a, b, _CMP_EQ_OQ); 185 } 186 187 __m128d test_mm_cmp_pd_lt_os(__m128d a, __m128d b) { 188 // CHECK-LABEL: test_mm_cmp_pd_lt_os 189 // CHECK: fcmp olt <2 x double> %{{.*}}, %{{.*}} 190 return _mm_cmp_pd(a, b, _CMP_LT_OS); 191 } 192 193 __m128d test_mm_cmp_pd_le_os(__m128d a, __m128d b) { 194 // CHECK-LABEL: test_mm_cmp_pd_le_os 195 // CHECK: fcmp ole <2 x double> %{{.*}}, %{{.*}} 196 return _mm_cmp_pd(a, b, _CMP_LE_OS); 197 } 198 199 __m128d test_mm_cmp_pd_unord_q(__m128d a, __m128d b) { 200 // CHECK-LABEL: test_mm_cmp_pd_unord_q 201 // CHECK: fcmp uno <2 x double> %{{.*}}, %{{.*}} 202 return _mm_cmp_pd(a, b, _CMP_UNORD_Q); 203 } 204 205 __m128d test_mm_cmp_pd_neq_uq(__m128d a, __m128d b) { 206 // CHECK-LABEL: test_mm_cmp_pd_neq_uq 207 // CHECK: fcmp une <2 x double> %{{.*}}, %{{.*}} 208 return _mm_cmp_pd(a, b, _CMP_NEQ_UQ); 209 } 210 211 __m128d test_mm_cmp_pd_nlt_us(__m128d a, __m128d b) { 212 // CHECK-LABEL: test_mm_cmp_pd_nlt_us 213 // CHECK: fcmp uge <2 x double> %{{.*}}, %{{.*}} 214 return _mm_cmp_pd(a, b, _CMP_NLT_US); 215 } 216 217 __m128d test_mm_cmp_pd_nle_us(__m128d a, __m128d b) { 218 // CHECK-LABEL: test_mm_cmp_pd_nle_us 219 // CHECK: fcmp ugt <2 x double> %{{.*}}, 
%{{.*}} 220 return _mm_cmp_pd(a, b, _CMP_NLE_US); 221 } 222 223 __m128d test_mm_cmp_pd_ord_q(__m128d a, __m128d b) { 224 // CHECK-LABEL: test_mm_cmp_pd_ord_q 225 // CHECK: fcmp ord <2 x double> %{{.*}}, %{{.*}} 226 return _mm_cmp_pd(a, b, _CMP_ORD_Q); 227 } 228 229 __m128d test_mm_cmp_sd(__m128d A, __m128d B) { 230 // CHECK-LABEL: test_mm_cmp_sd 231 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7) 232 return _mm_cmp_sd(A, B, _CMP_ORD_Q); 233 } 234 235 __m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) { 236 // CHECK-LABEL: test_mm_cmpeq_epi8 237 // CHECK: icmp eq <16 x i8> 238 return _mm_cmpeq_epi8(A, B); 239 } 240 241 __m128i test_mm_cmpeq_epi16(__m128i A, __m128i B) { 242 // CHECK-LABEL: test_mm_cmpeq_epi16 243 // CHECK: icmp eq <8 x i16> 244 return _mm_cmpeq_epi16(A, B); 245 } 246 247 __m128i test_mm_cmpeq_epi32(__m128i A, __m128i B) { 248 // CHECK-LABEL: test_mm_cmpeq_epi32 249 // CHECK: icmp eq <4 x i32> 250 return _mm_cmpeq_epi32(A, B); 251 } 252 253 __m128d test_mm_cmpeq_pd(__m128d A, __m128d B) { 254 // CHECK-LABEL: test_mm_cmpeq_pd 255 // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double> 256 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 257 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 258 return _mm_cmpeq_pd(A, B); 259 } 260 261 __m128d test_mm_cmpeq_sd(__m128d A, __m128d B) { 262 // CHECK-LABEL: test_mm_cmpeq_sd 263 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0) 264 return _mm_cmpeq_sd(A, B); 265 } 266 267 __m128d test_mm_cmpge_pd(__m128d A, __m128d B) { 268 // CHECK-LABEL: test_mm_cmpge_pd 269 // CHECK: [[CMP:%.*]] = fcmp ole <2 x double> 270 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 271 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 272 return _mm_cmpge_pd(A, B); 273 } 274 275 __m128d test_mm_cmpge_sd(__m128d A, __m128d B) { 276 // CHECK-LABEL: 
test_mm_cmpge_sd 277 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2) 278 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 279 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0 280 // CHECK: extractelement <2 x double> %{{.*}}, i32 1 281 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 282 return _mm_cmpge_sd(A, B); 283 } 284 285 __m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) { 286 // CHECK-LABEL: test_mm_cmpgt_epi8 287 // CHECK: icmp sgt <16 x i8> 288 return _mm_cmpgt_epi8(A, B); 289 } 290 291 __m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) { 292 // CHECK-LABEL: test_mm_cmpgt_epi16 293 // CHECK: icmp sgt <8 x i16> 294 return _mm_cmpgt_epi16(A, B); 295 } 296 297 __m128i test_mm_cmpgt_epi32(__m128i A, __m128i B) { 298 // CHECK-LABEL: test_mm_cmpgt_epi32 299 // CHECK: icmp sgt <4 x i32> 300 return _mm_cmpgt_epi32(A, B); 301 } 302 303 __m128d test_mm_cmpgt_pd(__m128d A, __m128d B) { 304 // CHECK-LABEL: test_mm_cmpgt_pd 305 // CHECK: [[CMP:%.*]] = fcmp olt <2 x double> 306 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 307 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 308 return _mm_cmpgt_pd(A, B); 309 } 310 311 __m128d test_mm_cmpgt_sd(__m128d A, __m128d B) { 312 // CHECK-LABEL: test_mm_cmpgt_sd 313 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1) 314 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 315 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0 316 // CHECK: extractelement <2 x double> %{{.*}}, i32 1 317 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 318 return _mm_cmpgt_sd(A, B); 319 } 320 321 __m128d test_mm_cmple_pd(__m128d A, __m128d B) { 322 // CHECK-LABEL: test_mm_cmple_pd 323 // CHECK: [[CMP:%.*]] = fcmp ole <2 x double> 324 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 325 // CHECK-NEXT: 
[[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 326 return _mm_cmple_pd(A, B); 327 } 328 329 __m128d test_mm_cmple_sd(__m128d A, __m128d B) { 330 // CHECK-LABEL: test_mm_cmple_sd 331 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2) 332 return _mm_cmple_sd(A, B); 333 } 334 335 __m128i test_mm_cmplt_epi8(__m128i A, __m128i B) { 336 // CHECK-LABEL: test_mm_cmplt_epi8 337 // CHECK: icmp sgt <16 x i8> 338 return _mm_cmplt_epi8(A, B); 339 } 340 341 __m128i test_mm_cmplt_epi16(__m128i A, __m128i B) { 342 // CHECK-LABEL: test_mm_cmplt_epi16 343 // CHECK: icmp sgt <8 x i16> 344 return _mm_cmplt_epi16(A, B); 345 } 346 347 __m128i test_mm_cmplt_epi32(__m128i A, __m128i B) { 348 // CHECK-LABEL: test_mm_cmplt_epi32 349 // CHECK: icmp sgt <4 x i32> 350 return _mm_cmplt_epi32(A, B); 351 } 352 353 __m128d test_mm_cmplt_pd(__m128d A, __m128d B) { 354 // CHECK-LABEL: test_mm_cmplt_pd 355 // CHECK: [[CMP:%.*]] = fcmp olt <2 x double> 356 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 357 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 358 return _mm_cmplt_pd(A, B); 359 } 360 361 __m128d test_mm_cmplt_sd(__m128d A, __m128d B) { 362 // CHECK-LABEL: test_mm_cmplt_sd 363 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1) 364 return _mm_cmplt_sd(A, B); 365 } 366 367 __m128d test_mm_cmpneq_pd(__m128d A, __m128d B) { 368 // CHECK-LABEL: test_mm_cmpneq_pd 369 // CHECK: [[CMP:%.*]] = fcmp une <2 x double> 370 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 371 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 372 return _mm_cmpneq_pd(A, B); 373 } 374 375 __m128d test_mm_cmpneq_sd(__m128d A, __m128d B) { 376 // CHECK-LABEL: test_mm_cmpneq_sd 377 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4) 378 return _mm_cmpneq_sd(A, B); 379 } 380 381 
__m128d test_mm_cmpnge_pd(__m128d A, __m128d B) { 382 // CHECK-LABEL: test_mm_cmpnge_pd 383 // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double> 384 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 385 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 386 return _mm_cmpnge_pd(A, B); 387 } 388 389 __m128d test_mm_cmpnge_sd(__m128d A, __m128d B) { 390 // CHECK-LABEL: test_mm_cmpnge_sd 391 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6) 392 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 393 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0 394 // CHECK: extractelement <2 x double> %{{.*}}, i32 1 395 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 396 return _mm_cmpnge_sd(A, B); 397 } 398 399 __m128d test_mm_cmpngt_pd(__m128d A, __m128d B) { 400 // CHECK-LABEL: test_mm_cmpngt_pd 401 // CHECK: [[CMP:%.*]] = fcmp uge <2 x double> 402 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 403 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 404 return _mm_cmpngt_pd(A, B); 405 } 406 407 __m128d test_mm_cmpngt_sd(__m128d A, __m128d B) { 408 // CHECK-LABEL: test_mm_cmpngt_sd 409 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5) 410 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 411 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0 412 // CHECK: extractelement <2 x double> %{{.*}}, i32 1 413 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 414 return _mm_cmpngt_sd(A, B); 415 } 416 417 __m128d test_mm_cmpnle_pd(__m128d A, __m128d B) { 418 // CHECK-LABEL: test_mm_cmpnle_pd 419 // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double> 420 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 421 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 422 return _mm_cmpnle_pd(A, B); 423 } 424 425 __m128d 
test_mm_cmpnle_sd(__m128d A, __m128d B) { 426 // CHECK-LABEL: test_mm_cmpnle_sd 427 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6) 428 return _mm_cmpnle_sd(A, B); 429 } 430 431 __m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) { 432 // CHECK-LABEL: test_mm_cmpnlt_pd 433 // CHECK: [[CMP:%.*]] = fcmp uge <2 x double> 434 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 435 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 436 return _mm_cmpnlt_pd(A, B); 437 } 438 439 __m128d test_mm_cmpnlt_sd(__m128d A, __m128d B) { 440 // CHECK-LABEL: test_mm_cmpnlt_sd 441 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5) 442 return _mm_cmpnlt_sd(A, B); 443 } 444 445 __m128d test_mm_cmpord_pd(__m128d A, __m128d B) { 446 // CHECK-LABEL: test_mm_cmpord_pd 447 // CHECK: [[CMP:%.*]] = fcmp ord <2 x double> 448 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 449 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 450 return _mm_cmpord_pd(A, B); 451 } 452 453 __m128d test_mm_cmpord_sd(__m128d A, __m128d B) { 454 // CHECK-LABEL: test_mm_cmpord_sd 455 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7) 456 return _mm_cmpord_sd(A, B); 457 } 458 459 __m128d test_mm_cmpunord_pd(__m128d A, __m128d B) { 460 // CHECK-LABEL: test_mm_cmpunord_pd 461 // CHECK: [[CMP:%.*]] = fcmp uno <2 x double> 462 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 463 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 464 return _mm_cmpunord_pd(A, B); 465 } 466 467 __m128d test_mm_cmpunord_sd(__m128d A, __m128d B) { 468 // CHECK-LABEL: test_mm_cmpunord_sd 469 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3) 470 return _mm_cmpunord_sd(A, B); 471 } 472 473 int test_mm_comieq_sd(__m128d A, 
__m128d B) { 474 // CHECK-LABEL: test_mm_comieq_sd 475 // CHECK: call {{.*}}i32 @llvm.x86.sse2.comieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 476 return _mm_comieq_sd(A, B); 477 } 478 479 int test_mm_comige_sd(__m128d A, __m128d B) { 480 // CHECK-LABEL: test_mm_comige_sd 481 // CHECK: call {{.*}}i32 @llvm.x86.sse2.comige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 482 return _mm_comige_sd(A, B); 483 } 484 485 int test_mm_comigt_sd(__m128d A, __m128d B) { 486 // CHECK-LABEL: test_mm_comigt_sd 487 // CHECK: call {{.*}}i32 @llvm.x86.sse2.comigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 488 return _mm_comigt_sd(A, B); 489 } 490 491 int test_mm_comile_sd(__m128d A, __m128d B) { 492 // CHECK-LABEL: test_mm_comile_sd 493 // CHECK: call {{.*}}i32 @llvm.x86.sse2.comile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 494 return _mm_comile_sd(A, B); 495 } 496 497 int test_mm_comilt_sd(__m128d A, __m128d B) { 498 // CHECK-LABEL: test_mm_comilt_sd 499 // CHECK: call {{.*}}i32 @llvm.x86.sse2.comilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 500 return _mm_comilt_sd(A, B); 501 } 502 503 int test_mm_comineq_sd(__m128d A, __m128d B) { 504 // CHECK-LABEL: test_mm_comineq_sd 505 // CHECK: call {{.*}}i32 @llvm.x86.sse2.comineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 506 return _mm_comineq_sd(A, B); 507 } 508 509 __m128d test_mm_cvtepi32_pd(__m128i A) { 510 // CHECK-LABEL: test_mm_cvtepi32_pd 511 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1> 512 // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double> 513 return _mm_cvtepi32_pd(A); 514 } 515 TEST_CONSTEXPR(match_m128d(_mm_cvtepi32_pd((__m128i)(__v4si){-9, +8, -6, 0}), -9.0, +8.0)); 516 517 __m128 test_mm_cvtepi32_ps(__m128i A) { 518 // CHECK-LABEL: test_mm_cvtepi32_ps 519 // CHECK: sitofp <4 x i32> %{{.*}} to <4 x float> 520 return _mm_cvtepi32_ps(A); 521 } 522 TEST_CONSTEXPR(match_m128(_mm_cvtepi32_ps((__m128i)(__v4si){-3, +2, -1, 0}), -3.0f, +2.0f, -1.0f, +0.0f)); 523 524 
__m128i test_mm_cvtpd_epi32(__m128d A) { 525 // CHECK-LABEL: test_mm_cvtpd_epi32 526 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %{{.*}}) 527 return _mm_cvtpd_epi32(A); 528 } 529 530 __m128 test_mm_cvtpd_ps(__m128d A) { 531 // CHECK-LABEL: test_mm_cvtpd_ps 532 // CHECK: call {{.*}}<4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %{{.*}}) 533 return _mm_cvtpd_ps(A); 534 } 535 536 __m128i test_mm_cvtps_epi32(__m128 A) { 537 // CHECK-LABEL: test_mm_cvtps_epi32 538 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %{{.*}}) 539 return _mm_cvtps_epi32(A); 540 } 541 542 __m128d test_mm_cvtps_pd(__m128 A) { 543 // CHECK-LABEL: test_mm_cvtps_pd 544 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1> 545 // CHECK: fpext <2 x float> %{{.*}} to <2 x double> 546 return _mm_cvtps_pd(A); 547 } 548 TEST_CONSTEXPR(match_m128d(_mm_cvtps_pd((__m128){-1.0f, +2.0f, -3.0f, +4.0f}), -1.0, +2.0)); 549 550 double test_mm_cvtsd_f64(__m128d A) { 551 // CHECK-LABEL: test_mm_cvtsd_f64 552 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 553 return _mm_cvtsd_f64(A); 554 } 555 TEST_CONSTEXPR(_mm_cvtsd_f64((__m128d){-4.0, +8.0}) == -4.0); 556 557 int test_mm_cvtsd_si32(__m128d A) { 558 // CHECK-LABEL: test_mm_cvtsd_si32 559 // CHECK: call {{.*}}i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %{{.*}}) 560 return _mm_cvtsd_si32(A); 561 } 562 563 #ifdef __x86_64__ 564 long long test_mm_cvtsd_si64(__m128d A) { 565 // X64-LABEL: test_mm_cvtsd_si64 566 // X64: call {{.*}}i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %{{.*}}) 567 return _mm_cvtsd_si64(A); 568 } 569 #endif 570 571 __m128 test_mm_cvtsd_ss(__m128 A, __m128d B) { 572 // CHECK-LABEL: test_mm_cvtsd_ss 573 // CHECK: call {{.*}}<4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %{{.*}}, <2 x double> %{{.*}}) 574 return _mm_cvtsd_ss(A, B); 575 } 576 577 int test_mm_cvtsi128_si32(__m128i A) { 578 // CHECK-LABEL: test_mm_cvtsi128_si32 579 // CHECK: extractelement <4 x i32> %{{.*}}, i32 0 
580 return _mm_cvtsi128_si32(A); 581 } 582 583 long long test_mm_cvtsi128_si64(__m128i A) { 584 // CHECK-LABEL: test_mm_cvtsi128_si64 585 // CHECK: extractelement <2 x i64> %{{.*}}, i32 0 586 return _mm_cvtsi128_si64(A); 587 } 588 589 __m128d test_mm_cvtsi32_sd(__m128d A, int B) { 590 // CHECK-LABEL: test_mm_cvtsi32_sd 591 // CHECK: sitofp i32 %{{.*}} to double 592 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 593 return _mm_cvtsi32_sd(A, B); 594 } 595 TEST_CONSTEXPR(match_m128d(_mm_cvtsi32_sd((__m128d){-99.0, +42.0}, 55), +55.0, +42.0)); 596 597 __m128i test_mm_cvtsi32_si128(int A) { 598 // CHECK-LABEL: test_mm_cvtsi32_si128 599 // CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i32 0 600 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 1 601 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 2 602 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 3 603 return _mm_cvtsi32_si128(A); 604 } 605 606 #ifdef __x86_64__ 607 __m128d test_mm_cvtsi64_sd(__m128d A, long long B) { 608 // X64-LABEL: test_mm_cvtsi64_sd 609 // X64: sitofp i64 %{{.*}} to double 610 // X64: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 611 return _mm_cvtsi64_sd(A, B); 612 } 613 TEST_CONSTEXPR(match_m128d(_mm_cvtsi64_sd((__m128d){-42.0, +99.0}, 55), +55.0, +99.0)); 614 #endif 615 616 __m128i test_mm_cvtsi64_si128(long long A) { 617 // CHECK-LABEL: test_mm_cvtsi64_si128 618 // CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0 619 // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1 620 return _mm_cvtsi64_si128(A); 621 } 622 623 __m128d test_mm_cvtss_sd(__m128d A, __m128 B) { 624 // CHECK-LABEL: test_mm_cvtss_sd 625 // CHECK: extractelement <4 x float> %{{.*}}, i32 0 626 // CHECK: fpext float %{{.*}} to double 627 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 628 return _mm_cvtss_sd(A, B); 629 } 630 TEST_CONSTEXPR(match_m128d(_mm_cvtss_sd((__m128d){+32.0, +8.0}, (__m128){-1.0f, +2.0f, -3.0f, +4.0f}), -1.0, +8.0)); 631 
632 __m128i test_mm_cvttpd_epi32(__m128d A) { 633 // CHECK-LABEL: test_mm_cvttpd_epi32 634 // CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %{{.*}}) 635 return _mm_cvttpd_epi32(A); 636 } 637 638 __m128i test_mm_cvttps_epi32(__m128 A) { 639 // CHECK-LABEL: test_mm_cvttps_epi32 640 // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}}) 641 return _mm_cvttps_epi32(A); 642 } 643 644 int test_mm_cvttsd_si32(__m128d A) { 645 // CHECK-LABEL: test_mm_cvttsd_si32 646 // CHECK: call {{.*}}i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %{{.*}}) 647 return _mm_cvttsd_si32(A); 648 } 649 650 #ifdef __x86_64__ 651 long long test_mm_cvttsd_si64(__m128d A) { 652 // X64-LABEL: test_mm_cvttsd_si64 653 // X64: call {{.*}}i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %{{.*}}) 654 return _mm_cvttsd_si64(A); 655 } 656 #endif 657 658 __m128d test_mm_div_pd(__m128d A, __m128d B) { 659 // CHECK-LABEL: test_mm_div_pd 660 // CHECK: fdiv <2 x double> 661 return _mm_div_pd(A, B); 662 } 663 TEST_CONSTEXPR(match_m128d(_mm_div_pd((__m128d){+2.0, +8.0}, (__m128d){-4.0, +2.0}), -0.5, +4.0)); 664 665 __m128d test_mm_div_sd(__m128d A, __m128d B) { 666 // CHECK-LABEL: test_mm_div_sd 667 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 668 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 669 // CHECK: fdiv double 670 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 671 return _mm_div_sd(A, B); 672 } 673 TEST_CONSTEXPR(match_m128d(_mm_div_sd((__m128d){+2.0, +8.0}, (__m128d){-4.0, +2.0}), -0.5, +8.0)); 674 675 // Lowering to pextrw requires optimization. 
// 16-bit lane extract/insert: the index operand may be printed as i32 or i64,
// so the patterns accept either; the extracted value is zero-extended to i32.
int test_mm_extract_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_extract_epi16
  // CHECK: extractelement <8 x i16> %{{.*}}, {{i32|i64}} 1
  // CHECK: zext i16 %{{.*}} to i32
  return _mm_extract_epi16(A, 1);
}

__m128i test_mm_insert_epi16(__m128i A, int B) {
  // CHECK-LABEL: test_mm_insert_epi16
  // CHECK: insertelement <8 x i16> %{{.*}}, {{i32|i64}} 0
  return _mm_insert_epi16(A, B, 0);
}

void test_mm_lfence(void) {
  // CHECK-LABEL: test_mm_lfence
  // CHECK: call void @llvm.x86.sse2.lfence()
  _mm_lfence();
}

// Loads. The alignment on the expected IR load is part of the contract:
// 16 for the aligned vector loads, 8 for the broadcast loads of one double,
// and exactly 1 (anchored with {{$}}) for the sd/loadh forms.

__m128d test_mm_load_pd(double const* A) {
  // CHECK-LABEL: test_mm_load_pd
  // CHECK: load <2 x double>, ptr %{{.*}}, align 16
  return _mm_load_pd(A);
}

__m128d test_mm_load_pd1(double const* A) {
  // CHECK-LABEL: test_mm_load_pd1
  // CHECK: load double, ptr %{{.*}}, align 8
  // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_load_pd1(A);
}

__m128d test_mm_load_sd(double const* A) {
  // CHECK-LABEL: test_mm_load_sd
  // CHECK: load double, ptr %{{.*}}, align 1{{$}}
  return _mm_load_sd(A);
}

__m128i test_mm_load_si128(__m128i const* A) {
  // CHECK-LABEL: test_mm_load_si128
  // CHECK: load <2 x i64>, ptr %{{.*}}, align 16
  return _mm_load_si128(A);
}

__m128d test_mm_load1_pd(double const* A) {
  // CHECK-LABEL: test_mm_load1_pd
  // CHECK: load double, ptr %{{.*}}, align 8
  // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_load1_pd(A);
}

__m128d test_mm_loadh_pd(__m128d x, double const* y) {
  // CHECK-LABEL: test_mm_loadh_pd
  // CHECK: load double, ptr %{{.*}}, align 1{{$}}
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_loadh_pd(x, y);
}

736 __m128i test_mm_loadl_epi64(__m128i* y) { 737 // CHECK: test_mm_loadl_epi64 738 // CHECK: load i64, ptr {{.*}}, align 1{{$}} 739 // CHECK: insertelement <2 x i64> poison, i64 {{.*}}, i32 0 740 // CHECK: insertelement <2 x i64> {{.*}}, i64 0, i32 1 741 return _mm_loadl_epi64(y); 742 } 743 744 __m128d test_mm_loadl_pd(__m128d x, double const* y) { 745 // CHECK-LABEL: test_mm_loadl_pd 746 // CHECK: load double, ptr %{{.*}}, align 1{{$}} 747 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0 748 // CHECK: extractelement <2 x double> %{{.*}}, i32 1 749 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 750 return _mm_loadl_pd(x, y); 751 } 752 753 __m128d test_mm_loadr_pd(double const* A) { 754 // CHECK-LABEL: test_mm_loadr_pd 755 // CHECK: load <2 x double>, ptr %{{.*}}, align 16 756 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0> 757 return _mm_loadr_pd(A); 758 } 759 760 __m128d test_mm_loadu_pd(double const* A) { 761 // CHECK-LABEL: test_mm_loadu_pd 762 // CHECK: load <2 x double>, ptr %{{.*}}, align 1{{$}} 763 return _mm_loadu_pd(A); 764 } 765 766 __m128i test_mm_loadu_si128(__m128i const* A) { 767 // CHECK-LABEL: test_mm_loadu_si128 768 // CHECK: load <2 x i64>, ptr %{{.*}}, align 1{{$}} 769 return _mm_loadu_si128(A); 770 } 771 772 __m128i test_mm_loadu_si64(void const* A) { 773 // CHECK-LABEL: test_mm_loadu_si64 774 // CHECK: load i64, ptr %{{.*}}, align 1{{$}} 775 // CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0 776 // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1 777 return _mm_loadu_si64(A); 778 } 779 780 __m128i test_mm_loadu_si32(void const* A) { 781 // CHECK-LABEL: test_mm_loadu_si32 782 // CHECK: load i32, ptr %{{.*}}, align 1{{$}} 783 // CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i32 0 784 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 1 785 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 2 786 // CHECK: insertelement <4 x i32> %{{.*}}, 
i32 0, i32 3 787 return _mm_loadu_si32(A); 788 } 789 790 __m128i test_mm_loadu_si16(void const* A) { 791 // CHECK-LABEL: test_mm_loadu_si16 792 // CHECK: load i16, ptr %{{.*}}, align 1{{$}} 793 // CHECK: insertelement <8 x i16> poison, i16 %{{.*}}, i32 0 794 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 1 795 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 2 796 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 3 797 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 4 798 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5 799 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6 800 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7 801 return _mm_loadu_si16(A); 802 } 803 804 __m128i test_mm_madd_epi16(__m128i A, __m128i B) { 805 // CHECK-LABEL: test_mm_madd_epi16 806 // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 807 return _mm_madd_epi16(A, B); 808 } 809 810 void test_mm_maskmoveu_si128(__m128i A, __m128i B, char* C) { 811 // CHECK-LABEL: test_mm_maskmoveu_si128 812 // CHECK: call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, ptr %{{.*}}) 813 _mm_maskmoveu_si128(A, B, C); 814 } 815 816 __m128i test_mm_max_epi16(__m128i A, __m128i B) { 817 // CHECK-LABEL: test_mm_max_epi16 818 // CHECK: call <8 x i16> @llvm.smax.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 819 return _mm_max_epi16(A, B); 820 } 821 822 __m128i test_mm_max_epu8(__m128i A, __m128i B) { 823 // CHECK-LABEL: test_mm_max_epu8 824 // CHECK: call <16 x i8> @llvm.umax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 825 return _mm_max_epu8(A, B); 826 } 827 828 __m128d test_mm_max_pd(__m128d A, __m128d B) { 829 // CHECK-LABEL: test_mm_max_pd 830 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 831 return _mm_max_pd(A, B); 832 } 833 834 __m128d test_mm_max_sd(__m128d A, __m128d B) { 835 // CHECK-LABEL: test_mm_max_sd 836 // CHECK: call {{.*}}<2 x double> 
@llvm.x86.sse2.max.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 837 return _mm_max_sd(A, B); 838 } 839 840 void test_mm_mfence(void) { 841 // CHECK-LABEL: test_mm_mfence 842 // CHECK: call void @llvm.x86.sse2.mfence() 843 _mm_mfence(); 844 } 845 846 __m128i test_mm_min_epi16(__m128i A, __m128i B) { 847 // CHECK-LABEL: test_mm_min_epi16 848 // CHECK: call <8 x i16> @llvm.smin.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 849 return _mm_min_epi16(A, B); 850 } 851 852 __m128i test_mm_min_epu8(__m128i A, __m128i B) { 853 // CHECK-LABEL: test_mm_min_epu8 854 // CHECK: call <16 x i8> @llvm.umin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 855 return _mm_min_epu8(A, B); 856 } 857 858 __m128d test_mm_min_pd(__m128d A, __m128d B) { 859 // CHECK-LABEL: test_mm_min_pd 860 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 861 return _mm_min_pd(A, B); 862 } 863 864 __m128d test_mm_min_sd(__m128d A, __m128d B) { 865 // CHECK-LABEL: test_mm_min_sd 866 // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.min.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 867 return _mm_min_sd(A, B); 868 } 869 870 __m64 test_mm_movepi64_pi64(__m128i A) { 871 // CHECK-LABEL: test_mm_movepi64_pi64 872 // CHECK: [[EXT:%.*]] = extractelement <2 x i64> %1, i32 0 873 return _mm_movepi64_pi64(A); 874 } 875 TEST_CONSTEXPR(match_m64(_mm_movepi64_pi64((__m128i){8, -8}), 8ULL)); 876 877 __m128i test_mm_movpi64_epi64(__m64 A) { 878 // CHECK-LABEL: test_mm_movpi64_epi64 879 // CHECK: shufflevector <1 x i64> %{{.*}}, <1 x i64> %{{.*}}, <2 x i32> <i32 0, i32 1> 880 return _mm_movpi64_epi64(A); 881 } 882 TEST_CONSTEXPR(match_m128i(_mm_movpi64_epi64((__m64){5LL}), 5ULL, 0ULL)); 883 884 __m128i test_mm_move_epi64(__m128i A) { 885 // CHECK-LABEL: test_mm_move_epi64 886 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2> 887 return _mm_move_epi64(A); 888 } 889 TEST_CONSTEXPR(match_m128i(_mm_move_epi64((__m128i){16LL, 15LL}), 16ULL, 0ULL)); 
890 891 __m128d test_mm_move_sd(__m128d A, __m128d B) { 892 // CHECK-LABEL: test_mm_move_sd 893 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 894 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 895 return _mm_move_sd(A, B); 896 } 897 TEST_CONSTEXPR(match_m128d(_mm_move_sd((__m128d){+2.0, +8.0}, (__m128d){-4.0, +2.0}), -4.0, +8.0)); 898 899 int test_mm_movemask_epi8(__m128i A) { 900 // CHECK-LABEL: test_mm_movemask_epi8 901 // CHECK: call {{.*}}i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}}) 902 return _mm_movemask_epi8(A); 903 } 904 905 int test_mm_movemask_pd(__m128d A) { 906 // CHECK-LABEL: test_mm_movemask_pd 907 // CHECK: call {{.*}}i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}}) 908 return _mm_movemask_pd(A); 909 } 910 911 __m128i test_mm_mul_epu32(__m128i A, __m128i B) { 912 // CHECK-LABEL: test_mm_mul_epu32 913 // CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295) 914 // CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295) 915 // CHECK: mul <2 x i64> %{{.*}}, %{{.*}} 916 return _mm_mul_epu32(A, B); 917 } 918 919 __m128d test_mm_mul_pd(__m128d A, __m128d B) { 920 // CHECK-LABEL: test_mm_mul_pd 921 // CHECK: fmul <2 x double> %{{.*}}, %{{.*}} 922 return _mm_mul_pd(A, B); 923 } 924 TEST_CONSTEXPR(match_m128d(_mm_mul_pd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +5.0, +15.0)); 925 926 __m128d test_mm_mul_sd(__m128d A, __m128d B) { 927 // CHECK-LABEL: test_mm_mul_sd 928 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 929 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 930 // CHECK: fmul double 931 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 932 return _mm_mul_sd(A, B); 933 } 934 TEST_CONSTEXPR(match_m128d(_mm_mul_sd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +5.0, -3.0)); 935 936 __m128i test_mm_mulhi_epi16(__m128i A, __m128i B) { 937 // CHECK-LABEL: test_mm_mulhi_epi16 938 // CHECK: call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 939 return _mm_mulhi_epi16(A, 
B); 940 } 941 942 __m128i test_mm_mulhi_epu16(__m128i A, __m128i B) { 943 // CHECK-LABEL: test_mm_mulhi_epu16 944 // CHECK: call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 945 return _mm_mulhi_epu16(A, B); 946 } 947 948 __m128i test_mm_mullo_epi16(__m128i A, __m128i B) { 949 // CHECK-LABEL: test_mm_mullo_epi16 950 // CHECK: mul <8 x i16> %{{.*}}, %{{.*}} 951 return _mm_mullo_epi16(A, B); 952 } 953 954 __m128d test_mm_or_pd(__m128d A, __m128d B) { 955 // CHECK-LABEL: test_mm_or_pd 956 // CHECK: or <2 x i64> %{{.*}}, %{{.*}} 957 return _mm_or_pd(A, B); 958 } 959 TEST_CONSTEXPR(match_m128d(_mm_or_pd((__m128d){+1.0, -3.0}, (__m128d){-0.0, +0.0}), -1.0, -3.0)); 960 961 __m128i test_mm_or_si128(__m128i A, __m128i B) { 962 // CHECK-LABEL: test_mm_or_si128 963 // CHECK: or <2 x i64> %{{.*}}, %{{.*}} 964 return _mm_or_si128(A, B); 965 } 966 967 __m128i test_mm_packs_epi16(__m128i A, __m128i B) { 968 // CHECK-LABEL: test_mm_packs_epi16 969 // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 970 return _mm_packs_epi16(A, B); 971 } 972 973 __m128i test_mm_packs_epi32(__m128i A, __m128i B) { 974 // CHECK-LABEL: test_mm_packs_epi32 975 // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) 976 return _mm_packs_epi32(A, B); 977 } 978 979 __m128i test_mm_packus_epi16(__m128i A, __m128i B) { 980 // CHECK-LABEL: test_mm_packus_epi16 981 // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 982 return _mm_packus_epi16(A, B); 983 } 984 985 void test_mm_pause(void) { 986 // CHECK-LABEL: test_mm_pause 987 // CHECK: call void @llvm.x86.sse2.pause() 988 return _mm_pause(); 989 } 990 991 __m128i test_mm_sad_epu8(__m128i A, __m128i B) { 992 // CHECK-LABEL: test_mm_sad_epu8 993 // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 994 return _mm_sad_epu8(A, B); 995 } 996 997 __m128i test_mm_set_epi8(char A, char B, 
char C, char D, 998 char E, char F, char G, char H, 999 char I, char J, char K, char L, 1000 char M, char N, char O, char P) { 1001 // CHECK-LABEL: test_mm_set_epi8 1002 // CHECK: insertelement <16 x i8> poison, i8 %{{.*}}, i32 0 1003 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 1004 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2 1005 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3 1006 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4 1007 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5 1008 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6 1009 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7 1010 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8 1011 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9 1012 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10 1013 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11 1014 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12 1015 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13 1016 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14 1017 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 1018 return _mm_set_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P); 1019 } 1020 TEST_CONSTEXPR(match_v16qi(_mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); 1021 1022 __m128i test_mm_set_epi16(short A, short B, short C, short D, 1023 short E, short F, short G, short H) { 1024 // CHECK-LABEL: test_mm_set_epi16 1025 // CHECK: insertelement <8 x i16> poison, i16 %{{.*}}, i32 0 1026 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1 1027 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2 1028 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3 1029 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4 1030 // CHECK: insertelement <8 x i16> %{{.*}}, 
i16 %{{.*}}, i32 5 1031 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6 1032 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7 1033 return _mm_set_epi16(A, B, C, D, E, F, G, H); 1034 } 1035 TEST_CONSTEXPR(match_v8hi(_mm_set_epi16(0, -1, -2, -3, -4, -5, -6, -7), -7, -6, -5, -4, -3, -2, -1, 0)); 1036 1037 __m128i test_mm_set_epi32(int A, int B, int C, int D) { 1038 // CHECK-LABEL: test_mm_set_epi32 1039 // CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i32 0 1040 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1 1041 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2 1042 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3 1043 return _mm_set_epi32(A, B, C, D); 1044 } 1045 TEST_CONSTEXPR(match_v4si(_mm_set_epi32(1, -3, 5, -7), -7, 5, -3, 1)); 1046 1047 __m128i test_mm_set_epi64(__m64 A, __m64 B) { 1048 // CHECK-LABEL: test_mm_set_epi64 1049 // CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0 1050 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 1051 return _mm_set_epi64(A, B); 1052 } 1053 TEST_CONSTEXPR(match_v2di(_mm_set_epi64((__m64){-1}, (__m64){42}), 42, -1)); 1054 1055 __m128i test_mm_set_epi64x(long long A, long long B) { 1056 // CHECK-LABEL: test_mm_set_epi64x 1057 // CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0 1058 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 1059 return _mm_set_epi64x(A, B); 1060 } 1061 TEST_CONSTEXPR(match_v2di(_mm_set_epi64x(100, -1000), -1000, 100)); 1062 1063 __m128d test_mm_set_pd(double A, double B) { 1064 // CHECK-LABEL: test_mm_set_pd 1065 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0 1066 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 1067 return _mm_set_pd(A, B); 1068 } 1069 TEST_CONSTEXPR(match_m128d(_mm_set_pd(-9.0, +3.0), +3.0, -9.0)); 1070 1071 __m128d test_mm_set_pd1(double A) { 1072 // CHECK-LABEL: test_mm_set_pd1 1073 // CHECK: insertelement <2 x double> poison, double 
%{{.*}}, i32 0 1074 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 1075 return _mm_set_pd1(A); 1076 } 1077 TEST_CONSTEXPR(match_m128d(_mm_set_pd1(+5.0), +5.0, +5.0)); 1078 1079 __m128d test_mm_set_sd(double A) { 1080 // CHECK-LABEL: test_mm_set_sd 1081 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0 1082 // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1 1083 return _mm_set_sd(A); 1084 } 1085 TEST_CONSTEXPR(match_m128d(_mm_set_sd(+1.0), +1.0, +0.0)); 1086 1087 __m128i test_mm_set1_epi8(char A) { 1088 // CHECK-LABEL: test_mm_set1_epi8 1089 // CHECK: insertelement <16 x i8> poison, i8 %{{.*}}, i32 0 1090 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 1091 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2 1092 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3 1093 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4 1094 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5 1095 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6 1096 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7 1097 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8 1098 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9 1099 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10 1100 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11 1101 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12 1102 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13 1103 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14 1104 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 1105 return _mm_set1_epi8(A); 1106 } 1107 TEST_CONSTEXPR(match_v16qi(_mm_set1_epi8(99), 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); 1108 1109 __m128i test_mm_set1_epi16(short A) { 1110 // CHECK-LABEL: test_mm_set1_epi16 1111 // CHECK: insertelement <8 x i16> poison, i16 %{{.*}}, i32 0 1112 // CHECK: insertelement 
<8 x i16> %{{.*}}, i16 %{{.*}}, i32 1 1113 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2 1114 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3 1115 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4 1116 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5 1117 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6 1118 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7 1119 return _mm_set1_epi16(A); 1120 } 1121 TEST_CONSTEXPR(match_v8hi(_mm_set1_epi16(-128), -128, -128, -128, -128, -128, -128, -128, -128)); 1122 1123 __m128i test_mm_set1_epi32(int A) { 1124 // CHECK-LABEL: test_mm_set1_epi32 1125 // CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i32 0 1126 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1 1127 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2 1128 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3 1129 return _mm_set1_epi32(A); 1130 } 1131 TEST_CONSTEXPR(match_v4si(_mm_set1_epi32(55), 55, 55, 55, 55)); 1132 1133 __m128i test_mm_set1_epi64(__m64 A) { 1134 // CHECK-LABEL: test_mm_set1_epi64 1135 // CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0 1136 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 1137 return _mm_set1_epi64(A); 1138 } 1139 TEST_CONSTEXPR(match_v2di(_mm_set1_epi64((__m64){-65535}), -65535, -65535)); 1140 1141 __m128i test_mm_set1_epi64x(long long A) { 1142 // CHECK-LABEL: test_mm_set1_epi64x 1143 // CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0 1144 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 1145 return _mm_set1_epi64x(A); 1146 } 1147 TEST_CONSTEXPR(match_v2di(_mm_set1_epi64x(65536), 65536, 65536)); 1148 1149 __m128d test_mm_set1_pd(double A) { 1150 // CHECK-LABEL: test_mm_set1_pd 1151 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0 1152 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 1153 return _mm_set1_pd(A); 1154 } 1155 
TEST_CONSTEXPR(match_m128d(_mm_set1_pd(-42.0), -42.0, -42.0)); 1156 1157 __m128i test_mm_setr_epi8(char A, char B, char C, char D, 1158 char E, char F, char G, char H, 1159 char I, char J, char K, char L, 1160 char M, char N, char O, char P) { 1161 // CHECK-LABEL: test_mm_setr_epi8 1162 // CHECK: insertelement <16 x i8> poison, i8 %{{.*}}, i32 0 1163 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 1164 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2 1165 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3 1166 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4 1167 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5 1168 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6 1169 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7 1170 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8 1171 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9 1172 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10 1173 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11 1174 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12 1175 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13 1176 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14 1177 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 1178 return _mm_setr_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P); 1179 } 1180 TEST_CONSTEXPR(match_v16qi(_mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); 1181 1182 __m128i test_mm_setr_epi16(short A, short B, short C, short D, 1183 short E, short F, short G, short H) { 1184 // CHECK-LABEL: test_mm_setr_epi16 1185 // CHECK: insertelement <8 x i16> poison, i16 %{{.*}}, i32 0 1186 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1 1187 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2 1188 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, 
i32 3 1189 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4 1190 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5 1191 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6 1192 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7 1193 return _mm_setr_epi16(A, B, C, D, E, F, G, H); 1194 } 1195 TEST_CONSTEXPR(match_v8hi(_mm_setr_epi16(0, -1, -2, -3, -4, -5, -6, -7), 0, -1, -2, -3, -4, -5, -6, -7)); 1196 1197 __m128i test_mm_setr_epi32(int A, int B, int C, int D) { 1198 // CHECK-LABEL: test_mm_setr_epi32 1199 // CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i32 0 1200 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1 1201 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2 1202 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3 1203 return _mm_setr_epi32(A, B, C, D); 1204 } 1205 TEST_CONSTEXPR(match_v4si(_mm_setr_epi32(1, -3, 5, -7), 1, -3, 5, -7)); 1206 1207 __m128i test_mm_setr_epi64(__m64 A, __m64 B) { 1208 // CHECK-LABEL: test_mm_setr_epi64 1209 // CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0 1210 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 1211 return _mm_setr_epi64(A, B); 1212 } 1213 TEST_CONSTEXPR(match_v2di(_mm_setr_epi64((__m64){-1}, (__m64){42}), -1, 42)); 1214 1215 __m128d test_mm_setr_pd(double A, double B) { 1216 // CHECK-LABEL: test_mm_setr_pd 1217 // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0 1218 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 1219 return _mm_setr_pd(A, B); 1220 } 1221 TEST_CONSTEXPR(match_m128d(_mm_setr_pd(-9.0, +3.0), -9.0, +3.0)); 1222 1223 __m128d test_mm_setzero_pd(void) { 1224 // CHECK-LABEL: test_mm_setzero_pd 1225 // CHECK: store <2 x double> zeroinitializer 1226 return _mm_setzero_pd(); 1227 } 1228 TEST_CONSTEXPR(match_m128d(_mm_setzero_pd(), +0.0, +0.0)); 1229 1230 __m128i test_mm_setzero_si128(void) { 1231 // CHECK-LABEL: test_mm_setzero_si128 1232 // CHECK: store <2 x 
i64> zeroinitializer 1233 return _mm_setzero_si128(); 1234 } 1235 TEST_CONSTEXPR(match_m128i(_mm_setzero_si128(), 0, 0)); 1236 1237 __m128i test_mm_shuffle_epi32(__m128i A) { 1238 // CHECK-LABEL: test_mm_shuffle_epi32 1239 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer 1240 return _mm_shuffle_epi32(A, 0); 1241 } 1242 1243 __m128d test_mm_shuffle_pd(__m128d A, __m128d B) { 1244 // CHECK-LABEL: test_mm_shuffle_pd 1245 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 2> 1246 return _mm_shuffle_pd(A, B, 1); 1247 } 1248 1249 __m128i test_mm_shufflehi_epi16(__m128i A) { 1250 // CHECK-LABEL: test_mm_shufflehi_epi16 1251 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> 1252 return _mm_shufflehi_epi16(A, 0); 1253 } 1254 1255 __m128i test_mm_shufflelo_epi16(__m128i A) { 1256 // CHECK-LABEL: test_mm_shufflelo_epi16 1257 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7> 1258 return _mm_shufflelo_epi16(A, 0); 1259 } 1260 1261 __m128i test_mm_sll_epi16(__m128i A, __m128i B) { 1262 // CHECK-LABEL: test_mm_sll_epi16 1263 // CHECK: call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 1264 return _mm_sll_epi16(A, B); 1265 } 1266 1267 __m128i test_mm_sll_epi32(__m128i A, __m128i B) { 1268 // CHECK-LABEL: test_mm_sll_epi32 1269 // CHECK: call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) 1270 return _mm_sll_epi32(A, B); 1271 } 1272 1273 __m128i test_mm_sll_epi64(__m128i A, __m128i B) { 1274 // CHECK-LABEL: test_mm_sll_epi64 1275 // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) 1276 return _mm_sll_epi64(A, B); 1277 } 1278 1279 __m128i test_mm_slli_epi16(__m128i A) { 1280 // CHECK-LABEL: test_mm_slli_epi16 1281 // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x 
i16> %{{.*}}, i32 %{{.*}}) 1282 return _mm_slli_epi16(A, 1); 1283 } 1284 1285 __m128i test_mm_slli_epi16_1(__m128i A) { 1286 // CHECK-LABEL: test_mm_slli_epi16_1 1287 // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}}) 1288 return _mm_slli_epi16(A, -1); 1289 } 1290 1291 __m128i test_mm_slli_epi16_2(__m128i A, int B) { 1292 // CHECK-LABEL: test_mm_slli_epi16_2 1293 // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}}) 1294 return _mm_slli_epi16(A, B); 1295 } 1296 1297 __m128i test_mm_slli_epi32(__m128i A) { 1298 // CHECK-LABEL: test_mm_slli_epi32 1299 // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}}) 1300 return _mm_slli_epi32(A, 1); 1301 } 1302 1303 __m128i test_mm_slli_epi32_1(__m128i A) { 1304 // CHECK-LABEL: test_mm_slli_epi32_1 1305 // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}}) 1306 return _mm_slli_epi32(A, -1); 1307 } 1308 1309 __m128i test_mm_slli_epi32_2(__m128i A, int B) { 1310 // CHECK-LABEL: test_mm_slli_epi32_2 1311 // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}}) 1312 return _mm_slli_epi32(A, B); 1313 } 1314 1315 __m128i test_mm_slli_epi64(__m128i A) { 1316 // CHECK-LABEL: test_mm_slli_epi64 1317 // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}}) 1318 return _mm_slli_epi64(A, 1); 1319 } 1320 1321 __m128i test_mm_slli_epi64_1(__m128i A) { 1322 // CHECK-LABEL: test_mm_slli_epi64_1 1323 // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}}) 1324 return _mm_slli_epi64(A, -1); 1325 } 1326 1327 __m128i test_mm_slli_epi64_2(__m128i A, int B) { 1328 // CHECK-LABEL: test_mm_slli_epi64_2 1329 // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}}) 1330 return _mm_slli_epi64(A, B); 1331 } 1332 1333 __m128i test_mm_slli_si128(__m128i A) { 1334 // CHECK-LABEL: test_mm_slli_si128 1335 // CHECK: shufflevector <16 x i8> 
zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26> 1336 return _mm_slli_si128(A, 5); 1337 } 1338 1339 __m128i test_mm_slli_si128_2(__m128i A) { 1340 // CHECK-LABEL: test_mm_slli_si128_2 1341 // CHECK: ret <2 x i64> zeroinitializer 1342 return _mm_slli_si128(A, 17); 1343 } 1344 1345 __m128d test_mm_sqrt_pd(__m128d A) { 1346 // CHECK-LABEL: test_mm_sqrt_pd 1347 // CHECK: call {{.*}}<2 x double> @llvm.sqrt.v2f64(<2 x double> %{{.*}}) 1348 return _mm_sqrt_pd(A); 1349 } 1350 1351 __m128d test_mm_sqrt_sd(__m128d A, __m128d B) { 1352 // CHECK-LABEL: test_mm_sqrt_sd 1353 // CHECK: extractelement <2 x double> %{{.*}}, i64 0 1354 // CHECK: call double @llvm.sqrt.f64(double {{.*}}) 1355 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0 1356 return _mm_sqrt_sd(A, B); 1357 } 1358 1359 __m128i test_mm_sra_epi16(__m128i A, __m128i B) { 1360 // CHECK-LABEL: test_mm_sra_epi16 1361 // CHECK: call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 1362 return _mm_sra_epi16(A, B); 1363 } 1364 1365 __m128i test_mm_sra_epi32(__m128i A, __m128i B) { 1366 // CHECK-LABEL: test_mm_sra_epi32 1367 // CHECK: call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) 1368 return _mm_sra_epi32(A, B); 1369 } 1370 1371 __m128i test_mm_srai_epi16(__m128i A) { 1372 // CHECK-LABEL: test_mm_srai_epi16 1373 // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}}) 1374 return _mm_srai_epi16(A, 1); 1375 } 1376 1377 __m128i test_mm_srai_epi16_1(__m128i A) { 1378 // CHECK-LABEL: test_mm_srai_epi16_1 1379 // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}}) 1380 return _mm_srai_epi16(A, -1); 1381 } 1382 1383 __m128i test_mm_srai_epi16_2(__m128i A, int B) { 1384 // CHECK-LABEL: test_mm_srai_epi16_2 1385 // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}}) 
1386 return _mm_srai_epi16(A, B); 1387 } 1388 1389 __m128i test_mm_srai_epi32(__m128i A) { 1390 // CHECK-LABEL: test_mm_srai_epi32 1391 // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}}) 1392 return _mm_srai_epi32(A, 1); 1393 } 1394 1395 __m128i test_mm_srai_epi32_1(__m128i A) { 1396 // CHECK-LABEL: test_mm_srai_epi32_1 1397 // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}}) 1398 return _mm_srai_epi32(A, -1); 1399 } 1400 1401 __m128i test_mm_srai_epi32_2(__m128i A, int B) { 1402 // CHECK-LABEL: test_mm_srai_epi32_2 1403 // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}}) 1404 return _mm_srai_epi32(A, B); 1405 } 1406 1407 __m128i test_mm_srl_epi16(__m128i A, __m128i B) { 1408 // CHECK-LABEL: test_mm_srl_epi16 1409 // CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 1410 return _mm_srl_epi16(A, B); 1411 } 1412 1413 __m128i test_mm_srl_epi32(__m128i A, __m128i B) { 1414 // CHECK-LABEL: test_mm_srl_epi32 1415 // CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) 1416 return _mm_srl_epi32(A, B); 1417 } 1418 1419 __m128i test_mm_srl_epi64(__m128i A, __m128i B) { 1420 // CHECK-LABEL: test_mm_srl_epi64 1421 // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) 1422 return _mm_srl_epi64(A, B); 1423 } 1424 1425 __m128i test_mm_srli_epi16(__m128i A) { 1426 // CHECK-LABEL: test_mm_srli_epi16 1427 // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}}) 1428 return _mm_srli_epi16(A, 1); 1429 } 1430 1431 __m128i test_mm_srli_epi16_1(__m128i A) { 1432 // CHECK-LABEL: test_mm_srli_epi16_1 1433 // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}}) 1434 return _mm_srli_epi16(A, -1); 1435 } 1436 1437 __m128i test_mm_srli_epi16_2(__m128i A, int B) { 1438 // CHECK-LABEL: test_mm_srli_epi16 1439 // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> 
%{{.*}}, i32 %{{.*}}) 1440 return _mm_srli_epi16(A, B); 1441 } 1442 1443 __m128i test_mm_srli_epi32(__m128i A) { 1444 // CHECK-LABEL: test_mm_srli_epi32 1445 // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}}) 1446 return _mm_srli_epi32(A, 1); 1447 } 1448 1449 __m128i test_mm_srli_epi32_1(__m128i A) { 1450 // CHECK-LABEL: test_mm_srli_epi32_1 1451 // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}}) 1452 return _mm_srli_epi32(A, -1); 1453 } 1454 1455 __m128i test_mm_srli_epi32_2(__m128i A, int B) { 1456 // CHECK-LABEL: test_mm_srli_epi32_2 1457 // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}}) 1458 return _mm_srli_epi32(A, B); 1459 } 1460 1461 __m128i test_mm_srli_epi64(__m128i A) { 1462 // CHECK-LABEL: test_mm_srli_epi64 1463 // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}}) 1464 return _mm_srli_epi64(A, 1); 1465 } 1466 1467 __m128i test_mm_srli_epi64_1(__m128i A) { 1468 // CHECK-LABEL: test_mm_srli_epi64_1 1469 // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}}) 1470 return _mm_srli_epi64(A, -1); 1471 } 1472 1473 __m128i test_mm_srli_epi64_2(__m128i A, int B) { 1474 // CHECK-LABEL: test_mm_srli_epi64_2 1475 // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}}) 1476 return _mm_srli_epi64(A, B); 1477 } 1478 1479 __m128i test_mm_srli_si128(__m128i A) { 1480 // CHECK-LABEL: test_mm_srli_si128 1481 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20> 1482 return _mm_srli_si128(A, 5); 1483 } 1484 1485 __m128i test_mm_srli_si128_2(__m128i A) { 1486 // CHECK-LABEL: test_mm_srli_si128_2 1487 // ret <2 x i64> zeroinitializer 1488 return _mm_srli_si128(A, 17); 1489 } 1490 1491 void test_mm_store_pd(double* A, __m128d B) { 1492 // 
CHECK-LABEL: test_mm_store_pd 1493 // CHECK: store <2 x double> %{{.*}}, ptr %{{.*}}, align 16 1494 _mm_store_pd(A, B); 1495 } 1496 1497 void test_mm_store_pd1(double* x, __m128d y) { 1498 // CHECK-LABEL: test_mm_store_pd1 1499 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer 1500 // CHECK: store <2 x double> %{{.*}}, ptr {{.*}}, align 16 1501 _mm_store_pd1(x, y); 1502 } 1503 1504 void test_mm_store_sd(double* A, __m128d B) { 1505 // CHECK-LABEL: test_mm_store_sd 1506 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 1507 // CHECK: store double %{{.*}}, ptr %{{.*}}, align 1{{$}} 1508 _mm_store_sd(A, B); 1509 } 1510 1511 void test_mm_store_si128(__m128i* A, __m128i B) { 1512 // CHECK-LABEL: test_mm_store_si128 1513 // CHECK: store <2 x i64> %{{.*}}, ptr %{{.*}}, align 16 1514 _mm_store_si128(A, B); 1515 } 1516 1517 void test_mm_store1_pd(double* x, __m128d y) { 1518 // CHECK-LABEL: test_mm_store1_pd 1519 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer 1520 // CHECK: store <2 x double> %{{.*}}, ptr %{{.*}}, align 16 1521 _mm_store1_pd(x, y); 1522 } 1523 1524 void test_mm_storeh_pd(double* A, __m128d B) { 1525 // CHECK-LABEL: test_mm_storeh_pd 1526 // CHECK: extractelement <2 x double> %{{.*}}, i32 1 1527 // CHECK: store double %{{.*}}, ptr %{{.*}}, align 1{{$}} 1528 _mm_storeh_pd(A, B); 1529 } 1530 1531 void test_mm_storel_epi64(__m128i x, void* y) { 1532 // CHECK-LABEL: test_mm_storel_epi64 1533 // CHECK: extractelement <2 x i64> %{{.*}}, i32 0 1534 // CHECK: store {{.*}} ptr {{.*}}, align 1{{$}} 1535 _mm_storel_epi64((__m128i_u*)y, x); 1536 } 1537 1538 void test_mm_storel_pd(double* A, __m128d B) { 1539 // CHECK-LABEL: test_mm_storel_pd 1540 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 1541 // CHECK: store double %{{.*}}, ptr %{{.*}}, align 1{{$}} 1542 _mm_storel_pd(A, B); 1543 } 1544 1545 void test_mm_storer_pd(__m128d A, double* B) { 1546 // CHECK-LABEL: 
test_mm_storer_pd 1547 // CHECK: shufflevector <2 x double> {{.*}}, <2 x double> {{.*}}, <2 x i32> <i32 1, i32 0> 1548 // CHECK: store {{.*}} ptr {{.*}}, align 16{{$}} 1549 _mm_storer_pd(B, A); 1550 } 1551 1552 void test_mm_storeu_pd(double* A, __m128d B) { 1553 // CHECK-LABEL: test_mm_storeu_pd 1554 // CHECK: store {{.*}} ptr {{.*}}, align 1{{$}} 1555 // CHECK-NEXT: ret void 1556 _mm_storeu_pd(A, B); 1557 } 1558 1559 void test_mm_storeu_si128(__m128i* A, __m128i B) { 1560 // CHECK-LABEL: test_mm_storeu_si128 1561 // CHECK: store <2 x i64> %{{.*}}, ptr %{{.*}}, align 1{{$}} 1562 // CHECK-NEXT: ret void 1563 _mm_storeu_si128(A, B); 1564 } 1565 1566 void test_mm_storeu_si64(void* A, __m128i B) { 1567 // CHECK-LABEL: test_mm_storeu_si64 1568 // CHECK: [[EXT:%.*]] = extractelement <2 x i64> %{{.*}}, i32 0 1569 // CHECK: store i64 [[EXT]], ptr %{{.*}}, align 1{{$}} 1570 // CHECK-NEXT: ret void 1571 _mm_storeu_si64(A, B); 1572 } 1573 1574 void test_mm_storeu_si32(void* A, __m128i B) { 1575 // CHECK-LABEL: test_mm_storeu_si32 1576 // CHECK: [[EXT:%.*]] = extractelement <4 x i32> %{{.*}}, i32 0 1577 // CHECK: store i32 [[EXT]], ptr %{{.*}}, align 1{{$}} 1578 // CHECK-NEXT: ret void 1579 _mm_storeu_si32(A, B); 1580 } 1581 1582 void test_mm_storeu_si16(void* A, __m128i B) { 1583 // CHECK-LABEL: test_mm_storeu_si16 1584 // CHECK: [[EXT:%.*]] = extractelement <8 x i16> %{{.*}}, i32 0 1585 // CHECK: store i16 [[EXT]], ptr %{{.*}}, align 1{{$}} 1586 // CHECK-NEXT: ret void 1587 _mm_storeu_si16(A, B); 1588 } 1589 1590 void test_mm_stream_pd(double *A, __m128d B) { 1591 // CHECK-LABEL: test_mm_stream_pd 1592 // CHECK: store <2 x double> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal 1593 _mm_stream_pd(A, B); 1594 } 1595 1596 void test_mm_stream_pd_void(void *A, __m128d B) { 1597 // CHECK-LABEL: test_mm_stream_pd_void 1598 // CHECK: store <2 x double> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal 1599 _mm_stream_pd(A, B); 1600 } 1601 1602 void test_mm_stream_si32(int *A, int B) { 1603 
// CHECK-LABEL: test_mm_stream_si32 1604 // CHECK: store i32 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal 1605 _mm_stream_si32(A, B); 1606 } 1607 1608 void test_mm_stream_si32_void(void *A, int B) { 1609 // CHECK-LABEL: test_mm_stream_si32_void 1610 // CHECK: store i32 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal 1611 _mm_stream_si32(A, B); 1612 } 1613 1614 #ifdef __x86_64__ 1615 void test_mm_stream_si64(long long *A, long long B) { 1616 // X64-LABEL: test_mm_stream_si64 1617 // X64: store i64 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal 1618 _mm_stream_si64(A, B); 1619 } 1620 1621 void test_mm_stream_si64_void(void *A, long long B) { 1622 // X64-LABEL: test_mm_stream_si64_void 1623 // X64: store i64 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal 1624 _mm_stream_si64(A, B); 1625 } 1626 #endif 1627 1628 void test_mm_stream_si128(__m128i *A, __m128i B) { 1629 // CHECK-LABEL: test_mm_stream_si128 1630 // CHECK: store <2 x i64> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal 1631 _mm_stream_si128(A, B); 1632 } 1633 1634 void test_mm_stream_si128_void(void *A, __m128i B) { 1635 // CHECK-LABEL: test_mm_stream_si128_void 1636 // CHECK: store <2 x i64> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal 1637 _mm_stream_si128(A, B); 1638 } 1639 1640 __m128i test_mm_sub_epi8(__m128i A, __m128i B) { 1641 // CHECK-LABEL: test_mm_sub_epi8 1642 // CHECK: sub <16 x i8> 1643 return _mm_sub_epi8(A, B); 1644 } 1645 1646 __m128i test_mm_sub_epi16(__m128i A, __m128i B) { 1647 // CHECK-LABEL: test_mm_sub_epi16 1648 // CHECK: sub <8 x i16> 1649 return _mm_sub_epi16(A, B); 1650 } 1651 1652 __m128i test_mm_sub_epi32(__m128i A, __m128i B) { 1653 // CHECK-LABEL: test_mm_sub_epi32 1654 // CHECK: sub <4 x i32> 1655 return _mm_sub_epi32(A, B); 1656 } 1657 TEST_CONSTEXPR(match_v4si(_mm_sub_epi32((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-10, +8, +6, -4}), +11, -10, -3, 0)); 1658 1659 __m128i test_mm_sub_epi64(__m128i A, __m128i B) { 1660 // CHECK-LABEL: test_mm_sub_epi64 1661 // CHECK: sub <2 x i64> 
1662 return _mm_sub_epi64(A, B); 1663 } 1664 TEST_CONSTEXPR(match_v2di(_mm_sub_epi64((__m128i)(__v2di){+5, -3}, (__m128i)(__v2di){-9, +8}), +14, -11)); 1665 1666 __m128d test_mm_sub_pd(__m128d A, __m128d B) { 1667 // CHECK-LABEL: test_mm_sub_pd 1668 // CHECK: fsub <2 x double> 1669 return _mm_sub_pd(A, B); 1670 } 1671 TEST_CONSTEXPR(match_m128d(_mm_sub_pd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), -4.0, +2.0)); 1672 1673 __m128d test_mm_sub_sd(__m128d A, __m128d B) { 1674 // CHECK-LABEL: test_mm_sub_sd 1675 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 1676 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 1677 // CHECK: fsub double 1678 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 1679 return _mm_sub_sd(A, B); 1680 } 1681 TEST_CONSTEXPR(match_m128d(_mm_sub_sd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), -4.0, -3.0)); 1682 1683 __m128i test_mm_subs_epi8(__m128i A, __m128i B) { 1684 // CHECK-LABEL: test_mm_subs_epi8 1685 // CHECK: call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 1686 return _mm_subs_epi8(A, B); 1687 } 1688 1689 __m128i test_mm_subs_epi16(__m128i A, __m128i B) { 1690 // CHECK-LABEL: test_mm_subs_epi16 1691 // CHECK: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 1692 return _mm_subs_epi16(A, B); 1693 } 1694 1695 __m128i test_mm_subs_epu8(__m128i A, __m128i B) { 1696 // CHECK-LABEL: test_mm_subs_epu8 1697 // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 1698 // CHECK: call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 1699 return _mm_subs_epu8(A, B); 1700 } 1701 1702 __m128i test_mm_subs_epu16(__m128i A, __m128i B) { 1703 // CHECK-LABEL: test_mm_subs_epu16 1704 // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 1705 // CHECK: call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 1706 return _mm_subs_epu16(A, B); 1707 } 1708 1709 int 
test_mm_ucomieq_sd(__m128d A, __m128d B) { 1710 // CHECK-LABEL: test_mm_ucomieq_sd 1711 // CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 1712 return _mm_ucomieq_sd(A, B); 1713 } 1714 1715 int test_mm_ucomige_sd(__m128d A, __m128d B) { 1716 // CHECK-LABEL: test_mm_ucomige_sd 1717 // CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 1718 return _mm_ucomige_sd(A, B); 1719 } 1720 1721 int test_mm_ucomigt_sd(__m128d A, __m128d B) { 1722 // CHECK-LABEL: test_mm_ucomigt_sd 1723 // CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 1724 return _mm_ucomigt_sd(A, B); 1725 } 1726 1727 int test_mm_ucomile_sd(__m128d A, __m128d B) { 1728 // CHECK-LABEL: test_mm_ucomile_sd 1729 // CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 1730 return _mm_ucomile_sd(A, B); 1731 } 1732 1733 int test_mm_ucomilt_sd(__m128d A, __m128d B) { 1734 // CHECK-LABEL: test_mm_ucomilt_sd 1735 // CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 1736 return _mm_ucomilt_sd(A, B); 1737 } 1738 1739 int test_mm_ucomineq_sd(__m128d A, __m128d B) { 1740 // CHECK-LABEL: test_mm_ucomineq_sd 1741 // CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 1742 return _mm_ucomineq_sd(A, B); 1743 } 1744 1745 __m128d test_mm_undefined_pd(void) { 1746 // X64-LABEL: test_mm_undefined_pd 1747 // X64: ret <2 x double> zeroinitializer 1748 // 1749 // X86-LABEL: test_mm_undefined_pd 1750 // X86: store <2 x double> zeroinitializer 1751 return _mm_undefined_pd(); 1752 } 1753 1754 __m128i test_mm_undefined_si128(void) { 1755 // CHECK-LABEL: test_mm_undefined_si128 1756 // CHECK: ret <2 x i64> zeroinitializer 1757 return _mm_undefined_si128(); 1758 } 1759 1760 __m128i test_mm_unpackhi_epi8(__m128i A, __m128i B) { 1761 // CHECK-LABEL: test_mm_unpackhi_epi8 1762 // CHECK: 
shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 1763 return _mm_unpackhi_epi8(A, B); 1764 } 1765 1766 __m128i test_mm_unpackhi_epi16(__m128i A, __m128i B) { 1767 // CHECK-LABEL: test_mm_unpackhi_epi16 1768 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 1769 return _mm_unpackhi_epi16(A, B); 1770 } 1771 1772 __m128i test_mm_unpackhi_epi32(__m128i A, __m128i B) { 1773 // CHECK-LABEL: test_mm_unpackhi_epi32 1774 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1775 return _mm_unpackhi_epi32(A, B); 1776 } 1777 1778 __m128i test_mm_unpackhi_epi64(__m128i A, __m128i B) { 1779 // CHECK-LABEL: test_mm_unpackhi_epi64 1780 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3> 1781 return _mm_unpackhi_epi64(A, B); 1782 } 1783 1784 __m128d test_mm_unpackhi_pd(__m128d A, __m128d B) { 1785 // CHECK-LABEL: test_mm_unpackhi_pd 1786 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3> 1787 return _mm_unpackhi_pd(A, B); 1788 } 1789 TEST_CONSTEXPR(match_m128d(_mm_unpackhi_pd((__m128d){+2.0, +8.0}, (__m128d){-4.0, -2.0}), +8.0, -2.0)); 1790 1791 __m128i test_mm_unpacklo_epi8(__m128i A, __m128i B) { 1792 // CHECK-LABEL: test_mm_unpacklo_epi8 1793 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> 1794 return _mm_unpacklo_epi8(A, B); 1795 } 1796 1797 __m128i test_mm_unpacklo_epi16(__m128i A, __m128i B) { 1798 // CHECK-LABEL: test_mm_unpacklo_epi16 1799 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 1800 return 
_mm_unpacklo_epi16(A, B); 1801 } 1802 1803 __m128i test_mm_unpacklo_epi32(__m128i A, __m128i B) { 1804 // CHECK-LABEL: test_mm_unpacklo_epi32 1805 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 1806 return _mm_unpacklo_epi32(A, B); 1807 } 1808 1809 __m128i test_mm_unpacklo_epi64(__m128i A, __m128i B) { 1810 // CHECK-LABEL: test_mm_unpacklo_epi64 1811 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2> 1812 return _mm_unpacklo_epi64(A, B); 1813 } 1814 1815 __m128d test_mm_unpacklo_pd(__m128d A, __m128d B) { 1816 // CHECK-LABEL: test_mm_unpacklo_pd 1817 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2> 1818 return _mm_unpacklo_pd(A, B); 1819 } 1820 TEST_CONSTEXPR(match_m128d(_mm_unpacklo_pd((__m128d){+2.0, +8.0}, (__m128d){-4.0, -2.0}), +2.0, -4.0)); 1821 1822 __m128d test_mm_xor_pd(__m128d A, __m128d B) { 1823 // CHECK-LABEL: test_mm_xor_pd 1824 // CHECK: xor <2 x i64> %{{.*}}, %{{.*}} 1825 return _mm_xor_pd(A, B); 1826 } 1827 TEST_CONSTEXPR(match_m128d(_mm_xor_pd((__m128d){+1.0, -3.0}, (__m128d){+0.0, -0.0}), +1.0, +3.0)); 1828 1829 __m128i test_mm_xor_si128(__m128i A, __m128i B) { 1830 // CHECK-LABEL: test_mm_xor_si128 1831 // CHECK: xor <2 x i64> %{{.*}}, %{{.*}} 1832 return _mm_xor_si128(A, B); 1833 } 1834