// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s


#include <immintrin.h>
#include "builtin_test_helpers.h"

// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll

__m128 test_mm_add_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_add_ps
  // CHECK: fadd <4 x float>
  return _mm_add_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_add_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +9.0f, +4.0f, +4.0f, +5.0f));

__m128 test_mm_add_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_add_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fadd float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_add_ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_add_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +9.0f, +0.0f, +2.0f, +4.0f));

__m128 test_mm_and_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_and_ps
  // CHECK: and <4 x i32>
  return _mm_and_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_and_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -0.0f, -0.0f, +0.0f, +7.0f));

__m128 test_mm_andnot_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_andnot_ps
  // CHECK: xor <4 x i32> %{{.*}}, splat (i32 -1)
  // CHECK: and <4 x i32>
  return _mm_andnot_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_andnot_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), +0.0f, +0.0f, +0.0f, +0.0f));

__m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) {
  // CHECK-LABEL: test_mm_cmp_ps_eq_oq
  // CHECK: fcmp oeq <4 x float> %{{.*}}, %{{.*}}
  return _mm_cmp_ps(a, b, _CMP_EQ_OQ);
}

__m128 test_mm_cmp_ps_lt_os(__m128 a, __m128 b) {
  // CHECK-LABEL: test_mm_cmp_ps_lt_os
  // CHECK: fcmp olt <4 x float> %{{.*}}, %{{.*}}
  return _mm_cmp_ps(a, b, _CMP_LT_OS);
}

__m128 test_mm_cmp_ps_le_os(__m128 a, __m128 b) {
  // CHECK-LABEL: test_mm_cmp_ps_le_os
  // CHECK: fcmp ole <4 x float> %{{.*}}, %{{.*}}
  return _mm_cmp_ps(a, b, _CMP_LE_OS);
}

__m128 test_mm_cmp_ps_unord_q(__m128 a, __m128 b) {
  // CHECK-LABEL: test_mm_cmp_ps_unord_q
  // CHECK: fcmp uno <4 x float> %{{.*}}, %{{.*}}
  return _mm_cmp_ps(a, b, _CMP_UNORD_Q);
}

__m128 test_mm_cmp_ps_neq_uq(__m128 a, __m128 b) {
  // CHECK-LABEL: test_mm_cmp_ps_neq_uq
  // CHECK: fcmp une <4 x float> %{{.*}}, %{{.*}}
  return _mm_cmp_ps(a, b, _CMP_NEQ_UQ);
}

__m128 test_mm_cmp_ps_nlt_us(__m128 a, __m128 b) {
  // CHECK-LABEL: test_mm_cmp_ps_nlt_us
  // CHECK: fcmp uge <4 x float> %{{.*}}, %{{.*}}
  return _mm_cmp_ps(a, b, _CMP_NLT_US);
}

__m128 test_mm_cmp_ps_nle_us(__m128 a, __m128 b) {
  // CHECK-LABEL: test_mm_cmp_ps_nle_us
  // CHECK: fcmp ugt <4 x float> %{{.*}}, %{{.*}}
  return _mm_cmp_ps(a, b, _CMP_NLE_US);
}

__m128 test_mm_cmp_ps_ord_q(__m128 a, __m128 b) {
  // CHECK-LABEL: test_mm_cmp_ps_ord_q
  // CHECK: fcmp ord <4 x float> %{{.*}}, %{{.*}}
  return _mm_cmp_ps(a, b, _CMP_ORD_Q);
}

__m128 test_mm_cmp_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_cmp_ss
  // CHECK: call {{.*}}<4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
  return _mm_cmp_ss(A, B, _CMP_ORD_Q);
}

__m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpeq_ps
  // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpeq_ps(__a, __b);
}

__m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpeq_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
  return _mm_cmpeq_ss(__a, __b);
}

__m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpge_ps
  // CHECK: [[CMP:%.*]] = fcmp ole <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpge_ps(__a, __b);
}

__m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpge_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpge_ss(__a, __b);
}

__m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpgt_ps
  // CHECK: [[CMP:%.*]] = fcmp olt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpgt_ps(__a, __b);
}

__m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpgt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpgt_ss(__a, __b);
}

__m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmple_ps
  // CHECK: [[CMP:%.*]] = fcmp ole <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmple_ps(__a, __b);
}

__m128 test_mm_cmple_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmple_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  return _mm_cmple_ss(__a, __b);
}

__m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmplt_ps
  // CHECK: [[CMP:%.*]] = fcmp olt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmplt_ps(__a, __b);
}

__m128 test_mm_cmplt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmplt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  return _mm_cmplt_ss(__a, __b);
}

__m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpneq_ps
  // CHECK: [[CMP:%.*]] = fcmp une <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpneq_ps(__a, __b);
}

__m128 test_mm_cmpneq_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpneq_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
  return _mm_cmpneq_ss(__a, __b);
}

__m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnge_ps
  // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpnge_ps(__a, __b);
}

__m128 test_mm_cmpnge_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnge_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpnge_ss(__a, __b);
}

__m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpngt_ps
  // CHECK: [[CMP:%.*]] = fcmp uge <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpngt_ps(__a, __b);
}

__m128 test_mm_cmpngt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpngt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpngt_ss(__a, __b);
}

__m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnle_ps
  // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpnle_ps(__a, __b);
}

__m128 test_mm_cmpnle_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnle_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  return _mm_cmpnle_ss(__a, __b);
}

__m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnlt_ps
  // CHECK: [[CMP:%.*]] = fcmp uge <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpnlt_ps(__a, __b);
}

__m128 test_mm_cmpnlt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnlt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  return _mm_cmpnlt_ss(__a, __b);
}

__m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpord_ps
  // CHECK: [[CMP:%.*]] = fcmp ord <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpord_ps(__a, __b);
}

__m128 test_mm_cmpord_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpord_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
  return _mm_cmpord_ss(__a, __b);
}

__m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpunord_ps
  // CHECK: [[CMP:%.*]] = fcmp uno <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpunord_ps(__a, __b);
}

__m128 test_mm_cmpunord_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpunord_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
  return _mm_cmpunord_ss(__a, __b);
}

int test_mm_comieq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comieq_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.comieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comieq_ss(A, B);
}

int test_mm_comige_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comige_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.comige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comige_ss(A, B);
}

int test_mm_comigt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comigt_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.comigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comigt_ss(A, B);
}

int test_mm_comile_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comile_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.comile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comile_ss(A, B);
}

int test_mm_comilt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comilt_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.comilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comilt_ss(A, B);
}

int test_mm_comineq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comineq_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.comineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comineq_ss(A, B);
}

int test_mm_cvt_ss2si(__m128 A) {
  // CHECK-LABEL: test_mm_cvt_ss2si
  // CHECK: call {{.*}}i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}})
  return _mm_cvt_ss2si(A);
}

__m128 test_mm_cvtsi32_ss(__m128 A, int B) {
  // CHECK-LABEL: test_mm_cvtsi32_ss
  // CHECK: sitofp i32 %{{.*}} to float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_cvtsi32_ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_cvtsi32_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, 42), +42.0f, +0.0f, +2.0f, +4.0f));

__m128 test_mm_cvt_si2ss(__m128 A, int B) {
  // CHECK-LABEL: test_mm_cvt_si2ss
  // CHECK: sitofp i32 %{{.*}} to float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_cvt_si2ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_cvt_si2ss((__m128){+4.0f, +2.0f, +0.0f, +4.0f}, -99), -99.0f, +2.0f, +0.0f, +4.0f));

#ifdef __x86_64__
__m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
  // CHECK-LABEL: test_mm_cvtsi64_ss
  // CHECK: sitofp i64 %{{.*}} to float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_cvtsi64_ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_cvtsi64_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, 555), +555.0f, +0.0f, +2.0f, +4.0f));
#endif

float test_mm_cvtss_f32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_f32
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  return _mm_cvtss_f32(A);
}
TEST_CONSTEXPR(_mm_cvtss_f32((__m128){+8.0f, +4.0f, +2.0f, +1.0f}) == +8.0f);

int test_mm_cvtss_si32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_si32
  // CHECK: call {{.*}}i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}})
  return _mm_cvtss_si32(A);
}

#ifdef __x86_64__
long long test_mm_cvtss_si64(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_si64
  // CHECK: call {{.*}}i64 @llvm.x86.sse.cvtss2si64(<4 x float> %{{.*}})
  return _mm_cvtss_si64(A);
}
#endif

int test_mm_cvtt_ss2si(__m128 A) {
  // CHECK-LABEL: test_mm_cvtt_ss2si
  // CHECK: call {{.*}}i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}})
  return _mm_cvtt_ss2si(A);
}

int test_mm_cvttss_si32(__m128 A) {
  // CHECK-LABEL: test_mm_cvttss_si32
  // CHECK: call {{.*}}i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}})
  return _mm_cvttss_si32(A);
}

#ifdef __x86_64__
long long test_mm_cvttss_si64(__m128 A) {
  // CHECK-LABEL: test_mm_cvttss_si64
  // CHECK: call {{.*}}i64 @llvm.x86.sse.cvttss2si64(<4 x float> %{{.*}})
  return _mm_cvttss_si64(A);
}
#endif

__m128 test_mm_div_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_div_ps
  // CHECK: fdiv <4 x float>
  return _mm_div_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_div_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +0.125f, +0.0f, +1.0f, +4.0f));

__m128 test_mm_div_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_div_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fdiv float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_div_ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_div_ss((__m128){+1.0f, +5.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +0.125f, +5.0f, +2.0f, +4.0f));

unsigned int test_MM_GET_EXCEPTION_MASK(void) {
  // CHECK-LABEL: test_MM_GET_EXCEPTION_MASK
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr %{{.*}})
  // CHECK: and i32 %{{.*}}, 8064
  return _MM_GET_EXCEPTION_MASK();
}

unsigned int test_MM_GET_EXCEPTION_STATE(void) {
  // CHECK-LABEL: test_MM_GET_EXCEPTION_STATE
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr %{{.*}})
  // CHECK: and i32 %{{.*}}, 63
  return _MM_GET_EXCEPTION_STATE();
}

unsigned int test_MM_GET_FLUSH_ZERO_MODE(void) {
  // CHECK-LABEL: test_MM_GET_FLUSH_ZERO_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr %{{.*}})
  // CHECK: and i32 %{{.*}}, 32768
  return _MM_GET_FLUSH_ZERO_MODE();
}

unsigned int test_MM_GET_ROUNDING_MODE(void) {
  // CHECK-LABEL: test_MM_GET_ROUNDING_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr %{{.*}})
  // CHECK: and i32 %{{.*}}, 24576
  return _MM_GET_ROUNDING_MODE();
}

unsigned int test_mm_getcsr(void) {
  // CHECK-LABEL: test_mm_getcsr
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr %{{.*}})
  // CHECK: load i32
  return _mm_getcsr();
}

__m128 test_mm_load_ps(float* y) {
  // CHECK-LABEL: test_mm_load_ps
  // CHECK: load <4 x float>, ptr {{.*}}, align 16
  return _mm_load_ps(y);
}

__m128 test_mm_load_ps1(float* y) {
  // CHECK-LABEL: test_mm_load_ps1
  // CHECK: load float, ptr %{{.*}}, align 4
  // CHECK: insertelement <4 x float> poison, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 3
  return _mm_load_ps1(y);
}

__m128 test_mm_load_ss(float* y) {
  // CHECK-LABEL: test_mm_load_ss
  // CHECK: load float, ptr {{.*}}, align 1{{$}}
  // CHECK: insertelement <4 x float> poison, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 3
  return _mm_load_ss(y);
}

__m128 test_mm_load1_ps(float* y) {
  // CHECK-LABEL: test_mm_load1_ps
  // CHECK: load float, ptr %{{.*}}, align 4
  // CHECK: insertelement <4 x float> poison, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 3
  return _mm_load1_ps(y);
}

__m128 test_mm_loadh_pi(__m128 x, __m64* y) {
  // CHECK-LABEL: test_mm_loadh_pi
  // CHECK: load <2 x float>, ptr {{.*}}, align 1{{$}}
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  return _mm_loadh_pi(x,y);
}

__m128 test_mm_loadl_pi(__m128 x, __m64* y) {
  // CHECK-LABEL: test_mm_loadl_pi
  // CHECK: load <2 x float>, ptr {{.*}}, align 1{{$}}
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  return _mm_loadl_pi(x,y);
}

__m128 test_mm_loadr_ps(float* A) {
  // CHECK-LABEL: test_mm_loadr_ps
  // CHECK: load <4 x float>, ptr %{{.*}}, align 16
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  return _mm_loadr_ps(A);
}

__m128 test_mm_loadu_ps(float* A) {
  // CHECK-LABEL: test_mm_loadu_ps
  // CHECK: load <4 x float>, ptr %{{.*}}, align 1{{$}}
  return _mm_loadu_ps(A);
}

__m128 test_mm_max_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_max_ps
  // CHECK: @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_max_ps(A, B);
}

__m128 test_mm_max_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_max_ss
  // CHECK: @llvm.x86.sse.max.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_max_ss(A, B);
}

__m128 test_mm_min_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_min_ps
  // CHECK: @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_min_ps(A, B);
}

__m128 test_mm_min_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_min_ss
  // CHECK: @llvm.x86.sse.min.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_min_ss(A, B);
}

__m128 test_mm_move_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_move_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_move_ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_move_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +2.0f, +4.0f));

__m128 test_mm_movehl_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_movehl_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  return _mm_movehl_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_movehl_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +2.0f, +1.0f, +2.0f, +4.0f));

__m128 test_mm_movelh_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_movelh_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  return _mm_movelh_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_movelh_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +1.0f, +0.0f, +8.0f, +4.0f));

int test_mm_movemask_ps(__m128 A) {
  // CHECK-LABEL: test_mm_movemask_ps
  // CHECK: call {{.*}}i32 @llvm.x86.sse.movmsk.ps(<4 x float> %{{.*}})
  return _mm_movemask_ps(A);
}

__m128 test_mm_mul_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_mul_ps
  // CHECK: fmul <4 x float>
  return _mm_mul_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_mul_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +4.0f, +4.0f));

__m128 test_mm_mul_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_mul_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fmul float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_mul_ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_mul_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +2.0f, +4.0f));

__m128 test_mm_or_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_or_ps
  // CHECK: or <4 x i32>
  return _mm_or_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_or_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -4.0f, -5.0f, -6.0f, +7.0f));

void test_mm_prefetch(char const* p) {
  // CHECK-LABEL: test_mm_prefetch
  // CHECK: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 0, i32 1)
  _mm_prefetch(p, 0);
}

__m128 test_mm_rcp_ps(__m128 x) {
  // CHECK-LABEL: test_mm_rcp_ps
  // CHECK: call {{.*}}<4 x float> @llvm.x86.sse.rcp.ps(<4 x float> {{.*}})
  return _mm_rcp_ps(x);
}

__m128 test_mm_rcp_ss(__m128 x) {
  // CHECK-LABEL: test_mm_rcp_ss
  // CHECK: call {{.*}}<4 x float> @llvm.x86.sse.rcp.ss(<4 x float> {{.*}})
  return _mm_rcp_ss(x);
}

__m128 test_mm_rsqrt_ps(__m128 x) {
  // CHECK-LABEL: test_mm_rsqrt_ps
  // CHECK: call {{.*}}<4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> {{.*}})
  return _mm_rsqrt_ps(x);
}

__m128 test_mm_rsqrt_ss(__m128 x) {
  // CHECK-LABEL: test_mm_rsqrt_ss
  // CHECK: call {{.*}}<4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> {{.*}})
  return _mm_rsqrt_ss(x);
}

void test_MM_SET_EXCEPTION_MASK(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_EXCEPTION_MASK
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr {{.*}})
  // CHECK: load i32
  // CHECK: and i32 {{.*}}, -8065
  // CHECK: or i32
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(ptr {{.*}})
  _MM_SET_EXCEPTION_MASK(A);
}

void test_MM_SET_EXCEPTION_STATE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_EXCEPTION_STATE
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr {{.*}})
  // CHECK: load i32
  // CHECK: and i32 {{.*}}, -64
  // CHECK: or i32
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(ptr {{.*}})
  _MM_SET_EXCEPTION_STATE(A);
}

void test_MM_SET_FLUSH_ZERO_MODE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_FLUSH_ZERO_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr {{.*}})
  // CHECK: load i32
  // CHECK: and i32 {{.*}}, -32769
  // CHECK: or i32
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(ptr {{.*}})
  _MM_SET_FLUSH_ZERO_MODE(A);
}

__m128 test_mm_set_ps(float A, float B, float C, float D) {
  // CHECK-LABEL: test_mm_set_ps
  // CHECK: insertelement <4 x float> poison, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set_ps(A, B, C, D);
}
TEST_CONSTEXPR(match_m128(_mm_set_ps(+0.0f, +1.0f, +2.0f, +3.0f), +3.0f, +2.0f, +1.0f, +0.0f));

__m128 test_mm_set_ps1(float A) {
  // CHECK-LABEL: test_mm_set_ps1
  // CHECK: insertelement <4 x float> poison, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set_ps1(A);
}
TEST_CONSTEXPR(match_m128(_mm_set_ps1(-2.0f), -2.0f, -2.0f, -2.0f, -2.0f));

void test_MM_SET_ROUNDING_MODE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_ROUNDING_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr {{.*}})
  // CHECK: load i32
  // CHECK: and i32 {{.*}}, -24577
  // CHECK: or i32
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(ptr {{.*}})
  _MM_SET_ROUNDING_MODE(A);
}

__m128 test_mm_set_ss(float A) {
  // CHECK-LABEL: test_mm_set_ss
  // CHECK: insertelement <4 x float> poison, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 3
  return _mm_set_ss(A);
}
TEST_CONSTEXPR(match_m128(_mm_set_ss(1.0f), +1.0f, +0.0f, +0.0f, +0.0f));

__m128 test_mm_set1_ps(float A) {
  // CHECK-LABEL: test_mm_set1_ps
  // CHECK: insertelement <4 x float> poison, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set1_ps(A);
}
TEST_CONSTEXPR(match_m128(_mm_set1_ps(2.0f), +2.0f, +2.0f, +2.0f, +2.0f));

void test_mm_setcsr(unsigned int A) {
  // CHECK-LABEL: test_mm_setcsr
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(ptr {{.*}})
  _mm_setcsr(A);
}

__m128 test_mm_setr_ps(float A, float B, float C, float D) {
  // CHECK-LABEL: test_mm_setr_ps
  // CHECK: insertelement <4 x float> poison, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_setr_ps(A, B, C, D);
}
TEST_CONSTEXPR(match_m128(_mm_setr_ps(+0.0f, +1.0f, +2.0f, +3.0f), +0.0f, +1.0f, +2.0f, +3.0f));

__m128 test_mm_setzero_ps(void) {
  // CHECK-LABEL: test_mm_setzero_ps
  // CHECK: store <4 x float> zeroinitializer
  return _mm_setzero_ps();
}
TEST_CONSTEXPR(match_m128(_mm_setzero_ps(), +0.0f, +0.0f, +0.0f, +0.0f));

void test_mm_sfence(void) {
  // CHECK-LABEL: test_mm_sfence
  // CHECK: call void @llvm.x86.sse.sfence()
  _mm_sfence();
}

__m128 test_mm_shuffle_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_shuffle_ps
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
  return _mm_shuffle_ps(A, B, 0);
}

__m128 test_mm_sqrt_ps(__m128 x) {
  // CHECK-LABEL: test_mm_sqrt_ps
  // CHECK: call {{.*}}<4 x float> @llvm.sqrt.v4f32(<4 x float> {{.*}})
  return _mm_sqrt_ps(x);
}

__m128 test_mm_sqrt_ss(__m128 x) {
  // CHECK-LABEL: test_mm_sqrt_ss
  // CHECK: extractelement <4 x float> {{.*}}, i64 0
  // CHECK: call float @llvm.sqrt.f32(float {{.*}})
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i64 0
  return _mm_sqrt_ss(x);
}

void test_mm_store_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ps
  // CHECK: store <4 x float> %{{.*}}, ptr {{.*}}, align 16
  _mm_store_ps(x, y);
}

void test_mm_store_ps1(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ps1
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
  // CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 16
  _mm_store_ps1(x, y);
}

void test_mm_store_ss(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ss
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: store float %{{.*}}, ptr {{.*}}, align 1{{$}}
  _mm_store_ss(x, y);
}

void test_mm_store1_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store1_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
  // CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 16
  _mm_store1_ps(x, y);
}

void test_mm_storeh_pi(__m64* x, __m128 y) {
  // CHECK-LABEL: test_mm_storeh_pi
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 2, i32 3>
  // CHECK: store <2 x float> %{{.*}}, ptr %{{.*}}, align 1{{$}}
  _mm_storeh_pi(x, y);
}

void test_mm_storel_pi(__m64* x, __m128 y) {
  // CHECK-LABEL: test_mm_storel_pi
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1>
  // CHECK: store <2 x float> %{{.*}}, ptr %{{.*}}, align 1{{$}}
  _mm_storel_pi(x, y);
}

void test_mm_storer_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_storer_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  // CHECK: store <4 x float> %{{.*}}, ptr {{.*}}, align 16
  _mm_storer_ps(x, y);
}

void test_mm_storeu_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_storeu_ps
  // CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_ps(x, y);
}

void test_mm_stream_ps(float*A, __m128 B) {
  // CHECK-LABEL: test_mm_stream_ps
  // CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
  _mm_stream_ps(A, B);
}

void test_mm_stream_ps_void(void *A, __m128 B) {
  // CHECK-LABEL: test_mm_stream_ps_void
  // CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
  _mm_stream_ps(A, B);
}

__m128 test_mm_sub_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_sub_ps
  // CHECK: fsub <4 x float>
  return _mm_sub_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_sub_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), -7.0f, -4.0f, +0.0f, +3.0f));

__m128 test_mm_sub_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_sub_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fsub float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_sub_ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_sub_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), -7.0f, +0.0f, +2.0f, +4.0f));

void test_MM_TRANSPOSE4_PS(__m128 *A, __m128 *B, __m128 *C, __m128 *D) {
  // CHECK-LABEL: test_MM_TRANSPOSE4_PS
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  _MM_TRANSPOSE4_PS(*A, *B, *C, *D);
}

int test_mm_ucomieq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomieq_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomieq_ss(A, B);
}

int test_mm_ucomige_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomige_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.ucomige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomige_ss(A, B);
}

int test_mm_ucomigt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomigt_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomigt_ss(A, B);
}

int test_mm_ucomile_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomile_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.ucomile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomile_ss(A, B);
}

int test_mm_ucomilt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomilt_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomilt_ss(A, B);
}

int test_mm_ucomineq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomineq_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomineq_ss(A, B);
}

__m128 test_mm_undefined_ps(void) {
  // CHECK-LABEL: test_mm_undefined_ps
  // CHECK: ret <4 x float> zeroinitializer
  return _mm_undefined_ps();
}

__m128 test_mm_unpackhi_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_unpackhi_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  return _mm_unpackhi_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_unpackhi_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +2.0f, +2.0f, +4.0f, +1.0f));

__m128 test_mm_unpacklo_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_unpacklo_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  return _mm_unpacklo_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_unpacklo_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +1.0f, +8.0f, +0.0f, +4.0f));

__m128 test_mm_xor_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_xor_ps
  // CHECK: xor <4 x i32>
  return _mm_xor_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_xor_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -4.0f, +5.0f, -6.0f, +0.0f));