// REQUIRES: powerpc-registered-target

// RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE
// RUN: %clang -x c++ -fsyntax-only -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns
// RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE
// RUN: %clang -x c++ -fsyntax-only -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns

// RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-P10-LE

// RUN: %clang -S -emit-llvm -target powerpc64-unknown-freebsd13.0 -mcpu=pwr8 -ffreestanding -nostdlibinc -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE
// RUN: %clang -x c++ -fsyntax-only -target powerpc64-unknown-freebsd13.0 -mcpu=pwr8 -ffreestanding -nostdlibinc -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns
// RUN: %clang -S -emit-llvm -target powerpc64le-unknown-freebsd13.0 -mcpu=pwr8 -ffreestanding -nostdlibinc -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE
// RUN: %clang -x c++ -fsyntax-only -target powerpc64le-unknown-freebsd13.0 -mcpu=pwr8 -ffreestanding -nostdlibinc -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns

// RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE
// RUN: %clang -x c++ -fsyntax-only -target powerpc64-ibm-aix -mcpu=pwr8 -ffreestanding -nostdlibinc -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns
// RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-P10-BE

// Purpose: compile calls to the <xmmintrin.h> intrinsics for several PowerPC
// targets and verify the unoptimized LLVM IR emitted for each intrinsic.
// Every test_* function below calls one family of intrinsics; the directives
// that follow it anchor on the IR definition of each intrinsic and then match
// the calls/instructions expected inside it. Directives carrying a BE, LE or
// P10 suffix apply only to the command lines above that enable that prefix.

#include <xmmintrin.h>

// Shared operands and result sinks. They are globals, and the test functions
// are noinline, so every intrinsic call is emitted rather than folded away.
__m128 res, m1, m2;
__m64 res64, ms[2];
float fs[4];
int i, i2;
long long i64;

// Endian-specific permute selector tables emitted as internal constants.
// CHECK-LE-DAG: @_mm_shuffle_pi16.__permute_selectors = internal constant [4 x i16] [i16 2312, i16 2826, i16 3340, i16 3854], align 2
// CHECK-BE-DAG: @_mm_shuffle_pi16.__permute_selectors = internal constant [4 x i16] [i16 1543, i16 1029, i16 515, i16 1], align 2

// CHECK-LE-DAG: @_mm_shuffle_ps.__permute_selectors = internal constant [4 x i32] [i32 50462976, i32 117835012, i32 185207048, i32 252579084], align 4
// CHECK-BE-DAG: @_mm_shuffle_ps.__permute_selectors = internal constant [4 x i32] [i32 66051, i32 67438087, i32 134810123, i32 202182159], align 4

// Exercises _mm_add_ps and _mm_add_ss.
void __attribute__((noinline))
test_add() {
  res = _mm_add_ps(m1, m2);
  res = _mm_add_ss(m1, m2);
}

// CHECK-LABEL: @test_add

// CHECK-LABEL: define available_externally <4 x float> @_mm_add_ps
// CHECK: fadd <4 x float>

// CHECK-LABEL: define available_externally <4 x float> @_mm_add_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: fadd <4 x float>
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// Exercises _mm_avg_pu16 and _mm_avg_pu8.
void __attribute__((noinline))
test_avg() {
  res64 = _mm_avg_pu16(ms[0], ms[1]);
  res64 = _mm_avg_pu8(ms[0], ms[1]);
}

// CHECK-LABEL: @test_avg

// CHECK-LABEL: define available_externally i64 @_mm_avg_pu16
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: call <8 x i16> @vec_avg(unsigned short vector[8], unsigned short vector[8])
// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0

// CHECK-LABEL: define available_externally i64 @_mm_avg_pu8
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: call <16 x i8> @vec_avg(unsigned char vector[16], unsigned char vector[16])
// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0

// Exercises the alternate names _m_pavgw and _m_pavgb, which are expected to
// forward to _mm_avg_pu16 and _mm_avg_pu8 respectively.
void __attribute__((noinline))
test_alt_name_avg() {
  res64 = _m_pavgw(ms[0], ms[1]);
  res64 = _m_pavgb(ms[0], ms[1]);
}

// CHECK-LABEL: @test_alt_name_avg

// CHECK-LABEL: define available_externally i64 @_m_pavgw
// CHECK: call i64 @_mm_avg_pu16

// CHECK-LABEL: define available_externally i64 @_m_pavgb
// CHECK: call i64 @_mm_avg_pu8

// Exercises every _mm_cmp*_ps / _mm_cmp*_ss comparison intrinsic.
void __attribute__((noinline))
test_cmp() {
  res = _mm_cmpeq_ps(m1, m2);
  res = _mm_cmpeq_ss(m1, m2);
  res = _mm_cmpge_ps(m1, m2);
  res = _mm_cmpge_ss(m1, m2);
  res = _mm_cmpgt_ps(m1, m2);
  res = _mm_cmpgt_ss(m1, m2);
  res = _mm_cmple_ps(m1, m2);
  res = _mm_cmple_ss(m1, m2);
  res = _mm_cmplt_ps(m1, m2);
  res = _mm_cmplt_ss(m1, m2);
  res = _mm_cmpneq_ps(m1, m2);
  res = _mm_cmpneq_ss(m1, m2);
  res = _mm_cmpnge_ps(m1, m2);
  res = _mm_cmpnge_ss(m1, m2);
  res = _mm_cmpngt_ps(m1, m2);
  res = _mm_cmpngt_ss(m1, m2);
  res = _mm_cmpnle_ps(m1, m2);
  res = _mm_cmpnle_ss(m1, m2);
  res = _mm_cmpnlt_ps(m1, m2);
  res = _mm_cmpnlt_ss(m1, m2);
  res = _mm_cmpord_ps(m1, m2);
  res = _mm_cmpord_ss(m1, m2);
  res = _mm_cmpunord_ps(m1, m2);
  res = _mm_cmpunord_ss(m1, m2);
}

// CHECK-LABEL: @test_cmp

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpeq_ps
// CHECK: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpeq_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpge_ps
// CHECK: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpge_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpgt_ps
// CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpgt_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmple_ps
// CHECK: call <4 x i32> @vec_cmple(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmple_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x i32> @vec_cmple(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmplt_ps
// CHECK: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmplt_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpneq_ps
// CHECK: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_nor(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpneq_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_nor(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnge_ps
// CHECK: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnge_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpngt_ps
// CHECK: call <4 x i32> @vec_cmple(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpngt_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x i32> @vec_cmple(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnle_ps
// CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnle_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnlt_ps
// CHECK: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnlt_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpord_ps
// CHECK: call <4 x float> @vec_abs(float vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: call <4 x float> @vec_abs(float vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef splat (i32 2139095040), <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef splat (i32 2139095040), <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpord_ss
// CHECK: call <4 x float> @vec_abs(float vector[4])
// CHECK: call <4 x float> @vec_abs(float vector[4])
// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef splat (i32 2139095040), <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef splat (i32 2139095040), <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpunord_ps
// CHECK: call <4 x float> @vec_abs(float vector[4])
// CHECK: call <4 x float> @vec_abs(float vector[4])
// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 2139095040))
// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 2139095040))
// CHECK: call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpunord_ss
// CHECK: call <4 x float> @vec_abs(float vector[4])
// CHECK: call <4 x float> @vec_abs(float vector[4])
// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 2139095040))
// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 2139095040))
// CHECK: call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// Exercises the _mm_comi*_ss intrinsics, whose int result is stored in i.
void __attribute__((noinline))
test_comi() {
  i = _mm_comieq_ss(m1, m2);
  i = _mm_comige_ss(m1, m2);
  i = _mm_comigt_ss(m1, m2);
  i = _mm_comile_ss(m1, m2);
  i = _mm_comilt_ss(m1, m2);
  i = _mm_comineq_ss(m1, m2);
}

// CHECK-LABEL: @test_comi

// CHECK-LABEL: define available_externally signext i32 @_mm_comieq_ss
// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp oeq float %[[VAL1]], %[[VAL2]]
// CHECK: zext i1 %[[CMP]] to i32

// CHECK-LABEL: define available_externally signext i32 @_mm_comige_ss
// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp oge float %[[VAL1]], %[[VAL2]]
// CHECK: zext i1 %[[CMP]] to i32

// CHECK-LABEL: define available_externally signext i32 @_mm_comigt_ss
// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp ogt float %[[VAL1]], %[[VAL2]]
// CHECK: zext i1 %[[CMP]] to i32

// CHECK-LABEL: define available_externally signext i32 @_mm_comile_ss
// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp ole float %[[VAL1]], %[[VAL2]]
// CHECK: zext i1 %[[CMP]] to i32

// CHECK-LABEL: define available_externally signext i32 @_mm_comilt_ss
// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp olt float %[[VAL1]], %[[VAL2]]
// CHECK: zext i1 %[[CMP]] to i32

// CHECK-LABEL: define available_externally signext i32 @_mm_comineq_ss
// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp une float %[[VAL1]], %[[VAL2]]
// CHECK: zext i1 %[[CMP]] to i32

// Exercises the _mm_cvt* / _mm_cvtt* conversion intrinsics.
void __attribute__((noinline))
test_convert() {
  res = _mm_cvt_pi2ps(m1, ms[1]);
  res64 = _mm_cvt_ps2pi(m1);
  res = _mm_cvt_si2ss(m1, i);
  i = _mm_cvt_ss2si(m1);
  res = _mm_cvtpi16_ps(ms[0]);
  res = _mm_cvtpi32_ps(m1, ms[1]);
  res = _mm_cvtpi32x2_ps(ms[0], ms[1]);
  res = _mm_cvtpi8_ps(ms[0]);
  res64 = _mm_cvtps_pi16(m1);
  res64 = _mm_cvtps_pi32(m1);
  res64 = _mm_cvtps_pi8(m1);
  res = _mm_cvtpu16_ps(ms[0]);
  res = _mm_cvtpu8_ps(ms[0]);
  res = _mm_cvtsi32_ss(m1, i);
  res = _mm_cvtsi64_ss(m1, i64);
  fs[0] = _mm_cvtss_f32(m1);
  i = _mm_cvtss_si32(m1);
  i64 = _mm_cvtss_si64(m1);
  res64 = _mm_cvtt_ps2pi(m1);
  i = _mm_cvtt_ss2si(m1);
  res64 = _mm_cvttps_pi32(m1);
  i = _mm_cvttss_si32(m1);
  i64 = _mm_cvttss_si64(m1);
}

// CHECK-LABEL: @test_convert

// CHECK-LABEL: define available_externally <4 x float> @_mm_cvt_pi2ps
// CHECK: call <4 x float> @_mm_cvtpi32_ps

// CHECK-LABEL: define available_externally i64 @_mm_cvt_ps2pi
// CHECK: call i64 @_mm_cvtps_pi32

// CHECK-LABEL: define available_externally <4 x float> @_mm_cvt_si2ss
// CHECK: call <4 x float> @_mm_cvtsi32_ss

// CHECK-LABEL: define available_externally signext i32 @_mm_cvt_ss2si
// CHECK: call signext i32 @_mm_cvtss_si32

// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpi16_ps
// CHECK: call <4 x i32> @vec_vupklsh(short vector[8])
// CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpi32_ps
// CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpi32x2_ps
// CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpi8_ps
// CHECK: call <8 x i16> @vec_vupkhsb(signed char vector[16])
// CHECK: call <4 x i32> @vec_vupkhsh(short vector[8])
// CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)

// CHECK-LABEL: define available_externally i64 @_mm_cvtps_pi16
// CHECK: call <4 x float> @vec_rint(float vector[4])
// CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
// CHECK: call <8 x i16> @vec_pack(int vector[4], int vector[4])
// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0

// CHECK-LABEL: define available_externally i64 @_mm_cvtps_pi32
// CHECK: call <2 x i64> @vec_splat(long long vector[2], unsigned int)(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: call <4 x float> @vec_rint(float vector[4])
// CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0

// CHECK-LABEL: define available_externally i64 @_mm_cvtps_pi8
// CHECK: call <4 x float> @vec_rint(float vector[4])
// CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
// CHECK: call <8 x i16> @vec_pack(int vector[4], int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
// CHECK: call <16 x i8> @vec_pack(short vector[8], short vector[8])
// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0

// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpu16_ps
// CHECK-LE: call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef zeroinitializer)
// CHECK-BE: call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef zeroinitializer, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: call <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpu8_ps
// CHECK-BE: call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef zeroinitializer, <16 x i8> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK-BE: call <8 x i16> @vec_mergeh(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef zeroinitializer, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK-LE: call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer)
// CHECK-LE: call <8 x i16> @vec_mergeh(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef zeroinitializer)
// CHECK: call <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtsi32_ss
// CHECK: sitofp i32 %{{[0-9a-zA-Z_.]+}} to float
// CHECK: insertelement <4 x float> %{{[0-9a-zA-Z_.]+}}, float %{{[0-9a-zA-Z_.]+}}, i32 0

// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtsi64_ss
// CHECK: sitofp i64 %{{[0-9a-zA-Z_.]+}} to float
// CHECK: insertelement <4 x float> %{{[0-9a-zA-Z_.]+}}, float %{{[0-9a-zA-Z_.]+}}, i32 0

// CHECK-LABEL: define available_externally float @_mm_cvtss_f32
// CHECK: extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0

// CHECK-LABEL: define available_externally signext i32 @_mm_cvtss_si32
// CHECK-LE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i32, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctiw $2,$2;\0Amfvsrd $1,${2:x};\0A", "=^wa,=r,=f,0"
// CHECK-BE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i32, double } asm "xscvspdp ${2:x},${0:x};\0Afctiw $2,$2;\0Amfvsrd $1,${2:x};\0A", "=^wa,=r,=f,0"
// CHECK-P10-LE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i32, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctiw $2,$2;\0Amfvsrd $1,${2:x};\0A", "=^wa,=r,=f,0"
// CHECK-P10-BE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i32, double } asm "xscvspdp ${2:x},${0:x};\0Afctiw $2,$2;\0Amfvsrd $1,${2:x};\0A", "=^wa,=r,=f,0"
// CHECK: extractvalue { <4 x float>, i32, double } %[[VEC]], 0
// CHECK: extractvalue { <4 x float>, i32, double } %[[VEC]], 1
// CHECK: extractvalue { <4 x float>, i32, double } %[[VEC]], 2

// NOTE(review): unlike _mm_cvtss_si32 above, this group has no P10-specific
// asm directives, so the pwr10 command lines reach the extractvalue patterns
// with VEC still bound from the previous group - confirm whether that is
// intended.
// CHECK-LABEL: define available_externally i64 @_mm_cvtss_si64
// CHECK-LE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i64, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctid $2,$2;\0Amfvsrd $1,${2:x};\0A", "=^wa,=r,=f,0"
// CHECK-BE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i64, double } asm "xscvspdp ${2:x},${0:x};\0Afctid $2,$2;\0Amfvsrd $1,${2:x};\0A", "=^wa,=r,=f,0"
// CHECK: extractvalue { <4 x float>, i64, double } %[[VEC]], 0
// CHECK: extractvalue { <4 x float>, i64, double } %[[VEC]], 1
// CHECK: extractvalue { <4 x float>, i64, double } %[[VEC]], 2

// CHECK-LABEL: define available_externally i64 @_mm_cvtt_ps2pi
// CHECK: call i64 @_mm_cvttps_pi32(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})

// CHECK-LABEL: define available_externally signext i32 @_mm_cvtt_ss2si
// CHECK: call signext i32 @_mm_cvttss_si32(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})

// CHECK-LABEL: define available_externally i64 @_mm_cvttps_pi32
// CHECK: call <2 x i64> @vec_splat(long long vector[2], unsigned int)(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0

// CHECK-LABEL: define available_externally signext i32 @_mm_cvttss_si32
// CHECK: extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: fptosi float %{{[0-9a-zA-Z_.]+}} to i32

// CHECK-LABEL: define available_externally i64 @_mm_cvttss_si64
// CHECK: extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: fptosi float %{{[0-9a-zA-Z_.]+}} to i64

// Exercises _mm_div_ps and _mm_div_ss.
void __attribute__((noinline))
test_div() {
  res = _mm_div_ps(m1, m2);
  res = _mm_div_ss(m1, m2);
}

// CHECK-LABEL: @test_div

// CHECK-LABEL: define available_externally <4 x float> @_mm_div_ps
// CHECK: fdiv <4 x float>

// CHECK-LABEL: define available_externally <4 x float> @_mm_div_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: fdiv <4 x float>
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// Exercises _mm_extract_pi16 and its alternate name _m_pextrw, using the
// run-time selector i2.
void __attribute__((noinline))
test_extract() {
  i = _mm_extract_pi16(ms[0], i2);
  i = _m_pextrw(ms[0], i2);
}

// CHECK-LABEL: @test_extract

// CHECK-LABEL: define available_externally signext i32 @_mm_extract_pi16
// CHECK: and i32 %{{[0-9a-zA-Z_.]+}}, 3
// CHECK-BE: sub i32 3, %{{[0-9a-zA-Z_.]+}}
// CHECK: %[[MUL:[0-9a-zA-Z_.]+]] = mul i32 %{{[0-9a-zA-Z_.]+}}, 16
// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = zext i32 %[[MUL]] to i64
// CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = lshr i64 %{{[0-9a-zA-Z_.]+}}, %[[EXT]]
// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i64 %[[SHR]], 65535
// CHECK: trunc i64 %[[AND]] to i32

// CHECK-LABEL: define available_externally signext i32 @_m_pextrw
// CHECK: call signext i32 @_mm_extract_pi16

// Exercises _mm_insert_pi16 and its alternate name _m_pinsrw, using the
// run-time selector i2.
void __attribute__((noinline))
test_insert() {
  res64 = _mm_insert_pi16(ms[0], i, i2);
  res64 = _m_pinsrw(ms[0], i, i2);
}

// CHECK-LABEL: @test_insert

// CHECK-LABEL: define available_externally i64 @_mm_insert_pi16
// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
// CHECK: mul nsw i32 %[[AND]], 16
// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = sext i32 %{{[0-9a-zA-Z_.]+}} to i64
// CHECK: %[[EXT2:[0-9a-zA-Z_.]+]] = zext i32 %{{[0-9a-zA-Z_.]+}} to i64
// CHECK: shl i64 %[[EXT]], %[[EXT2]]
// CHECK: %[[EXT3:[0-9a-zA-Z_.]+]] = zext i32 %{{[0-9a-zA-Z_.]+}} to i64
// CHECK: shl i64 65535, %[[EXT3]]
// CHECK: %[[XOR:[0-9a-zA-Z_.]+]] = xor i64 %{{[0-9a-zA-Z_.]+}}, -1
// CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i64 %{{[0-9a-zA-Z_.]+}}, %[[XOR]]
// CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i64 %{{[0-9a-zA-Z_.]+}}, %{{[0-9a-zA-Z_.]+}}
// CHECK: or i64 %[[AND2]], %[[AND3]]

// CHECK-LABEL: define available_externally i64 @_m_pinsrw
// CHECK: call i64 @_mm_insert_pi16

// Exercises the _mm_load* intrinsics, reading from fs and ms.
void __attribute__((noinline))
test_load() {
  res = _mm_load_ps(fs);
  res = _mm_load_ps1(fs);
  res = _mm_load_ss(fs);
  res = _mm_load1_ps(fs);
  res = _mm_loadh_pi(m1, &ms[0]);
  res = _mm_loadl_pi(m1, &ms[0]);
  res = _mm_loadr_ps(fs);
  res = _mm_loadu_ps(fs);
}

// CHECK-LABEL: @test_load

// CHECK-LABEL: define available_externally <4 x float> @_mm_load_ps
// CHECK: call <4 x float> @vec_ld(long, float vector[4] const*)

// CHECK-LABEL: define available_externally <4 x float> @_mm_load_ps1
// CHECK: call <4 x float> @_mm_load1_ps

// CHECK-LABEL: define available_externally <4 x float> @_mm_load_ss
// CHECK: call <4 x float> @_mm_set_ss

// CHECK-LABEL: define available_externally <4 x float> @_mm_load1_ps
// CHECK: call <4 x float> @_mm_set1_ps

// CHECK-LABEL: define available_externally <4 x float> @_mm_loadh_pi
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 1
// CHECK: insertelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i64 %[[VAL]], i32 1

// CHECK-LABEL: define available_externally <4 x float> @_mm_loadl_pi
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: insertelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i64 %[[EXT]], i32 0

// CHECK-LABEL: define available_externally <4 x float> @_mm_loadr_ps
// CHECK: call <4 x float> @vec_ld(long, float vector[4] const*)
// CHECK: call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 28, i8 29, i8 30, i8 31, i8 24, i8 25, i8 26, i8 27, i8 20, i8 21, i8 22, i8 23, i8 16, i8 17, i8 18, i8 19>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_loadu_ps
// CHECK: call <4 x float> @vec_vsx_ld(int, float const*)

// Exercises the bitwise logic intrinsics (_mm_or_ps, _mm_and_ps,
// _mm_andnot_ps, _mm_xor_ps).
void __attribute__((noinline))
test_logic() {
  res = _mm_or_ps(m1, m2);
  res = _mm_and_ps(m1, m2);
  res = _mm_andnot_ps(m1, m2);
  res = _mm_xor_ps(m1, m2);
}

// CHECK-LABEL: @test_logic

// CHECK-LABEL: define available_externally 
<4 x float> @_mm_or_ps 544 // CHECK: call <4 x float> @vec_or(float vector[4], float vector[4]) 545 546 // CHECK-LABEL: define available_externally <4 x float> @_mm_and_ps 547 // CHECK: call <4 x float> @vec_and(float vector[4], float vector[4]) 548 549 // CHECK-LABEL: define available_externally <4 x float> @_mm_andnot_ps 550 // CHECK: call <4 x float> @vec_andc(float vector[4], float vector[4]) 551 552 // CHECK-LABEL: define available_externally <4 x float> @_mm_xor_ps 553 // CHECK: call <4 x float> @vec_xor(float vector[4], float vector[4]) 554 555 void __attribute__((noinline)) 556 test_max() { 557 res = _mm_max_ps(m1, m2); 558 res = _mm_max_ss(m1, m2); 559 res64 = _mm_max_pi16(ms[0], ms[1]); 560 res64 = _mm_max_pu8(ms[0], ms[1]); 561 } 562 563 // CHECK-LABEL: @test_max 564 565 // CHECK-LABEL: define available_externally <4 x float> @_mm_max_ps 566 // CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4]) 567 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], bool vector[4]) 568 569 // CHECK-LABEL: define available_externally <4 x float> @_mm_max_ss 570 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0) 571 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0) 572 // CHECK: call <4 x float> @vec_max(float vector[4], float vector[4]) 573 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>) 574 575 // CHECK-LABEL: define available_externally i64 @_mm_max_pi16 576 // CHECK: call <2 x i64> @vec_splats(unsigned long long) 577 // CHECK: call <2 x i64> @vec_splats(unsigned long long) 578 // CHECK: call <8 x i16> @vec_cmpgt(short vector[8], short vector[8]) 579 // CHECK: call <8 x i16> 
@vec_sel(short vector[8], short vector[8], bool vector[8]) 580 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64> 581 // CHECK: extractelement <2 x i64> %[[CAST]], i32 0 582 583 // CHECK-LABEL: define available_externally i64 @_mm_max_pu8 584 // CHECK: call <2 x i64> @vec_splats(unsigned long long) 585 // CHECK: call <2 x i64> @vec_splats(unsigned long long) 586 // CHECK: call <16 x i8> @vec_cmpgt(unsigned char vector[16], unsigned char vector[16]) 587 // CHECK: call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16]) 588 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <16 x i8> %{{[0-9a-zA-Z_.]+}} to <2 x i64> 589 // CHECK: extractelement <2 x i64> %[[CAST]], i32 0 590 591 void __attribute__((noinline)) 592 test_alt_name_max() { 593 res64 = _m_pmaxsw(ms[0], ms[1]); 594 res64 = _m_pmaxub(ms[0], ms[1]); 595 } 596 597 // CHECK-LABEL: @test_alt_name_max 598 599 // CHECK-LABEL: define available_externally i64 @_m_pmaxsw 600 // CHECK: call i64 @_mm_max_pi16 601 602 // CHECK-LABEL: define available_externally i64 @_m_pmaxub 603 // CHECK: call i64 @_mm_max_pu8 604 605 void __attribute__((noinline)) 606 test_min() { 607 res = _mm_min_ps(m1, m2); 608 res = _mm_min_ss(m1, m2); 609 res64 = _mm_min_pi16(ms[0], ms[1]); 610 res64 = _mm_min_pu8(ms[0], ms[1]); 611 } 612 613 // CHECK-LABEL: @test_min 614 615 // CHECK-LABEL: define available_externally <4 x float> @_mm_min_ps 616 // CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4]) 617 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], bool vector[4]) 618 619 // CHECK-LABEL: define available_externally <4 x float> @_mm_min_ss 620 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0) 621 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0) 622 // CHECK: call 
<4 x float> @vec_min(float vector[4], float vector[4]) 623 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>) 624 625 // CHECK-LABEL: define available_externally i64 @_mm_min_pi16 626 // CHECK: call <2 x i64> @vec_splats(unsigned long long) 627 // CHECK: call <2 x i64> @vec_splats(unsigned long long) 628 // CHECK: call <8 x i16> @vec_cmplt(short vector[8], short vector[8]) 629 // CHECK: call <8 x i16> @vec_sel(short vector[8], short vector[8], bool vector[8]) 630 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64> 631 // CHECK: extractelement <2 x i64> %[[CAST]], i32 0 632 633 // CHECK-LABEL: define available_externally i64 @_mm_min_pu8 634 // CHECK: call <2 x i64> @vec_splats(unsigned long long) 635 // CHECK: call <2 x i64> @vec_splats(unsigned long long) 636 // CHECK: call <16 x i8> @vec_cmplt(unsigned char vector[16], unsigned char vector[16]) 637 // CHECK: call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16]) 638 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <16 x i8> %{{[0-9a-zA-Z_.]+}} to <2 x i64> 639 // CHECK: extractelement <2 x i64> %[[CAST]], i32 0 640 641 void __attribute__((noinline)) 642 test_alt_name_min() { 643 res64 = _m_pminsw(ms[0], ms[1]); 644 res64 = _m_pminub(ms[0], ms[1]); 645 } 646 647 // CHECK-LABEL: @test_alt_name_min 648 649 // CHECK-LABEL: define available_externally i64 @_m_pminsw 650 // CHECK: call i64 @_mm_min_pi16 651 652 // CHECK-LABEL: define available_externally i64 @_m_pminub 653 // CHECK: call i64 @_mm_min_pu8 654 655 void __attribute__((noinline)) 656 test_move() { 657 _mm_maskmove_si64(ms[0], ms[1], (char *)&res64); 658 res = _mm_move_ss(m1, m2); 659 res = _mm_movehl_ps(m1, m2); 660 res = _mm_movelh_ps(m1, m2); 661 i = _mm_movemask_pi8(ms[0]); 662 i = _mm_movemask_ps(m1); 663 } 664 
// Expected IR for the intrinsics reached from test_move: _mm_maskmove_si64,
// _mm_move_ss, _mm_movehl_ps, _mm_movelh_ps, _mm_movemask_pi8 and
// _mm_movemask_ps.
// CHECK-LABEL: @test_move

// CHECK-LABEL: define available_externally void @_mm_maskmove_si64
// CHECK: store i64 -9187201950435737472, ptr %{{[0-9a-zA-Z_.]+}}, align 8
// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i64
// CHECK: call i64 @_mm_cmpeq_pi8(i64 noundef %[[AND]], i64 noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: %[[XOR:[0-9a-zA-Z_.]+]] = xor i64 %{{[0-9a-zA-Z_.]+}}, -1
// CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i64 %{{[0-9a-zA-Z_.]+}}, %[[XOR]]
// CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i64
// CHECK: or i64 %[[AND2]], %[[AND3]]

// CHECK-LABEL: define available_externally <4 x float> @_mm_move_ss
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_movehl_ps
// CHECK: call <2 x i64> @vec_mergel(unsigned long long vector[2], unsigned long long vector[2])

// CHECK-LABEL: define available_externally <4 x float> @_mm_movelh_ps
// CHECK: call <2 x i64> @vec_mergeh(unsigned long long vector[2], unsigned long long vector[2])

// The bpermd selector constant differs between little- and big-endian targets;
// the call and the truncation of its result are common to both.
// CHECK-LABEL: define available_externally signext i32 @_mm_movemask_pi8
// CHECK-LE: store i64 2269495618449464, ptr %{{[0-9a-zA-Z_.]+}}, align 8
// CHECK-BE: store i64 4048780183313844224, ptr %{{[0-9a-zA-Z_.]+}}, align 8
// CHECK: %[[CALL:[0-9a-zA-Z_.]+]] = call i64 @llvm.ppc.bpermd
// CHECK: trunc i64 %[[CALL]] to i32

// CHECK-LABEL: define available_externally signext i32 @_mm_movemask_ps
// CHECK-LE: call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef bitcast (<4 x i32> <i32 2113632, i32 -2139062144, i32 -2139062144, i32 -2139062144> to <16 x i8>))
// Bind EXT on the little-endian extractelement so that the trunc directive
// that follows is tied to this value, mirroring the big-endian directives,
// instead of reusing a capture left over from an earlier function.
// CHECK-LE: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 1
//
CHECK-LE: trunc i64 %[[EXT]] to i32 695 // CHECK-BE: call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef bitcast (<4 x i32> <i32 -2139062144, i32 -2139062144, i32 -2139062144, i32 2113632> to <16 x i8>)) 696 // CHECK-BE: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0 697 // CHECK-BE: trunc i64 %[[EXT]] to i32 698 // CHECK-P10-LE: call zeroext i32 @vec_extractm(unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}) 699 // CHECK-P10-BE: call zeroext i32 @vec_extractm(unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}) 700 701 void __attribute__((noinline)) 702 test_alt_name_move() { 703 i = _m_pmovmskb(ms[0]); 704 _m_maskmovq(ms[0], ms[1], (char *)&res64); 705 } 706 707 // CHECK-LABEL: @test_alt_name_move 708 709 // CHECK-LABEL: define available_externally signext i32 @_m_pmovmskb 710 // CHECK: call signext i32 @_mm_movemask_pi8 711 712 // CHECK-LABEL: define available_externally void @_m_maskmovq 713 // CHECK: call void @_mm_maskmove_si64 714 715 void __attribute__((noinline)) 716 test_mul() { 717 res = _mm_mul_ps(m1, m2); 718 res = _mm_mul_ss(m1, m2); 719 res64 = _mm_mulhi_pu16(ms[0], ms[1]); 720 res64 = _m_pmulhuw(ms[0], ms[1]); 721 } 722 723 // CHECK-LABEL: @test_mul 724 725 // CHECK-LABEL: define available_externally <4 x float> @_mm_mul_ps 726 // CHECK: fmul <4 x float> 727 728 // CHECK-LABEL: define available_externally <4 x float> @_mm_mul_ss 729 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) 730 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) 731 // CHECK: fmul <4 x float> 732 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x 
i32> noundef <i32 -1, i32 0, i32 0, i32 0>) 733 734 // CHECK-LABEL: define available_externally i64 @_mm_mulhi_pu16 735 // CHECK-LE: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16 736 // CHECK-BE: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21>, ptr %{{[0-9a-zA-Z_.]+}}, align 16 737 // CHECK: call <2 x i64> @vec_splats(unsigned long long) 738 // CHECK: call <2 x i64> @vec_splats(unsigned long long) 739 // CHECK: call <4 x i32> @vec_vmuleuh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}) 740 // CHECK: call <4 x i32> @vec_vmulouh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}) 741 // CHECK: call <4 x i32> @vec_perm(unsigned int vector[4], unsigned int vector[4], unsigned char vector[16]) 742 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0 743 744 // CHECK-LABEL: define available_externally i64 @_m_pmulhuw 745 // CHECK: call i64 @_mm_mulhi_pu16 746 747 void __attribute__((noinline)) 748 test_prefetch() { 749 _mm_prefetch(ms, _MM_HINT_NTA); 750 } 751 752 // CHECK-LABEL: @test_prefetch 753 754 // CHECK-LABEL: define available_externally void @_mm_prefetch 755 // CHECK: call void @llvm.prefetch.p0(ptr %{{[0-9a-zA-Z_.]+}}, i32 0, i32 3, i32 1) 756 757 void __attribute__((noinline)) 758 test_rcp() { 759 res = _mm_rcp_ps(m1); 760 res = _mm_rcp_ss(m1); 761 } 762 763 // CHECK-LABEL: @test_rcp 764 765 // CHECK-LABEL: define available_externally <4 x float> @_mm_rcp_ps 766 // CHECK: call <4 x float> @vec_re(float vector[4]) 767 768 // CHECK-LABEL: define available_externally <4 x float> @_mm_rcp_ss 769 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int) 770 // CHECK: call <4 x float> @_mm_rcp_ps(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}) 771 // CHECK: call <4 x float> @vec_sel(float vector[4], float 
vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>) 772 773 void __attribute__((noinline)) 774 test_rsqrt() { 775 res = _mm_rsqrt_ps(m1); 776 res = _mm_rsqrt_ss(m1); 777 } 778 779 // CHECK-LABEL: @test_rsqrt 780 781 // CHECK-LABEL: define available_externally <4 x float> @_mm_rsqrt_ps 782 // CHECK: call <4 x float> @vec_rsqrte(float vector[4]) 783 784 // CHECK-LABEL: define available_externally <4 x float> @_mm_rsqrt_ss 785 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) 786 // CHECK: call <4 x float> @vec_rsqrte(float vector[4]) 787 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>) 788 789 void __attribute__((noinline)) 790 test_sad() { 791 res64 = _mm_sad_pu8(ms[0], ms[1]); 792 res64 = _m_psadbw(ms[0], ms[1]); 793 } 794 795 // CHECK-LABEL: @test_sad 796 797 // CHECK-LABEL: define available_externally i64 @_mm_sad_pu8 798 // CHECK: call void @llvm.memset.p0.i64(ptr align 8 %{{[0-9a-zA-Z_.]+}}, i8 0, i64 8, i1 false) 799 // CHECK: insertelement <2 x i64> <i64 0, i64 poison>, i64 %{{[0-9a-zA-Z_.]+}}, i32 1 800 // CHECK: insertelement <2 x i64> <i64 0, i64 poison>, i64 %{{[0-9a-zA-Z_.]+}}, i32 1 801 // CHECK: call <16 x i8> @vec_min(unsigned char vector[16], unsigned char vector[16]) 802 // CHECK: call <16 x i8> @vec_max(unsigned char vector[16], unsigned char vector[16]) 803 // CHECK: call <16 x i8> @vec_sub(unsigned char vector[16], unsigned char vector[16]) 804 // CHECK: call <4 x i32> @vec_sum4s(unsigned char vector[16], unsigned int vector[4])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer) 805 // CHECK: call <4 x i32> @vec_sums(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 
x i32> noundef zeroinitializer) 806 // CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 3 807 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %[[EXT]] to i16 808 // CHECK: %[[GEP:[0-9a-zA-Z_.]+]] = getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0 809 // CHECK: store i16 %[[TRUNC]], ptr %[[GEP]], align 8 810 811 // CHECK-LABEL: define available_externally i64 @_m_psadbw 812 // CHECK: call i64 @_mm_sad_pu8 813 814 void __attribute__((noinline)) 815 test_set() { 816 res = _mm_set_ps(fs[0], fs[1], fs[2], fs[3]); 817 res = _mm_set_ps1(fs[0]); 818 res = _mm_set_ss(fs[0]); 819 res = _mm_set1_ps(fs[0]); 820 res = _mm_setr_ps(fs[0], fs[1], fs[2], fs[3]); 821 } 822 823 // CHECK-LABEL: @test_set 824 825 // CHECK-LABEL: define available_externally <4 x float> @_mm_set_ps 826 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> poison, float %{{[0-9a-zA-Z_.]+}}, i32 0 827 // CHECK: %[[VEC2:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC]], float %{{[0-9a-zA-Z_.]+}}, i32 1 828 // CHECK: %[[VEC3:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC2]], float %{{[0-9a-zA-Z_.]+}}, i32 2 829 // CHECK: %[[VEC4:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC3]], float %{{[0-9a-zA-Z_.]+}}, i32 3 830 // CHECK: store <4 x float> %[[VEC4]], ptr %{{[0-9a-zA-Z_.]+}}, align 16 831 832 // CHECK-LABEL: define available_externally <4 x float> @_mm_set_ps1 833 // CHECK: call <4 x float> @_mm_set1_ps 834 835 // CHECK-LABEL: define available_externally <4 x float> @_mm_set_ss 836 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> poison, float %{{[0-9a-zA-Z_.]+}}, i32 0 837 // CHECK: %[[VEC2:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC]], float 0.000000e+00, i32 1 838 // CHECK: %[[VEC3:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC2]], float 0.000000e+00, i32 2 839 // CHECK: %[[VEC4:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC3]], float 0.000000e+00, i32 3 840 // CHECK: store <4 x float> 
%[[VEC4]], ptr %{{[0-9a-zA-Z_.]+}}, align 16 841 842 // CHECK-LABEL: define available_externally <4 x float> @_mm_set1_ps 843 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> poison, float %{{[0-9a-zA-Z_.]+}}, i32 0 844 // CHECK: %[[VEC2:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC]], float %{{[0-9a-zA-Z_.]+}}, i32 1 845 // CHECK: %[[VEC3:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC2]], float %{{[0-9a-zA-Z_.]+}}, i32 2 846 // CHECK: %[[VEC4:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC3]], float %{{[0-9a-zA-Z_.]+}}, i32 3 847 // CHECK: store <4 x float> %[[VEC4]], ptr %{{[0-9a-zA-Z_.]+}}, align 16 848 849 // CHECK-LABEL: define available_externally <4 x float> @_mm_setr_ps 850 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> poison, float %{{[0-9a-zA-Z_.]+}}, i32 0 851 // CHECK: %[[VEC2:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC]], float %{{[0-9a-zA-Z_.]+}}, i32 1 852 // CHECK: %[[VEC3:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC2]], float %{{[0-9a-zA-Z_.]+}}, i32 2 853 // CHECK: %[[VEC4:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC3]], float %{{[0-9a-zA-Z_.]+}}, i32 3 854 // CHECK: store <4 x float> %[[VEC4]], ptr %{{[0-9a-zA-Z_.]+}}, align 16 855 856 void __attribute__((noinline)) 857 test_setzero() { 858 res = _mm_setzero_ps(); 859 } 860 861 // CHECK-LABEL: @test_setzero 862 863 // CHECK-LABEL: define available_externally <4 x float> @_mm_setzero_ps 864 // CHECK: store <4 x float> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16 865 866 void __attribute__((noinline)) 867 test_sfence() { 868 _mm_sfence(); 869 } 870 871 // CHECK-LABEL: @test_sfence 872 873 // CHECK-LABEL: define available_externally void @_mm_sfence 874 // CHECK: fence release 875 876 void __attribute__((noinline)) 877 test_shuffle() { 878 res64 = _mm_shuffle_pi16(ms[0], i); 879 res = _mm_shuffle_ps(m1, m2, i); 880 res64 = _m_pshufw(ms[0], i); 881 } 882 883 // CHECK-LABEL: @test_shuffle 884 885 // CHECK-LABEL: define 
available_externally i64 @_mm_shuffle_pi16 886 // CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3 887 // CHECK: sext i32 %[[AND]] to i64 888 // CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 2 889 // CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3 890 // CHECK: sext i32 %[[AND2]] to i64 891 // CHECK: %[[SHR2:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 4 892 // CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i32 %[[SHR2]], 3 893 // CHECK: sext i32 %[[AND3]] to i64 894 // CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6 895 // CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3 896 // CHECK: sext i32 %[[AND4]] to i64 897 // CHECK: getelementptr inbounds nuw [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} 898 // CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0 899 // CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 3 900 // CHECK: getelementptr inbounds nuw [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} 901 // CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1 902 // CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 2 903 // CHECK: getelementptr inbounds nuw [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} 904 // CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 2 905 // CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1 906 // CHECK: getelementptr inbounds nuw [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} 907 // CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 3 908 // CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0 909 // CHECK: call <2 x i64> @vec_splats(unsigned long long) 910 // CHECK: call <2 x i64> 
@vec_splats(unsigned long long) 911 // CHECK: call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16]) 912 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0 913 914 // CHECK-LABEL: define available_externally <4 x float> @_mm_shuffle_ps 915 // CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3 916 // CHECK: sext i32 %[[AND]] to i64 917 // CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 2 918 // CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3 919 // CHECK: sext i32 %[[AND2]] to i64 920 // CHECK: %[[SHR2:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 4 921 // CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i32 %[[SHR2]], 3 922 // CHECK: sext i32 %[[AND3]] to i64 923 // CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6 924 // CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3 925 // CHECK: sext i32 %[[AND4]] to i64 926 // CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64 927 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 0 928 // CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64 929 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 1 930 // CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64 931 // CHECK: %[[ADD:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144 932 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD]], i32 2 933 // CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64 934 // CHECK: %[[ADD2:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144 935 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD2]], i32 3 936 // CHECK: call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16]) 937 938 // CHECK-LABEL: define 
available_externally i64 @_m_pshufw 939 // CHECK: call i64 @_mm_shuffle_pi16 940 941 void __attribute__((noinline)) 942 test_sqrt() { 943 res = _mm_sqrt_ps(m1); 944 res = _mm_sqrt_ss(m1); 945 } 946 947 // CHECK-LABEL: @test_sqrt 948 949 // CHECK-LABEL: define available_externally <4 x float> @_mm_sqrt_ps 950 // CHECK: call <4 x float> @vec_sqrt(float vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}) 951 952 // CHECK-LABEL: define available_externally <4 x float> @_mm_sqrt_ss 953 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) 954 // CHECK: call <4 x float> @vec_sqrt(float vector[4]) 955 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>) 956 957 void __attribute__((noinline)) 958 test_store() { 959 _mm_store_ps(fs, m1); 960 _mm_store_ps1(fs, m1); 961 _mm_store_ss(fs, m1); 962 _mm_store1_ps(fs, m1); 963 _mm_storeh_pi(ms, m1); 964 _mm_storel_pi(ms, m1); 965 _mm_storer_ps(fs, m1); 966 } 967 968 // CHECK-LABEL: @test_store 969 970 // CHECK-LABEL: define available_externally void @_mm_store_ps 971 // CHECK: call void @vec_st(float vector[4], long, float vector[4]*)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef 0, ptr noundef %{{[0-9a-zA-Z_.]+}}) 972 973 // CHECK-LABEL: define available_externally void @_mm_store_ps1 974 // CHECK: call void @_mm_store1_ps 975 976 // CHECK-LABEL: define available_externally void @_mm_store_ss 977 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0 978 // CHECK: store float %[[VAL]], ptr %{{[0-9a-zA-Z_.]+}}, align 4 979 980 // CHECK-LABEL: define available_externally void @_mm_store1_ps 981 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) 982 // CHECK: call 
void @_mm_store_ps 983 984 // CHECK-LABEL: define available_externally void @_mm_storeh_pi 985 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 1 986 // CHECK: store i64 %[[VAL]], ptr %{{[0-9a-zA-Z_.]+}}, align 8 987 988 // CHECK-LABEL: define available_externally void @_mm_storel_pi 989 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0 990 // CHECK: store i64 %[[VAL]], ptr %{{[0-9a-zA-Z_.]+}}, align 8 991 992 // CHECK-LABEL: define available_externally void @_mm_storer_ps 993 // CHECK: call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 28, i8 29, i8 30, i8 31, i8 24, i8 25, i8 26, i8 27, i8 20, i8 21, i8 22, i8 23, i8 16, i8 17, i8 18, i8 19>) 994 // CHECK: call void @_mm_store_ps 995 996 void __attribute__((noinline)) 997 test_stream() { 998 _mm_stream_pi(&res64, ms[0]); 999 _mm_stream_ps(&fs[0], m1); 1000 } 1001 1002 // CHECK-LABEL: @test_stream 1003 1004 // CHECK-LABEL: define available_externally void @_mm_stream_pi 1005 // CHECK: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}}) 1006 1007 // CHECK-LABEL: define available_externally void @_mm_stream_ps 1008 // CHECK: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}}) 1009 // CHECK: call void @_mm_store_ps 1010 1011 void __attribute__((noinline)) 1012 test_sub() { 1013 res = _mm_sub_ps(m1, m2); 1014 res = _mm_sub_ss(m1, m2); 1015 } 1016 1017 // CHECK-LABEL: @test_sub 1018 1019 // CHECK-LABEL: define available_externally <4 x float> @_mm_sub_ps 1020 // CHECK: fsub <4 x float> 1021 1022 // CHECK-LABEL: define available_externally <4 x float> @_mm_sub_ss 1023 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) 1024 // CHECK: call <4 x float> 
@vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) 1025 // CHECK: fsub <4 x float> 1026 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>) 1027 1028 void __attribute__((noinline)) 1029 test_transpose() { 1030 __m128 m3, m4; 1031 _MM_TRANSPOSE4_PS(m1, m2, m3, m4); 1032 } 1033 1034 // CHECK-LABEL: @test_transpose 1035 1036 // CHECK: call <4 x float> @vec_vmrghw(float vector[4], float vector[4]) 1037 // CHECK: call <4 x float> @vec_vmrghw(float vector[4], float vector[4]) 1038 // CHECK: call <4 x float> @vec_vmrglw(float vector[4], float vector[4]) 1039 // CHECK: call <4 x float> @vec_vmrglw(float vector[4], float vector[4]) 1040 // CHECK: call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2]) 1041 // CHECK: call <2 x i64> @vec_mergel(long long vector[2], long long vector[2]) 1042 // CHECK: call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2]) 1043 // CHECK: call <2 x i64> @vec_mergel(long long vector[2], long long vector[2]) 1044 1045 void __attribute__((noinline)) 1046 test_ucomi() { 1047 i = _mm_ucomieq_ss(m1, m2); 1048 i = _mm_ucomige_ss(m1, m2); 1049 i = _mm_ucomigt_ss(m1, m2); 1050 i = _mm_ucomile_ss(m1, m2); 1051 i = _mm_ucomilt_ss(m1, m2); 1052 i = _mm_ucomineq_ss(m1, m2); 1053 } 1054 1055 // CHECK-LABEL: @test_ucomi 1056 1057 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomieq_ss 1058 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0 1059 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0 1060 // CHECK: fcmp oeq float %[[VAL1]], %[[VAL2]] 1061 1062 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomige_ss 1063 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, 
i32 0 1064 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0 1065 // CHECK: fcmp oge float %[[VAL1]], %[[VAL2]] 1066 1067 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomigt_ss 1068 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0 1069 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0 1070 // CHECK: fcmp ogt float %[[VAL1]], %[[VAL2]] 1071 1072 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomile_ss 1073 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0 1074 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0 1075 // CHECK: fcmp ole float %[[VAL1]], %[[VAL2]] 1076 1077 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomilt_ss 1078 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0 1079 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0 1080 // CHECK: fcmp olt float %[[VAL1]], %[[VAL2]] 1081 1082 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomineq_ss 1083 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0 1084 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0 1085 // CHECK: fcmp une float %[[VAL1]], %[[VAL2]] 1086 1087 void __attribute__((noinline)) 1088 test_undefined() { 1089 res = _mm_undefined_ps(); 1090 } 1091 1092 // CHECK-LABEL: @test_undefined 1093 1094 // CHECK-LABEL: define available_externally <4 x float> @_mm_undefined_ps 1095 // CHECK: alloca <4 x float>, align 16 1096 // CHECK: load <4 x float>, ptr %[[ADDR:[0-9a-zA-Z_.]+]], align 16 1097 // CHECK: load <4 x float>, ptr %[[ADDR]], align 16 1098 1099 void __attribute__((noinline)) 1100 test_unpack() { 1101 res = _mm_unpackhi_ps(m1, m2); 1102 res = _mm_unpacklo_ps(m1, m2); 1103 } 
1104 1105 // CHECK-LABEL: @test_unpack 1106 1107 // CHECK-LABEL: define available_externally <4 x float> @_mm_unpackhi_ps 1108 // CHECK: call <4 x float> @vec_vmrglw(float vector[4], float vector[4]) 1109 1110 // CHECK-LABEL: define available_externally <4 x float> @_mm_unpacklo_ps 1111 // CHECK: call <4 x float> @vec_vmrghw(float vector[4], float vector[4]) 1112