1 // REQUIRES: powerpc-registered-target 2 3 // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ 4 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE 5 // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ 6 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE 7 8 // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ 9 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P10 10 11 // RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ 12 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only 13 // RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ 14 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only 15 16 // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ 17 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE 18 // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ 19 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P10 20 21 // CHECK-BE-DAG: @_mm_movemask_pd.__perm_mask = internal constant <4 x i32> <i32 -2139062144, i32 -2139062144, i32 -2139062144, i32 
-2139078656>, align 16 22 // CHECK-BE-DAG: @_mm_shuffle_epi32.__permute_selectors = internal constant [4 x i32] [i32 66051, i32 67438087, i32 134810123, i32 202182159], align 4 23 // CHECK-BE-DAG: @_mm_shufflehi_epi16.__permute_selectors = internal constant [4 x i16] [i16 2057, i16 2571, i16 3085, i16 3599], align 2 24 // CHECK-BE-DAG: @_mm_shufflelo_epi16.__permute_selectors = internal constant [4 x i16] [i16 1, i16 515, i16 1029, i16 1543], align 2 25 26 // CHECK-LE-DAG: @_mm_movemask_pd.__perm_mask = internal constant <4 x i32> <i32 -2139094976, i32 -2139062144, i32 -2139062144, i32 -2139062144>, align 16 27 // CHECK-LE-DAG: @_mm_shuffle_epi32.__permute_selectors = internal constant [4 x i32] [i32 50462976, i32 117835012, i32 185207048, i32 252579084], align 4 28 // CHECK-LE-DAG: @_mm_shufflehi_epi16.__permute_selectors = internal constant [4 x i16] [i16 2312, i16 2826, i16 3340, i16 3854], align 2 29 // CHECK-LE-DAG: @_mm_shufflelo_epi16.__permute_selectors = internal constant [4 x i16] [i16 256, i16 770, i16 1284, i16 1798], align 2 30 31 #include <emmintrin.h> 32 33 __m128i resi, mi1, mi2; 34 __m128i *mip; 35 double dp[2]; 36 __m128d resd, md1, md2; 37 __m64 res64, m641, m642; 38 __m128 res, m1; 39 int i; 40 char chs[16]; 41 int is[4]; 42 short ss[8]; 43 long long i64s[2]; 44 45 void __attribute__((noinline)) 46 test_add() { 47 resi = _mm_add_epi64(mi1, mi2); 48 resi = _mm_add_epi32(mi1, mi2); 49 resi = _mm_add_epi16(mi1, mi2); 50 resi = _mm_add_epi8(mi1, mi2); 51 resd = _mm_add_pd(md1, md2); 52 resd = _mm_add_sd(md1, md2); 53 res64 = _mm_add_si64(m641, m642); 54 resi = _mm_adds_epi16(mi1, mi2); 55 resi = _mm_adds_epi8(mi1, mi2); 56 resi = _mm_adds_epu16(mi1, mi2); 57 resi = _mm_adds_epu8(mi1, mi2); 58 } 59 60 // CHECK-LABEL: @test_add 61 62 // CHECK-LABEL: define available_externally <2 x i64> @_mm_add_epi64 63 // CHECK: add <2 x i64> 64 65 // CHECK-LABEL: define available_externally <2 x i64> @_mm_add_epi32 66 // CHECK: add <4 x i32> 67 68 // CHECK-LABEL: 
define available_externally <2 x i64> @_mm_add_epi16 69 // CHECK: add <8 x i16> 70 71 // CHECK-LABEL: define available_externally <2 x i64> @_mm_add_epi8 72 // CHECK: add <16 x i8> 73 74 // CHECK-LABEL: define available_externally <2 x double> @_mm_add_pd 75 // CHECK: fadd <2 x double> 76 77 // CHECK-LABEL: define available_externally <2 x double> @_mm_add_sd 78 // CHECK: fadd double 79 80 // CHECK-LABEL: define available_externally i64 @_mm_add_si64 81 // CHECK: add i64 82 83 // CHECK-LABEL: define available_externally <2 x i64> @_mm_adds_epi16 84 // CHECK: call <8 x i16> @vec_adds(short vector[8], short vector[8]) 85 86 // CHECK-LABEL: define available_externally <2 x i64> @_mm_adds_epi8 87 // CHECK: call <16 x i8> @vec_adds(signed char vector[16], signed char vector[16]) 88 89 // CHECK-LABEL: define available_externally <2 x i64> @_mm_adds_epu16 90 // CHECK: call <8 x i16> @vec_adds(unsigned short vector[8], unsigned short vector[8]) 91 92 // CHECK-LABEL: define available_externally <2 x i64> @_mm_adds_epu8 93 // CHECK: call <16 x i8> @vec_adds(unsigned char vector[16], unsigned char vector[16]) 94 95 void __attribute__((noinline)) 96 test_avg() { 97 resi = _mm_avg_epu16(mi1, mi2); 98 resi = _mm_avg_epu8(mi1, mi2); 99 } 100 101 // CHECK-LABEL: @test_avg 102 103 // CHECK-LABEL: define available_externally <2 x i64> @_mm_avg_epu16 104 // CHECK: call <8 x i16> @vec_avg(unsigned short vector[8], unsigned short vector[8]) 105 106 // CHECK-LABEL: define available_externally <2 x i64> @_mm_avg_epu8 107 // CHECK: call <16 x i8> @vec_avg(unsigned char vector[16], unsigned char vector[16]) 108 109 void __attribute__((noinline)) 110 test_bs() { 111 resi = _mm_bslli_si128(mi1, i); 112 resi = _mm_bsrli_si128(mi1, i); 113 } 114 115 // CHECK-LABEL: @test_bs 116 117 // CHECK-LABEL: define available_externally <2 x i64> @_mm_bslli_si128 118 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16 119 // CHECK: br i1 %[[CMP]] 120 // CHECK: call <16 x i8> 
@vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer, i32 noundef zeroext %{{[0-9a-zA-Z_.]+}}) 121 // CHECK: store <16 x i8> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16 122 123 // CHECK-LABEL: define available_externally <2 x i64> @_mm_bsrli_si128 124 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16 125 // CHECK: br i1 %[[CMP]] 126 // CHECK-LE: call i1 @llvm.is.constant 127 // CHECK-LE: %[[SUB:[0-9a-zA-Z_.]+]] = sub nsw i32 16, %{{[0-9a-zA-Z_.]+}} 128 // CHECK-LE: call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)(<16 x i8> noundef zeroinitializer, <16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext %[[SUB]]) 129 // CHECK-LE: %[[MUL:[0-9a-zA-Z_.]+]] = mul nsw i32 %{{[0-9a-zA-Z_.]+}}, 8 130 // CHECK-LE: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %[[MUL]] to i8 131 // CHECK-LE: call <16 x i8> @vec_splats(unsigned char)(i8 noundef zeroext %[[TRUNC]]) 132 // CHECK-LE: call <16 x i8> @vec_sro(unsigned char vector[16], unsigned char vector[16]) 133 // CHECK-LE: store <16 x i8> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16 134 // CHECK-BE: %[[MUL:[0-9a-zA-Z_.]+]] = mul nsw i32 %{{[0-9a-zA-Z_.]+}}, 8 135 // CHECK-BE: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %[[MUL]] to i8 136 // CHECK-BE: call <16 x i8> @vec_splats(unsigned char)(i8 noundef zeroext %[[TRUNC]]) 137 // CHECK-BE: call <16 x i8> @vec_slo(unsigned char vector[16], unsigned char vector[16]) 138 // CHECK-BE: store <16 x i8> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16 139 140 void __attribute__((noinline)) 141 test_cast() { 142 res = _mm_castpd_ps(md1); 143 resi = _mm_castpd_si128(md1); 144 resd = _mm_castps_pd(m1); 145 resi = _mm_castps_si128(m1); 146 resd = _mm_castsi128_pd(mi1); 147 res = _mm_castsi128_ps(mi1); 148 } 149 150 // CHECK-LABEL: @test_cast 151 152 // CHECK-LABEL: define available_externally <4 x float> @_mm_castpd_ps 153 154 
// CHECK-LABEL: define available_externally <2 x i64> @_mm_castpd_si128 155 156 // CHECK-LABEL: define available_externally <2 x double> @_mm_castps_pd 157 158 // CHECK-LABEL: define available_externally <2 x i64> @_mm_castps_si128 159 160 // CHECK-LABEL: define available_externally <2 x double> @_mm_castsi128_pd 161 162 // CHECK-LABEL: define available_externally <4 x float> @_mm_castsi128_ps 163 164 void __attribute__((noinline)) 165 test_cmp() { 166 resi = _mm_cmpeq_epi32(mi1, mi2); 167 resi = _mm_cmpeq_epi16(mi1, mi2); 168 resi = _mm_cmpeq_epi8(mi1, mi2); 169 resi = _mm_cmpgt_epi32(mi1, mi2); 170 resi = _mm_cmpgt_epi16(mi1, mi2); 171 resi = _mm_cmpgt_epi8(mi1, mi2); 172 resi = _mm_cmplt_epi32(mi1, mi2); 173 resi = _mm_cmplt_epi16(mi1, mi2); 174 resi = _mm_cmplt_epi8(mi1, mi2); 175 resd = _mm_cmpeq_pd(md1, md2); 176 resd = _mm_cmpeq_sd(md1, md2); 177 resd = _mm_cmpge_pd(md1, md2); 178 resd = _mm_cmpge_sd(md1, md2); 179 resd = _mm_cmpgt_pd(md1, md2); 180 resd = _mm_cmpgt_sd(md1, md2); 181 resd = _mm_cmple_pd(md1, md2); 182 resd = _mm_cmple_sd(md1, md2); 183 resd = _mm_cmplt_pd(md1, md2); 184 resd = _mm_cmplt_sd(md1, md2); 185 resd = _mm_cmpneq_pd(md1, md2); 186 resd = _mm_cmpneq_sd(md1, md2); 187 resd = _mm_cmpnge_pd(md1, md2); 188 resd = _mm_cmpnge_sd(md1, md2); 189 resd = _mm_cmpngt_pd(md1, md2); 190 resd = _mm_cmpngt_sd(md1, md2); 191 resd = _mm_cmpnle_pd(md1, md2); 192 resd = _mm_cmpnle_sd(md1, md2); 193 resd = _mm_cmpnlt_pd(md1, md2); 194 resd = _mm_cmpnlt_sd(md1, md2); 195 resd = _mm_cmpord_pd(md1, md2); 196 resd = _mm_cmpord_sd(md1, md2); 197 resd = _mm_cmpunord_pd(md1, md2); 198 resd = _mm_cmpunord_sd(md1, md2); 199 } 200 201 // CHECK-LABEL: @test_cmp 202 203 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpeq_epi32 204 // CHECK: call <4 x i32> @vec_cmpeq(int vector[4], int vector[4]) 205 206 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpeq_epi16 207 // CHECK: call <8 x i16> @vec_cmpeq(short vector[8], short vector[8]) 208 209 
// CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpeq_epi8 210 // CHECK: call <16 x i8> @vec_cmpeq(signed char vector[16], signed char vector[16]) 211 212 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpgt_epi32 213 // CHECK: call <4 x i32> @vec_cmpgt(int vector[4], int vector[4]) 214 215 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpgt_epi16 216 // CHECK: call <8 x i16> @vec_cmpgt(short vector[8], short vector[8]) 217 218 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpgt_epi8 219 // CHECK: call <16 x i8> @vec_cmpgt(signed char vector[16], signed char vector[16]) 220 221 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmplt_epi32 222 // CHECK: call <4 x i32> @vec_cmplt(int vector[4], int vector[4]) 223 224 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmplt_epi16 225 // CHECK: call <8 x i16> @vec_cmplt(short vector[8], short vector[8]) 226 227 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmplt_epi8 228 // CHECK: call <16 x i8> @vec_cmplt(signed char vector[16], signed char vector[16]) 229 230 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpeq_pd 231 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2]) 232 233 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpeq_sd 234 // CHECK: call <2 x double> @vec_splats(double) 235 // CHECK: call <2 x double> @vec_splats(double) 236 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2]) 237 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}}) 238 239 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpge_pd 240 // CHECK: call <2 x i64> @vec_cmpge(double vector[2], double vector[2]) 241 242 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpge_sd 243 // CHECK: call <2 x double> @vec_splats(double) 244 // CHECK: call <2 x double> @vec_splats(double) 245 // CHECK: call <2 x i64> 
@vec_cmpge(double vector[2], double vector[2]) 246 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}}) 247 248 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpgt_pd 249 // CHECK: call <2 x i64> @vec_cmpgt(double vector[2], double vector[2]) 250 251 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpgt_sd 252 // CHECK: call <2 x double> @vec_splats(double) 253 // CHECK: call <2 x double> @vec_splats(double) 254 // CHECK: call <2 x i64> @vec_cmpgt(double vector[2], double vector[2]) 255 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}}) 256 257 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmple_pd 258 // CHECK: call <2 x i64> @vec_cmple(double vector[2], double vector[2]) 259 260 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmple_sd 261 // CHECK: call <2 x double> @vec_splats(double) 262 // CHECK: call <2 x double> @vec_splats(double) 263 // CHECK: call <2 x i64> @vec_cmple(double vector[2], double vector[2]) 264 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}}) 265 266 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmplt_pd 267 // CHECK: call <2 x i64> @vec_cmplt(double vector[2], double vector[2]) 268 269 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmplt_sd 270 // CHECK: call <2 x double> @vec_splats(double) 271 // CHECK: call <2 x double> @vec_splats(double) 272 // CHECK: call <2 x i64> @vec_cmplt(double vector[2], double vector[2]) 273 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}}) 274 275 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpneq_pd 276 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2]) 277 // CHECK: call <2 x double> @vec_nor(double vector[2], double vector[2]) 278 279 
// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpneq_sd 280 // CHECK: call <2 x double> @vec_splats(double) 281 // CHECK: call <2 x double> @vec_splats(double) 282 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2]) 283 // CHECK: call <2 x double> @vec_nor(double vector[2], double vector[2]) 284 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}}) 285 286 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnge_pd 287 // CHECK: call <2 x i64> @vec_cmplt(double vector[2], double vector[2]) 288 289 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnge_sd 290 // CHECK: call <2 x double> @vec_splats(double) 291 // CHECK: call <2 x double> @vec_splats(double) 292 // CHECK: call <2 x i64> @vec_cmplt(double vector[2], double vector[2]) 293 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}}) 294 295 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpngt_pd 296 // CHECK: call <2 x i64> @vec_cmple(double vector[2], double vector[2]) 297 298 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpngt_sd 299 // CHECK: call <2 x double> @vec_splats(double) 300 // CHECK: call <2 x double> @vec_splats(double) 301 // CHECK: call <2 x i64> @vec_cmple(double vector[2], double vector[2]) 302 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}}) 303 304 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnle_pd 305 // CHECK: call <2 x i64> @vec_cmpgt(double vector[2], double vector[2]) 306 307 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnle_sd 308 // CHECK: call <2 x double> @vec_splats(double) 309 // CHECK: call <2 x double> @vec_splats(double) 310 // CHECK: call <2 x i64> @vec_cmpge(double vector[2], double vector[2]) 311 // CHECK: call <2 x double> @_mm_setr_pd(double noundef 
%{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}}) 312 313 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnlt_pd 314 // CHECK: call <2 x i64> @vec_cmpge(double vector[2], double vector[2]) 315 316 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnlt_sd 317 // CHECK: call <2 x double> @vec_splats(double) 318 // CHECK: call <2 x double> @vec_splats(double) 319 // CHECK: call <2 x i64> @vec_cmpge(double vector[2], double vector[2]) 320 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}}) 321 322 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpord_pd 323 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2]) 324 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2]) 325 // CHECK: call <2 x i64> @vec_and(unsigned long long vector[2], unsigned long long vector[2]) 326 327 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpord_sd 328 // CHECK: call <2 x double> @vec_splats(double) 329 // CHECK: call <2 x double> @vec_splats(double) 330 // CHECK: call <2 x double> @_mm_cmpord_pd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, <2 x double> noundef %{{[0-9a-zA-Z_.]+}}) 331 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}}) 332 333 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpunord_pd 334 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2]) 335 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2]) 336 // CHECK: call <2 x i64> @vec_nor(unsigned long long vector[2], unsigned long long vector[2]) 337 // CHECK: call <2 x i64> @vec_orc(unsigned long long vector[2], unsigned long long vector[2]) 338 339 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpunord_sd 340 // CHECK: call <2 x double> @vec_splats(double) 341 // CHECK: call <2 x double> @vec_splats(double) 342 // CHECK: call <2 x double> 
@_mm_cmpunord_pd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, <2 x double> noundef %{{[0-9a-zA-Z_.]+}}) 343 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}}) 344 345 void __attribute__((noinline)) 346 test_comi() { 347 i = _mm_comieq_sd(md1, md2); 348 i = _mm_comige_sd(md1, md2); 349 i = _mm_comigt_sd(md1, md2); 350 i = _mm_comile_sd(md1, md2); 351 i = _mm_comilt_sd(md1, md2); 352 i = _mm_comineq_sd(md1, md2); 353 } 354 355 // CHECK-LABEL: @test_comi 356 357 // CHECK-LABEL: define available_externally signext i32 @_mm_comieq_sd 358 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp oeq double 359 // CHECK: zext i1 %[[CMP]] to i32 360 361 // CHECK-LABEL: define available_externally signext i32 @_mm_comige_sd 362 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp oge double 363 // CHECK: zext i1 %[[CMP]] to i32 364 365 // CHECK-LABEL: define available_externally signext i32 @_mm_comigt_sd 366 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp ogt double 367 // CHECK: zext i1 %[[CMP]] to i32 368 369 // CHECK-LABEL: define available_externally signext i32 @_mm_comile_sd 370 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp ole double 371 // CHECK: zext i1 %[[CMP]] to i32 372 373 // CHECK-LABEL: define available_externally signext i32 @_mm_comilt_sd 374 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp olt double 375 // CHECK: zext i1 %[[CMP]] to i32 376 377 // CHECK-LABEL: define available_externally signext i32 @_mm_comineq_sd 378 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp une double 379 // CHECK: zext i1 %[[CMP]] to i32 380 381 void __attribute__((noinline)) 382 test_control() { 383 _mm_clflush(dp); 384 _mm_lfence(); 385 _mm_mfence(); 386 _mm_pause(); 387 } 388 389 // CHECK-LABEL: @test_control 390 391 // CHECK-LABEL: define available_externally void @_mm_clflush 392 // CHECK: call void asm sideeffect "dcbf 0,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}}) 393 394 // CHECK-LABEL: define available_externally void @_mm_lfence() 395 // CHECK: fence release 396 397 
// CHECK-LABEL: define available_externally void @_mm_mfence() 398 // CHECK: fence seq_cst 399 400 // CHECK-LABEL: define available_externally void @_mm_pause() 401 // CHECK: call i64 asm sideeffect "\09mfppr\09$0; or 31,31,31; isync; lwsync; isync; mtppr\09$0;", "=r,~{memory}"() 402 403 void __attribute__((noinline)) 404 test_converts() { 405 resd = _mm_cvtepi32_pd(mi1); 406 res = _mm_cvtepi32_ps(mi1); 407 resi = _mm_cvtpd_epi32(md1); 408 res64 = _mm_cvtpd_pi32(md1); 409 res = _mm_cvtpd_ps(md1); 410 resd = _mm_cvtpi32_pd(res64); 411 resi = _mm_cvtps_epi32(m1); 412 resd = _mm_cvtps_pd(m1); 413 *dp = _mm_cvtsd_f64(md1); 414 i = _mm_cvtsd_si32(md1); 415 i64s[0] = _mm_cvtsd_si64(md1); 416 i64s[0] = _mm_cvtsd_si64x(md1); 417 res = _mm_cvtsd_ss(m1, md2); 418 i = _mm_cvtsi128_si32(mi1); 419 i64s[0] = _mm_cvtsi128_si64(mi1); 420 i64s[0] = _mm_cvtsi128_si64x(mi1); 421 resd = _mm_cvtsi32_sd(md1, i); 422 resi = _mm_cvtsi32_si128(i); 423 resd = _mm_cvtsi64_sd(md1, i64s[1]); 424 resi = _mm_cvtsi64_si128(i64s[1]); 425 resd = _mm_cvtsi64x_sd(md1, i64s[1]); 426 resi = _mm_cvtsi64x_si128(i64s[1]); 427 resd = _mm_cvtss_sd(md1, m1); 428 resi = _mm_cvttpd_epi32(md1); 429 res64 = _mm_cvttpd_pi32(md1); 430 resi = _mm_cvttps_epi32(m1); 431 i = _mm_cvttsd_si32(md1); 432 i64s[0] = _mm_cvttsd_si64(md1); 433 i64s[0] = _mm_cvttsd_si64x(md1); 434 } 435 436 // CHECK-LABEL: @test_converts 437 438 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtepi32_pd 439 // CHECK: call <2 x i64> @vec_unpackh(int vector[4]) 440 // CHECK: %[[CONV:[0-9a-zA-Z_.]+]] = sitofp <2 x i64> %{{[0-9a-zA-Z_.]+}} to <2 x double> 441 // CHECK: fmul <2 x double> %[[CONV]], splat (double 1.000000e+00) 442 443 // CHECK-LABEL: define available_externally <4 x float> @_mm_cvtepi32_ps 444 // CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0) 445 446 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtpd_epi32 447 // CHECK: call <2 x double> @vec_rint(double vector[2]) 
448 // CHECK: store <4 x i32> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16 449 // CHECK: call <4 x i32> asm "xvcvdpsxws ${0:x},${1:x}", "=^wa,^wa"(<2 x double> %{{[0-9a-zA-Z_.]+}}) 450 // CHECK-LE: call <4 x i32> @vec_mergeo(int vector[4], int vector[4]) 451 // CHECK-BE: call <4 x i32> @vec_mergee(int vector[4], int vector[4]) 452 // CHECK: call <4 x i32> @vec_vpkudum(long long vector[2], long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef zeroinitializer) 453 454 // CHECK-LABEL: define available_externally i64 @_mm_cvtpd_pi32 455 // CHECK: call <2 x i64> @_mm_cvtpd_epi32(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}) 456 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0 457 458 // CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpd_ps 459 // CHECK: store <4 x i32> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16 460 // CHECK: call <4 x i32> asm "xvcvdpsp ${0:x},${1:x}", "=^wa,^wa"(<2 x double> %{{[0-9a-zA-Z_.]+}}) 461 // CHECK-LE: call <4 x i32> @vec_mergeo(int vector[4], int vector[4]) 462 // CHECK-BE: call <4 x i32> @vec_mergee(int vector[4], int vector[4]) 463 // CHECK: call <4 x i32> @vec_vpkudum(long long vector[2], long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef zeroinitializer) 464 465 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtpi32_pd 466 // CHECK: call <2 x i64> @vec_splats(unsigned long long) 467 // CHECK: call <2 x i64> @vec_unpackl(int vector[4]) 468 // CHECK: %[[CONV:[0-9a-zA-Z_.]+]] = sitofp <2 x i64> %{{[0-9a-zA-Z._]+}} to <2 x double> 469 // CHECK: fmul <2 x double> %[[CONV]], splat (double 1.000000e+00) 470 471 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtps_epi32 472 // CHECK: call <4 x float> @vec_rint(float vector[4]) 473 // CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0) 474 475 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtps_pd 476 // CHECK-BE: call <4 x float> 
@vec_vmrghw(float vector[4], float vector[4]) 477 // CHECK-BE: call <2 x double> asm " xvcvspdp ${0:x},${1:x}", "=^wa,^wa"(<4 x float> %{{[0-9a-zA-Z_.]+}}) 478 // CHECK-LE: shufflevector <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> <i32 5, i32 6, i32 7, i32 0> 479 // CHECK-LE: shufflevector <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> <i32 6, i32 7, i32 0, i32 1> 480 // CHECK-LE: call <2 x double> asm " xvcvspdp ${0:x},${1:x}", "=^wa,^wa"(<4 x float> %{{[0-9a-zA-Z_.]+}}) 481 482 // CHECK-LABEL: define available_externally double @_mm_cvtsd_f64 483 // CHECK: extractelement <2 x double> %{{[0-9a-zA-Z_.]+}}, i32 0 484 485 // CHECK-LABEL: define available_externally signext i32 @_mm_cvtsd_si32 486 // CHECK: call <2 x double> @vec_rint(double vector[2]) 487 // CHECK: fptosi double %{{[0-9a-zA-Z_.]+}} to i32 488 489 // CHECK-LABEL: define available_externally i64 @_mm_cvtsd_si64 490 // CHECK: call <2 x double> @vec_rint(double vector[2]) 491 // CHECK: fptosi double %{{[0-9a-zA-Z_.]+}} to i64 492 493 // CHECK-LABEL: define available_externally i64 @_mm_cvtsd_si64x 494 // CHECK: call i64 @_mm_cvtsd_si64(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}) 495 496 // CHECK-LABEL: define available_externally <4 x float> @_mm_cvtsd_ss 497 // CHECK-BE: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <2 x double> %{{[0-9a-zA-Z_.]+}}, i32 0 498 // CHECK-BE: %[[TRUNC:[0-9a-zA-Z_.]+]] = fptrunc double %[[EXT]] to float 499 // CHECK-BE: insertelement <4 x float> %{{[0-9a-zA-Z_.]+}}, float %[[TRUNC]], i32 0 500 // CHECK-LE: call <2 x double> @vec_splat(double vector[2], unsigned int)(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) 501 // CHECK-LE: shufflevector <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> <i32 5, i32 6, i32 7, i32 0> 502 // CHECK-LE: call <4 x float> asm "xscvdpsp ${0:x},${1:x}", "=^wa,^wa"(<2 x double> %{{[0-9a-zA-Z_.]+}}) 503 // CHECK-LE: shufflevector <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 
x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> <i32 7, i32 0, i32 1, i32 2> 504 505 // CHECK-LABEL: define available_externally signext i32 @_mm_cvtsi128_si32 506 // CHECK: extractelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0 507 508 // CHECK-LABEL: define available_externally i64 @_mm_cvtsi128_si64 509 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0 510 511 // CHECK-LABEL: define available_externally i64 @_mm_cvtsi128_si64x 512 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0 513 514 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtsi32_sd 515 // CHECK: sitofp i32 %{{[0-9a-zA-Z_.]+}} to double 516 517 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtsi32_si128 518 // CHECK: call <2 x i64> @_mm_set_epi32(i32 noundef signext 0, i32 noundef signext 0, i32 noundef signext 0, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) 519 520 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtsi64_sd 521 // CHECK: sitofp i64 %{{[0-9a-zA-Z_.]+}} to double 522 523 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtsi64_si128 524 // CHECK: %[[INS:[0-9a-zA-Z_.]+]] = insertelement <2 x i64> poison, i64 %{{[0-9a-zA-Z_.]+}}, i32 0 525 // CHECK: insertelement <2 x i64> %[[INS]], i64 0, i32 1 526 527 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtsi64x_sd 528 // CHECK: call <2 x double> @_mm_cvtsi64_sd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef %{{[0-9a-zA-Z_.]+}}) 529 530 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtsi64x_si128 531 // CHECK: %[[INS:[0-9a-zA-Z_.]+]] = insertelement <2 x i64> poison, i64 %{{[0-9a-zA-Z_.]+}}, i32 0 532 // CHECK: insertelement <2 x i64> %[[INS]], i64 0, i32 1 533 534 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtss_sd 535 // CHECK-BE: fpext float %{{[0-9a-zA-Z_.]+}} to double 536 // CHECK-LE: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) 537 // CHECK-LE: 
call <2 x double> asm "xscvspdp ${0:x},${1:x}", "=^wa,^wa"(<4 x float> %{{[0-9a-zA-Z_.]+}}) 538 // CHECK-LE: call <2 x double> @vec_mergel(double vector[2], double vector[2]) 539 540 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvttpd_epi32 541 // CHECK: call <4 x i32> asm "xvcvdpsxws ${0:x},${1:x}", "=^wa,^wa" 542 // CHECK-LE: call <4 x i32> @vec_mergeo(int vector[4], int vector[4]) 543 // CHECK-BE: call <4 x i32> @vec_mergee(int vector[4], int vector[4]) 544 // CHECK: call <4 x i32> @vec_vpkudum(long long vector[2], long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef zeroinitializer) 545 546 // CHECK-LABEL: define available_externally i64 @_mm_cvttpd_pi32 547 // CHECK: call <2 x i64> @_mm_cvttpd_epi32(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}) 548 549 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvttps_epi32 550 // CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0) 551 552 // CHECK-LABEL: define available_externally signext i32 @_mm_cvttsd_si32 553 // CHECK: fptosi double %{{[0-9a-zA-Z_.]+}} to i32 554 555 // CHECK-LABEL: define available_externally i64 @_mm_cvttsd_si64 556 // CHECK: fptosi double %{{[0-9a-zA-Z_.]+}} to i64 557 558 // CHECK-LABEL: define available_externally i64 @_mm_cvttsd_si64x 559 // CHECK: call i64 @_mm_cvttsd_si64(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}) 560 561 void __attribute__((noinline)) 562 test_div() { 563 resd = _mm_div_pd(md1, md2); 564 resd = _mm_div_sd(md1, md2); 565 } 566 567 // CHECK-LABEL: @test_div 568 569 // CHECK-LABEL: define available_externally <2 x double> @_mm_div_pd 570 // CHECK: fdiv <2 x double> 571 572 // CHECK-LABEL: define available_externally <2 x double> @_mm_div_sd 573 // CHECK: fdiv double 574 575 void __attribute__((noinline)) 576 test_extract() { 577 i = _mm_extract_epi16(mi1, i); 578 } 579 580 // CHECK-LABEL: @test_extract 581 582 // CHECK-LABEL: define available_externally signext i32 @_mm_extract_epi16 583 // CHECK: 
%[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 7 584 // CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <8 x i16> %{{[0-9a-zA-Z_.]+}}, i32 %[[AND]] 585 // CHECK: zext i16 %[[EXT]] to i32 586 587 void __attribute__((noinline)) 588 test_insert() { 589 resi = _mm_insert_epi16 (mi1, i, is[0]); 590 } 591 592 // CHECK-LABEL: @test_insert 593 594 // CHECK-LABEL: define available_externally <2 x i64> @_mm_insert_epi16 595 // CHECK: trunc i32 %{{[0-9a-zA-Z_.]+}} to i16 596 // CHECK: and i32 %{{[0-9a-zA-Z_.]+}}, 7 597 598 void __attribute__((noinline)) 599 test_load() { 600 resd = _mm_load_pd(dp); 601 resd = _mm_load_pd1(dp); 602 resd = _mm_load_sd(dp); 603 resi = _mm_load_si128(mip); 604 resd = _mm_load1_pd(dp); 605 resd = _mm_loadh_pd(md1, dp); 606 resi = _mm_loadl_epi64(mip); 607 resd = _mm_loadl_pd(md1, dp); 608 resd = _mm_loadr_pd(dp); 609 resd = _mm_loadu_pd(dp); 610 resi = _mm_loadu_si128(mip); 611 } 612 613 // CHECK-LABEL: @test_load 614 615 // CHECK-LABEL: define available_externally <2 x double> @_mm_load_pd 616 // CHECK: call <16 x i8> @vec_ld(long, unsigned char vector[16] const*)(i64 noundef 0, ptr noundef %{{[0-9a-zA-Z_.]+}}) 617 618 // CHECK-LABEL: define available_externally <2 x double> @_mm_load_pd1 619 // CHECK: call <2 x double> @_mm_load1_pd(ptr noundef %{{[0-9a-zA-Z_.]+}}) 620 621 // CHECK-LABEL: define available_externally <2 x double> @_mm_load_sd 622 // CHECK: call <2 x double> @_mm_set_sd(double noundef %{{[0-9a-zA-Z_.]+}}) 623 624 // CHECK-LABEL: define available_externally <2 x i64> @_mm_load_si128 625 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8 626 // CHECK: load <2 x i64>, ptr %[[ADDR]], align 16 627 628 // CHECK-LABEL: define available_externally <2 x double> @_mm_load1_pd 629 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8 630 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load double, ptr %[[ADDR]] 631 // CHECK: call <2 x double> @vec_splats(double)(double noundef %[[VAL]]) 632 
633 // CHECK-LABEL: define available_externally <2 x double> @_mm_loadh_pd 634 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8 635 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load double, ptr %{{[0-9a-zA-Z_.]+}} 636 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = load <2 x double>, ptr %{{[0-9a-zA-Z_.]+}}, align 16 637 // CHECK: insertelement <2 x double> %[[VEC]], double %[[VAL]], i32 1 638 639 // CHECK-LABEL: define available_externally <2 x i64> @_mm_loadl_epi64 640 // CHECK: call <2 x i64> @_mm_set_epi64(i64 noundef 0, i64 noundef %{{[0-9a-zA-Z_.]+}}) 641 642 // CHECK-LABEL: define available_externally <2 x double> @_mm_loadl_pd 643 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8 644 // CHECK: %[[ADDR2:[0-9a-zA-Z_.]+]] = load double, ptr %[[ADDR]] 645 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = load <2 x double>, ptr %{{[0-9a-zA-Z_.]+}}, align 16 646 // CHECK: insertelement <2 x double> %[[VEC]], double %[[ADDR2]], i32 0 647 648 // CHECK-LABEL: define available_externally <2 x double> @_mm_loadr_pd 649 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8 650 // CHECK: call <2 x double> @_mm_load_pd(ptr noundef %[[ADDR]]) 651 // CHECK: shufflevector <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i32> <i32 1, i32 2> 652 653 // CHECK-LABEL: define available_externally <2 x double> @_mm_loadu_pd 654 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8 655 // CHECK: call <2 x double> @vec_vsx_ld(int, double const*)(i32 noundef signext 0, ptr noundef %[[ADDR]]) 656 657 // CHECK-LABEL: define available_externally <2 x i64> @_mm_loadu_si128 658 // CHECK: load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8 659 // CHECK: call <4 x i32> @vec_vsx_ld(int, int const*)(i32 noundef signext 0, ptr noundef %{{[0-9a-zA-Z_.]+}}) 660 661 void __attribute__((noinline)) 662 test_logical() { 663 resd = _mm_and_pd(md1, md2); 664 resi = _mm_and_si128(mi1, mi2); 665 resd = 
_mm_andnot_pd(md1, md2); 666 resi = _mm_andnot_si128(mi1, mi2); 667 resd = _mm_xor_pd(md1, md2); 668 resi = _mm_xor_si128(mi1, mi2); 669 resd = _mm_or_pd(md1, md2); 670 resi = _mm_or_si128(mi1, mi2); 671 } 672 673 // CHECK-LABEL: @test_logical 674 675 // CHECK-LABEL: define available_externally <2 x double> @_mm_and_pd 676 // CHECK: call <2 x double> @vec_and(double vector[2], double vector[2]) 677 678 // CHECK-LABEL: define available_externally <2 x i64> @_mm_and_si128 679 // CHECK: call <2 x i64> @vec_and(long long vector[2], long long vector[2]) 680 681 // CHECK-LABEL: define available_externally <2 x double> @_mm_andnot_pd 682 // CHECK: call <2 x double> @vec_andc(double vector[2], double vector[2]) 683 684 // CHECK-LABEL: define available_externally <2 x i64> @_mm_andnot_si128 685 // CHECK: call <2 x i64> @vec_andc(long long vector[2], long long vector[2]) 686 687 // CHECK-LABEL: define available_externally <2 x double> @_mm_xor_pd 688 // CHECK: call <2 x double> @vec_xor(double vector[2], double vector[2]) 689 690 // CHECK-LABEL: define available_externally <2 x i64> @_mm_xor_si128 691 // CHECK: call <2 x i64> @vec_xor(long long vector[2], long long vector[2]) 692 693 // CHECK-LABEL: define available_externally <2 x double> @_mm_or_pd 694 // CHECK: call <2 x double> @vec_or(double vector[2], double vector[2]) 695 696 // CHECK-LABEL: define available_externally <2 x i64> @_mm_or_si128 697 // CHECK: call <2 x i64> @vec_or(long long vector[2], long long vector[2]) 698 699 void __attribute__((noinline)) 700 test_max() { 701 resi = _mm_max_epi16(mi1, mi2); 702 resi = _mm_max_epu8(mi1, mi2); 703 resd = _mm_max_pd(md1, md2); 704 resd = _mm_max_sd(md1, md2); 705 } 706 707 // CHECK-LABEL: @test_max 708 709 // CHECK-LABEL: define available_externally <2 x i64> @_mm_max_epi16 710 // CHECK: call <8 x i16> @vec_max(short vector[8], short vector[8]) 711 712 // CHECK-LABEL: define available_externally <2 x i64> @_mm_max_epu8 713 // CHECK: call <16 x i8> @vec_max(unsigned 
char vector[16], unsigned char vector[16]) 714 715 // CHECK-LABEL: define available_externally <2 x double> @_mm_max_pd 716 // CHECK: call <2 x double> @vec_max(double vector[2], double vector[2]) 717 718 // CHECK-LABEL: define available_externally <2 x double> @_mm_max_sd 719 // CHECK: call <2 x double> @vec_splats(double) 720 // CHECK: call <2 x double> @vec_splats(double) 721 // CHECK: call <2 x double> @vec_max(double vector[2], double vector[2]) 722 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}}) 723 // test_min: calls _mm_min_epi16, _mm_min_epu8, _mm_min_pd and _mm_min_sd on the shared globals; the FileCheck expectations after the function follow this call order. 724 void __attribute__((noinline)) 725 test_min() { 726 resi = _mm_min_epi16(mi1, mi2); 727 resi = _mm_min_epu8(mi1, mi2); 728 resd = _mm_min_pd(md1, md2); 729 resd = _mm_min_sd(md1, md2); 730 } 731 732 // CHECK-LABEL: @test_min 733 734 // CHECK-LABEL: define available_externally <2 x i64> @_mm_min_epi16 735 // CHECK: call <8 x i16> @vec_min(short vector[8], short vector[8]) 736 737 // CHECK-LABEL: define available_externally <2 x i64> @_mm_min_epu8 738 // CHECK: call <16 x i8> @vec_min(unsigned char vector[16], unsigned char vector[16]) 739 740 // CHECK-LABEL: define available_externally <2 x double> @_mm_min_pd 741 // CHECK: call <2 x double> @vec_min(double vector[2], double vector[2]) 742 743 // CHECK-LABEL: define available_externally <2 x double> @_mm_min_sd 744 // CHECK: call <2 x double> @vec_splats(double) 745 // CHECK: call <2 x double> @vec_splats(double) 746 // CHECK: call <2 x double> @vec_min(double vector[2], double vector[2]) 747 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}}) 748 // test_move: calls the move, movemask, movepi64/movpi64 and maskmoveu intrinsics. The two movemask intrinsics are matched twice after the function: once under the P10 prefix (vec_extractm) and once under the default prefix (vec_vbpermq). 749 void __attribute__((noinline)) 750 test_move() { 751 resi = _mm_move_epi64(mi1); 752 resd = _mm_move_sd(md1, md2); 753 i = _mm_movemask_epi8(mi1); 754 i = _mm_movemask_pd(md1); 755 res64 = _mm_movepi64_pi64(mi1); 756 resi = _mm_movpi64_epi64(m641); 757 _mm_maskmoveu_si128(mi1, mi2, chs); 758 } 759 760 // CHECK-LABEL: @test_move 761 762 // 
CHECK-LABEL: define available_externally <2 x i64> @_mm_move_epi64 763 // CHECK: call <2 x i64> @_mm_set_epi64(i64 noundef 0, i64 noundef %{{[0-9a-zA-Z_.]+}}) 764 765 // CHECK-LABEL: define available_externally <2 x double> @_mm_move_sd 766 // CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <2 x double> %{{[0-9a-zA-Z_.]+}}, i32 0 767 // CHECK: insertelement <2 x double> %{{[0-9a-zA-Z_.]+}}, double %[[EXT]], i32 0 768 769 // CHECK-P10-LABEL: define available_externally signext i32 @_mm_movemask_epi8 770 // CHECK-P10: call zeroext i32 @vec_extractm(unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}) 771 772 // CHECK-LABEL: define available_externally signext i32 @_mm_movemask_epi8 773 // CHECK: call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 120, i8 112, i8 104, i8 96, i8 88, i8 80, i8 72, i8 64, i8 56, i8 48, i8 40, i8 32, i8 24, i8 16, i8 8, i8 0>) 774 // CHECK-LE: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 1 775 // CHECK-BE: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0 776 // CHECK: trunc i64 %[[VAL]] to i32 777 778 // CHECK-P10-LABEL: define available_externally signext i32 @_mm_movemask_pd 779 // CHECK-P10: call zeroext i32 @vec_extractm(unsigned long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}) 780 781 // CHECK-LABEL: define available_externally signext i32 @_mm_movemask_pd 782 // CHECK-LE: call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef bitcast (<4 x i32> <i32 -2139094976, i32 -2139062144, i32 -2139062144, i32 -2139062144> to <16 x i8>)) 783 // CHECK-LE: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 1 784 // CHECK-BE: call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef bitcast (<4 x i32> <i32 -2139062144, 
i32 -2139062144, i32 -2139062144, i32 -2139078656> to <16 x i8>)) 785 // CHECK-BE: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0 786 787 // CHECK-LABEL: define available_externally i64 @_mm_movepi64_pi64 788 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0 789 790 // CHECK-LABEL: define available_externally <2 x i64> @_mm_movpi64_epi64 791 // CHECK: call <2 x i64> @_mm_set_epi64(i64 noundef 0, i64 noundef %{{[0-9a-zA-Z_.]+}}) 792 793 // CHECK-LABEL: define available_externally void @_mm_maskmoveu_si128 794 // CHECK: call <2 x i64> @_mm_loadu_si128(ptr noundef %{{[0-9a-zA-Z_.]+}}) 795 // CHECK: call <16 x i8> @vec_cmpgt(unsigned char vector[16], unsigned char vector[16]) 796 // CHECK: call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], unsigned char vector[16]) 797 // CHECK: call void @_mm_storeu_si128(ptr noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef %{{[0-9a-zA-Z_.]+}}) 798 // test_mul: calls the seven multiply intrinsics on the shared globals. The expectation for _mm_mul_epu32 is endian-specific: inline asm vmulouw under the LE prefix, vmuleuw under the BE prefix. 799 void __attribute__((noinline)) 800 test_mul() { 801 resi = _mm_mul_epu32(mi1, mi2); 802 resd = _mm_mul_pd(md1, md2); 803 resd = _mm_mul_sd(md1, md2); 804 res64 = _mm_mul_su32(m641, m642); 805 resi = _mm_mulhi_epi16(mi1, mi2); 806 resi = _mm_mulhi_epu16(mi1, mi2); 807 resi = _mm_mullo_epi16(mi1, mi2); 808 } 809 810 // CHECK-LABEL: @test_mul 811 812 // CHECK-LABEL: define available_externally <2 x i64> @_mm_mul_epu32 813 // CHECK-LE: call <2 x i64> asm "vmulouw $0,$1,$2", "=v,v,v"(<2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}}) 814 // CHECK-BE: call <2 x i64> asm "vmuleuw $0,$1,$2", "=v,v,v"(<2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}}) 815 816 // CHECK-LABEL: define available_externally <2 x double> @_mm_mul_pd 817 // CHECK: fmul <2 x double> 818 819 // CHECK-LABEL: define available_externally <2 x double> @_mm_mul_sd 820 // CHECK: fmul double 821 822 // CHECK-LABEL: define available_externally i64 @_mm_mul_su32 823 // CHECK: trunc i64 %{{[0-9a-zA-Z_.]+}} to i32 824 // CHECK: trunc i64 %{{[0-9a-zA-Z_.]+}} to i32 825 // 
CHECK: %[[EXT1:[0-9a-zA-Z_.]+]] = zext i32 %{{[0-9a-zA-Z_.]+}} to i64 826 // CHECK: %[[EXT2:[0-9a-zA-Z_.]+]] = zext i32 %{{[0-9a-zA-Z_.]+}} to i64 827 // CHECK: mul i64 %[[EXT1]], %[[EXT2]] 828 829 // CHECK-LABEL: define available_externally <2 x i64> @_mm_mulhi_epi16 830 // CHECK-LE: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16 831 // CHECK-BE: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 8, i8 9, i8 24, i8 25, i8 12, i8 13, i8 28, i8 29>, ptr %{{[0-9a-zA-Z_.]+}}, align 16 832 // CHECK: call <4 x i32> @vec_vmulesh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}) 833 // CHECK: call <4 x i32> @vec_vmulosh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}) 834 // CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16]) 835 836 // CHECK-LABEL: define available_externally <2 x i64> @_mm_mulhi_epu16 837 // CHECK-LE: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16 838 // CHECK-BE: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 8, i8 9, i8 24, i8 25, i8 12, i8 13, i8 28, i8 29>, ptr %{{[0-9a-zA-Z_.]+}}, align 16 839 // CHECK: call <4 x i32> @vec_vmuleuh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}) 840 // CHECK: call <4 x i32> @vec_vmulouh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}) 841 // CHECK: call <4 x i32> @vec_perm(unsigned int vector[4], unsigned int vector[4], unsigned char vector[16]) 842 843 // CHECK-LABEL: define available_externally <2 x i64> @_mm_mullo_epi16 844 // CHECK: mul <8 x i16> 845 // test_pack: calls _mm_packs_epi16, _mm_packs_epi32 and _mm_packus_epi16 on mi1/mi2; the expectations after the function map them to vec_packs and vec_packsu. 846 void __attribute__((noinline)) 847 test_pack() { 848 resi = _mm_packs_epi16(mi1, mi2); 849 resi = _mm_packs_epi32(mi1, mi2); 850 resi = 
_mm_packus_epi16(mi1, mi2); 851 } 852 853 // CHECK-LABEL: @test_pack 854 855 // CHECK-LABEL: define available_externally <2 x i64> @_mm_packs_epi16 856 // CHECK: call <16 x i8> @vec_packs(short vector[8], short vector[8]) 857 858 // CHECK-LABEL: define available_externally <2 x i64> @_mm_packs_epi32 859 // CHECK: call <8 x i16> @vec_packs(int vector[4], int vector[4]) 860 861 // CHECK-LABEL: define available_externally <2 x i64> @_mm_packus_epi16 862 // CHECK: call <16 x i8> @vec_packsu(short vector[8], short vector[8]) 863 // test_sad: calls _mm_sad_epu8 on mi1/mi2. The last reduction step is matched per endianness: inline asm vsum2sws under the LE prefix, vec_sum2s followed by vec_sld under the BE prefix. The asm operand is matched with the same value-name wildcard as every other expectation rather than a fixed temporary number, so the test does not depend on how many instructions precede it. 864 void __attribute__((noinline)) 865 test_sad() { 866 resi = _mm_sad_epu8(mi1, mi2); 867 } 868 869 // CHECK-LABEL: @test_sad 870 871 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sad_epu8 872 // CHECK: call <16 x i8> @vec_min(unsigned char vector[16], unsigned char vector[16]) 873 // CHECK: call <16 x i8> @vec_max(unsigned char vector[16], unsigned char vector[16]) 874 // CHECK: call <16 x i8> @vec_sub(unsigned char vector[16], unsigned char vector[16]) 875 // CHECK: call <4 x i32> @vec_sum4s(unsigned char vector[16], unsigned int vector[4])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer) 876 // CHECK-LE: call <4 x i32> asm "vsum2sws $0,$1,$2", "=v,v,v"(<4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> zeroinitializer) 877 // CHECK-BE: call <4 x i32> @vec_sum2s(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer) 878 // CHECK-BE: call <4 x i32> @vec_sld(int vector[4], int vector[4], unsigned int) 879 // test_set: calls each _mm_set*, _mm_set1*, _mm_setr* and _mm_setzero* intrinsic, taking element values from the ss, is, chs, i64s and dp arrays and from the m641, m642 and i globals. 880 void __attribute__((noinline)) 881 test_set() { 882 resi = _mm_set_epi16(ss[7], ss[6], ss[5], ss[4], ss[3], ss[2], ss[1], ss[0]); 883 resi = _mm_set_epi32(is[3], is[2], is[1], is[0]); 884 resi = _mm_set_epi64(m641, m642); 885 resi = _mm_set_epi64x(i64s[0], i64s[1]); 886 resi = _mm_set_epi8(chs[15], chs[14], chs[13], chs[12], chs[11], chs[10], chs[9], chs[8], chs[7], chs[6], chs[5], chs[4], chs[3], chs[2], chs[1], chs[0]); 887 resd = _mm_set_pd(dp[0], dp[1]); 888 resd = _mm_set_pd1(dp[0]); 889 resd = 
_mm_set_sd(dp[0]); 890 resi = _mm_set1_epi16(ss[0]); 891 resi = _mm_set1_epi32(i); 892 resi = _mm_set1_epi64(m641); 893 resi = _mm_set1_epi64x(i64s[0]); 894 resi = _mm_set1_epi8(chs[0]); 895 resd = _mm_set1_pd(dp[0]); 896 resi = _mm_setr_epi16(ss[7], ss[6], ss[5], ss[4], ss[3], ss[2], ss[1], ss[0]); 897 resi = _mm_setr_epi32(is[3], is[2], is[1], is[0]); 898 resi = _mm_setr_epi64(m641, m642); 899 resi = _mm_setr_epi8(chs[15], chs[14], chs[13], chs[12], chs[11], chs[10], chs[9], chs[8], chs[7], chs[6], chs[5], chs[4], chs[3], chs[2], chs[1], chs[0]); 900 resd = _mm_setr_pd(dp[0], dp[1]); 901 resd = _mm_setzero_pd(); 902 resi = _mm_setzero_si128(); 903 } 904 905 // CHECK-LABEL: @test_set 906 907 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi16 908 // CHECK-COUNT-8: store i16 {{[0-9a-zA-Z_%.]+}}, ptr {{[0-9a-zA-Z_%.]+}}, align 2 909 // CHECK: insertelement <8 x i16> poison, i16 {{[0-9a-zA-Z_%.]+}}, i32 0 910 // CHECK-COUNT-7: insertelement <8 x i16> {{[0-9a-zA-Z_%.]+}}, i16 {{[0-9a-zA-Z_%.]+}}, i32 {{[1-7]}} 911 912 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi32 913 // CHECK-COUNT-4: store i32 {{[0-9a-zA-Z_%.]+}}, ptr {{[0-9a-zA-Z_%.]+}}, align 4 914 // CHECK: insertelement <4 x i32> poison, i32 {{[0-9a-zA-Z_%.]+}}, i32 0 915 // CHECK-COUNT-3: insertelement <4 x i32> {{[0-9a-zA-Z_%.]+}}, i32 {{[0-9a-zA-Z_%.]+}}, i32 {{[1-3]}} 916 917 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi64 918 // CHECK: call <2 x i64> @_mm_set_epi64x(i64 noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef %{{[0-9a-zA-Z_.]+}}) 919 920 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi64x 921 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x i64> poison, i64 %{{[0-9a-zA-Z_.]+}}, i32 0 922 // CHECK: insertelement <2 x i64> %[[VEC]], i64 %{{[0-9a-zA-Z_.]+}}, i32 1 923 924 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi8 925 // CHECK-COUNT-16: store i8 {{[0-9a-zA-Z_%.]+}}, ptr {{[0-9a-zA-Z_%.]+}}, align 1 926 
// CHECK: insertelement <16 x i8> poison, i8 {{[0-9a-zA-Z_%.]+}}, i32 {{[0-9]+}} 927 // CHECK-COUNT-15: {{[0-9a-zA-Z_%.]+}} = insertelement <16 x i8> {{[0-9a-zA-Z_%.]+}}, i8 {{[0-9a-zA-Z_%.]+}}, i32 {{[0-9]+}} 928 929 // CHECK-LABEL: define available_externally <2 x double> @_mm_set_pd 930 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> poison, double %{{[0-9a-zA-Z_.]+}}, i32 0 931 // CHECK: insertelement <2 x double> %[[VEC]], double %{{[0-9a-zA-Z_.]+}}, i32 1 932 933 // CHECK-LABEL: define available_externally <2 x double> @_mm_set_pd1 934 // CHECK: call <2 x double> @_mm_set1_pd(double noundef %{{[0-9a-zA-Z_.]+}}) 935 936 // CHECK-LABEL: define available_externally <2 x double> @_mm_set_sd 937 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> poison, double %{{[0-9a-zA-Z_.]+}}, i32 0 938 // CHECK: insertelement <2 x double> %[[VEC]], double 0.000000e+00, i32 1 939 940 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set1_epi16 941 // CHECK-COUNT-8: load i16, ptr %{{[0-9a-zA-Z_.]+}}, align 2 942 // CHECK: call <2 x i64> @_mm_set_epi16 943 944 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set1_epi32 945 // CHECK-COUNT-4: load i32, ptr %{{[0-9a-zA-Z_.]+}}, align 4 946 // CHECK: call <2 x i64> @_mm_set_epi32 947 948 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set1_epi64 949 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = load i64, ptr %{{[0-9a-zA-Z_.]+}}, align 8 950 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = load i64, ptr %{{[0-9a-zA-Z_.]+}}, align 8 951 // CHECK: call <2 x i64> @_mm_set_epi64(i64 noundef %[[VAL1]], i64 noundef %[[VAL2]]) 952 953 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set1_epi64x 954 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = load i64, ptr %{{[0-9a-zA-Z_.]+}}, align 8 955 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = load i64, ptr %{{[0-9a-zA-Z_.]+}}, align 8 956 // CHECK: call <2 x i64> @_mm_set_epi64x(i64 noundef %[[VAL1]], i64 noundef %[[VAL2]]) 957 958 // CHECK-LABEL: define 
available_externally <2 x i64> @_mm_set1_epi8 959 // CHECK-COUNT-16: load i8, ptr %{{[0-9a-zA-Z_.]+}}, align 1 960 // CHECK: call <2 x i64> @_mm_set_epi8 961 962 // CHECK-LABEL: define available_externally <2 x double> @_mm_set1_pd 963 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> poison, double %{{[0-9a-zA-Z_.]+}}, i32 0 964 // CHECK: insertelement <2 x double> %[[VEC]], double %{{[0-9a-zA-Z_.]+}}, i32 1 965 966 // CHECK-LABEL: define available_externally <2 x i64> @_mm_setr_epi16 967 // CHECK-COUNT-8: load i16, ptr {{[0-9a-zA-Z_%.]+}}, align 2 968 // CHECK: call <2 x i64> @_mm_set_epi16 969 970 // CHECK-LABEL: define available_externally <2 x i64> @_mm_setr_epi32 971 // CHECK-COUNT-4: load i32, ptr {{[0-9a-zA-Z_%.]+}}, align 4 972 // CHECK: call <2 x i64> @_mm_set_epi32 973 974 // CHECK-LABEL: define available_externally <2 x i64> @_mm_setr_epi64 975 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = load i64, ptr %{{[0-9a-zA-Z_.]+}}, align 8 976 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = load i64, ptr %{{[0-9a-zA-Z_.]+}}, align 8 977 // CHECK: call <2 x i64> @_mm_set_epi64(i64 noundef %[[VAL1]], i64 noundef %[[VAL2]]) 978 979 // CHECK-LABEL: define available_externally <2 x i64> @_mm_setr_epi8 980 // CHECK-COUNT-16: load i8, ptr {{[0-9a-zA-Z_%.]+}}, align 1 981 // CHECK: call <2 x i64> @_mm_set_epi8 982 983 // CHECK-LABEL: define available_externally <2 x double> @_mm_setr_pd 984 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> poison, double %{{[0-9a-zA-Z_.]+}}, i32 0 985 // CHECK: insertelement <2 x double> %[[VEC]], double %{{[0-9a-zA-Z_.]+}}, i32 1 986 987 // CHECK-LABEL: define available_externally <2 x double> @_mm_setzero_pd() 988 // CHECK: call <4 x i32> @vec_splats(int)(i32 noundef signext 0) 989 990 // CHECK-LABEL: define available_externally <2 x i64> @_mm_setzero_si128() 991 // CHECK: store <4 x i32> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16 992 // test_shuffle: passes the global int i as the selector of every shuffle intrinsic; the expectations after the function match the selector being decoded in IR (and with 3, ashr by 2, 4 and 6). 993 void __attribute__((noinline)) 994 test_shuffle() { 995 resi = 
_mm_shuffle_epi32(mi1, i); 996 resd = _mm_shuffle_pd(md1, md2, i); 997 resi = _mm_shufflehi_epi16(mi1, i); 998 resi = _mm_shufflelo_epi16(mi1, i); 999 } 1000 1001 // CHECK-LABEL: @test_shuffle 1002 1003 // CHECK-LABEL: define available_externally <2 x i64> @_mm_shuffle_epi32 1004 // CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3 1005 // CHECK: sext i32 %[[AND]] to i64 1006 // CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 2 1007 // CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3 1008 // CHECK: sext i32 %[[AND2]] to i64 1009 // CHECK: %[[SHR2:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 4 1010 // CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i32 %[[SHR2]], 3 1011 // CHECK: sext i32 %[[AND3]] to i64 1012 // CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6 1013 // CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3 1014 // CHECK: sext i32 %[[AND4]] to i64 1015 // CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} 1016 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 0 1017 // CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} 1018 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 1 1019 // CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} 1020 // CHECK: %[[ADD:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144 1021 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD]], i32 2 1022 // CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} 1023 // CHECK: add i32 %{{[0-9a-zA-Z_.]+}}, 269488144 1024 // CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16]) 1025 1026 // CHECK-LABEL: define available_externally <2 x double> 
@_mm_shuffle_pd 1027 // CHECK: and i32 %{{[0-9a-zA-Z_.]+}}, 3 1028 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp eq i32 %{{[0-9a-zA-Z_.]+}}, 0 1029 // CHECK: br i1 %[[CMP]] 1030 // CHECK: call <2 x double> @vec_mergeh(double vector[2], double vector[2]) 1031 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp eq i32 %{{[0-9a-zA-Z_.]+}}, 1 1032 // CHECK: br i1 %[[CMP2]] 1033 // CHECK: shufflevector <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i32> <i32 1, i32 2> 1034 // CHECK: %[[CMP3:[0-9a-zA-Z_.]+]] = icmp eq i32 %{{[0-9a-zA-Z_.]+}}, 2 1035 // CHECK: br i1 %[[CMP3]] 1036 // CHECK: shufflevector <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i32> <i32 0, i32 3> 1037 // CHECK: call <2 x double> @vec_mergel(double vector[2], double vector[2]) 1038 1039 // CHECK-LABEL: define available_externally <2 x i64> @_mm_shufflehi_epi16 1040 // CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3 1041 // CHECK: sext i32 %[[AND]] to i64 1042 // CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 2 1043 // CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3 1044 // CHECK: sext i32 %[[AND2]] to i64 1045 // CHECK: %[[SHR2:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 4 1046 // CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i32 %[[SHR2]], 3 1047 // CHECK: sext i32 %[[AND3]] to i64 1048 // CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6 1049 // CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3 1050 // CHECK: sext i32 %[[AND4]] to i64 1051 // CHECK-LE: store <2 x i64> <i64 1663540288323457296, i64 0>, ptr %{{[0-9a-zA-Z_.]+}}, align 16 1052 // CHECK-BE: store <2 x i64> <i64 1157726452361532951, i64 0>, ptr %{{[0-9a-zA-Z_.]+}}, align 16 1053 // CHECK-COUNT-4: getelementptr inbounds nuw [4 x i16], ptr @_mm_shufflehi_epi16.__permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}} 1054 // CHECK: call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16]) 1055 1056 // 
CHECK-LABEL: define available_externally <2 x i64> @_mm_shufflelo_epi16 1057 // CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 {{[0-9a-zA-Z_%.]+}}, 3 1058 // CHECK: sext i32 %[[AND]] to i64 1059 // CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 {{[0-9a-zA-Z_%.]+}}, 2 1060 // CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3 1061 // CHECK: sext i32 %[[AND2]] to i64 1062 // CHECK: %[[SHR2:[0-9a-zA-Z_.]+]] = ashr i32 {{[0-9a-zA-Z_%.]+}}, 4 1063 // CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i32 %[[SHR2]], 3 1064 // CHECK: sext i32 %[[AND3]] to i64 1065 // CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 {{[0-9a-zA-Z_%.]+}}, 6 1066 // CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3 1067 // CHECK: sext i32 %[[AND4]] to i64 1068 // CHECK-LE: store <2 x i64> <i64 0, i64 2242261671028070680>, ptr %{{[0-9a-zA-Z_.]+}}, align 16 1069 // CHECK-BE: store <2 x i64> <i64 0, i64 1736447835066146335>, ptr %{{[0-9a-zA-Z_.]+}}, align 16 1070 // CHECK-COUNT-4: getelementptr inbounds nuw [4 x i16], ptr @_mm_shufflelo_epi16.__permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}} 1071 // CHECK: call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16]) 1072 // test_sll: calls the shift-left intrinsics; the _mm_sll_* forms take their count from the vector mi2, while the _mm_slli_* forms (including _mm_slli_si128) take the global int i. 1073 void __attribute__((noinline)) 1074 test_sll() { 1075 resi = _mm_sll_epi16(mi1, mi2); 1076 resi = _mm_sll_epi32(mi1, mi2); 1077 resi = _mm_sll_epi64(mi1, mi2); 1078 resi = _mm_slli_epi16(mi1, i); 1079 resi = _mm_slli_epi32(mi1, i); 1080 resi = _mm_slli_epi64(mi1, i); 1081 resi = _mm_slli_si128(mi1, i); 1082 } 1083 1084 // CHECK-LABEL: @test_sll 1085 1086 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi16 1087 // CHECK: store <8 x i16> splat (i16 15), ptr %{{[0-9a-zA-Z_.]+}}, align 16 1088 // CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int) 1089 // CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int) 1090 // CHECK: call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef 
%{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef splat (i16 15)) 1091 // CHECK: call <8 x i16> @vec_sl(unsigned short vector[8], unsigned short vector[8]) 1092 // CHECK: call <8 x i16> @vec_sel(unsigned short vector[8], unsigned short vector[8], bool vector[8]) 1093 1094 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi32 1095 // CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) 1096 // CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1) 1097 // CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef splat (i32 32)) 1098 // CHECK: call <4 x i32> @vec_sl(unsigned int vector[4], unsigned int vector[4]) 1099 // CHECK: call <4 x i32> @vec_sel(unsigned int vector[4], unsigned int vector[4], bool vector[4]) 1100 1101 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi64 1102 // CHECK: call <2 x i64> @vec_splat(unsigned long long vector[2], unsigned int)(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}}, i32 noundef zeroext 0) 1103 // CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}}, <2 x i64> noundef splat (i64 64)) 1104 // CHECK: call <2 x i64> @vec_sl(unsigned long long vector[2], unsigned long long vector[2]) 1105 // CHECK: call <2 x i64> @vec_sel(unsigned long long vector[2], unsigned long long vector[2], bool vector[2]) 1106 1107 // CHECK-LABEL: define available_externally <2 x i64> @_mm_slli_epi16 1108 // CHECK: store <8 x i16> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16 1109 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp sge i32 %{{[0-9a-zA-Z_.]+}}, 0 1110 // CHECK: br i1 %[[CMP]] 1111 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16 1112 // CHECK: br i1 %[[CMP2]] 1113 // CHECK: call i1 
@llvm.is.constant 1114 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8 1115 // CHECK: call <8 x i16> @vec_splat_s16(signed char)(i8 noundef signext %[[TRUNC]]) 1116 // CHECK: %[[TRUNC2:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i16 1117 // CHECK: call <8 x i16> @vec_splats(unsigned short)(i16 noundef zeroext %[[TRUNC2]]) 1118 // CHECK: call <8 x i16> @vec_sl(short vector[8], unsigned short vector[8]) 1119 1120 // CHECK-LABEL: define available_externally <2 x i64> @_mm_slli_epi32 1121 // CHECK: store <4 x i32> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16 1122 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp sge i32 %{{[0-9a-zA-Z_.]+}}, 0 1123 // CHECK: br i1 %[[CMP]] 1124 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 32 1125 // CHECK: br i1 %[[CMP2]] 1126 // CHECK: call i1 @llvm.is.constant 1127 // CHECK: %[[CMP3:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16 1128 // CHECK: br i1 %[[CMP3]] 1129 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8 1130 // CHECK: call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext %[[TRUNC]]) 1131 // CHECK: call <4 x i32> @vec_splats(unsigned int) 1132 // CHECK: call <4 x i32> @vec_sl(int vector[4], unsigned int vector[4]) 1133 1134 // CHECK-LABEL: define available_externally <2 x i64> @_mm_slli_epi64 1135 // CHECK: store <2 x i64> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16 1136 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp sge i32 %{{[0-9a-zA-Z_.]+}}, 0 1137 // CHECK: br i1 %[[CMP]] 1138 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 64 1139 // CHECK: br i1 %[[CMP2]] 1140 // CHECK: call i1 @llvm.is.constant 1141 // CHECK: %[[CMP3:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16 1142 // CHECK: br i1 %[[CMP3]] 1143 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8 1144 // CHECK: call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext %[[TRUNC]]) 1145 // CHECK: call <4 x i32> 
@vec_splats(unsigned int) 1146 // CHECK: call <2 x i64> @vec_sl(long long vector[2], unsigned long long vector[2]) 1147 1148 // CHECK-LABEL: define available_externally <2 x i64> @_mm_slli_si128 1149 // CHECK: store <16 x i8> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16 1150 // CHECK-BE: %[[SUB:[0-9a-zA-Z_.]+]] = sub nsw i32 16, %{{[0-9a-zA-Z_.]+}} 1151 // CHECK-BE: call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)(<16 x i8> noundef zeroinitializer, <16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext %[[SUB]]) 1152 // CHECK-LE: call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer, i32 noundef zeroext %{{[0-9a-zA-Z_.]+}}) 1153 // CHECK: store <16 x i8> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16 1154 // test_sqrt: calls _mm_sqrt_pd(md1) and _mm_sqrt_sd(md1, md2) and stores both results into resd. 1155 void __attribute__((noinline)) 1156 test_sqrt() { 1157 resd = _mm_sqrt_pd(md1); 1158 resd = _mm_sqrt_sd(md1, md2); 1159 } 1160 1161 // CHECK-LABEL: @test_sqrt 1162 1163 // CHECK-LABEL: define available_externally <2 x double> @_mm_sqrt_pd 1164 // CHECK: call <2 x double> @vec_sqrt(double vector[2])(<2 x double> noundef {{[0-9a-zA-Z_%.]+}}) 1165 1166 // CHECK-LABEL: define available_externally <2 x double> @_mm_sqrt_sd 1167 // CHECK: %[[CALL:[0-9a-zA-Z_.]+]] = call <2 x double> @_mm_set1_pd(double noundef %{{[0-9a-zA-Z_.]+}}) 1168 // CHECK: call <2 x double> @vec_sqrt(double vector[2])(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}) 1169 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}}) 1170 // test_sra: calls the sra/srai shift-right intrinsics for the epi16 and epi32 forms; the _mm_sra_* forms take their count from the vector mi2, the _mm_srai_* forms from the global int i. 1171 void __attribute__((noinline)) 1172 test_sra() { 1173 resi = _mm_sra_epi16(mi1, mi2); 1174 resi = _mm_sra_epi32(mi1, mi2); 1175 resi = _mm_srai_epi16(mi1, i); 1176 resi = _mm_srai_epi32(mi1, i); 1177 } 1178 1179 // CHECK-LABEL: @test_sra 1180 1181 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sra_epi16 1182 // CHECK: store <8 x i16> splat 
(i16 15), ptr %{{[0-9a-zA-Z_.]+}}, align 16 1183 // CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) 1184 // CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 3) 1185 // CHECK: call <8 x i16> @vec_min(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef splat (i16 15)) 1186 // CHECK: call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8]) 1187 1188 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sra_epi32 1189 // CHECK: store <4 x i32> splat (i32 31), ptr %{{[0-9a-zA-Z_.]+}}, align 16 1190 // CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) 1191 // CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1) 1192 // CHECK: call <4 x i32> @vec_min(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 31)) 1193 // CHECK: call <4 x i32> @vec_sra(int vector[4], unsigned int vector[4]) 1194 1195 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srai_epi16 1196 // CHECK: store <8 x i16> splat (i16 15), ptr %{{[0-9a-zA-Z_.]+}}, align 16 1197 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16 1198 // CHECK: br i1 %[[CMP]] 1199 // CHECK: call i1 @llvm.is.constant 1200 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8 1201 // CHECK: call <8 x i16> @vec_splat_s16(signed char)(i8 noundef signext %[[TRUNC]]) 1202 // CHECK: %[[TRUNC2:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i16 1203 // CHECK: call <8 x i16> @vec_splats(unsigned short)(i16 noundef zeroext %{{[0-9a-zA-Z_.]+}}) 1204 // CHECK: call <8 x i16> @vec_sra(short vector[8], unsigned short 
vector[8]) 1205 1206 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srai_epi32 1207 // CHECK: store <4 x i32> splat (i32 31), ptr %{{[0-9a-zA-Z_.]+}}, align 16 1208 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 32 1209 // CHECK: br i1 %[[CMP]] 1210 // CHECK: call i1 @llvm.is.constant 1211 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16 1212 // CHECK: br i1 %[[CMP2]] 1213 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8 1214 // CHECK: call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext %[[TRUNC]]) 1215 // CHECK: call <4 x i32> @vec_splats(unsigned int) 1216 // CHECK: call <4 x i32> @vec_splats(unsigned int) 1217 // CHECK: call <4 x i32> @vec_sra(int vector[4], unsigned int vector[4]) 1218 // test_srl: calls the srl/srli shift-right intrinsics; the _mm_srl_* forms take their count from the vector mi2, the _mm_srli_* forms (including _mm_srli_si128) from the global int i. 1219 void __attribute__((noinline)) 1220 test_srl() { 1221 resi = _mm_srl_epi16(mi1, mi2); 1222 resi = _mm_srl_epi32(mi1, mi2); 1223 resi = _mm_srl_epi64(mi1, mi2); 1224 resi = _mm_srli_epi16(mi1, i); 1225 resi = _mm_srli_epi32(mi1, i); 1226 resi = _mm_srli_epi64(mi1, i); 1227 resi = _mm_srli_si128(mi1, i); 1228 } 1229 1230 // CHECK-LABEL: @test_srl 1231 1232 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srl_epi16 1233 // CHECK: store <8 x i16> splat (i16 15), ptr %{{[0-9a-zA-Z_.]+}}, align 16 1234 // CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) 1235 // CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 3) 1236 // CHECK: call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef splat (i16 15)) 1237 // CHECK: call <8 x i16> @vec_sr(unsigned short vector[8], unsigned short vector[8]) 1238 // CHECK: call <8 x i16> @vec_sel(unsigned short vector[8], unsigned short vector[8], bool vector[8]) 1239 1240 // CHECK-LABEL: define 
available_externally <2 x i64> @_mm_srl_epi32 1241 // CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) 1242 // CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1) 1243 // CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 32)) 1244 // CHECK: call <4 x i32> @vec_sr(unsigned int vector[4], unsigned int vector[4]) 1245 // CHECK: call <4 x i32> @vec_sel(unsigned int vector[4], unsigned int vector[4], bool vector[4]) 1246 1247 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srl_epi64 1248 // CHECK: call <2 x i64> @vec_splat(unsigned long long vector[2], unsigned int)(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) 1249 // CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef splat (i64 64)) 1250 // CHECK: call <2 x i64> @vec_sr(unsigned long long vector[2], unsigned long long vector[2]) 1251 // CHECK: call <2 x i64> @vec_sel(unsigned long long vector[2], unsigned long long vector[2], bool vector[2]) 1252 1253 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srli_epi16 1254 // CHECK: store <8 x i16> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16 1255 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16 1256 // CHECK: br i1 %[[CMP]] 1257 // CHECK: call i1 @llvm.is.constant 1258 // CHECK: trunc i32 %{{[0-9a-zA-Z_.]+}} to i8 1259 // CHECK: call <8 x i16> @vec_splat_s16(signed char) 1260 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i16 1261 // CHECK: call <8 x i16> @vec_splats(unsigned short)(i16 noundef zeroext %[[TRUNC]]) 1262 // CHECK: call <8 x i16> @vec_sr(short vector[8], unsigned short vector[8]) 1263 1264 // CHECK-LABEL: 
define available_externally <2 x i64> @_mm_srli_epi32 1265 // CHECK: store <4 x i32> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16 1266 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 32 1267 // CHECK: br i1 %[[CMP]] 1268 // CHECK: call i1 @llvm.is.constant 1269 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16 1270 // CHECK: br i1 %[[CMP2]] 1271 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8 1272 // CHECK: call <4 x i32> @vec_splat_s32(signed char) 1273 // CHECK: call <4 x i32> @vec_splats(unsigned int) 1274 // CHECK: call <4 x i32> @vec_splats(unsigned int) 1275 // CHECK: call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4]) 1276 1277 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srli_epi64 1278 // CHECK: store <2 x i64> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16 1279 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 64 1280 // CHECK: br i1 %[[CMP]] 1281 // CHECK: call i1 @llvm.is.constant 1282 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16 1283 // CHECK: br i1 %[[CMP2]] 1284 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8 1285 // CHECK: call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext %[[TRUNC]]) 1286 // CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = sext i32 %{{[0-9a-zA-Z_.]+}} to i64 1287 // CHECK: call <2 x i64> @vec_splats(unsigned long long)(i64 noundef %[[EXT]]) 1288 // CHECK: call <4 x i32> @vec_splats(unsigned int)(i32 noundef zeroext %{{[0-9a-zA-Z_.]+}}) 1289 // CHECK: call <2 x i64> @vec_sr(long long vector[2], unsigned long long vector[2]) 1290 1291 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srli_si128 1292 // CHECK: call <2 x i64> @_mm_bsrli_si128 1293 1294 /* test_store: calls each _mm_store* wrapper once. The double variants are passed the global array dp and the vector md1; the integer variants are passed the pointer mip and the vector mi1. Nothing is returned or assigned. The FileCheck expectations that follow list the wrappers in the same order as the calls here, so keep the two in step when adding or moving a call. */ void __attribute__((noinline)) 1295 test_store() { 1296 _mm_store_pd(dp, md1); 1297 _mm_store_pd1(dp, md1); 1298 _mm_store_sd(dp, md1); 1299 _mm_store_si128(mip, mi1); 1300 _mm_store1_pd(dp, md1); 1301
_mm_storeh_pd(dp, md1); 1302 _mm_storel_epi64(mip, mi1); 1303 _mm_storel_pd(dp, md1); 1304 _mm_storer_pd(dp, md1); 1305 _mm_storeu_pd(dp, md1); 1306 _mm_storeu_si128(mip, mi1); 1307 } 1308 1309 // CHECK-LABEL: @test_store 1310 1311 // CHECK-LABEL: define available_externally void @_mm_store_pd 1312 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8 1313 // CHECK: call void @vec_st(unsigned char vector[16], long, unsigned char vector[16]*)(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef 0, ptr noundef %[[ADDR]]) 1314 1315 // CHECK-LABEL: define available_externally void @_mm_store_pd1 1316 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8 1317 // CHECK: %[[ADDR2:[0-9a-zA-Z_.]+]] = load <2 x double>, ptr %{{[0-9a-zA-Z_.]+}}, align 16 1318 // CHECK: call void @_mm_store1_pd(ptr noundef %[[ADDR]], <2 x double> noundef %[[ADDR2]]) 1319 1320 // CHECK-LABEL: define available_externally void @_mm_store_sd 1321 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8 1322 // CHECK: store double %{{[0-9a-zA-Z_.]+}}, ptr %[[ADDR]] 1323 1324 // CHECK-LABEL: define available_externally void @_mm_store_si128 1325 // CHECK: %[[LOAD:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8 1326 // CHECK: call void @vec_st(unsigned char vector[16], long, unsigned char vector[16]*)(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef 0, ptr noundef %[[LOAD]]) 1327 1328 // CHECK-LABEL: define available_externally void @_mm_store1_pd 1329 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8 1330 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load <2 x double>, ptr %{{[0-9a-zA-Z_.]+}}, align 16 1331 // CHECK: %[[CALL:[0-9a-zA-Z_.]+]] = call <2 x double> @vec_splat(double vector[2], unsigned int)(<2 x double> noundef %[[VAL]], i32 noundef zeroext 0) 1332 // CHECK: call void @_mm_store_pd(ptr noundef %[[ADDR]], <2 x double> noundef %[[CALL]]) 1333 1334 // CHECK-LABEL: define
available_externally void @_mm_storeh_pd 1335 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8 1336 // CHECK: store double %{{[0-9a-zA-Z_.]+}}, ptr %[[ADDR]] 1337 1338 // CHECK-LABEL: define available_externally void @_mm_storel_epi64 1339 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8 1340 // CHECK: store i64 %{{[0-9a-zA-Z_.]+}}, ptr %[[ADDR]], align 8 1341 1342 // CHECK-LABEL: define available_externally void @_mm_storel_pd 1343 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8 1344 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load <2 x double>, ptr %{{[0-9a-zA-Z_.]+}}, align 16 1345 // CHECK: call void @_mm_store_sd(ptr noundef %[[ADDR]], <2 x double> noundef %[[VAL]]) 1346 1347 // CHECK-LABEL: define available_externally void @_mm_storer_pd 1348 // CHECK: shufflevector <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i32> <i32 1, i32 2> 1349 // CHECK: call void @_mm_store_pd(ptr noundef %{{[0-9a-zA-Z_.]+}}, <2 x double> noundef %{{[0-9a-zA-Z_.]+}}) 1350 1351 // CHECK-LABEL: define available_externally void @_mm_storeu_pd 1352 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8 1353 // CHECK: store <2 x double> %{{[0-9a-zA-Z_.]+}}, ptr %[[ADDR]], align 1 1354 1355 // CHECK-LABEL: define available_externally void @_mm_storeu_si128 1356 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8 1357 // CHECK: store <2 x i64> %{{[0-9a-zA-Z_.]+}}, ptr %[[ADDR]], align 1 1358 1359 /* test_stream: calls the four _mm_stream_* wrappers, passing dp/md1, mip/mi1, is/i and i64s/i64s[1] respectively. The FileCheck expectations that follow look for a dcbtstt inline-asm call in each wrapper, in the same order as the calls here. The _mm_stream_si64 pattern expects tab escapes (\09) inside the asm string where the other three expect a plain space, so the four patterns are intentionally not identical. */ void __attribute__((noinline)) 1360 test_stream() { 1361 _mm_stream_pd(dp, md1); 1362 _mm_stream_si128(mip, mi1); 1363 _mm_stream_si32(is, i); 1364 _mm_stream_si64(i64s, i64s[1]); 1365 } 1366 1367 // CHECK-LABEL: @test_stream 1368 1369 // CHECK-LABEL: define available_externally void @_mm_stream_pd 1370 // CHECK: call void asm sideeffect "dcbtstt 0,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}}) 1371 1372 // CHECK-LABEL: define
available_externally void @_mm_stream_si128 1373 // CHECK: call void asm sideeffect "dcbtstt 0,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}}) 1374 1375 // CHECK-LABEL: define available_externally void @_mm_stream_si32 1376 // CHECK: call void asm sideeffect "dcbtstt 0,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}}) 1377 1378 // CHECK-LABEL: define available_externally void @_mm_stream_si64 1379 // CHECK: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}}) 1380 1381 /* test_sub: calls the _mm_sub_* and _mm_subs_* wrappers on the global operand pairs (mi1/mi2, md1/md2, m641/m642) and stores each result in resi, resd or res64 to match the wrapper's return type. The FileCheck expectations that follow list the wrappers in the same order as the calls here, so keep the two in step when adding or moving a call. */ void __attribute__((noinline)) 1382 test_sub() { 1383 resi = _mm_sub_epi64(mi1, mi2); 1384 resi = _mm_sub_epi32(mi1, mi2); 1385 resi = _mm_sub_epi16(mi1, mi2); 1386 resi = _mm_sub_epi8(mi1, mi2); 1387 resd = _mm_sub_pd(md1, md2); 1388 resd = _mm_sub_sd(md1, md2); 1389 res64 = _mm_sub_si64(m641, m642); 1390 resi = _mm_subs_epi16(mi1, mi2); 1391 resi = _mm_subs_epi8(mi1, mi2); 1392 resi = _mm_subs_epu16(mi1, mi2); 1393 resi = _mm_subs_epu8(mi1, mi2); 1394 } 1395 1396 // CHECK-LABEL: @test_sub 1397 1398 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sub_epi64 1399 // CHECK: sub <2 x i64> 1400 1401 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sub_epi32 1402 // CHECK: sub <4 x i32> 1403 1404 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sub_epi16 1405 // CHECK: sub <8 x i16> 1406 1407 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sub_epi8 1408 // CHECK: sub <16 x i8> 1409 1410 // CHECK-LABEL: define available_externally <2 x double> @_mm_sub_pd 1411 // CHECK: fsub <2 x double> 1412 1413 // CHECK-LABEL: define available_externally <2 x double> @_mm_sub_sd 1414 // CHECK: fsub double 1415 1416 // CHECK-LABEL: define available_externally i64 @_mm_sub_si64 1417 // CHECK: sub i64 1418 1419 // CHECK-LABEL: define available_externally <2 x i64> @_mm_subs_epi16 1420 // CHECK: call <8 x i16> @vec_subs(short vector[8], short vector[8]) 1421 1422 // CHECK-LABEL: define available_externally <2 x i64> @_mm_subs_epi8 1423 // CHECK: call <16 x i8>
@vec_subs(signed char vector[16], signed char vector[16]) 1424 1425 // CHECK-LABEL: define available_externally <2 x i64> @_mm_subs_epu16 1426 // CHECK: call <8 x i16> @vec_subs(unsigned short vector[8], unsigned short vector[8]) 1427 1428 // CHECK-LABEL: define available_externally <2 x i64> @_mm_subs_epu8 1429 // CHECK: call <16 x i8> @vec_subs(unsigned char vector[16], unsigned char vector[16]) 1430 1431 /* test_ucomi: calls the six _mm_ucomi*_sd wrappers on md1 and md2 and stores each result in the global int i. The FileCheck expectations that follow look for one fcmp on double per wrapper, listed in the same order as the calls here. */ void __attribute__((noinline)) 1432 test_ucomi() { 1433 i = _mm_ucomieq_sd(md1, md2); 1434 i = _mm_ucomige_sd(md1, md2); 1435 i = _mm_ucomigt_sd(md1, md2); 1436 i = _mm_ucomile_sd(md1, md2); 1437 i = _mm_ucomilt_sd(md1, md2); 1438 i = _mm_ucomineq_sd(md1, md2); 1439 } 1440 1441 // CHECK-LABEL: @test_ucomi 1442 1443 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomieq_sd 1444 // CHECK: fcmp oeq double 1445 1446 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomige_sd 1447 // CHECK: fcmp oge double 1448 1449 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomigt_sd 1450 // CHECK: fcmp ogt double 1451 1452 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomile_sd 1453 // CHECK: fcmp ole double 1454 1455 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomilt_sd 1456 // CHECK: fcmp olt double 1457 1458 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomineq_sd 1459 // CHECK: fcmp une double 1460 1461 /* test_undefined: calls the argument-less _mm_undefined_pd and _mm_undefined_si128 wrappers and stores their results in resd and resi. The FileCheck expectations that follow look for a load, a store back to the same address, and a second load inside each wrapper. */ void __attribute__((noinline)) 1462 test_undefined() { 1463 resd = _mm_undefined_pd(); 1464 resi = _mm_undefined_si128(); 1465 } 1466 1467 // CHECK-LABEL: @test_undefined 1468 1469 // CHECK-LABEL: define available_externally <2 x double> @_mm_undefined_pd() 1470 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load <2 x double>, ptr %[[ADDR:[0-9a-zA-Z_.]+]], align 16 1471 // CHECK: store <2 x double> %[[VAL]], ptr %[[ADDR]], align 16 1472 // CHECK: load <2 x double>, ptr %[[ADDR]], align 16 1473 1474 // CHECK-LABEL: define available_externally <2 x i64> @_mm_undefined_si128() 1475 // CHECK:
%[[VAL:[0-9a-zA-Z_.]+]] = load <2 x i64>, ptr %[[ADDR:[0-9a-zA-Z_.]+]], align 16 1476 // CHECK: store <2 x i64> %[[VAL]], ptr %[[ADDR]], align 16 1477 // CHECK: load <2 x i64>, ptr %[[ADDR]], align 16 1478 1479 /* test_unpack: calls the five _mm_unpackhi_* wrappers followed by the five _mm_unpacklo_* wrappers, storing integer results in resi and double results in resd. The FileCheck expectations that follow look for a vec_mergel call in each unpackhi wrapper and a vec_mergeh call in each unpacklo wrapper, listed in the same order as the calls here. */ void __attribute__((noinline)) 1480 test_unpack() { 1481 resi = _mm_unpackhi_epi16(mi1, mi2); 1482 resi = _mm_unpackhi_epi32(mi1, mi2); 1483 resi = _mm_unpackhi_epi64(mi1, mi2); 1484 resi = _mm_unpackhi_epi8(mi1, mi2); 1485 resd = _mm_unpackhi_pd(md1, md2); 1486 resi = _mm_unpacklo_epi16(mi1, mi2); 1487 resi = _mm_unpacklo_epi32(mi1, mi2); 1488 resi = _mm_unpacklo_epi64(mi1, mi2); 1489 resi = _mm_unpacklo_epi8(mi1, mi2); 1490 resd = _mm_unpacklo_pd(md1, md2); 1491 } 1492 1493 // CHECK-LABEL: @test_unpack 1494 1495 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpackhi_epi16 1496 // CHECK: call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8]) 1497 1498 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpackhi_epi32 1499 // CHECK: call <4 x i32> @vec_mergel(unsigned int vector[4], unsigned int vector[4]) 1500 1501 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpackhi_epi64 1502 // CHECK: call <2 x i64> @vec_mergel(long long vector[2], long long vector[2]) 1503 1504 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpackhi_epi8 1505 // CHECK: call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16]) 1506 1507 // CHECK-LABEL: define available_externally <2 x double> @_mm_unpackhi_pd 1508 // CHECK: call <2 x double> @vec_mergel(double vector[2], double vector[2]) 1509 1510 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpacklo_epi16 1511 // CHECK: call <8 x i16> @vec_mergeh(short vector[8], short vector[8]) 1512 1513 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpacklo_epi32 1514 // CHECK: call <4 x i32> @vec_mergeh(int vector[4], int vector[4]) 1515 1516 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpacklo_epi64 1517 // CHECK:
call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2]) 1518 1519 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpacklo_epi8 1520 // CHECK: call <16 x i8> @vec_mergeh(unsigned char vector[16], unsigned char vector[16]) 1521 1522 // CHECK-LABEL: define available_externally <2 x double> @_mm_unpacklo_pd 1523 // CHECK: call <2 x double> @vec_mergeh(double vector[2], double vector[2]) 1524