// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s

typedef _Float16 half;

typedef half half2 __attribute__((ext_vector_type(2)));
typedef float float2 __attribute__((ext_vector_type(2)));
typedef float float4 __attribute__((ext_vector_type(4)));
typedef short int si8 __attribute__((ext_vector_type(8)));
typedef unsigned int u4 __attribute__((ext_vector_type(4)));
typedef double double2 __attribute__((ext_vector_type(2)));
typedef double double3 __attribute__((ext_vector_type(3)));

__attribute__((address_space(1))) int int_as_one;
typedef int bar;
bar b;

struct StructWithBitfield {
  int i : 5;
  short s : 3;
  char c : 2;
  long long int lli : 3;
};

void test_builtin_elementwise_abs(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, si8 vi1, si8 vi2,
                                  long long int i1, long long int i2, short si,
                                  _BitInt(31) bi1, _BitInt(31) bi2, int i,
                                  char ci) {
  // CHECK-LABEL: define void @test_builtin_elementwise_abs(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.fabs.f32(float [[F1]])
  f2 = __builtin_elementwise_abs(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.fabs.f64(double [[D1]])
  d2 = __builtin_elementwise_abs(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.fabs.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_abs(vf1);

  // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.abs.i64(i64 [[I1]], i1 false)
  i2 = __builtin_elementwise_abs(i1);

  // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
  // CHECK: [[S1:%.+]] = trunc i64 [[I1]] to i16
  // CHECK-NEXT: call i16 @llvm.abs.i16(i16 [[S1]], i1 false)
  i1 = __builtin_elementwise_abs((short)i1);

  // CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[VI1]], i1 false)
  vi2 = __builtin_elementwise_abs(vi1);

  // CHECK: [[CVI2:%.+]] = load <8 x i16>, ptr %cvi2, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[CVI2]], i1 false)
  const si8 cvi2 = vi2;
  vi2 = __builtin_elementwise_abs(cvi2);

  // CHECK: [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4
  // CHECK-NEXT: [[LOADEDV:%.+]] = trunc i32 [[BI1]] to i31
  // CHECK-NEXT: call i31 @llvm.abs.i31(i31 [[LOADEDV]], i1 false)
  bi2 = __builtin_elementwise_abs(bi1);

  // CHECK: [[IA1:%.+]] = load i32, ptr addrspace(1) @int_as_one, align 4
  // CHECK-NEXT: call i32 @llvm.abs.i32(i32 [[IA1]], i1 false)
  b = __builtin_elementwise_abs(int_as_one);

  // CHECK: call i32 @llvm.abs.i32(i32 -10, i1 false)
  b = __builtin_elementwise_abs(-10);

  // CHECK: [[SI:%.+]] = load i16, ptr %si.addr, align 2
  // CHECK-NEXT: [[RES:%.+]] = call i16 @llvm.abs.i16(i16 [[SI]], i1 false)
  si = __builtin_elementwise_abs(si);

  struct StructWithBitfield t;

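  // The bitfields below share a single 16-bit storage unit (note the i16
  // loads); each field is extracted with a shl/ashr pair and converted to
  // its declared type before the call to llvm.abs.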
  // CHECK: [[BFLOAD:%.+]] = load i16, ptr %t, align 8
  // CHECK-NEXT: [[BFSHL:%.+]] = shl i16 [[BFLOAD]], 11
  // CHECK-NEXT: [[BFASHR:%.+]] = ashr i16 [[BFSHL]], 11
  // CHECK-NEXT: [[BFCAST:%.+]] = sext i16 [[BFASHR]] to i32
  // CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.abs.i32(i32 [[BFCAST]], i1 false)
  i = __builtin_elementwise_abs(t.i);

  // CHECK: [[BFLOAD:%.+]] = load i16, ptr %t, align 8
  // CHECK-NEXT: [[BFSHL:%.+]] = shl i16 [[BFLOAD]], 8
  // CHECK-NEXT: [[BFASHR:%.+]] = ashr i16 [[BFSHL]], 13
  // CHECK-NEXT: [[RES:%.+]] = call i16 @llvm.abs.i16(i16 [[BFASHR]], i1 false)
  si = __builtin_elementwise_abs(t.s);

  // CHECK: [[BFLOAD:%.+]] = load i16, ptr %t, align 8
  // CHECK-NEXT: [[BFSHL:%.+]] = shl i16 [[BFLOAD]], 6
  // CHECK-NEXT: [[BFASHR:%.+]] = ashr i16 [[BFSHL]], 14
  // CHECK-NEXT: [[BFCAST:%.+]] = trunc i16 [[BFASHR]] to i8
  // CHECK-NEXT: [[RES:%.+]] = call i8 @llvm.abs.i8(i8 [[BFCAST]], i1 false)
  ci = __builtin_elementwise_abs(t.c);

  // CHECK: [[BFLOAD:%.+]] = load i16, ptr %t, align 8
  // CHECK-NEXT: [[BFSHL:%.+]] = shl i16 [[BFLOAD]], 3
  // CHECK-NEXT: [[BFASHR:%.+]] = ashr i16 [[BFSHL]], 13
  // CHECK-NEXT: [[BFCAST:%.+]] = sext i16 [[BFASHR]] to i64
  // CHECK-NEXT: [[RES:%.+]] = call i64 @llvm.abs.i64(i64 [[BFCAST]], i1 false)
  i1 = __builtin_elementwise_abs(t.lli);
}

void test_builtin_elementwise_add_sat(float f1, float f2, double d1, double d2,
                                      float4 vf1, float4 vf2, long long int i1,
                                      long long int i2, si8 vi1, si8 vi2,
                                      unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                      _BitInt(31) bi1, _BitInt(31) bi2,
                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2,
                                      char c1, char c2, unsigned char uc1,
                                      unsigned char uc2, short s1, short s2,
                                      unsigned short us1, unsigned short us2) {
  // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.sadd.sat.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_add_sat(i1, i2);

  // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.sadd.sat.i64(i64 [[I1]], i64 10)
  i1 = __builtin_elementwise_add_sat(i1, 10ll);

  // CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_add_sat(vi1, vi2);

  // CHECK: [[U1:%.+]] = load i32, ptr %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, ptr %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.uadd.sat.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_add_sat(u1, u2);

  // CHECK: [[VU1:%.+]] = load <4 x i32>, ptr %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, ptr %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_add_sat(vu1, vu2);

  // CHECK: [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4
  // CHECK-NEXT: [[LOADEDV:%.+]] = trunc i32 [[BI1]] to i31
  // CHECK-NEXT: [[BI2:%.+]] = load i32, ptr %bi2.addr, align 4
  // CHECK-NEXT: [[LOADEDV1:%.+]] = trunc i32 [[BI2]] to i31
  // CHECK-NEXT: call i31 @llvm.sadd.sat.i31(i31 [[LOADEDV]], i31 [[LOADEDV1]])
  bi1 = __builtin_elementwise_add_sat(bi1, bi2);

  // CHECK: [[BU1:%.+]] = load i64, ptr %bu1.addr, align 8
  // CHECK-NEXT: [[LOADEDV2:%.+]] = trunc i64 [[BU1]] to i55
  // CHECK-NEXT: [[BU2:%.+]] = load i64, ptr %bu2.addr, align 8
  // CHECK-NEXT: [[LOADEDV3:%.+]] = trunc i64 [[BU2]] to i55
  // CHECK-NEXT: call i55 @llvm.uadd.sat.i55(i55 [[LOADEDV2]], i55 [[LOADEDV3]])
  bu1 = __builtin_elementwise_add_sat(bu1, bu2);

  // CHECK: [[IAS1:%.+]] = load i32, ptr addrspace(1) @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, ptr @b, align 4
  // CHECK-NEXT: call i32 @llvm.sadd.sat.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_add_sat(int_as_one, b);

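  // With two integer-constant arguments the saturating add is folded at
  // compile time (1 + 'a' == 98); the int result is then widened to i64 by
  // the assignment to the long long i1.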
  // CHECK: store i64 98, ptr %i1.addr, align 8
  i1 = __builtin_elementwise_add_sat(1, 'a');

  // CHECK: [[C1:%.+]] = load i8, ptr %c1.addr, align 1
  // CHECK-NEXT: [[C2:%.+]] = load i8, ptr %c2.addr, align 1
  // CHECK-NEXT: call i8 @llvm.sadd.sat.i8(i8 [[C1]], i8 [[C2]])
  c1 = __builtin_elementwise_add_sat(c1, c2);

  // CHECK: [[UC1:%.+]] = load i8, ptr %uc1.addr, align 1
  // CHECK-NEXT: [[UC2:%.+]] = load i8, ptr %uc2.addr, align 1
  // CHECK-NEXT: call i8 @llvm.uadd.sat.i8(i8 [[UC1]], i8 [[UC2]])
  uc1 = __builtin_elementwise_add_sat(uc1, uc2);

  // CHECK: [[S1:%.+]] = load i16, ptr %s1.addr, align 2
  // CHECK-NEXT: [[S2:%.+]] = load i16, ptr %s2.addr, align 2
  // CHECK-NEXT: call i16 @llvm.sadd.sat.i16(i16 [[S1]], i16 [[S2]])
  s1 = __builtin_elementwise_add_sat(s1, s2);

  // CHECK: [[S1:%.+]] = load i16, ptr %s1.addr, align 2
  // CHECK: [[I1:%.+]] = sext i16 [[S1]] to i32
  // CHECK-NEXT: [[S2:%.+]] = load i16, ptr %s2.addr, align 2
  // CHECK: [[I2:%.+]] = sext i16 [[S2]] to i32
  // CHECK-NEXT: call i32 @llvm.sadd.sat.i32(i32 [[I1]], i32 [[I2]])
  s1 = __builtin_elementwise_add_sat((int)s1, (int)s2);

  // CHECK: [[US1:%.+]] = load i16, ptr %us1.addr, align 2
  // CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr, align 2
  // CHECK-NEXT: call i16 @llvm.uadd.sat.i16(i16 [[US1]], i16 [[US2]])
  us1 = __builtin_elementwise_add_sat(us1, us2);
}

void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
                                      float4 vf1, float4 vf2, long long int i1,
                                      long long int i2, si8 vi1, si8 vi2,
                                      unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                      _BitInt(31) bi1, _BitInt(31) bi2,
                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2,
                                      char c1, char c2, unsigned char uc1,
                                      unsigned char uc2, short s1, short s2,
                                      unsigned short us1, unsigned short us2) {
  // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.ssub.sat.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_sub_sat(i1, i2);

  // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.ssub.sat.i64(i64 [[I1]], i64 10)
  i1 = __builtin_elementwise_sub_sat(i1, 10ll);

  // CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_sub_sat(vi1, vi2);

  // CHECK: [[U1:%.+]] = load i32, ptr %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, ptr %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.usub.sat.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_sub_sat(u1, u2);

  // CHECK: [[VU1:%.+]] = load <4 x i32>, ptr %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, ptr %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_sub_sat(vu1, vu2);

  // CHECK: [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4
  // CHECK-NEXT: [[LOADEDV:%.+]] = trunc i32 [[BI1]] to i31
  // CHECK-NEXT: [[BI2:%.+]] = load i32, ptr %bi2.addr, align 4
  // CHECK-NEXT: [[LOADEDV1:%.+]] = trunc i32 [[BI2]] to i31
  // CHECK-NEXT: call i31 @llvm.ssub.sat.i31(i31 [[LOADEDV]], i31 [[LOADEDV1]])
  bi1 = __builtin_elementwise_sub_sat(bi1, bi2);

  // CHECK: [[BU1:%.+]] = load i64, ptr %bu1.addr, align 8
  // CHECK-NEXT: [[LOADEDV2:%.+]] = trunc i64 [[BU1]] to i55
  // CHECK-NEXT: [[BU2:%.+]] = load i64, ptr %bu2.addr, align 8
  // CHECK-NEXT: [[LOADEDV3:%.+]] = trunc i64 [[BU2]] to i55
  // CHECK-NEXT: call i55 @llvm.usub.sat.i55(i55 [[LOADEDV2]], i55 [[LOADEDV3]])
  bu1 = __builtin_elementwise_sub_sat(bu1, bu2);

  // CHECK: [[IAS1:%.+]] = load i32, ptr addrspace(1) @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, ptr @b, align 4
  // CHECK-NEXT: call i32 @llvm.ssub.sat.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_sub_sat(int_as_one, b);

  // CHECK: store i64 -96, ptr %i1.addr, align 8
  i1 = __builtin_elementwise_sub_sat(1, 'a');

  // CHECK: [[C1:%.+]] = load i8, ptr %c1.addr, align 1
  // CHECK-NEXT: [[C2:%.+]] = load i8, ptr %c2.addr, align 1
  // CHECK-NEXT: call i8 @llvm.ssub.sat.i8(i8 [[C1]], i8 [[C2]])
  c1 = __builtin_elementwise_sub_sat(c1, c2);

  // CHECK: [[UC1:%.+]] = load i8, ptr %uc1.addr, align 1
  // CHECK-NEXT: [[UC2:%.+]] = load i8, ptr %uc2.addr, align 1
  // CHECK-NEXT: call i8 @llvm.usub.sat.i8(i8 [[UC1]], i8 [[UC2]])
  uc1 = __builtin_elementwise_sub_sat(uc1, uc2);

  // CHECK: [[S1:%.+]] = load i16, ptr %s1.addr, align 2
  // CHECK-NEXT: [[S2:%.+]] = load i16, ptr %s2.addr, align 2
  // CHECK-NEXT: call i16 @llvm.ssub.sat.i16(i16 [[S1]], i16 [[S2]])
  s1 = __builtin_elementwise_sub_sat(s1, s2);

  // CHECK: [[US1:%.+]] = load i16, ptr %us1.addr, align 2
  // CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr, align 2
  // CHECK-NEXT: call i16 @llvm.usub.sat.i16(i16 [[US1]], i16 [[US2]])
  us1 = __builtin_elementwise_sub_sat(us1, us2);
}

void test_builtin_elementwise_maximum(float f1, float f2, double d1, double d2,
                                      float4 vf1, float4 vf2, long long int i1,
                                      long long int i2, si8 vi1, si8 vi2,
                                      unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                      _BitInt(31) bi1, _BitInt(31) bi2,
                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_maximum(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: [[F2:%.+]] = load float, ptr %f2.addr, align 4
  // CHECK-NEXT: call float @llvm.maximum.f32(float [[F1]], float [[F2]])
  f1 = __builtin_elementwise_maximum(f1, f2);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: [[D2:%.+]] = load double, ptr %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.maximum.f64(double [[D1]], double [[D2]])
  d1 = __builtin_elementwise_maximum(d1, d2);

  // CHECK: [[D2:%.+]] = load double, ptr %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.maximum.f64(double 2.000000e+01, double [[D2]])
  d1 = __builtin_elementwise_maximum(20.0, d2);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maximum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf1 = __builtin_elementwise_maximum(vf1, vf2);

  // CHECK: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maximum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
  const float4 cvf1 = vf1;
  vf1 = __builtin_elementwise_maximum(cvf1, vf2);

  // CHECK: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maximum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
  vf1 = __builtin_elementwise_maximum(vf2, cvf1);
}

void test_builtin_elementwise_minimum(float f1, float f2, double d1, double d2,
                                      float4 vf1, float4 vf2, long long int i1,
                                      long long int i2, si8 vi1, si8 vi2,
                                      unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                      _BitInt(31) bi1, _BitInt(31) bi2,
                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_minimum(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: [[F2:%.+]] = load float, ptr %f2.addr, align 4
  // CHECK-NEXT: call float @llvm.minimum.f32(float [[F1]], float [[F2]])
  f1 = __builtin_elementwise_minimum(f1, f2);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: [[D2:%.+]] = load double, ptr %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.minimum.f64(double [[D1]], double [[D2]])
  d1 = __builtin_elementwise_minimum(d1, d2);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.minimum.f64(double [[D1]], double 2.000000e+00)
  d1 = __builtin_elementwise_minimum(d1, 2.0);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minimum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf1 = __builtin_elementwise_minimum(vf1, vf2);

  // CHECK: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minimum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
  const float4 cvf1 = vf1;
  vf1 = __builtin_elementwise_minimum(cvf1, vf2);

  // CHECK: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minimum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
  vf1 = __builtin_elementwise_minimum(vf2, cvf1);
}

void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, long long int i1,
                                  long long int i2, si8 vi1, si8 vi2,
                                  unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                  _BitInt(31) bi1, _BitInt(31) bi2,
                                  unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_max(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: [[F2:%.+]] = load float, ptr %f2.addr, align 4
  // CHECK-NEXT: call float @llvm.maxnum.f32(float [[F1]], float [[F2]])
  f1 = __builtin_elementwise_max(f1, f2);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: [[D2:%.+]] = load double, ptr %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.maxnum.f64(double [[D1]], double [[D2]])
  d1 = __builtin_elementwise_max(d1, d2);

  // CHECK: [[D2:%.+]] = load double, ptr %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.maxnum.f64(double 2.000000e+01, double [[D2]])
  d1 = __builtin_elementwise_max(20.0, d2);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf1 = __builtin_elementwise_max(vf1, vf2);

  // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_max(i1, i2);

  // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 10)
  i1 = __builtin_elementwise_max(i1, 10ll);

  // CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_max(vi1, vi2);

  // CHECK: [[U1:%.+]] = load i32, ptr %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, ptr %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.umax.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_max(u1, u2);

  // CHECK: [[VU1:%.+]] = load <4 x i32>, ptr %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, ptr %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_max(vu1, vu2);

  // CHECK: [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4
  // CHECK-NEXT: [[LOADEDV:%.+]] = trunc i32 [[BI1]] to i31
  // CHECK-NEXT: [[BI2:%.+]] = load i32, ptr %bi2.addr, align 4
  // CHECK-NEXT: [[LOADEDV1:%.+]] = trunc i32 [[BI2]] to i31
  // CHECK-NEXT: call i31 @llvm.smax.i31(i31 [[LOADEDV]], i31 [[LOADEDV1]])
  bi1 = __builtin_elementwise_max(bi1, bi2);

  // CHECK: [[BU1:%.+]] = load i64, ptr %bu1.addr, align 8
  // CHECK-NEXT: [[LOADEDV2:%.+]] = trunc i64 [[BU1]] to i55
  // CHECK-NEXT: [[BU2:%.+]] = load i64, ptr %bu2.addr, align 8
  // CHECK-NEXT: [[LOADEDV3:%.+]] = trunc i64 [[BU2]] to i55
  // CHECK-NEXT: call i55 @llvm.umax.i55(i55 [[LOADEDV2]], i55 [[LOADEDV3]])
  bu1 = __builtin_elementwise_max(bu1, bu2);

  // CHECK: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
  const float4 cvf1 = vf1;
  vf1 = __builtin_elementwise_max(cvf1, vf2);

  // CHECK: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
  vf1 = __builtin_elementwise_max(vf2, cvf1);

  // CHECK: [[IAS1:%.+]] = load i32, ptr addrspace(1) @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, ptr @b, align 4
  // CHECK-NEXT: call i32 @llvm.smax.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_max(int_as_one, b);

  // CHECK: call i32 @llvm.smax.i32(i32 1, i32 97)
  i1 = __builtin_elementwise_max(1, 'a');
}

void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, long long int i1,
                                  long long int i2, si8 vi1, si8 vi2,
                                  unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                  _BitInt(31) bi1, _BitInt(31) bi2,
                                  unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_min(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: [[F2:%.+]] = load float, ptr %f2.addr, align 4
  // CHECK-NEXT: call float @llvm.minnum.f32(float [[F1]], float [[F2]])
  f1 = __builtin_elementwise_min(f1, f2);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: [[D2:%.+]] = load double, ptr %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double [[D2]])
  d1 = __builtin_elementwise_min(d1, d2);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double 2.000000e+00)
  d1 = __builtin_elementwise_min(d1, 2.0);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf1 = __builtin_elementwise_min(vf1, vf2);

  // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_min(i1, i2);

  // CHECK: [[I2:%.+]] = load i64, ptr %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 -11, i64 [[I2]])
  i1 = __builtin_elementwise_min(-11ll, i2);

  // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
  // CHECK: [[S1:%.+]] = trunc i64 [[I1]] to i16
  // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr, align 8
  // CHECK: [[S2:%.+]] = trunc i64 [[I2]] to i16
  // CHECK-NEXT: call i16 @llvm.smin.i16(i16 [[S1]], i16 [[S2]])
  i1 = __builtin_elementwise_min((short)i1, (short)i2);

  // CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_min(vi1, vi2);

  // CHECK: [[U1:%.+]] = load i32, ptr %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, ptr %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.umin.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_min(u1, u2);

  // CHECK: [[VU1:%.+]] = load <4 x i32>, ptr %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, ptr %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_min(vu1, vu2);

  // CHECK: [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4
  // CHECK-NEXT: [[LOADEDV:%.+]] = trunc i32 [[BI1]] to i31
  // CHECK-NEXT: [[BI2:%.+]] = load i32, ptr %bi2.addr, align 4
  // CHECK-NEXT: [[LOADEDV1:%.+]] = trunc i32 [[BI2]] to i31
  // CHECK-NEXT: call i31 @llvm.smin.i31(i31 [[LOADEDV]], i31 [[LOADEDV1]])
  bi1 = __builtin_elementwise_min(bi1, bi2);

  // CHECK: [[BU1:%.+]] = load i64, ptr %bu1.addr, align 8
  // CHECK-NEXT: [[LOADEDV2:%.+]] = trunc i64 [[BU1]] to i55
  // CHECK-NEXT: [[BU2:%.+]] = load i64, ptr %bu2.addr, align 8
  // CHECK-NEXT: [[LOADEDV3:%.+]] = trunc i64 [[BU2]] to i55
  // CHECK-NEXT: call i55 @llvm.umin.i55(i55 [[LOADEDV2]], i55 [[LOADEDV3]])
  bu1 = __builtin_elementwise_min(bu1, bu2);

  // CHECK: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
  const float4 cvf1 = vf1;
  vf1 = __builtin_elementwise_min(cvf1, vf2);

  // CHECK: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
  vf1 = __builtin_elementwise_min(vf2, cvf1);

  // CHECK: [[IAS1:%.+]] = load i32, ptr addrspace(1) @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, ptr @b, align 4
  // CHECK-NEXT: call i32 @llvm.smin.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_min(int_as_one, b);
}

void test_builtin_elementwise_bitreverse(si8 vi1, si8 vi2,
                                         long long int i1, long long int i2, short si,
                                         _BitInt(31) bi1, _BitInt(31) bi2,
                                         char ci) {
  // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.bitreverse.i64(i64 [[I1]])
  i2 = __builtin_elementwise_bitreverse(i1);

  // CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> [[VI1]])
  vi2 = __builtin_elementwise_bitreverse(vi1);

  // CHECK: [[CVI2:%.+]] = load <8 x i16>, ptr %cvi2, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> [[CVI2]])
  const si8 cvi2 = vi2;
  vi2 = __builtin_elementwise_bitreverse(cvi2);

  // CHECK: [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4
  // CHECK-NEXT: [[LOADEDV:%.+]] = trunc i32 [[BI1]] to i31
  // CHECK-NEXT: call i31 @llvm.bitreverse.i31(i31 [[LOADEDV]])
  bi2 = __builtin_elementwise_bitreverse(bi1);

  // CHECK: [[IA1:%.+]] = load i32, ptr addrspace(1) @int_as_one, align 4
  // CHECK-NEXT: call i32 @llvm.bitreverse.i32(i32 [[IA1]])
  b = __builtin_elementwise_bitreverse(int_as_one);

  // CHECK: store i32 1879048191, ptr @b, align 4
  b = __builtin_elementwise_bitreverse(-10);

  // CHECK: [[SI:%.+]] = load i16, ptr %si.addr, align 2
  // CHECK-NEXT: [[RES:%.+]] = call i16 @llvm.bitreverse.i16(i16 [[SI]])
  si = __builtin_elementwise_bitreverse(si);

  // CHECK: store i16 28671, ptr %si.addr, align 2
  si = __builtin_elementwise_bitreverse((short)-10);

  // CHECK: store i16 28671, ptr %si.addr, align 2
  si = __builtin_elementwise_bitreverse((unsigned short)-10);

  // CHECK: [[CI:%.+]] = load i8, ptr %ci.addr, align 1
  // CHECK-NEXT: [[RES:%.+]] = call i8 @llvm.bitreverse.i8(i8 [[CI]])
  ci = __builtin_elementwise_bitreverse(ci);

  // CHECK: store i8 111, ptr %ci.addr, align 1
  ci = __builtin_elementwise_bitreverse((unsigned char)-10);

  // CHECK: store i8 111, ptr %ci.addr, align 1
  ci = __builtin_elementwise_bitreverse((char)-10);
}

void test_builtin_elementwise_ceil(float f1, float f2, double d1, double d2,
                                   float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_ceil(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.ceil.f32(float [[F1]])
  f2 = __builtin_elementwise_ceil(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.ceil.f64(double [[D1]])
  d2 = __builtin_elementwise_ceil(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.ceil.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_ceil(vf1);
}

void test_builtin_elementwise_acos(float f1, float f2, double d1, double d2,
                                   float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_acos(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.acos.f32(float [[F1]])
  f2 = __builtin_elementwise_acos(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.acos.f64(double [[D1]])
  d2 = __builtin_elementwise_acos(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.acos.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_acos(vf1);
}

void test_builtin_elementwise_asin(float f1, float f2, double d1, double d2,
                                   float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_asin(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.asin.f32(float [[F1]])
  f2 = __builtin_elementwise_asin(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.asin.f64(double [[D1]])
  d2 = __builtin_elementwise_asin(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.asin.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_asin(vf1);
}

void test_builtin_elementwise_atan(float f1, float f2, double d1, double d2,
                                   float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_atan(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.atan.f32(float [[F1]])
  f2 = __builtin_elementwise_atan(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.atan.f64(double [[D1]])
  d2 = __builtin_elementwise_atan(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.atan.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_atan(vf1);
}

void test_builtin_elementwise_atan2(float f1, float f2, float f3, double d1,
                                    double d2, double d3, float4 vf1,
                                    float4 vf2, float4 vf3) {
  // CHECK-LABEL: define void @test_builtin_elementwise_atan2(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: [[F2:%.+]] = load float, ptr %f2.addr, align 4
  // CHECK-NEXT: call float @llvm.atan2.f32(float [[F1]], float [[F2]])
  f3 = __builtin_elementwise_atan2(f1, f2);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: [[D2:%.+]] = load double, ptr %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.atan2.f64(double [[D1]], double [[D2]])
  d3 = __builtin_elementwise_atan2(d1, d2);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.atan2.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf3 = __builtin_elementwise_atan2(vf1, vf2);
}

void test_builtin_elementwise_cos(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_cos(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.cos.f32(float [[F1]])
  f2 = __builtin_elementwise_cos(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.cos.f64(double [[D1]])
  d2 = __builtin_elementwise_cos(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.cos.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_cos(vf1);
}

void test_builtin_elementwise_cosh(float f1, float f2, double d1, double d2,
                                   float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_cosh(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.cosh.f32(float [[F1]])
  f2 = __builtin_elementwise_cosh(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.cosh.f64(double [[D1]])
  d2 = __builtin_elementwise_cosh(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.cosh.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_cosh(vf1);
}

void test_builtin_elementwise_exp(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_exp(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.exp.f32(float [[F1]])
  f2 = __builtin_elementwise_exp(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.exp.f64(double [[D1]])
  d2 = __builtin_elementwise_exp(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.exp.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_exp(vf1);
}

void test_builtin_elementwise_exp2(float f1, float f2, double d1, double d2,
                                   float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_exp2(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.exp2.f32(float [[F1]])
  f2 = __builtin_elementwise_exp2(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.exp2.f64(double [[D1]])
  d2 = __builtin_elementwise_exp2(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.exp2.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_exp2(vf1);
}

void test_builtin_elementwise_floor(float f1, float f2, double d1, double d2,
                                    float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_floor(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.floor.f32(float [[F1]])
  f2 = __builtin_elementwise_floor(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.floor.f64(double [[D1]])
  d2 = __builtin_elementwise_floor(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.floor.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_floor(vf1);
}

void test_builtin_elementwise_log(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_log(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.log.f32(float [[F1]])
  f2 = __builtin_elementwise_log(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.log.f64(double [[D1]])
  d2 = __builtin_elementwise_log(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.log.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_log(vf1);
}

void test_builtin_elementwise_log10(float f1, float f2, double d1, double d2,
                                    float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_log10(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.log10.f32(float [[F1]])
  f2 = __builtin_elementwise_log10(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.log10.f64(double [[D1]])
  d2 = __builtin_elementwise_log10(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.log10.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_log10(vf1);
}

void test_builtin_elementwise_log2(float f1, float f2, double d1, double d2,
                                   float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_log2(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.log2.f32(float [[F1]])
  f2 = __builtin_elementwise_log2(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.log2.f64(double [[D1]])
  d2 = __builtin_elementwise_log2(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.log2.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_log2(vf1);
}

void test_builtin_elementwise_popcount(si8 vi1, si8 vi2, long long int i1,
                                       long long int i2, short si,
                                       _BitInt(31) bi1, _BitInt(31) bi2,
                                       char ci) {
  // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.ctpop.i64(i64 [[I1]])
  i2 = __builtin_elementwise_popcount(i1);

  // CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[VI1]])
  vi2 = __builtin_elementwise_popcount(vi1);

  // CHECK: [[CVI2:%.+]] = load <8 x i16>, ptr %cvi2, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[CVI2]])
  const si8 cvi2 = vi2;
  vi2 = __builtin_elementwise_popcount(cvi2);

  // CHECK: [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4
  // CHECK-NEXT: [[LOADEDV:%.+]] = trunc i32 [[BI1]] to i31
  // CHECK-NEXT: call i31 @llvm.ctpop.i31(i31 [[LOADEDV]])
  bi2 = __builtin_elementwise_popcount(bi1);

  // CHECK: [[IA1:%.+]] = load i32, ptr addrspace(1) @int_as_one, align 4
  // CHECK-NEXT: call i32 @llvm.ctpop.i32(i32 [[IA1]])
  b = __builtin_elementwise_popcount(int_as_one);

  // CHECK: store i32 30, ptr @b, align 4
  b = __builtin_elementwise_popcount(-10);

  // CHECK: [[SI:%.+]] = load i16, ptr %si.addr, align 2
  // CHECK-NEXT: [[RES:%.+]] = call i16 @llvm.ctpop.i16(i16 [[SI]])
  si = __builtin_elementwise_popcount(si);

  // CHECK: store i16 3, ptr %si.addr, align 2
  si = __builtin_elementwise_popcount((unsigned short)32771);

  // CHECK: store i16 3, ptr %si.addr, align 2
  si = __builtin_elementwise_popcount((short)32771);

  // CHECK: [[CI:%.+]] = load i8, ptr %ci.addr, align 1
  // CHECK-NEXT: [[RES:%.+]] = call i8 @llvm.ctpop.i8(i8 [[CI]])
  ci = __builtin_elementwise_popcount(ci);

  // CHECK: store i8 2, ptr %ci.addr, align 1
  ci = __builtin_elementwise_popcount((unsigned char)192);

  // CHECK: store i8 2, ptr %ci.addr, align 1
  ci = __builtin_elementwise_popcount((char)192);
}

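// Note: __builtin_elementwise_fmod is lowered to the IR 'frem' instruction
// rather than an intrinsic call, as the checks below verify.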
void test_builtin_elementwise_fmod(float f1, float f2, double d1, double d2,
                                   float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_fmod(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK: [[F2:%.+]] = load float, ptr %f2.addr, align 4
  // CHECK-NEXT: frem float [[F1]], [[F2]]
  f2 = __builtin_elementwise_fmod(f1, f2);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK: [[D2:%.+]] = load double, ptr %d2.addr, align 8
  // CHECK-NEXT: frem double [[D1]], [[D2]]
  d2 = __builtin_elementwise_fmod(d1, d2);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
  // CHECK-NEXT: frem <4 x float> [[VF1]], [[VF2]]
  vf2 = __builtin_elementwise_fmod(vf1, vf2);
}

void test_builtin_elementwise_pow(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_pow(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK: [[F2:%.+]] = load float, ptr %f2.addr, align 4
  // CHECK-NEXT: call float @llvm.pow.f32(float [[F1]], float [[F2]])
  f2 = __builtin_elementwise_pow(f1, f2);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK: [[D2:%.+]] = load double, ptr %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.pow.f64(double [[D1]], double [[D2]])
  d2 = __builtin_elementwise_pow(d1, d2);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.pow.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf2 = __builtin_elementwise_pow(vf1, vf2);
}

void test_builtin_elementwise_roundeven(float f1, float f2, double d1, double d2,
                                        float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_roundeven(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.roundeven.f32(float [[F1]])
  f2 = __builtin_elementwise_roundeven(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.roundeven.f64(double [[D1]])
  d2 = __builtin_elementwise_roundeven(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_roundeven(vf1);
}

void test_builtin_elementwise_round(float f1, float f2, double d1, double d2,
                                    float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_round(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.round.f32(float [[F1]])
  f2 = __builtin_elementwise_round(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.round.f64(double [[D1]])
  d2 = __builtin_elementwise_round(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.round.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_round(vf1);
}

void test_builtin_elementwise_rint(float f1, float f2, double d1, double d2,
                                   float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_rint(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.rint.f32(float [[F1]])
  f2 = __builtin_elementwise_rint(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.rint.f64(double [[D1]])
  d2 = __builtin_elementwise_rint(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.rint.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_rint(vf1);
}

void test_builtin_elementwise_nearbyint(float f1, float f2, double d1, double d2,
                                        float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_nearbyint(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.nearbyint.f32(float [[F1]])
  f2 = __builtin_elementwise_nearbyint(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.nearbyint.f64(double [[D1]])
  d2 = __builtin_elementwise_nearbyint(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_nearbyint(vf1);
}

void test_builtin_elementwise_sin(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_sin(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.sin.f32(float [[F1]])
  f2 = __builtin_elementwise_sin(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.sin.f64(double [[D1]])
  d2 = __builtin_elementwise_sin(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.sin.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_sin(vf1);
}

void test_builtin_elementwise_sinh(float f1, float f2, double d1, double d2,
                                   float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_sinh(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.sinh.f32(float [[F1]])
  f2 = __builtin_elementwise_sinh(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.sinh.f64(double [[D1]])
  d2 = __builtin_elementwise_sinh(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.sinh.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_sinh(vf1);
}

void test_builtin_elementwise_sqrt(float f1, float f2, double d1, double d2,
                                   float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_sqrt(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.sqrt.f32(float [[F1]])
  f2 = __builtin_elementwise_sqrt(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.sqrt.f64(double [[D1]])
  d2 = __builtin_elementwise_sqrt(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_sqrt(vf1);
}

void test_builtin_elementwise_tan(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_tan(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.tan.f32(float [[F1]])
  f2 = __builtin_elementwise_tan(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.tan.f64(double [[D1]])
  d2 = __builtin_elementwise_tan(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.tan.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_tan(vf1);
}

void test_builtin_elementwise_tanh(float f1, float f2, double d1, double d2,
                                   float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_tanh(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.tanh.f32(float [[F1]])
  f2 = __builtin_elementwise_tanh(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.tanh.f64(double [[D1]])
  d2 = __builtin_elementwise_tanh(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.tanh.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_tanh(vf1);
}

void test_builtin_elementwise_trunc(float f1, float f2, double d1, double d2,
                                    float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_trunc(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.trunc.f32(float [[F1]])
  f2 = __builtin_elementwise_trunc(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.trunc.f64(double [[D1]])
  d2 = __builtin_elementwise_trunc(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.trunc.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_trunc(vf1);
}

void test_builtin_elementwise_canonicalize(float f1, float f2, double d1, double d2,
                                           float4 vf1, float4 vf2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_canonicalize(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: call float @llvm.canonicalize.f32(float [[F1]])
  f2 = __builtin_elementwise_canonicalize(f1);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.canonicalize.f64(double [[D1]])
  d2 = __builtin_elementwise_canonicalize(d1);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.canonicalize.v4f32(<4 x float> [[VF1]])
  vf2 = __builtin_elementwise_canonicalize(vf1);
}

void test_builtin_elementwise_copysign(float f1, float f2, double d1, double d2,
                                       float4 vf1, float4 vf2, double2 v2f64) {
  // CHECK-LABEL: define void @test_builtin_elementwise_copysign(
  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
  // CHECK-NEXT: [[F2:%.+]] = load float, ptr %f2.addr, align 4
  // CHECK-NEXT: call float @llvm.copysign.f32(float [[F1]], float [[F2]])
  f1 = __builtin_elementwise_copysign(f1, f2);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: [[D2:%.+]] = load double, ptr %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.copysign.f64(double [[D1]], double [[D2]])
  d1 = __builtin_elementwise_copysign(d1, d2);

  // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.copysign.f64(double [[D1]], double 2.000000e+00)
  d1 = __builtin_elementwise_copysign(d1, 2.0);

  // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.copysign.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf1 = __builtin_elementwise_copysign(vf1, vf2);

  // CHECK: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.copysign.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
  const float4 cvf1 = vf1;
  vf1 = __builtin_elementwise_copysign(cvf1, vf2);

  // CHECK: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
  // CHECK-NEXT: call <4 x float> @llvm.copysign.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
  vf1 = __builtin_elementwise_copysign(vf2, cvf1);

  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr
  // CHECK-NEXT: call float @llvm.copysign.f32(float [[F1]], float 2.000000e+00)
  f1 = __builtin_elementwise_copysign(f1, 2.0f);

  // CHECK: [[F1:%.+]] = load float, ptr %f1.addr
  // CHECK-NEXT: call float @llvm.copysign.f32(float 2.000000e+00, float [[F1]])
  f1 = __builtin_elementwise_copysign(2.0f, f1);

  // CHECK: [[V2F64:%.+]] = load <2 x double>, ptr %v2f64.addr, align 16
  // CHECK-NEXT: call <2 x double> @llvm.copysign.v2f64(<2 x double> splat (double 1.000000e+00), <2 x double> [[V2F64]])
  v2f64 = __builtin_elementwise_copysign((double2)1.0, v2f64);
}

void test_builtin_elementwise_fma(float f32, double f64,
                                  float2 v2f32, float4 v4f32,
                                  double2 v2f64, double3 v3f64,
                                  const float4 c_v4f32,
                                  half f16, half2 v2f16) {
  // CHECK-LABEL: define void @test_builtin_elementwise_fma(
  // CHECK: [[F32_0:%.+]] = load float, ptr %f32.addr
  // CHECK-NEXT: [[F32_1:%.+]] = load float, ptr %f32.addr
  // CHECK-NEXT: [[F32_2:%.+]] = load float, ptr %f32.addr
  // CHECK-NEXT: call float @llvm.fma.f32(float [[F32_0]], float [[F32_1]], float [[F32_2]])
  float f2 = __builtin_elementwise_fma(f32, f32, f32);

  // CHECK: [[F64_0:%.+]] = load double, ptr %f64.addr
  // CHECK-NEXT: [[F64_1:%.+]] = load double, ptr %f64.addr
  // CHECK-NEXT: [[F64_2:%.+]] = load double, ptr %f64.addr
  // CHECK-NEXT: call double @llvm.fma.f64(double [[F64_0]], double [[F64_1]], double [[F64_2]])
  double d2 = __builtin_elementwise_fma(f64, f64, f64);

  // CHECK: [[V4F32_0:%.+]] = load <4 x float>, ptr %v4f32.addr
  // CHECK-NEXT: [[V4F32_1:%.+]] = load <4 x float>, ptr %v4f32.addr
  // CHECK-NEXT: [[V4F32_2:%.+]] = load <4 x float>, ptr %v4f32.addr
  // CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> [[V4F32_0]], <4 x float> [[V4F32_1]], <4 x float> [[V4F32_2]])
  float4 tmp_v4f32 = __builtin_elementwise_fma(v4f32, v4f32, v4f32);

  // FIXME: Are we really still doing the 3 vector load workaround?
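  // The checks below reflect that each <3 x double> operand is loaded as a
  // <4 x double> and then narrowed back to <3 x double> with a shufflevector.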
  // CHECK: [[V3F64_LOAD_0:%.+]] = load <4 x double>, ptr %v3f64.addr
  // CHECK-NEXT: [[V3F64_0:%.+]] = shufflevector
  // CHECK-NEXT: [[V3F64_LOAD_1:%.+]] = load <4 x double>, ptr %v3f64.addr
  // CHECK-NEXT: [[V3F64_1:%.+]] = shufflevector
  // CHECK-NEXT: [[V3F64_LOAD_2:%.+]] = load <4 x double>, ptr %v3f64.addr
  // CHECK-NEXT: [[V3F64_2:%.+]] = shufflevector
  // CHECK-NEXT: call <3 x double> @llvm.fma.v3f64(<3 x double> [[V3F64_0]], <3 x double> [[V3F64_1]], <3 x double> [[V3F64_2]])
  v3f64 = __builtin_elementwise_fma(v3f64, v3f64, v3f64);

  // CHECK: [[F64_0:%.+]] = load double, ptr %f64.addr
  // CHECK-NEXT: [[F64_1:%.+]] = load double, ptr %f64.addr
  // CHECK-NEXT: [[F64_2:%.+]] = load double, ptr %f64.addr
  // CHECK-NEXT: call double @llvm.fma.f64(double [[F64_0]], double [[F64_1]], double [[F64_2]])
  v2f64 = __builtin_elementwise_fma(f64, f64, f64);

  // CHECK: [[V4F32_0:%.+]] = load <4 x float>, ptr %c_v4f32.addr
  // CHECK-NEXT: [[V4F32_1:%.+]] = load <4 x float>, ptr %c_v4f32.addr
  // CHECK-NEXT: [[V4F32_2:%.+]] = load <4 x float>, ptr %c_v4f32.addr
  // CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> [[V4F32_0]], <4 x float> [[V4F32_1]], <4 x float> [[V4F32_2]])
  v4f32 = __builtin_elementwise_fma(c_v4f32, c_v4f32, c_v4f32);

  // CHECK: [[F16_0:%.+]] = load half, ptr %f16.addr
  // CHECK-NEXT: [[F16_1:%.+]] = load half, ptr %f16.addr
  // CHECK-NEXT: [[F16_2:%.+]] = load half, ptr %f16.addr
  // CHECK-NEXT: call half @llvm.fma.f16(half [[F16_0]], half [[F16_1]], half [[F16_2]])
  half tmp_f16 = __builtin_elementwise_fma(f16, f16, f16);

  // CHECK: [[V2F16_0:%.+]] = load <2 x half>, ptr %v2f16.addr
  // CHECK-NEXT: [[V2F16_1:%.+]] = load <2 x half>, ptr %v2f16.addr
  // CHECK-NEXT: [[V2F16_2:%.+]] = load <2 x half>, ptr %v2f16.addr
  // CHECK-NEXT: call <2 x half> @llvm.fma.v2f16(<2 x half> [[V2F16_0]], <2 x half> [[V2F16_1]], <2 x half> [[V2F16_2]])
  half2 tmp0_v2f16 = __builtin_elementwise_fma(v2f16, v2f16, v2f16);

  // CHECK: [[V2F16_0:%.+]] = load <2 x half>, ptr %v2f16.addr
  // CHECK-NEXT: [[V2F16_1:%.+]] = load <2 x half>, ptr %v2f16.addr
  // CHECK-NEXT: [[F16_2:%.+]] = load half, ptr %f16.addr
  // CHECK-NEXT: [[V2F16_2_INSERT:%.+]] = insertelement
  // CHECK-NEXT: [[V2F16_2:%.+]] = shufflevector <2 x half> [[V2F16_2_INSERT]], <2 x half> poison, <2 x i32> zeroinitializer
  // CHECK-NEXT: call <2 x half> @llvm.fma.v2f16(<2 x half> [[V2F16_0]], <2 x half> [[V2F16_1]], <2 x half> [[V2F16_2]])
  half2 tmp1_v2f16 = __builtin_elementwise_fma(v2f16, v2f16, (half2)f16);

  // CHECK: [[V2F16_0:%.+]] = load <2 x half>, ptr %v2f16.addr
  // CHECK-NEXT: [[V2F16_1:%.+]] = load <2 x half>, ptr %v2f16.addr
  // CHECK-NEXT: call <2 x half> @llvm.fma.v2f16(<2 x half> [[V2F16_0]], <2 x half> [[V2F16_1]], <2 x half> splat (half 0xH4400))
  half2 tmp2_v2f16 = __builtin_elementwise_fma(v2f16, v2f16, (half2)4.0);
}