1*207e5cccSFangrui Song // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \ 2*207e5cccSFangrui Song // RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg \ 3*207e5cccSFangrui Song // RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s 4*207e5cccSFangrui Song // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \ 5*207e5cccSFangrui Song // RUN: -ffp-exception-behavior=strict \ 6*207e5cccSFangrui Song // RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg \ 7*207e5cccSFangrui Song // RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s 8*207e5cccSFangrui Song // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \ 9*207e5cccSFangrui Song // RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | llc -o=- - \ 10*207e5cccSFangrui Song // RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s 11*207e5cccSFangrui Song // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \ 12*207e5cccSFangrui Song // RUN: -ffp-exception-behavior=strict \ 13*207e5cccSFangrui Song // RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | llc -o=- - \ 14*207e5cccSFangrui Song // RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s 15*207e5cccSFangrui Song 16*207e5cccSFangrui Song // REQUIRES: aarch64-registered-target 17*207e5cccSFangrui Song 18*207e5cccSFangrui Song // Test new aarch64 intrinsics and types but constrained 19*207e5cccSFangrui Song 20*207e5cccSFangrui Song #include <arm_neon.h> 21*207e5cccSFangrui Song 22*207e5cccSFangrui Song // COMMON-LABEL: test_vfmas_lane_f32 23*207e5cccSFangrui Song // COMMONIR: [[EXTRACT:%.*]] = extractelement <2 x float> %c, i32 1 24*207e5cccSFangrui Song // UNCONSTRAINED: [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float [[EXTRACT]], float %a) 25*207e5cccSFangrui Song // CONSTRAINED: [[TMP2:%.*]] = call float @llvm.experimental.constrained.fma.f32(float %b, float [[EXTRACT]], float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") 26*207e5cccSFangrui Song // CHECK-ASM: fmla s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}.s[{{[0-9]+}}] 27*207e5cccSFangrui Song // COMMONIR: ret float [[TMP2]] 28*207e5cccSFangrui Song float32_t test_vfmas_lane_f32(float32_t a, float32_t b, float32x2_t c) { 29*207e5cccSFangrui Song return vfmas_lane_f32(a, b, c, 1); 30*207e5cccSFangrui Song } 31*207e5cccSFangrui Song 32*207e5cccSFangrui Song // COMMON-LABEL: test_vfmad_lane_f64 33*207e5cccSFangrui Song // COMMONIR: [[EXTRACT:%.*]] = extractelement <1 x double> %c, i32 0 34*207e5cccSFangrui Song // UNCONSTRAINED: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a) 35*207e5cccSFangrui Song // CONSTRAINED: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fma.f64(double %b, double [[EXTRACT]], double %a, metadata !"round.tonearest", metadata !"fpexcept.strict") 36*207e5cccSFangrui Song // CHECK-ASM: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} 37*207e5cccSFangrui Song // COMMONIR: ret double [[TMP2]] 38*207e5cccSFangrui Song float64_t test_vfmad_lane_f64(float64_t a, float64_t b, float64x1_t c) { 39*207e5cccSFangrui Song return vfmad_lane_f64(a, b, c, 0); 40*207e5cccSFangrui Song } 41*207e5cccSFangrui Song 42*207e5cccSFangrui Song // COMMON-LABEL: test_vfmad_laneq_f64 43*207e5cccSFangrui Song // COMMONIR: [[EXTRACT:%.*]] = extractelement <2 x double> %c, i32 1 44*207e5cccSFangrui Song // UNCONSTRAINED: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a) 45*207e5cccSFangrui Song // CONSTRAINED: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fma.f64(double %b, double [[EXTRACT]], double %a, metadata !"round.tonearest", metadata !"fpexcept.strict") 46*207e5cccSFangrui Song // CHECK-ASM: fmla d{{[0-9]+}}, d{{[0-9]+}}, v{{[0-9]+}}.d[{{[0-9]+}}] 47*207e5cccSFangrui Song // COMMONIR: ret double [[TMP2]] 48*207e5cccSFangrui Song float64_t test_vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t c) { 49*207e5cccSFangrui Song return vfmad_laneq_f64(a, b, c, 1); 50*207e5cccSFangrui Song } 51*207e5cccSFangrui Song 52*207e5cccSFangrui Song // COMMON-LABEL: test_vfmss_lane_f32 53*207e5cccSFangrui Song // COMMONIR: [[SUB:%.*]] = fneg float %b 54*207e5cccSFangrui Song // COMMONIR: [[EXTRACT:%.*]] = extractelement <2 x float> %c, i32 1 55*207e5cccSFangrui Song // UNCONSTRAINED: [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float [[EXTRACT]], float %a) 56*207e5cccSFangrui Song // CONSTRAINED: [[TMP2:%.*]] = call float @llvm.experimental.constrained.fma.f32(float [[SUB]], float [[EXTRACT]], float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") 57*207e5cccSFangrui Song // CHECK-ASM: fmls s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}.s[{{[0-9]+}}] 58*207e5cccSFangrui Song // COMMONIR: ret float [[TMP2]] 59*207e5cccSFangrui Song float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) { 60*207e5cccSFangrui Song return vfmss_lane_f32(a, b, c, 1); 61*207e5cccSFangrui Song } 62*207e5cccSFangrui Song 63*207e5cccSFangrui Song // COMMON-LABEL: test_vfma_lane_f64 64*207e5cccSFangrui Song // COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 65*207e5cccSFangrui Song // COMMONIR: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 66*207e5cccSFangrui Song // COMMONIR: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8> 67*207e5cccSFangrui Song // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double> 68*207e5cccSFangrui Song // COMMONIR: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer 69*207e5cccSFangrui Song // COMMONIR: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 70*207e5cccSFangrui Song // COMMONIR: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 71*207e5cccSFangrui Song // UNCONSTRAINED: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]]) 72*207e5cccSFangrui Song // CONSTRAINED: [[FMLA2:%.*]] = call <1 x double> @llvm.experimental.constrained.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]], metadata !"round.tonearest", metadata !"fpexcept.strict") 73*207e5cccSFangrui Song // CHECK-ASM: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} 74*207e5cccSFangrui Song // COMMONIR: ret <1 x double> [[FMLA2]] 75*207e5cccSFangrui Song float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) { 76*207e5cccSFangrui Song return vfma_lane_f64(a, b, v, 0); 77*207e5cccSFangrui Song } 78*207e5cccSFangrui Song 79*207e5cccSFangrui Song // COMMON-LABEL: test_vfms_lane_f64 80*207e5cccSFangrui Song // COMMONIR: [[SUB:%.*]] = fneg <1 x double> %b 81*207e5cccSFangrui Song // COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 82*207e5cccSFangrui Song // COMMONIR: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8> 83*207e5cccSFangrui Song // COMMONIR: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8> 84*207e5cccSFangrui Song // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double> 85*207e5cccSFangrui Song // COMMONIR: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer 86*207e5cccSFangrui Song // COMMONIR: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 87*207e5cccSFangrui Song // COMMONIR: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 88*207e5cccSFangrui Song // UNCONSTRAINED: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]]) 89*207e5cccSFangrui Song // CONSTRAINED: [[FMLA2:%.*]] = call <1 x double> @llvm.experimental.constrained.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]], metadata !"round.tonearest", metadata !"fpexcept.strict") 90*207e5cccSFangrui Song // CHECK-ASM: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} 91*207e5cccSFangrui Song // COMMONIR: ret <1 x double> [[FMLA2]] 92*207e5cccSFangrui Song float64x1_t test_vfms_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) { 93*207e5cccSFangrui Song return vfms_lane_f64(a, b, v, 0); 94*207e5cccSFangrui Song } 95*207e5cccSFangrui Song 96*207e5cccSFangrui Song // COMMON-LABEL: test_vfma_laneq_f64 97*207e5cccSFangrui Song // COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 98*207e5cccSFangrui Song // COMMONIR: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 99*207e5cccSFangrui Song // COMMONIR: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8> 100*207e5cccSFangrui Song // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double 101*207e5cccSFangrui Song // COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double 102*207e5cccSFangrui Song // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> 103*207e5cccSFangrui Song // COMMONIR: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 104*207e5cccSFangrui Song // UNCONSTRAINED: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]]) 105*207e5cccSFangrui Song // CONSTRAINED: [[TMP6:%.*]] = call double @llvm.experimental.constrained.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict") 106*207e5cccSFangrui Song // CHECK-ASM: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} 107*207e5cccSFangrui Song // COMMONIR: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double> 108*207e5cccSFangrui Song // COMMONIR: ret <1 x double> [[TMP7]] 109*207e5cccSFangrui Song float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) { 110*207e5cccSFangrui Song return vfma_laneq_f64(a, b, v, 0); 111*207e5cccSFangrui Song } 112*207e5cccSFangrui Song 113*207e5cccSFangrui Song // COMMON-LABEL: test_vfms_laneq_f64 114*207e5cccSFangrui Song // COMMONIR: [[SUB:%.*]] = fneg <1 x double> %b 115*207e5cccSFangrui Song // CHECK-ASM: fneg d{{[0-9]+}}, d{{[0-9]+}} 116*207e5cccSFangrui Song // COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 117*207e5cccSFangrui Song // COMMONIR: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8> 118*207e5cccSFangrui Song // COMMONIR: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8> 119*207e5cccSFangrui Song // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double 120*207e5cccSFangrui Song // COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double 121*207e5cccSFangrui Song // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> 122*207e5cccSFangrui Song // COMMONIR: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 123*207e5cccSFangrui Song // UNCONSTRAINED: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]]) 124*207e5cccSFangrui Song // CONSTRAINED: [[TMP6:%.*]] = call double @llvm.experimental.constrained.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict") 125*207e5cccSFangrui Song // CHECK-ASM: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} 126*207e5cccSFangrui Song // COMMONIR: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double> 127*207e5cccSFangrui Song // COMMONIR: ret <1 x double> [[TMP7]] 128*207e5cccSFangrui Song float64x1_t test_vfms_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) { 129*207e5cccSFangrui Song return vfms_laneq_f64(a, b, v, 0); 130*207e5cccSFangrui Song } 131*207e5cccSFangrui Song 132