// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
// RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -passes=mem2reg \
// RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
// RUN: -ffp-exception-behavior=maytrap -DEXCEPT=1 \
// RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -passes=mem2reg \
// RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED --implicit-check-not=fpexcept.maytrap %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
// RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -passes=mem2reg | llc -o=- - \
// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
// RUN: -ffp-exception-behavior=maytrap -DEXCEPT=1 \
// RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -passes=mem2reg | llc -o=- - \
// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM --implicit-check-not=fpexcept.maytrap %s

// REQUIRES: aarch64-registered-target

// Test that the constrained intrinsics are picking up the exception
// metadata from the AST instead of the global default from the command line.
// Any cases of "fpexcept.maytrap" in this test are clang bugs.

#if EXCEPT
#pragma float_control(except, on)
#endif

#include <arm_neon.h>

// COMMON-LABEL: test_vsqrt_f16
// UNCONSTRAINED: [[SQR:%.*]] = call <4 x half> @llvm.sqrt.v4f16(<4 x half> %a)
// CONSTRAINED: [[SQR:%.*]] = call <4 x half> @llvm.experimental.constrained.sqrt.v4f16(<4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fsqrt v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
// COMMONIR: ret <4 x half> [[SQR]]
float16x4_t test_vsqrt_f16(float16x4_t a) {
  return vsqrt_f16(a);
}

// COMMON-LABEL: test_vsqrtq_f16
// UNCONSTRAINED: [[SQR:%.*]] = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a)
// CONSTRAINED: [[SQR:%.*]] = call <8 x half> @llvm.experimental.constrained.sqrt.v8f16(<8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fsqrt v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
// COMMONIR: ret <8 x half> [[SQR]]
float16x8_t test_vsqrtq_f16(float16x8_t a) {
  return vsqrtq_f16(a);
}

// COMMON-LABEL: test_vfma_f16
// UNCONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
// CONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
// COMMONIR: ret <4 x half> [[ADD]]
float16x4_t test_vfma_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
  return vfma_f16(a, b, c);
}

// COMMON-LABEL: test_vfmaq_f16
// UNCONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
// CONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
// COMMONIR: ret <8 x half> [[ADD]]
float16x8_t test_vfmaq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
  return vfmaq_f16(a, b, c);
}

// COMMON-LABEL: test_vfms_f16
// COMMONIR: [[SUB:%.*]] = fneg <4 x half> %b
// UNCONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a)
// CONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
// COMMONIR: ret <4 x half> [[ADD]]
float16x4_t test_vfms_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
  return vfms_f16(a, b, c);
}

// COMMON-LABEL: test_vfmsq_f16
// COMMONIR: [[SUB:%.*]] = fneg <8 x half> %b
// UNCONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a)
// CONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
// COMMONIR: ret <8 x half> [[ADD]]
float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
  return vfmsq_f16(a, b, c);
}

// COMMON-LABEL: test_vfma_lane_f16
// COMMONIR: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// COMMONIR: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// COMMONIR: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
// COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
// COMMONIR: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// COMMONIR: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]])
// CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
// COMMONIR: ret <4 x half> [[FMLA]]
float16x4_t test_vfma_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
  return vfma_lane_f16(a, b, c, 3);
}

// COMMON-LABEL: test_vfmaq_lane_f16
// COMMONIR: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// COMMONIR: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// COMMONIR: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
// COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
// COMMONIR: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// COMMONIR: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
// UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]])
// CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
// COMMONIR: ret <8 x half> [[FMLA]]
float16x8_t test_vfmaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
  return vfmaq_lane_f16(a, b, c, 3);
}

// COMMON-LABEL: test_vfma_laneq_f16
// COMMONIR: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// COMMONIR: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// COMMONIR: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
// COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
// COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
// CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
// COMMONIR: ret <4 x half> [[FMLA]]
float16x4_t test_vfma_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
  return vfma_laneq_f16(a, b, c, 7);
}

// COMMON-LABEL: test_vfmaq_laneq_f16
// COMMONIR: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// COMMONIR: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// COMMONIR: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
// COMMONIR: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
// COMMONIR: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
// COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]])
// CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
// COMMONIR: ret <8 x half> [[FMLA]]
float16x8_t test_vfmaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
  return vfmaq_laneq_f16(a, b, c, 7);
}

// COMMON-LABEL: test_vfma_n_f16
// COMMONIR: [[TMP0:%.*]] = insertelement <4 x half> poison, half %c, i32 0
// COMMONIR: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %c, i32 1
// COMMONIR: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %c, i32 2
// COMMONIR: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %c, i32 3
// UNCONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> [[TMP3]], <4 x half> %a)
// CONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> [[TMP3]], <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
// COMMONIR: ret <4 x half> [[FMA]]
float16x4_t test_vfma_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
  return vfma_n_f16(a, b, c);
}

// COMMON-LABEL: test_vfmaq_n_f16
// COMMONIR: [[TMP0:%.*]] = insertelement <8 x half> poison, half %c, i32 0
// COMMONIR: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %c, i32 1
// COMMONIR: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %c, i32 2
// COMMONIR: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %c, i32 3
// COMMONIR: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %c, i32 4
// COMMONIR: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %c, i32 5
// COMMONIR: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %c, i32 6
// COMMONIR: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %c, i32 7
// UNCONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> [[TMP7]], <8 x half> %a)
// CONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> [[TMP7]], <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
// COMMONIR: ret <8 x half> [[FMA]]
float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
  return vfmaq_n_f16(a, b, c);
}

// COMMON-LABEL: test_vfmah_lane_f16
// COMMONIR: [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3
// UNCONSTRAINED: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a)
// CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half %b, half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
// COMMONIR: ret half [[FMA]]
float16_t test_vfmah_lane_f16(float16_t a, float16_t b, float16x4_t c) {
  return vfmah_lane_f16(a, b, c, 3);
}

// COMMON-LABEL: test_vfmah_laneq_f16
// COMMONIR: [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7
// UNCONSTRAINED: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a)
// CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half %b, half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
// COMMONIR: ret half [[FMA]]
float16_t test_vfmah_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
  return vfmah_laneq_f16(a, b, c, 7);
}

// COMMON-LABEL: test_vfms_lane_f16
// COMMONIR: [[SUB:%.*]] = fneg <4 x half> %b
// COMMONIR: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// COMMONIR: [[TMP1:%.*]] = bitcast <4 x half> [[SUB]] to <8 x i8>
// COMMONIR: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
// COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
// COMMONIR: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// COMMONIR: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// UNCONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]])
// CONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
// COMMONIR: ret <4 x half> [[FMA]]
float16x4_t test_vfms_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
  return vfms_lane_f16(a, b, c, 3);
}

// COMMON-LABEL: test_vfmsq_lane_f16
// COMMONIR: [[SUB:%.*]] = fneg <8 x half> %b
// COMMONIR: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// COMMONIR: [[TMP1:%.*]] = bitcast <8 x half> [[SUB]] to <16 x i8>
// COMMONIR: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
// COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
// COMMONIR: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// COMMONIR: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
// UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]])
// CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
// COMMONIR: ret <8 x half> [[FMLA]]
float16x8_t test_vfmsq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
  return vfmsq_lane_f16(a, b, c, 3);
}

// COMMON-LABEL: test_vfms_laneq_f16
// COMMONIR: [[SUB:%.*]] = fneg <4 x half> %b
// CHECK-ASM-NOT: fneg
// COMMONIR: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// COMMONIR: [[TMP1:%.*]] = bitcast <4 x half> [[SUB]] to <8 x i8>
// COMMONIR: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
// COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
// COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
// CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
// COMMONIR: ret <4 x half> [[FMLA]]
float16x4_t test_vfms_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
  return vfms_laneq_f16(a, b, c, 7);
}

// COMMON-LABEL: test_vfmsq_laneq_f16
// COMMONIR: [[SUB:%.*]] = fneg <8 x half> %b
// CHECK-ASM-NOT: fneg
// COMMONIR: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// COMMONIR: [[TMP1:%.*]] = bitcast <8 x half> [[SUB]] to <16 x i8>
// COMMONIR: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
// COMMONIR: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
// COMMONIR: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
// COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]])
// CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
// COMMONIR: ret <8 x half> [[FMLA]]
float16x8_t test_vfmsq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
  return vfmsq_laneq_f16(a, b, c, 7);
}

// COMMON-LABEL: test_vfms_n_f16
// COMMONIR: [[SUB:%.*]] = fneg <4 x half> %b
// COMMONIR: [[TMP0:%.*]] = insertelement <4 x half> poison, half %c, i32 0
// COMMONIR: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %c, i32 1
// COMMONIR: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %c, i32 2
// COMMONIR: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %c, i32 3
// UNCONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> [[TMP3]], <4 x half> %a)
// CONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[SUB]], <4 x half> [[TMP3]], <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
// COMMONIR: ret <4 x half> [[FMA]]
float16x4_t test_vfms_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
  return vfms_n_f16(a, b, c);
}

// COMMON-LABEL: test_vfmsq_n_f16
// COMMONIR: [[SUB:%.*]] = fneg <8 x half> %b
// COMMONIR: [[TMP0:%.*]] = insertelement <8 x half> poison, half %c, i32 0
// COMMONIR: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %c, i32 1
// COMMONIR: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %c, i32 2
// COMMONIR: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %c, i32 3
// COMMONIR: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %c, i32 4
// COMMONIR: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %c, i32 5
// COMMONIR: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %c, i32 6
// COMMONIR: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %c, i32 7
// UNCONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> [[TMP7]], <8 x half> %a)
// CONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[SUB]], <8 x half> [[TMP7]], <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
// COMMONIR: ret <8 x half> [[FMA]]
float16x8_t test_vfmsq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
  return vfmsq_n_f16(a, b, c);
}

// COMMON-LABEL: test_vfmsh_lane_f16
// UNCONSTRAINED: [[TMP0:%.*]] = fpext half %b to float
// CONSTRAINED: [[TMP0:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half %b, metadata !"fpexcept.strict")
// CHECK-ASM: fcvt s{{[0-9]+}}, h{{[0-9]+}}
// COMMONIR: [[TMP1:%.*]] = fneg float [[TMP0]]
// CHECK-ASM: fneg s{{[0-9]+}}, s{{[0-9]+}}
// UNCONSTRAINED: [[SUB:%.*]] = fptrunc float [[TMP1]] to half
// CONSTRAINED: [[SUB:%.*]] = call half @llvm.experimental.constrained.fptrunc.f16.f32(float [[TMP1]], metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fcvt h{{[0-9]+}}, s{{[0-9]+}}
// COMMONIR: [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3
// UNCONSTRAINED: [[FMA:%.*]] = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a)
// CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
// COMMONIR: ret half [[FMA]]
float16_t test_vfmsh_lane_f16(float16_t a, float16_t b, float16x4_t c) {
  return vfmsh_lane_f16(a, b, c, 3);
}

// COMMON-LABEL: test_vfmsh_laneq_f16
// UNCONSTRAINED: [[TMP0:%.*]] = fpext half %b to float
// CONSTRAINED: [[TMP0:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half %b, metadata !"fpexcept.strict")
// CHECK-ASM: fcvt s{{[0-9]+}}, h{{[0-9]+}}
// COMMONIR: [[TMP1:%.*]] = fneg float [[TMP0]]
// CHECK-ASM: fneg s{{[0-9]+}}, s{{[0-9]+}}
// UNCONSTRAINED: [[SUB:%.*]] = fptrunc float [[TMP1]] to half
// CONSTRAINED: [[SUB:%.*]] = call half @llvm.experimental.constrained.fptrunc.f16.f32(float [[TMP1]], metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fcvt h{{[0-9]+}}, s{{[0-9]+}}
// COMMONIR: [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7
// UNCONSTRAINED: [[FMA:%.*]] = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a)
// CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
// COMMONIR: ret half [[FMA]]
float16_t test_vfmsh_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
  return vfmsh_laneq_f16(a, b, c, 7);
}