; SOFT:
; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT

; SOFTFP:
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-A32
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-T32
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; Test fast-isel
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD

; HARD:
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; FP-CONTRACT=FAST
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST

; TODO: we can't pass half-precision arguments as "half" types yet. For the
; time being we do that by passing "float %f.coerce" plus the necessary
; bitcasts/truncates. Once half types can be passed directly, we want to use
; and test that here.

define float @RetValBug(float %A.coerce) {
entry:
  ret float undef
; Check that LowerReturn can handle undef nodes (i.e. nodes which do not have
; any operands) when FullFP16 is enabled.
;
; CHECK-LABEL: RetValBug:
; CHECK-HARDFP-FULLFP16: {{.*}} lr
}
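
; Note (illustrative only, not exercised by any RUN line): the coercion
; described in the TODO above keeps the fp16 value in the low 16 bits of an
; i32/float argument, so each test below unpacks and repacks it roughly as
; follows, assuming an incoming "float %h.coerce" and a result "half %res":
;
;   %bits = bitcast float %h.coerce to i32
;   %lo   = trunc i32 %bits to i16
;   %h    = bitcast i16 %lo to half
;   ...                                   ; operate on %h as a half
;   %r16  = bitcast half %res to i16
;   %r32  = zext i16 %r16 to i32
;   %ret  = bitcast i32 %r32 to float     ; returned as "float"
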
; 2. VADD
define float @Add(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fadd half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Add:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fadd
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vadd.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vadd.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vadd.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vadd.f16 s0, s0, s1
}

; 3. VCMP
define zeroext i1 @VCMP1(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %cmp = fcmp une half %1, %3
  ret i1 %cmp

; CHECK-LABEL: VCMP1:

; CHECK-SOFT: bl __aeabi_fcmpeq

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vcmp.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16: vcmp.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vcmp.f16 [[S2]], [[S0]]

; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r0
; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r1
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s1
}

; Check VCMPZH
define zeroext i1 @VCMP2(float %F.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %cmp = fcmp une half %1, 0.000000e+00
  ret i1 %cmp

; CHECK-LABEL: VCMP2:

; CHECK-SOFT: bl __aeabi_fcmpeq
; CHECK-SOFTFP-FP16: vcmp.f32 s0, #0
; CHECK-SOFTFP-FULLFP16: vcmp.f16 s0, #0
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, #0
}

; 4. VCMPE
define i32 @VCMPE1(float %F.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %tmp = fcmp olt half %1, 0.000000e+00
  %tmp1 = zext i1 %tmp to i32
  ret i32 %tmp1

; CHECK-LABEL: VCMPE1:

; CHECK-SOFT: bl __aeabi_fcmplt
; CHECK-SOFTFP-FP16: vcmpe.f32 s0, #0
; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s0, #0
; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, #0
}

define i32 @VCMPE2(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp.1.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp.1.extract.trunc to half
  %tmp = fcmp olt half %1, %3
  %tmp1 = zext i1 %tmp to i32
  ret i32 %tmp1

; CHECK-LABEL: VCMPE2:

; CHECK-SOFT: bl __aeabi_fcmplt
; CHECK-SOFTFP-FP16: vcmpe.f32 s{{.}}, s{{.}}
; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
; CHECK-HARDFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
}

; Test lowering of BR_CC
define hidden i32 @VCMPBRCC() {
entry:
  %f = alloca half, align 2
  br label %for.cond

for.cond:
  %0 = load half, half* %f, align 2
  %cmp = fcmp nnan ninf nsz ole half %0, 0xH6800
  br i1 %cmp, label %for.body, label %for.end

for.body:
  ret i32 1

for.end:
  ret i32 0

; CHECK-LABEL: VCMPBRCC:

; CHECK-SOFT: bl __aeabi_fcmpgt
; CHECK-SOFT: cmp r0, #0

; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], [[S2]]
; CHECK-SOFTFP-FP16: vcmpe.f32 [[S2]], s0
; CHECK-SOFTFP-FP16: vmrs APSR_nzcv, fpscr

; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FULLFP16: vmrs APSR_nzcv, fpscr
}

; 5. VCVT (between floating-point and fixed-point)
; Only assembly/disassembly support

; 6. VCVT (between floating-point and integer, both directions)
define i32 @fptosi(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptosi half %0 to i32
  ret i32 %conv

; CHECK-LABEL: fptosi:

; CHECK-HARDFP-FULLFP16: vmov.f16 s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.s32.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
}

define i32 @fptoui(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptoui half %0 to i32
  ret i32 %conv

; CHECK-HARDFP-FULLFP16: vcvt.u32.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
}

define float @UintToH(i32 %a, i32 %b) {
entry:
  %0 = uitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL: UintToH:

; CHECK-HARDFP-FULLFP16: vmov s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.u32 s0, s0
}

define float @SintToH(i32 %a, i32 %b) {
entry:
  %0 = sitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL: SintToH:

; CHECK-HARDFP-FULLFP16: vmov s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.s32 s0, s0
}

define i32 @f2h(float %f) {
entry:
  %conv = fptrunc float %f to half
  %0 = bitcast half %conv to i16
  %tmp.0.insert.ext = zext i16 %0 to i32
  ret i32 %tmp.0.insert.ext

; CHECK-LABEL: f2h:
; CHECK-HARDFP-FULLFP16: vcvtb.f16.f32 s0, s0
}

define float @h2f(i32 %h.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fpext half %0 to float
  ret float %conv

; CHECK-LABEL: h2f:
; CHECK-HARDFP-FULLFP16: vcvtb.f32.f16 s0, s0
}


define double @h2d(i32 %h.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fpext half %0 to double
  ret double %conv

; CHECK-LABEL: h2d:
; CHECK-HARDFP-FULLFP16: vcvtb.f64.f16 d{{.*}}, s{{.}}
}

define i32 @d2h(double %d) {
entry:
  %conv = fptrunc double %d to half
  %0 = bitcast half %conv to i16
  %tmp.0.insert.ext = zext i16 %0 to i32
  ret i32 %tmp.0.insert.ext

; CHECK-LABEL: d2h:
; CHECK-HARDFP-FULLFP16: vcvtb.f16.f64 s0, d{{.*}}
}

; TODO:
; 7. VCVTA
; 8. VCVTM
; 9. VCVTN
; 10. VCVTP
; 11. VCVTR

; 12. VDIV
define float @Div(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fdiv half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Div:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fdiv
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vdiv.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vdiv.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vdiv.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vdiv.f16 s0, s0, s1
}

; 13. VFMA
define float @VFMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %mul, %5
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFMA:
; CHECK-HARDFP-FULLFP16-FAST: vfma.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 14. VFMS
define float @VFMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half %5, %mul
  %6 = bitcast half %sub to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFMS:
; CHECK-HARDFP-FULLFP16-FAST: vfms.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 15. VFNMA
define float @VFNMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half -0.0, %mul
  %sub2 = fsub half %sub, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFNMA:
; CHECK-HARDFP-FULLFP16-FAST: vfnma.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 16. VFNMS
define float @VFNMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub2 = fsub half %mul, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFNMS:
; CHECK-HARDFP-FULLFP16-FAST: vfnms.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 17. VMAXNM
; 18. VMINNM
; Tested in fp16-vminmaxnm.ll and fp16-vminmaxnm-safe.ll

; 19. VMLA
define float @VMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VMLA:
; CHECK-HARDFP-FULLFP16: vmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
}

; 20. VMLS
define float @VMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fsub half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VMLS:
; CHECK-HARDFP-FULLFP16: vmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
}

; TODO: fix immediates.
; 21. VMOV (between general-purpose register and half-precision register)

; 22. VMOV (immediate)
define i32 @movi(i32 %a.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %a.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %add = fadd half %0, 0xHC000
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; CHECK-LABEL: movi:
; CHECK-HARDFP-FULLFP16: vmov.f16 s0, #-2.000000e+00
}

; 23. VMUL
define float @Mul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Mul:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fmul
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vmul.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vmul.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vmul.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vmul.f16 s0, s0, s1
}

; 24. VNEG
define float @Neg(float %a.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = fsub half -0.000000e+00, %1
  %3 = bitcast half %2 to i16
  %tmp4.0.insert.ext = zext i16 %3 to i32
  %4 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %4

; CHECK-LABEL: Neg:
; CHECK-HARDFP-FULLFP16: vneg.f16 s0, s0
}

; 25. VNMLA
define float @VNMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.000000e+00, %add
  %add3 = fsub half %add2, %5
  %6 = bitcast half %add3 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VNMLA:
; CHECK-HARDFP-FULLFP16: vnmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
}

; 26. VNMLS
define float @VNMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half %add, %5
  %6 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VNMLS:
; CHECK-HARDFP-FULLFP16: vnmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
}

; 27. VNMUL
define float @NMul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.0, %add
  %4 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: NMul:
; CHECK-HARDFP-FULLFP16: vnmul.f16 s0, s0, s1
}

; 35. VSELEQ
define half @select_cc1(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz oeq half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc1:

; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vseleq.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmoveq.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32: it eq
; CHECK-SOFTFP-FP16-T32: vmoveq.f32 s{{.}}, s{{.}}
}

; FIXME: more tests need to be added for VSELGE and VSELGT.
; That is, more combinations of immediate operands that can or can't
; be encoded as an FP16 immediate need to be added here.
;
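; Note (informal, based on the constants already used in these tests): the VFP
; scalar immediate encoding only covers values of roughly the form
; +/-(16..31)/16 * 2^N with N in [-3, 4], so -2.0 (0xHC000) can be
; materialized directly with "vmov.f16 s0, #-2.000000e+00", while tiny values
; such as 0xH0001 and 0xH0002 cannot and end up loaded from a constant pool
; (see the vldr/.LCPI patterns in select_cc4 below).
;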
; 36. VSELGE
define half @select_cc_ge1(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz oge half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_ge1:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovge.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it ge
; CHECK-SOFTFP-FP16-T32-NEXT: vmovge.f32 s{{.}}, s{{.}}
}

define half @select_cc_ge2(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz ole half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_ge2:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovls.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it ls
; CHECK-SOFTFP-FP16-T32-NEXT: vmovls.f32 s{{.}}, s{{.}}
}

define half @select_cc_ge3(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz ugt half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_ge3:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovhi.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it hi
; CHECK-SOFTFP-FP16-T32-NEXT: vmovhi.f32 s{{.}}, s{{.}}
}

define half @select_cc_ge4(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz ult half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_ge4:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovlt.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it lt
; CHECK-SOFTFP-FP16-T32-NEXT: vmovlt.f32 s{{.}}, s{{.}}
}

; 37. VSELGT
define half @select_cc_gt1(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz ogt half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_gt1:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovgt.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it gt
; CHECK-SOFTFP-FP16-T32-NEXT: vmovgt.f32 s{{.}}, s{{.}}
}

define half @select_cc_gt2(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz uge half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_gt2:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovpl.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it pl
; CHECK-SOFTFP-FP16-T32-NEXT: vmovpl.f32 s{{.}}, s{{.}}
}

define half @select_cc_gt3(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz ule half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_gt3:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovle.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it le
; CHECK-SOFTFP-FP16-T32-NEXT: vmovle.f32 s{{.}}, s{{.}}
}

define half @select_cc_gt4(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz olt half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_gt4:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovmi.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it mi
; CHECK-SOFTFP-FP16-T32-NEXT: vmovmi.f32 s{{.}}, s{{.}}
}

; 38. VSELVS
define float @select_cc4(float %a.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half

  %2 = fcmp nsz ueq half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002

  %4 = bitcast half %3 to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: select_cc4:

; CHECK-HARDFP-FULLFP16: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16: vldr.16 [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16: vmov.f16 [[S6:s[0-9]]], #-2.000000e+00
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, [[S2]]
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 [[S0:s[0-9]]], [[S6]], [[S4]]
; CHECK-HARDFP-FULLFP16-NEXT: vselvs.f16 s0, [[S6]], [[S0]]

; CHECK-SOFTFP-FP16-A32: vmov [[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-A32: vldr s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-A32: vcvtb.f32.f16 [[S6]], [[S6]]
; CHECK-SOFTFP-FP16-A32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-A32: vcmp.f32 [[S6]], s0
; CHECK-SOFTFP-FP16-A32: vldr [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-A32: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32: vmoveq.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-A32-NEXT: vmovvs.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-A32-NEXT: vcvtb.f16.f32 s0, [[S4]]

; CHECK-SOFTFP-FP16-T32: vmov [[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-T32: vldr s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-T32: vcvtb.f32.f16 [[S6]], [[S6]]
; CHECK-SOFTFP-FP16-T32: vldr [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-T32: vcmp.f32 [[S6]], s0
; CHECK-SOFTFP-FP16-T32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32: it eq
; CHECK-SOFTFP-FP16-T32: vmoveq.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-T32: it vs
; CHECK-SOFTFP-FP16-T32-NEXT: vmovvs.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-T32-NEXT: vcvtb.f16.f32 s0, [[S4]]
}

; 40. VSUB
define float @Sub(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fsub half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Sub:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fsub
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vsub.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vsub.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vsub.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vsub.f16 s0, s0, s1
}

; Check for VSTRH with a FCONSTH; this checks that addressing mode
; AddrMode5FP16 is supported.
define i32 @ThumbAddrMode5FP16(i32 %A.coerce) {
entry:
  %S = alloca half, align 2
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %S.0.S.0..sroa_cast = bitcast half* %S to i8*
  store volatile half 0xH3C00, half* %S, align 2
  %S.0.S.0. = load volatile half, half* %S, align 2
  %add = fadd half %S.0.S.0., %0
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; CHECK-LABEL: ThumbAddrMode5FP16

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], #1.000000e+00
; CHECK-SOFTFP-FULLFP16: vstr.16 [[S0]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0_2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vldr.16 [[S2:s[0-9]]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16: vadd.f16 s{{.}}, [[S2]], [[S0_2]]
}

; Test function calls to check store/load reg to/from stack
define i32 @fn1() {
entry:
  %coerce = alloca half, align 2
  %tmp2 = alloca i32, align 4
  store half 0xH7C00, half* %coerce, align 2
  %0 = load i32, i32* %tmp2, align 4
  %call = call i32 bitcast (i32 (...)* @fn2 to i32 (i32)*)(i32 %0)
  store half 0xH7C00, half* %coerce, align 2
  %1 = load i32, i32* %tmp2, align 4
  %call3 = call i32 bitcast (i32 (...)* @fn3 to i32 (i32)*)(i32 %1)
  ret i32 %call3

; CHECK-SPILL-RELOAD-LABEL: fn1:
; CHECK-SPILL-RELOAD: vstr.16 s0, [sp, #{{.}}] @ 2-byte Spill
; CHECK-SPILL-RELOAD: bl fn2
; CHECK-SPILL-RELOAD-NEXT: vldr.16 s0, [sp, #{{.}}] @ 2-byte Reload
}

declare dso_local i32 @fn2(...)
declare dso_local i32 @fn3(...)