; Codegen test for half-precision (f16) operations on ARM and Thumb targets.
; Each llc invocation below pipes its output through FileCheck, using the
; common CHECK prefix plus one configuration-specific prefix; the two -O0
; invocations use only their own prefix.

; SOFT:
; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT

; SOFTFP:
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; Test fast-isel
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD

; HARD:
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; FP-CONTRACT=FAST
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST


define float @RetValBug(float %A.coerce) {
entry:
  ret float undef
; Check that LowerReturn can handle undef nodes (i.e. nodes which do not have
; any operands) when FullFP16 is enabled.
;
; CHECK-LABEL: RetValBug:
; CHECK-HARDFP-FULLFP16: {{.*}} lr
}

; 1. VABS: TODO

; 2. VADD
; Adds two half values carried in the low 16 bits of the float arguments and
; returns the half result zero-extended into the bit pattern of a float.
define float @Add(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fadd half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Add:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fadd
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vadd.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vadd.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vadd.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vadd.f16 s0, s0, s1
}

; 3. VCMP
; Compares two half values (unordered-or-not-equal) and returns the i1 result.
define zeroext i1 @VCMP1(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %cmp = fcmp une half %1, %3
  ret i1 %cmp

; CHECK-LABEL: VCMP1:

; CHECK-SOFT: bl __aeabi_fcmpeq

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vcmp.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16: vcmp.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vcmp.f16 [[S2]], [[S0]]

; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r0
; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r1
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s1
}

; Check VCMPZH
; Same comparison as VCMP1, but against the constant zero.
define zeroext i1 @VCMP2(float %F.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %cmp = fcmp une half %1, 0.000000e+00
  ret i1 %cmp

; CHECK-LABEL: VCMP2:

; CHECK-SOFT: bl __aeabi_fcmpeq
; CHECK-SOFTFP-FP16: vcmp.f32 s0, #0
; CHECK-SOFTFP-FULLFP16: vcmp.f16 s0, #0
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, #0
}

; 4. VCMPE
; Ordered less-than comparison of a half value against zero; the i1 result is
; zero-extended to i32.
define i32 @VCMPE1(float %F.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %tmp = fcmp olt half %1, 0.000000e+00
  %tmp1 = zext i1 %tmp to i32
  ret i32 %tmp1

; CHECK-LABEL: VCMPE1:

; CHECK-SOFT: bl __aeabi_fcmplt
; CHECK-SOFTFP-FP16: vcmpe.f32 s0, #0
; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s0, #0
; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, #0
}

; Ordered less-than comparison of two half values; the i1 result is
; zero-extended to i32.
define i32 @VCMPE2(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp.1.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp.1.extract.trunc to half
  %tmp = fcmp olt half %1, %3
  %tmp1 = zext i1 %tmp to i32
  ret i32 %tmp1

; CHECK-LABEL: VCMPE2:

; CHECK-SOFT: bl __aeabi_fcmplt
; CHECK-SOFTFP-FP16: vcmpe.f32 s{{.}}, s{{.}}
; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
; CHECK-HARDFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
}

; Test lowering of BR_CC
; Branches on a half comparison against the constant 0xH6800; the compared
; value is loaded from a stack slot that is never stored to.
define hidden i32 @VCMPBRCC() {
entry:
  %f = alloca half, align 2
  br label %for.cond

for.cond:
  %0 = load half, half* %f, align 2
  %cmp = fcmp nnan ninf nsz ole half %0, 0xH6800
  br i1 %cmp, label %for.body, label %for.end

for.body:
  ret i32 1

for.end:
  ret i32 0

; CHECK-LABEL: VCMPBRCC:

; CHECK-SOFT: bl __aeabi_fcmple
; CHECK-SOFT: cmp r0, #0

; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], [[S2]]
; CHECK-SOFTFP-FP16: vcmpe.f32 [[S2]], s0
; CHECK-SOFTFP-FP16: vmrs APSR_nzcv, fpscr

; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FULLFP16: vmrs APSR_nzcv, fpscr
}

; 5. VCVT (between floating-point and fixed-point)
; Only assembly/disassembly support

; 6.
; VCVT (between floating-point and integer, both directions)
; Converts the half value held in the low 16 bits of the argument to a signed
; 32-bit integer.
define i32 @fptosi(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptosi half %0 to i32
  ret i32 %conv

; CHECK-LABEL: fptosi:

; CHECK-HARDFP-FULLFP16: vmov.f16 s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.s32.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
}

; Converts the half value held in the low 16 bits of the argument to an
; unsigned 32-bit integer.
define i32 @fptoui(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptoui half %0 to i32
  ret i32 %conv

; The label anchors the checks below to this function's output, as is done
; for every other function in this file.
; CHECK-LABEL: fptoui:

; CHECK-HARDFP-FULLFP16: vcvt.u32.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
}

; Converts an unsigned 32-bit integer to half; %b is unused.
define float @UintToH(i32 %a, i32 %b) {
entry:
  %0 = uitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL: UintToH:

; CHECK-HARDFP-FULLFP16: vmov s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.u32 s0, s0
}

; Converts a signed 32-bit integer to half; %b is unused.
define float @SintToH(i32 %a, i32 %b) {
entry:
  %0 = sitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL: SintToH:

; CHECK-HARDFP-FULLFP16: vmov s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.s32 s0, s0
}

; Truncates a float to half and returns its bits zero-extended to i32.
define i32 @f2h(float %f) {
entry:
  %conv = fptrunc float %f to half
  %0 = bitcast half %conv to i16
  %tmp.0.insert.ext = zext i16 %0 to i32
  ret i32 %tmp.0.insert.ext

; CHECK-LABEL: f2h:
; CHECK-HARDFP-FULLFP16: vcvtb.f16.f32 s0, s0
}

; Extends the half value held in the low 16 bits of the argument to float.
define float @h2f(i32 %h.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fpext half %0 to float
  ret float %conv

; CHECK-LABEL: h2f:
; CHECK-HARDFP-FULLFP16: vcvtb.f32.f16 s0, s0
}


; Extends the half value held in the low 16 bits of the argument to double.
define double @h2d(i32 %h.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fpext half %0 to double
  ret double %conv

; CHECK-LABEL: h2d:
; CHECK-HARDFP-FULLFP16: vcvtb.f64.f16 d{{.*}}, s{{.}}
}

; Truncates a double to half and returns its bits zero-extended to i32.
define i32 @d2h(double %d) {
entry:
  %conv = fptrunc double %d to half
  %0 = bitcast half %conv to i16
  %tmp.0.insert.ext = zext i16 %0 to i32
  ret i32 %tmp.0.insert.ext

; CHECK-LABEL: d2h:
; CHECK-HARDFP-FULLFP16: vcvtb.f16.f64 s0, d{{.*}}
}

; TODO:
; 7. VCVTA
; 8. VCVTM
; 9. VCVTN
; 10. VCVTP
; 11. VCVTR

; 12. VDIV
; Divides two half values carried in the low 16 bits of the float arguments.
define float @Div(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fdiv half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Div:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fdiv
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vdiv.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vdiv.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vdiv.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vdiv.f16 s0, s0, s1
}

; 13. VFMA
; Computes (a * b) + c on half values; only the -fp-contract=fast
; configuration is checked, and it expects a fused vfma.f16.
define float @VFMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %mul, %5
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFMA:
; CHECK-HARDFP-FULLFP16-FAST: vfma.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 14.
; VFMS
; Computes c - (a * b) on half values; only the -fp-contract=fast
; configuration is checked, and it expects a fused vfms.f16.
define float @VFMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half %5, %mul
  %6 = bitcast half %sub to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFMS:
; CHECK-HARDFP-FULLFP16-FAST: vfms.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 15. VFNMA
; Computes (-0.0 - (a * b)) - c on half values; only the -fp-contract=fast
; configuration is checked, and it expects a fused vfnma.f16.
define float @VFNMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half -0.0, %mul
  %sub2 = fsub half %sub, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFNMA:
; CHECK-HARDFP-FULLFP16-FAST: vfnma.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 16.
; VFNMS
; Computes (a * b) - c on half values; only the -fp-contract=fast
; configuration is checked, and it expects a fused vfnms.f16.
define float @VFNMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub2 = fsub half %mul, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFNMS:
; CHECK-HARDFP-FULLFP16-FAST: vfnms.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; TODO:
; 17. VMAXNM
; 18. VMINNM

; 19. VMLA
; Computes c + (a * b) on half values; the hard-float FullFP16 configuration
; without fast contraction expects vmla.f16.
define float @VMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VMLA:
; CHECK-HARDFP-FULLFP16: vmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
}

; 20.
; VMLS
; Computes c - (a * b) on half values; the hard-float FullFP16 configuration
; without fast contraction expects vmls.f16.
define float @VMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fsub half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VMLS:
; CHECK-HARDFP-FULLFP16: vmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
}

; TODO: fix immediates.
; 21. VMOV (between general-purpose register and half-precision register)

; 22. VMOV (immediate)
; Adds the half constant 0xHC000 to the argument; the check expects that
; constant to be materialised as the immediate #-2.000000e+00.
define i32 @movi(i32 %a.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %a.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %add = fadd half %0, 0xHC000
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; CHECK-LABEL: movi:
; CHECK-HARDFP-FULLFP16: vmov.f16 s0, #-2.000000e+00
}

; 23.
; VMUL
; Multiplies two half values carried in the low 16 bits of the float
; arguments.
define float @Mul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Mul:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fmul
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vmul.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vmul.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vmul.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vmul.f16 s0, s0, s1
}

; 24.
; VNEG
; Negates a half value, written as a subtraction from -0.0.
define float @Neg(float %a.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = fsub half -0.000000e+00, %1
  %3 = bitcast half %2 to i16
  %tmp4.0.insert.ext = zext i16 %3 to i32
  %4 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %4

; CHECK-LABEL: Neg:
; CHECK-HARDFP-FULLFP16: vneg.f16 s0, s0
}

; 25. VNMLA
; Computes (-0.0 - (a * b)) - c on half values; the hard-float FullFP16
; configuration without fast contraction expects vnmla.f16.
define float @VNMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.000000e+00, %add
  %add3 = fsub half %add2, %5
  %6 = bitcast half %add3 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VNMLA:
; CHECK-HARDFP-FULLFP16: vnmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
}

; 26.
; VNMLS
; Computes (a * b) - c on half values; the hard-float FullFP16 configuration
; without fast contraction expects vnmls.f16.
define float @VNMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half %add, %5
  %6 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VNMLS:
; CHECK-HARDFP-FULLFP16: vnmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
}

; 27. VNMUL
; Computes -0.0 - (a * b) on half values; the hard-float FullFP16
; configuration expects a single vnmul.f16.
define float @NMul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.0, %add
  %4 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: NMul:
; CHECK-HARDFP-FULLFP16: vnmul.f16 s0, s0, s1
}

; TODO:
; 28. VRINTA
; 29. VRINTM
; 30. VRINTN
; 31. VRINTP
; 32. VRINTR
; 33. VRINTX
; 34. VRINTZ
; 35. VSELEQ
; 36. VSELGE
; 37. VSELGT
; 38. VSELVS
; 39. VSQRT

; 40.
; VSUB
; Subtracts two half values carried in the low 16 bits of the float
; arguments.
define float @Sub(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fsub half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Sub:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fsub
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vsub.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vsub.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vsub.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vsub.f16 s0, s0, s1
}

; Check for VSTRH with a FCONSTH,
; this checks that addressing mode
; AddrMode5FP16 is supported.
; Stores the half constant 0xH3C00 to a stack slot with a volatile store,
; reloads it with a volatile load and adds it to the half argument.
define i32 @ThumbAddrMode5FP16(i32 %A.coerce) {
entry:
  %S = alloca half, align 2
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %S.0.S.0..sroa_cast = bitcast half* %S to i8*
  store volatile half 0xH3C00, half* %S, align 2
  %S.0.S.0. = load volatile half, half* %S, align 2
  %add = fadd half %S.0.S.0., %0
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; The trailing colon makes the label match the function label itself rather
; than an earlier directive that merely mentions the symbol name.
; CHECK-LABEL: ThumbAddrMode5FP16:

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], #1.000000e+00
; CHECK-SOFTFP-FULLFP16: vstr.16 [[S0]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0_2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vldr.16 [[S2:s[0-9]]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16: vadd.f16 s{{.}}, [[S2]], [[S0_2]]
}

; Test function calls to check store/load reg to/from stack
; Only the -O0 invocations check this function; they expect a 2-byte spill
; of s0 before the call to fn2 and a 2-byte reload right after it.
define i32 @fn1() {
entry:
  %coerce = alloca half, align 2
  %tmp2 = alloca i32, align 4
  store half 0xH7C00, half* %coerce, align 2
  %0 = load i32, i32* %tmp2, align 4
  %call = call i32 bitcast (i32 (...)* @fn2 to i32 (i32)*)(i32 %0)
  store half 0xH7C00, half* %coerce, align 2
  %1 = load i32, i32* %tmp2, align 4
  %call3 = call i32 bitcast (i32 (...)* @fn3 to i32 (i32)*)(i32 %1)
  ret i32 %call3

; CHECK-SPILL-RELOAD-LABEL: fn1:
; CHECK-SPILL-RELOAD: vstr.16 s0, [sp, #{{.}}] @ 2-byte Spill
; CHECK-SPILL-RELOAD-NEXT: bl fn2
; CHECK-SPILL-RELOAD-NEXT: vldr.16 s0, [sp, #{{.}}] @ 2-byte Reload
}

; External variadic callees used by fn1.
declare dso_local i32 @fn2(...)
declare dso_local i32 @fn3(...)