; Codegen tests for half-precision (fp16) ARM/Thumb instruction selection,
; covering soft-float, softfp (VFP3 / VFP4-fp16 / fullfp16) and hard-float ABIs.

; SOFT:
; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT

; SOFTFP:
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; HARD:
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; FP-CONTRACT=FAST
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST


define float @RetValBug(float %A.coerce) {
entry:
  ret float undef
; Check that LowerReturn can handle undef nodes (i.e. nodes which do not have
; any operands) when FullFP16 is enabled.
;
; CHECK-LABEL: RetValBug:
; CHECK-HARDFP-FULLFP16: {{.*}} lr
}

; 1. VABS: TODO

; 2. VADD
define float @Add(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fadd half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Add:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fadd
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vadd.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vadd.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vadd.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vadd.f16 s0, s0, s1
}

; 3. VCMP
define zeroext i1 @VCMP(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %cmp = fcmp ogt half %1, %3
  ret i1 %cmp

; CHECK-LABEL: VCMP:

; CHECK-SOFT: bl __aeabi_fcmpgt

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vcmpe.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16: vcmpe.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vcmpe.f16 [[S2]], [[S0]]

; CHECK-SOFTFP-FULLFP16-NOT: vmov.f16 s{{.}}, r0
; CHECK-SOFTFP-FULLFP16-NOT: vmov.f16 s{{.}}, r1
; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s1
}

; 4. VCMPE
; FIXME: enable when constant pool is fixed
;
;define i32 @VCMPE_IMM(float %F.coerce) {
;entry:
;  %0 = bitcast float %F.coerce to i32
;  %tmp.0.extract.trunc = trunc i32 %0 to i16
;  %1 = bitcast i16 %tmp.0.extract.trunc to half
;  %tmp = fcmp olt half %1, 1.000000e+00
;  %tmp1 = zext i1 %tmp to i32
;  ret i32 %tmp1
;}

define i32 @VCMPE(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp.1.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp.1.extract.trunc to half
  %tmp = fcmp olt half %1, %3
  %tmp1 = zext i1 %tmp to i32
  ret i32 %tmp1

; CHECK-LABEL: VCMPE:
}

; 5. VCVT (between floating-point and fixed-point)
; Only assembly/disassembly support

; 6. VCVT (between floating-point and integer, both directions)
define i32 @fptosi(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptosi half %0 to i32
  ret i32 %conv

; CHECK-LABEL: fptosi:

; CHECK-HARDFP-FULLFP16: vmov.f16 s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.s32.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
}

define i32 @fptoui(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptoui half %0 to i32
  ret i32 %conv

; Anchor the checks to this function: without the label the lines below could
; match instructions emitted for a neighbouring function.
; CHECK-LABEL: fptoui:

; CHECK-HARDFP-FULLFP16: vcvt.u32.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
}

define float @UintToH(i32 %a, i32 %b) {
entry:
  %0 = uitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL: UintToH:

; CHECK-HARDFP-FULLFP16: vmov s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.u32 s0, s0
}

define float @SintToH(i32 %a, i32 %b) {
entry:
  %0 = sitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL: SintToH:

; CHECK-HARDFP-FULLFP16: vmov s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.s32 s0, s0
}

; TODO:
; 7. VCVTA
; 8. VCVTM
; 9. VCVTN
; 10. VCVTP
; 11. VCVTR

; 12. VDIV
define float @Div(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fdiv half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Div:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fdiv
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vdiv.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vdiv.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vdiv.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vdiv.f16 s0, s0, s1
}

; 13. VFMA
define float @VFMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %mul, %5
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFMA:
; CHECK-HARDFP-FULLFP16-FAST: vfma.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 14. VFMS
define float @VFMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half %5, %mul
  %6 = bitcast half %sub to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFMS:
; CHECK-HARDFP-FULLFP16-FAST: vfms.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 15. VFNMA
define float @VFNMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half -0.0, %mul
  %sub2 = fsub half %sub, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFNMA:
; CHECK-HARDFP-FULLFP16-FAST: vfnma.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 16. VFNMS
define float @VFNMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub2 = fsub half %mul, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFNMS:
; CHECK-HARDFP-FULLFP16-FAST: vfnms.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; TODO:
; 17. VMAXNM
; 18. VMINNM

; 19. VMLA
define float @VMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VMLA:
; CHECK-HARDFP-FULLFP16: vmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
}

; 20. VMLS
define float @VMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fsub half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VMLS:
; CHECK-HARDFP-FULLFP16: vmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
}

; TODO: fix immediates.
; 21. VMOV (between general-purpose register and half-precision register)
; 22. VMOV (immediate)

; 23. VMUL
define float @Mul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Mul:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fmul
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vmul.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vmul.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vmul.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vmul.f16 s0, s0, s1
}

; 24. VNEG
define float @Neg(float %a.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = fsub half -0.000000e+00, %1
  %3 = bitcast half %2 to i16
  %tmp4.0.insert.ext = zext i16 %3 to i32
  %4 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %4

; CHECK-LABEL: Neg:
; CHECK-HARDFP-FULLFP16: vneg.f16 s0, s0
}

; 25. VNMLA
define float @VNMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.000000e+00, %add
  %add3 = fsub half %add2, %5
  %6 = bitcast half %add3 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VNMLA:
; CHECK-HARDFP-FULLFP16: vnmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
}

; 26. VNMLS
define float @VNMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half %add, %5
  %6 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VNMLS:
; CHECK-HARDFP-FULLFP16: vnmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
}

; 27. VNMUL
define float @NMul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.0, %add
  %4 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: NMul:
; CHECK-HARDFP-FULLFP16: vnmul.f16 s0, s0, s1
}

; 28. VRINTA
; 29. VRINTM
; 30. VRINTN
; 31. VRINTP
; 32. VRINTR
; 33. VRINTX
; 34. VRINTZ
; 35. VSELEQ
; 36. VSELGE
; 37. VSELGT
; 38. VSELVS
; 39. VSQRT

; 40. VSUB
define float @Sub(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fsub half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Sub:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fsub
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vsub.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vsub.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vsub.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vsub.f16 s0, s0, s1
}