; SOFT:
; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT

; SOFTFP:
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-A32
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-T32
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; Test fast-isel
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD

; HARD:
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; FP-CONTRACT=FAST
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST

; TODO: we can't pass half-precision arguments as "half" types yet. For the
; time being we pass them as "float %f.coerce" together with the necessary
; bitcasts/truncates. Once half types can be passed directly, we want to use
; and test that here too.
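
; The coercion idiom described above, shown once in isolation. This is an
; unchecked illustration only: the function name @CoercePassThrough is ours
; and is not referenced by any CHECK line. The half value travels in the low
; 16 bits of a float, is unpacked with bitcast/trunc/bitcast, and is repacked
; with bitcast/zext/bitcast before being returned.
define float @CoercePassThrough(float %h.coerce) {
entry:
  %0 = bitcast float %h.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast half %1 to i16
  %tmp.0.insert.ext = zext i16 %2 to i32
  %3 = bitcast i32 %tmp.0.insert.ext to float
  ret float %3
}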

define float @RetValBug(float %A.coerce) {
entry:
  ret float undef
; Check that LowerReturn can handle undef nodes (i.e. nodes which do not have
; any operands) when FullFP16 is enabled.
;
; CHECK-LABEL: RetValBug:
; CHECK-HARDFP-FULLFP16: {{.*}} lr
}

; 1. VABS: TODO

; 2. VADD
define float @Add(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fadd half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Add:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fadd
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vadd.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vadd.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vadd.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vadd.f16 s0, s0, s1
}

; 3. VCMP
define zeroext i1 @VCMP1(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %cmp = fcmp une half %1, %3
  ret i1 %cmp

; CHECK-LABEL: VCMP1:

; CHECK-SOFT: bl __aeabi_fcmpeq

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vcmp.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16: vcmp.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vcmp.f16 [[S2]], [[S0]]

; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r0
; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r1
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s1
}

; Check VCMPZH
define zeroext i1 @VCMP2(float %F.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %cmp = fcmp une half %1, 0.000000e+00
  ret i1 %cmp

; CHECK-LABEL: VCMP2:

; CHECK-SOFT: bl __aeabi_fcmpeq
; CHECK-SOFTFP-FP16: vcmp.f32 s0, #0
; CHECK-SOFTFP-FULLFP16: vcmp.f16 s0, #0
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, #0
}

; 4. VCMPE
define i32 @VCMPE1(float %F.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %tmp = fcmp olt half %1, 0.000000e+00
  %tmp1 = zext i1 %tmp to i32
  ret i32 %tmp1

; CHECK-LABEL: VCMPE1:

; CHECK-SOFT: bl __aeabi_fcmplt
; CHECK-SOFTFP-FP16: vcmpe.f32 s0, #0
; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s0, #0
; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, #0
}

define i32 @VCMPE2(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp.1.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp.1.extract.trunc to half
  %tmp = fcmp olt half %1, %3
  %tmp1 = zext i1 %tmp to i32
  ret i32 %tmp1

; CHECK-LABEL: VCMPE2:

; CHECK-SOFT: bl __aeabi_fcmplt
; CHECK-SOFTFP-FP16: vcmpe.f32 s{{.}}, s{{.}}
; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
; CHECK-HARDFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
}

; Test lowering of BR_CC
define hidden i32 @VCMPBRCC() {
entry:
  %f = alloca half, align 2
  br label %for.cond

for.cond:
  %0 = load half, half* %f, align 2
  %cmp = fcmp nnan ninf nsz ole half %0, 0xH6800
  br i1 %cmp, label %for.body, label %for.end

for.body:
  ret i32 1

for.end:
  ret i32 0

; CHECK-LABEL: VCMPBRCC:

; CHECK-SOFT: bl __aeabi_fcmpgt
; CHECK-SOFT: cmp r0, #0

; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], [[S2]]
; CHECK-SOFTFP-FP16: vcmpe.f32 [[S2]], s0
; CHECK-SOFTFP-FP16: vmrs APSR_nzcv, fpscr

; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FULLFP16: vmrs APSR_nzcv, fpscr
}

; 5. VCVT (between floating-point and fixed-point)
; Only assembly/disassembly support

; 6. VCVT (between floating-point and integer, both directions)
define i32 @fptosi(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptosi half %0 to i32
  ret i32 %conv

; CHECK-LABEL: fptosi:

; CHECK-HARDFP-FULLFP16: vmov.f16 s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.s32.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
}

define i32 @fptoui(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptoui half %0 to i32
  ret i32 %conv

; CHECK-HARDFP-FULLFP16: vcvt.u32.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
}

define float @UintToH(i32 %a, i32 %b) {
entry:
  %0 = uitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL: UintToH:

; CHECK-HARDFP-FULLFP16: vmov s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.u32 s0, s0
}

define float @SintToH(i32 %a, i32 %b) {
entry:
  %0 = sitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL: SintToH:

; CHECK-HARDFP-FULLFP16: vmov s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.s32 s0, s0
}

define i32 @f2h(float %f) {
entry:
  %conv = fptrunc float %f to half
  %0 = bitcast half %conv to i16
  %tmp.0.insert.ext = zext i16 %0 to i32
  ret i32 %tmp.0.insert.ext

; CHECK-LABEL: f2h:
; CHECK-HARDFP-FULLFP16: vcvtb.f16.f32 s0, s0
}

define float @h2f(i32 %h.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fpext half %0 to float
  ret float %conv

; CHECK-LABEL: h2f:
; CHECK-HARDFP-FULLFP16: vcvtb.f32.f16 s0, s0
}

define double @h2d(i32 %h.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fpext half %0 to double
  ret double %conv

; CHECK-LABEL: h2d:
; CHECK-HARDFP-FULLFP16: vcvtb.f64.f16 d{{.*}}, s{{.}}
}

define i32 @d2h(double %d) {
entry:
  %conv = fptrunc double %d to half
  %0 = bitcast half %conv to i16
  %tmp.0.insert.ext = zext i16 %0 to i32
  ret i32 %tmp.0.insert.ext

; CHECK-LABEL: d2h:
; CHECK-HARDFP-FULLFP16: vcvtb.f16.f64 s0, d{{.*}}
}

; TODO:
; 7. VCVTA
; 8. VCVTM
; 9. VCVTN
; 10. VCVTP
; 11. VCVTR

; 12. VDIV
define float @Div(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fdiv half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Div:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fdiv
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vdiv.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vdiv.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vdiv.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vdiv.f16 s0, s0, s1
}

; 13. VFMA
define float @VFMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %mul, %5
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFMA:
; CHECK-HARDFP-FULLFP16-FAST: vfma.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 14. VFMS
define float @VFMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half %5, %mul
  %6 = bitcast half %sub to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFMS:
; CHECK-HARDFP-FULLFP16-FAST: vfms.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 15. VFNMA
define float @VFNMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half -0.0, %mul
  %sub2 = fsub half %sub, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFNMA:
; CHECK-HARDFP-FULLFP16-FAST: vfnma.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 16. VFNMS
define float @VFNMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub2 = fsub half %mul, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFNMS:
; CHECK-HARDFP-FULLFP16-FAST: vfnms.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 17. VMAXNM
; 18. VMINNM
; Tested in fp16-vminmaxnm.ll and fp16-vminmaxnm-safe.ll

; 19. VMLA
define float @VMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VMLA:
; CHECK-HARDFP-FULLFP16: vmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
}

; 20. VMLS
define float @VMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fsub half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VMLS:
; CHECK-HARDFP-FULLFP16: vmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
}

; TODO: fix immediates.
; 21. VMOV (between general-purpose register and half-precision register)

; 22. VMOV (immediate)
define i32 @movi(i32 %a.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %a.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %add = fadd half %0, 0xHC000
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; CHECK-LABEL: movi:
; CHECK-HARDFP-FULLFP16: vmov.f16 s0, #-2.000000e+00
}

; 23. VMUL
define float @Mul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Mul:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fmul
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vmul.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vmul.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vmul.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vmul.f16 s0, s0, s1
}

; 24. VNEG
define float @Neg(float %a.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = fsub half -0.000000e+00, %1
  %3 = bitcast half %2 to i16
  %tmp4.0.insert.ext = zext i16 %3 to i32
  %4 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %4

; CHECK-LABEL: Neg:
; CHECK-HARDFP-FULLFP16: vneg.f16 s0, s0
}

; 25. VNMLA
define float @VNMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.000000e+00, %add
  %add3 = fsub half %add2, %5
  %6 = bitcast half %add3 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VNMLA:
; CHECK-HARDFP-FULLFP16: vnmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
}

; 26. VNMLS
define float @VNMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half %add, %5
  %6 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VNMLS:
; CHECK-HARDFP-FULLFP16: vnmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
}

; 27. VNMUL
define float @NMul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.0, %add
  %4 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: NMul:
; CHECK-HARDFP-FULLFP16: vnmul.f16 s0, s0, s1
}

; TODO:
; 28. VRINTA
; 29. VRINTM
; 30. VRINTN
; 31. VRINTP
; 32. VRINTR
; 33. VRINTX
; 34. VRINTZ

; 35. VSELEQ
define half @select_cc1(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz oeq half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc1:

; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vseleq.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmoveq.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32: it eq
; CHECK-SOFTFP-FP16-T32: vmoveq.f32 s{{.}}, s{{.}}
}

; FIXME: more tests need to be added for VSELGE and VSELGT, i.e. more
; combinations of immediate operands that can and cannot be encoded as an
; FP16 immediate.
;
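; As an illustration of the distinction the FIXME refers to (an observation
; based on the CHECK lines of select_cc4 below, not a new test): -2.0
; (0xHC000) fits the FP16 modified-immediate encoding and is materialised
; with "vmov.f16 ..., #-2.000000e+00", whereas constants such as 0xH0001 and
; 0xH0002 cannot be encoded and are loaded from the constant pool with
; "vldr.16".
;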
; 36. VSELGE
define half @select_cc_ge1(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz oge half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_ge1:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovge.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it ge
; CHECK-SOFTFP-FP16-T32-NEXT: vmovge.f32 s{{.}}, s{{.}}
}

define half @select_cc_ge2(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz ole half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_ge2:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovls.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it ls
; CHECK-SOFTFP-FP16-T32-NEXT: vmovls.f32 s{{.}}, s{{.}}
}

define half @select_cc_ge3(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz ugt half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_ge3:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovhi.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it hi
; CHECK-SOFTFP-FP16-T32-NEXT: vmovhi.f32 s{{.}}, s{{.}}
}

define half @select_cc_ge4(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz ult half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_ge4:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovlt.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it lt
; CHECK-SOFTFP-FP16-T32-NEXT: vmovlt.f32 s{{.}}, s{{.}}
}

; 37. VSELGT
define half @select_cc_gt1(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz ogt half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_gt1:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovgt.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it gt
; CHECK-SOFTFP-FP16-T32-NEXT: vmovgt.f32 s{{.}}, s{{.}}
}

define half @select_cc_gt2(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz uge half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_gt2:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovpl.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it pl
; CHECK-SOFTFP-FP16-T32-NEXT: vmovpl.f32 s{{.}}, s{{.}}
}

define half @select_cc_gt3(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz ule half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_gt3:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovle.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it le
; CHECK-SOFTFP-FP16-T32-NEXT: vmovle.f32 s{{.}}, s{{.}}
}

define half @select_cc_gt4(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz olt half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_gt4:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovmi.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it mi
; CHECK-SOFTFP-FP16-T32-NEXT: vmovmi.f32 s{{.}}, s{{.}}
}

; 38. VSELVS
define float @select_cc4(float %a.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half

  %2 = fcmp nsz ueq half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002

  %4 = bitcast half %3 to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: select_cc4:

; CHECK-HARDFP-FULLFP16: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16: vldr.16 [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16: vmov.f16 [[S6:s[0-9]]], #-2.000000e+00
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, [[S2]]
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 [[S0:s[0-9]]], [[S6]], [[S4]]
; CHECK-HARDFP-FULLFP16-NEXT: vselvs.f16 s0, [[S6]], [[S0]]

; CHECK-SOFTFP-FP16-A32: vmov [[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-A32: vldr s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-A32: vcvtb.f32.f16 [[S6]], [[S6]]
; CHECK-SOFTFP-FP16-A32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-A32: vcmp.f32 [[S6]], s0
; CHECK-SOFTFP-FP16-A32: vldr [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-A32: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32: vmoveq.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-A32-NEXT: vmovvs.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-A32-NEXT: vcvtb.f16.f32 s0, [[S4]]

; CHECK-SOFTFP-FP16-T32: vmov [[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-T32: vldr s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-T32: vcvtb.f32.f16 [[S6]], [[S6]]
; CHECK-SOFTFP-FP16-T32: vldr [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-T32: vcmp.f32 [[S6]], s0
; CHECK-SOFTFP-FP16-T32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32: it eq
; CHECK-SOFTFP-FP16-T32: vmoveq.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-T32: it vs
; CHECK-SOFTFP-FP16-T32-NEXT: vmovvs.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-T32-NEXT: vcvtb.f16.f32 s0, [[S4]]
}

; 39. VSQRT - TODO

; 40. VSUB
define float @Sub(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fsub half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Sub:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fsub
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vsub.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vsub.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vsub.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vsub.f16 s0, s0, s1
}

; Check for VSTRH with a FCONSTH; this checks that addressing mode
; AddrMode5FP16 is supported.
define i32 @ThumbAddrMode5FP16(i32 %A.coerce) {
entry:
  %S = alloca half, align 2
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %S.0.S.0..sroa_cast = bitcast half* %S to i8*
  store volatile half 0xH3C00, half* %S, align 2
  %S.0.S.0. = load volatile half, half* %S, align 2
  %add = fadd half %S.0.S.0., %0
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; CHECK-LABEL: ThumbAddrMode5FP16

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], #1.000000e+00
; CHECK-SOFTFP-FULLFP16: vstr.16 [[S0]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0_2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vldr.16 [[S2:s[0-9]]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16: vadd.f16 s{{.}}, [[S2]], [[S0_2]]
}

; Test function calls to check store/load reg to/from stack
define i32 @fn1() {
entry:
  %coerce = alloca half, align 2
  %tmp2 = alloca i32, align 4
  store half 0xH7C00, half* %coerce, align 2
  %0 = load i32, i32* %tmp2, align 4
  %call = call i32 bitcast (i32 (...)* @fn2 to i32 (i32)*)(i32 %0)
  store half 0xH7C00, half* %coerce, align 2
  %1 = load i32, i32* %tmp2, align 4
  %call3 = call i32 bitcast (i32 (...)* @fn3 to i32 (i32)*)(i32 %1)
  ret i32 %call3

; CHECK-SPILL-RELOAD-LABEL: fn1:
; CHECK-SPILL-RELOAD: vstr.16 s0, [sp, #{{.}}] @ 2-byte Spill
; CHECK-SPILL-RELOAD: bl fn2
; CHECK-SPILL-RELOAD-NEXT: vldr.16 s0, [sp, #{{.}}] @ 2-byte Reload
}

declare dso_local i32 @fn2(...)
declare dso_local i32 @fn3(...)