; Codegen tests for scalar FP16 (half) arithmetic, compares, conversions and
; selects on ARM and Thumb, covering the soft, softfp and hard-float ABIs,
; with and without the FullFP16 (+fullfp16) extension.

; SOFT:
; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT

; SOFTFP:
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-A32
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-T32
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; Test fast-isel
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD

; HARD:
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; FP-CONTRACT=FAST
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST

; TODO: we can't pass half-precision arguments as "half" types yet. We do
; that for the time being by passing "float %f.coerce" and the necessary
; bitconverts/truncates. But when we can pass half types, we do want to use
; and test that here.

define float @RetValBug(float %A.coerce) {
entry:
  ret float undef
; Check that LowerReturn can handle undef nodes (i.e. nodes which do not have
; any operands) when FullFP16 is enabled.
;
; CHECK-LABEL: RetValBug:
; CHECK-HARDFP-FULLFP16: {{.*}} lr
}

; 1. VABS: TODO

; 2. VADD
define float @Add(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fadd half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Add:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fadd
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vadd.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vadd.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vadd.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vadd.f16 s0, s0, s1
}

; 3. VCMP
define zeroext i1 @VCMP1(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %cmp = fcmp une half %1, %3
  ret i1 %cmp

; CHECK-LABEL: VCMP1:

; CHECK-SOFT: bl __aeabi_fcmpeq

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vcmp.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16: vcmp.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vcmp.f16 [[S2]], [[S0]]

; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r0
; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r1
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s1
}

; Check VCMPZH
define zeroext i1 @VCMP2(float %F.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %cmp = fcmp une half %1, 0.000000e+00
  ret i1 %cmp

; CHECK-LABEL: VCMP2:

; CHECK-SOFT: bl __aeabi_fcmpeq
; CHECK-SOFTFP-FP16: vcmp.f32 s0, #0
; CHECK-SOFTFP-FULLFP16: vcmp.f16 s0, #0
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, #0
}

; 4. VCMPE
define i32 @VCMPE1(float %F.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %tmp = fcmp olt half %1, 0.000000e+00
  %tmp1 = zext i1 %tmp to i32
  ret i32 %tmp1

; CHECK-LABEL: VCMPE1:

; CHECK-SOFT: bl __aeabi_fcmplt
; CHECK-SOFTFP-FP16: vcmpe.f32 s0, #0
; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s0, #0
; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, #0
}

define i32 @VCMPE2(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp.1.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp.1.extract.trunc to half
  %tmp = fcmp olt half %1, %3
  %tmp1 = zext i1 %tmp to i32
  ret i32 %tmp1

; CHECK-LABEL: VCMPE2:

; CHECK-SOFT: bl __aeabi_fcmplt
; CHECK-SOFTFP-FP16: vcmpe.f32 s{{.}}, s{{.}}
; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
; CHECK-HARDFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
}

; Test lowering of BR_CC
define hidden i32 @VCMPBRCC() {
entry:
  %f = alloca half, align 2
  br label %for.cond

for.cond:
  %0 = load half, half* %f, align 2
  %cmp = fcmp nnan ninf nsz ole half %0, 0xH6800
  br i1 %cmp, label %for.body, label %for.end

for.body:
  ret i32 1

for.end:
  ret i32 0

; CHECK-LABEL: VCMPBRCC:

; CHECK-SOFT: bl __aeabi_fcmple
; CHECK-SOFT: cmp r0, #0

; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], [[S2]]
; CHECK-SOFTFP-FP16: vcmpe.f32 [[S2]], s0
; CHECK-SOFTFP-FP16: vmrs APSR_nzcv, fpscr

; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FULLFP16: vmrs APSR_nzcv, fpscr
}

; 5. VCVT (between floating-point and fixed-point)
; Only assembly/disassembly support

; 6. VCVT (between floating-point and integer, both directions)
define i32 @fptosi(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptosi half %0 to i32
  ret i32 %conv

; CHECK-LABEL: fptosi:

; CHECK-HARDFP-FULLFP16: vmov.f16 s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.s32.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
}

define i32 @fptoui(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptoui half %0 to i32
  ret i32 %conv

; CHECK-HARDFP-FULLFP16: vcvt.u32.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
}

define float @UintToH(i32 %a, i32 %b) {
entry:
  %0 = uitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL: UintToH:

; CHECK-HARDFP-FULLFP16: vmov s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.u32 s0, s0
}

define float @SintToH(i32 %a, i32 %b) {
entry:
  %0 = sitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL: SintToH:

; CHECK-HARDFP-FULLFP16: vmov s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.s32 s0, s0
}

define i32 @f2h(float %f) {
entry:
  %conv = fptrunc float %f to half
  %0 = bitcast half %conv to i16
  %tmp.0.insert.ext = zext i16 %0 to i32
  ret i32 %tmp.0.insert.ext

; CHECK-LABEL: f2h:
; CHECK-HARDFP-FULLFP16: vcvtb.f16.f32 s0, s0
}

define float @h2f(i32 %h.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fpext half %0 to float
  ret float %conv

; CHECK-LABEL: h2f:
; CHECK-HARDFP-FULLFP16: vcvtb.f32.f16 s0, s0
}


define double @h2d(i32 %h.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fpext half %0 to double
  ret double %conv

; CHECK-LABEL: h2d:
; CHECK-HARDFP-FULLFP16: vcvtb.f64.f16 d{{.*}}, s{{.}}
}

define i32 @d2h(double %d) {
entry:
  %conv = fptrunc double %d to half
  %0 = bitcast half %conv to i16
  %tmp.0.insert.ext = zext i16 %0 to i32
  ret i32 %tmp.0.insert.ext

; CHECK-LABEL: d2h:
; CHECK-HARDFP-FULLFP16: vcvtb.f16.f64 s0, d{{.*}}
}

; TODO:
; 7. VCVTA
; 8. VCVTM
; 9. VCVTN
; 10. VCVTP
; 11. VCVTR

; 12. VDIV
define float @Div(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fdiv half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Div:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fdiv
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vdiv.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vdiv.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vdiv.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vdiv.f16 s0, s0, s1
}

; 13. VFMA
define float @VFMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %mul, %5
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFMA:
; CHECK-HARDFP-FULLFP16-FAST: vfma.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 14. VFMS
define float @VFMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half %5, %mul
  %6 = bitcast half %sub to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFMS:
; CHECK-HARDFP-FULLFP16-FAST: vfms.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 15. VFNMA
define float @VFNMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half -0.0, %mul
  %sub2 = fsub half %sub, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFNMA:
; CHECK-HARDFP-FULLFP16-FAST: vfnma.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 16. VFNMS
define float @VFNMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub2 = fsub half %mul, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFNMS:
; CHECK-HARDFP-FULLFP16-FAST: vfnms.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 17. VMAXNM
; 18. VMINNM
; Tested in fp16-vminmaxnm.ll and fp16-vminmaxnm-safe.ll


; 19. VMLA
define float @VMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VMLA:
; CHECK-HARDFP-FULLFP16: vmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
}

; 20. VMLS
define float @VMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fsub half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VMLS:
; CHECK-HARDFP-FULLFP16: vmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
}

; TODO: fix immediates.
; 21. VMOV (between general-purpose register and half-precision register)

; 22. VMOV (immediate)
define i32 @movi(i32 %a.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %a.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %add = fadd half %0, 0xHC000
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; CHECK-LABEL: movi:
; CHECK-HARDFP-FULLFP16: vmov.f16 s0, #-2.000000e+00
}

; 23. VMUL
define float @Mul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Mul:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fmul
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vmul.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vmul.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vmul.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vmul.f16 s0, s0, s1
}

; 24. VNEG
define float @Neg(float %a.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = fsub half -0.000000e+00, %1
  %3 = bitcast half %2 to i16
  %tmp4.0.insert.ext = zext i16 %3 to i32
  %4 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %4

; CHECK-LABEL: Neg:
; CHECK-HARDFP-FULLFP16: vneg.f16 s0, s0
}

; 25. VNMLA
define float @VNMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.000000e+00, %add
  %add3 = fsub half %add2, %5
  %6 = bitcast half %add3 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VNMLA:
; CHECK-HARDFP-FULLFP16: vnmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
}

; 26. VNMLS
define float @VNMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half %add, %5
  %6 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VNMLS:
; CHECK-HARDFP-FULLFP16: vnmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
}

; 27. VNMUL
define float @NMul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.0, %add
  %4 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: NMul:
; CHECK-HARDFP-FULLFP16: vnmul.f16 s0, s0, s1
}

; TODO:
; 28. VRINTA
; 29. VRINTM
; 30. VRINTN
; 31. VRINTP
; 32. VRINTR
; 33. VRINTX
; 34. VRINTZ

; 35. VSELEQ
define half @select_cc1() {
  %1 = fcmp nsz oeq half undef, 0xH0001
  %2 = select i1 %1, half 0xHC000, half 0xH0002
  ret half %2

; CHECK-LABEL: select_cc1:

; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vseleq.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32 s0, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmoveq.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmp.f32 s0, s0
; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32: it eq
; CHECK-SOFTFP-FP16-T32: vmoveq.f32 s{{.}}, s{{.}}
}

; FIXME: more tests need to be added for VSELGE and VSELGT.
; That is, more combinations of immediate operands that can or can't
; be encoded as an FP16 immediate need to be added here.
;
; 36. VSELGE
define half @select_cc_ge1() {
  %1 = fcmp nsz oge half undef, 0xH0001
  %2 = select i1 %1, half 0xHC000, half 0xH0002
  ret half %2

; CHECK-LABEL: select_cc_ge1:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovge.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it ge
; CHECK-SOFTFP-FP16-T32-NEXT: vmovge.f32 s{{.}}, s{{.}}
}

;
; FIXME: add fcmp ole, ult here.
;

define half @select_cc_ge3() {
  %1 = fcmp nsz ugt half undef, 0xH0001
  %2 = select i1 %1, half 0xHC000, half 0xH0002
  ret half %2

; CHECK-LABEL: select_cc_ge3:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovhi.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it hi
; CHECK-SOFTFP-FP16-T32-NEXT: vmovhi.f32 s{{.}}, s{{.}}
}

; 37. VSELGT
define half @select_cc_gt1() {
  %1 = fcmp nsz ogt half undef, 0xH0001
  %2 = select i1 %1, half 0xHC000, half 0xH0002
  ret half %2

; CHECK-LABEL: select_cc_gt1:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovgt.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it gt
; CHECK-SOFTFP-FP16-T32-NEXT: vmovgt.f32 s{{.}}, s{{.}}
}

define half @select_cc_gt2() {
  %1 = fcmp nsz uge half undef, 0xH0001
  %2 = select i1 %1, half 0xHC000, half 0xH0002
  ret half %2

; CHECK-LABEL: select_cc_gt2:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovpl.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it pl
; CHECK-SOFTFP-FP16-T32-NEXT: vmovpl.f32 s{{.}}, s{{.}}
}

;
; FIXME: add fcmp ule, olt here.
;

; 38. VSELVS
define float @select_cc4(float %a.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half

  %2 = fcmp nsz ueq half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002

  %4 = bitcast half %3 to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: select_cc4:

; CHECK-HARDFP-FULLFP16: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16: vldr.16 [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16: vmov.f16 [[S6:s[0-9]]], #-2.000000e+00
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, [[S2]]
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 [[S0:s[0-9]]], [[S6]], [[S4]]
; CHECK-HARDFP-FULLFP16-NEXT: vselvs.f16 s0, [[S6]], [[S0]]

; CHECK-SOFTFP-FP16-A32: vmov [[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-A32: vldr s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-A32: vcvtb.f32.f16 [[S6]], [[S6]]
; CHECK-SOFTFP-FP16-A32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-A32: vcmp.f32 [[S6]], s0
; CHECK-SOFTFP-FP16-A32: vldr [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-A32: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32: vmoveq.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-A32-NEXT: vmovvs.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-A32-NEXT: vcvtb.f16.f32 s0, [[S4]]

; CHECK-SOFTFP-FP16-T32: vmov [[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-T32: vldr s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-T32: vcvtb.f32.f16 [[S6]], [[S6]]
; CHECK-SOFTFP-FP16-T32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-T32: vcmp.f32 [[S6]], s0
; CHECK-SOFTFP-FP16-T32: vldr [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32: it eq
; CHECK-SOFTFP-FP16-T32: vmoveq.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-T32: it vs
; CHECK-SOFTFP-FP16-T32-NEXT: vmovvs.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-T32-NEXT: vcvtb.f16.f32 s0, [[S4]]
}

; 39. VSQRT - TODO

; 40. VSUB
define float @Sub(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fsub half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Sub:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fsub
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vsub.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vsub.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vsub.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vsub.f16 s0, s0, s1
}

; Check for VSTRH with a FCONSTH, this checks that addressing mode
; AddrMode5FP16 is supported.
define i32 @ThumbAddrMode5FP16(i32 %A.coerce) {
entry:
  %S = alloca half, align 2
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %S.0.S.0..sroa_cast = bitcast half* %S to i8*
  store volatile half 0xH3C00, half* %S, align 2
  %S.0.S.0. = load volatile half, half* %S, align 2
  %add = fadd half %S.0.S.0., %0
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; CHECK-LABEL: ThumbAddrMode5FP16

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], #1.000000e+00
; CHECK-SOFTFP-FULLFP16: vstr.16 [[S0]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0_2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vldr.16 [[S2:s[0-9]]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16: vadd.f16 s{{.}}, [[S2]], [[S0_2]]
}

; Test function calls to check store/load reg to/from stack
define i32 @fn1() {
entry:
  %coerce = alloca half, align 2
  %tmp2 = alloca i32, align 4
  store half 0xH7C00, half* %coerce, align 2
  %0 = load i32, i32* %tmp2, align 4
  %call = call i32 bitcast (i32 (...)* @fn2 to i32 (i32)*)(i32 %0)
  store half 0xH7C00, half* %coerce, align 2
  %1 = load i32, i32* %tmp2, align 4
  %call3 = call i32 bitcast (i32 (...)* @fn3 to i32 (i32)*)(i32 %1)
  ret i32 %call3

; CHECK-SPILL-RELOAD-LABEL: fn1:
; CHECK-SPILL-RELOAD: vstr.16 s0, [sp, #{{.}}] @ 2-byte Spill
; CHECK-SPILL-RELOAD-NEXT: bl fn2
; CHECK-SPILL-RELOAD-NEXT: vldr.16 s0, [sp, #{{.}}] @ 2-byte Reload
}

declare dso_local i32 @fn2(...)
declare dso_local i32 @fn3(...)