; SOFT:
; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabi -float-abi=soft -mattr=+mve | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT

; SOFTFP:
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-A32
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16,+fp64 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-T32
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16,+fp64 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; Test fast-isel
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16,+fp64 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16,+fp64 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD

; HARD:
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16,+fp64 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16,+fp64 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; FP-CONTRACT=FAST
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16,+fp64 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16,+fp64 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST

; TODO: we can't pass half-precision arguments as "half" types yet. We do
; that for the time being by passing "float %f.coerce" and the necessary
; bitconverts/truncates. But when we can pass half types, we do want to use
; and test that here.

; Returns an undef float; the argument is unused.
define float @RetValBug(float %A.coerce) {
entry:
  ret float undef
; Check that LowerReturn can handle undef nodes (i.e. nodes which do not have
; any operands) when FullFP16 is enabled.
;
; CHECK-LABEL: RetValBug:
; CHECK-HARDFP-FULLFP16: {{.*}} lr
}

; 2.
; VADD
; Half-precision add; each operand is taken from the low 16 bits of a
; float-typed argument and the result is zero-extended back into a float.
define float @Add(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fadd half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Add:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fadd
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vadd.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16-DAG: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16-DAG: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vadd.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r
; CHECK-HARDFP-VFP3: vmov.f32 s
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vadd.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vadd.f16 s0, s0, s1
}

; 3.
; VCMP
; "une" compare of two half values, each taken from the low 16 bits of a
; float-typed argument.
define zeroext i1 @VCMP1(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %cmp = fcmp une half %1, %3
  ret i1 %cmp

; CHECK-LABEL: VCMP1:

; CHECK-SOFT: bl __aeabi_fcmpeq

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vcmp.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16: vcmp.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vcmp.f16 [[S2]], [[S0]]

; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r0
; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r1
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s1
}

; Check VCMPZH
; Same "une" compare, but against the constant zero.
define zeroext i1 @VCMP2(float %F.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %cmp = fcmp une half %1, 0.000000e+00
  ret i1 %cmp

; CHECK-LABEL: VCMP2:

; CHECK-SOFT: bl __aeabi_fcmpeq
; CHECK-SOFTFP-FP16: vcmp.f32 s0, #0
; CHECK-SOFTFP-FULLFP16: vcmp.f16 s0, #0
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, #0
}

; 4.
; VCMPE
; Ordered less-than compare of a half value against zero, widened to i32.
define i32 @VCMPE1(float %F.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %tmp = fcmp olt half %1, 0.000000e+00
  %tmp1 = zext i1 %tmp to i32
  ret i32 %tmp1

; CHECK-LABEL: VCMPE1:

; CHECK-SOFT: bl __aeabi_fcmplt
; CHECK-SOFTFP-FP16: vcmp.f32 s0, #0
; CHECK-SOFTFP-FULLFP16: vcmp.f16 s0, #0
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, #0
}

; Ordered less-than compare of two half values, widened to i32.
define i32 @VCMPE2(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp.1.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp.1.extract.trunc to half
  %tmp = fcmp olt half %1, %3
  %tmp1 = zext i1 %tmp to i32
  ret i32 %tmp1

; CHECK-LABEL: VCMPE2:

; CHECK-SOFT: bl __aeabi_fcmplt
; CHECK-SOFTFP-FP16: vcmp.f32 s{{.}}, s{{.}}
; CHECK-SOFTFP-FULLFP16: vcmp.f16 s{{.}}, s{{.}}
; CHECK-HARDFP-FULLFP16: vcmp.f16 s{{.}}, s{{.}}
}

; Test lowering of BR_CC
; A half loaded from a stack slot is compared against 0xH6800 and the result
; feeds a conditional branch.
define hidden i32 @VCMPBRCC() {
entry:
  %f = alloca half, align 2
  br label %for.cond

for.cond:
  %0 = load half, ptr %f, align 2
  %cmp = fcmp nnan ninf nsz ole half %0, 0xH6800
  br i1 %cmp, label %for.body, label %for.end

for.body:
  ret i32 1

for.end:
  ret i32 0

; CHECK-LABEL: VCMPBRCC:

; CHECK-SOFT: bl __aeabi_fcmp{{gt|le}}
; CHECK-SOFT: cmp r0, #{{0|1}}

; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], [[S2]]
; CHECK-SOFTFP-FP16: vcmp.f32 [[S2]], s0
; CHECK-SOFTFP-FP16: vmrs APSR_nzcv, fpscr

; CHECK-SOFTFP-FULLFP16: vcmp.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FULLFP16: vmrs APSR_nzcv, fpscr
}

; 5. VCVT (between floating-point and fixed-point)
; Only assembly/disassembly support

; 6.
; VCVT (between floating-point and integer, both directions)
; Signed conversion of a half (low 16 bits of the i32 argument) to i32.
define i32 @fptosi(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptosi half %0 to i32
  ret i32 %conv

; CHECK-LABEL: fptosi:

; CHECK-HARDFP-FULLFP16: vmov.f16 s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.s32.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
}

; Unsigned conversion of a half (low 16 bits of the i32 argument) to i32.
define i32 @fptoui(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptoui half %0 to i32
  ret i32 %conv

; The label anchors the checks below to this function's own output.
; CHECK-LABEL: fptoui:

; CHECK-HARDFP-FULLFP16: vcvt.u32.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
}

; Unsigned i32 to half; %b is unused.
define float @UintToH(i32 %a, i32 %b) {
entry:
  %0 = uitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL: UintToH:

; CHECK-HARDFP-FULLFP16: vmov s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.u32 s0, s0
}

; Signed i32 to half; %b is unused.
define float @SintToH(i32 %a, i32 %b) {
entry:
  %0 = sitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL: SintToH:

; CHECK-HARDFP-FULLFP16: vmov s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.s32 s0, s0
}

; float -> half truncation, result zero-extended to i32.
define i32 @f2h(float %f) {
entry:
  %conv = fptrunc float %f to half
  %0 = bitcast half %conv to i16
  %tmp.0.insert.ext = zext i16 %0 to i32
  ret i32 %tmp.0.insert.ext

; CHECK-LABEL: f2h:
; CHECK-HARDFP-FULLFP16: vcvtb.f16.f32 s0, s0
}

; half -> float extension.
define float @h2f(i32 %h.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fpext half %0 to float
  ret float %conv

; CHECK-LABEL: h2f:
; CHECK-HARDFP-FULLFP16: vcvtb.f32.f16 s0, s0
}


; half -> double extension.
define double @h2d(i32 %h.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fpext half %0 to double
  ret double %conv

; CHECK-LABEL: h2d:
; CHECK-HARDFP-FULLFP16: vcvtb.f64.f16 d{{.*}}, s{{.}}
}

; double -> half truncation, result zero-extended to i32.
define i32 @d2h(double %d) {
entry:
  %conv = fptrunc double %d to half
  %0 = bitcast half %conv to i16
  %tmp.0.insert.ext = zext i16 %0 to i32
  ret i32 %tmp.0.insert.ext

; CHECK-LABEL: d2h:
; CHECK-HARDFP-FULLFP16: vcvtb.f16.f64 s0, d{{.*}}
}

; TODO:
; 7. VCVTA
; 8. VCVTM
; 9. VCVTN
; 10. VCVTP
; 11. VCVTR

; 12. VDIV
; Half-precision divide of the two coerced operands.
define float @Div(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fdiv half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Div:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fdiv
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vdiv.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16-DAG: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16-DAG: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vdiv.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r
; CHECK-HARDFP-VFP3: vmov.f32 s
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vdiv.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vdiv.f16 s0, s0, s1
}

; 13. VFMA
; (a * b) + c on half values; only checked under the -fp-contract=fast prefix.
define float @VFMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %mul, %5
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFMA:
; CHECK-HARDFP-FULLFP16-FAST: vfma.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 14.
; VFMS
; c - (a * b) on half values.
define float @VFMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half %5, %mul
  %6 = bitcast half %sub to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFMS:
; CHECK-HARDFP-FULLFP16-FAST: vfms.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 15. VFNMA
; (-0.0 - (a * b)) - c on half values.
define float @VFNMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half -0.0, %mul
  %sub2 = fsub half %sub, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFNMA:
; CHECK-HARDFP-FULLFP16-FAST: vfnma.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 16.
; VFNMS
; (a * b) - c on half values.
define float @VFNMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub2 = fsub half %mul, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFNMS:
; CHECK-HARDFP-FULLFP16-FAST: vfnms.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 17. VMAXNM
; 18. VMINNM
; Tested in fp16-vminmaxnm.ll and fp16-vminmaxnm-safe.ll

; 19. VMLA
; c + (a * b) on half values.
define float @VMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VMLA:
; CHECK-HARDFP-FULLFP16: vmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
}

; 20.
; VMLS
; c - (a * b) on half values.
define float @VMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fsub half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VMLS:
; CHECK-HARDFP-FULLFP16: vmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
}

; TODO: fix immediates.
; 21. VMOV (between general-purpose register and half-precision register)

; 22. VMOV (immediate)
; Adds the half constant 0xHC000 to the coerced argument.
define i32 @movi(i32 %a.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %a.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %add = fadd half %0, 0xHC000
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; CHECK-LABEL: movi:
; CHECK-HARDFP-FULLFP16: vmov.f16 s0, #-2.000000e+00
}

; 23.
; VMUL
; Half-precision multiply of the two coerced operands.
define float @Mul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Mul:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fmul
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vmul.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16-DAG: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16-DAG: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vmul.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r
; CHECK-HARDFP-VFP3: vmov.f32 s
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vmul.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vmul.f16 s0, s0, s1
}

; 24.
; VNEG
; Negation written as (-0.0 - a) on a half value.
define float @Neg(float %a.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = fsub half -0.000000e+00, %1
  %3 = bitcast half %2 to i16
  %tmp4.0.insert.ext = zext i16 %3 to i32
  %4 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %4

; CHECK-LABEL: Neg:
; CHECK-HARDFP-FULLFP16: vneg.f16 s0, s0
}

; 25. VNMLA
; (-0.0 - (a * b)) - c on half values.
define float @VNMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.000000e+00, %add
  %add3 = fsub half %add2, %5
  %6 = bitcast half %add3 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VNMLA:
; CHECK-HARDFP-FULLFP16: vnmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
}

; 26.
; VNMLS
; (a * b) - c on half values.
define float @VNMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half %add, %5
  %6 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VNMLS:
; CHECK-HARDFP-FULLFP16: vnmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
}

; 27. VNMUL
; -0.0 - (a * b) on half values.
define float @NMul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.0, %add
  %4 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: NMul:
; CHECK-HARDFP-FULLFP16: vnmul.f16 s0, s0, s1
}

; 35.
; VSELEQ
; Selects between two half constants on an "oeq" compare of a loaded half.
define half @select_cc1(ptr %a0) {
  %1 = load half, ptr %a0
  %2 = fcmp nsz oeq half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc1:

; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vseleq.f16 s0,

; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: movne r0,

; CHECK-SOFTFP-FP16-T32: vcmp.f32
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: itt eq
; CHECK-SOFTFP-FP16-T32-NEXT: movweq r0,
; CHECK-SOFTFP-FP16-T32-NEXT: movteq r0,
}

; FIXME: more tests need to be added for VSELGE and VSELGT.
; That is, more combinations of immediate operands that can or can't
; be encoded as an FP16 immediate need to be added here.
;
; 36. VSELGE
; Same select shape, "oge" predicate.
define half @select_cc_ge1(ptr %a0) {
  %1 = load half, ptr %a0
  %2 = fcmp nsz oge half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_ge1:

; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vselge.f16 s0,

; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: movlt r0,

; CHECK-SOFTFP-FP16-T32: vcmp.f32
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: itt ge
; CHECK-SOFTFP-FP16-T32-NEXT: movwge r0,
; CHECK-SOFTFP-FP16-T32-NEXT: movtge r0,
}

; Same select shape, "ole" predicate.
define half @select_cc_ge2(ptr %a0) {
  %1 = load half, ptr %a0
  %2 = fcmp nsz ole half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_ge2:

; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vselge.f16 s0,

; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: movhi r0,

; CHECK-SOFTFP-FP16-T32: vcmp.f32
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: itt ls
; CHECK-SOFTFP-FP16-T32-NEXT: movwls r0,
; CHECK-SOFTFP-FP16-T32-NEXT: movtls r0,
}

; Same select shape, "ugt" predicate.
define half @select_cc_ge3(ptr %a0) {
  %1 = load half, ptr %a0
  %2 = fcmp nsz ugt half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_ge3:

; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vselge.f16 s0,

; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: movls r0,

; CHECK-SOFTFP-FP16-T32: vcmp.f32
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: itt hi
; CHECK-SOFTFP-FP16-T32-NEXT: movwhi r0,
; CHECK-SOFTFP-FP16-T32-NEXT: movthi r0,
}

; Same select shape, "ult" predicate.
define half @select_cc_ge4(ptr %a0) {
  %1 = load half, ptr %a0
  %2 = fcmp nsz ult half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_ge4:

; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vselge.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: movge r0,

; CHECK-SOFTFP-FP16-T32: vcmp.f32
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: itt lt
; CHECK-SOFTFP-FP16-T32-NEXT: movwlt r0,
; CHECK-SOFTFP-FP16-T32-NEXT: movtlt r0,
}

; 37.
; VSELGT
; Selects between two half constants on an "ogt" compare of a loaded half.
define half @select_cc_gt1(ptr %a0) {
  %1 = load half, ptr %a0
  %2 = fcmp nsz ogt half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_gt1:

; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: movle r0,

; CHECK-SOFTFP-FP16-T32: vcmp.f32
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: itt gt
; CHECK-SOFTFP-FP16-T32-NEXT: movwgt r0,
; CHECK-SOFTFP-FP16-T32-NEXT: movtgt r0,
}

; Same select shape, "uge" predicate.
define half @select_cc_gt2(ptr %a0) {
  %1 = load half, ptr %a0
  %2 = fcmp nsz uge half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_gt2:

; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: movmi r0,

; CHECK-SOFTFP-FP16-T32: vcmp.f32
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: itt pl
; CHECK-SOFTFP-FP16-T32-NEXT: movwpl r0,
; CHECK-SOFTFP-FP16-T32-NEXT: movtpl r0,
}

; Same select shape, "ule" predicate.
define half @select_cc_gt3(ptr %a0) {
  %1 = load half, ptr %a0
  %2 = fcmp nsz ule half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_gt3:

; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: movgt r0,

; CHECK-SOFTFP-FP16-T32: vcmp.f32
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: itt le
; CHECK-SOFTFP-FP16-T32-NEXT: movwle r0,
; CHECK-SOFTFP-FP16-T32-NEXT: movtle r0,
}

; Same select shape, "olt" predicate.
define half @select_cc_gt4(ptr %a0) {
  %1 = load half, ptr %a0
  %2 = fcmp nsz olt half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_gt4:

; CHECK-HARDFP-FULLFP16: vcmp.f16
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: movpl r0,

; CHECK-SOFTFP-FP16-T32: vcmp.f32
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: itt mi
; CHECK-SOFTFP-FP16-T32-NEXT: movwmi r0,
; CHECK-SOFTFP-FP16-T32-NEXT: movtmi r0,
}

; 38. VSELVS
; "ueq" compare of the coerced half argument, then the same constant select.
define float @select_cc4(float %a.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half

  %2 = fcmp nsz ueq half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002

  %4 = bitcast half %3 to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: select_cc4:

; CHECK-HARDFP-FULLFP16: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, [[S2]]
; CHECK-HARDFP-FULLFP16: vldr.16 [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vmov.f16 [[S6:s[0-9]]], #-2.000000e+00
; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 [[S0:s[0-9]]], [[S6]], [[S4]]
; CHECK-HARDFP-FULLFP16-NEXT: vselvs.f16 s0, [[S6]], [[S0]]

; CHECK-SOFTFP-FP16-A32: vmov [[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-A32: vldr s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-A32: vcvtb.f32.f16 [[S6]], [[S6]]
; CHECK-SOFTFP-FP16-A32: ldr r1, .LCP{{.*}}
; CHECK-SOFTFP-FP16-A32: vcmp.f32 [[S6]], s0
; CHECK-SOFTFP-FP16-A32: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32: mov r0, r1
; CHECK-SOFTFP-FP16-A32-NEXT: movne r0, #2
; CHECK-SOFTFP-FP16-A32-NEXT: movvs r0, r1

; CHECK-SOFTFP-FP16-T32: vmov [[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-T32: vldr s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-T32: vcvtb.f32.f16 [[S6]], [[S6]]
; CHECK-SOFTFP-FP16-T32: vcmp.f32 [[S6]], s0
; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32: itt eq
; CHECK-SOFTFP-FP16-T32-NEXT: movweq r1,
; CHECK-SOFTFP-FP16-T32-NEXT: movteq r1,
; CHECK-SOFTFP-FP16-T32-NEXT: itt vs
; CHECK-SOFTFP-FP16-T32-NEXT: movwvs r1,
; CHECK-SOFTFP-FP16-T32-NEXT: movtvs r1,
; CHECK-SOFTFP-FP16-T32-NEXT: uxth r0, r1
}

; 40. VSUB
; Half-precision subtract of the two coerced operands.
define float @Sub(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fsub half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Sub:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fsub
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vsub.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16-DAG: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16-DAG: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vsub.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r
; CHECK-HARDFP-VFP3: vmov.f32 s
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vsub.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vsub.f16 s0, s0, s1
}

; Check for VSTRH with a FCONSTH, this checks that addressing mode
; AddrMode5FP16 is supported.
; The volatile store/load pair keeps the half constant going through the
; stack slot %S before it is added to the coerced argument.
define i32 @ThumbAddrMode5FP16(i32 %A.coerce) {
entry:
  %S = alloca half, align 2
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  store volatile half 0xH3C00, ptr %S, align 2
  %S.0.S.0. = load volatile half, ptr %S, align 2
  %add = fadd half %S.0.S.0., %0
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; CHECK-LABEL: ThumbAddrMode5FP16:

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], #1.000000e+00
; CHECK-SOFTFP-FULLFP16: vstr.16 [[S0]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0_2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vldr.16 [[S2:s[0-9]]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16: vadd.f16 s{{.}}, [[S2]], [[S0_2]]
}

; Test function calls to check store/load reg to/from stack
; Only checked by the -O0 runs (SPILL-RELOAD prefix).
define i32 @fn1() {
entry:
  %coerce = alloca half, align 2
  %tmp2 = alloca i32, align 4
  store half 0xH7C00, ptr %coerce, align 2
  %0 = load i32, ptr %tmp2, align 4
  %call = call i32 @fn2(i32 %0)
  store half 0xH7C00, ptr %coerce, align 2
  %1 = load i32, ptr %tmp2, align 4
  %call3 = call i32 @fn3(i32 %1)
  ret i32 %call3

; CHECK-SPILL-RELOAD-LABEL: fn1:
; CHECK-SPILL-RELOAD: vstr.16 s0, [sp, #{{.}}] @ 2-byte Spill
; CHECK-SPILL-RELOAD: bl fn2
; CHECK-SPILL-RELOAD-NEXT: vldr.16 s0, [sp, #{{.}}] @ 2-byte Reload
}

declare dso_local i32 @fn2(...)
declare dso_local i32 @fn3(...)