; RUN: llc -asm-verbose=false < %s -mattr=+vfp3,+fp16 | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=CHECK-FP16 --check-prefix=CHECK-VFP -check-prefix=CHECK-ALL
; RUN: llc -asm-verbose=false < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=CHECK-LIBCALL --check-prefix=CHECK-VFP -check-prefix=CHECK-ALL --check-prefix=CHECK-LIBCALL-VFP
; RUN: llc -asm-verbose=false < %s -mattr=-fpregs | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK-LIBCALL -check-prefix=CHECK-NOVFP -check-prefix=CHECK-ALL

; Code generation tests for the IR 'half' type on ARM.
;
; The file is compiled three times; each invocation enables a different set
; of check prefixes:
;   1. -mattr=+vfp3,+fp16 : CHECK-FP16 , CHECK-VFP , CHECK-ALL
;   2. default attributes : CHECK-LIBCALL , CHECK-VFP , CHECK-ALL ,
;                           CHECK-LIBCALL-VFP
;   3. -mattr=-fpregs     : CHECK-LIBCALL , CHECK-NOVFP , CHECK-ALL
;
; As used below, the FP16 prefix expects vcvtb conversions, the LIBCALL
; prefix expects calls to __aeabi_h2f / __aeabi_f2h, the VFP prefix expects
; f32 VFP instructions, and the NOVFP prefix expects soft-float helper calls.
; A directive whose prefix is not enabled by an invocation is ignored by it.

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
target triple = "armv7---eabihf"

; Binary arithmetic: both half operands are loaded, converted to f32, combined,
; and the result is converted back to half before being stored.

; CHECK-ALL-LABEL: test_fadd:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP: vadd.f32
; CHECK-NOVFP: bl __aeabi_fadd
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fadd(ptr %p, ptr %q) #0 {
  %a = load half, ptr %p, align 2
  %b = load half, ptr %q, align 2
  %r = fadd half %a, %b
  store half %r, ptr %p
  ret void
}

; CHECK-ALL-LABEL: test_fsub:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP: vsub.f32
; CHECK-NOVFP: bl __aeabi_fsub
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fsub(ptr %p, ptr %q) #0 {
  %a = load half, ptr %p, align 2
  %b = load half, ptr %q, align 2
  %r = fsub half %a, %b
  store half %r, ptr %p
  ret void
}

; CHECK-ALL-LABEL: test_fmul:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP: vmul.f32
; CHECK-NOVFP: bl __aeabi_fmul
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fmul(ptr %p, ptr %q) #0 {
  %a = load half, ptr %p, align 2
  %b = load half, ptr %q, align 2
  %r = fmul half %a, %b
  store half %r, ptr %p
  ret void
}

; CHECK-ALL-LABEL: test_fdiv:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP: vdiv.f32
; CHECK-NOVFP: bl __aeabi_fdiv
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fdiv(ptr %p, ptr %q) #0 {
  %a = load half, ptr %p, align 2
  %b = load half, ptr %q, align 2
  %r = fdiv half %a, %b
  store half %r, ptr %p
  ret void
}

; frem has no instruction form here; every configuration calls fmodf.
; CHECK-ALL-LABEL: test_frem:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl fmodf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_frem(ptr %p, ptr %q) #0 {
  %a = load half, ptr %p, align 2
  %b = load half, ptr %q, align 2
  %r = frem half %a, %b
  store half %r, ptr %p
  ret void
}

; A plain copy of a half value is a 16-bit integer load and store.
; CHECK-ALL-LABEL: test_load_store:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL: ldrh {{r[0-9]+}}, [{{r[0-9]+}}]
; CHECK-ALL: strh {{r[0-9]+}}, [{{r[0-9]+}}]
define void @test_load_store(ptr %p, ptr %q) #0 {
  %a = load half, ptr %p, align 2
  store half %a, ptr %q
  ret void
}

; Testing only successful compilation of function calls. In ARM ABI, half
; args and returns are handled as f32.

declare half @test_callee(half %a, half %b) #0

; CHECK-ALL-LABEL: test_call:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: .save {r11, lr}
; CHECK-ALL-NEXT: push {r11, lr}
; CHECK-ALL-NEXT: bl test_callee
; CHECK-ALL-NEXT: pop {r11, pc}
define half @test_call(half %a, half %b) #0 {
  %r = call half @test_callee(half %a, half %b)
  ret half %r
}

; Same call with the two arguments swapped: only register moves are expected.
; CHECK-ALL-LABEL: test_call_flipped:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: .save {r11, lr}
; CHECK-ALL-NEXT: push {r11, lr}
; CHECK-VFP-NEXT: vmov.f32 s2, s0
; CHECK-VFP-NEXT: vmov.f32 s0, s1
; CHECK-VFP-NEXT: vmov.f32 s1, s2
; CHECK-NOVFP-NEXT: mov r2, r0
; CHECK-NOVFP-NEXT: mov r0, r1
; CHECK-NOVFP-NEXT: mov r1, r2
; CHECK-ALL-NEXT: bl test_callee
; CHECK-ALL-NEXT: pop {r11, pc}
define half @test_call_flipped(half %a, half %b) #0 {
  %r = call half @test_callee(half %b, half %a)
  ret half %r
}

; Tail-call variant of the swapped-argument call: ends in a plain branch.
; CHECK-ALL-LABEL: test_tailcall_flipped:
; CHECK-ALL-NEXT: .fnstart
; CHECK-VFP-NEXT: vmov.f32 s2, s0
; CHECK-VFP-NEXT: vmov.f32 s0, s1
; CHECK-VFP-NEXT: vmov.f32 s1, s2
; CHECK-NOVFP-NEXT: mov r2, r0
; CHECK-NOVFP-NEXT: mov r0, r1
; CHECK-NOVFP-NEXT: mov r1, r2
; CHECK-ALL-NEXT: b test_callee
define half @test_tailcall_flipped(half %a, half %b) #0 {
  %r = tail call half @test_callee(half %b, half %a)
  ret half %r
}

; Optimizer picks %p or %q based on %c and only loads that value
; No conversion is needed
; CHECK-ALL-LABEL: test_select:
; CHECK-ALL: cmp {{r[0-9]+}}, #0
; CHECK-ALL: movne {{r[0-9]+}}, {{r[0-9]+}}
; CHECK-ALL: ldrh {{r[0-9]+}}, [{{r[0-9]+}}]
; CHECK-ALL: strh {{r[0-9]+}}, [{{r[0-9]+}}]
define void @test_select(ptr %p, ptr %q, i1 zeroext %c) #0 {
  %a = load half, ptr %p, align 2
  %b = load half, ptr %q, align 2
  %r = select i1 %c, half %a, half %b
  store half %r, ptr %p
  ret void
}

; Test only two variants of fcmp. These get translated to f32 vcmp
; instructions anyway.
; CHECK-ALL-LABEL: test_fcmp_une:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP: vcmp.f32
; CHECK-NOVFP: bl __aeabi_fcmpeq
; CHECK-VFP-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-VFP-NEXT: movwne
; CHECK-NOVFP-NEXT: clz r0, r0
; CHECK-NOVFP-NEXT: lsr r0, r0, #5
define i1 @test_fcmp_une(ptr %p, ptr %q) #0 {
  %a = load half, ptr %p, align 2
  %b = load half, ptr %q, align 2
  %r = fcmp une half %a, %b
  ret i1 %r
}

; CHECK-ALL-LABEL: test_fcmp_ueq:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP: vcmp.f32
; CHECK-NOVFP: bl __aeabi_fcmpeq
; CHECK-FP16: vmrs APSR_nzcv, fpscr
; CHECK-LIBCALL: movw{{ne|eq}}
define i1 @test_fcmp_ueq(ptr %p, ptr %q) #0 {
  %a = load half, ptr %p, align 2
  %b = load half, ptr %q, align 2
  %r = fcmp ueq half %a, %b
  ret i1 %r
}

; A half comparison (fcmp uge) feeding a conditional branch.
; CHECK-ALL-LABEL: test_br_cc:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP: vcmp.f32
; CHECK-NOVFP: bl __aeabi_fcmplt
; CHECK-FP16: vmrs APSR_nzcv, fpscr
; CHECK-VFP: movmi
; CHECK-VFP: str
; CHECK-NOVFP: str
define void @test_br_cc(ptr %p, ptr %q, ptr %p1, ptr %p2) #0 {
  %a = load half, ptr %p, align 2
  %b = load half, ptr %q, align 2
  %c = fcmp uge half %a, %b
  br i1 %c, label %then, label %else
then:
  store i32 0, ptr %p1
  ret void
else:
  store i32 0, ptr %p2
  ret void
}

; A half value carried around a loop through a phi node.
declare i1 @test_dummy(ptr %p) #0
; CHECK-ALL-LABEL: test_phi:
; CHECK-FP16: [[LOOP:.LBB[0-9_]+]]:
; CHECK-FP16: bl test_dummy
; CHECK-FP16: bne [[LOOP]]
; CHECK-LIBCALL: [[LOOP:.LBB[0-9_]+]]:
; CHECK-LIBCALL: bl test_dummy
; CHECK-LIBCALL: bne [[LOOP]]
define void @test_phi(ptr %p) #0 {
entry:
  %a = load half, ptr %p
  br label %loop
loop:
  %r = phi half [%a, %entry], [%b, %loop]
  %b = load half, ptr %p
  %c = call i1 @test_dummy(ptr %p)
  br i1 %c, label %loop, label %return
return:
  store half %r, ptr %p
  ret void
}

; half -> integer conversions. The i64 variants call a helper in every
; configuration.

; CHECK-ALL-LABEL: test_fptosi_i32:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP: vcvt.s32.f32
; CHECK-NOVFP: bl __aeabi_f2iz
define i32 @test_fptosi_i32(ptr %p) #0 {
  %a = load half, ptr %p, align 2
  %r = fptosi half %a to i32
  ret i32 %r
}

; CHECK-ALL-LABEL: test_fptosi_i64:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-ALL: bl __aeabi_f2lz
define i64 @test_fptosi_i64(ptr %p) #0 {
  %a = load half, ptr %p, align 2
  %r = fptosi half %a to i64
  ret i64 %r
}

; CHECK-ALL-LABEL: test_fptoui_i32:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP: vcvt.u32.f32
; CHECK-NOVFP: bl __aeabi_f2uiz
define i32 @test_fptoui_i32(ptr %p) #0 {
  %a = load half, ptr %p, align 2
  %r = fptoui half %a to i32
  ret i32 %r
}

; CHECK-ALL-LABEL: test_fptoui_i64:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-ALL: bl __aeabi_f2ulz
define i64 @test_fptoui_i64(ptr %p) #0 {
  %a = load half, ptr %p, align 2
  %r = fptoui half %a to i64
  ret i64 %r
}

; integer -> half conversions: convert to f32 first, then truncate to half.

; CHECK-ALL-LABEL: test_sitofp_i32:
; CHECK-VFP: vcvt.f32.s32
; CHECK-NOVFP: bl __aeabi_i2f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_sitofp_i32(i32 %a, ptr %p) #0 {
  %r = sitofp i32 %a to half
  store half %r, ptr %p
  ret void
}

; CHECK-ALL-LABEL: test_uitofp_i32:
; CHECK-VFP: vcvt.f32.u32
; CHECK-NOVFP: bl __aeabi_ui2f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_uitofp_i32(i32 %a, ptr %p) #0 {
  %r = uitofp i32 %a to half
  store half %r, ptr %p
  ret void
}

; CHECK-ALL-LABEL: test_sitofp_i64:
; CHECK-ALL: bl __aeabi_l2f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_sitofp_i64(i64 %a, ptr %p) #0 {
  %r = sitofp i64 %a to half
  store half %r, ptr %p
  ret void
}

; CHECK-ALL-LABEL: test_uitofp_i64:
; CHECK-ALL: bl __aeabi_ul2f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_uitofp_i64(i64 %a, ptr %p) #0 {
  %r = uitofp i64 %a to half
  store half %r, ptr %p
  ret void
}

; Floating-point truncation to half and extension from half. Truncating a
; double calls __aeabi_d2h in every configuration that checks it.

; CHECK-FP16-LABEL: test_fptrunc_float:
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fptrunc_float:
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fptrunc_float(float %f, ptr %p) #0 {
  %a = fptrunc float %f to half
  store half %a, ptr %p
  ret void
}

; CHECK-FP16-LABEL: test_fptrunc_double:
; CHECK-FP16: bl __aeabi_d2h
; CHECK-LIBCALL-LABEL: test_fptrunc_double:
; CHECK-LIBCALL: bl __aeabi_d2h
define void @test_fptrunc_double(double %d, ptr %p) #0 {
  %a = fptrunc double %d to half
  store half %a, ptr %p
  ret void
}

; CHECK-FP16-LABEL: test_fpextend_float:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL-LABEL: test_fpextend_float:
; CHECK-LIBCALL: bl __aeabi_h2f
define float @test_fpextend_float(ptr %p) {
  %a = load half, ptr %p, align 2
  %r = fpext half %a to float
  ret float %r
}

; CHECK-FP16-LABEL: test_fpextend_double:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL-LABEL: test_fpextend_double:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP: vcvt.f64.f32
; CHECK-NOVFP: bl __aeabi_f2d
define double @test_fpextend_double(ptr %p) {
  %a = load half, ptr %p, align 2
  %r = fpext half %a to double
  ret double %r
}

; Bitcasts between half and i16 need no conversion: a single 16-bit load or
; store followed by the return.

; CHECK-ALL-LABEL: test_bitcast_halftoi16:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: ldrh r0, [r0]
; CHECK-ALL-NEXT: bx lr
define i16 @test_bitcast_halftoi16(ptr %p) #0 {
  %a = load half, ptr %p, align 2
  %r = bitcast half %a to i16
  ret i16 %r
}

; CHECK-ALL-LABEL: test_bitcast_i16tohalf:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: strh r0, [r1]
; CHECK-ALL-NEXT: bx lr
define void @test_bitcast_i16tohalf(i16 %a, ptr %p) #0 {
  %r = bitcast i16 %a to half
  store half %r, ptr %p
  ret void
}

; Intrinsics exercised by the tests that follow.
declare half @llvm.sqrt.f16(half %a) #0
declare half @llvm.powi.f16.i32(half %a, i32 %b) #0
declare half @llvm.sin.f16(half %a) #0
declare half @llvm.cos.f16(half %a) #0
declare half @llvm.tan.f16(half %a) #0
declare half @llvm.pow.f16(half %a, half %b) #0
declare half @llvm.exp.f16(half %a) #0
declare half @llvm.exp2.f16(half %a) #0
declare half @llvm.log.f16(half %a) #0
declare half @llvm.log10.f16(half %a) #0
declare half @llvm.log2.f16(half %a) #0
declare half @llvm.fma.f16(half %a, half %b, half %c) #0
declare half @llvm.fabs.f16(half %a) #0
declare half @llvm.minnum.f16(half %a, half %b) #0
declare half @llvm.maxnum.f16(half %a, half %b) #0
declare half @llvm.copysign.f16(half %a, half %b) #0
declare half @llvm.floor.f16(half %a) #0
declare half @llvm.ceil.f16(half %a) #0
declare half @llvm.trunc.f16(half %a) #0
declare half @llvm.rint.f16(half %a) #0
declare half @llvm.nearbyint.f16(half %a) #0
declare half @llvm.round.f16(half %a) #0
declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0

; CHECK-ALL-LABEL: test_sqrt:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vsqrt.f32
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL-VFP: vsqrt.f32
; CHECK-NOVFP: bl sqrtf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_sqrt(ptr %p) #0 {
  %a = load half, ptr %p, align 2
  %r = call half @llvm.sqrt.f16(half %a)
  store half %r, ptr %p
  ret void
}

; The math intrinsics below are expected to extend to f32, call the
; single-precision library routine, and truncate the result back to half.

; CHECK-FP16-LABEL: test_fpowi:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl __powisf2
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fpowi:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __powisf2
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fpowi(ptr %p, i32 %b) #0 {
  %a = load half, ptr %p, align 2
  %r = call half @llvm.powi.f16.i32(half %a, i32 %b)
  store half %r, ptr %p
  ret void
}

; CHECK-FP16-LABEL: test_sin:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl sinf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_sin:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl sinf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_sin(ptr %p) #0 {
  %a = load half, ptr %p, align 2
  %r = call half @llvm.sin.f16(half %a)
  store half %r, ptr %p
  ret void
}

; CHECK-FP16-LABEL: test_cos:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl cosf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_cos:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl cosf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_cos(ptr %p) #0 {
  %a = load half, ptr %p, align 2
  %r = call half @llvm.cos.f16(half %a)
  store half %r, ptr %p
  ret void
}

; CHECK-FP16-LABEL: test_tan:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl tanf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_tan:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl tanf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_tan(ptr %p) #0 {
  %a = load half, ptr %p, align 2
  %r = call half @llvm.tan.f16(half %a)
  store half %r, ptr %p
  ret void
}

; CHECK-FP16-LABEL: test_pow:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl powf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_pow:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl powf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_pow(ptr %p, ptr %q) #0 {
  %a = load half, ptr %p, align 2
  %b = load half, ptr %q, align 2
  %r = call half @llvm.pow.f16(half %a, half %b)
  store half %r, ptr %p
  ret void
}

; pow with a constant exponent; the checks still expect a call to powf.
; CHECK-FP16-LABEL: test_cbrt:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl powf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_cbrt:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl powf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_cbrt(ptr %p) #0 {
  %a = load half, ptr %p, align 2
  %r = call half @llvm.pow.f16(half %a, half 0x3FD5540000000000)
  store half %r, ptr %p
  ret void
}

; CHECK-FP16-LABEL: test_exp:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl expf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_exp:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl expf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_exp(ptr %p) #0 {
  %a = load half, ptr %p, align 2
  %r = call half @llvm.exp.f16(half %a)
  store half %r, ptr %p
  ret void
}

; CHECK-FP16-LABEL: test_exp2:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl exp2f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_exp2:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl exp2f
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_exp2(ptr %p) #0 {
  %a = load half, ptr %p, align 2
  %r = call half @llvm.exp2.f16(half %a)
  store half %r, ptr %p
  ret void
}

; CHECK-FP16-LABEL: test_log:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl logf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_log:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl logf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_log(ptr %p) #0 {
  %a = load half, ptr %p, align 2
  %r = call half @llvm.log.f16(half %a)
  store half %r, ptr %p
  ret void
}

; CHECK-FP16-LABEL: test_log10:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl log10f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_log10:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl log10f
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_log10(ptr %p) #0 {
  %a = load half, ptr %p, align 2
  %r = call half @llvm.log10.f16(half %a)
  store half %r, ptr %p
  ret void
}

; CHECK-FP16-LABEL: test_log2:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl log2f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_log2:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl log2f
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_log2(ptr %p) #0 {
  %a = load half, ptr %p, align 2
  %r = call half @llvm.log2.f16(half %a)
  store half %r, ptr %p
  ret void
}

; CHECK-FP16-LABEL: test_fma:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl fmaf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fma:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl fmaf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fma(ptr %p, ptr %q, ptr %r) #0 {
  %a = load half, ptr %p, align 2
  %b = load half, ptr %q, align 2
  %c = load half, ptr %r, align 2
  %v = call half @llvm.fma.f16(half %a, half %b, half %c)
  store half %v, ptr %p
  ret void
}

; CHECK-FP16-LABEL: test_fabs:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vabs.f32
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fabs:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bic
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fabs(ptr %p) {
  %a = load half, ptr %p, align 2
  %r = call half @llvm.fabs.f16(half %a)
  store half %r, ptr %p
  ret void
}

; CHECK-FP16-LABEL: test_minnum:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl fminf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_minnum:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl fminf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_minnum(ptr %p, ptr %q) #0 {
  %a = load half, ptr %p, align 2
  %b = load half, ptr %q, align 2
  %r = call half @llvm.minnum.f16(half %a, half %b)
  store half %r, ptr %p
  ret void
}

; CHECK-FP16-LABEL: test_maxnum:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl fmaxf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_maxnum:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl fmaxf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_maxnum(ptr %p, ptr %q) #0 {
  %a = load half, ptr %p, align 2
  %b = load half, ptr %q, align 2
  %r = call half @llvm.maxnum.f16(half %a, half %b)
  store half %r, ptr %p
  ret void
}

; The next two tests use an fcmp + select against the constant 1.0 rather
; than an intrinsic call.

; CHECK-ALL-LABEL: test_minimum:
; CHECK-FP16: vmov.f32 s0, #1.000000e+00
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL-VFP: vmov.f32 s{{[0-9]+}}, #1.000000e+00
; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216
; CHECK-VFP: vcmp.f32
; CHECK-VFP: vmrs
; CHECK-VFP: movge
; CHECK-NOVFP: bl __aeabi_fcmpge
define void @test_minimum(ptr %p) #0 {
  %a = load half, ptr %p, align 2
  %c = fcmp ult half %a, 1.0
  %r = select i1 %c, half %a, half 1.0
  store half %r, ptr %p
  ret void
}

; CHECK-ALL-LABEL: test_maximum:
; CHECK-FP16: vmov.f32 s0, #1.000000e+00
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL-VFP: vmov.f32 s0, #1.000000e+00
; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216
; CHECK-VFP: vcmp.f32
; CHECK-VFP: vmrs
; CHECK-VFP: movls
; CHECK-NOVFP: bl __aeabi_fcmple
define void @test_maximum(ptr %p) #0 {
  %a = load half, ptr %p, align 2
  %c = fcmp ugt half %a, 1.0
  %r = select i1 %c, half %a, half 1.0
  store half %r, ptr %p
  ret void
}

; copysign is expected to be done with integer bit operations on the 16-bit
; values, identically in all configurations.
; CHECK-ALL-LABEL: test_copysign:
; CHECK-ALL: ldrh r2, [r0]
; CHECK-ALL-NEXT: ldrh r1, [r1]
; CHECK-ALL-NEXT: and r1, r1, #32768
; CHECK-ALL-NEXT: bfc r2, #15, #17
; CHECK-ALL-NEXT: orr r1, r2, r1
; CHECK-ALL-NEXT: strh r1, [r0]
; CHECK-ALL-NEXT: bx lr

define void @test_copysign(ptr %p, ptr %q) #0 {
  %a = load half, ptr %p, align 2
  %b = load half, ptr %q, align 2
  %r = call half @llvm.copysign.f16(half %a, half %b)
  store half %r, ptr %p
  ret void
}

; Rounding intrinsics: extend, call the f32 library routine, truncate.

; CHECK-FP16-LABEL: test_floor:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl floorf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_floor:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl floorf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_floor(ptr %p) {
  %a = load half, ptr %p, align 2
  %r = call half @llvm.floor.f16(half %a)
  store half %r, ptr %p
  ret void
}

; CHECK-FP16-LABEL: test_ceil:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl ceilf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_ceil:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl ceilf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_ceil(ptr %p) {
  %a = load half, ptr %p, align 2
  %r = call half @llvm.ceil.f16(half %a)
  store half %r, ptr %p
  ret void
}

; CHECK-FP16-LABEL: test_trunc:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl truncf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_trunc:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl truncf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_trunc(ptr %p) {
  %a = load half, ptr %p, align 2
  %r = call half @llvm.trunc.f16(half %a)
  store half %r, ptr %p
  ret void
}

; CHECK-FP16-LABEL: test_rint:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl rintf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_rint:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl rintf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_rint(ptr %p) {
  %a = load half, ptr %p, align 2
  %r = call half @llvm.rint.f16(half %a)
  store half %r, ptr %p
  ret void
}

; CHECK-FP16-LABEL: test_nearbyint:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl nearbyintf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_nearbyint:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl nearbyintf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_nearbyint(ptr %p) {
  %a = load half, ptr %p, align 2
  %r = call half @llvm.nearbyint.f16(half %a)
  store half %r, ptr %p
  ret void
}

; CHECK-FP16-LABEL: test_round:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl roundf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_round:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl roundf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_round(ptr %p) {
  %a = load half, ptr %p, align 2
  %r = call half @llvm.round.f16(half %a)
  store half %r, ptr %p
  ret void
}

; fmuladd is expected to become a separate multiply and add, with the
; intermediate product truncated to half and extended again in between.
; CHECK-FP16-LABEL: test_fmuladd:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vmul.f32
; CHECK-FP16: vcvtb.f16.f32
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vadd.f32
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fmuladd:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL-VFP: vmul.f32
; CHECK-NOVFP: bl __aeabi_fmul
; CHECK-LIBCALL: bl __aeabi_f2h
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL-VFP: vadd.f32
; CHECK-NOVFP: bl __aeabi_fadd
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fmuladd(ptr %p, ptr %q, ptr %r) #0 {
  %a = load half, ptr %p, align 2
  %b = load half, ptr %q, align 2
  %c = load half, ptr %r, align 2
  %v = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
  store half %v, ptr %p
  ret void
}

; f16 vectors are not legal in the backend. Vector elements are not assigned
; to the register, but are stored in the stack instead. Hence insertelement
; and extractelement have these extra loads and stores.

; CHECK-ALL-LABEL: test_insertelement:
; CHECK-ALL: sub sp, sp, #8

; CHECK-ALL-DAG: and
; CHECK-ALL-DAG: mov
; CHECK-ALL-DAG: ldrd
; CHECK-ALL-DAG: orr
; CHECK-ALL-DAG: ldrh
; CHECK-ALL-DAG: stm
; CHECK-ALL: ldrh
; CHECK-ALL-DAG: ldrh
; CHECK-ALL-DAG: ldrh
; CHECK-ALL-DAG: ldrh
; CHECK-ALL-DAG: strh
; CHECK-ALL-DAG: strh
; CHECK-ALL-DAG: strh
; CHECK-ALL-DAG: strh
; CHECK-ALL: strh

; CHECK-ALL: add sp, sp, #8
define void @test_insertelement(ptr %p, ptr %q, i32 %i) #0 {
  %a = load half, ptr %p, align 2
  %b = load <4 x half>, ptr %q, align 8
  %c = insertelement <4 x half> %b, half %a, i32 %i
  store volatile <4 x half> %c, ptr %q
  ret void
}

; CHECK-ALL-LABEL: test_extractelement:
; CHECK-ALL: push {{{.*}}, lr}
; CHECK-ALL: sub sp, sp, #8
; CHECK-ALL: ldrd
; CHECK-ALL: mov
; CHECK-ALL: orr
; CHECK-ALL: ldrh
; CHECK-ALL: strh
; CHECK-ALL: add sp, sp, #8
; CHECK-ALL: pop {{{.*}}, pc}
define void @test_extractelement(ptr %p, ptr %q, i32 %i) #0 {
  %a = load <4 x half>, ptr %q, align 8
  %b = extractelement <4 x half> %a, i32 %i
  store half %b, ptr %p
  ret void
}

; test struct operations

%struct.dummy = type { i32, half }

; CHECK-ALL-LABEL: test_insertvalue:
; CHECK-ALL-DAG: ldr
; CHECK-ALL-DAG: ldrh
; CHECK-ALL-DAG: strh
; CHECK-ALL-DAG: str
define void @test_insertvalue(ptr %p, ptr %q) {
  %a = load %struct.dummy, ptr %p
  %b = load half, ptr %q
  %c = insertvalue %struct.dummy %a, half %b, 1
  store %struct.dummy %c, ptr %p
  ret void
}

; CHECK-ALL-LABEL: test_extractvalue:
; CHECK-ALL: .fnstart
; CHECK-ALL: ldrh
; CHECK-ALL: strh
define void @test_extractvalue(ptr %p, ptr %q) {
  %a = load %struct.dummy, ptr %p
  %b = extractvalue %struct.dummy %a, 1
  store half %b, ptr %q
  ret void
}

; NOTE(review): the prefix used on the second directive below is not enabled
; by any invocation at the top of this file, so that directive is currently
; inert. Confirm whether that is intentional before renaming it.
; CHECK-ALL-LABEL: test_struct_return:
; CHECK-VFP-LIBCALL: bl __aeabi_h2f
; CHECK-NOVFP-DAG: ldr
; CHECK-NOVFP-DAG: ldrh
define %struct.dummy @test_struct_return(ptr %p) {
  %a = load %struct.dummy, ptr %p
  ret %struct.dummy %a
}

; CHECK-ALL-LABEL: test_struct_arg:
; CHECK-ALL-NEXT: .fnstart
; CHECK-NOVFP-NEXT: mov r0, r1
; CHECK-ALL-NEXT: bx lr
define half @test_struct_arg(%struct.dummy %p) {
  %a = extractvalue %struct.dummy %p, 1
  ret half %a
}

; Integer-to-half conversion whose result feeds an fadd with a half argument.
; The label uses the prefix shared by all invocations so that the unordered
; checks below are anchored to this function.

; CHECK-ALL-LABEL: test_uitofp_i32_fadd:
; CHECK-VFP-DAG: vcvt.f32.u32
; CHECK-NOVFP-DAG: bl __aeabi_ui2f

; CHECK-FP16-DAG: vcvtb.f16.f32
; CHECK-FP16-DAG: vcvtb.f32.f16
; CHECK-LIBCALL-DAG: bl __aeabi_h2f
; CHECK-LIBCALL-DAG: bl __aeabi_h2f

; CHECK-VFP-DAG: vadd.f32
; CHECK-NOVFP-DAG: bl __aeabi_fadd

; CHECK-FP16-DAG: vcvtb.f16.f32
; CHECK-LIBCALL-DAG: bl __aeabi_f2h
define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 {
  %c = uitofp i32 %a to half
  %r = fadd half %b, %c
  ret half %r
}

; CHECK-ALL-LABEL: test_sitofp_i32_fadd:
; CHECK-VFP-DAG: vcvt.f32.s32
; CHECK-NOVFP-DAG: bl __aeabi_i2f

; CHECK-FP16-DAG: vcvtb.f16.f32
; CHECK-FP16-DAG: vcvtb.f32.f16
; CHECK-LIBCALL-DAG: bl __aeabi_h2f
; CHECK-LIBCALL-DAG: bl __aeabi_h2f

; CHECK-VFP-DAG: vadd.f32
; CHECK-NOVFP-DAG: bl __aeabi_fadd

; CHECK-FP16-DAG: vcvtb.f16.f32
; CHECK-LIBCALL-DAG: bl __aeabi_f2h
define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 {
  %c = sitofp i32 %a to half
  %r = fadd half %b, %c
  ret half %r
}

attributes #0 = { nounwind }