; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cyclone | FileCheck --check-prefixes=CHECK,CHECK-CYC %s
; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cortex-a57 | FileCheck --check-prefixes=CHECK,CHECK-A57 %s

; Signed i32 -> float: the operand is expected to be loaded directly into s0.
define float @t1(ptr nocapture %src) nounwind ssp {
; CHECK-LABEL: t1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    scvtf s0, s0
; CHECK-NEXT:    ret
entry:
  %ld = load i32, ptr %src, align 4
  %cvt = sitofp i32 %ld to float
  ret float %cvt
}

; Unsigned i32 -> float: same shape as t1, with ucvtf.
define float @t2(ptr nocapture %src) nounwind ssp {
; CHECK-LABEL: t2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    ret
entry:
  %ld = load i32, ptr %src, align 4
  %cvt = uitofp i32 %ld to float
  ret float %cvt
}

; Signed i64 -> double: the operand is expected to be loaded directly into d0.
define double @t3(ptr nocapture %src) nounwind ssp {
; CHECK-LABEL: t3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    scvtf d0, d0
; CHECK-NEXT:    ret
entry:
  %ld = load i64, ptr %src, align 4
  %cvt = sitofp i64 %ld to double
  ret double %cvt
}

; Unsigned i64 -> double: same shape as t3, with ucvtf.
define double @t4(ptr nocapture %src) nounwind ssp {
; CHECK-LABEL: t4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    ret
entry:
  %ld = load i64, ptr %src, align 4
  %cvt = uitofp i64 %ld to double
  ret double %cvt
}

; rdar://13136456
; Signed i32 -> double under optsize: the expected code loads into a GPR (w8)
; and converts from there.
define double @t5(ptr nocapture %src) nounwind ssp optsize {
; CHECK-LABEL: t5:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    scvtf d0, w8
; CHECK-NEXT:    ret
entry:
  %ld = load i32, ptr %src, align 4
  %cvt = sitofp i32 %ld to double
  ret double %cvt
}

; Verify that a value about to be converted to floating point is loaded
; straight into an FP register.
; This is much faster than loading into a GPR and then doing the
; GPR -> FPR conversion.
; <rdar://problem/14599607>
;
; Check the following patterns for signed/unsigned:
; 1. load with scaled imm to float.
; 2. load with scaled register to float.
; 3. load with scaled imm to double.
; 4. load with scaled register to double.
; 5. load with unscaled imm to float.
; 6. load with unscaled imm to double.
; With loading size: 8, 16, 32, and 64-bits.

; ********* 1. load with scaled imm to float. *********
; Unsigned i8 at byte offset 1 -> float, then squared.
define float @fct1(ptr nocapture %sp0) {
; CHECK-LABEL: fct1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr b0, [x0, #1]
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i8, ptr %sp0, i64 1
  %ld = load i8, ptr %p, align 1
  %fp = uitofp i8 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; Unsigned i16 at element index 1 (byte offset 2) -> float, then squared.
define float @fct2(ptr nocapture %sp0) {
; CHECK-LABEL: fct2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr h0, [x0, #2]
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i16, ptr %sp0, i64 1
  %ld = load i16, ptr %p, align 1
  %fp = uitofp i16 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; Unsigned i32 at element index 1 (byte offset 4) -> float, then squared.
define float @fct3(ptr nocapture %sp0) {
; CHECK-LABEL: fct3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr s0, [x0, #4]
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i32, ptr %sp0, i64 1
  %ld = load i32, ptr %p, align 1
  %fp = uitofp i32 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; i64 -> f32 is not supported on floating point unit.
; Unsigned i64 at element index 1 -> float; the expected code loads into x8
; and converts from the GPR.
define float @fct4(ptr nocapture %sp0) {
; CHECK-LABEL: fct4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr x8, [x0, #8]
; CHECK-NEXT:    ucvtf s0, x8
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i64, ptr %sp0, i64 1
  %ld = load i64, ptr %p, align 1
  %fp = uitofp i64 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; ********* 2. load with scaled register to float. *********
; Unsigned i8 indexed by %offset -> float, then squared.
define float @fct5(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct5:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr b0, [x0, x1]
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i8, ptr %sp0, i64 %offset
  %ld = load i8, ptr %p, align 1
  %fp = uitofp i8 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; Unsigned i16 indexed by %offset (register shifted by 1) -> float.
define float @fct6(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct6:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i16, ptr %sp0, i64 %offset
  %ld = load i16, ptr %p, align 1
  %fp = uitofp i16 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; Unsigned i32 indexed by %offset (register shifted by 2) -> float.
define float @fct7(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct7:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr s0, [x0, x1, lsl #2]
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i32, ptr %sp0, i64 %offset
  %ld = load i32, ptr %p, align 1
  %fp = uitofp i32 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; i64 -> f32 is not supported on floating point unit.
; Unsigned i64 indexed by %offset -> float; the expected code loads into x8
; and converts from the GPR.
define float @fct8(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr x8, [x0, x1, lsl #3]
; CHECK-NEXT:    ucvtf s0, x8
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i64, ptr %sp0, i64 %offset
  %ld = load i64, ptr %p, align 1
  %fp = uitofp i64 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}


; ********* 3. load with scaled imm to double. *********
; Unsigned i8 at byte offset 1 -> double, then squared.
define double @fct9(ptr nocapture %sp0) {
; CHECK-LABEL: fct9:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr b0, [x0, #1]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i8, ptr %sp0, i64 1
  %ld = load i8, ptr %p, align 1
  %fp = uitofp i8 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; Unsigned i16 at element index 1 -> double, then squared.
define double @fct10(ptr nocapture %sp0) {
; CHECK-LABEL: fct10:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr h0, [x0, #2]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i16, ptr %sp0, i64 1
  %ld = load i16, ptr %p, align 1
  %fp = uitofp i16 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; Unsigned i32 at element index 1 -> double, then squared.
define double @fct11(ptr nocapture %sp0) {
; CHECK-LABEL: fct11:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr s0, [x0, #4]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i32, ptr %sp0, i64 1
  %ld = load i32, ptr %p, align 1
  %fp = uitofp i32 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; Unsigned i64 at element index 1 -> double, then squared.
define double @fct12(ptr nocapture %sp0) {
; CHECK-LABEL: fct12:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d0, [x0, #8]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i64, ptr %sp0, i64 1
  %ld = load i64, ptr %p, align 1
  %fp = uitofp i64 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; ********* 4. load with scaled register to double. *********
; Unsigned i8 indexed by %offset -> double, then squared.
define double @fct13(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct13:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr b0, [x0, x1]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i8, ptr %sp0, i64 %offset
  %ld = load i8, ptr %p, align 1
  %fp = uitofp i8 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; Unsigned i16 indexed by %offset (register shifted by 1) -> double.
define double @fct14(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct14:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i16, ptr %sp0, i64 %offset
  %ld = load i16, ptr %p, align 1
  %fp = uitofp i16 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; Unsigned i32 indexed by %offset (register shifted by 2) -> double.
define double @fct15(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct15:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr s0, [x0, x1, lsl #2]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i32, ptr %sp0, i64 %offset
  %ld = load i32, ptr %p, align 1
  %fp = uitofp i32 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; Unsigned i64 indexed by %offset (register shifted by 3) -> double.
define double @fct16(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d0, [x0, x1, lsl #3]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i64, ptr %sp0, i64 %offset
  %ld = load i64, ptr %p, align 1
  %fp = uitofp i64 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; ********* 5. load with unscaled imm to float. *********
; Unsigned i8 at byte offset -1 (built via ptrtoint/add/inttoptr) -> float.
define float @fct17(ptr nocapture %sp0) {
; CHECK-LABEL: fct17:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldur b0, [x0, #-1]
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %int = ptrtoint ptr %sp0 to i64
  %off = add i64 %int, -1
  %p = inttoptr i64 %off to ptr
  %ld = load i8, ptr %p, align 1
  %fp = uitofp i8 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; Unsigned i16 at byte offset 1 (not a multiple of the access size) -> float.
define float @fct18(ptr nocapture %sp0) {
; CHECK-LABEL: fct18:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur h0, [x0, #1]
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
  %int = ptrtoint ptr %sp0 to i64
  %off = add i64 %int, 1
  %p = inttoptr i64 %off to ptr
  %ld = load i16, ptr %p, align 1
  %fp = uitofp i16 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; Unsigned i32 at byte offset 1 -> float.
define float @fct19(ptr nocapture %sp0) {
; CHECK-LABEL: fct19:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur s0, [x0, #1]
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
  %int = ptrtoint ptr %sp0 to i64
  %off = add i64 %int, 1
  %p = inttoptr i64 %off to ptr
  %ld = load i32, ptr %p, align 1
  %fp = uitofp i32 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; i64 -> f32 is not supported on floating point unit.
; Unsigned i64 at byte offset 1 -> float; the expected code loads into x8
; and converts from the GPR.
define float @fct20(ptr nocapture %sp0) {
; CHECK-LABEL: fct20:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur x8, [x0, #1]
; CHECK-NEXT:    ucvtf s0, x8
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
  %int = ptrtoint ptr %sp0 to i64
  %off = add i64 %int, 1
  %p = inttoptr i64 %off to ptr
  %ld = load i64, ptr %p, align 1
  %fp = uitofp i64 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; ********* 6. load with unscaled imm to double. *********
; Unsigned i8 at byte offset -1 -> double.
define double @fct21(ptr nocapture %sp0) {
; CHECK-LABEL: fct21:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldur b0, [x0, #-1]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %int = ptrtoint ptr %sp0 to i64
  %off = add i64 %int, -1
  %p = inttoptr i64 %off to ptr
  %ld = load i8, ptr %p, align 1
  %fp = uitofp i8 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; Unsigned i16 at byte offset 1 -> double.
define double @fct22(ptr nocapture %sp0) {
; CHECK-LABEL: fct22:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur h0, [x0, #1]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
  %int = ptrtoint ptr %sp0 to i64
  %off = add i64 %int, 1
  %p = inttoptr i64 %off to ptr
  %ld = load i16, ptr %p, align 1
  %fp = uitofp i16 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; Unsigned i32 at byte offset 1 -> double.
define double @fct23(ptr nocapture %sp0) {
; CHECK-LABEL: fct23:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur s0, [x0, #1]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
  %int = ptrtoint ptr %sp0 to i64
  %off = add i64 %int, 1
  %p = inttoptr i64 %off to ptr
  %ld = load i32, ptr %p, align 1
  %fp = uitofp i32 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; Unsigned i64 at byte offset 1 -> double.
define double @fct24(ptr nocapture %sp0) {
; CHECK-LABEL: fct24:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
  %int = ptrtoint ptr %sp0 to i64
  %off = add i64 %int, 1
  %p = inttoptr i64 %off to ptr
  %ld = load i64, ptr %p, align 1
  %fp = uitofp i64 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; ********* 1s. load with scaled imm to float. *********
; Signed i8 at byte offset 1 -> float. The two CPUs differ: the cyclone
; expectations load into an FP register and widen with sshll, the cortex-a57
; expectations use a sign-extending GPR load.
define float @sfct1(ptr nocapture %sp0) {
; CHECK-CYC-LABEL: sfct1:
; CHECK-CYC:       // %bb.0: // %entry
; CHECK-CYC-NEXT:    ldr b0, [x0, #1]
; CHECK-CYC-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-CYC-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CYC-NEXT:    scvtf s0, s0
; CHECK-CYC-NEXT:    fmul s0, s0, s0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct1:
; CHECK-A57:       // %bb.0: // %entry
; CHECK-A57-NEXT:    ldrsb w8, [x0, #1]
; CHECK-A57-NEXT:    scvtf s0, w8
; CHECK-A57-NEXT:    fmul s0, s0, s0
; CHECK-A57-NEXT:    ret
entry:
  %p = getelementptr i8, ptr %sp0, i64 1
  %ld = load i8, ptr %p, align 1
  %fp = sitofp i8 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; Signed i16 at element index 1 -> float; per-CPU expectations.
define float @sfct2(ptr nocapture %sp0) {
; CHECK-CYC-LABEL: sfct2:
; CHECK-CYC:       // %bb.0: // %entry
; CHECK-CYC-NEXT:    ldr h0, [x0, #2]
; CHECK-CYC-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CYC-NEXT:    scvtf s0, s0
; CHECK-CYC-NEXT:    fmul s0, s0, s0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct2:
; CHECK-A57:       // %bb.0: // %entry
; CHECK-A57-NEXT:    ldrsh w8, [x0, #2]
; CHECK-A57-NEXT:    scvtf s0, w8
; CHECK-A57-NEXT:    fmul s0, s0, s0
; CHECK-A57-NEXT:    ret
entry:
  %p = getelementptr i16, ptr %sp0, i64 1
  %ld = load i16, ptr %p, align 1
  %fp = sitofp i16 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; Signed i32 at element index 1 -> float; same expectations on both CPUs.
define float @sfct3(ptr nocapture %sp0) {
; CHECK-LABEL: sfct3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr s0, [x0, #4]
; CHECK-NEXT:    scvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i32, ptr %sp0, i64 1
  %ld = load i32, ptr %p, align 1
  %fp = sitofp i32 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; i64 -> f32 is not supported on floating point unit.
define float @sfct4(ptr nocapture %sp0) {
; CHECK-LABEL: sfct4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr x8, [x0, #8]
; CHECK-NEXT:    scvtf s0, x8
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i64, ptr %sp0, i64 1
  %ld = load i64, ptr %p, align 1
  %fp = sitofp i64 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; ********* 2s. load with scaled register to float. *********
; Signed i8 indexed by %offset -> float; per-CPU expectations.
define float @sfct5(ptr nocapture %sp0, i64 %offset) {
; CHECK-CYC-LABEL: sfct5:
; CHECK-CYC:       // %bb.0: // %entry
; CHECK-CYC-NEXT:    ldr b0, [x0, x1]
; CHECK-CYC-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-CYC-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CYC-NEXT:    scvtf s0, s0
; CHECK-CYC-NEXT:    fmul s0, s0, s0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct5:
; CHECK-A57:       // %bb.0: // %entry
; CHECK-A57-NEXT:    ldrsb w8, [x0, x1]
; CHECK-A57-NEXT:    scvtf s0, w8
; CHECK-A57-NEXT:    fmul s0, s0, s0
; CHECK-A57-NEXT:    ret
entry:
  %p = getelementptr i8, ptr %sp0, i64 %offset
  %ld = load i8, ptr %p, align 1
  %fp = sitofp i8 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; Signed i16 indexed by %offset -> float; per-CPU expectations.
define float @sfct6(ptr nocapture %sp0, i64 %offset) {
; CHECK-CYC-LABEL: sfct6:
; CHECK-CYC:       // %bb.0: // %entry
; CHECK-CYC-NEXT:    ldr h0, [x0, x1, lsl #1]
; CHECK-CYC-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CYC-NEXT:    scvtf s0, s0
; CHECK-CYC-NEXT:    fmul s0, s0, s0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct6:
; CHECK-A57:       // %bb.0: // %entry
; CHECK-A57-NEXT:    ldrsh w8, [x0, x1, lsl #1]
; CHECK-A57-NEXT:    scvtf s0, w8
; CHECK-A57-NEXT:    fmul s0, s0, s0
; CHECK-A57-NEXT:    ret
entry:
  %p = getelementptr i16, ptr %sp0, i64 %offset
  %ld = load i16, ptr %p, align 1
  %fp = sitofp i16 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; Signed i32 indexed by %offset -> float; same expectations on both CPUs.
define float @sfct7(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct7:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr s0, [x0, x1, lsl #2]
; CHECK-NEXT:    scvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i32, ptr %sp0, i64 %offset
  %ld = load i32, ptr %p, align 1
  %fp = sitofp i32 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; i64 -> f32 is not supported on floating point unit.
define float @sfct8(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr x8, [x0, x1, lsl #3]
; CHECK-NEXT:    scvtf s0, x8
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i64, ptr %sp0, i64 %offset
  %ld = load i64, ptr %p, align 1
  %fp = sitofp i64 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; ********* 3s. load with scaled imm to double. *********
; Signed i8 at byte offset 1 -> double; both CPUs are expected to use a
; sign-extending GPR load.
define double @sfct9(ptr nocapture %sp0) {
; CHECK-LABEL: sfct9:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldrsb w8, [x0, #1]
; CHECK-NEXT:    scvtf d0, w8
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i8, ptr %sp0, i64 1
  %ld = load i8, ptr %p, align 1
  %fp = sitofp i8 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; Signed i16 at element index 1 -> double; per-CPU expectations.
define double @sfct10(ptr nocapture %sp0) {
; CHECK-CYC-LABEL: sfct10:
; CHECK-CYC:       // %bb.0: // %entry
; CHECK-CYC-NEXT:    ldr h0, [x0, #2]
; CHECK-CYC-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CYC-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-CYC-NEXT:    scvtf d0, d0
; CHECK-CYC-NEXT:    fmul d0, d0, d0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct10:
; CHECK-A57:       // %bb.0: // %entry
; CHECK-A57-NEXT:    ldrsh w8, [x0, #2]
; CHECK-A57-NEXT:    scvtf d0, w8
; CHECK-A57-NEXT:    fmul d0, d0, d0
; CHECK-A57-NEXT:    ret
entry:
  %p = getelementptr i16, ptr %sp0, i64 1
  %ld = load i16, ptr %p, align 1
  %fp = sitofp i16 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; Signed i32 at element index 1 -> double; per-CPU expectations.
define double @sfct11(ptr nocapture %sp0) {
; CHECK-CYC-LABEL: sfct11:
; CHECK-CYC:       // %bb.0: // %entry
; CHECK-CYC-NEXT:    ldr s0, [x0, #4]
; CHECK-CYC-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-CYC-NEXT:    scvtf d0, d0
; CHECK-CYC-NEXT:    fmul d0, d0, d0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct11:
; CHECK-A57:       // %bb.0: // %entry
; CHECK-A57-NEXT:    ldr w8, [x0, #4]
; CHECK-A57-NEXT:    scvtf d0, w8
; CHECK-A57-NEXT:    fmul d0, d0, d0
; CHECK-A57-NEXT:    ret
entry:
  %p = getelementptr i32, ptr %sp0, i64 1
  %ld = load i32, ptr %p, align 1
  %fp = sitofp i32 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; Signed i64 at element index 1 -> double; same expectations on both CPUs.
define double @sfct12(ptr nocapture %sp0) {
; CHECK-LABEL: sfct12:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d0, [x0, #8]
; CHECK-NEXT:    scvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i64, ptr %sp0, i64 1
  %ld = load i64, ptr %p, align 1
  %fp = sitofp i64 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; ********* 4s. load with scaled register to double. *********
; Signed i8 indexed by %offset -> double; both CPUs are expected to use a
; sign-extending GPR load.
define double @sfct13(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct13:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldrsb w8, [x0, x1]
; CHECK-NEXT:    scvtf d0, w8
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i8, ptr %sp0, i64 %offset
  %ld = load i8, ptr %p, align 1
  %fp = sitofp i8 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; Signed i16 indexed by %offset -> double; per-CPU expectations.
define double @sfct14(ptr nocapture %sp0, i64 %offset) {
; CHECK-CYC-LABEL: sfct14:
; CHECK-CYC:       // %bb.0: // %entry
; CHECK-CYC-NEXT:    ldr h0, [x0, x1, lsl #1]
; CHECK-CYC-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CYC-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-CYC-NEXT:    scvtf d0, d0
; CHECK-CYC-NEXT:    fmul d0, d0, d0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct14:
; CHECK-A57:       // %bb.0: // %entry
; CHECK-A57-NEXT:    ldrsh w8, [x0, x1, lsl #1]
; CHECK-A57-NEXT:    scvtf d0, w8
; CHECK-A57-NEXT:    fmul d0, d0, d0
; CHECK-A57-NEXT:    ret
entry:
  %p = getelementptr i16, ptr %sp0, i64 %offset
  %ld = load i16, ptr %p, align 1
  %fp = sitofp i16 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; Signed i32 indexed by %offset -> double; per-CPU expectations.
define double @sfct15(ptr nocapture %sp0, i64 %offset) {
; CHECK-CYC-LABEL: sfct15:
; CHECK-CYC:       // %bb.0: // %entry
; CHECK-CYC-NEXT:    ldr s0, [x0, x1, lsl #2]
; CHECK-CYC-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-CYC-NEXT:    scvtf d0, d0
; CHECK-CYC-NEXT:    fmul d0, d0, d0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct15:
; CHECK-A57:       // %bb.0: // %entry
; CHECK-A57-NEXT:    ldr w8, [x0, x1, lsl #2]
; CHECK-A57-NEXT:    scvtf d0, w8
; CHECK-A57-NEXT:    fmul d0, d0, d0
; CHECK-A57-NEXT:    ret
entry:
  %p = getelementptr i32, ptr %sp0, i64 %offset
  %ld = load i32, ptr %p, align 1
  %fp = sitofp i32 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; Signed i64 indexed by %offset -> double; same expectations on both CPUs.
define double @sfct16(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d0, [x0, x1, lsl #3]
; CHECK-NEXT:    scvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i64, ptr %sp0, i64 %offset
  %ld = load i64, ptr %p, align 1
  %fp = sitofp i64 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; ********* 5s. load with unscaled imm to float. *********
; Signed i8 at byte offset -1 -> float; per-CPU expectations.
define float @sfct17(ptr nocapture %sp0) {
; CHECK-CYC-LABEL: sfct17:
; CHECK-CYC:       // %bb.0: // %entry
; CHECK-CYC-NEXT:    ldur b0, [x0, #-1]
; CHECK-CYC-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-CYC-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CYC-NEXT:    scvtf s0, s0
; CHECK-CYC-NEXT:    fmul s0, s0, s0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct17:
; CHECK-A57:       // %bb.0: // %entry
; CHECK-A57-NEXT:    ldursb w8, [x0, #-1]
; CHECK-A57-NEXT:    scvtf s0, w8
; CHECK-A57-NEXT:    fmul s0, s0, s0
; CHECK-A57-NEXT:    ret
entry:
  %int = ptrtoint ptr %sp0 to i64
  %off = add i64 %int, -1
  %p = inttoptr i64 %off to ptr
  %ld = load i8, ptr %p, align 1
  %fp = sitofp i8 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; Signed i16 at byte offset 1 -> float; per-CPU expectations.
define float @sfct18(ptr nocapture %sp0) {
; CHECK-CYC-LABEL: sfct18:
; CHECK-CYC:       // %bb.0:
; CHECK-CYC-NEXT:    ldur h0, [x0, #1]
; CHECK-CYC-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CYC-NEXT:    scvtf s0, s0
; CHECK-CYC-NEXT:    fmul s0, s0, s0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct18:
; CHECK-A57:       // %bb.0:
; CHECK-A57-NEXT:    ldursh w8, [x0, #1]
; CHECK-A57-NEXT:    scvtf s0, w8
; CHECK-A57-NEXT:    fmul s0, s0, s0
; CHECK-A57-NEXT:    ret
  %int = ptrtoint ptr %sp0 to i64
  %off = add i64 %int, 1
  %p = inttoptr i64 %off to ptr
  %ld = load i16, ptr %p, align 1
  %fp = sitofp i16 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; Signed i32 at byte offset 1 -> float; same expectations on both CPUs.
define float @sfct19(ptr nocapture %sp0) {
; CHECK-LABEL: sfct19:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur s0, [x0, #1]
; CHECK-NEXT:    scvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
  %int = ptrtoint ptr %sp0 to i64
  %off = add i64 %int, 1
  %p = inttoptr i64 %off to ptr
  %ld = load i32, ptr %p, align 1
  %fp = sitofp i32 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; i64 -> f32 is not supported on floating point unit.
define float @sfct20(ptr nocapture %sp0) {
; CHECK-LABEL: sfct20:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur x8, [x0, #1]
; CHECK-NEXT:    scvtf s0, x8
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
  %int = ptrtoint ptr %sp0 to i64
  %off = add i64 %int, 1
  %p = inttoptr i64 %off to ptr
  %ld = load i64, ptr %p, align 1
  %fp = sitofp i64 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; ********* 6s. load with unscaled imm to double. *********
; Signed i8 at byte offset -1 -> double; both CPUs are expected to use a
; sign-extending GPR load.
define double @sfct21(ptr nocapture %sp0) {
; CHECK-LABEL: sfct21:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldursb w8, [x0, #-1]
; CHECK-NEXT:    scvtf d0, w8
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %int = ptrtoint ptr %sp0 to i64
  %off = add i64 %int, -1
  %p = inttoptr i64 %off to ptr
  %ld = load i8, ptr %p, align 1
  %fp = sitofp i8 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; Signed i16 at byte offset 1 -> double; per-CPU expectations.
define double @sfct22(ptr nocapture %sp0) {
; CHECK-CYC-LABEL: sfct22:
; CHECK-CYC:       // %bb.0:
; CHECK-CYC-NEXT:    ldur h0, [x0, #1]
; CHECK-CYC-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CYC-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-CYC-NEXT:    scvtf d0, d0
; CHECK-CYC-NEXT:    fmul d0, d0, d0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct22:
; CHECK-A57:       // %bb.0:
; CHECK-A57-NEXT:    ldursh w8, [x0, #1]
; CHECK-A57-NEXT:    scvtf d0, w8
; CHECK-A57-NEXT:    fmul d0, d0, d0
; CHECK-A57-NEXT:    ret
  %int = ptrtoint ptr %sp0 to i64
  %off = add i64 %int, 1
  %p = inttoptr i64 %off to ptr
  %ld = load i16, ptr %p, align 1
  %fp = sitofp i16 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; Signed i32 at byte offset 1 -> double; per-CPU expectations.
define double @sfct23(ptr nocapture %sp0) {
; CHECK-CYC-LABEL: sfct23:
; CHECK-CYC:       // %bb.0:
; CHECK-CYC-NEXT:    ldur s0, [x0, #1]
; CHECK-CYC-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-CYC-NEXT:    scvtf d0, d0
; CHECK-CYC-NEXT:    fmul d0, d0, d0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct23:
; CHECK-A57:       // %bb.0:
; CHECK-A57-NEXT:    ldur w8, [x0, #1]
; CHECK-A57-NEXT:    scvtf d0, w8
; CHECK-A57-NEXT:    fmul d0, d0, d0
; CHECK-A57-NEXT:    ret
  %int = ptrtoint ptr %sp0 to i64
  %off = add i64 %int, 1
  %p = inttoptr i64 %off to ptr
  %ld = load i32, ptr %p, align 1
  %fp = sitofp i32 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; Signed i64 at byte offset 1 -> double; same expectations on both CPUs.
define double @sfct24(ptr nocapture %sp0) {
; CHECK-LABEL: sfct24:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    scvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
  %int = ptrtoint ptr %sp0 to i64
  %off = add i64 %int, 1
  %p = inttoptr i64 %off to ptr
  %ld = load i64, ptr %p, align 1
  %fp = sitofp i64 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; Check that we do not use SSHLL code sequence when code size is a concern.
; Same IR as sfct17, but marked optsize.
define float @codesize_sfct17(ptr nocapture %sp0) optsize {
; CHECK-LABEL: codesize_sfct17:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldursb w8, [x0, #-1]
; CHECK-NEXT:    scvtf s0, w8
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %int = ptrtoint ptr %sp0 to i64
  %off = add i64 %int, -1
  %p = inttoptr i64 %off to ptr
  %ld = load i8, ptr %p, align 1
  %fp = sitofp i8 %ld to float
  %sq = fmul float %fp, %fp
  ret float %sq
}

; Same IR as sfct11, but marked minsize.
define double @codesize_sfct11(ptr nocapture %sp0) minsize {
; CHECK-LABEL: codesize_sfct11:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr w8, [x0, #4]
; CHECK-NEXT:    scvtf d0, w8
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %p = getelementptr i32, ptr %sp0, i64 1
  %ld = load i32, ptr %p, align 1
  %fp = sitofp i32 %ld to double
  %sq = fmul double %fp, %fp
  ret double %sq
}

; Adding fp128 custom lowering makes these a little fragile since we have to
; return the correct mix of Legal/Expand from the custom method.
;
; rdar://problem/14991489

; Unsigned i128 -> float is expected to become a call to __floatuntisf.
define float @float_from_i128(i128 %in) {
; CHECK-LABEL: float_from_i128:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    bl __floatuntisf
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = uitofp i128 %in to float
  ret float %res
}

; Signed i128 -> double is expected to become a call to __floattidf.
define double @double_from_i128(i128 %in) {
; CHECK-LABEL: double_from_i128:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    bl __floattidf
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = sitofp i128 %in to double
  ret double %res
}

; Unsigned i128 -> fp128 is expected to become a call to __floatuntitf.
define fp128 @fp128_from_i128(i128 %in) {
; CHECK-LABEL: fp128_from_i128:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    bl __floatuntitf
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = uitofp i128 %in to fp128
  ret fp128 %res
}

; float -> signed i128 is expected to become a call to __fixsfti.
define i128 @i128_from_float(float %in) {
; CHECK-LABEL: i128_from_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    bl __fixsfti
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = fptosi float %in to i128
  ret i128 %res
}

; double -> unsigned i128 is expected to become a call to __fixunsdfti.
define i128 @i128_from_double(double %in) {
; CHECK-LABEL: i128_from_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    bl __fixunsdfti
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = fptoui double %in to i128
  ret i128 %res
}

; fp128 -> signed i128 is expected to become a call to __fixtfti.
define i128 @i128_from_fp128(fp128 %in) {
; CHECK-LABEL: i128_from_fp128:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    bl __fixtfti
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = fptosi fp128 %in to i128
  ret i128 %res
}
