; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; Ensure we don't attempt to combine into an extending fp128 load.
define void @fcvt_v4f64_v4f128(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v4f64_v4f128:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-32]! // 8-byte Folded Spill
; CHECK-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #48
; CHECK-NEXT:    addvl sp, sp, #-2
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    add x8, sp, #48
; CHECK-NEXT:    mov x19, x1
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    bl __extenddftf2
; CHECK-NEXT:    add x8, sp, #48
; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    ldr z1, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    mov d1, v1.d[1]
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    bl __extenddftf2
; CHECK-NEXT:    add x8, sp, #48
; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    ldr z0, [x8, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    bl __extenddftf2
; CHECK-NEXT:    add x8, sp, #48
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    mov d1, v1.d[1]
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    bl __extenddftf2
; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    stp q1, q0, [x19]
; CHECK-NEXT:    ldp q1, q0, [sp, #16] // 32-byte Folded Reload
; CHECK-NEXT:    stp q0, q1, [x19, #32]
; CHECK-NEXT:    addvl sp, sp, #2
; CHECK-NEXT:    add sp, sp, #48
; CHECK-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x29, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %res = fpext <4 x double> %op1 to <4 x fp128>
  store <4 x fp128> %res, ptr %b
  ret void
}

; Ensure we don't attempt to combine into a truncating fp128 store.
define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v4f128_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-32]! // 8-byte Folded Spill
; CHECK-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #128
; CHECK-NEXT:    addvl sp, sp, #-2
; CHECK-NEXT:    ldp q1, q0, [x0, #64]
; CHECK-NEXT:    mov x19, x1
; CHECK-NEXT:    stp q0, q1, [sp, #96] // 32-byte Folded Spill
; CHECK-NEXT:    ldp q1, q0, [x0, #96]
; CHECK-NEXT:    stp q0, q1, [sp, #64] // 32-byte Folded Spill
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    stp q0, q1, [sp, #32] // 32-byte Folded Spill
; CHECK-NEXT:    ldp q1, q0, [x0, #32]
; CHECK-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    bl __trunctfdf2
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    bl __trunctfdf2
; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    add x8, sp, #128
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    bl __trunctfdf2
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    bl __trunctfdf2
; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    add x8, sp, #128
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    bl __trunctfdf2
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT:    bl __trunctfdf2
; CHECK-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    add x8, sp, #128
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT:    bl __trunctfdf2
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
; CHECK-NEXT:    bl __trunctfdf2
; CHECK-NEXT:    ldr q1, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    add x8, sp, #128
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ldr z1, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    mov x8, #4 // =0x4
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    st1d { z0.d }, p0, [x19, x8, lsl #3]
; CHECK-NEXT:    add x8, sp, #128
; CHECK-NEXT:    ldr z0, [x8, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    st1d { z0.d }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #2
; CHECK-NEXT:    add sp, sp, #128
; CHECK-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x29, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %op1 = load <8 x fp128>, ptr %a
  %res = fptrunc <8 x fp128> %op1 to <8 x double>
  store <8 x double> %res, ptr %b
  ret void
}

attributes #0 = { nounwind "target-features"="+sve" }