; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE


target triple = "aarch64-unknown-linux-gnu"

; Splat lane 0 of %b across eight lanes, pass the result through an
; interleaving mask and store the <8 x i32> value to %a. As the function name
; records, this guards against a hang while merging stores after legalisation.
define void @hang_when_merging_stores_after_legalisation(ptr %a, <2 x i32> %b) {
; CHECK-LABEL: hang_when_merging_stores_after_legalisation:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    mov z0.s, s0
; CHECK-NEXT:    mov z1.d, z0.d
; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: hang_when_merging_stores_after_legalisation:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #32
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #8]
; NONEON-NOSVE-NEXT:    stp w8, w8, [sp, #24]
; NONEON-NOSVE-NEXT:    stp w8, w8, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT:    stp q0, q0, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %splat = shufflevector <2 x i32> %b, <2 x i32> poison, <8 x i32> zeroinitializer
  %interleaved.vec = shufflevector <8 x i32> %splat, <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  store <8 x i32> %interleaved.vec, ptr %a, align 4
  ret void
}

; Concatenate %v1 and %v2, interleave their lanes (v1[0], v2[0], v1[1], ...)
; and store the <8 x i32> result to %a with byte alignment. Unlike the test
; above, neither input is a splat.
define void @interleave_store_without_splat(ptr %a, <4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: interleave_store_without_splat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: interleave_store_without_splat:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #64
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #36]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp]
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #44]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
; NONEON-NOSVE-NEXT:    ldr q1, [sp]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #40]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
; NONEON-NOSVE-NEXT:    stp q1, q0, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #64
; NONEON-NOSVE-NEXT:    ret
  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved = shufflevector <8 x i32> %shuffle, <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  store <8 x i32> %interleaved, ptr %a, align 1
  ret void
}

; Interleave two <8 x i32> inputs lane by lane into a single <16 x i32> value
; and store it to %a. The SVE/SME expectations split the store into two st2w
; instructions.
define void @interleave_store_legalization(ptr %a, <8 x i32> %v1, <8 x i32> %v2) {
; CHECK-LABEL: interleave_store_legalization:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    // kill: def $q3 killed $q3 def $z2_z3
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    mov x8, #8 // =0x8
; CHECK-NEXT:    mov z4.d, z0.d
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    st2w { z4.s, z5.s }, p0, [x0]
; CHECK-NEXT:    st2w { z2.s, z3.s }, p0, [x0, x8, lsl #2]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: interleave_store_legalization:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #128
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 128
; NONEON-NOSVE-NEXT:    stp q1, q3, [sp, #16]
; NONEON-NOSVE-NEXT:    stp q0, q2, [sp, #80]
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #100]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #84]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #72]
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #96]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #80]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #64]
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #108]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #92]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #120]
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #104]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #88]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #112]
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #36]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
; NONEON-NOSVE-NEXT:    ldr q3, [sp, #112]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp]
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #44]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
; NONEON-NOSVE-NEXT:    ldr q1, [sp]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #40]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
; NONEON-NOSVE-NEXT:    ldp q0, q2, [sp, #48]
; NONEON-NOSVE-NEXT:    stp q2, q3, [x0]
; NONEON-NOSVE-NEXT:    stp q1, q0, [x0, #32]
; NONEON-NOSVE-NEXT:    add sp, sp, #128
; NONEON-NOSVE-NEXT:    ret
  %interleaved.vec = shufflevector <8 x i32> %v1, <8 x i32> %v2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11,
                                                                               i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  store <16 x i32> %interleaved.vec, ptr %a, align 4
  ret void
}

; Ensure we don't crash when trying to lower a shuffle via an extract.
; The select condition is a splat built from all-zero <32 x i1> vectors, and
; the expected output for every configuration is a bare ret.
define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) {
; CHECK-LABEL: crash_when_lowering_extract_shuffle:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: crash_when_lowering_extract_shuffle:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ret
  %broadcast.splat = shufflevector <32 x i1> zeroinitializer, <32 x i1> zeroinitializer, <32 x i32> zeroinitializer
  br i1 %cond, label %exit, label %vector.body

vector.body:
  %1 = load <32 x i32>, ptr %dst, align 16
  %predphi = select <32 x i1> %broadcast.splat, <32 x i32> zeroinitializer, <32 x i32> %1
  store <32 x i32> %predphi, ptr %dst, align 16
  br label %exit

exit:
  ret void
}