; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE

; Every function below follows the same recipe
;   1. reserve a stack buffer with alloca,
;   2. hand the buffer to the external @def,
;   3. load the whole buffer as one fixed-length vector,
;   4. keep every second lane with a shufflevector,
;   5. store the narrowed vector through the pointer argument.
; The invocations above compile this IR three times. The +sve and +sme
; invocations share the default assertion prefix, and the invocation without
; -mattr is matched under the NONEON-NOSVE prefix.

target triple = "aarch64-unknown-linux-gnu"

; External callee that receives each stack buffer. It has no body in this file.
declare void @def(ptr)

; 4 x i8 buffer, lanes 0 and 2 kept, <2 x i8> stored.
; The default-prefix assertions currently expect a predicated ld2b here.
define void @alloc_v4i8(ptr %st_ptr) nounwind {
; CHECK-LABEL: alloc_v4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: add x0, sp, #12
; CHECK-NEXT: add x20, sp, #12
; CHECK-NEXT: bl def
; CHECK-NEXT: ptrue p0.b, vl2
; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x20]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: mov z2.b, z0.b[1]
; CHECK-NEXT: zip1 z0.s, z0.s, z2.s
; CHECK-NEXT: st1b { z0.s }, p0, [x19]
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: alloc_v4i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #48
; NONEON-NOSVE-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: mov x19, x0
; NONEON-NOSVE-NEXT: add x0, sp, #28
; NONEON-NOSVE-NEXT: bl def
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30]
; NONEON-NOSVE-NEXT: strh w8, [sp, #12]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28]
; NONEON-NOSVE-NEXT: strh w8, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: str d0, [sp, #16]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #16]
; NONEON-NOSVE-NEXT: strb w8, [x19, #1]
; NONEON-NOSVE-NEXT: strb w9, [x19]
; NONEON-NOSVE-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
  %buf = alloca [4 x i8]
  call void @def(ptr %buf)
  %wide = load <4 x i8>, ptr %buf
  ; Even lanes only (0, 2).
  %even = shufflevector <4 x i8> %wide, <4 x i8> poison, <2 x i32> <i32 0, i32 2>
  store <2 x i8> %even, ptr %st_ptr
  ret void
}

; 6 x i8 buffer, lanes 1, 3 and 5 kept, <3 x i8> stored.
; Neither the load type nor the result type is a power-of-two width, and the
; current assertions show no ld2 instruction for this case.
define void @alloc_v6i8(ptr %st_ptr) nounwind {
; CHECK-LABEL: alloc_v6i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: add x0, sp, #8
; CHECK-NEXT: bl def
; CHECK-NEXT: ldr d0, [sp, #8]
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: add x8, sp, #4
; CHECK-NEXT: ptrue p1.s, vl2
; CHECK-NEXT: mov z1.b, z0.b[3]
; CHECK-NEXT: mov z2.b, z0.b[1]
; CHECK-NEXT: mov z0.b, z0.b[5]
; CHECK-NEXT: zip1 z1.h, z2.h, z1.h
; CHECK-NEXT: zip1 z1.s, z1.s, z0.s
; CHECK-NEXT: st1b { z1.h }, p0, [x8]
; CHECK-NEXT: ld1h { z1.s }, p1/z, [x8]
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: strb w8, [x19, #2]
; CHECK-NEXT: fmov w8, s1
; CHECK-NEXT: strh w8, [x19]
; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: alloc_v6i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #48
; NONEON-NOSVE-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: mov x19, x0
; NONEON-NOSVE-NEXT: add x0, sp, #24
; NONEON-NOSVE-NEXT: bl def
; NONEON-NOSVE-NEXT: ldr x8, [sp, #24]
; NONEON-NOSVE-NEXT: str x8, [sp]
; NONEON-NOSVE-NEXT: ldr d0, [sp]
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11]
; NONEON-NOSVE-NEXT: strb w8, [sp, #21]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9]
; NONEON-NOSVE-NEXT: strb w8, [sp, #20]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #20]
; NONEON-NOSVE-NEXT: strb w8, [x19, #2]
; NONEON-NOSVE-NEXT: strh w9, [x19]
; NONEON-NOSVE-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
  %buf = alloca [6 x i8]
  call void @def(ptr %buf)
  %wide = load <6 x i8>, ptr %buf
  ; Odd lanes only (1, 3, 5).
  %odd = shufflevector <6 x i8> %wide, <6 x i8> poison, <3 x i32> <i32 1, i32 3, i32 5>
  store <3 x i8> %odd, ptr %st_ptr
  ret void
}

; 32 x i8 buffer, the nine even lanes 0 through 16 kept, <9 x i8> stored.
; Only part of the loaded vector is used and the result has an odd lane count.
; The default-prefix assertions currently expect a tbl with a constant-pool
; mask rather than an ld2 instruction.
define void @alloc_v32i8(ptr %st_ptr) nounwind {
; CHECK-LABEL: alloc_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #48
; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: mov x0, sp
; CHECK-NEXT: bl def
; CHECK-NEXT: adrp x8, .LCPI2_0
; CHECK-NEXT: ldr q0, [sp]
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
; CHECK-NEXT: tbl z0.b, { z0.b }, z1.b
; CHECK-NEXT: ldr q1, [sp, #16]
; CHECK-NEXT: fmov w8, s1
; CHECK-NEXT: strb w8, [x19, #8]
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: str x8, [x19]
; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #48
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: alloc_v32i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #112
; NONEON-NOSVE-NEXT: stp x30, x19, [sp, #96] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: mov x19, x0
; NONEON-NOSVE-NEXT: add x0, sp, #64
; NONEON-NOSVE-NEXT: bl def
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: str q0, [sp]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14]
; NONEON-NOSVE-NEXT: str q1, [sp, #32]
; NONEON-NOSVE-NEXT: strb w8, [sp, #23]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12]
; NONEON-NOSVE-NEXT: strb w8, [sp, #22]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10]
; NONEON-NOSVE-NEXT: strb w8, [sp, #21]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8]
; NONEON-NOSVE-NEXT: strb w8, [sp, #20]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6]
; NONEON-NOSVE-NEXT: strb w8, [sp, #19]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4]
; NONEON-NOSVE-NEXT: strb w8, [sp, #18]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2]
; NONEON-NOSVE-NEXT: strb w8, [sp, #17]
; NONEON-NOSVE-NEXT: ldrb w8, [sp]
; NONEON-NOSVE-NEXT: strb w8, [sp, #16]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: strb w8, [x19, #8]
; NONEON-NOSVE-NEXT: str q0, [sp, #48]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #48]
; NONEON-NOSVE-NEXT: str x8, [x19]
; NONEON-NOSVE-NEXT: ldp x30, x19, [sp, #96] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: add sp, sp, #112
; NONEON-NOSVE-NEXT: ret
  %buf = alloca [32 x i8]
  call void @def(ptr %buf)
  %wide = load <32 x i8>, ptr %buf
  ; Even lanes 0..16, nine results in total.
  %even = shufflevector <32 x i8> %wide, <32 x i8> poison, <9 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16>
  store <9 x i8> %even, ptr %st_ptr
  ret void
}


; 8 x double buffer, lanes 0, 2, 4 and 6 kept, <4 x double> stored.
; The default-prefix assertions currently expect two predicated ld2d loads.
define void @alloc_v8f64(ptr %st_ptr) nounwind {
; CHECK-LABEL: alloc_v8f64:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #96
; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: mov x0, sp
; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-NEXT: mov x20, sp
; CHECK-NEXT: bl def
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: mov x8, #4 // =0x4
; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x20]
; CHECK-NEXT: ld2d { z2.d, z3.d }, p0/z, [x20, x8, lsl #3]
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: stp q0, q2, [x19]
; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #96
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: alloc_v8f64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #176
; NONEON-NOSVE-NEXT: stp x30, x19, [sp, #160] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: mov x19, x0
; NONEON-NOSVE-NEXT: add x0, sp, #96
; NONEON-NOSVE-NEXT: bl def
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #96]
; NONEON-NOSVE-NEXT: ldp q2, q3, [sp, #128]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #48]
; NONEON-NOSVE-NEXT: ldr d1, [sp, #64]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #48]
; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80]
; NONEON-NOSVE-NEXT: ldr d1, [sp, #16]
; NONEON-NOSVE-NEXT: ldr d0, [sp]
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q1, [sp, #80]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: stp q1, q0, [x19]
; NONEON-NOSVE-NEXT: ldp x30, x19, [sp, #160] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: add sp, sp, #176
; NONEON-NOSVE-NEXT: ret
  %buf = alloca [8 x double]
  call void @def(ptr %buf)
  %wide = load <8 x double>, ptr %buf
  ; Even lanes only (0, 2, 4, 6).
  %even = shufflevector <8 x double> %wide, <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  store <4 x double> %even, ptr %st_ptr
  ret void
}