; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2,+bf16 -force-streaming < %s | FileCheck %s
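
; These tests check that the llvm.aarch64.sve.{st1,stnt1}.pn.x2/x4 intrinsics
; lower to the consecutive multi-vector store instructions (ST1B/ST1H/ST1W/ST1D
; and their non-temporal STNT1 counterparts). The leading %unused argument keeps
; the data operands out of z0, so the register moves below are expected: they
; assemble a correctly aligned consecutive tuple (x2 tuples start at an even
; Z register, x4 tuples at a multiple of four). The predicate-as-counter operand
; can only be pn8-pn15, and p8 is callee-saved under the SVE calling convention,
; hence the copy from p0 and the spill/reload around each store.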

; == Normal Multi-Vector Consecutive Stores ==

define void @st1_x2_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x2_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    st1b { z2.b, z3.b }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8(<vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x2_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x2_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    st1h { z2.h, z3.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16(<vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x2_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x2_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    st1w { z2.s, z3.s }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x2_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x2_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    st1d { z2.d, z3.d }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64(<vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x2_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    st1h { z2.h, z3.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x2.nxv8f16(<vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x2_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    st1h { z2.h, z3.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x2.nxv8bf16(<vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x2_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    st1w { z2.s, z3.s }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x2.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x2_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    st1d { z2.d, z3.d }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x2.nxv2f64(<vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}
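
; The x4 forms store four consecutive Z registers; the operands are first
; copied into the aligned tuple z4-z7.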

define void @st1_x4_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x4_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    st1b { z4.b - z7.b }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8(<vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x4_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x4_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    st1h { z4.h - z7.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16(<vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x4_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x4_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    st1w { z4.s - z7.s }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x4_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x4_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    st1d { z4.d - z7.d }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64(<vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x4_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x4_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    st1h { z4.h - z7.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16(<vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x4_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x4_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    st1h { z4.h - z7.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x4.nxv8bf16(<vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x4_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x4_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    st1w { z4.s - z7.s }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x4.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x4_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x4_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    st1d { z4.d - z7.d }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x4.nxv2f64(<vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

; == Non-temporal Multi-Vector Consecutive Stores ==
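
; These mirror the ST1 tests above but use the stnt1 intrinsics, which lower
; to the non-temporal STNT1B/STNT1H/STNT1W/STNT1D multi-vector forms.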

define void @stnt1_x2_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x2_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    stnt1b { z2.b, z3.b }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8(<vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x2_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x2_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    stnt1h { z2.h, z3.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16(<vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x2_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x2_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    stnt1w { z2.s, z3.s }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x2_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x2_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    stnt1d { z2.d, z3.d }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64(<vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x2_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    stnt1h { z2.h, z3.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16(<vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x2_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    stnt1h { z2.h, z3.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8bf16(<vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x2_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    stnt1w { z2.s, z3.s }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x2_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    stnt1d { z2.d, z3.d }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64(<vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}
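
; Non-temporal x4 stores, again through the consecutive tuple z4-z7.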

define void @stnt1_x4_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x4_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    stnt1b { z4.b - z7.b }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8(<vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x4_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x4_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    stnt1h { z4.h - z7.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16(<vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x4_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x4_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    stnt1w { z4.s - z7.s }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x4_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x4_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    stnt1d { z4.d - z7.d }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64(<vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x4_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x4_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    stnt1h { z4.h - z7.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16(<vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x4_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x4_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    stnt1h { z4.h - z7.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8bf16(<vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x4_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x4_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    stnt1w { z4.s - z7.s }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x4_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x4_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    stnt1d { z4.d - z7.d }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64(<vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}
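
; == Intrinsic Declarations ==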

declare void @llvm.aarch64.sve.st1.pn.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, target("aarch64.svcount"), ptr)


declare void @llvm.aarch64.sve.st1.pn.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x4.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, target("aarch64.svcount"), ptr)