; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s

; Each test below passes 2, 3 or 4 scalable vectors plus a predicate to one of
; the llvm.aarch64.sve.st{2,3,4}q intrinsics and expects a single store
; instruction with the address computation folded into its addressing mode.

;
; ST2Q
;

; Register-offset form: the address is a getelementptr over i128 indexed by
; %offset; the expected assembly folds it into [x0, x1, lsl #4].
define void @st2q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %dst)
  ret void
}

define void @st2q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}

define void @st2q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}

define void @st2q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}

define void @st2q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}

define void @st2q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}

define void @st2q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}

define void @st2q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}


; Immediate-offset form: the address is a constant-index getelementptr over the
; scalable vector type; the expected assembly folds it into [x0, #imm, mul vl].
define void @st2q_si_i8_off16(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2q_si_i8_off16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #-16, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -16
  call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %dst)
  ret void
}

define void @st2q_si_i8_off14(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2q_si_i8_off14:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 16 x i8>, ptr %addr, i64 14
  call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %dst)
  ret void
}

define void @st2q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 8 x i16>, ptr %base, i64 14
  call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}

define void @st2q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 4 x i32>, ptr %base, i64 14
  call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}

define void @st2q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 2 x i64>, ptr %base, i64 14
  call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}

define void @st2q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 8 x half>, ptr %base, i64 14
  call void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}

define void @st2q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 4 x float>, ptr %base, i64 14
  call void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}

define void @st2q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 2 x double>, ptr %base, i64 14
  call void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}

define void @st2q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 8 x bfloat>, ptr %base, i64 14
  call void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}


;
; ST3Q
;

; Register-offset form, three source vectors.
define void @st3q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %dst)
  ret void
}

define void @st3q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}

define void @st3q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}

define void @st3q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}

define void @st3q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}

define void @st3q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}

define void @st3q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}

define void @st3q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}

; Immediate-offset form, three source vectors.
define void @st3q_si_i8_off24(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_i8_off24:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #-24, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -24
  call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %dst)
  ret void
}

define void @st3q_si_i8_off21(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_i8_off21:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 16 x i8>, ptr %addr, i64 21
  call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %dst)
  ret void
}

define void @st3q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 8 x i16>, ptr %addr, i64 21
  call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}

define void @st3q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 4 x i32>, ptr %addr, i64 21
  call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}

define void @st3q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 2 x i64>, ptr %addr, i64 21
  call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}

define void @st3q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 8 x half>, ptr %addr, i64 21
  call void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}

define void @st3q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 4 x float>, ptr %addr, i64 21
  call void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}

define void @st3q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 2 x double>, ptr %addr, i64 21
  call void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}

define void @st3q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 8 x bfloat>, ptr %addr, i64 21
  call void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}

;
; ST4Q
;

; Register-offset form, four source vectors.
define void @st4q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %dst)
  ret void
}

define void @st4q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}

define void @st4q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}

define void @st4q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}

define void @st4q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}

define void @st4q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}

define void @st4q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}

define void @st4q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}

; Immediate-offset form, four source vectors.
define void @st4q_si_i8_off32(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_i8_off32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #-32, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -32
  call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %dst)
  ret void
}

define void @st4q_si_i8_off28(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_i8_off28:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 16 x i8>, ptr %addr, i64 28
  call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %dst)
  ret void
}

define void @st4q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 8 x i16>, ptr %addr, i64 28
  call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}

define void @st4q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 4 x i32>, ptr %addr, i64 28
  call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}

define void @st4q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 2 x i64>, ptr %addr, i64 28
  call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}

define void @st4q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 8 x half>, ptr %addr, i64 28
  call void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}

define void @st4q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 4 x float>, ptr %addr, i64 28
  call void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}

define void @st4q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:
st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] 851; CHECK-NEXT: ret 852 %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 28 853 call void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double> %v0, 854 <vscale x 2 x double> %v1, 855 <vscale x 2 x double> %v2, 856 <vscale x 2 x double> %v3, 857 <vscale x 2 x i1> %pred, 858 ptr %base) 859 ret void 860} 861 862define void @st4q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr) { 863; CHECK-LABEL: st4q_si_bf16: 864; CHECK: // %bb.0: 865; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 866; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 867; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 868; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 869; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] 870; CHECK-NEXT: ret 871 %base = getelementptr <vscale x 8 x bfloat>, ptr %addr, i64 28 872 call void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat> %v0, 873 <vscale x 8 x bfloat> %v1, 874 <vscale x 8 x bfloat> %v2, 875 <vscale x 8 x bfloat> %v3, 876 <vscale x 8 x i1> %pred, 877 ptr %base) 878 ret void 879} 880 881 882declare void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, ptr) 883declare void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, ptr) 884declare void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, ptr) 885declare void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, ptr) 886 887declare void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, ptr) 888declare void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, ptr) 889declare void 
@llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, ptr) 890declare void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr) 891 892declare void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>,<vscale x 16 x i8>, <vscale x 16 x i1>, ptr) 893declare void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, ptr) 894declare void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, ptr) 895declare void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, ptr) 896 897declare void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, ptr) 898declare void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, ptr) 899declare void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, ptr) 900declare void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr) 901 902declare void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>,<vscale x 16 x i1>, ptr) 903declare void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, ptr) 904declare void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i1>, ptr) 905declare void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, ptr) 906 907declare void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x 
half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, ptr) 908declare void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, ptr) 909declare void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, ptr) 910declare void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr) 911