; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+zvfbfmin \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+zvfbfmin \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64

declare void @llvm.experimental.vp.strided.store.v2i8.p0.i8(<2 x i8>, ptr, i8, <2 x i1>, i32)

define void @strided_vpstore_v2i8_i8(<2 x i8> %val, ptr %ptr, i8 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2i8_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT:    vsse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2i8.p0.i8(<2 x i8> %val, ptr %ptr, i8 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v2i8.p0.i16(<2 x i8>, ptr, i16, <2 x i1>, i32)

define void @strided_vpstore_v2i8_i16(<2 x i8> %val, ptr %ptr, i16 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2i8_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT:    vsse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2i8.p0.i16(<2 x i8> %val, ptr %ptr, i16 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8>, ptr, i64, <2 x i1>, i32)

define void @strided_vpstore_v2i8_i64(<2 x i8> %val, ptr %ptr, i64 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-RV32-LABEL: strided_vpstore_v2i8_i64:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    vsetvli zero, a3, e8, mf8, ta, ma
; CHECK-RV32-NEXT:    vsse8.v v8, (a0), a1, v0.t
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64-LABEL: strided_vpstore_v2i8_i64:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-RV64-NEXT:    vsse8.v v8, (a0), a1, v0.t
; CHECK-RV64-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> %val, ptr %ptr, i64 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v2i8.p0.i32(<2 x i8>, ptr, i32, <2 x i1>, i32)

define void @strided_vpstore_v2i8(<2 x i8> %val, ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT:    vsse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2i8.p0.i32(<2 x i8> %val, ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v4i8.p0.i32(<4 x i8>, ptr, i32, <4 x i1>, i32)

define void @strided_vpstore_v4i8(<4 x i8> %val, ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf4, ta, ma
; CHECK-NEXT:    vsse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v4i8.p0.i32(<4 x i8> %val, ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v8i8.p0.i32(<8 x i8>, ptr, i32, <8 x i1>, i32)

define void @strided_vpstore_v8i8(<8 x i8> %val, ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
; CHECK-NEXT:    vsse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8i8.p0.i32(<8 x i8> %val, ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret void
}

define void @strided_vpstore_v8i8_unit_stride(<8 x i8> %val, ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8i8_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT:    vse8.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8i8.p0.i32(<8 x i8> %val, ptr %ptr, i32 1, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v2i16.p0.i32(<2 x i16>, ptr, i32, <2 x i1>, i32)

define void @strided_vpstore_v2i16(<2 x i16> %val, ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2i16.p0.i32(<2 x i16> %val, ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v4i16.p0.i32(<4 x i16>, ptr, i32, <4 x i1>, i32)

define void @strided_vpstore_v4i16(<4 x i16> %val, ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v4i16.p0.i32(<4 x i16> %val, ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v8i16.p0.i32(<8 x i16>, ptr, i32, <8 x i1>, i32)

define void @strided_vpstore_v8i16(<8 x i16> %val, ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8i16.p0.i32(<8 x i16> %val, ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret void
}

define void @strided_vpstore_v8i16_unit_stride(<8 x i16> %val, ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8i16_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8i16.p0.i32(<8 x i16> %val, ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v2i32.p0.i32(<2 x i32>, ptr, i32, <2 x i1>, i32)

define void @strided_vpstore_v2i32(<2 x i32> %val, ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2i32.p0.i32(<2 x i32> %val, ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v4i32.p0.i32(<4 x i32>, ptr, i32, <4 x i1>, i32)

define void @strided_vpstore_v4i32(<4 x i32> %val, ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v4i32.p0.i32(<4 x i32> %val, ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret void
}

define void @strided_vpstore_v4i32_unit_stride(<4 x i32> %val, ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v4i32_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v4i32.p0.i32(<4 x i32> %val, ptr %ptr, i32 4, <4 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v8i32.p0.i32(<8 x i32>, ptr, i32, <8 x i1>, i32)

define void @strided_vpstore_v8i32(<8 x i32> %val, ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8i32.p0.i32(<8 x i32> %val, ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v2i64.p0.i32(<2 x i64>, ptr, i32, <2 x i1>, i32)

define void @strided_vpstore_v2i64(<2 x i64> %val, ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2i64.p0.i32(<2 x i64> %val, ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

define void @strided_vpstore_v2i64_unit_stride(<2 x i64> %val, ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2i64_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vse64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2i64.p0.i32(<2 x i64> %val, ptr %ptr, i32 8, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v4i64.p0.i32(<4 x i64>, ptr, i32, <4 x i1>, i32)

define void @strided_vpstore_v4i64(<4 x i64> %val, ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v4i64.p0.i32(<4 x i64> %val, ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v8i64.p0.i32(<8 x i64>, ptr, i32, <8 x i1>, i32)

define void @strided_vpstore_v8i64(<8 x i64> %val, ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8i64.p0.i32(<8 x i64> %val, ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v2bf16.p0.i32(<2 x bfloat>, ptr, i32, <2 x i1>, i32)

define void @strided_vpstore_v2bf16(<2 x bfloat> %val, ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2bf16.p0.i32(<2 x bfloat> %val, ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v4bf16.p0.i32(<4 x bfloat>, ptr, i32, <4 x i1>, i32)

define void @strided_vpstore_v4bf16(<4 x bfloat> %val, ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v4bf16.p0.i32(<4 x bfloat> %val, ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v8bf16.p0.i32(<8 x bfloat>, ptr, i32, <8 x i1>, i32)

define void @strided_vpstore_v8bf16(<8 x bfloat> %val, ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8bf16.p0.i32(<8 x bfloat> %val, ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret void
}

define void @strided_vpstore_v8bf16_unit_stride(<8 x bfloat> %val, ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8bf16_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8bf16.p0.i32(<8 x bfloat> %val, ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v2f16.p0.i32(<2 x half>, ptr, i32, <2 x i1>, i32)

define void @strided_vpstore_v2f16(<2 x half> %val, ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2f16.p0.i32(<2 x half> %val, ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v4f16.p0.i32(<4 x half>, ptr, i32, <4 x i1>, i32)

define void @strided_vpstore_v4f16(<4 x half> %val, ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v4f16.p0.i32(<4 x half> %val, ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v8f16.p0.i32(<8 x half>, ptr, i32, <8 x i1>, i32)

define void @strided_vpstore_v8f16(<8 x half> %val, ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8f16.p0.i32(<8 x half> %val, ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret void
}

define void @strided_vpstore_v8f16_unit_stride(<8 x half> %val, ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8f16_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8f16.p0.i32(<8 x half> %val, ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v2f32.p0.i32(<2 x float>, ptr, i32, <2 x i1>, i32)

define void @strided_vpstore_v2f32(<2 x float> %val, ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2f32.p0.i32(<2 x float> %val, ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v4f32.p0.i32(<4 x float>, ptr, i32, <4 x i1>, i32)

define void @strided_vpstore_v4f32(<4 x float> %val, ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v4f32.p0.i32(<4 x float> %val, ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret void
}

define void @strided_vpstore_v4f32_unit_stride(<4 x float> %val, ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v4f32_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v4f32.p0.i32(<4 x float> %val, ptr %ptr, i32 4, <4 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v8f32.p0.i32(<8 x float>, ptr, i32, <8 x i1>, i32)

define void @strided_vpstore_v8f32(<8 x float> %val, ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8f32.p0.i32(<8 x float> %val, ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v2f64.p0.i32(<2 x double>, ptr, i32, <2 x i1>, i32)

define void @strided_vpstore_v2f64(<2 x double> %val, ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2f64.p0.i32(<2 x double> %val, ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

define void @strided_vpstore_v2f64_unit_stride(<2 x double> %val, ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2f64_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vse64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2f64.p0.i32(<2 x double> %val, ptr %ptr, i32 8, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v4f64.p0.i32(<4 x double>, ptr, i32, <4 x i1>, i32)

define void @strided_vpstore_v4f64(<4 x double> %val, ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v4f64.p0.i32(<4 x double> %val, ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v8f64.p0.i32(<8 x double>, ptr, i32, <8 x i1>, i32)

define void @strided_vpstore_v8f64(<8 x double> %val, ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8f64.p0.i32(<8 x double> %val, ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret void
}

define void @strided_vpstore_v2i8_allones_mask(<2 x i8> %val, ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2i8_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT:    vsse8.v v8, (a0), a1
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2i8.p0.i32(<2 x i8> %val, ptr %ptr, i32 %stride, <2 x i1> splat (i1 true), i32 %evl)
  ret void
}

; Widening: v3f32 is not a legal fixed vector type, so the store is widened to
; v4f32 while the EVL still limits how many elements are written.
define void @strided_vpstore_v3f32(<3 x float> %v, ptr %ptr, i32 signext %stride, <3 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v3f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v3f32.p0.i32(<3 x float> %v, ptr %ptr, i32 %stride, <3 x i1> %mask, i32 %evl)
  ret void
}

define void @strided_vpstore_v3f32_allones_mask(<3 x float> %v, ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v3f32_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vsse32.v v8, (a0), a1
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v3f32.p0.i32(<3 x float> %v, ptr %ptr, i32 %stride, <3 x i1> splat (i1 true), i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v3f32.p0.i32(<3 x float>, ptr, i32, <3 x i1>, i32)

; Splitting: v32f64 does not fit in a single register group, so the store is
; split in two. The first half stores min(evl, 16) elements; the second stores
; the remaining evl - 16 elements (clamped to zero) from the base pointer
; advanced by min(evl, 16) * stride bytes.
define void @strided_store_v32f64(<32 x double> %v, ptr %ptr, i32 signext %stride, <32 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: strided_store_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a4, 16
; CHECK-NEXT:    mv a3, a2
; CHECK-NEXT:    bltu a2, a4, .LBB38_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a3, 16
; CHECK-NEXT:  .LBB38_2:
; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    mul a3, a3, a1
; CHECK-NEXT:    add a0, a0, a3
; CHECK-NEXT:    addi a3, a2, -16
; CHECK-NEXT:    sltu a2, a2, a3
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v0, v0, 2
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vsse64.v v16, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v32f64.p0.i32(<32 x double> %v, ptr %ptr, i32 %stride, <32 x i1> %mask, i32 %evl)
  ret void
}

define void @strided_store_v32f64_allones_mask(<32 x double> %v, ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_store_v32f64_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a4, 16
; CHECK-NEXT:    mv a3, a2
; CHECK-NEXT:    bltu a2, a4, .LBB39_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a3, 16
; CHECK-NEXT:  .LBB39_2:
; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT:    vsse64.v v8, (a0), a1
; CHECK-NEXT:    mul a3, a3, a1
; CHECK-NEXT:    add a0, a0, a3
; CHECK-NEXT:    addi a3, a2, -16
; CHECK-NEXT:    sltu a2, a2, a3
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vsse64.v v16, (a0), a1
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v32f64.p0.i32(<32 x double> %v, ptr %ptr, i32 %stride, <32 x i1> splat (i1 true), i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v32f64.p0.i32(<32 x double>, ptr, i32, <32 x i1>, i32)