; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+zvfbfmin,+optimized-zero-stride-load \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+zvfbfmin,+optimized-zero-stride-load \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+zvfbfmin \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+zvfbfmin \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin,+optimized-zero-stride-load \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin,+optimized-zero-stride-load \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-ZVFHMIN

declare <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i8(ptr, i8, <2 x i1>, i32)

define <2 x i8> @strided_vpload_v2i8_i8(ptr %ptr, i8 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2i8_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i8(ptr %ptr, i8 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %load
}

declare <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i16(ptr, i16, <2 x i1>, i32)

define <2 x i8> @strided_vpload_v2i8_i16(ptr %ptr, i16 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2i8_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i16(ptr %ptr, i16 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %load
}

declare <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr, i64, <2 x i1>, i32)

define <2 x i8> @strided_vpload_v2i8_i64(ptr %ptr, i64 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-RV32-LABEL: strided_vpload_v2i8_i64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
; CHECK-RV32-NEXT: vlse8.v v8, (a0), a1, v0.t
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: strided_vpload_v2i8_i64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-RV64-NEXT: vlse8.v v8, (a0), a1, v0.t
; CHECK-RV64-NEXT: ret
  %load = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %ptr, i64 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %load
}

declare <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i32(ptr, i32, <2 x i1>, i32)

define <2 x i8> @strided_vpload_v2i8(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %load
}

declare <4 x i8> @llvm.experimental.vp.strided.load.v4i8.p0.i32(ptr, i32, <4 x i1>, i32)

define <4 x i8> @strided_vpload_v4i8(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <4 x i8> @llvm.experimental.vp.strided.load.v4i8.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret <4 x i8> %load
}

define <4 x i8> @strided_vpload_v4i8_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4i8_allones_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
  %load = call <4 x i8> @llvm.experimental.vp.strided.load.v4i8.p0.i32(ptr %ptr, i32 %stride, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i8> %load
}

declare <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i32(ptr, i32, <8 x i1>, i32)

define <8 x i8> @strided_vpload_v8i8(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %load
}

define <8 x i8> @strided_vpload_v8i8_unit_stride(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8i8_unit_stride:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0), v0.t
; CHECK-NEXT: ret
  %load = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i32(ptr %ptr, i32 1, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %load
}

declare <2 x i16> @llvm.experimental.vp.strided.load.v2i16.p0.i32(ptr, i32, <2 x i1>, i32)

define <2 x i16> @strided_vpload_v2i16(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <2 x i16> @llvm.experimental.vp.strided.load.v2i16.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x i16> %load
}

declare <4 x i16> @llvm.experimental.vp.strided.load.v4i16.p0.i32(ptr, i32, <4 x i1>, i32)

define <4 x i16> @strided_vpload_v4i16(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <4 x i16> @llvm.experimental.vp.strided.load.v4i16.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret <4 x i16> %load
}

declare <8 x i16> @llvm.experimental.vp.strided.load.v8i16.p0.i32(ptr, i32, <8 x i1>, i32)

define <8 x i16> @strided_vpload_v8i16(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <8 x i16> @llvm.experimental.vp.strided.load.v8i16.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %load
}

define <8 x i16> @strided_vpload_v8i16_unit_stride(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8i16_unit_stride:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0), v0.t
; CHECK-NEXT: ret
  %load = call <8 x i16> @llvm.experimental.vp.strided.load.v8i16.p0.i32(ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %load
}

define <8 x i16> @strided_vpload_v8i16_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8i16_allones_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1
; CHECK-NEXT: ret
  %load = call <8 x i16> @llvm.experimental.vp.strided.load.v8i16.p0.i32(ptr %ptr, i32 %stride, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %load
}

declare <2 x i32> @llvm.experimental.vp.strided.load.v2i32.p0.i32(ptr, i32, <2 x i1>, i32)

define <2 x i32> @strided_vpload_v2i32(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <2 x i32> @llvm.experimental.vp.strided.load.v2i32.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x i32> %load
}

declare <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i32(ptr, i32, <4 x i1>, i32)

define <4 x i32> @strided_vpload_v4i32(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret <4 x i32> %load
}

define <4 x i32> @strided_vpload_v4i32_unit_stride(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4i32_unit_stride:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0), v0.t
; CHECK-NEXT: ret
  %load = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i32(ptr %ptr, i32 4, <4 x i1> %m, i32 %evl)
  ret <4 x i32> %load
}

declare <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i32(ptr, i32, <8 x i1>, i32)

define <8 x i32> @strided_vpload_v8i32(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %load
}

define <8 x i32> @strided_vpload_v8i32_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8i32_allones_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1
; CHECK-NEXT: ret
  %load = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i32(ptr %ptr, i32 %stride, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i32> %load
}

declare <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i32(ptr, i32, <2 x i1>, i32)

define <2 x i64> @strided_vpload_v2i64(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x i64> %load
}

define <2 x i64> @strided_vpload_v2i64_unit_stride(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2i64_unit_stride:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0), v0.t
; CHECK-NEXT: ret
  %load = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i32(ptr %ptr, i32 8, <2 x i1> %m, i32 %evl)
  ret <2 x i64> %load
}

declare <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr, i32, <4 x i1>, i32)

define <4 x i64> @strided_vpload_v4i64(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret <4 x i64> %load
}

define <4 x i64> @strided_vpload_v4i64_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4i64_allones_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1
; CHECK-NEXT: ret
  %load = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr %ptr, i32 %stride, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i64> %load
}

declare <8 x i64> @llvm.experimental.vp.strided.load.v8i64.p0.i32(ptr, i32, <8 x i1>, i32)

define <8 x i64> @strided_vpload_v8i64(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %load
}

declare <2 x bfloat> @llvm.experimental.vp.strided.load.v2bf16.p0.i32(ptr, i32, <2 x i1>, i32)

define <2 x bfloat> @strided_vpload_v2bf16(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <2 x bfloat> @llvm.experimental.vp.strided.load.v2bf16.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x bfloat> %load
}

define <2 x bfloat> @strided_vpload_v2bf16_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2bf16_allones_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1
; CHECK-NEXT: ret
  %load = call <2 x bfloat> @llvm.experimental.vp.strided.load.v2bf16.p0.i32(ptr %ptr, i32 %stride, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x bfloat> %load
}

declare <4 x bfloat> @llvm.experimental.vp.strided.load.v4bf16.p0.i32(ptr, i32, <4 x i1>, i32)

define <4 x bfloat> @strided_vpload_v4bf16(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <4 x bfloat> @llvm.experimental.vp.strided.load.v4bf16.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret <4 x bfloat> %load
}

declare <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i32(ptr, i32, <8 x i1>, i32)

define <8 x bfloat> @strided_vpload_v8bf16(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret <8 x bfloat> %load
}

define <8 x bfloat> @strided_vpload_v8bf16_unit_stride(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8bf16_unit_stride:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0), v0.t
; CHECK-NEXT: ret
  %load = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i32(ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
  ret <8 x bfloat> %load
}

declare <2 x half> @llvm.experimental.vp.strided.load.v2f16.p0.i32(ptr, i32, <2 x i1>, i32)

define <2 x half> @strided_vpload_v2f16(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <2 x half> @llvm.experimental.vp.strided.load.v2f16.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x half> %load
}

define <2 x half> @strided_vpload_v2f16_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2f16_allones_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1
; CHECK-NEXT: ret
  %load = call <2 x half> @llvm.experimental.vp.strided.load.v2f16.p0.i32(ptr %ptr, i32 %stride, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x half> %load
}

declare <4 x half> @llvm.experimental.vp.strided.load.v4f16.p0.i32(ptr, i32, <4 x i1>, i32)

define <4 x half> @strided_vpload_v4f16(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <4 x half> @llvm.experimental.vp.strided.load.v4f16.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret <4 x half> %load
}

declare <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i32(ptr, i32, <8 x i1>, i32)

define <8 x half> @strided_vpload_v8f16(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret <8 x half> %load
}

define <8 x half> @strided_vpload_v8f16_unit_stride(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8f16_unit_stride:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0), v0.t
; CHECK-NEXT: ret
  %load = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i32(ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
  ret <8 x half> %load
}

declare <2 x float> @llvm.experimental.vp.strided.load.v2f32.p0.i32(ptr, i32, <2 x i1>, i32)

define <2 x float> @strided_vpload_v2f32(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <2 x float> @llvm.experimental.vp.strided.load.v2f32.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x float> %load
}

declare <4 x float> @llvm.experimental.vp.strided.load.v4f32.p0.i32(ptr, i32, <4 x i1>, i32)

define <4 x float> @strided_vpload_v4f32(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <4 x float> @llvm.experimental.vp.strided.load.v4f32.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret <4 x float> %load
}

define <4 x float> @strided_vpload_v4f32_unit_stride(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4f32_unit_stride:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0), v0.t
; CHECK-NEXT: ret
  %load = call <4 x float> @llvm.experimental.vp.strided.load.v4f32.p0.i32(ptr %ptr, i32 4, <4 x i1> %m, i32 %evl)
  ret <4 x float> %load
}

declare <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i32(ptr, i32, <8 x i1>, i32)

define <8 x float> @strided_vpload_v8f32(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret <8 x float> %load
}

define <8 x float> @strided_vpload_v8f32_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8f32_allones_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1
; CHECK-NEXT: ret
  %load = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i32(ptr %ptr, i32 %stride, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x float> %load
}

declare <2 x double> @llvm.experimental.vp.strided.load.v2f64.p0.i32(ptr, i32, <2 x i1>, i32)

define <2 x double> @strided_vpload_v2f64(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <2 x double> @llvm.experimental.vp.strided.load.v2f64.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x double> %load
}

define <2 x double> @strided_vpload_v2f64_unit_stride(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2f64_unit_stride:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0), v0.t
; CHECK-NEXT: ret
  %load = call <2 x double> @llvm.experimental.vp.strided.load.v2f64.p0.i32(ptr %ptr, i32 8, <2 x i1> %m, i32 %evl)
  ret <2 x double> %load
}


declare <4 x double> @llvm.experimental.vp.strided.load.v4f64.p0.i32(ptr, i32, <4 x i1>, i32)

define <4 x double> @strided_vpload_v4f64(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <4 x double> @llvm.experimental.vp.strided.load.v4f64.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret <4 x double> %load
}

define <4 x double> @strided_vpload_v4f64_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4f64_allones_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1
; CHECK-NEXT: ret
  %load = call <4 x double> @llvm.experimental.vp.strided.load.v4f64.p0.i32(ptr %ptr, i32 %stride, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x double> %load
}

declare <8 x double> @llvm.experimental.vp.strided.load.v8f64.p0.i32(ptr, i32, <8 x i1>, i32)

define <8 x double> @strided_vpload_v8f64(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <8 x double> @llvm.experimental.vp.strided.load.v8f64.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret <8 x double> %load
}

; Widening
define <3 x double> @strided_vpload_v3f64(ptr %ptr, i32 signext %stride, <3 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v3f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %v = call <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0.i32(ptr %ptr, i32 %stride, <3 x i1> %mask, i32 %evl)
  ret <3 x double> %v
}

define <3 x double> @strided_vpload_v3f64_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v3f64_allones_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1
; CHECK-NEXT: ret
  %v = call <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0.i32(ptr %ptr, i32 %stride, <3 x i1> splat (i1 true), i32 %evl)
  ret <3 x double> %v
}

declare <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0.i32(ptr, i32, <3 x i1>, i32)

; Splitting
define <32 x double> @strided_vpload_v32f64(ptr %ptr, i32 signext %stride, <32 x i1> %m, i32 zeroext %evl) nounwind {
; CHECK-LABEL: strided_vpload_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: li a4, 16
; CHECK-NEXT: mv a3, a2
; CHECK-NEXT: bltu a2, a4, .LBB45_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 16
; CHECK-NEXT: .LBB45_2:
; CHECK-NEXT: mul a4, a3, a1
; CHECK-NEXT: addi a5, a2, -16
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v9, 2
; CHECK-NEXT: add a4, a0, a4
; CHECK-NEXT: sltu a2, a2, a5
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a2, a2, a5
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vlse64.v v16, (a4), a1, v0.t
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
  %load = call <32 x double> @llvm.experimental.vp.strided.load.v32f64.p0.i32(ptr %ptr, i32 %stride, <32 x i1> %m, i32 %evl)
  ret <32 x double> %load
}

define <32 x double> @strided_vpload_v32f64_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) nounwind {
; CHECK-LABEL: strided_vpload_v32f64_allones_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: li a4, 16
; CHECK-NEXT: mv a3, a2
; CHECK-NEXT: bltu a2, a4, .LBB46_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 16
; CHECK-NEXT: .LBB46_2:
; CHECK-NEXT: mul a4, a3, a1
; CHECK-NEXT: addi a5, a2, -16
; CHECK-NEXT: add a4, a0, a4
; CHECK-NEXT: sltu a2, a2, a5
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a2, a2, a5
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vlse64.v v16, (a4), a1
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT: vlse64.v v8, (a0), a1
; CHECK-NEXT: ret
  %load = call <32 x double> @llvm.experimental.vp.strided.load.v32f64.p0.i32(ptr %ptr, i32 %stride, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x double> %load
}

declare <32 x double> @llvm.experimental.vp.strided.load.v32f64.p0.i32(ptr, i32, <32 x i1>, i32)

; Widening + splitting (with HiIsEmpty == true)
define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask, i32 zeroext %evl) {
; CHECK-RV32-LABEL: strided_load_v33f64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV32-NEXT: vmv1r.v v8, v0
; CHECK-RV32-NEXT: li a5, 32
; CHECK-RV32-NEXT: mv a3, a4
; CHECK-RV32-NEXT: bltu a4, a5, .LBB47_2
; CHECK-RV32-NEXT: # %bb.1:
; CHECK-RV32-NEXT: li a3, 32
; CHECK-RV32-NEXT: .LBB47_2:
; CHECK-RV32-NEXT: mul a6, a3, a2
; CHECK-RV32-NEXT: addi a5, a4, -32
; CHECK-RV32-NEXT: sltu a7, a4, a5
; CHECK-RV32-NEXT: addi a7, a7, -1
; CHECK-RV32-NEXT: and a7, a7, a5
; CHECK-RV32-NEXT: li a5, 16
; CHECK-RV32-NEXT: add a6, a1, a6
; CHECK-RV32-NEXT: bltu a7, a5, .LBB47_4
; CHECK-RV32-NEXT: # %bb.3:
; CHECK-RV32-NEXT: li a7, 16
; CHECK-RV32-NEXT: .LBB47_4:
; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 4
; CHECK-RV32-NEXT: vsetvli zero, a7, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v16, (a6), a2, v0.t
; CHECK-RV32-NEXT: addi a6, a3, -16
; CHECK-RV32-NEXT: sltu a3, a3, a6
; CHECK-RV32-NEXT: addi a3, a3, -1
; CHECK-RV32-NEXT: and a3, a3, a6
; CHECK-RV32-NEXT: bltu a4, a5, .LBB47_6
; CHECK-RV32-NEXT: # %bb.5:
; CHECK-RV32-NEXT: li a4, 16
; CHECK-RV32-NEXT: .LBB47_6:
; CHECK-RV32-NEXT: mul a5, a4, a2
; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 2
; CHECK-RV32-NEXT: add a5, a1, a5
; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v24, (a5), a2, v0.t
; CHECK-RV32-NEXT: vmv1r.v v0, v8
; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v8, (a1), a2, v0.t
; CHECK-RV32-NEXT: addi a1, a0, 128
; CHECK-RV32-NEXT: addi a2, a0, 256
; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-RV32-NEXT: vse64.v v8, (a0)
; CHECK-RV32-NEXT: vse64.v v24, (a1)
; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-RV32-NEXT: vse64.v v16, (a2)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: strided_load_v33f64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV64-NEXT: vmv1r.v v8, v0
; CHECK-RV64-NEXT: li a5, 32
; CHECK-RV64-NEXT: mv a4, a3
; CHECK-RV64-NEXT: bltu a3, a5, .LBB47_2
; CHECK-RV64-NEXT: # %bb.1:
; CHECK-RV64-NEXT: li a4, 32
; CHECK-RV64-NEXT: .LBB47_2:
; CHECK-RV64-NEXT: mul a6, a4, a2
; CHECK-RV64-NEXT: addi a5, a3, -32
; CHECK-RV64-NEXT: sltu a7, a3, a5
; CHECK-RV64-NEXT: addi a7, a7, -1
; CHECK-RV64-NEXT: and a7, a7, a5
; CHECK-RV64-NEXT: li a5, 16
; CHECK-RV64-NEXT: add a6, a1, a6
; CHECK-RV64-NEXT: bltu a7, a5, .LBB47_4
; CHECK-RV64-NEXT: # %bb.3:
; CHECK-RV64-NEXT: li a7, 16
; CHECK-RV64-NEXT: .LBB47_4:
; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 4
; CHECK-RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v16, (a6), a2, v0.t
; CHECK-RV64-NEXT: addi a6, a4, -16
; CHECK-RV64-NEXT: sltu a4, a4, a6
; CHECK-RV64-NEXT: addi a4, a4, -1
; CHECK-RV64-NEXT: and a4, a4, a6
; CHECK-RV64-NEXT: bltu a3, a5, .LBB47_6
; CHECK-RV64-NEXT: # %bb.5:
; CHECK-RV64-NEXT: li a3, 16
; CHECK-RV64-NEXT: .LBB47_6:
; CHECK-RV64-NEXT: mul a5, a3, a2
; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2
; CHECK-RV64-NEXT: add a5, a1, a5
; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v24, (a5), a2, v0.t
; CHECK-RV64-NEXT: vmv1r.v v0, v8
; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV64-NEXT: vlse64.v v8, (a1), a2, v0.t
; CHECK-RV64-NEXT: addi a1, a0, 128
; CHECK-RV64-NEXT: addi a2, a0, 256
; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-RV64-NEXT: vse64.v v8, (a0)
; CHECK-RV64-NEXT: vse64.v v24, (a1)
; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-RV64-NEXT: vse64.v v16, (a2)
; CHECK-RV64-NEXT: ret
  %v = call <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr %ptr, i64 %stride, <33 x i1> %mask, i32 %evl)
  ret <33 x double> %v
}

declare <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr, i64, <33 x i1>, i32)

; Test unmasked integer zero strided
define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
; CHECK-OPT: # %bb.0:
; CHECK-OPT-NEXT: vsetivli zero, 3, e8, mf4, ta, ma
; CHECK-OPT-NEXT: vlse8.v v8, (a0), zero
; CHECK-OPT-NEXT: ret
;
; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
; CHECK-NO-OPT: # %bb.0:
; CHECK-NO-OPT-NEXT: lbu a0, 0(a0)
; CHECK-NO-OPT-NEXT: vsetivli zero, 3, e8, mf4, ta, ma
; CHECK-NO-OPT-NEXT: vmv.v.x v8, a0
; CHECK-NO-OPT-NEXT: ret
  %load = call <4 x i8> @llvm.experimental.vp.strided.load.4i8.p0.i8(ptr %ptr, i8 0, <4 x i1> splat (i1 true), i32 3)
  ret <4 x i8> %load
}

; Test unmasked float zero strided
define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
; CHECK-OPT: # %bb.0:
; CHECK-OPT-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
; CHECK-OPT-NEXT: vlse16.v v8, (a0), zero
; CHECK-OPT-NEXT: ret
;
; CHECK-NO-OPT-ZVFH-LABEL: zero_strided_unmasked_vpload_4f16:
; CHECK-NO-OPT-ZVFH: # %bb.0:
; CHECK-NO-OPT-ZVFH-NEXT: flh fa5, 0(a0)
; CHECK-NO-OPT-ZVFH-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
; CHECK-NO-OPT-ZVFH-NEXT: vfmv.v.f v8, fa5
; CHECK-NO-OPT-ZVFH-NEXT: ret
;
; CHECK-NO-OPT-ZVFHMIN-LABEL: zero_strided_unmasked_vpload_4f16:
; CHECK-NO-OPT-ZVFHMIN: # %bb.0:
; CHECK-NO-OPT-ZVFHMIN-NEXT: lh a0, 0(a0)
; CHECK-NO-OPT-ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
; CHECK-NO-OPT-ZVFHMIN-NEXT: vmv.v.x v8, a0
; CHECK-NO-OPT-ZVFHMIN-NEXT: ret
  %load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 3)
  ret <4 x half> %load
}

define <4 x i64> @zero_strided_vadd.vx(<4 x i64> %v, ptr %ptr) {
; CHECK-RV32-LABEL: zero_strided_vadd.vx:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-RV32-NEXT: vlse64.v v10, (a0), zero
; CHECK-RV32-NEXT: vadd.vv v8, v8, v10
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: zero_strided_vadd.vx:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ld a0, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-RV64-NEXT: vadd.vx v8, v8, a0
; CHECK-RV64-NEXT: ret
  %load = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 4)
  %w = add <4 x i64> %v, %load
  ret <4 x i64> %w
}