; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s

declare <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr, <vscale x 1 x i1>, i32)

define <vscale x 1 x i8> @vpload_nxv1i8(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i8> %load
}

define <vscale x 1 x i8> @vpload_nxv1i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv1i8_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i8> %load
}

define <vscale x 1 x i8> @vpload_nxv1i8_passthru(ptr %ptr, <vscale x 1 x i1> %m, <vscale x 1 x i8> %passthru, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv1i8_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, tu, mu
; CHECK-NEXT:    vle8.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
  %merge = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %load, <vscale x 1 x i8> %passthru, i32 %evl)
  ret <vscale x 1 x i8> %merge
}

declare <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr, <vscale x 2 x i1>, i32)

define <vscale x 2 x i8> @vpload_nxv2i8(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i8> %load
}

declare <vscale x 3 x i8> @llvm.vp.load.nxv3i8.p0(ptr, <vscale x 3 x i1>, i32)

define <vscale x 3 x i8> @vpload_nxv3i8(ptr %ptr, <vscale x 3 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv3i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 3 x i8> @llvm.vp.load.nxv3i8.p0(ptr %ptr, <vscale x 3 x i1> %m, i32 %evl)
  ret <vscale x 3 x i8> %load
}

declare <vscale x 4 x i6> @llvm.vp.load.nxv4i6.nxv4i6.p0(<vscale x 4 x i6>*, <vscale x 4 x i1>, i32)

define <vscale x 4 x i6> @vpload_nxv4i6(<vscale x 4 x i6>* %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv4i6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i6> @llvm.vp.load.nxv4i6.nxv4i6.p0(<vscale x 4 x i6>* %ptr, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i6> %load
}

declare <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr, <vscale x 4 x i1>, i32)

define <vscale x 4 x i8> @vpload_nxv4i8(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i8> %load
}

declare <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr, <vscale x 8 x i1>, i32)

define <vscale x 8 x i8> @vpload_nxv8i8(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i8> %load
}

define <vscale x 8 x i8> @vpload_nxv8i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv8i8_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x i8> %load
}

declare <vscale x 1 x i16> @llvm.vp.load.nxv1i16.p0(ptr, <vscale x 1 x i1>, i32)

define <vscale x 1 x i16> @vpload_nxv1i16(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x i16> @llvm.vp.load.nxv1i16.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i16> %load
}

declare <vscale x 2 x i16> @llvm.vp.load.nxv2i16.p0(ptr, <vscale x 2 x i1>, i32)

define <vscale x 2 x i16> @vpload_nxv2i16(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.vp.load.nxv2i16.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i16> %load
}

define <vscale x 2 x i16> @vpload_nxv2i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv2i16_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.vp.load.nxv2i16.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x i16> %load
}

declare <vscale x 4 x i16> @llvm.vp.load.nxv4i16.p0(ptr, <vscale x 4 x i1>, i32)

define <vscale x 4 x i16> @vpload_nxv4i16(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.vp.load.nxv4i16.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i16> %load
}

declare <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr, <vscale x 8 x i1>, i32)

define <vscale x 8 x i16> @vpload_nxv8i16(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %load
}

declare <vscale x 1 x i32> @llvm.vp.load.nxv1i32.p0(ptr, <vscale x 1 x i1>, i32)

define <vscale x 1 x i32> @vpload_nxv1i32(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x i32> @llvm.vp.load.nxv1i32.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i32> %load
}

declare <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr, <vscale x 2 x i1>, i32)

define <vscale x 2 x i32> @vpload_nxv2i32(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %load
}

declare <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr, <vscale x 4 x i1>, i32)

define <vscale x 4 x i32> @vpload_nxv4i32(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i32> @vpload_nxv4i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv4i32_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x i32> %load
}

declare <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr, <vscale x 8 x i1>, i32)

define <vscale x 8 x i32> @vpload_nxv8i32(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %load
}

declare <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr, <vscale x 1 x i1>, i32)

define <vscale x 1 x i64> @vpload_nxv1i64(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv1i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i64> %load
}

define <vscale x 1 x i64> @vpload_nxv1i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv1i64_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i64> %load
}

declare <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr, <vscale x 2 x i1>, i32)

define <vscale x 2 x i64> @vpload_nxv2i64(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i64> %load
}

declare <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr, <vscale x 4 x i1>, i32)

define <vscale x 4 x i64> @vpload_nxv4i64(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i64> %load
}

declare <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr, <vscale x 8 x i1>, i32)

define <vscale x 8 x i64> @vpload_nxv8i64(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %load
}

declare <vscale x 1 x bfloat> @llvm.vp.load.nxv1bf16.p0(ptr, <vscale x 1 x i1>, i32)

define <vscale x 1 x bfloat> @vpload_nxv1bf16(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x bfloat> @llvm.vp.load.nxv1bf16.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x bfloat> %load
}

declare <vscale x 2 x bfloat> @llvm.vp.load.nxv2bf16.p0(ptr, <vscale x 2 x i1>, i32)

define <vscale x 2 x bfloat> @vpload_nxv2bf16(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x bfloat> @llvm.vp.load.nxv2bf16.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x bfloat> %load
}

define <vscale x 2 x bfloat> @vpload_nxv2bf16_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv2bf16_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x bfloat> @llvm.vp.load.nxv2bf16.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x bfloat> %load
}

declare <vscale x 4 x bfloat> @llvm.vp.load.nxv4bf16.p0(ptr, <vscale x 4 x i1>, i32)

define <vscale x 4 x bfloat> @vpload_nxv4bf16(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x bfloat> @llvm.vp.load.nxv4bf16.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x bfloat> %load
}

declare <vscale x 8 x bfloat> @llvm.vp.load.nxv8bf16.p0(ptr, <vscale x 8 x i1>, i32)

define <vscale x 8 x bfloat> @vpload_nxv8bf16(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x bfloat> @llvm.vp.load.nxv8bf16.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %load
}

declare <vscale x 1 x half> @llvm.vp.load.nxv1f16.p0(ptr, <vscale x 1 x i1>, i32)

define <vscale x 1 x half> @vpload_nxv1f16(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x half> @llvm.vp.load.nxv1f16.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x half> %load
}

declare <vscale x 2 x half> @llvm.vp.load.nxv2f16.p0(ptr, <vscale x 2 x i1>, i32)

define <vscale x 2 x half> @vpload_nxv2f16(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x half> @llvm.vp.load.nxv2f16.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x half> %load
}

define <vscale x 2 x half> @vpload_nxv2f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv2f16_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x half> @llvm.vp.load.nxv2f16.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x half> %load
}

declare <vscale x 4 x half> @llvm.vp.load.nxv4f16.p0(ptr, <vscale x 4 x i1>, i32)

define <vscale x 4 x half> @vpload_nxv4f16(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x half> @llvm.vp.load.nxv4f16.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x half> %load
}

declare <vscale x 8 x half> @llvm.vp.load.nxv8f16.p0(ptr, <vscale x 8 x i1>, i32)

define <vscale x 8 x half> @vpload_nxv8f16(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x half> @llvm.vp.load.nxv8f16.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %load
}

declare <vscale x 1 x float> @llvm.vp.load.nxv1f32.p0(ptr, <vscale x 1 x i1>, i32)

define <vscale x 1 x float> @vpload_nxv1f32(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x float> @llvm.vp.load.nxv1f32.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %load
}

declare <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0(ptr, <vscale x 2 x i1>, i32)

define <vscale x 2 x float> @vpload_nxv2f32(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %load
}

declare <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr, <vscale x 4 x i1>, i32)

define <vscale x 4 x float> @vpload_nxv4f32(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %load
}

declare <vscale x 8 x float> @llvm.vp.load.nxv8f32.p0(ptr, <vscale x 8 x i1>, i32)

define <vscale x 8 x float> @vpload_nxv8f32(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x float> @llvm.vp.load.nxv8f32.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %load
}

define <vscale x 8 x float> @vpload_nxv8f32_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv8f32_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x float> @llvm.vp.load.nxv8f32.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x float> %load
}

declare <vscale x 1 x double> @llvm.vp.load.nxv1f64.p0(ptr, <vscale x 1 x i1>, i32)

define <vscale x 1 x double> @vpload_nxv1f64(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 1 x double> @llvm.vp.load.nxv1f64.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %load
}

declare <vscale x 2 x double> @llvm.vp.load.nxv2f64.p0(ptr, <vscale x 2 x i1>, i32)

define <vscale x 2 x double> @vpload_nxv2f64(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.vp.load.nxv2f64.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %load
}

declare <vscale x 4 x double> @llvm.vp.load.nxv4f64.p0(ptr, <vscale x 4 x i1>, i32)

define <vscale x 4 x double> @vpload_nxv4f64(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x double> @llvm.vp.load.nxv4f64.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %load
}

define <vscale x 4 x double> @vpload_nxv4f64_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv4f64_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x double> @llvm.vp.load.nxv4f64.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x double> %load
}

declare <vscale x 8 x double> @llvm.vp.load.nxv8f64.p0(ptr, <vscale x 8 x i1>, i32)

define <vscale x 8 x double> @vpload_nxv8f64(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x double> @llvm.vp.load.nxv8f64.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %load
}

declare <vscale x 16 x double> @llvm.vp.load.nxv16f64.p0(ptr, <vscale x 16 x i1>, i32)

define <vscale x 16 x double> @vpload_nxv16f64(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vmv1r.v v8, v0
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    sub a3, a1, a2
; CHECK-NEXT:    slli a4, a2, 3
; CHECK-NEXT:    srli a5, a2, 3
; CHECK-NEXT:    vslidedown.vx v0, v0, a5
; CHECK-NEXT:    sltu a5, a1, a3
; CHECK-NEXT:    addi a5, a5, -1
; CHECK-NEXT:    and a3, a5, a3
; CHECK-NEXT:    add a4, a0, a4
; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v16, (a4), v0.t
; CHECK-NEXT:    bltu a1, a2, .LBB44_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a1, a2
; CHECK-NEXT:  .LBB44_2:
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <vscale x 16 x double> @llvm.vp.load.nxv16f64.p0(ptr %ptr, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %load
}

declare <vscale x 17 x double> @llvm.vp.load.nxv17f64.p0(ptr, <vscale x 17 x i1>, i32)

declare <vscale x 1 x double> @llvm.vector.extract.nxv1f64(<vscale x 17 x double> %vec, i64 %idx)
declare <vscale x 16 x double> @llvm.vector.extract.nxv16f64(<vscale x 17 x double> %vec, i64 %idx)

; Note: We can't return <vscale x 17 x double>, as that introduces a vector
; store that can't yet be legalized through widening. In order to test purely
; the vp.load legalization, manually split it.

; Widen to nxv32f64 then split into 4 x nxv8f64, of which 1 is empty.
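; As a rough sketch of what the CHECK lines below verify: the widened load is
; emitted as three masked vle64.v operations at byte offsets 0, 8*vlenb and
; 16*vlenb from %ptr, and each part's EVL is clamped via the bltu/mv (umin)
; and sltu/addi/and (saturating subtract) sequences so that the empty fourth
; part is never loaded.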

define <vscale x 16 x double> @vpload_nxv17f64(ptr %ptr, ptr %out, <vscale x 17 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv17f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v8, v0
; CHECK-NEXT:    csrr a3, vlenb
; CHECK-NEXT:    slli a5, a3, 1
; CHECK-NEXT:    mv a4, a2
; CHECK-NEXT:    bltu a2, a5, .LBB45_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a4, a5
; CHECK-NEXT:  .LBB45_2:
; CHECK-NEXT:    sub a6, a4, a3
; CHECK-NEXT:    slli a7, a3, 3
; CHECK-NEXT:    srli t0, a3, 3
; CHECK-NEXT:    sub a5, a2, a5
; CHECK-NEXT:    vsetvli t1, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vx v0, v8, t0
; CHECK-NEXT:    sltu t0, a4, a6
; CHECK-NEXT:    add a7, a0, a7
; CHECK-NEXT:    addi t0, t0, -1
; CHECK-NEXT:    and a6, t0, a6
; CHECK-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v16, (a7), v0.t
; CHECK-NEXT:    sltu a2, a2, a5
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a5
; CHECK-NEXT:    bltu a2, a3, .LBB45_4
; CHECK-NEXT:  # %bb.3:
; CHECK-NEXT:    mv a2, a3
; CHECK-NEXT:  .LBB45_4:
; CHECK-NEXT:    slli a5, a3, 4
; CHECK-NEXT:    srli a6, a3, 2
; CHECK-NEXT:    vsetvli a7, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vx v0, v8, a6
; CHECK-NEXT:    add a5, a0, a5
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a5), v0.t
; CHECK-NEXT:    bltu a4, a3, .LBB45_6
; CHECK-NEXT:  # %bb.5:
; CHECK-NEXT:    mv a4, a3
; CHECK-NEXT:  .LBB45_6:
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, a4, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    vs1r.v v24, (a1)
; CHECK-NEXT:    ret
  %load = call <vscale x 17 x double> @llvm.vp.load.nxv17f64.p0(ptr %ptr, <vscale x 17 x i1> %m, i32 %evl)
  %lo = call <vscale x 16 x double> @llvm.vector.extract.nxv16f64(<vscale x 17 x double> %load, i64 0)
  %hi = call <vscale x 1 x double> @llvm.vector.extract.nxv1f64(<vscale x 17 x double> %load, i64 16)
  store <vscale x 1 x double> %hi, ptr %out
  ret <vscale x 16 x double> %lo
}

define <vscale x 8 x i8> @vpload_all_active_nxv8i8(ptr %ptr) {
; CHECK-LABEL: vpload_all_active_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl1r.v v8, (a0)
; CHECK-NEXT:    ret
  %vscale = call i32 @llvm.vscale()
  %evl = mul i32 %vscale, 8
  %load = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x i8> %load
}