; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s

declare <2 x i8> @llvm.vp.load.v2i8.p0(ptr, <2 x i1>, i32)

define <2 x i8> @vpload_v2i8(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %load
}

declare <3 x i8> @llvm.vp.load.v3i8.p0(ptr, <3 x i1>, i32)

define <3 x i8> @vpload_v3i8(ptr %ptr, <3 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v3i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <3 x i8> @llvm.vp.load.v3i8.p0(ptr %ptr, <3 x i1> %m, i32 %evl)
  ret <3 x i8> %load
}

declare <4 x i8> @llvm.vp.load.v4i8.p0(ptr, <4 x i1>, i32)

define <4 x i8> @vpload_v4i8(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
  ret <4 x i8> %load
}

define <4 x i8> @vpload_v4i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4i8_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i8> %load
}

declare <8 x i8> @llvm.vp.load.v8i8.p0(ptr, <8 x i1>, i32)

define <8 x i8> @vpload_v8i8(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %load
}

declare <2 x i16> @llvm.vp.load.v2i16.p0(ptr, <2 x i1>, i32)

define <2 x i16> @vpload_v2i16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x i16> @llvm.vp.load.v2i16.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
  ret <2 x i16> %load
}

declare <4 x i16> @llvm.vp.load.v4i16.p0(ptr, <4 x i1>, i32)

define <4 x i16> @vpload_v4i16(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x i16> @llvm.vp.load.v4i16.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
  ret <4 x i16> %load
}

declare <8 x i16> @llvm.vp.load.v8i16.p0(ptr, <8 x i1>, i32)

define <8 x i16> @vpload_v8i16(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x i16> @llvm.vp.load.v8i16.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %load
}

define <8 x i16> @vpload_v8i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8i16_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <8 x i16> @llvm.vp.load.v8i16.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %load
}

declare <2 x i32> @llvm.vp.load.v2i32.p0(ptr, <2 x i1>, i32)

define <2 x i32> @vpload_v2i32(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x i32> @llvm.vp.load.v2i32.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
  ret <2 x i32> %load
}

declare <4 x i32> @llvm.vp.load.v4i32.p0(ptr, <4 x i1>, i32)

define <4 x i32> @vpload_v4i32(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x i32> @llvm.vp.load.v4i32.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
  ret <4 x i32> %load
}

declare <6 x i32> @llvm.vp.load.v6i32.p0(ptr, <6 x i1>, i32)

define <6 x i32> @vpload_v6i32(ptr %ptr, <6 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v6i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <6 x i32> @llvm.vp.load.v6i32.p0(ptr %ptr, <6 x i1> %m, i32 %evl)
  ret <6 x i32> %load
}

define <6 x i32> @vpload_v6i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v6i32_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <6 x i32> @llvm.vp.load.v6i32.p0(ptr %ptr, <6 x i1> splat (i1 true), i32 %evl)
  ret <6 x i32> %load
}

declare <8 x i32> @llvm.vp.load.v8i32.p0(ptr, <8 x i1>, i32)

define <8 x i32> @vpload_v8i32(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %load
}

define <8 x i32> @vpload_v8i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8i32_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i32> %load
}

declare <2 x i64> @llvm.vp.load.v2i64.p0(ptr, <2 x i1>, i32)

define <2 x i64> @vpload_v2i64(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
  ret <2 x i64> %load
}

declare <4 x i64> @llvm.vp.load.v4i64.p0(ptr, <4 x i1>, i32)

define <4 x i64> @vpload_v4i64(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
  ret <4 x i64> %load
}

define <4 x i64> @vpload_v4i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4i64_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i64> %load
}

declare <8 x i64> @llvm.vp.load.v8i64.p0(ptr, <8 x i1>, i32)

define <8 x i64> @vpload_v8i64(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %load
}

declare <2 x half> @llvm.vp.load.v2f16.p0(ptr, <2 x i1>, i32)

define <2 x half> @vpload_v2f16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x half> @llvm.vp.load.v2f16.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
  ret <2 x half> %load
}

define <2 x half> @vpload_v2f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v2f16_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <2 x half> @llvm.vp.load.v2f16.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x half> %load
}

declare <4 x half> @llvm.vp.load.v4f16.p0(ptr, <4 x i1>, i32)

define <4 x half> @vpload_v4f16(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x half> @llvm.vp.load.v4f16.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
  ret <4 x half> %load
}

declare <8 x half> @llvm.vp.load.v8f16.p0(ptr, <8 x i1>, i32)

define <8 x half> @vpload_v8f16(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x half> @llvm.vp.load.v8f16.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
  ret <8 x half> %load
}

declare <2 x float> @llvm.vp.load.v2f32.p0(ptr, <2 x i1>, i32)

define <2 x float> @vpload_v2f32(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x float> @llvm.vp.load.v2f32.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
  ret <2 x float> %load
}

declare <4 x float> @llvm.vp.load.v4f32.p0(ptr, <4 x i1>, i32)

define <4 x float> @vpload_v4f32(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x float> @llvm.vp.load.v4f32.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
  ret <4 x float> %load
}

declare <8 x float> @llvm.vp.load.v8f32.p0(ptr, <8 x i1>, i32)

define <8 x float> @vpload_v8f32(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x float> @llvm.vp.load.v8f32.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
  ret <8 x float> %load
}

define <8 x float> @vpload_v8f32_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8f32_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <8 x float> @llvm.vp.load.v8f32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x float> %load
}

declare <2 x double> @llvm.vp.load.v2f64.p0(ptr, <2 x i1>, i32)

define <2 x double> @vpload_v2f64(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x double> @llvm.vp.load.v2f64.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
  ret <2 x double> %load
}

declare <4 x double> @llvm.vp.load.v4f64.p0(ptr, <4 x i1>, i32)

define <4 x double> @vpload_v4f64(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x double> @llvm.vp.load.v4f64.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
  ret <4 x double> %load
}

define <4 x double> @vpload_v4f64_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4f64_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <4 x double> @llvm.vp.load.v4f64.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x double> %load
}

declare <8 x double> @llvm.vp.load.v8f64.p0(ptr, <8 x i1>, i32)

define <8 x double> @vpload_v8f64(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x double> @llvm.vp.load.v8f64.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
  ret <8 x double> %load
}

declare <32 x double> @llvm.vp.load.v32f64.p0(ptr, <32 x i1>, i32)

define <32 x double> @vpload_v32f64(ptr %ptr, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a3, 16
; CHECK-NEXT:    mv a2, a1
; CHECK-NEXT:    bltu a1, a3, .LBB31_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a2, 16
; CHECK-NEXT:  .LBB31_2:
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    addi a2, a1, -16
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v0, v0, 2
; CHECK-NEXT:    sltu a1, a1, a2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    and a1, a1, a2
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v16, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <32 x double> @llvm.vp.load.v32f64.p0(ptr %ptr, <32 x i1> %m, i32 %evl)
  ret <32 x double> %load
}

declare <33 x double> @llvm.vp.load.v33f64.p0(ptr, <33 x i1>, i32)

; Widen to v64f64 then split into 4 x v16f64, of which 1 is empty.
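; Each of the three non-empty parts is loaded with its own clamped EVL and stored
; through the hidden return pointer in a0 at byte offsets 0, 128 and 256; only a
; single element of the last part is stored, since the <33 x double> result is
; returned in memory.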

define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v33f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v8, v0
; CHECK-NEXT:    li a4, 32
; CHECK-NEXT:    mv a3, a2
; CHECK-NEXT:    bltu a2, a4, .LBB32_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a3, 32
; CHECK-NEXT:  .LBB32_2:
; CHECK-NEXT:    addi a4, a3, -16
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v0, v8, 2
; CHECK-NEXT:    sltu a3, a3, a4
; CHECK-NEXT:    addi a3, a3, -1
; CHECK-NEXT:    and a3, a3, a4
; CHECK-NEXT:    addi a4, a1, 128
; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v16, (a4), v0.t
; CHECK-NEXT:    addi a3, a2, -32
; CHECK-NEXT:    sltu a4, a2, a3
; CHECK-NEXT:    addi a4, a4, -1
; CHECK-NEXT:    and a4, a4, a3
; CHECK-NEXT:    li a3, 16
; CHECK-NEXT:    bltu a4, a3, .LBB32_4
; CHECK-NEXT:  # %bb.3:
; CHECK-NEXT:    li a4, 16
; CHECK-NEXT:  .LBB32_4:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v0, v8, 4
; CHECK-NEXT:    addi a5, a1, 256
; CHECK-NEXT:    vsetvli zero, a4, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a5), v0.t
; CHECK-NEXT:    bltu a2, a3, .LBB32_6
; CHECK-NEXT:  # %bb.5:
; CHECK-NEXT:    li a2, 16
; CHECK-NEXT:  .LBB32_6:
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a1), v0.t
; CHECK-NEXT:    addi a1, a0, 128
; CHECK-NEXT:    addi a2, a0, 256
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    vse64.v v16, (a1)
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vse64.v v24, (a2)
; CHECK-NEXT:    ret
  %load = call <33 x double> @llvm.vp.load.v33f64.p0(ptr %ptr, <33 x i1> %m, i32 %evl)
  ret <33 x double> %load
}