; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s

declare <256 x float> @llvm.experimental.vp.strided.load.v256f32.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc <256 x float> @vp_strided_load_v256f32_rrm(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f32_rrm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vseq %v0
; CHECK-NEXT:    vmulu.l %v0, %s1, %v0, %vm1
; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT:    vgtu %v0, %v0, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
  ret <256 x float> %r
}

define fastcc <256 x float> @vp_strided_load_v256f32_rr(ptr %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f32_rr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vldu %v0, %s1, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
  ret <256 x float> %r
}

define fastcc <256 x float> @vp_strided_load_v256f32_ri(ptr %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f32_ri:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vldu %v0, 24, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.p0.i64(ptr %ptr, i64 24, <256 x i1> %allones, i32 %evl)
  ret <256 x float> %r
}

declare <256 x i32> @llvm.experimental.vp.strided.load.v256i32.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc <256 x i32> @vp_strided_load_v256i32_rrm(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i32_rrm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vseq %v0
; CHECK-NEXT:    vmulu.l %v0, %s1, %v0, %vm1
; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT:    vgtl.zx %v0, %v0, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
  ret <256 x i32> %r
}

define fastcc <256 x i32> @vp_strided_load_v256i32_rr(ptr %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i32_rr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vldl.zx %v0, %s1, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
  ret <256 x i32> %r
}

define fastcc <256 x i32> @vp_strided_load_v256i32_ri(ptr %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i32_ri:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vldl.zx %v0, 24, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.p0.i64(ptr %ptr, i64 24, <256 x i1> %allones, i32 %evl)
  ret <256 x i32> %r
}

declare <256 x double> @llvm.experimental.vp.strided.load.v256f64.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc <256 x double> @vp_strided_load_v256f64_rrm(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f64_rrm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vseq %v0
; CHECK-NEXT:    vmulu.l %v0, %s1, %v0, %vm1
; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT:    vgt %v0, %v0, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
  ret <256 x double> %r
}

define fastcc <256 x double> @vp_strided_load_v256f64_rr(ptr %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f64_rr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vld %v0, %s1, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
  ret <256 x double> %r
}

define fastcc <256 x double> @vp_strided_load_v256f64_ri(ptr %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f64_ri:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vld %v0, 24, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.p0.i64(ptr %ptr, i64 24, <256 x i1> %allones, i32 %evl)
  ret <256 x double> %r
}

declare <256 x i64> @llvm.experimental.vp.strided.load.v256i64.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc <256 x i64> @vp_strided_load_v256i64_rrm(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i64_rrm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vseq %v0
; CHECK-NEXT:    vmulu.l %v0, %s1, %v0, %vm1
; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT:    vgt %v0, %v0, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
  ret <256 x i64> %r
}

define fastcc <256 x i64> @vp_strided_load_v256i64_rr(ptr %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i64_rr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vld %v0, %s1, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
  ret <256 x i64> %r
}

define fastcc <256 x i64> @vp_strided_load_v256i64_ri(ptr %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i64_ri:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vld %v0, 24, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.p0.i64(ptr %ptr, i64 24, <256 x i1> %allones, i32 %evl)
  ret <256 x i64> %r
}