; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s

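; Checks lowering of the llvm.experimental.vp.strided.load intrinsic for
; <256 x float/i32/double/i64> on VE. Each element type is covered in three
; variants: a runtime mask (_rrm), an all-ones mask with a register stride
; (_rr), and an all-ones mask with a constant stride (_ri).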
declare <256 x float> @llvm.experimental.vp.strided.load.v256f32.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

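; With a runtime mask the load cannot use a plain strided vector load, so it
; is expanded: vseq materializes the lane indices, vmulu.l/vaddu.l compute
; per-lane addresses (base + index * stride), and vgtu performs the masked
; gather of the f32 elements.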
define fastcc <256 x float> @vp_strided_load_v256f32_rrm(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f32_rrm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vseq %v0
; CHECK-NEXT:    vmulu.l %v0, %s1, %v0, %vm1
; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT:    vgtu %v0, %v0, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
  ret <256 x float> %r
}

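; An all-ones mask (a splat of i1 true) lets the intrinsic select the native
; strided load vldu directly, with base and stride in registers.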
define fastcc <256 x float> @vp_strided_load_v256f32_rr(ptr %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f32_rr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vldu %v0, %s1, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
  ret <256 x float> %r
}

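; A constant stride (24) is folded into the immediate operand of vldu.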
define fastcc <256 x float> @vp_strided_load_v256f32_ri(ptr %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f32_ri:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vldu %v0, 24, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.p0.i64(ptr %ptr, i64 24, <256 x i1> %allones, i32 %evl)
  ret <256 x float> %r
}

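; Same three variants for i32 elements: the masked case gathers with
; vgtl.zx, and the unmasked cases use the zero-extending 32-bit strided
; load vldl.zx.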
declare <256 x i32> @llvm.experimental.vp.strided.load.v256i32.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc <256 x i32> @vp_strided_load_v256i32_rrm(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i32_rrm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vseq %v0
; CHECK-NEXT:    vmulu.l %v0, %s1, %v0, %vm1
; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT:    vgtl.zx %v0, %v0, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
  ret <256 x i32> %r
}

define fastcc <256 x i32> @vp_strided_load_v256i32_rr(ptr %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i32_rr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vldl.zx %v0, %s1, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
  ret <256 x i32> %r
}

define fastcc <256 x i32> @vp_strided_load_v256i32_ri(ptr %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i32_ri:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vldl.zx %v0, 24, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.p0.i64(ptr %ptr, i64 24, <256 x i1> %allones, i32 %evl)
  ret <256 x i32> %r
}

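; f64 elements are 64 bits wide, so the lowering switches to vgt for the
; masked gather and vld for the unmasked strided loads.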
declare <256 x double> @llvm.experimental.vp.strided.load.v256f64.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc <256 x double> @vp_strided_load_v256f64_rrm(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f64_rrm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vseq %v0
; CHECK-NEXT:    vmulu.l %v0, %s1, %v0, %vm1
; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT:    vgt %v0, %v0, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
  ret <256 x double> %r
}

define fastcc <256 x double> @vp_strided_load_v256f64_rr(ptr %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f64_rr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vld %v0, %s1, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
  ret <256 x double> %r
}

define fastcc <256 x double> @vp_strided_load_v256f64_ri(ptr %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f64_ri:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vld %v0, 24, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.p0.i64(ptr %ptr, i64 24, <256 x i1> %allones, i32 %evl)
  ret <256 x double> %r
}

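; i64 shares the 64-bit vgt/vld lowering with f64.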
declare <256 x i64> @llvm.experimental.vp.strided.load.v256i64.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc <256 x i64> @vp_strided_load_v256i64_rrm(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i64_rrm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vseq %v0
; CHECK-NEXT:    vmulu.l %v0, %s1, %v0, %vm1
; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT:    vgt %v0, %v0, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
  ret <256 x i64> %r
}

define fastcc <256 x i64> @vp_strided_load_v256i64_rr(ptr %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i64_rr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vld %v0, %s1, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
  ret <256 x i64> %r
}

define fastcc <256 x i64> @vp_strided_load_v256i64_ri(ptr %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i64_ri:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vld %v0, 24, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.p0.i64(ptr %ptr, i64 24, <256 x i1> %allones, i32 %evl)
  ret <256 x i64> %r
}