; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
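; Tests for lowering of RVV indexed load/store intrinsics whose index operand
; is a narrow vector that is zero-extended and left-shifted in the IR. The
; checks record how the extend (vzext.vf*) and shift (vsll.vi) are emitted at
; the index element width before the indexed memory access.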

declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
  <vscale x 4 x i32>,
  ptr,
  <vscale x 4 x i64>,
  i64);

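; Baseline: i8 offsets are zero-extended to the i64 index type (vzext.vf8)
; and shifted left by 4 (vsll.vi) before the ordered indexed load.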
define <vscale x 4 x i32> @test_vloxei(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vloxei:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v8
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vloxei64.v v8, (a0), v12
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 4)
  %res = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
    <vscale x 4 x i32> undef,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    i64 %vl)
  ret <vscale x 4 x i32> %res
}

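; Same pattern with a larger shift amount of 14.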
define <vscale x 4 x i32> @test_vloxei2(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vloxei2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v8
; CHECK-NEXT:    vsll.vi v12, v12, 14
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vloxei64.v v8, (a0), v12
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 14)
  %res = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
    <vscale x 4 x i32> undef,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    i64 %vl)
  ret <vscale x 4 x i32> %res
}

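; Same pattern with a shift amount of 26.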
define <vscale x 4 x i32> @test_vloxei3(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vloxei3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v8
; CHECK-NEXT:    vsll.vi v12, v12, 26
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vloxei64.v v8, (a0), v12
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 26)
  %res = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
    <vscale x 4 x i32> undef,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    i64 %vl)
  ret <vscale x 4 x i32> %res
}

; Test using vp.zext to extend the offset. The mask only applies to the
; zero-extend (vzext.vf8 with v0.t); the shift and the load stay unmasked.
declare <vscale x 4 x i64> @llvm.vp.zext.nxvi64.nxv1i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i32)
define <vscale x 4 x i32> @test_vloxei4(ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: test_vloxei4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v8, v0.t
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vloxei64.v v8, (a0), v12
; CHECK-NEXT:    ret
entry:
  %offset.ext = call <vscale x 4 x i64> @llvm.vp.zext.nxvi64.nxv1i8(<vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i32 %vl)
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 4)
  %vl.i64 = zext i32 %vl to i64
  %res = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
    <vscale x 4 x i32> undef,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    i64 %vl.i64)
  ret <vscale x 4 x i32> %res
}

; Test when the original extended type is already narrow enough: the i16
; index is kept and vloxei16 is used rather than widening further.
declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16(
  <vscale x 4 x i32>,
  ptr,
  <vscale x 4 x i16>,
  i64);
define <vscale x 4 x i32> @test_vloxei5(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vloxei5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vzext.vf2 v9, v8
; CHECK-NEXT:    vsll.vi v10, v9, 12
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vloxei16.v v8, (a0), v10
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i16>
  %shl = shl <vscale x 4 x i16> %offset.ext, splat (i16 12)
  %res = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16(
    <vscale x 4 x i32> undef,
    ptr %ptr,
    <vscale x 4 x i16> %shl,
    i64 %vl)
  ret <vscale x 4 x i32> %res
}

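; A non-byte-sized i7 offset element is zero-extended by first masking off
; the high bit (vand.vx with 127) and then extending as usual.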
define <vscale x 4 x i32> @test_vloxei6(ptr %ptr, <vscale x 4 x i7> %offset, i64 %vl) {
; CHECK-LABEL: test_vloxei6:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li a2, 127
; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT:    vand.vx v8, v8, a2
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v8
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vloxei64.v v8, (a0), v12
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i7> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 4)
  %res = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
    <vscale x 4 x i32> undef,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    i64 %vl)
  ret <vscale x 4 x i32> %res
}

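; i1 offsets are materialized as 0/1 i64 values (vmv.v.i + vmerge.vim)
; before the shift.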
define <vscale x 4 x i32> @test_vloxei7(ptr %ptr, <vscale x 4 x i1> %offset, i64 %vl) {
; CHECK-LABEL: test_vloxei7:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
; CHECK-NEXT:    vsll.vi v12, v8, 2
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vloxei64.v v8, (a0), v12
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i1> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 2)
  %res = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
    <vscale x 4 x i32> undef,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    i64 %vl)
  ret <vscale x 4 x i32> %res
}

declare <vscale x 4 x i32> @llvm.riscv.vloxei.mask.nxv4i32.nxv4i64(
  <vscale x 4 x i32>,
  ptr,
  <vscale x 4 x i64>,
  <vscale x 4 x i1>,
  i64,
  i64);

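; Masked form of the ordered indexed load: the index computation stays
; unmasked and the mask only applies to the vloxei64 itself (v0.t).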
define <vscale x 4 x i32> @test_vloxei_mask(ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i64 %vl) {
; CHECK-LABEL: test_vloxei_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v8
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vloxei64.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 4)
  %res = call <vscale x 4 x i32> @llvm.riscv.vloxei.mask.nxv4i32.nxv4i64(
    <vscale x 4 x i32> undef,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    <vscale x 4 x i1> %m,
    i64 %vl, i64 1)
  ret <vscale x 4 x i32> %res
}

declare <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64(
  <vscale x 4 x i32>,
  ptr,
  <vscale x 4 x i64>,
  i64);

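; Unordered indexed load (vluxei64) with the same zext + shl index pattern.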
define <vscale x 4 x i32> @test_vluxei(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vluxei:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v8
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vluxei64.v v8, (a0), v12
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 4)
  %res = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64(
    <vscale x 4 x i32> undef,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    i64 %vl)
  ret <vscale x 4 x i32> %res
}

declare <vscale x 4 x i32> @llvm.riscv.vluxei.mask.nxv4i32.nxv4i64(
  <vscale x 4 x i32>,
  ptr,
  <vscale x 4 x i64>,
  <vscale x 4 x i1>,
  i64,
  i64);

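; Masked unordered indexed load.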
define <vscale x 4 x i32> @test_vluxei_mask(ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i64 %vl) {
; CHECK-LABEL: test_vluxei_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v8
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 4)
  %res = call <vscale x 4 x i32> @llvm.riscv.vluxei.mask.nxv4i32.nxv4i64(
    <vscale x 4 x i32> undef,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    <vscale x 4 x i1> %m,
    i64 %vl, i64 1)
  ret <vscale x 4 x i32> %res
}

declare void @llvm.riscv.vsoxei.nxv4i32.nxv4i64(
  <vscale x 4 x i32>,
  ptr,
  <vscale x 4 x i64>,
  i64);

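; Ordered indexed store (vsoxei64). The stored value occupies v8-v9, so the
; i8 offsets arrive in v10.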
define void @test_vsoxei(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vsoxei:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v10
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vsoxei64.v v8, (a0), v12
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 4)
  call void @llvm.riscv.vsoxei.nxv4i32.nxv4i64(
    <vscale x 4 x i32> %val,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    i64 %vl)
  ret void
}

declare void @llvm.riscv.vsoxei.mask.nxv4i32.nxv4i64(
  <vscale x 4 x i32>,
  ptr,
  <vscale x 4 x i64>,
  <vscale x 4 x i1>,
  i64);

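; Masked ordered indexed store.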
define void @test_vsoxei_mask(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i64 %vl) {
; CHECK-LABEL: test_vsoxei_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v10
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 4)
  call void @llvm.riscv.vsoxei.mask.nxv4i32.nxv4i64(
    <vscale x 4 x i32> %val,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    <vscale x 4 x i1> %m,
    i64 %vl)
  ret void
}

declare void @llvm.riscv.vsuxei.nxv4i32.nxv4i64(
  <vscale x 4 x i32>,
  ptr,
  <vscale x 4 x i64>,
  i64);

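; Unordered indexed store (vsuxei64).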
define void @test_vsuxei(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vsuxei:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v10
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vsuxei64.v v8, (a0), v12
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 4)
  call void @llvm.riscv.vsuxei.nxv4i32.nxv4i64(
    <vscale x 4 x i32> %val,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    i64 %vl)
  ret void
}

declare void @llvm.riscv.vsuxei.mask.nxv4i32.nxv4i64(
  <vscale x 4 x i32>,
  ptr,
  <vscale x 4 x i64>,
  <vscale x 4 x i1>,
  i64);

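; Masked unordered indexed store.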
define void @test_vsuxei_mask(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i64 %vl) {
; CHECK-LABEL: test_vsuxei_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v10
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vsuxei64.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 4)
  call void @llvm.riscv.vsuxei.mask.nxv4i32.nxv4i64(
    <vscale x 4 x i32> %val,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    <vscale x 4 x i1> %m,
    i64 %vl)
  ret void
}