; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -mattr=+sve < %s -o - | FileCheck --check-prefixes=CHECK,SVE %s
; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s -o - | FileCheck --check-prefixes=CHECK,SVE2 %s

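; A rounding unsigned shift right by sh computes (x + (1 << (sh - 1))) >>u sh.
; On SVE2 that add+lshr pair folds into a single predicated URSHR. The
; neg_urshr_* tests below each break one precondition of the fold.
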
; Wrong rounding constant: a shift of 6 needs an addend of 1 << 5 = 32, not 16.
define <vscale x 2 x i64> @neg_urshr_1(<vscale x 2 x i64> %x) {
; CHECK-LABEL: neg_urshr_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.d, z0.d, #16 // =0x10
; CHECK-NEXT:    lsr z0.d, z0.d, #6
; CHECK-NEXT:    ret
  %add = add nuw nsw <vscale x 2 x i64> %x, splat (i64 16)
  %sh = lshr <vscale x 2 x i64> %add, splat (i64 6)
  ret <vscale x 2 x i64> %sh
}

; Non-immediate shift: the shift amount is a vector operand, not a constant splat, so there is no rounding constant to derive.
define <vscale x 2 x i64> @neg_urshr_2(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; CHECK-LABEL: neg_urshr_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.d, z0.d, #32 // =0x20
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %add = add nuw nsw <vscale x 2 x i64> %x, splat (i64 32)
  %sh = lshr <vscale x 2 x i64> %add, %y
  ret <vscale x 2 x i64> %sh
}

; Non-constant addend: the added value is a vector operand, not the required constant splat.
define <vscale x 2 x i64> @neg_urshr_3(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; CHECK-LABEL: neg_urshr_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.d, z0.d, z1.d
; CHECK-NEXT:    lsr z0.d, z0.d, #6
; CHECK-NEXT:    ret
  %add = add nuw nsw <vscale x 2 x i64> %x, %y
  %sh = lshr <vscale x 2 x i64> %add, splat (i64 6)
  ret <vscale x 2 x i64> %sh
}

; The add has a second use (its result is also stored), so it cannot be folded into the shift.
define <vscale x 2 x i64> @neg_urshr_4(<vscale x 2 x i64> %x, ptr %p) {
; CHECK-LABEL: neg_urshr_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.d, z0.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add z1.d, z1.d, #32 // =0x20
; CHECK-NEXT:    lsr z0.d, z1.d, #6
; CHECK-NEXT:    st1d { z1.d }, p0, [x0]
; CHECK-NEXT:    ret
  %add = add nuw nsw <vscale x 2 x i64> %x, splat (i64 32)
  %sh = lshr <vscale x 2 x i64> %add, splat (i64 6)
  store <vscale x 2 x i64> %add, ptr %p
  ret <vscale x 2 x i64> %sh
}

; The add is not marked nuw/nsw, so it may overflow and the fold would be unsound.
define <vscale x 2 x i64> @neg_urshr_5(<vscale x 2 x i64> %x) {
; CHECK-LABEL: neg_urshr_5:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.d, z0.d, #32 // =0x20
; CHECK-NEXT:    lsr z0.d, z0.d, #6
; CHECK-NEXT:    ret
  %add = add <vscale x 2 x i64> %x, splat (i64 32)
  %sh = lshr <vscale x 2 x i64> %add, splat (i64 6)
  ret <vscale x 2 x i64> %sh
}

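; Positive tests: a single-use add nuw of 1 << (sh - 1) followed by lshr by sh
; is folded to URSHR on SVE2; plain SVE keeps the separate add and lsr.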
define <vscale x 16 x i8> @urshr_i8(<vscale x 16 x i8> %x) {
; SVE-LABEL: urshr_i8:
; SVE:       // %bb.0:
; SVE-NEXT:    add z0.b, z0.b, #32 // =0x20
; SVE-NEXT:    lsr z0.b, z0.b, #6
; SVE-NEXT:    ret
;
; SVE2-LABEL: urshr_i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.b
; SVE2-NEXT:    urshr z0.b, p0/m, z0.b, #6
; SVE2-NEXT:    ret
  %add = add nuw nsw <vscale x 16 x i8> %x, splat (i8 32)
  %sh = lshr <vscale x 16 x i8> %add, splat (i8 6)
  ret <vscale x 16 x i8> %sh
}

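; Widened form: the rounding shift is performed after a zext and followed by a
; trunc. The addend and shift amount still fit the narrow element type, so the
; whole zext+add+lshr+trunc sequence still becomes a single narrow URSHR.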
define <vscale x 16 x i8> @urshr_8_wide_trunc(<vscale x 16 x i8> %x) {
; SVE-LABEL: urshr_8_wide_trunc:
; SVE:       // %bb.0:
; SVE-NEXT:    uunpkhi z1.h, z0.b
; SVE-NEXT:    uunpklo z0.h, z0.b
; SVE-NEXT:    add z0.h, z0.h, #32 // =0x20
; SVE-NEXT:    add z1.h, z1.h, #32 // =0x20
; SVE-NEXT:    lsr z1.h, z1.h, #6
; SVE-NEXT:    lsr z0.h, z0.h, #6
; SVE-NEXT:    uzp1 z0.b, z0.b, z1.b
; SVE-NEXT:    ret
;
; SVE2-LABEL: urshr_8_wide_trunc:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.b
; SVE2-NEXT:    urshr z0.b, p0/m, z0.b, #6
; SVE2-NEXT:    ret
  %ext = zext <vscale x 16 x i8> %x to <vscale x 16 x i16>
  %add = add nuw nsw <vscale x 16 x i16> %ext, splat (i16 32)
  %sh = lshr <vscale x 16 x i16> %add, splat (i16 6)
  %sht = trunc <vscale x 16 x i16> %sh to <vscale x 16 x i8>
  ret <vscale x 16 x i8> %sht
}

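; No narrowing here: the rounding constant (1 << 8) and shift amount (9) do not
; fit in i8, so the shift stays wide and SVE2 applies URSHR to each half of the
; wide vector before repacking with uzp1.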
define <vscale x 16 x i8> @urshr_8_wide_trunc_nomerge(<vscale x 16 x i16> %ext) {
; SVE-LABEL: urshr_8_wide_trunc_nomerge:
; SVE:       // %bb.0:
; SVE-NEXT:    add z0.h, z0.h, #256 // =0x100
; SVE-NEXT:    add z1.h, z1.h, #256 // =0x100
; SVE-NEXT:    lsr z1.h, z1.h, #9
; SVE-NEXT:    lsr z0.h, z0.h, #9
; SVE-NEXT:    uzp1 z0.b, z0.b, z1.b
; SVE-NEXT:    ret
;
; SVE2-LABEL: urshr_8_wide_trunc_nomerge:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.h
; SVE2-NEXT:    urshr z1.h, p0/m, z1.h, #9
; SVE2-NEXT:    urshr z0.h, p0/m, z0.h, #9
; SVE2-NEXT:    uzp1 z0.b, z0.b, z1.b
; SVE2-NEXT:    ret
  %add = add nuw nsw <vscale x 16 x i16> %ext, splat (i16 256)
  %sh = lshr <vscale x 16 x i16> %add, splat (i16 9)
  %sht = trunc <vscale x 16 x i16> %sh to <vscale x 16 x i8>
  ret <vscale x 16 x i8> %sht
}

define <vscale x 8 x i16> @urshr_i16(<vscale x 8 x i16> %x) {
; SVE-LABEL: urshr_i16:
; SVE:       // %bb.0:
; SVE-NEXT:    add z0.h, z0.h, #32 // =0x20
; SVE-NEXT:    lsr z0.h, z0.h, #6
; SVE-NEXT:    ret
;
; SVE2-LABEL: urshr_i16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.h
; SVE2-NEXT:    urshr z0.h, p0/m, z0.h, #6
; SVE2-NEXT:    ret
  %add = add nuw nsw <vscale x 8 x i16> %x, splat (i16 32)
  %sh = lshr <vscale x 8 x i16> %add, splat (i16 6)
  ret <vscale x 8 x i16> %sh
}

define <vscale x 8 x i16> @urshr_16_wide_trunc(<vscale x 8 x i16> %x) {
; SVE-LABEL: urshr_16_wide_trunc:
; SVE:       // %bb.0:
; SVE-NEXT:    uunpkhi z1.s, z0.h
; SVE-NEXT:    uunpklo z0.s, z0.h
; SVE-NEXT:    add z0.s, z0.s, #32 // =0x20
; SVE-NEXT:    add z1.s, z1.s, #32 // =0x20
; SVE-NEXT:    lsr z1.s, z1.s, #6
; SVE-NEXT:    lsr z0.s, z0.s, #6
; SVE-NEXT:    uzp1 z0.h, z0.h, z1.h
; SVE-NEXT:    ret
;
; SVE2-LABEL: urshr_16_wide_trunc:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.h
; SVE2-NEXT:    urshr z0.h, p0/m, z0.h, #6
; SVE2-NEXT:    ret
  %ext = zext <vscale x 8 x i16> %x to <vscale x 8 x i32>
  %add = add nuw nsw <vscale x 8 x i32> %ext, splat (i32 32)
  %sh = lshr <vscale x 8 x i32> %add, splat (i32 6)
  %sht = trunc <vscale x 8 x i32> %sh to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %sht
}

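; As above, but the wide rounding constant 1 << 16 cannot be encoded as an add
; immediate, so the SVE lowering first materializes it in a register.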
define <vscale x 8 x i16> @urshr_16_wide_trunc_nomerge(<vscale x 8 x i32> %ext) {
; SVE-LABEL: urshr_16_wide_trunc_nomerge:
; SVE:       // %bb.0:
; SVE-NEXT:    mov z2.s, #0x10000
; SVE-NEXT:    add z0.s, z0.s, z2.s
; SVE-NEXT:    add z1.s, z1.s, z2.s
; SVE-NEXT:    lsr z1.s, z1.s, #17
; SVE-NEXT:    lsr z0.s, z0.s, #17
; SVE-NEXT:    uzp1 z0.h, z0.h, z1.h
; SVE-NEXT:    ret
;
; SVE2-LABEL: urshr_16_wide_trunc_nomerge:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.s
; SVE2-NEXT:    urshr z1.s, p0/m, z1.s, #17
; SVE2-NEXT:    urshr z0.s, p0/m, z0.s, #17
; SVE2-NEXT:    uzp1 z0.h, z0.h, z1.h
; SVE2-NEXT:    ret
  %add = add nuw nsw <vscale x 8 x i32> %ext, splat (i32 65536)
  %sh = lshr <vscale x 8 x i32> %add, splat (i32 17)
  %sht = trunc <vscale x 8 x i32> %sh to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %sht
}

define <vscale x 4 x i32> @urshr_i32(<vscale x 4 x i32> %x) {
; SVE-LABEL: urshr_i32:
; SVE:       // %bb.0:
; SVE-NEXT:    add z0.s, z0.s, #32 // =0x20
; SVE-NEXT:    lsr z0.s, z0.s, #6
; SVE-NEXT:    ret
;
; SVE2-LABEL: urshr_i32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.s
; SVE2-NEXT:    urshr z0.s, p0/m, z0.s, #6
; SVE2-NEXT:    ret
  %add = add nuw nsw <vscale x 4 x i32> %x, splat (i32 32)
  %sh = lshr <vscale x 4 x i32> %add, splat (i32 6)
  ret <vscale x 4 x i32> %sh
}

define <vscale x 4 x i32> @urshr_32_wide_trunc(<vscale x 4 x i32> %x) {
; SVE-LABEL: urshr_32_wide_trunc:
; SVE:       // %bb.0:
; SVE-NEXT:    uunpkhi z1.d, z0.s
; SVE-NEXT:    uunpklo z0.d, z0.s
; SVE-NEXT:    add z0.d, z0.d, #32 // =0x20
; SVE-NEXT:    add z1.d, z1.d, #32 // =0x20
; SVE-NEXT:    lsr z1.d, z1.d, #6
; SVE-NEXT:    lsr z0.d, z0.d, #6
; SVE-NEXT:    uzp1 z0.s, z0.s, z1.s
; SVE-NEXT:    ret
;
; SVE2-LABEL: urshr_32_wide_trunc:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.s
; SVE2-NEXT:    urshr z0.s, p0/m, z0.s, #6
; SVE2-NEXT:    ret
  %ext = zext <vscale x 4 x i32> %x to <vscale x 4 x i64>
  %add = add nuw nsw <vscale x 4 x i64> %ext, splat (i64 32)
  %sh = lshr <vscale x 4 x i64> %add, splat (i64 6)
  %sht = trunc <vscale x 4 x i64> %sh to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %sht
}

define <vscale x 4 x i32> @urshr_32_wide_trunc_nomerge(<vscale x 4 x i64> %ext) {
; SVE-LABEL: urshr_32_wide_trunc_nomerge:
; SVE:       // %bb.0:
; SVE-NEXT:    mov z2.d, #0x100000000
; SVE-NEXT:    add z0.d, z0.d, z2.d
; SVE-NEXT:    add z1.d, z1.d, z2.d
; SVE-NEXT:    lsr z1.d, z1.d, #33
; SVE-NEXT:    lsr z0.d, z0.d, #33
; SVE-NEXT:    uzp1 z0.s, z0.s, z1.s
; SVE-NEXT:    ret
;
; SVE2-LABEL: urshr_32_wide_trunc_nomerge:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.d
; SVE2-NEXT:    urshr z1.d, p0/m, z1.d, #33
; SVE2-NEXT:    urshr z0.d, p0/m, z0.d, #33
; SVE2-NEXT:    uzp1 z0.s, z0.s, z1.s
; SVE2-NEXT:    ret
  %add = add nuw nsw <vscale x 4 x i64> %ext, splat (i64 4294967296)
  %sh = lshr <vscale x 4 x i64> %add, splat (i64 33)
  %sht = trunc <vscale x 4 x i64> %sh to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %sht
}

define <vscale x 2 x i64> @urshr_i64(<vscale x 2 x i64> %x) {
; SVE-LABEL: urshr_i64:
; SVE:       // %bb.0:
; SVE-NEXT:    add z0.d, z0.d, #32 // =0x20
; SVE-NEXT:    lsr z0.d, z0.d, #6
; SVE-NEXT:    ret
;
; SVE2-LABEL: urshr_i64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.d
; SVE2-NEXT:    urshr z0.d, p0/m, z0.d, #6
; SVE2-NEXT:    ret
  %add = add nuw nsw <vscale x 2 x i64> %x, splat (i64 32)
  %sh = lshr <vscale x 2 x i64> %add, splat (i64 6)
  ret <vscale x 2 x i64> %sh
}