; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s

declare <4 x i16> @llvm.smax.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.smin.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)

declare <4 x i16> @llvm.umin.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)

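; The tests below check that clamp-then-truncate sequences (smax+smin, smin+smax,
; or umin, followed by trunc) are combined into the narrowing clip instructions
; vnclip/vnclipu when the clamp bounds match the destination type's value range.
; The *_notopt variants use bounds that do not match that range and are expected
; to keep an explicit vmax/vmin/vminu followed by a plain vnsrl truncation.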
define void @trunc_sat_i8i16_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i16_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %1, <4 x i16> <i16 -128, i16 -128, i16 -128, i16 -128>)
  %3 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %2, <4 x i16> <i16 127, i16 127, i16 127, i16 127>)
  %4 = trunc <4 x i16> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i16_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i16_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %1, <4 x i16> <i16 127, i16 127, i16 127, i16 127>)
  %3 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %2, <4 x i16> <i16 -128, i16 -128, i16 -128, i16 -128>)
  %4 = trunc <4 x i16> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i16_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i16_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    li a0, -127
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    li a0, 128
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %1, <4 x i16> <i16 -127, i16 -127, i16 -127, i16 -127>)
  %3 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %2, <4 x i16> <i16 128, i16 128, i16 128, i16 128>)
  %4 = trunc <4 x i16> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u16_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
  %3 = trunc <4 x i16> %2 to <4 x i8>
  store <4 x i8> %3, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    li a0, 127
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 127, i16 127, i16 127, i16 127>)
  %3 = trunc <4 x i16> %2 to <4 x i8>
  store <4 x i8> %3, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %1, <4 x i16> zeroinitializer)
  %3 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %2, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
  %4 = trunc <4 x i16> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
  %3 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %2, <4 x i16> zeroinitializer)
  %4 = trunc <4 x i16> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}


define void @trunc_sat_i16i32_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i32_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a0, 1048568
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 -32767, i32 -32767, i32 -32767, i32 -32767>)
  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 32768, i32 32768, i32 32768, i32 32768>)
  %4 = trunc <4 x i32> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_i16i32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
  %4 = trunc <4 x i32> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_i16i32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
  %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
  %4 = trunc <4 x i32> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u32_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
  %3 = trunc <4 x i32> %2 to <4 x i16>
  store <4 x i16> %3, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u32_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
  %3 = trunc <4 x i32> %2 to <4 x i16>
  store <4 x i16> %3, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
  %4 = trunc <4 x i32> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    li a0, 50
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
  %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> <i32 50, i32 50, i32 50, i32 50>)
  %4 = trunc <4 x i32> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 8
  ret void
}


define void @trunc_sat_i32i64_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i32i64_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    addiw a0, a0, 1
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    slli a0, a0, 31
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -2147483647, i64 -2147483647, i64 -2147483647, i64 -2147483647>)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 2147483648, i64 2147483648, i64 2147483648, i64 2147483648>)
  %4 = trunc <4 x i64> %3 to <4 x i32>
  store <4 x i32> %4, ptr %y, align 32
  ret void
}

define void @trunc_sat_i32i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i32i64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
  %4 = trunc <4 x i64> %3 to <4 x i32>
  store <4 x i32> %4, ptr %y, align 32
  ret void
}

define void @trunc_sat_i32i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i32i64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
  %4 = trunc <4 x i64> %3 to <4 x i32>
  store <4 x i32> %4, ptr %y, align 32
  ret void
}


define void @trunc_sat_u32u64_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    addiw a0, a0, -1
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
  %3 = trunc <4 x i64> %2 to <4 x i32>
  store <4 x i32> %3, ptr %y, align 32
  ret void
}

define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
  %3 = trunc <4 x i64> %2 to <4 x i32>
  store <4 x i32> %3, ptr %y, align 32
  ret void
}


define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> zeroinitializer)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
  %4 = trunc <4 x i64> %3 to <4 x i32>
  store <4 x i32> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> zeroinitializer)
  %4 = trunc <4 x i64> %3 to <4 x i32>
  store <4 x i32> %4, ptr %y, align 8
  ret void
}

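; The remaining tests narrow by more than one halving of the element width
; (i32 or i64 down to i8, and i64 down to i16); the clamp-plus-truncate is
; expected to lower to a chain of vnclip/vnclipu steps, one per halving.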
define void @trunc_sat_i8i32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>)
  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
  %4 = trunc <4 x i32> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
  %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>)
  %4 = trunc <4 x i32> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
  %3 = trunc <4 x i32> %2 to <4 x i8>
  store <4 x i8> %3, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> zeroinitializer)
  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
  %4 = trunc <4 x i32> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
  %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> zeroinitializer)
  %4 = trunc <4 x i32> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v10, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -128, i64 -128, i64 -128, i64 -128>)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 127, i64 127, i64 127, i64 127>)
  %4 = trunc <4 x i64> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v10, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 127, i64 127, i64 127, i64 127>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> <i64 -128, i64 -128, i64 -128, i64 -128>)
  %4 = trunc <4 x i64> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 255, i64 255, i64 255, i64 255>)
  %3 = trunc <4 x i64> %2 to <4 x i8>
  store <4 x i8> %3, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> zeroinitializer)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 255, i64 255, i64 255, i64 255>)
  %4 = trunc <4 x i64> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 255, i64 255, i64 255, i64 255>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> zeroinitializer)
  %4 = trunc <4 x i64> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i16i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 32
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768>)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767>)
  %4 = trunc <4 x i64> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_i16i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 32
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768>)
  %4 = trunc <4 x i64> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u64_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    addiw a0, a0, -1
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 32
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767>)
  %3 = trunc <4 x i64> %2 to <4 x i16>
  store <4 x i16> %3, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 32
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 65535, i64 65535, i64 65535, i64 65535>)
  %3 = trunc <4 x i64> %2 to <4 x i16>
  store <4 x i16> %3, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 1, i64 1, i64 1, i64 1>)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 65535, i64 65535, i64 65535, i64 65535>)
  %4 = trunc <4 x i64> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u16u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    li a0, 50
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 65535, i64 65535, i64 65535, i64 65535>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> <i64 50, i64 50, i64 50, i64 50>)
  %4 = trunc <4 x i64> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 8
  ret void
}