xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll (revision cc82f1290a1e2157a6c0530d78d8cc84d2b8553d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s
3
4; Float
5
6declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f32.nxv2i32(<vscale x 2 x float>)
7declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f32.nxv4i32(<vscale x 4 x float>)
8declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f32.nxv8i32(<vscale x 8 x float>)
9declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f32.nxv4i16(<vscale x 4 x float>)
10declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f32.nxv8i16(<vscale x 8 x float>)
11declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f32.nxv2i64(<vscale x 2 x float>)
12declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f32.nxv4i64(<vscale x 4 x float>)
13
; fptoui.sat nxv2f32 -> nxv2i32. Expected lowering: predicated fcvtzu into .d
; lanes, lower clamp via fcmge-vs-#0.0 (inverted, then zeroing mov), upper
; clamp via fcmgt vs splat 0x4f7fffff with sel of 0xffffffff.
; NOTE(review): name says "signed" but the intrinsic is fptoui.sat —
; presumably copied from the fptosi test; renaming needs CHECK regen.
14define <vscale x 2 x i32> @test_signed_v2f32_v2i32(<vscale x 2 x float> %f) {
15; CHECK-LABEL: test_signed_v2f32_v2i32:
16; CHECK:       // %bb.0:
17; CHECK-NEXT:    ptrue p0.d
18; CHECK-NEXT:    mov w8, #1333788671 // =0x4f7fffff
19; CHECK-NEXT:    mov z1.s, w8
20; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, #0.0
21; CHECK-NEXT:    movprfx z2, z0
22; CHECK-NEXT:    fcvtzu z2.d, p0/m, z0.s
23; CHECK-NEXT:    not p1.b, p0/z, p1.b
24; CHECK-NEXT:    fcmgt p0.s, p0/z, z0.s, z1.s
25; CHECK-NEXT:    mov z0.d, #0xffffffff
26; CHECK-NEXT:    mov z2.d, p1/m, #0 // =0x0
27; CHECK-NEXT:    sel z0.d, p0, z0.d, z2.d
28; CHECK-NEXT:    ret
29    %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f32.nxv2i32(<vscale x 2 x float> %f)
30    ret <vscale x 2 x i32> %x
31}
32
; fptoui.sat nxv4f32 -> nxv4i32 (full .s width). Same clamp pattern as the
; nxv2 case but high saturation is done with a predicated mov #-1 instead of
; sel, since the all-ones i32 pattern is immediate-encodable.
33define <vscale x 4 x i32> @test_signed_v4f32_v4i32(<vscale x 4 x float> %f) {
34; CHECK-LABEL: test_signed_v4f32_v4i32:
35; CHECK:       // %bb.0:
36; CHECK-NEXT:    ptrue p0.s
37; CHECK-NEXT:    mov w8, #1333788671 // =0x4f7fffff
38; CHECK-NEXT:    mov z2.s, w8
39; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, #0.0
40; CHECK-NEXT:    movprfx z1, z0
41; CHECK-NEXT:    fcvtzu z1.s, p0/m, z0.s
42; CHECK-NEXT:    not p1.b, p0/z, p1.b
43; CHECK-NEXT:    fcmgt p0.s, p0/z, z0.s, z2.s
44; CHECK-NEXT:    mov z1.s, p1/m, #0 // =0x0
45; CHECK-NEXT:    mov z1.s, p0/m, #-1 // =0xffffffffffffffff
46; CHECK-NEXT:    mov z0.d, z1.d
47; CHECK-NEXT:    ret
48    %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f32.nxv4i32(<vscale x 4 x float> %f)
49    ret <vscale x 4 x i32> %x
50}
51
; fptoui.sat nxv8f32 -> nxv8i32: input spans two Z registers (z0/z1), so the
; fcvtzu + dual-clamp sequence is emitted once per half with a shared splat
; bound (z4) and separate predicates.
52define <vscale x 8 x i32> @test_signed_v8f32_v8i32(<vscale x 8 x float> %f) {
53; CHECK-LABEL: test_signed_v8f32_v8i32:
54; CHECK:       // %bb.0:
55; CHECK-NEXT:    ptrue p0.s
56; CHECK-NEXT:    mov w8, #1333788671 // =0x4f7fffff
57; CHECK-NEXT:    mov z4.s, w8
58; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, #0.0
59; CHECK-NEXT:    fcmge p2.s, p0/z, z1.s, #0.0
60; CHECK-NEXT:    movprfx z2, z0
61; CHECK-NEXT:    fcvtzu z2.s, p0/m, z0.s
62; CHECK-NEXT:    movprfx z3, z1
63; CHECK-NEXT:    fcvtzu z3.s, p0/m, z1.s
64; CHECK-NEXT:    fcmgt p3.s, p0/z, z0.s, z4.s
65; CHECK-NEXT:    not p1.b, p0/z, p1.b
66; CHECK-NEXT:    not p2.b, p0/z, p2.b
67; CHECK-NEXT:    fcmgt p0.s, p0/z, z1.s, z4.s
68; CHECK-NEXT:    mov z2.s, p1/m, #0 // =0x0
69; CHECK-NEXT:    mov z3.s, p2/m, #0 // =0x0
70; CHECK-NEXT:    mov z2.s, p3/m, #-1 // =0xffffffffffffffff
71; CHECK-NEXT:    mov z3.s, p0/m, #-1 // =0xffffffffffffffff
72; CHECK-NEXT:    mov z0.d, z2.d
73; CHECK-NEXT:    mov z1.d, z3.d
74; CHECK-NEXT:    ret
75    %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f32.nxv8i32(<vscale x 8 x float> %f)
76    ret <vscale x 8 x i32> %x
77}
78
; fptoui.sat nxv4f32 -> nxv4i16 (i16 held in .s lanes). Upper bound is built
; with mov+movk as 0x477fff00 (= 65535.0f); saturated lanes select the splat
; #65535 via sel.
79define <vscale x 4 x i16> @test_signed_v4f32_v4i16(<vscale x 4 x float> %f) {
80; CHECK-LABEL: test_signed_v4f32_v4i16:
81; CHECK:       // %bb.0:
82; CHECK-NEXT:    ptrue p0.s
83; CHECK-NEXT:    mov w8, #65280 // =0xff00
84; CHECK-NEXT:    movk w8, #18303, lsl #16
85; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, #0.0
86; CHECK-NEXT:    mov z1.s, w8
87; CHECK-NEXT:    movprfx z2, z0
88; CHECK-NEXT:    fcvtzu z2.s, p0/m, z0.s
89; CHECK-NEXT:    not p1.b, p0/z, p1.b
90; CHECK-NEXT:    fcmgt p0.s, p0/z, z0.s, z1.s
91; CHECK-NEXT:    mov z0.s, #65535 // =0xffff
92; CHECK-NEXT:    mov z2.s, p1/m, #0 // =0x0
93; CHECK-NEXT:    sel z0.s, p0, z0.s, z2.s
94; CHECK-NEXT:    ret
95    %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f32.nxv4i16(<vscale x 4 x float> %f)
96    ret <vscale x 4 x i16> %x
97}
98
; fptoui.sat nxv8f32 -> nxv8i16: each f32 half (z0/z1) is converted and
; clamped in .s lanes against 0x477fff00 (= 65535.0f), then the two halves
; are narrowed/interleaved into one .h vector with uzp1.
99define <vscale x 8 x i16> @test_signed_v8f32_v8i16(<vscale x 8 x float> %f) {
100; CHECK-LABEL: test_signed_v8f32_v8i16:
101; CHECK:       // %bb.0:
102; CHECK-NEXT:    ptrue p0.s
103; CHECK-NEXT:    mov w8, #65280 // =0xff00
104; CHECK-NEXT:    movk w8, #18303, lsl #16
105; CHECK-NEXT:    fcmge p1.s, p0/z, z1.s, #0.0
106; CHECK-NEXT:    fcmge p2.s, p0/z, z0.s, #0.0
107; CHECK-NEXT:    mov z2.s, w8
108; CHECK-NEXT:    movprfx z3, z1
109; CHECK-NEXT:    fcvtzu z3.s, p0/m, z1.s
110; CHECK-NEXT:    movprfx z4, z0
111; CHECK-NEXT:    fcvtzu z4.s, p0/m, z0.s
112; CHECK-NEXT:    fcmgt p3.s, p0/z, z1.s, z2.s
113; CHECK-NEXT:    not p1.b, p0/z, p1.b
114; CHECK-NEXT:    not p2.b, p0/z, p2.b
115; CHECK-NEXT:    fcmgt p0.s, p0/z, z0.s, z2.s
116; CHECK-NEXT:    mov z0.s, #65535 // =0xffff
117; CHECK-NEXT:    mov z3.s, p1/m, #0 // =0x0
118; CHECK-NEXT:    mov z4.s, p2/m, #0 // =0x0
119; CHECK-NEXT:    sel z1.s, p3, z0.s, z3.s
120; CHECK-NEXT:    sel z0.s, p0, z0.s, z4.s
121; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
122; CHECK-NEXT:    ret
123    %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f32.nxv8i16(<vscale x 8 x float> %f)
124    ret <vscale x 8 x i16> %x
125}
126
; fptoui.sat nxv2f32 -> nxv2i64: f32 in even .s lanes converted straight to
; .d with fcvtzu; upper bound splat is 0x5f7fffff (largest float below 2^64),
; overflow lanes set to all-ones i64 via predicated mov #-1.
127define <vscale x 2 x i64> @test_signed_v2f32_v2i64(<vscale x 2 x float> %f) {
128; CHECK-LABEL: test_signed_v2f32_v2i64:
129; CHECK:       // %bb.0:
130; CHECK-NEXT:    ptrue p0.d
131; CHECK-NEXT:    mov w8, #1602224127 // =0x5f7fffff
132; CHECK-NEXT:    mov z2.s, w8
133; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, #0.0
134; CHECK-NEXT:    movprfx z1, z0
135; CHECK-NEXT:    fcvtzu z1.d, p0/m, z0.s
136; CHECK-NEXT:    not p1.b, p0/z, p1.b
137; CHECK-NEXT:    fcmgt p0.s, p0/z, z0.s, z2.s
138; CHECK-NEXT:    mov z1.d, p1/m, #0 // =0x0
139; CHECK-NEXT:    mov z1.d, p0/m, #-1 // =0xffffffffffffffff
140; CHECK-NEXT:    mov z0.d, z1.d
141; CHECK-NEXT:    ret
142    %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f32.nxv2i64(<vscale x 2 x float> %f)
143    ret <vscale x 2 x i64> %x
144}
145
; fptoui.sat nxv4f32 -> nxv4i64: the f32 vector is widened with uunpklo/
; uunpkhi into two .d-lane halves, then each half gets the usual fcvtzu +
; low/high clamp sequence against splat 0x5f7fffff.
146define <vscale x 4 x i64> @test_signed_v4f32_v4i64(<vscale x 4 x float> %f) {
147; CHECK-LABEL: test_signed_v4f32_v4i64:
148; CHECK:       // %bb.0:
149; CHECK-NEXT:    uunpklo z2.d, z0.s
150; CHECK-NEXT:    uunpkhi z3.d, z0.s
151; CHECK-NEXT:    mov w8, #1602224127 // =0x5f7fffff
152; CHECK-NEXT:    ptrue p0.d
153; CHECK-NEXT:    mov z4.s, w8
154; CHECK-NEXT:    fcmge p1.s, p0/z, z2.s, #0.0
155; CHECK-NEXT:    fcmge p2.s, p0/z, z3.s, #0.0
156; CHECK-NEXT:    movprfx z0, z2
157; CHECK-NEXT:    fcvtzu z0.d, p0/m, z2.s
158; CHECK-NEXT:    movprfx z1, z3
159; CHECK-NEXT:    fcvtzu z1.d, p0/m, z3.s
160; CHECK-NEXT:    fcmgt p3.s, p0/z, z2.s, z4.s
161; CHECK-NEXT:    not p1.b, p0/z, p1.b
162; CHECK-NEXT:    not p2.b, p0/z, p2.b
163; CHECK-NEXT:    fcmgt p0.s, p0/z, z3.s, z4.s
164; CHECK-NEXT:    mov z0.d, p1/m, #0 // =0x0
165; CHECK-NEXT:    mov z1.d, p2/m, #0 // =0x0
166; CHECK-NEXT:    mov z0.d, p3/m, #-1 // =0xffffffffffffffff
167; CHECK-NEXT:    mov z1.d, p0/m, #-1 // =0xffffffffffffffff
168; CHECK-NEXT:    ret
169    %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f32.nxv4i64(<vscale x 4 x float> %f)
170    ret <vscale x 4 x i64> %x
171}
172
173; Double
174
175declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f64.nxv2i32(<vscale x 2 x double>)
176declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f64.nxv4i32(<vscale x 4 x double>)
177declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f64.nxv8i32(<vscale x 8 x double>)
178declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f64.nxv4i16(<vscale x 4 x double>)
179declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f64.nxv8i16(<vscale x 8 x double>)
180declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f64.nxv2i64(<vscale x 2 x double>)
181declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f64.nxv4i64(<vscale x 4 x double>)
182
; fptoui.sat nxv2f64 -> nxv2i32 (i32 in .d lanes). Upper bound built via
; mov+movk as 0x41efffffffe00000 (= 4294967295.0); saturated lanes take splat
; 0xffffffff through sel.
183define <vscale x 2 x i32> @test_signed_v2f64_v2i32(<vscale x 2 x double> %f) {
184; CHECK-LABEL: test_signed_v2f64_v2i32:
185; CHECK:       // %bb.0:
186; CHECK-NEXT:    ptrue p0.d
187; CHECK-NEXT:    mov x8, #281474974613504 // =0xffffffe00000
188; CHECK-NEXT:    movk x8, #16879, lsl #48
189; CHECK-NEXT:    fcmge p1.d, p0/z, z0.d, #0.0
190; CHECK-NEXT:    mov z1.d, x8
191; CHECK-NEXT:    movprfx z2, z0
192; CHECK-NEXT:    fcvtzu z2.d, p0/m, z0.d
193; CHECK-NEXT:    not p1.b, p0/z, p1.b
194; CHECK-NEXT:    fcmgt p0.d, p0/z, z0.d, z1.d
195; CHECK-NEXT:    mov z0.d, #0xffffffff
196; CHECK-NEXT:    mov z2.d, p1/m, #0 // =0x0
197; CHECK-NEXT:    sel z0.d, p0, z0.d, z2.d
198; CHECK-NEXT:    ret
199    %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f64.nxv2i32(<vscale x 2 x double> %f)
200    ret <vscale x 2 x i32> %x
201}
202
; fptoui.sat nxv4f64 -> nxv4i32: two .d-register halves each converted and
; clamped against 0x41efffffffe00000 (= 4294967295.0), then merged into .s
; lanes with uzp1.
203define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
204; CHECK-LABEL: test_signed_v4f64_v4i32:
205; CHECK:       // %bb.0:
206; CHECK-NEXT:    ptrue p0.d
207; CHECK-NEXT:    mov x8, #281474974613504 // =0xffffffe00000
208; CHECK-NEXT:    movk x8, #16879, lsl #48
209; CHECK-NEXT:    fcmge p1.d, p0/z, z1.d, #0.0
210; CHECK-NEXT:    fcmge p2.d, p0/z, z0.d, #0.0
211; CHECK-NEXT:    mov z2.d, x8
212; CHECK-NEXT:    movprfx z3, z1
213; CHECK-NEXT:    fcvtzu z3.d, p0/m, z1.d
214; CHECK-NEXT:    movprfx z4, z0
215; CHECK-NEXT:    fcvtzu z4.d, p0/m, z0.d
216; CHECK-NEXT:    fcmgt p3.d, p0/z, z1.d, z2.d
217; CHECK-NEXT:    not p1.b, p0/z, p1.b
218; CHECK-NEXT:    not p2.b, p0/z, p2.b
219; CHECK-NEXT:    fcmgt p0.d, p0/z, z0.d, z2.d
220; CHECK-NEXT:    mov z0.d, #0xffffffff
221; CHECK-NEXT:    mov z3.d, p1/m, #0 // =0x0
222; CHECK-NEXT:    mov z4.d, p2/m, #0 // =0x0
223; CHECK-NEXT:    sel z1.d, p3, z0.d, z3.d
224; CHECK-NEXT:    sel z0.d, p0, z0.d, z4.d
225; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
226; CHECK-NEXT:    ret
227    %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f64.nxv4i32(<vscale x 4 x double> %f)
228    ret <vscale x 4 x i32> %x
229}
230
; fptoui.sat nxv8f64 -> nxv8i32: four .d input registers (z0-z3). Register
; pressure on predicates forces spilling p4-p6 to an SVE stack slot (hence
; the addvl/str/ldr frame and cfi_escape for the VG-scaled CFA). Each quarter
; gets the fcvtzu + dual clamp, then pairs are merged with uzp1.
231define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
232; CHECK-LABEL: test_signed_v8f64_v8i32:
233; CHECK:       // %bb.0:
234; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
235; CHECK-NEXT:    addvl sp, sp, #-1
236; CHECK-NEXT:    str p6, [sp, #5, mul vl] // 2-byte Folded Spill
237; CHECK-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
238; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
239; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
240; CHECK-NEXT:    .cfi_offset w29, -16
241; CHECK-NEXT:    ptrue p0.d
242; CHECK-NEXT:    mov x8, #281474974613504 // =0xffffffe00000
243; CHECK-NEXT:    movk x8, #16879, lsl #48
244; CHECK-NEXT:    fcmge p1.d, p0/z, z1.d, #0.0
245; CHECK-NEXT:    fcmge p2.d, p0/z, z0.d, #0.0
246; CHECK-NEXT:    fcmge p3.d, p0/z, z3.d, #0.0
247; CHECK-NEXT:    fcmge p4.d, p0/z, z2.d, #0.0
248; CHECK-NEXT:    movprfx z5, z1
249; CHECK-NEXT:    fcvtzu z5.d, p0/m, z1.d
250; CHECK-NEXT:    mov z4.d, x8
251; CHECK-NEXT:    movprfx z6, z0
252; CHECK-NEXT:    fcvtzu z6.d, p0/m, z0.d
253; CHECK-NEXT:    movprfx z7, z3
254; CHECK-NEXT:    fcvtzu z7.d, p0/m, z3.d
255; CHECK-NEXT:    movprfx z24, z2
256; CHECK-NEXT:    fcvtzu z24.d, p0/m, z2.d
257; CHECK-NEXT:    not p1.b, p0/z, p1.b
258; CHECK-NEXT:    fcmgt p5.d, p0/z, z1.d, z4.d
259; CHECK-NEXT:    fcmgt p6.d, p0/z, z0.d, z4.d
260; CHECK-NEXT:    not p2.b, p0/z, p2.b
261; CHECK-NEXT:    mov z0.d, #0xffffffff
262; CHECK-NEXT:    not p3.b, p0/z, p3.b
263; CHECK-NEXT:    mov z5.d, p1/m, #0 // =0x0
264; CHECK-NEXT:    fcmgt p1.d, p0/z, z3.d, z4.d
265; CHECK-NEXT:    not p4.b, p0/z, p4.b
266; CHECK-NEXT:    fcmgt p0.d, p0/z, z2.d, z4.d
267; CHECK-NEXT:    mov z6.d, p2/m, #0 // =0x0
268; CHECK-NEXT:    mov z7.d, p3/m, #0 // =0x0
269; CHECK-NEXT:    mov z24.d, p4/m, #0 // =0x0
270; CHECK-NEXT:    sel z1.d, p5, z0.d, z5.d
271; CHECK-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
272; CHECK-NEXT:    sel z2.d, p6, z0.d, z6.d
273; CHECK-NEXT:    ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
274; CHECK-NEXT:    sel z3.d, p1, z0.d, z7.d
275; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
276; CHECK-NEXT:    sel z4.d, p0, z0.d, z24.d
277; CHECK-NEXT:    uzp1 z0.s, z2.s, z1.s
278; CHECK-NEXT:    uzp1 z1.s, z4.s, z3.s
279; CHECK-NEXT:    addvl sp, sp, #1
280; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
281; CHECK-NEXT:    ret
282    %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f64.nxv8i32(<vscale x 8 x double> %f)
283    ret <vscale x 8 x i32> %x
284}
285
; fptoui.sat nxv4f64 -> nxv4i16 (i16 in .s lanes after uzp1). Upper bound is
; 0x40efffe000000000 (= 65535.0); overflow lanes select splat #65535.
286define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
287; CHECK-LABEL: test_signed_v4f64_v4i16:
288; CHECK:       // %bb.0:
289; CHECK-NEXT:    ptrue p0.d
290; CHECK-NEXT:    mov x8, #281337537757184 // =0xffe000000000
291; CHECK-NEXT:    movk x8, #16623, lsl #48
292; CHECK-NEXT:    fcmge p1.d, p0/z, z1.d, #0.0
293; CHECK-NEXT:    fcmge p2.d, p0/z, z0.d, #0.0
294; CHECK-NEXT:    mov z2.d, x8
295; CHECK-NEXT:    movprfx z3, z1
296; CHECK-NEXT:    fcvtzu z3.d, p0/m, z1.d
297; CHECK-NEXT:    movprfx z4, z0
298; CHECK-NEXT:    fcvtzu z4.d, p0/m, z0.d
299; CHECK-NEXT:    fcmgt p3.d, p0/z, z1.d, z2.d
300; CHECK-NEXT:    not p1.b, p0/z, p1.b
301; CHECK-NEXT:    not p2.b, p0/z, p2.b
302; CHECK-NEXT:    fcmgt p0.d, p0/z, z0.d, z2.d
303; CHECK-NEXT:    mov z0.d, #65535 // =0xffff
304; CHECK-NEXT:    mov z3.d, p1/m, #0 // =0x0
305; CHECK-NEXT:    mov z4.d, p2/m, #0 // =0x0
306; CHECK-NEXT:    sel z1.d, p3, z0.d, z3.d
307; CHECK-NEXT:    sel z0.d, p0, z0.d, z4.d
308; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
309; CHECK-NEXT:    ret
310    %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f64.nxv4i16(<vscale x 4 x double> %f)
311    ret <vscale x 4 x i16> %x
312}
313
; fptoui.sat nxv8f64 -> nxv8i16: four .d input registers, clamped against
; 0x40efffe000000000 (= 65535.0), narrowed in two uzp1 stages (.s then .h).
; Predicate pressure again spills p4-p6 to an SVE stack slot.
314define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
315; CHECK-LABEL: test_signed_v8f64_v8i16:
316; CHECK:       // %bb.0:
317; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
318; CHECK-NEXT:    addvl sp, sp, #-1
319; CHECK-NEXT:    str p6, [sp, #5, mul vl] // 2-byte Folded Spill
320; CHECK-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
321; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
322; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
323; CHECK-NEXT:    .cfi_offset w29, -16
324; CHECK-NEXT:    ptrue p0.d
325; CHECK-NEXT:    mov x8, #281337537757184 // =0xffe000000000
326; CHECK-NEXT:    movk x8, #16623, lsl #48
327; CHECK-NEXT:    fcmge p1.d, p0/z, z3.d, #0.0
328; CHECK-NEXT:    fcmge p2.d, p0/z, z2.d, #0.0
329; CHECK-NEXT:    fcmge p3.d, p0/z, z1.d, #0.0
330; CHECK-NEXT:    fcmge p4.d, p0/z, z0.d, #0.0
331; CHECK-NEXT:    movprfx z5, z3
332; CHECK-NEXT:    fcvtzu z5.d, p0/m, z3.d
333; CHECK-NEXT:    mov z4.d, x8
334; CHECK-NEXT:    movprfx z6, z2
335; CHECK-NEXT:    fcvtzu z6.d, p0/m, z2.d
336; CHECK-NEXT:    movprfx z7, z1
337; CHECK-NEXT:    fcvtzu z7.d, p0/m, z1.d
338; CHECK-NEXT:    movprfx z24, z0
339; CHECK-NEXT:    fcvtzu z24.d, p0/m, z0.d
340; CHECK-NEXT:    not p1.b, p0/z, p1.b
341; CHECK-NEXT:    fcmgt p5.d, p0/z, z3.d, z4.d
342; CHECK-NEXT:    fcmgt p6.d, p0/z, z2.d, z4.d
343; CHECK-NEXT:    not p2.b, p0/z, p2.b
344; CHECK-NEXT:    mov z2.d, #65535 // =0xffff
345; CHECK-NEXT:    not p3.b, p0/z, p3.b
346; CHECK-NEXT:    mov z5.d, p1/m, #0 // =0x0
347; CHECK-NEXT:    fcmgt p1.d, p0/z, z1.d, z4.d
348; CHECK-NEXT:    not p4.b, p0/z, p4.b
349; CHECK-NEXT:    fcmgt p0.d, p0/z, z0.d, z4.d
350; CHECK-NEXT:    mov z6.d, p2/m, #0 // =0x0
351; CHECK-NEXT:    mov z7.d, p3/m, #0 // =0x0
352; CHECK-NEXT:    mov z24.d, p4/m, #0 // =0x0
353; CHECK-NEXT:    sel z0.d, p5, z2.d, z5.d
354; CHECK-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
355; CHECK-NEXT:    sel z1.d, p6, z2.d, z6.d
356; CHECK-NEXT:    ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
357; CHECK-NEXT:    sel z3.d, p1, z2.d, z7.d
358; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
359; CHECK-NEXT:    sel z2.d, p0, z2.d, z24.d
360; CHECK-NEXT:    uzp1 z0.s, z1.s, z0.s
361; CHECK-NEXT:    uzp1 z1.s, z2.s, z3.s
362; CHECK-NEXT:    uzp1 z0.h, z1.h, z0.h
363; CHECK-NEXT:    addvl sp, sp, #1
364; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
365; CHECK-NEXT:    ret
366    %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f64.nxv8i16(<vscale x 8 x double> %f)
367    ret <vscale x 8 x i16> %x
368}
369
; fptoui.sat nxv2f64 -> nxv2i64: full-width case; upper bound splat is
; 0x43efffffffffffff (largest double below 2^64), overflow lanes set to
; all-ones via predicated mov #-1.
370define <vscale x 2 x i64> @test_signed_v2f64_v2i64(<vscale x 2 x double> %f) {
371; CHECK-LABEL: test_signed_v2f64_v2i64:
372; CHECK:       // %bb.0:
373; CHECK-NEXT:    ptrue p0.d
374; CHECK-NEXT:    mov x8, #4895412794951729151 // =0x43efffffffffffff
375; CHECK-NEXT:    mov z2.d, x8
376; CHECK-NEXT:    fcmge p1.d, p0/z, z0.d, #0.0
377; CHECK-NEXT:    movprfx z1, z0
378; CHECK-NEXT:    fcvtzu z1.d, p0/m, z0.d
379; CHECK-NEXT:    not p1.b, p0/z, p1.b
380; CHECK-NEXT:    fcmgt p0.d, p0/z, z0.d, z2.d
381; CHECK-NEXT:    mov z1.d, p1/m, #0 // =0x0
382; CHECK-NEXT:    mov z1.d, p0/m, #-1 // =0xffffffffffffffff
383; CHECK-NEXT:    mov z0.d, z1.d
384; CHECK-NEXT:    ret
385    %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f64.nxv2i64(<vscale x 2 x double> %f)
386    ret <vscale x 2 x i64> %x
387}
388
; fptoui.sat nxv4f64 -> nxv4i64: two-register version of the case above,
; sharing one splat bound (z4) with per-half predicates.
389define <vscale x 4 x i64> @test_signed_v4f64_v4i64(<vscale x 4 x double> %f) {
390; CHECK-LABEL: test_signed_v4f64_v4i64:
391; CHECK:       // %bb.0:
392; CHECK-NEXT:    ptrue p0.d
393; CHECK-NEXT:    mov x8, #4895412794951729151 // =0x43efffffffffffff
394; CHECK-NEXT:    mov z4.d, x8
395; CHECK-NEXT:    fcmge p1.d, p0/z, z0.d, #0.0
396; CHECK-NEXT:    fcmge p2.d, p0/z, z1.d, #0.0
397; CHECK-NEXT:    movprfx z2, z0
398; CHECK-NEXT:    fcvtzu z2.d, p0/m, z0.d
399; CHECK-NEXT:    movprfx z3, z1
400; CHECK-NEXT:    fcvtzu z3.d, p0/m, z1.d
401; CHECK-NEXT:    fcmgt p3.d, p0/z, z0.d, z4.d
402; CHECK-NEXT:    not p1.b, p0/z, p1.b
403; CHECK-NEXT:    not p2.b, p0/z, p2.b
404; CHECK-NEXT:    fcmgt p0.d, p0/z, z1.d, z4.d
405; CHECK-NEXT:    mov z2.d, p1/m, #0 // =0x0
406; CHECK-NEXT:    mov z3.d, p2/m, #0 // =0x0
407; CHECK-NEXT:    mov z2.d, p3/m, #-1 // =0xffffffffffffffff
408; CHECK-NEXT:    mov z3.d, p0/m, #-1 // =0xffffffffffffffff
409; CHECK-NEXT:    mov z0.d, z2.d
410; CHECK-NEXT:    mov z1.d, z3.d
411; CHECK-NEXT:    ret
412    %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f64.nxv4i64(<vscale x 4 x double> %f)
413    ret <vscale x 4 x i64> %x
414}
415
416
417; half
418
419declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f16.nxv2i32(<vscale x 2 x half>)
420declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f16.nxv4i32(<vscale x 4 x half>)
421declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f16.nxv8i32(<vscale x 8 x half>)
422declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f16.nxv4i16(<vscale x 4 x half>)
423declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f16.nxv8i16(<vscale x 8 x half>)
424declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f16.nxv2i64(<vscale x 2 x half>)
425declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f16.nxv4i64(<vscale x 4 x half>)
426
; fptoui.sat nxv2f16 -> nxv2i32 (result in .d lanes). The upper bound splat
; is 0x7bff, the largest finite f16, since every finite half fits in u32;
; lanes above it select splat 0xffffffff.
427define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) {
428; CHECK-LABEL: test_signed_v2f16_v2i32:
429; CHECK:       // %bb.0:
430; CHECK-NEXT:    ptrue p0.d
431; CHECK-NEXT:    mov w8, #31743 // =0x7bff
432; CHECK-NEXT:    mov z1.h, w8
433; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, #0.0
434; CHECK-NEXT:    movprfx z2, z0
435; CHECK-NEXT:    fcvtzu z2.d, p0/m, z0.h
436; CHECK-NEXT:    not p1.b, p0/z, p1.b
437; CHECK-NEXT:    fcmgt p0.h, p0/z, z0.h, z1.h
438; CHECK-NEXT:    mov z0.d, #0xffffffff
439; CHECK-NEXT:    mov z2.d, p1/m, #0 // =0x0
440; CHECK-NEXT:    sel z0.d, p0, z0.d, z2.d
441; CHECK-NEXT:    ret
442    %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f16.nxv2i32(<vscale x 2 x half> %f)
443    ret <vscale x 2 x i32> %x
444}
445
; fptoui.sat nxv4f16 -> nxv4i32: f16 in .s-lane positions converted directly
; to .s with fcvtzu; bound is max-finite-half 0x7bff, saturation via mov #-1.
446define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) {
447; CHECK-LABEL: test_signed_v4f16_v4i32:
448; CHECK:       // %bb.0:
449; CHECK-NEXT:    ptrue p0.s
450; CHECK-NEXT:    mov w8, #31743 // =0x7bff
451; CHECK-NEXT:    mov z2.h, w8
452; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, #0.0
453; CHECK-NEXT:    movprfx z1, z0
454; CHECK-NEXT:    fcvtzu z1.s, p0/m, z0.h
455; CHECK-NEXT:    not p1.b, p0/z, p1.b
456; CHECK-NEXT:    fcmgt p0.h, p0/z, z0.h, z2.h
457; CHECK-NEXT:    mov z1.s, p1/m, #0 // =0x0
458; CHECK-NEXT:    mov z1.s, p0/m, #-1 // =0xffffffffffffffff
459; CHECK-NEXT:    mov z0.d, z1.d
460; CHECK-NEXT:    ret
461    %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f16.nxv4i32(<vscale x 4 x half> %f)
462    ret <vscale x 4 x i32> %x
463}
464
; fptoui.sat nxv8f16 -> nxv8i32: input is widened with uunpklo/uunpkhi into
; two .s-lane halves; each half converted and clamped against splat 0x7bff.
465define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
466; CHECK-LABEL: test_signed_v8f16_v8i32:
467; CHECK:       // %bb.0:
468; CHECK-NEXT:    uunpklo z2.s, z0.h
469; CHECK-NEXT:    uunpkhi z3.s, z0.h
470; CHECK-NEXT:    mov w8, #31743 // =0x7bff
471; CHECK-NEXT:    ptrue p0.s
472; CHECK-NEXT:    mov z4.h, w8
473; CHECK-NEXT:    fcmge p1.h, p0/z, z2.h, #0.0
474; CHECK-NEXT:    fcmge p2.h, p0/z, z3.h, #0.0
475; CHECK-NEXT:    movprfx z0, z2
476; CHECK-NEXT:    fcvtzu z0.s, p0/m, z2.h
477; CHECK-NEXT:    movprfx z1, z3
478; CHECK-NEXT:    fcvtzu z1.s, p0/m, z3.h
479; CHECK-NEXT:    fcmgt p3.h, p0/z, z2.h, z4.h
480; CHECK-NEXT:    not p1.b, p0/z, p1.b
481; CHECK-NEXT:    not p2.b, p0/z, p2.b
482; CHECK-NEXT:    fcmgt p0.h, p0/z, z3.h, z4.h
483; CHECK-NEXT:    mov z0.s, p1/m, #0 // =0x0
484; CHECK-NEXT:    mov z1.s, p2/m, #0 // =0x0
485; CHECK-NEXT:    mov z0.s, p3/m, #-1 // =0xffffffffffffffff
486; CHECK-NEXT:    mov z1.s, p0/m, #-1 // =0xffffffffffffffff
487; CHECK-NEXT:    ret
488    %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f16.nxv8i32(<vscale x 8 x half> %f)
489    ret <vscale x 8 x i32> %x
490}
491
; fptoui.sat nxv4f16 -> nxv4i16 (i16 in .s lanes). Bound 0x7bff (max finite
; half, 65504.0, which is below 65535), so overflow lanes select splat #65535.
492define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
493; CHECK-LABEL: test_signed_v4f16_v4i16:
494; CHECK:       // %bb.0:
495; CHECK-NEXT:    ptrue p0.s
496; CHECK-NEXT:    mov w8, #31743 // =0x7bff
497; CHECK-NEXT:    mov z1.h, w8
498; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, #0.0
499; CHECK-NEXT:    movprfx z2, z0
500; CHECK-NEXT:    fcvtzu z2.s, p0/m, z0.h
501; CHECK-NEXT:    not p1.b, p0/z, p1.b
502; CHECK-NEXT:    fcmgt p0.h, p0/z, z0.h, z1.h
503; CHECK-NEXT:    mov z0.s, #65535 // =0xffff
504; CHECK-NEXT:    mov z2.s, p1/m, #0 // =0x0
505; CHECK-NEXT:    sel z0.s, p0, z0.s, z2.s
506; CHECK-NEXT:    ret
507    %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f16.nxv4i16(<vscale x 4 x half> %f)
508    ret <vscale x 4 x i16> %x
509}
510
; fptoui.sat nxv8f16 -> nxv8i16: element-width-preserving case, all in .h
; lanes; high saturation via predicated mov #-1.
511define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
512; CHECK-LABEL: test_signed_v8f16_v8i16:
513; CHECK:       // %bb.0:
514; CHECK-NEXT:    ptrue p0.h
515; CHECK-NEXT:    mov w8, #31743 // =0x7bff
516; CHECK-NEXT:    mov z2.h, w8
517; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, #0.0
518; CHECK-NEXT:    movprfx z1, z0
519; CHECK-NEXT:    fcvtzu z1.h, p0/m, z0.h
520; CHECK-NEXT:    not p1.b, p0/z, p1.b
521; CHECK-NEXT:    fcmgt p0.h, p0/z, z0.h, z2.h
522; CHECK-NEXT:    mov z1.h, p1/m, #0 // =0x0
523; CHECK-NEXT:    mov z1.h, p0/m, #-1 // =0xffffffffffffffff
524; CHECK-NEXT:    mov z0.d, z1.d
525; CHECK-NEXT:    ret
526    %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f16.nxv8i16(<vscale x 8 x half> %f)
527    ret <vscale x 8 x i16> %x
528}
529
; fptoui.sat nxv2f16 -> nxv2i64: f16 converted straight to .d lanes with
; fcvtzu; bound 0x7bff (max finite half), saturation via mov #-1.
530define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
531; CHECK-LABEL: test_signed_v2f16_v2i64:
532; CHECK:       // %bb.0:
533; CHECK-NEXT:    ptrue p0.d
534; CHECK-NEXT:    mov w8, #31743 // =0x7bff
535; CHECK-NEXT:    mov z2.h, w8
536; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, #0.0
537; CHECK-NEXT:    movprfx z1, z0
538; CHECK-NEXT:    fcvtzu z1.d, p0/m, z0.h
539; CHECK-NEXT:    not p1.b, p0/z, p1.b
540; CHECK-NEXT:    fcmgt p0.h, p0/z, z0.h, z2.h
541; CHECK-NEXT:    mov z1.d, p1/m, #0 // =0x0
542; CHECK-NEXT:    mov z1.d, p0/m, #-1 // =0xffffffffffffffff
543; CHECK-NEXT:    mov z0.d, z1.d
544; CHECK-NEXT:    ret
545    %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f16.nxv2i64(<vscale x 2 x half> %f)
546    ret <vscale x 2 x i64> %x
547}
548
; fptoui.sat nxv4f16 -> nxv4i64: input widened with uunpklo/uunpkhi to two
; .d-lane halves, each converted and clamped against splat 0x7bff.
549define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
550; CHECK-LABEL: test_signed_v4f16_v4i64:
551; CHECK:       // %bb.0:
552; CHECK-NEXT:    uunpklo z2.d, z0.s
553; CHECK-NEXT:    uunpkhi z3.d, z0.s
554; CHECK-NEXT:    mov w8, #31743 // =0x7bff
555; CHECK-NEXT:    ptrue p0.d
556; CHECK-NEXT:    mov z4.h, w8
557; CHECK-NEXT:    fcmge p1.h, p0/z, z2.h, #0.0
558; CHECK-NEXT:    fcmge p2.h, p0/z, z3.h, #0.0
559; CHECK-NEXT:    movprfx z0, z2
560; CHECK-NEXT:    fcvtzu z0.d, p0/m, z2.h
561; CHECK-NEXT:    movprfx z1, z3
562; CHECK-NEXT:    fcvtzu z1.d, p0/m, z3.h
563; CHECK-NEXT:    fcmgt p3.h, p0/z, z2.h, z4.h
564; CHECK-NEXT:    not p1.b, p0/z, p1.b
565; CHECK-NEXT:    not p2.b, p0/z, p2.b
566; CHECK-NEXT:    fcmgt p0.h, p0/z, z3.h, z4.h
567; CHECK-NEXT:    mov z0.d, p1/m, #0 // =0x0
568; CHECK-NEXT:    mov z1.d, p2/m, #0 // =0x0
569; CHECK-NEXT:    mov z0.d, p3/m, #-1 // =0xffffffffffffffff
570; CHECK-NEXT:    mov z1.d, p0/m, #-1 // =0xffffffffffffffff
571; CHECK-NEXT:    ret
572    %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f16.nxv4i64(<vscale x 4 x half> %f)
573    ret <vscale x 4 x i64> %x
574}
575
576