; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cyclone | FileCheck %s

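; Scalar saturating add: lane 0 of each vector operand feeds sqadd/uqadd on the
; s/d registers, and the result is moved to a GPR with fmov.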
define i32 @qadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: qadds:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd s0, s0, s1
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <4 x i32> %b, i32 0
  %vecext1 = extractelement <4 x i32> %c, i32 0
  %vqadd.i = tail call i32 @llvm.aarch64.neon.sqadd.i32(i32 %vecext, i32 %vecext1) nounwind
  ret i32 %vqadd.i
}

define i64 @qaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: qaddd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vecext1 = extractelement <2 x i64> %c, i32 0
  %vqadd.i = tail call i64 @llvm.aarch64.neon.sqadd.i64(i64 %vecext, i64 %vecext1) nounwind
  ret i64 %vqadd.i
}

define i32 @uqadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: uqadds:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd s0, s0, s1
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <4 x i32> %b, i32 0
  %vecext1 = extractelement <4 x i32> %c, i32 0
  %vqadd.i = tail call i32 @llvm.aarch64.neon.uqadd.i32(i32 %vecext, i32 %vecext1) nounwind
  ret i32 %vqadd.i
}

define i64 @uqaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: uqaddd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vecext1 = extractelement <2 x i64> %c, i32 0
  %vqadd.i = tail call i64 @llvm.aarch64.neon.uqadd.i64(i64 %vecext, i64 %vecext1) nounwind
  ret i64 %vqadd.i
}

declare i64 @llvm.aarch64.neon.uqadd.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.uqadd.i32(i32, i32) nounwind readnone
declare i64 @llvm.aarch64.neon.sqadd.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32) nounwind readnone

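; Scalar saturating subtract: same lane-0 pattern, selecting sqsub/uqsub.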
define i32 @qsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: qsubs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub s0, s0, s1
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <4 x i32> %b, i32 0
  %vecext1 = extractelement <4 x i32> %c, i32 0
  %vqsub.i = tail call i32 @llvm.aarch64.neon.sqsub.i32(i32 %vecext, i32 %vecext1) nounwind
  ret i32 %vqsub.i
}

define i64 @qsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: qsubd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vecext1 = extractelement <2 x i64> %c, i32 0
  %vqsub.i = tail call i64 @llvm.aarch64.neon.sqsub.i64(i64 %vecext, i64 %vecext1) nounwind
  ret i64 %vqsub.i
}

define i32 @uqsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: uqsubs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub s0, s0, s1
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <4 x i32> %b, i32 0
  %vecext1 = extractelement <4 x i32> %c, i32 0
  %vqsub.i = tail call i32 @llvm.aarch64.neon.uqsub.i32(i32 %vecext, i32 %vecext1) nounwind
  ret i32 %vqsub.i
}

define i64 @uqsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: uqsubd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vecext1 = extractelement <2 x i64> %c, i32 0
  %vqsub.i = tail call i64 @llvm.aarch64.neon.uqsub.i64(i64 %vecext, i64 %vecext1) nounwind
  ret i64 %vqsub.i
}

declare i64 @llvm.aarch64.neon.uqsub.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.uqsub.i32(i32, i32) nounwind readnone
declare i64 @llvm.aarch64.neon.sqsub.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32) nounwind readnone

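; Scalar saturating absolute value and negation (sqabs/sqneg) on a single lane-0 operand.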
define i32 @qabss(<4 x i32> %b, <4 x i32> %c) nounwind readnone {
; CHECK-LABEL: qabss:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqabs s0, s0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <4 x i32> %b, i32 0
  %vqabs.i = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %vecext) nounwind
  ret i32 %vqabs.i
}

define i64 @qabsd(<2 x i64> %b, <2 x i64> %c) nounwind readnone {
; CHECK-LABEL: qabsd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqabs d0, d0
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vqabs.i = tail call i64 @llvm.aarch64.neon.sqabs.i64(i64 %vecext) nounwind
  ret i64 %vqabs.i
}

define i32 @qnegs(<4 x i32> %b, <4 x i32> %c) nounwind readnone {
; CHECK-LABEL: qnegs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqneg s0, s0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <4 x i32> %b, i32 0
  %vqneg.i = tail call i32 @llvm.aarch64.neon.sqneg.i32(i32 %vecext) nounwind
  ret i32 %vqneg.i
}

define i64 @qnegd(<2 x i64> %b, <2 x i64> %c) nounwind readnone {
; CHECK-LABEL: qnegd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqneg d0, d0
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vqneg.i = tail call i64 @llvm.aarch64.neon.sqneg.i64(i64 %vecext) nounwind
  ret i64 %vqneg.i
}

declare i64 @llvm.aarch64.neon.sqneg.i64(i64) nounwind readnone
declare i32 @llvm.aarch64.neon.sqneg.i32(i32) nounwind readnone
declare i64 @llvm.aarch64.neon.sqabs.i64(i64) nounwind readnone
declare i32 @llvm.aarch64.neon.sqabs.i32(i32) nounwind readnone

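; Scalar saturating narrows (sqxtun/sqxtn/uqxtn) of the low i64 lane to an i32 result.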
define i32 @vqmovund(<2 x i64> %b) nounwind readnone {
; CHECK-LABEL: vqmovund:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqxtun s0, d0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vqmovun.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %vecext) nounwind
  ret i32 %vqmovun.i
}

define i32 @vqmovnd_s(<2 x i64> %b) nounwind readnone {
; CHECK-LABEL: vqmovnd_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqxtn s0, d0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vqmovn.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %vecext) nounwind
  ret i32 %vqmovn.i
}

define i32 @vqmovnd_u(<2 x i64> %b) nounwind readnone {
; CHECK-LABEL: vqmovnd_u:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqxtn s0, d0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vqmovn.i = tail call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %vecext) nounwind
  ret i32 %vqmovn.i
}

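; The narrow can take its operand from a non-zero vector lane or a saddlv
; reduction, and its result can be inserted directly into a vector lane.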
define i32 @uqxtn_ext(<4 x i32> noundef %a, <4 x i32> noundef %b, i32 %c, float %d, <2 x i64> %e) {
; CHECK-LABEL: uqxtn_ext:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v0.d[0], v3.d[1]
; CHECK-NEXT:    uqxtn s0, d0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %e1 = extractelement <2 x i64> %e, i64 1
  %r = tail call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %e1)
  ret i32 %r
}

define <4 x i32> @sqxtn_ins(<4 x i32> noundef %a, i64 %c) {
; CHECK-LABEL: sqxtn_ins:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov d1, x0
; CHECK-NEXT:    sqxtn s1, d1
; CHECK-NEXT:    mov v0.s[3], v1.s[0]
; CHECK-NEXT:    ret
entry:
  %vqmovnd_s64.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %c)
  %vecins = insertelement <4 x i32> %a, i32 %vqmovnd_s64.i, i64 3
  ret <4 x i32> %vecins
}

define <4 x i32> @sqxtun_insext(<4 x i32> noundef %a, <2 x i64> %e) {
; CHECK-LABEL: sqxtun_insext:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v1.d[0], v1.d[1]
; CHECK-NEXT:    sqxtun s1, d1
; CHECK-NEXT:    mov v0.s[3], v1.s[0]
; CHECK-NEXT:    ret
entry:
  %c = extractelement <2 x i64> %e, i64 1
  %vqmovnd_s64.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %c)
  %vecins = insertelement <4 x i32> %a, i32 %vqmovnd_s64.i, i64 3
  ret <4 x i32> %vecins
}

define <4 x i32> @saddluse(<4 x i32> noundef %a, <4 x i32> noundef %b, i32 %c, float %d, <2 x i64> %e) {
; CHECK-LABEL: saddluse:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddlv d1, v1.4s
; CHECK-NEXT:    sqxtn s1, d1
; CHECK-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-NEXT:    ret
entry:
  %vaddlvq_s32.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %b)
  %vqmovnd_s64.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %vaddlvq_s32.i)
  %vecins = insertelement <4 x i32> %a, i32 %vqmovnd_s64.i, i64 1
  ret <4 x i32> %vecins
}

declare i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64) nounwind readnone
declare i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64) nounwind readnone
declare i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64) nounwind readnone
declare i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32>)