; xref: /llvm-project/llvm/test/CodeGen/AArch64/smul_fix_sat.ll (revision 19008d32182ebbe421aaa222ee8af5c3e134e550)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s

; Saturating signed fixed-point multiply, i32 with scale 2.
define i32 @func(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: func:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smull x9, w0, w1
; CHECK-NEXT:    mov w8, #2147483647 // =0x7fffffff
; CHECK-NEXT:    lsr x10, x9, #32
; CHECK-NEXT:    extr w9, w10, w9, #2
; CHECK-NEXT:    cmp w10, #1
; CHECK-NEXT:    csel w8, w8, w9, gt
; CHECK-NEXT:    cmn w10, #2
; CHECK-NEXT:    mov w9, #-2147483648 // =0x80000000
; CHECK-NEXT:    csel w0, w9, w8, lt
; CHECK-NEXT:    ret
  %tmp = call i32 @llvm.smul.fix.sat.i32(i32 %x, i32 %y, i32 2)
  ret i32 %tmp
}

; Saturating signed fixed-point multiply, i64 with scale 2.
define i64 @func2(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: func2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul x9, x0, x1
; CHECK-NEXT:    mov x8, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-NEXT:    smulh x10, x0, x1
; CHECK-NEXT:    extr x9, x10, x9, #2
; CHECK-NEXT:    cmp x10, #1
; CHECK-NEXT:    csel x8, x8, x9, gt
; CHECK-NEXT:    cmn x10, #2
; CHECK-NEXT:    mov x9, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT:    csel x0, x9, x8, lt
; CHECK-NEXT:    ret
  %tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 2)
  ret i64 %tmp
}

; Saturating signed fixed-point multiply on an illegal type (i4, scale 2);
; the operands are widened (sbfx/lsl) and the result re-extended (asr).
define i4 @func3(i4 %x, i4 %y) nounwind {
; CHECK-LABEL: func3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sbfx w9, w1, #0, #4
; CHECK-NEXT:    lsl w10, w0, #28
; CHECK-NEXT:    mov w8, #2147483647 // =0x7fffffff
; CHECK-NEXT:    smull x9, w10, w9
; CHECK-NEXT:    lsr x10, x9, #32
; CHECK-NEXT:    extr w9, w10, w9, #2
; CHECK-NEXT:    cmp w10, #1
; CHECK-NEXT:    csel w8, w8, w9, gt
; CHECK-NEXT:    cmn w10, #2
; CHECK-NEXT:    mov w9, #-2147483648 // =0x80000000
; CHECK-NEXT:    csel w8, w9, w8, lt
; CHECK-NEXT:    asr w0, w8, #28
; CHECK-NEXT:    ret
  %tmp = call i4 @llvm.smul.fix.sat.i4(i4 %x, i4 %y, i32 2)
  ret i4 %tmp
}

;; These result in regular integer multiplication with a saturation check.
; Scale 0, i32.
define i32 @func4(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: func4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smull x9, w0, w1
; CHECK-NEXT:    eor w10, w0, w1
; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
; CHECK-NEXT:    cmp w10, #0
; CHECK-NEXT:    cinv w8, w8, ge
; CHECK-NEXT:    cmp x9, w9, sxtw
; CHECK-NEXT:    csel w0, w8, w9, ne
; CHECK-NEXT:    ret
  %tmp = call i32 @llvm.smul.fix.sat.i32(i32 %x, i32 %y, i32 0)
  ret i32 %tmp
}

; Scale 0, i64 (overflow detected by comparing smulh against the sign bits
; of the low product).
define i64 @func5(i64 %x, i64 %y) {
; CHECK-LABEL: func5:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul x9, x0, x1
; CHECK-NEXT:    eor x11, x0, x1
; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT:    cmp x11, #0
; CHECK-NEXT:    smulh x10, x0, x1
; CHECK-NEXT:    cinv x8, x8, ge
; CHECK-NEXT:    cmp x10, x9, asr #63
; CHECK-NEXT:    csel x0, x8, x9, ne
; CHECK-NEXT:    ret
  %tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 0)
  ret i64 %tmp
}

; Scale 0 on an illegal type (i4).
define i4 @func6(i4 %x, i4 %y) nounwind {
; CHECK-LABEL: func6:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sbfx w9, w1, #0, #4
; CHECK-NEXT:    lsl w10, w0, #28
; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
; CHECK-NEXT:    smull x11, w10, w9
; CHECK-NEXT:    eor w9, w10, w9
; CHECK-NEXT:    cmp w9, #0
; CHECK-NEXT:    cinv w8, w8, ge
; CHECK-NEXT:    cmp x11, w11, sxtw
; CHECK-NEXT:    csel w8, w8, w11, ne
; CHECK-NEXT:    asr w0, w8, #28
; CHECK-NEXT:    ret
  %tmp = call i4 @llvm.smul.fix.sat.i4(i4 %x, i4 %y, i32 0)
  ret i4 %tmp
}

; Scale 32 (half the bit width), i64.
define i64 @func7(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: func7:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul x9, x0, x1
; CHECK-NEXT:    mov w8, #2147483647 // =0x7fffffff
; CHECK-NEXT:    mov x11, #-2147483648 // =0xffffffff80000000
; CHECK-NEXT:    smulh x10, x0, x1
; CHECK-NEXT:    extr x9, x10, x9, #32
; CHECK-NEXT:    cmp x10, x8
; CHECK-NEXT:    mov x8, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-NEXT:    csel x8, x8, x9, gt
; CHECK-NEXT:    cmp x10, x11
; CHECK-NEXT:    mov x9, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT:    csel x0, x9, x8, lt
; CHECK-NEXT:    ret
  %tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 32)
  ret i64 %tmp
}

; Scale 63 (maximum scale for i64).
define i64 @func8(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: func8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul x9, x0, x1
; CHECK-NEXT:    mov x8, #4611686018427387903 // =0x3fffffffffffffff
; CHECK-NEXT:    mov x11, #-4611686018427387904 // =0xc000000000000000
; CHECK-NEXT:    smulh x10, x0, x1
; CHECK-NEXT:    extr x9, x10, x9, #63
; CHECK-NEXT:    cmp x10, x8
; CHECK-NEXT:    mov x8, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-NEXT:    csel x8, x8, x9, gt
; CHECK-NEXT:    cmp x10, x11
; CHECK-NEXT:    mov x9, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT:    csel x0, x9, x8, lt
; CHECK-NEXT:    ret
  %tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 63)
  ret i64 %tmp
}

; Vector case, <2 x i32> with scale 0: scalarized per lane.
define <2 x i32> @vec(<2 x i32> %x, <2 x i32> %y) nounwind {
; CHECK-LABEL: vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov w9, v1.s[1]
; CHECK-NEXT:    mov w10, v0.s[1]
; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
; CHECK-NEXT:    fmov w12, s0
; CHECK-NEXT:    smull x11, w10, w9
; CHECK-NEXT:    eor w9, w10, w9
; CHECK-NEXT:    fmov w10, s1
; CHECK-NEXT:    cmp w9, #0
; CHECK-NEXT:    smull x9, w12, w10
; CHECK-NEXT:    eor w10, w12, w10
; CHECK-NEXT:    cinv w12, w8, ge
; CHECK-NEXT:    cmp x11, w11, sxtw
; CHECK-NEXT:    csel w11, w12, w11, ne
; CHECK-NEXT:    cmp w10, #0
; CHECK-NEXT:    cinv w8, w8, ge
; CHECK-NEXT:    cmp x9, w9, sxtw
; CHECK-NEXT:    csel w8, w8, w9, ne
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    mov v0.s[1], w11
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp = call <2 x i32> @llvm.smul.fix.sat.v2i32(<2 x i32> %x, <2 x i32> %y, i32 0)
  ret <2 x i32> %tmp
}

; Vector case, <4 x i32> with scale 0: scalarized per lane.
define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: vec2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w9, v1.s[1]
; CHECK-NEXT:    mov w10, v0.s[1]
; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
; CHECK-NEXT:    fmov w12, s1
; CHECK-NEXT:    fmov w13, s0
; CHECK-NEXT:    mov w14, v0.s[2]
; CHECK-NEXT:    eor w11, w10, w9
; CHECK-NEXT:    smull x9, w10, w9
; CHECK-NEXT:    mov w10, v1.s[2]
; CHECK-NEXT:    cmp w11, #0
; CHECK-NEXT:    smull x11, w13, w12
; CHECK-NEXT:    eor w12, w13, w12
; CHECK-NEXT:    cinv w13, w8, ge
; CHECK-NEXT:    cmp x9, w9, sxtw
; CHECK-NEXT:    csel w9, w13, w9, ne
; CHECK-NEXT:    cmp w12, #0
; CHECK-NEXT:    mov w13, v1.s[3]
; CHECK-NEXT:    cinv w12, w8, ge
; CHECK-NEXT:    cmp x11, w11, sxtw
; CHECK-NEXT:    csel w11, w12, w11, ne
; CHECK-NEXT:    mov w12, v0.s[3]
; CHECK-NEXT:    fmov s0, w11
; CHECK-NEXT:    smull x11, w14, w10
; CHECK-NEXT:    mov v0.s[1], w9
; CHECK-NEXT:    eor w9, w14, w10
; CHECK-NEXT:    smull x10, w12, w13
; CHECK-NEXT:    cmp w9, #0
; CHECK-NEXT:    cinv w9, w8, ge
; CHECK-NEXT:    cmp x11, w11, sxtw
; CHECK-NEXT:    csel w9, w9, w11, ne
; CHECK-NEXT:    mov v0.s[2], w9
; CHECK-NEXT:    eor w9, w12, w13
; CHECK-NEXT:    cmp w9, #0
; CHECK-NEXT:    cinv w8, w8, ge
; CHECK-NEXT:    cmp x10, w10, sxtw
; CHECK-NEXT:    csel w8, w8, w10, ne
; CHECK-NEXT:    mov v0.s[3], w8
; CHECK-NEXT:    ret
  %tmp = call <4 x i32> @llvm.smul.fix.sat.v4i32(<4 x i32> %x, <4 x i32> %y, i32 0)
  ret <4 x i32> %tmp
}

; Vector case, <4 x i64> with scale 32: scalarized per lane.
define <4 x i64> @vec3(<4 x i64> %x, <4 x i64> %y) nounwind {
; CHECK-LABEL: vec3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x8, v2.d[1]
; CHECK-NEXT:    mov x9, v0.d[1]
; CHECK-NEXT:    mov w16, #2147483647 // =0x7fffffff
; CHECK-NEXT:    fmov x10, d2
; CHECK-NEXT:    fmov x11, d0
; CHECK-NEXT:    mov x18, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-NEXT:    mov x14, v3.d[1]
; CHECK-NEXT:    mov x15, v1.d[1]
; CHECK-NEXT:    mul x13, x9, x8
; CHECK-NEXT:    smulh x8, x9, x8
; CHECK-NEXT:    mul x12, x11, x10
; CHECK-NEXT:    smulh x9, x11, x10
; CHECK-NEXT:    extr x13, x8, x13, #32
; CHECK-NEXT:    cmp x8, x16
; CHECK-NEXT:    mul x10, x15, x14
; CHECK-NEXT:    csel x13, x18, x13, gt
; CHECK-NEXT:    smulh x11, x15, x14
; CHECK-NEXT:    fmov x14, d3
; CHECK-NEXT:    fmov x15, d1
; CHECK-NEXT:    extr x12, x9, x12, #32
; CHECK-NEXT:    mul x17, x15, x14
; CHECK-NEXT:    smulh x14, x15, x14
; CHECK-NEXT:    mov x15, #-2147483648 // =0xffffffff80000000
; CHECK-NEXT:    cmp x8, x15
; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT:    csel x13, x8, x13, lt
; CHECK-NEXT:    cmp x9, x16
; CHECK-NEXT:    csel x12, x18, x12, gt
; CHECK-NEXT:    cmp x9, x15
; CHECK-NEXT:    extr x9, x11, x10, #32
; CHECK-NEXT:    csel x10, x8, x12, lt
; CHECK-NEXT:    cmp x11, x16
; CHECK-NEXT:    csel x9, x18, x9, gt
; CHECK-NEXT:    cmp x11, x15
; CHECK-NEXT:    extr x11, x14, x17, #32
; CHECK-NEXT:    csel x9, x8, x9, lt
; CHECK-NEXT:    cmp x14, x16
; CHECK-NEXT:    fmov d0, x10
; CHECK-NEXT:    csel x11, x18, x11, gt
; CHECK-NEXT:    cmp x14, x15
; CHECK-NEXT:    csel x8, x8, x11, lt
; CHECK-NEXT:    fmov d1, x8
; CHECK-NEXT:    mov v0.d[1], x13
; CHECK-NEXT:    mov v1.d[1], x9
; CHECK-NEXT:    ret
  %tmp = call <4 x i64> @llvm.smul.fix.sat.v4i64(<4 x i64> %x, <4 x i64> %y, i32 32)
  ret <4 x i64> %tmp
}
