xref: /llvm-project/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll (revision 9122c5235ec85ce0c0ad337e862b006e7b349d84)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefixes=RV32,RV32I
3; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s --check-prefixes=RV64,RV64I
4; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb | FileCheck %s --check-prefixes=RV32,RV32IZbb
5; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb | FileCheck %s --check-prefixes=RV64,RV64IZbb
6
; Declarations of the signed saturating-subtract intrinsic, one per integer
; width used by the test functions in this file (i4, i8, i16, i32, i64).
7declare i4 @llvm.ssub.sat.i4(i4, i4)
8declare i8 @llvm.ssub.sat.i8(i8, i8)
9declare i16 @llvm.ssub.sat.i16(i16, i16)
10declare i32 @llvm.ssub.sat.i32(i32, i32)
11declare i64 @llvm.ssub.sat.i64(i64, i64)
12
; func32: returns the i32 signed saturating difference %x - (%y * %z), computed
; with @llvm.ssub.sat.i32 on the product %a.
;
; The assertions below are autogenerated and must match llc output exactly:
;  * riscv32 (with and without Zbb share one expectation): the overflow test
;    compares "product > 0" (sgtz) with "difference < %x" (slt); when the two
;    flags differ, the result is rebuilt from the sign of the difference
;    (srai by 31) xor'ed with the constant loaded by `lui 524288`.
;  * riscv64 without Zbb: the 32-bit difference (subw) is compared with the
;    64-bit difference (sub) of the sign-extended operands; a mismatch takes
;    the same srai/lui/xor fix-up path.
;  * riscv64 with Zbb: branch-free; the 64-bit difference is clamped with
;    min/max against the constant from `lui 524288` and that constant minus 1.
13define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
14; RV32-LABEL: func32:
15; RV32:       # %bb.0:
16; RV32-NEXT:    mv a3, a0
17; RV32-NEXT:    mul a0, a1, a2
18; RV32-NEXT:    sgtz a1, a0
19; RV32-NEXT:    sub a0, a3, a0
20; RV32-NEXT:    slt a2, a0, a3
21; RV32-NEXT:    beq a1, a2, .LBB0_2
22; RV32-NEXT:  # %bb.1:
23; RV32-NEXT:    srai a0, a0, 31
24; RV32-NEXT:    lui a1, 524288
25; RV32-NEXT:    xor a0, a0, a1
26; RV32-NEXT:  .LBB0_2:
27; RV32-NEXT:    ret
28;
29; RV64I-LABEL: func32:
30; RV64I:       # %bb.0:
31; RV64I-NEXT:    sext.w a0, a0
32; RV64I-NEXT:    mulw a1, a1, a2
33; RV64I-NEXT:    subw a2, a0, a1
34; RV64I-NEXT:    sub a0, a0, a1
35; RV64I-NEXT:    beq a2, a0, .LBB0_2
36; RV64I-NEXT:  # %bb.1:
37; RV64I-NEXT:    sraiw a0, a0, 31
38; RV64I-NEXT:    lui a1, 524288
39; RV64I-NEXT:    xor a0, a0, a1
40; RV64I-NEXT:  .LBB0_2:
41; RV64I-NEXT:    ret
42;
43; RV64IZbb-LABEL: func32:
44; RV64IZbb:       # %bb.0:
45; RV64IZbb-NEXT:    sext.w a0, a0
46; RV64IZbb-NEXT:    mulw a1, a1, a2
47; RV64IZbb-NEXT:    lui a2, 524288
48; RV64IZbb-NEXT:    sub a0, a0, a1
49; RV64IZbb-NEXT:    addiw a1, a2, -1
50; RV64IZbb-NEXT:    min a0, a0, a1
51; RV64IZbb-NEXT:    max a0, a0, a2
52; RV64IZbb-NEXT:    ret
53  %a = mul i32 %y, %z
54  %tmp = call i32 @llvm.ssub.sat.i32(i32 %x, i32 %a)
55  ret i32 %tmp
56}
57
; func64: returns the i64 signed saturating difference computed with
; @llvm.ssub.sat.i64.
;
; NOTE(review): the product %a = %y * %z is computed but never used; the
; intrinsic is called with %z directly, whereas every other function in this
; file passes %a. Consistently, none of the expected sequences below contains
; a multiply. This may be a deliberate simplification of the 64-bit case or an
; oversight -- confirm with the test's history before changing it. If the
; operand is switched to %a, the assertions must be regenerated with
; utils/update_llc_test_checks.py rather than edited by hand.
;
; Expected code as currently asserted:
;  * riscv32: the i64 values live in register pairs; the high-word difference
;    (with borrow from sltu) is checked for overflow via the xor/and sign-bit
;    test and bltz; on overflow the result is built from srai by 31 and an xor
;    with the constant loaded by `lui 524288`.
;  * riscv64: sgtz/slt flags are compared with beq; on mismatch the result is
;    the sign of the difference (srai by 63) xor'ed with -1 shifted left by 63.
58define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
59; RV32-LABEL: func64:
60; RV32:       # %bb.0:
61; RV32-NEXT:    mv a2, a1
62; RV32-NEXT:    sltu a1, a0, a4
63; RV32-NEXT:    sub a3, a2, a5
64; RV32-NEXT:    sub a1, a3, a1
65; RV32-NEXT:    xor a3, a2, a1
66; RV32-NEXT:    xor a2, a2, a5
67; RV32-NEXT:    and a2, a2, a3
68; RV32-NEXT:    bltz a2, .LBB1_2
69; RV32-NEXT:  # %bb.1:
70; RV32-NEXT:    sub a0, a0, a4
71; RV32-NEXT:    ret
72; RV32-NEXT:  .LBB1_2:
73; RV32-NEXT:    srai a0, a1, 31
74; RV32-NEXT:    lui a1, 524288
75; RV32-NEXT:    xor a1, a0, a1
76; RV32-NEXT:    ret
77;
78; RV64-LABEL: func64:
79; RV64:       # %bb.0:
80; RV64-NEXT:    mv a1, a0
81; RV64-NEXT:    sgtz a3, a2
82; RV64-NEXT:    sub a0, a0, a2
83; RV64-NEXT:    slt a1, a0, a1
84; RV64-NEXT:    beq a3, a1, .LBB1_2
85; RV64-NEXT:  # %bb.1:
86; RV64-NEXT:    srai a0, a0, 63
87; RV64-NEXT:    li a1, -1
88; RV64-NEXT:    slli a1, a1, 63
89; RV64-NEXT:    xor a0, a0, a1
90; RV64-NEXT:  .LBB1_2:
91; RV64-NEXT:    ret
92  %a = mul i64 %y, %z
93  %tmp = call i64 @llvm.ssub.sat.i64(i64 %x, i64 %z)
94  ret i64 %tmp
95}
96
; func16: returns the i16 signed saturating difference %x - (%y * %z), computed
; with @llvm.ssub.sat.i16 on the product %a.
;
; In every expected sequence below both operands are sign-extended from 16 bits
; to the full register, subtracted at register width, and the difference is
; clamped between two constants: `lui 8` followed by an add of -1 (upper
; bound) and `lui 1048568` (lower bound).
;  * Without Zbb: sign extension is a slli/srai pair (shift by 16 on riscv32,
;    48 on riscv64) and the clamp is done with bge/blt branches.
;  * With Zbb: sign extension is sext.h and the clamp is a min/max pair.
97define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind {
98; RV32I-LABEL: func16:
99; RV32I:       # %bb.0:
100; RV32I-NEXT:    slli a0, a0, 16
101; RV32I-NEXT:    mul a1, a1, a2
102; RV32I-NEXT:    lui a2, 8
103; RV32I-NEXT:    srai a0, a0, 16
104; RV32I-NEXT:    slli a1, a1, 16
105; RV32I-NEXT:    srai a1, a1, 16
106; RV32I-NEXT:    sub a0, a0, a1
107; RV32I-NEXT:    addi a1, a2, -1
108; RV32I-NEXT:    bge a0, a1, .LBB2_3
109; RV32I-NEXT:  # %bb.1:
110; RV32I-NEXT:    lui a1, 1048568
111; RV32I-NEXT:    bge a1, a0, .LBB2_4
112; RV32I-NEXT:  .LBB2_2:
113; RV32I-NEXT:    ret
114; RV32I-NEXT:  .LBB2_3:
115; RV32I-NEXT:    mv a0, a1
116; RV32I-NEXT:    lui a1, 1048568
117; RV32I-NEXT:    blt a1, a0, .LBB2_2
118; RV32I-NEXT:  .LBB2_4:
119; RV32I-NEXT:    lui a0, 1048568
120; RV32I-NEXT:    ret
121;
122; RV64I-LABEL: func16:
123; RV64I:       # %bb.0:
124; RV64I-NEXT:    slli a0, a0, 48
125; RV64I-NEXT:    mul a1, a1, a2
126; RV64I-NEXT:    lui a2, 8
127; RV64I-NEXT:    srai a0, a0, 48
128; RV64I-NEXT:    slli a1, a1, 48
129; RV64I-NEXT:    srai a1, a1, 48
130; RV64I-NEXT:    sub a0, a0, a1
131; RV64I-NEXT:    addiw a1, a2, -1
132; RV64I-NEXT:    bge a0, a1, .LBB2_3
133; RV64I-NEXT:  # %bb.1:
134; RV64I-NEXT:    lui a1, 1048568
135; RV64I-NEXT:    bge a1, a0, .LBB2_4
136; RV64I-NEXT:  .LBB2_2:
137; RV64I-NEXT:    ret
138; RV64I-NEXT:  .LBB2_3:
139; RV64I-NEXT:    mv a0, a1
140; RV64I-NEXT:    lui a1, 1048568
141; RV64I-NEXT:    blt a1, a0, .LBB2_2
142; RV64I-NEXT:  .LBB2_4:
143; RV64I-NEXT:    lui a0, 1048568
144; RV64I-NEXT:    ret
145;
146; RV32IZbb-LABEL: func16:
147; RV32IZbb:       # %bb.0:
148; RV32IZbb-NEXT:    sext.h a0, a0
149; RV32IZbb-NEXT:    mul a1, a1, a2
150; RV32IZbb-NEXT:    lui a2, 8
151; RV32IZbb-NEXT:    sext.h a1, a1
152; RV32IZbb-NEXT:    addi a2, a2, -1
153; RV32IZbb-NEXT:    sub a0, a0, a1
154; RV32IZbb-NEXT:    min a0, a0, a2
155; RV32IZbb-NEXT:    lui a1, 1048568
156; RV32IZbb-NEXT:    max a0, a0, a1
157; RV32IZbb-NEXT:    ret
158;
159; RV64IZbb-LABEL: func16:
160; RV64IZbb:       # %bb.0:
161; RV64IZbb-NEXT:    sext.h a0, a0
162; RV64IZbb-NEXT:    mul a1, a1, a2
163; RV64IZbb-NEXT:    lui a2, 8
164; RV64IZbb-NEXT:    sext.h a1, a1
165; RV64IZbb-NEXT:    addiw a2, a2, -1
166; RV64IZbb-NEXT:    sub a0, a0, a1
167; RV64IZbb-NEXT:    min a0, a0, a2
168; RV64IZbb-NEXT:    lui a1, 1048568
169; RV64IZbb-NEXT:    max a0, a0, a1
170; RV64IZbb-NEXT:    ret
171  %a = mul i16 %y, %z
172  %tmp = call i16 @llvm.ssub.sat.i16(i16 %x, i16 %a)
173  ret i16 %tmp
174}
175
; func8: returns the i8 signed saturating difference %x - (%y * %z), computed
; with @llvm.ssub.sat.i8 on the product %a.
;
; In every expected sequence below both operands are sign-extended from 8 bits
; to the full register, subtracted at register width, and the difference is
; clamped to the range bounded by the immediates 127 and -128.
;  * Without Zbb: sign extension is a slli/srai pair (shift by 24 on riscv32,
;    56 on riscv64) and the clamp is done with bge/blt branches.
;  * With Zbb: sign extension is sext.b and the clamp is a min/max pair.
176define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind {
177; RV32I-LABEL: func8:
178; RV32I:       # %bb.0:
179; RV32I-NEXT:    slli a0, a0, 24
180; RV32I-NEXT:    mul a1, a1, a2
181; RV32I-NEXT:    srai a0, a0, 24
182; RV32I-NEXT:    slli a1, a1, 24
183; RV32I-NEXT:    srai a1, a1, 24
184; RV32I-NEXT:    sub a0, a0, a1
185; RV32I-NEXT:    li a1, 127
186; RV32I-NEXT:    bge a0, a1, .LBB3_3
187; RV32I-NEXT:  # %bb.1:
188; RV32I-NEXT:    li a1, -128
189; RV32I-NEXT:    bge a1, a0, .LBB3_4
190; RV32I-NEXT:  .LBB3_2:
191; RV32I-NEXT:    ret
192; RV32I-NEXT:  .LBB3_3:
193; RV32I-NEXT:    li a0, 127
194; RV32I-NEXT:    li a1, -128
195; RV32I-NEXT:    blt a1, a0, .LBB3_2
196; RV32I-NEXT:  .LBB3_4:
197; RV32I-NEXT:    li a0, -128
198; RV32I-NEXT:    ret
199;
200; RV64I-LABEL: func8:
201; RV64I:       # %bb.0:
202; RV64I-NEXT:    slli a0, a0, 56
203; RV64I-NEXT:    mul a1, a1, a2
204; RV64I-NEXT:    srai a0, a0, 56
205; RV64I-NEXT:    slli a1, a1, 56
206; RV64I-NEXT:    srai a1, a1, 56
207; RV64I-NEXT:    sub a0, a0, a1
208; RV64I-NEXT:    li a1, 127
209; RV64I-NEXT:    bge a0, a1, .LBB3_3
210; RV64I-NEXT:  # %bb.1:
211; RV64I-NEXT:    li a1, -128
212; RV64I-NEXT:    bge a1, a0, .LBB3_4
213; RV64I-NEXT:  .LBB3_2:
214; RV64I-NEXT:    ret
215; RV64I-NEXT:  .LBB3_3:
216; RV64I-NEXT:    li a0, 127
217; RV64I-NEXT:    li a1, -128
218; RV64I-NEXT:    blt a1, a0, .LBB3_2
219; RV64I-NEXT:  .LBB3_4:
220; RV64I-NEXT:    li a0, -128
221; RV64I-NEXT:    ret
222;
223; RV32IZbb-LABEL: func8:
224; RV32IZbb:       # %bb.0:
225; RV32IZbb-NEXT:    sext.b a0, a0
226; RV32IZbb-NEXT:    mul a1, a1, a2
227; RV32IZbb-NEXT:    li a2, 127
228; RV32IZbb-NEXT:    sext.b a1, a1
229; RV32IZbb-NEXT:    sub a0, a0, a1
230; RV32IZbb-NEXT:    min a0, a0, a2
231; RV32IZbb-NEXT:    li a1, -128
232; RV32IZbb-NEXT:    max a0, a0, a1
233; RV32IZbb-NEXT:    ret
234;
235; RV64IZbb-LABEL: func8:
236; RV64IZbb:       # %bb.0:
237; RV64IZbb-NEXT:    sext.b a0, a0
238; RV64IZbb-NEXT:    mul a1, a1, a2
239; RV64IZbb-NEXT:    li a2, 127
240; RV64IZbb-NEXT:    sext.b a1, a1
241; RV64IZbb-NEXT:    sub a0, a0, a1
242; RV64IZbb-NEXT:    min a0, a0, a2
243; RV64IZbb-NEXT:    li a1, -128
244; RV64IZbb-NEXT:    max a0, a0, a1
245; RV64IZbb-NEXT:    ret
246  %a = mul i8 %y, %z
247  %tmp = call i8 @llvm.ssub.sat.i8(i8 %x, i8 %a)
248  ret i8 %tmp
249}
250
; func4: returns the i4 signed saturating difference %x - (%y * %z), computed
; with @llvm.ssub.sat.i4 on the product %a.
;
; In every expected sequence below both operands are sign-extended from 4 bits
; with a slli/srai pair (shift by 28 on riscv32, 60 on riscv64) -- including
; the Zbb configurations -- then subtracted at register width and clamped to
; the range bounded by the immediates 7 and -8.
;  * Without Zbb: the clamp is done with bge/blt branches.
;  * With Zbb: the clamp is a min/max pair.
251define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind {
252; RV32I-LABEL: func4:
253; RV32I:       # %bb.0:
254; RV32I-NEXT:    slli a0, a0, 28
255; RV32I-NEXT:    mul a1, a1, a2
256; RV32I-NEXT:    srai a0, a0, 28
257; RV32I-NEXT:    slli a1, a1, 28
258; RV32I-NEXT:    srai a1, a1, 28
259; RV32I-NEXT:    sub a0, a0, a1
260; RV32I-NEXT:    li a1, 7
261; RV32I-NEXT:    bge a0, a1, .LBB4_3
262; RV32I-NEXT:  # %bb.1:
263; RV32I-NEXT:    li a1, -8
264; RV32I-NEXT:    bge a1, a0, .LBB4_4
265; RV32I-NEXT:  .LBB4_2:
266; RV32I-NEXT:    ret
267; RV32I-NEXT:  .LBB4_3:
268; RV32I-NEXT:    li a0, 7
269; RV32I-NEXT:    li a1, -8
270; RV32I-NEXT:    blt a1, a0, .LBB4_2
271; RV32I-NEXT:  .LBB4_4:
272; RV32I-NEXT:    li a0, -8
273; RV32I-NEXT:    ret
274;
275; RV64I-LABEL: func4:
276; RV64I:       # %bb.0:
277; RV64I-NEXT:    slli a0, a0, 60
278; RV64I-NEXT:    mul a1, a1, a2
279; RV64I-NEXT:    srai a0, a0, 60
280; RV64I-NEXT:    slli a1, a1, 60
281; RV64I-NEXT:    srai a1, a1, 60
282; RV64I-NEXT:    sub a0, a0, a1
283; RV64I-NEXT:    li a1, 7
284; RV64I-NEXT:    bge a0, a1, .LBB4_3
285; RV64I-NEXT:  # %bb.1:
286; RV64I-NEXT:    li a1, -8
287; RV64I-NEXT:    bge a1, a0, .LBB4_4
288; RV64I-NEXT:  .LBB4_2:
289; RV64I-NEXT:    ret
290; RV64I-NEXT:  .LBB4_3:
291; RV64I-NEXT:    li a0, 7
292; RV64I-NEXT:    li a1, -8
293; RV64I-NEXT:    blt a1, a0, .LBB4_2
294; RV64I-NEXT:  .LBB4_4:
295; RV64I-NEXT:    li a0, -8
296; RV64I-NEXT:    ret
297;
298; RV32IZbb-LABEL: func4:
299; RV32IZbb:       # %bb.0:
300; RV32IZbb-NEXT:    slli a0, a0, 28
301; RV32IZbb-NEXT:    mul a1, a1, a2
302; RV32IZbb-NEXT:    li a2, 7
303; RV32IZbb-NEXT:    srai a0, a0, 28
304; RV32IZbb-NEXT:    slli a1, a1, 28
305; RV32IZbb-NEXT:    srai a1, a1, 28
306; RV32IZbb-NEXT:    sub a0, a0, a1
307; RV32IZbb-NEXT:    min a0, a0, a2
308; RV32IZbb-NEXT:    li a1, -8
309; RV32IZbb-NEXT:    max a0, a0, a1
310; RV32IZbb-NEXT:    ret
311;
312; RV64IZbb-LABEL: func4:
313; RV64IZbb:       # %bb.0:
314; RV64IZbb-NEXT:    slli a0, a0, 60
315; RV64IZbb-NEXT:    mul a1, a1, a2
316; RV64IZbb-NEXT:    li a2, 7
317; RV64IZbb-NEXT:    srai a0, a0, 60
318; RV64IZbb-NEXT:    slli a1, a1, 60
319; RV64IZbb-NEXT:    srai a1, a1, 60
320; RV64IZbb-NEXT:    sub a0, a0, a1
321; RV64IZbb-NEXT:    min a0, a0, a2
322; RV64IZbb-NEXT:    li a1, -8
323; RV64IZbb-NEXT:    max a0, a0, a1
324; RV64IZbb-NEXT:    ret
325  %a = mul i4 %y, %z
326  %tmp = call i4 @llvm.ssub.sat.i4(i4 %x, i4 %a)
327  ret i4 %tmp
328}
329