xref: /llvm-project/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll (revision 9122c5235ec85ce0c0ad337e862b006e7b349d84)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefixes=RV32,RV32I
3; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s --check-prefixes=RV64,RV64I
4; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb | FileCheck %s --check-prefixes=RV32,RV32IZbb
5; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb | FileCheck %s --check-prefixes=RV64,RV64IZbb
6
7declare i4 @llvm.sadd.sat.i4(i4, i4)
8declare i8 @llvm.sadd.sat.i8(i8, i8)
9declare i16 @llvm.sadd.sat.i16(i16, i16)
10declare i32 @llvm.sadd.sat.i32(i32, i32)
11declare i64 @llvm.sadd.sat.i64(i64, i64)
12
; func32: saturating signed add of %x and (%y * %z) at i32 width.
; RV32 adds natively and detects overflow by comparing the result's sign
; against the addend's sign; RV64I sign-extends and compares addw vs add;
; RV64IZbb clamps with min/max against INT32_MAX/INT32_MIN (lui 524288).
13define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
14; RV32-LABEL: func32:
15; RV32:       # %bb.0:
16; RV32-NEXT:    mv a3, a0
17; RV32-NEXT:    mul a1, a1, a2
18; RV32-NEXT:    add a0, a0, a1
19; RV32-NEXT:    slt a2, a0, a3
20; RV32-NEXT:    slti a1, a1, 0
21; RV32-NEXT:    beq a1, a2, .LBB0_2
22; RV32-NEXT:  # %bb.1:
23; RV32-NEXT:    srai a0, a0, 31
24; RV32-NEXT:    lui a1, 524288
25; RV32-NEXT:    xor a0, a0, a1
26; RV32-NEXT:  .LBB0_2:
27; RV32-NEXT:    ret
28;
29; RV64I-LABEL: func32:
30; RV64I:       # %bb.0:
31; RV64I-NEXT:    sext.w a0, a0
32; RV64I-NEXT:    mulw a1, a1, a2
33; RV64I-NEXT:    addw a2, a0, a1
34; RV64I-NEXT:    add a0, a0, a1
35; RV64I-NEXT:    beq a2, a0, .LBB0_2
36; RV64I-NEXT:  # %bb.1:
37; RV64I-NEXT:    sraiw a0, a0, 31
38; RV64I-NEXT:    lui a1, 524288
39; RV64I-NEXT:    xor a0, a0, a1
40; RV64I-NEXT:  .LBB0_2:
41; RV64I-NEXT:    ret
42;
43; RV64IZbb-LABEL: func32:
44; RV64IZbb:       # %bb.0:
45; RV64IZbb-NEXT:    sext.w a0, a0
46; RV64IZbb-NEXT:    mulw a1, a1, a2
47; RV64IZbb-NEXT:    lui a2, 524288
48; RV64IZbb-NEXT:    add a0, a0, a1
49; RV64IZbb-NEXT:    addiw a1, a2, -1
50; RV64IZbb-NEXT:    min a0, a0, a1
51; RV64IZbb-NEXT:    max a0, a0, a2
52; RV64IZbb-NEXT:    ret
53  %a = mul i32 %y, %z
54  %tmp = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %a)
55  ret i32 %tmp
56}
57
; func64: saturating signed add at i64 width. RV32 splits into lo/hi
; word adds with carry (sltu) and detects overflow via sign xors;
; RV64 adds natively and compares signs, saturating to INT64_MIN/MAX.
; NOTE(review): the call below takes %z directly, not the mul result %a,
; so %a is dead code — the CHECK lines agree (no mul/mulw is emitted in
; any func64 block). This matches the autogenerated checks; confirm the
; %z operand is intentional before "fixing" it, since switching to %a
; would invalidate every CHECK line here.
58define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
59; RV32I-LABEL: func64:
60; RV32I:       # %bb.0:
61; RV32I-NEXT:    mv a2, a1
62; RV32I-NEXT:    mv a1, a0
63; RV32I-NEXT:    add a3, a2, a5
64; RV32I-NEXT:    add a0, a0, a4
65; RV32I-NEXT:    xor a5, a2, a5
66; RV32I-NEXT:    sltu a1, a0, a1
67; RV32I-NEXT:    add a1, a3, a1
68; RV32I-NEXT:    xor a2, a2, a1
69; RV32I-NEXT:    not a3, a5
70; RV32I-NEXT:    and a2, a3, a2
71; RV32I-NEXT:    bgez a2, .LBB1_2
72; RV32I-NEXT:  # %bb.1:
73; RV32I-NEXT:    srai a0, a1, 31
74; RV32I-NEXT:    lui a1, 524288
75; RV32I-NEXT:    xor a1, a0, a1
76; RV32I-NEXT:  .LBB1_2:
77; RV32I-NEXT:    ret
78;
79; RV64-LABEL: func64:
80; RV64:       # %bb.0:
81; RV64-NEXT:    mv a1, a0
82; RV64-NEXT:    add a0, a0, a2
83; RV64-NEXT:    slt a1, a0, a1
84; RV64-NEXT:    slti a2, a2, 0
85; RV64-NEXT:    beq a2, a1, .LBB1_2
86; RV64-NEXT:  # %bb.1:
87; RV64-NEXT:    srai a0, a0, 63
88; RV64-NEXT:    li a1, -1
89; RV64-NEXT:    slli a1, a1, 63
90; RV64-NEXT:    xor a0, a0, a1
91; RV64-NEXT:  .LBB1_2:
92; RV64-NEXT:    ret
93;
94; RV32IZbb-LABEL: func64:
95; RV32IZbb:       # %bb.0:
96; RV32IZbb-NEXT:    mv a2, a1
97; RV32IZbb-NEXT:    mv a1, a0
98; RV32IZbb-NEXT:    add a3, a2, a5
99; RV32IZbb-NEXT:    add a0, a0, a4
100; RV32IZbb-NEXT:    sltu a1, a0, a1
101; RV32IZbb-NEXT:    add a1, a3, a1
102; RV32IZbb-NEXT:    xor a3, a2, a1
103; RV32IZbb-NEXT:    xor a2, a2, a5
104; RV32IZbb-NEXT:    andn a2, a3, a2
105; RV32IZbb-NEXT:    bgez a2, .LBB1_2
106; RV32IZbb-NEXT:  # %bb.1:
107; RV32IZbb-NEXT:    srai a0, a1, 31
108; RV32IZbb-NEXT:    lui a1, 524288
109; RV32IZbb-NEXT:    xor a1, a0, a1
110; RV32IZbb-NEXT:  .LBB1_2:
111; RV32IZbb-NEXT:    ret
112  %a = mul i64 %y, %z
113  %tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %z)
114  ret i64 %tmp
115}
116
; func16: saturating signed add of %x and (%y * %z) at i16 width.
; The base-ISA (I) variants sign-extend both operands with slli/srai
; shift pairs, then branch-and-clamp against 0x7FFF (lui 8 - 1) and
; 0x8000 (lui 1048568). Zbb variants use sext.h plus min/max instead.
117define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind {
118; RV32I-LABEL: func16:
119; RV32I:       # %bb.0:
120; RV32I-NEXT:    slli a0, a0, 16
121; RV32I-NEXT:    mul a1, a1, a2
122; RV32I-NEXT:    lui a2, 8
123; RV32I-NEXT:    srai a0, a0, 16
124; RV32I-NEXT:    slli a1, a1, 16
125; RV32I-NEXT:    srai a1, a1, 16
126; RV32I-NEXT:    add a0, a0, a1
127; RV32I-NEXT:    addi a1, a2, -1
128; RV32I-NEXT:    bge a0, a1, .LBB2_3
129; RV32I-NEXT:  # %bb.1:
130; RV32I-NEXT:    lui a1, 1048568
131; RV32I-NEXT:    bge a1, a0, .LBB2_4
132; RV32I-NEXT:  .LBB2_2:
133; RV32I-NEXT:    ret
134; RV32I-NEXT:  .LBB2_3:
135; RV32I-NEXT:    mv a0, a1
136; RV32I-NEXT:    lui a1, 1048568
137; RV32I-NEXT:    blt a1, a0, .LBB2_2
138; RV32I-NEXT:  .LBB2_4:
139; RV32I-NEXT:    lui a0, 1048568
140; RV32I-NEXT:    ret
141;
142; RV64I-LABEL: func16:
143; RV64I:       # %bb.0:
144; RV64I-NEXT:    slli a0, a0, 48
145; RV64I-NEXT:    mul a1, a1, a2
146; RV64I-NEXT:    lui a2, 8
147; RV64I-NEXT:    srai a0, a0, 48
148; RV64I-NEXT:    slli a1, a1, 48
149; RV64I-NEXT:    srai a1, a1, 48
150; RV64I-NEXT:    add a0, a0, a1
151; RV64I-NEXT:    addiw a1, a2, -1
152; RV64I-NEXT:    bge a0, a1, .LBB2_3
153; RV64I-NEXT:  # %bb.1:
154; RV64I-NEXT:    lui a1, 1048568
155; RV64I-NEXT:    bge a1, a0, .LBB2_4
156; RV64I-NEXT:  .LBB2_2:
157; RV64I-NEXT:    ret
158; RV64I-NEXT:  .LBB2_3:
159; RV64I-NEXT:    mv a0, a1
160; RV64I-NEXT:    lui a1, 1048568
161; RV64I-NEXT:    blt a1, a0, .LBB2_2
162; RV64I-NEXT:  .LBB2_4:
163; RV64I-NEXT:    lui a0, 1048568
164; RV64I-NEXT:    ret
165;
166; RV32IZbb-LABEL: func16:
167; RV32IZbb:       # %bb.0:
168; RV32IZbb-NEXT:    sext.h a0, a0
169; RV32IZbb-NEXT:    mul a1, a1, a2
170; RV32IZbb-NEXT:    lui a2, 8
171; RV32IZbb-NEXT:    sext.h a1, a1
172; RV32IZbb-NEXT:    addi a2, a2, -1
173; RV32IZbb-NEXT:    add a0, a0, a1
174; RV32IZbb-NEXT:    min a0, a0, a2
175; RV32IZbb-NEXT:    lui a1, 1048568
176; RV32IZbb-NEXT:    max a0, a0, a1
177; RV32IZbb-NEXT:    ret
178;
179; RV64IZbb-LABEL: func16:
180; RV64IZbb:       # %bb.0:
181; RV64IZbb-NEXT:    sext.h a0, a0
182; RV64IZbb-NEXT:    mul a1, a1, a2
183; RV64IZbb-NEXT:    lui a2, 8
184; RV64IZbb-NEXT:    sext.h a1, a1
185; RV64IZbb-NEXT:    addiw a2, a2, -1
186; RV64IZbb-NEXT:    add a0, a0, a1
187; RV64IZbb-NEXT:    min a0, a0, a2
188; RV64IZbb-NEXT:    lui a1, 1048568
189; RV64IZbb-NEXT:    max a0, a0, a1
190; RV64IZbb-NEXT:    ret
191  %a = mul i16 %y, %z
192  %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %a)
193  ret i16 %tmp
194}
195
; func8: saturating signed add of %x and (%y * %z) at i8 width.
; I variants sign-extend via slli/srai by 24 (RV32) or 56 (RV64), then
; branch-and-clamp against 127 / -128; Zbb variants use sext.b + min/max.
196define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind {
197; RV32I-LABEL: func8:
198; RV32I:       # %bb.0:
199; RV32I-NEXT:    slli a0, a0, 24
200; RV32I-NEXT:    mul a1, a1, a2
201; RV32I-NEXT:    srai a0, a0, 24
202; RV32I-NEXT:    slli a1, a1, 24
203; RV32I-NEXT:    srai a1, a1, 24
204; RV32I-NEXT:    add a0, a0, a1
205; RV32I-NEXT:    li a1, 127
206; RV32I-NEXT:    bge a0, a1, .LBB3_3
207; RV32I-NEXT:  # %bb.1:
208; RV32I-NEXT:    li a1, -128
209; RV32I-NEXT:    bge a1, a0, .LBB3_4
210; RV32I-NEXT:  .LBB3_2:
211; RV32I-NEXT:    ret
212; RV32I-NEXT:  .LBB3_3:
213; RV32I-NEXT:    li a0, 127
214; RV32I-NEXT:    li a1, -128
215; RV32I-NEXT:    blt a1, a0, .LBB3_2
216; RV32I-NEXT:  .LBB3_4:
217; RV32I-NEXT:    li a0, -128
218; RV32I-NEXT:    ret
219;
220; RV64I-LABEL: func8:
221; RV64I:       # %bb.0:
222; RV64I-NEXT:    slli a0, a0, 56
223; RV64I-NEXT:    mul a1, a1, a2
224; RV64I-NEXT:    srai a0, a0, 56
225; RV64I-NEXT:    slli a1, a1, 56
226; RV64I-NEXT:    srai a1, a1, 56
227; RV64I-NEXT:    add a0, a0, a1
228; RV64I-NEXT:    li a1, 127
229; RV64I-NEXT:    bge a0, a1, .LBB3_3
230; RV64I-NEXT:  # %bb.1:
231; RV64I-NEXT:    li a1, -128
232; RV64I-NEXT:    bge a1, a0, .LBB3_4
233; RV64I-NEXT:  .LBB3_2:
234; RV64I-NEXT:    ret
235; RV64I-NEXT:  .LBB3_3:
236; RV64I-NEXT:    li a0, 127
237; RV64I-NEXT:    li a1, -128
238; RV64I-NEXT:    blt a1, a0, .LBB3_2
239; RV64I-NEXT:  .LBB3_4:
240; RV64I-NEXT:    li a0, -128
241; RV64I-NEXT:    ret
242;
243; RV32IZbb-LABEL: func8:
244; RV32IZbb:       # %bb.0:
245; RV32IZbb-NEXT:    sext.b a0, a0
246; RV32IZbb-NEXT:    mul a1, a1, a2
247; RV32IZbb-NEXT:    li a2, 127
248; RV32IZbb-NEXT:    sext.b a1, a1
249; RV32IZbb-NEXT:    add a0, a0, a1
250; RV32IZbb-NEXT:    min a0, a0, a2
251; RV32IZbb-NEXT:    li a1, -128
252; RV32IZbb-NEXT:    max a0, a0, a1
253; RV32IZbb-NEXT:    ret
254;
255; RV64IZbb-LABEL: func8:
256; RV64IZbb:       # %bb.0:
257; RV64IZbb-NEXT:    sext.b a0, a0
258; RV64IZbb-NEXT:    mul a1, a1, a2
259; RV64IZbb-NEXT:    li a2, 127
260; RV64IZbb-NEXT:    sext.b a1, a1
261; RV64IZbb-NEXT:    add a0, a0, a1
262; RV64IZbb-NEXT:    min a0, a0, a2
263; RV64IZbb-NEXT:    li a1, -128
264; RV64IZbb-NEXT:    max a0, a0, a1
265; RV64IZbb-NEXT:    ret
266  %a = mul i8 %y, %z
267  %tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %a)
268  ret i8 %tmp
269}
270
; func4: saturating signed add at a non-byte-sized (i4) width — checks
; that illegal-type promotion still produces correct saturation. All
; variants sign-extend via slli/srai by 28 (RV32) or 60 (RV64); even
; with Zbb there is no sext.nibble, so the shift pair remains, followed
; by min/max (Zbb) or branch-and-clamp (I) against 7 / -8.
271define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind {
272; RV32I-LABEL: func4:
273; RV32I:       # %bb.0:
274; RV32I-NEXT:    slli a0, a0, 28
275; RV32I-NEXT:    mul a1, a1, a2
276; RV32I-NEXT:    srai a0, a0, 28
277; RV32I-NEXT:    slli a1, a1, 28
278; RV32I-NEXT:    srai a1, a1, 28
279; RV32I-NEXT:    add a0, a0, a1
280; RV32I-NEXT:    li a1, 7
281; RV32I-NEXT:    bge a0, a1, .LBB4_3
282; RV32I-NEXT:  # %bb.1:
283; RV32I-NEXT:    li a1, -8
284; RV32I-NEXT:    bge a1, a0, .LBB4_4
285; RV32I-NEXT:  .LBB4_2:
286; RV32I-NEXT:    ret
287; RV32I-NEXT:  .LBB4_3:
288; RV32I-NEXT:    li a0, 7
289; RV32I-NEXT:    li a1, -8
290; RV32I-NEXT:    blt a1, a0, .LBB4_2
291; RV32I-NEXT:  .LBB4_4:
292; RV32I-NEXT:    li a0, -8
293; RV32I-NEXT:    ret
294;
295; RV64I-LABEL: func4:
296; RV64I:       # %bb.0:
297; RV64I-NEXT:    slli a0, a0, 60
298; RV64I-NEXT:    mul a1, a1, a2
299; RV64I-NEXT:    srai a0, a0, 60
300; RV64I-NEXT:    slli a1, a1, 60
301; RV64I-NEXT:    srai a1, a1, 60
302; RV64I-NEXT:    add a0, a0, a1
303; RV64I-NEXT:    li a1, 7
304; RV64I-NEXT:    bge a0, a1, .LBB4_3
305; RV64I-NEXT:  # %bb.1:
306; RV64I-NEXT:    li a1, -8
307; RV64I-NEXT:    bge a1, a0, .LBB4_4
308; RV64I-NEXT:  .LBB4_2:
309; RV64I-NEXT:    ret
310; RV64I-NEXT:  .LBB4_3:
311; RV64I-NEXT:    li a0, 7
312; RV64I-NEXT:    li a1, -8
313; RV64I-NEXT:    blt a1, a0, .LBB4_2
314; RV64I-NEXT:  .LBB4_4:
315; RV64I-NEXT:    li a0, -8
316; RV64I-NEXT:    ret
317;
318; RV32IZbb-LABEL: func4:
319; RV32IZbb:       # %bb.0:
320; RV32IZbb-NEXT:    slli a0, a0, 28
321; RV32IZbb-NEXT:    mul a1, a1, a2
322; RV32IZbb-NEXT:    li a2, 7
323; RV32IZbb-NEXT:    srai a0, a0, 28
324; RV32IZbb-NEXT:    slli a1, a1, 28
325; RV32IZbb-NEXT:    srai a1, a1, 28
326; RV32IZbb-NEXT:    add a0, a0, a1
327; RV32IZbb-NEXT:    min a0, a0, a2
328; RV32IZbb-NEXT:    li a1, -8
329; RV32IZbb-NEXT:    max a0, a0, a1
330; RV32IZbb-NEXT:    ret
331;
332; RV64IZbb-LABEL: func4:
333; RV64IZbb:       # %bb.0:
334; RV64IZbb-NEXT:    slli a0, a0, 60
335; RV64IZbb-NEXT:    mul a1, a1, a2
336; RV64IZbb-NEXT:    li a2, 7
337; RV64IZbb-NEXT:    srai a0, a0, 60
338; RV64IZbb-NEXT:    slli a1, a1, 60
339; RV64IZbb-NEXT:    srai a1, a1, 60
340; RV64IZbb-NEXT:    add a0, a0, a1
341; RV64IZbb-NEXT:    min a0, a0, a2
342; RV64IZbb-NEXT:    li a1, -8
343; RV64IZbb-NEXT:    max a0, a0, a1
344; RV64IZbb-NEXT:    ret
345  %a = mul i4 %y, %z
346  %tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %a)
347  ret i4 %tmp
348}
349