; xref: /llvm-project/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll (revision e30a4fc3e20bf5d9cc2f5bfcb61b4eb0e686a193)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=loongarch32 -mattr=+d < %s | FileCheck %s --check-prefix=LA32
; RUN: llc --mtriple=loongarch64 -mattr=+d < %s | FileCheck %s --check-prefix=LA64

; i64 signed multiply-with-overflow: on LA32 the product is built from 32-bit
; partial products; on LA64 a single mulh.d/mul.d pair suffices and overflow is
; detected by comparing the high word against the sign-extension of the low word.
define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) {
; LA32-LABEL: smuloi64:
; LA32:       # %bb.0:
; LA32-NEXT:    mulh.wu $a5, $a0, $a2
; LA32-NEXT:    mul.w $a6, $a1, $a2
; LA32-NEXT:    add.w $a5, $a6, $a5
; LA32-NEXT:    sltu $a6, $a5, $a6
; LA32-NEXT:    mulh.wu $a7, $a1, $a2
; LA32-NEXT:    srai.w $t0, $a1, 31
; LA32-NEXT:    mul.w $t0, $t0, $a2
; LA32-NEXT:    add.w $a7, $a7, $t0
; LA32-NEXT:    add.w $a6, $a7, $a6
; LA32-NEXT:    mulh.wu $a7, $a0, $a3
; LA32-NEXT:    srai.w $t0, $a3, 31
; LA32-NEXT:    mul.w $t0, $a0, $t0
; LA32-NEXT:    add.w $a7, $a7, $t0
; LA32-NEXT:    mul.w $t0, $a0, $a3
; LA32-NEXT:    add.w $a5, $t0, $a5
; LA32-NEXT:    sltu $t0, $a5, $t0
; LA32-NEXT:    add.w $a7, $a7, $t0
; LA32-NEXT:    add.w $t0, $a6, $a7
; LA32-NEXT:    sltu $t1, $t0, $a6
; LA32-NEXT:    srai.w $a6, $a6, 31
; LA32-NEXT:    srai.w $a7, $a7, 31
; LA32-NEXT:    add.w $a6, $a6, $a7
; LA32-NEXT:    add.w $a6, $a6, $t1
; LA32-NEXT:    mulh.w $a7, $a1, $a3
; LA32-NEXT:    add.w $a6, $a7, $a6
; LA32-NEXT:    mul.w $a1, $a1, $a3
; LA32-NEXT:    add.w $a3, $a1, $t0
; LA32-NEXT:    sltu $a1, $a3, $a1
; LA32-NEXT:    add.w $a1, $a6, $a1
; LA32-NEXT:    srai.w $a6, $a5, 31
; LA32-NEXT:    xor $a1, $a1, $a6
; LA32-NEXT:    xor $a3, $a3, $a6
; LA32-NEXT:    or $a1, $a3, $a1
; LA32-NEXT:    sltu $a1, $zero, $a1
; LA32-NEXT:    mul.w $a0, $a0, $a2
; LA32-NEXT:    st.w $a0, $a4, 0
; LA32-NEXT:    st.w $a5, $a4, 4
; LA32-NEXT:    move $a0, $a1
; LA32-NEXT:    ret
;
; LA64-LABEL: smuloi64:
; LA64:       # %bb.0:
; LA64-NEXT:    mulh.d $a3, $a0, $a1
; LA64-NEXT:    mul.d $a1, $a0, $a1
; LA64-NEXT:    srai.d $a0, $a1, 63
; LA64-NEXT:    xor $a0, $a3, $a0
; LA64-NEXT:    sltu $a0, $zero, $a0
; LA64-NEXT:    st.d $a1, $a2, 0
; LA64-NEXT:    ret
  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, ptr %res
  ret i1 %obit
}

; i128 signed multiply-with-overflow: the LA32 expansion needs all callee-saved
; registers (hence the 48-byte frame of spills/reloads); the LA64 expansion
; mirrors the 32-bit i64 lowering one register width up.
define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
; LA32-LABEL: smuloi128:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -48
; LA32-NEXT:    .cfi_def_cfa_offset 48
; LA32-NEXT:    st.w $ra, $sp, 44 # 4-byte Folded Spill
; LA32-NEXT:    st.w $fp, $sp, 40 # 4-byte Folded Spill
; LA32-NEXT:    st.w $s0, $sp, 36 # 4-byte Folded Spill
; LA32-NEXT:    st.w $s1, $sp, 32 # 4-byte Folded Spill
; LA32-NEXT:    st.w $s2, $sp, 28 # 4-byte Folded Spill
; LA32-NEXT:    st.w $s3, $sp, 24 # 4-byte Folded Spill
; LA32-NEXT:    st.w $s4, $sp, 20 # 4-byte Folded Spill
; LA32-NEXT:    st.w $s5, $sp, 16 # 4-byte Folded Spill
; LA32-NEXT:    st.w $s6, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT:    st.w $s7, $sp, 8 # 4-byte Folded Spill
; LA32-NEXT:    st.w $s8, $sp, 4 # 4-byte Folded Spill
; LA32-NEXT:    .cfi_offset 1, -4
; LA32-NEXT:    .cfi_offset 22, -8
; LA32-NEXT:    .cfi_offset 23, -12
; LA32-NEXT:    .cfi_offset 24, -16
; LA32-NEXT:    .cfi_offset 25, -20
; LA32-NEXT:    .cfi_offset 26, -24
; LA32-NEXT:    .cfi_offset 27, -28
; LA32-NEXT:    .cfi_offset 28, -32
; LA32-NEXT:    .cfi_offset 29, -36
; LA32-NEXT:    .cfi_offset 30, -40
; LA32-NEXT:    .cfi_offset 31, -44
; LA32-NEXT:    ld.w $a5, $a1, 12
; LA32-NEXT:    ld.w $a6, $a1, 8
; LA32-NEXT:    ld.w $t1, $a0, 4
; LA32-NEXT:    ld.w $a3, $a1, 0
; LA32-NEXT:    ld.w $a7, $a0, 8
; LA32-NEXT:    ld.w $t0, $a0, 12
; LA32-NEXT:    ld.w $a4, $a0, 0
; LA32-NEXT:    ld.w $t2, $a1, 4
; LA32-NEXT:    mulh.wu $a0, $a7, $a3
; LA32-NEXT:    mul.w $a1, $t0, $a3
; LA32-NEXT:    add.w $a0, $a1, $a0
; LA32-NEXT:    sltu $a1, $a0, $a1
; LA32-NEXT:    mulh.wu $t3, $t0, $a3
; LA32-NEXT:    add.w $a1, $t3, $a1
; LA32-NEXT:    mul.w $t3, $a7, $t2
; LA32-NEXT:    add.w $t4, $t3, $a0
; LA32-NEXT:    sltu $a0, $t4, $t3
; LA32-NEXT:    mulh.wu $t3, $a7, $t2
; LA32-NEXT:    add.w $a0, $t3, $a0
; LA32-NEXT:    add.w $t3, $a1, $a0
; LA32-NEXT:    mul.w $t5, $t0, $t2
; LA32-NEXT:    add.w $t6, $t5, $t3
; LA32-NEXT:    srai.w $a0, $t0, 31
; LA32-NEXT:    mul.w $t7, $a3, $a0
; LA32-NEXT:    add.w $t8, $t6, $t7
; LA32-NEXT:    sltu $fp, $t8, $t6
; LA32-NEXT:    sltu $t5, $t6, $t5
; LA32-NEXT:    sltu $a1, $t3, $a1
; LA32-NEXT:    mulh.wu $t3, $t0, $t2
; LA32-NEXT:    add.w $a1, $t3, $a1
; LA32-NEXT:    add.w $a1, $a1, $t5
; LA32-NEXT:    mulh.wu $t3, $a3, $a0
; LA32-NEXT:    add.w $t3, $t3, $t7
; LA32-NEXT:    mul.w $t5, $t2, $a0
; LA32-NEXT:    add.w $t3, $t3, $t5
; LA32-NEXT:    add.w $a1, $a1, $t3
; LA32-NEXT:    add.w $t3, $a1, $fp
; LA32-NEXT:    mulh.wu $a1, $a4, $a3
; LA32-NEXT:    mul.w $t5, $t1, $a3
; LA32-NEXT:    add.w $a1, $t5, $a1
; LA32-NEXT:    sltu $t5, $a1, $t5
; LA32-NEXT:    mulh.wu $t6, $t1, $a3
; LA32-NEXT:    add.w $t5, $t6, $t5
; LA32-NEXT:    mul.w $t6, $a4, $t2
; LA32-NEXT:    add.w $a1, $t6, $a1
; LA32-NEXT:    sltu $t6, $a1, $t6
; LA32-NEXT:    mulh.wu $t7, $a4, $t2
; LA32-NEXT:    add.w $t6, $t7, $t6
; LA32-NEXT:    add.w $t6, $t5, $t6
; LA32-NEXT:    mul.w $t7, $t1, $t2
; LA32-NEXT:    add.w $fp, $t7, $t6
; LA32-NEXT:    sltu $t7, $fp, $t7
; LA32-NEXT:    sltu $t5, $t6, $t5
; LA32-NEXT:    mulh.wu $t2, $t1, $t2
; LA32-NEXT:    add.w $t2, $t2, $t5
; LA32-NEXT:    add.w $t2, $t2, $t7
; LA32-NEXT:    add.w $t2, $t4, $t2
; LA32-NEXT:    mul.w $t5, $a7, $a3
; LA32-NEXT:    add.w $t6, $t5, $fp
; LA32-NEXT:    sltu $t5, $t6, $t5
; LA32-NEXT:    add.w $t2, $t2, $t5
; LA32-NEXT:    sltu $t7, $t2, $t4
; LA32-NEXT:    xor $t4, $t2, $t4
; LA32-NEXT:    sltui $t4, $t4, 1
; LA32-NEXT:    masknez $t7, $t7, $t4
; LA32-NEXT:    maskeqz $t4, $t5, $t4
; LA32-NEXT:    or $t4, $t4, $t7
; LA32-NEXT:    add.w $t5, $t8, $t4
; LA32-NEXT:    sltu $t4, $t5, $t8
; LA32-NEXT:    add.w $t4, $t3, $t4
; LA32-NEXT:    mulh.wu $t3, $a4, $a6
; LA32-NEXT:    mul.w $t7, $t1, $a6
; LA32-NEXT:    add.w $t3, $t7, $t3
; LA32-NEXT:    sltu $t7, $t3, $t7
; LA32-NEXT:    mulh.wu $t8, $t1, $a6
; LA32-NEXT:    add.w $t7, $t8, $t7
; LA32-NEXT:    mul.w $t8, $a4, $a5
; LA32-NEXT:    add.w $fp, $t8, $t3
; LA32-NEXT:    sltu $t3, $fp, $t8
; LA32-NEXT:    mulh.wu $t8, $a4, $a5
; LA32-NEXT:    add.w $t3, $t8, $t3
; LA32-NEXT:    add.w $t8, $t7, $t3
; LA32-NEXT:    mul.w $s0, $t1, $a5
; LA32-NEXT:    add.w $s1, $s0, $t8
; LA32-NEXT:    srai.w $t3, $a5, 31
; LA32-NEXT:    mul.w $s2, $t3, $a4
; LA32-NEXT:    add.w $s3, $s1, $s2
; LA32-NEXT:    sltu $s4, $s3, $s1
; LA32-NEXT:    sltu $s0, $s1, $s0
; LA32-NEXT:    sltu $t7, $t8, $t7
; LA32-NEXT:    mulh.wu $t8, $t1, $a5
; LA32-NEXT:    add.w $t7, $t8, $t7
; LA32-NEXT:    add.w $t7, $t7, $s0
; LA32-NEXT:    mul.w $t1, $t3, $t1
; LA32-NEXT:    mulh.wu $t8, $t3, $a4
; LA32-NEXT:    add.w $t1, $t8, $t1
; LA32-NEXT:    add.w $t1, $t1, $s2
; LA32-NEXT:    add.w $t1, $t7, $t1
; LA32-NEXT:    add.w $t7, $t1, $s4
; LA32-NEXT:    add.w $t2, $fp, $t2
; LA32-NEXT:    mul.w $t8, $a4, $a6
; LA32-NEXT:    add.w $t1, $t8, $t6
; LA32-NEXT:    sltu $t6, $t1, $t8
; LA32-NEXT:    add.w $t2, $t2, $t6
; LA32-NEXT:    sltu $t8, $t2, $fp
; LA32-NEXT:    xor $fp, $t2, $fp
; LA32-NEXT:    sltui $fp, $fp, 1
; LA32-NEXT:    masknez $t8, $t8, $fp
; LA32-NEXT:    maskeqz $t6, $t6, $fp
; LA32-NEXT:    or $t6, $t6, $t8
; LA32-NEXT:    add.w $t6, $s3, $t6
; LA32-NEXT:    sltu $t8, $t6, $s3
; LA32-NEXT:    add.w $t7, $t7, $t8
; LA32-NEXT:    add.w $t8, $t4, $t7
; LA32-NEXT:    add.w $t6, $t5, $t6
; LA32-NEXT:    sltu $fp, $t6, $t5
; LA32-NEXT:    add.w $t8, $t8, $fp
; LA32-NEXT:    mulh.wu $t5, $a7, $a6
; LA32-NEXT:    mul.w $s0, $t0, $a6
; LA32-NEXT:    add.w $s1, $s0, $t5
; LA32-NEXT:    mul.w $s2, $a7, $a5
; LA32-NEXT:    add.w $s3, $s2, $s1
; LA32-NEXT:    add.w $s4, $s3, $t8
; LA32-NEXT:    mul.w $s5, $a7, $a6
; LA32-NEXT:    add.w $t5, $s5, $t6
; LA32-NEXT:    sltu $s5, $t5, $s5
; LA32-NEXT:    add.w $t6, $s4, $s5
; LA32-NEXT:    sltu $s4, $t6, $s3
; LA32-NEXT:    xor $s6, $t6, $s3
; LA32-NEXT:    sltui $s6, $s6, 1
; LA32-NEXT:    masknez $s4, $s4, $s6
; LA32-NEXT:    maskeqz $s5, $s5, $s6
; LA32-NEXT:    or $s4, $s5, $s4
; LA32-NEXT:    sltu $s5, $t8, $t4
; LA32-NEXT:    xor $t8, $t8, $t4
; LA32-NEXT:    sltui $t8, $t8, 1
; LA32-NEXT:    masknez $s5, $s5, $t8
; LA32-NEXT:    maskeqz $t8, $fp, $t8
; LA32-NEXT:    or $t8, $t8, $s5
; LA32-NEXT:    srai.w $t4, $t4, 31
; LA32-NEXT:    srai.w $t7, $t7, 31
; LA32-NEXT:    add.w $t7, $t4, $t7
; LA32-NEXT:    add.w $t8, $t7, $t8
; LA32-NEXT:    sltu $fp, $s1, $s0
; LA32-NEXT:    mulh.wu $s0, $t0, $a6
; LA32-NEXT:    add.w $fp, $s0, $fp
; LA32-NEXT:    sltu $s0, $s3, $s2
; LA32-NEXT:    mulh.wu $s1, $a7, $a5
; LA32-NEXT:    add.w $s0, $s1, $s0
; LA32-NEXT:    add.w $s0, $fp, $s0
; LA32-NEXT:    mul.w $s1, $t0, $a5
; LA32-NEXT:    add.w $s2, $s1, $s0
; LA32-NEXT:    mul.w $s3, $a6, $a0
; LA32-NEXT:    mul.w $s5, $t3, $a7
; LA32-NEXT:    add.w $s6, $s5, $s3
; LA32-NEXT:    add.w $s7, $s2, $s6
; LA32-NEXT:    add.w $s8, $s7, $t8
; LA32-NEXT:    add.w $s4, $s8, $s4
; LA32-NEXT:    sltu $ra, $s4, $s8
; LA32-NEXT:    sltu $t4, $t7, $t4
; LA32-NEXT:    add.w $t4, $t7, $t4
; LA32-NEXT:    sltu $t7, $t8, $t7
; LA32-NEXT:    add.w $t4, $t4, $t7
; LA32-NEXT:    sltu $t7, $s7, $s2
; LA32-NEXT:    sltu $t8, $s2, $s1
; LA32-NEXT:    sltu $fp, $s0, $fp
; LA32-NEXT:    mulh.wu $s0, $t0, $a5
; LA32-NEXT:    add.w $fp, $s0, $fp
; LA32-NEXT:    add.w $t8, $fp, $t8
; LA32-NEXT:    mulh.wu $a6, $a6, $a0
; LA32-NEXT:    add.w $a6, $a6, $s3
; LA32-NEXT:    mul.w $a0, $a5, $a0
; LA32-NEXT:    add.w $a0, $a6, $a0
; LA32-NEXT:    mul.w $a5, $t3, $t0
; LA32-NEXT:    mulh.wu $a6, $t3, $a7
; LA32-NEXT:    add.w $a5, $a6, $a5
; LA32-NEXT:    add.w $a5, $a5, $s5
; LA32-NEXT:    add.w $a0, $a5, $a0
; LA32-NEXT:    sltu $a5, $s6, $s5
; LA32-NEXT:    add.w $a0, $a0, $a5
; LA32-NEXT:    add.w $a0, $t8, $a0
; LA32-NEXT:    add.w $a0, $a0, $t7
; LA32-NEXT:    add.w $a0, $a0, $t4
; LA32-NEXT:    sltu $a5, $s8, $s7
; LA32-NEXT:    add.w $a0, $a0, $a5
; LA32-NEXT:    add.w $a0, $a0, $ra
; LA32-NEXT:    srai.w $a5, $t2, 31
; LA32-NEXT:    xor $a0, $a0, $a5
; LA32-NEXT:    xor $a6, $t6, $a5
; LA32-NEXT:    or $a0, $a6, $a0
; LA32-NEXT:    xor $a6, $s4, $a5
; LA32-NEXT:    xor $a5, $t5, $a5
; LA32-NEXT:    or $a5, $a5, $a6
; LA32-NEXT:    or $a0, $a5, $a0
; LA32-NEXT:    sltu $a0, $zero, $a0
; LA32-NEXT:    mul.w $a3, $a4, $a3
; LA32-NEXT:    st.w $a3, $a2, 0
; LA32-NEXT:    st.w $a1, $a2, 4
; LA32-NEXT:    st.w $t1, $a2, 8
; LA32-NEXT:    st.w $t2, $a2, 12
; LA32-NEXT:    ld.w $s8, $sp, 4 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $s7, $sp, 8 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $s6, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $s5, $sp, 16 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $s4, $sp, 20 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $s3, $sp, 24 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $s2, $sp, 28 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $s1, $sp, 32 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $s0, $sp, 36 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $fp, $sp, 40 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $ra, $sp, 44 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 48
; LA32-NEXT:    ret
;
; LA64-LABEL: smuloi128:
; LA64:       # %bb.0:
; LA64-NEXT:    mulh.du $a5, $a0, $a2
; LA64-NEXT:    mul.d $a6, $a1, $a2
; LA64-NEXT:    add.d $a5, $a6, $a5
; LA64-NEXT:    sltu $a6, $a5, $a6
; LA64-NEXT:    mulh.du $a7, $a1, $a2
; LA64-NEXT:    srai.d $t0, $a1, 63
; LA64-NEXT:    mul.d $t0, $t0, $a2
; LA64-NEXT:    add.d $a7, $a7, $t0
; LA64-NEXT:    add.d $a6, $a7, $a6
; LA64-NEXT:    mulh.du $a7, $a0, $a3
; LA64-NEXT:    srai.d $t0, $a3, 63
; LA64-NEXT:    mul.d $t0, $a0, $t0
; LA64-NEXT:    add.d $a7, $a7, $t0
; LA64-NEXT:    mul.d $t0, $a0, $a3
; LA64-NEXT:    add.d $a5, $t0, $a5
; LA64-NEXT:    sltu $t0, $a5, $t0
; LA64-NEXT:    add.d $a7, $a7, $t0
; LA64-NEXT:    add.d $t0, $a6, $a7
; LA64-NEXT:    sltu $t1, $t0, $a6
; LA64-NEXT:    srai.d $a6, $a6, 63
; LA64-NEXT:    srai.d $a7, $a7, 63
; LA64-NEXT:    add.d $a6, $a6, $a7
; LA64-NEXT:    add.d $a6, $a6, $t1
; LA64-NEXT:    mulh.d $a7, $a1, $a3
; LA64-NEXT:    add.d $a6, $a7, $a6
; LA64-NEXT:    mul.d $a1, $a1, $a3
; LA64-NEXT:    add.d $a3, $a1, $t0
; LA64-NEXT:    sltu $a1, $a3, $a1
; LA64-NEXT:    add.d $a1, $a6, $a1
; LA64-NEXT:    srai.d $a6, $a5, 63
; LA64-NEXT:    xor $a1, $a1, $a6
; LA64-NEXT:    xor $a3, $a3, $a6
; LA64-NEXT:    or $a1, $a3, $a1
; LA64-NEXT:    sltu $a1, $zero, $a1
; LA64-NEXT:    mul.d $a0, $a0, $a2
; LA64-NEXT:    st.d $a0, $a4, 0
; LA64-NEXT:    st.d $a5, $a4, 8
; LA64-NEXT:    move $a0, $a1
; LA64-NEXT:    ret
  %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2)
  %val = extractvalue {i128, i1} %t, 0
  %obit = extractvalue {i128, i1} %t, 1
  store i128 %val, ptr %res
  ret i1 %obit
}

; Intrinsic declarations exercised by the tests above.
declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
declare {i128, i1} @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone