xref: /llvm-project/llvm/test/CodeGen/NVPTX/i128.ll (revision 0068078dca60b41ad1c7bdd4448e7de718b82a5d)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64-- 2>&1 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-- | %ptxas-verify %}
4
; Signed 128-bit remainder with a variable divisor.
; The expected PTX below shows the operation expanded inline (blocks named
; udiv-*): both operands are replaced by their absolute values, an unsigned
; shift-and-subtract loop produces the quotient, the remainder is recovered as
; |lhs| - |rhs| * quotient, and the sign of %lhs is re-applied at the end via
; the xor/sub pair against the sign mask.
define i128 @srem_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: srem_i128(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<19>;
; CHECK-NEXT:    .reg .b32 %r<16>;
; CHECK-NEXT:    .reg .b64 %rd<127>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %_udiv-special-cases
; CHECK-NEXT:    ld.param.v2.u64 {%rd45, %rd46}, [srem_i128_param_0];
; CHECK-NEXT:    ld.param.v2.u64 {%rd49, %rd50}, [srem_i128_param_1];
; CHECK-NEXT:    shr.s64 %rd2, %rd46, 63;
; CHECK-NEXT:    mov.b64 %rd117, 0;
; CHECK-NEXT:    sub.cc.s64 %rd52, %rd117, %rd45;
; CHECK-NEXT:    subc.cc.s64 %rd53, %rd117, %rd46;
; CHECK-NEXT:    setp.lt.s64 %p1, %rd46, 0;
; CHECK-NEXT:    selp.b64 %rd4, %rd53, %rd46, %p1;
; CHECK-NEXT:    selp.b64 %rd3, %rd52, %rd45, %p1;
; CHECK-NEXT:    sub.cc.s64 %rd54, %rd117, %rd49;
; CHECK-NEXT:    subc.cc.s64 %rd55, %rd117, %rd50;
; CHECK-NEXT:    setp.lt.s64 %p2, %rd50, 0;
; CHECK-NEXT:    selp.b64 %rd6, %rd55, %rd50, %p2;
; CHECK-NEXT:    selp.b64 %rd5, %rd54, %rd49, %p2;
; CHECK-NEXT:    or.b64 %rd56, %rd5, %rd6;
; CHECK-NEXT:    setp.eq.s64 %p3, %rd56, 0;
; CHECK-NEXT:    or.b64 %rd57, %rd3, %rd4;
; CHECK-NEXT:    setp.eq.s64 %p4, %rd57, 0;
; CHECK-NEXT:    or.pred %p5, %p3, %p4;
; CHECK-NEXT:    setp.ne.s64 %p6, %rd6, 0;
; CHECK-NEXT:    clz.b64 %r1, %rd6;
; CHECK-NEXT:    cvt.u64.u32 %rd58, %r1;
; CHECK-NEXT:    clz.b64 %r2, %rd5;
; CHECK-NEXT:    cvt.u64.u32 %rd59, %r2;
; CHECK-NEXT:    add.s64 %rd60, %rd59, 64;
; CHECK-NEXT:    selp.b64 %rd61, %rd58, %rd60, %p6;
; CHECK-NEXT:    setp.ne.s64 %p7, %rd4, 0;
; CHECK-NEXT:    clz.b64 %r3, %rd4;
; CHECK-NEXT:    cvt.u64.u32 %rd62, %r3;
; CHECK-NEXT:    clz.b64 %r4, %rd3;
; CHECK-NEXT:    cvt.u64.u32 %rd63, %r4;
; CHECK-NEXT:    add.s64 %rd64, %rd63, 64;
; CHECK-NEXT:    selp.b64 %rd65, %rd62, %rd64, %p7;
; CHECK-NEXT:    sub.cc.s64 %rd66, %rd61, %rd65;
; CHECK-NEXT:    subc.cc.s64 %rd67, %rd117, 0;
; CHECK-NEXT:    setp.eq.s64 %p8, %rd67, 0;
; CHECK-NEXT:    setp.ne.s64 %p9, %rd67, 0;
; CHECK-NEXT:    selp.u32 %r5, -1, 0, %p9;
; CHECK-NEXT:    setp.gt.u64 %p10, %rd66, 127;
; CHECK-NEXT:    selp.u32 %r6, -1, 0, %p10;
; CHECK-NEXT:    selp.b32 %r7, %r6, %r5, %p8;
; CHECK-NEXT:    and.b32 %r8, %r7, 1;
; CHECK-NEXT:    setp.eq.b32 %p11, %r8, 1;
; CHECK-NEXT:    or.pred %p12, %p5, %p11;
; CHECK-NEXT:    xor.b64 %rd68, %rd66, 127;
; CHECK-NEXT:    or.b64 %rd69, %rd68, %rd67;
; CHECK-NEXT:    setp.eq.s64 %p13, %rd69, 0;
; CHECK-NEXT:    selp.b64 %rd126, 0, %rd4, %p12;
; CHECK-NEXT:    selp.b64 %rd125, 0, %rd3, %p12;
; CHECK-NEXT:    or.pred %p14, %p12, %p13;
; CHECK-NEXT:    @%p14 bra $L__BB0_5;
; CHECK-NEXT:  // %bb.3: // %udiv-bb1
; CHECK-NEXT:    add.cc.s64 %rd119, %rd66, 1;
; CHECK-NEXT:    addc.cc.s64 %rd120, %rd67, 0;
; CHECK-NEXT:    or.b64 %rd72, %rd119, %rd120;
; CHECK-NEXT:    setp.eq.s64 %p15, %rd72, 0;
; CHECK-NEXT:    cvt.u32.u64 %r9, %rd66;
; CHECK-NEXT:    sub.s32 %r10, 127, %r9;
; CHECK-NEXT:    shl.b64 %rd73, %rd4, %r10;
; CHECK-NEXT:    sub.s32 %r11, 64, %r10;
; CHECK-NEXT:    shr.u64 %rd74, %rd3, %r11;
; CHECK-NEXT:    or.b64 %rd75, %rd73, %rd74;
; CHECK-NEXT:    sub.s32 %r12, 63, %r9;
; CHECK-NEXT:    shl.b64 %rd76, %rd3, %r12;
; CHECK-NEXT:    setp.gt.s32 %p16, %r10, 63;
; CHECK-NEXT:    selp.b64 %rd124, %rd76, %rd75, %p16;
; CHECK-NEXT:    shl.b64 %rd123, %rd3, %r10;
; CHECK-NEXT:    mov.u64 %rd114, %rd117;
; CHECK-NEXT:    @%p15 bra $L__BB0_4;
; CHECK-NEXT:  // %bb.1: // %udiv-preheader
; CHECK-NEXT:    cvt.u32.u64 %r13, %rd119;
; CHECK-NEXT:    shr.u64 %rd79, %rd3, %r13;
; CHECK-NEXT:    sub.s32 %r14, 64, %r13;
; CHECK-NEXT:    shl.b64 %rd80, %rd4, %r14;
; CHECK-NEXT:    or.b64 %rd81, %rd79, %rd80;
; CHECK-NEXT:    add.s32 %r15, %r13, -64;
; CHECK-NEXT:    shr.u64 %rd82, %rd4, %r15;
; CHECK-NEXT:    setp.gt.s32 %p17, %r13, 63;
; CHECK-NEXT:    selp.b64 %rd121, %rd82, %rd81, %p17;
; CHECK-NEXT:    shr.u64 %rd122, %rd4, %r13;
; CHECK-NEXT:    add.cc.s64 %rd35, %rd5, -1;
; CHECK-NEXT:    addc.cc.s64 %rd36, %rd6, -1;
; CHECK-NEXT:    mov.b64 %rd114, 0;
; CHECK-NEXT:    mov.u64 %rd117, %rd114;
; CHECK-NEXT:  $L__BB0_2: // %udiv-do-while
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    shr.u64 %rd83, %rd121, 63;
; CHECK-NEXT:    shl.b64 %rd84, %rd122, 1;
; CHECK-NEXT:    or.b64 %rd85, %rd84, %rd83;
; CHECK-NEXT:    shl.b64 %rd86, %rd121, 1;
; CHECK-NEXT:    shr.u64 %rd87, %rd124, 63;
; CHECK-NEXT:    or.b64 %rd88, %rd86, %rd87;
; CHECK-NEXT:    shr.u64 %rd89, %rd123, 63;
; CHECK-NEXT:    shl.b64 %rd90, %rd124, 1;
; CHECK-NEXT:    or.b64 %rd91, %rd90, %rd89;
; CHECK-NEXT:    shl.b64 %rd92, %rd123, 1;
; CHECK-NEXT:    or.b64 %rd123, %rd117, %rd92;
; CHECK-NEXT:    or.b64 %rd124, %rd114, %rd91;
; CHECK-NEXT:    sub.cc.s64 %rd93, %rd35, %rd88;
; CHECK-NEXT:    subc.cc.s64 %rd94, %rd36, %rd85;
; CHECK-NEXT:    shr.s64 %rd95, %rd94, 63;
; CHECK-NEXT:    and.b64 %rd117, %rd95, 1;
; CHECK-NEXT:    and.b64 %rd96, %rd95, %rd5;
; CHECK-NEXT:    and.b64 %rd97, %rd95, %rd6;
; CHECK-NEXT:    sub.cc.s64 %rd121, %rd88, %rd96;
; CHECK-NEXT:    subc.cc.s64 %rd122, %rd85, %rd97;
; CHECK-NEXT:    add.cc.s64 %rd119, %rd119, -1;
; CHECK-NEXT:    addc.cc.s64 %rd120, %rd120, -1;
; CHECK-NEXT:    or.b64 %rd98, %rd119, %rd120;
; CHECK-NEXT:    setp.eq.s64 %p18, %rd98, 0;
; CHECK-NEXT:    @%p18 bra $L__BB0_4;
; CHECK-NEXT:    bra.uni $L__BB0_2;
; CHECK-NEXT:  $L__BB0_4: // %udiv-loop-exit
; CHECK-NEXT:    shr.u64 %rd99, %rd123, 63;
; CHECK-NEXT:    shl.b64 %rd100, %rd124, 1;
; CHECK-NEXT:    or.b64 %rd101, %rd100, %rd99;
; CHECK-NEXT:    shl.b64 %rd102, %rd123, 1;
; CHECK-NEXT:    or.b64 %rd125, %rd117, %rd102;
; CHECK-NEXT:    or.b64 %rd126, %rd114, %rd101;
; CHECK-NEXT:  $L__BB0_5: // %udiv-end
; CHECK-NEXT:    mul.hi.u64 %rd103, %rd5, %rd125;
; CHECK-NEXT:    mad.lo.s64 %rd104, %rd5, %rd126, %rd103;
; CHECK-NEXT:    mad.lo.s64 %rd105, %rd6, %rd125, %rd104;
; CHECK-NEXT:    mul.lo.s64 %rd106, %rd5, %rd125;
; CHECK-NEXT:    sub.cc.s64 %rd107, %rd3, %rd106;
; CHECK-NEXT:    subc.cc.s64 %rd108, %rd4, %rd105;
; CHECK-NEXT:    xor.b64 %rd109, %rd107, %rd2;
; CHECK-NEXT:    xor.b64 %rd110, %rd108, %rd2;
; CHECK-NEXT:    sub.cc.s64 %rd111, %rd109, %rd2;
; CHECK-NEXT:    subc.cc.s64 %rd112, %rd110, %rd2;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd111, %rd112};
; CHECK-NEXT:    ret;
  %rem = srem i128 %lhs, %rhs
  ret i128 %rem
}
148
; Unsigned 128-bit remainder with a variable divisor.
; The expected PTX below shows the operation expanded inline (blocks named
; udiv-*): an unsigned shift-and-subtract loop produces the quotient, and the
; remainder is then recovered as lhs - rhs * quotient. No sign handling is
; emitted for the unsigned form.
define i128 @urem_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: urem_i128(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<17>;
; CHECK-NEXT:    .reg .b32 %r<16>;
; CHECK-NEXT:    .reg .b64 %rd<113>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %_udiv-special-cases
; CHECK-NEXT:    ld.param.v2.u64 {%rd41, %rd42}, [urem_i128_param_0];
; CHECK-NEXT:    ld.param.v2.u64 {%rd3, %rd4}, [urem_i128_param_1];
; CHECK-NEXT:    or.b64 %rd45, %rd3, %rd4;
; CHECK-NEXT:    setp.eq.s64 %p1, %rd45, 0;
; CHECK-NEXT:    or.b64 %rd46, %rd41, %rd42;
; CHECK-NEXT:    setp.eq.s64 %p2, %rd46, 0;
; CHECK-NEXT:    or.pred %p3, %p1, %p2;
; CHECK-NEXT:    setp.ne.s64 %p4, %rd4, 0;
; CHECK-NEXT:    clz.b64 %r1, %rd4;
; CHECK-NEXT:    cvt.u64.u32 %rd47, %r1;
; CHECK-NEXT:    clz.b64 %r2, %rd3;
; CHECK-NEXT:    cvt.u64.u32 %rd48, %r2;
; CHECK-NEXT:    add.s64 %rd49, %rd48, 64;
; CHECK-NEXT:    selp.b64 %rd50, %rd47, %rd49, %p4;
; CHECK-NEXT:    setp.ne.s64 %p5, %rd42, 0;
; CHECK-NEXT:    clz.b64 %r3, %rd42;
; CHECK-NEXT:    cvt.u64.u32 %rd51, %r3;
; CHECK-NEXT:    clz.b64 %r4, %rd41;
; CHECK-NEXT:    cvt.u64.u32 %rd52, %r4;
; CHECK-NEXT:    add.s64 %rd53, %rd52, 64;
; CHECK-NEXT:    selp.b64 %rd54, %rd51, %rd53, %p5;
; CHECK-NEXT:    mov.b64 %rd103, 0;
; CHECK-NEXT:    sub.cc.s64 %rd56, %rd50, %rd54;
; CHECK-NEXT:    subc.cc.s64 %rd57, %rd103, 0;
; CHECK-NEXT:    setp.eq.s64 %p6, %rd57, 0;
; CHECK-NEXT:    setp.ne.s64 %p7, %rd57, 0;
; CHECK-NEXT:    selp.u32 %r5, -1, 0, %p7;
; CHECK-NEXT:    setp.gt.u64 %p8, %rd56, 127;
; CHECK-NEXT:    selp.u32 %r6, -1, 0, %p8;
; CHECK-NEXT:    selp.b32 %r7, %r6, %r5, %p6;
; CHECK-NEXT:    and.b32 %r8, %r7, 1;
; CHECK-NEXT:    setp.eq.b32 %p9, %r8, 1;
; CHECK-NEXT:    or.pred %p10, %p3, %p9;
; CHECK-NEXT:    xor.b64 %rd58, %rd56, 127;
; CHECK-NEXT:    or.b64 %rd59, %rd58, %rd57;
; CHECK-NEXT:    setp.eq.s64 %p11, %rd59, 0;
; CHECK-NEXT:    selp.b64 %rd112, 0, %rd42, %p10;
; CHECK-NEXT:    selp.b64 %rd111, 0, %rd41, %p10;
; CHECK-NEXT:    or.pred %p12, %p10, %p11;
; CHECK-NEXT:    @%p12 bra $L__BB1_5;
; CHECK-NEXT:  // %bb.3: // %udiv-bb1
; CHECK-NEXT:    add.cc.s64 %rd105, %rd56, 1;
; CHECK-NEXT:    addc.cc.s64 %rd106, %rd57, 0;
; CHECK-NEXT:    or.b64 %rd62, %rd105, %rd106;
; CHECK-NEXT:    setp.eq.s64 %p13, %rd62, 0;
; CHECK-NEXT:    cvt.u32.u64 %r9, %rd56;
; CHECK-NEXT:    sub.s32 %r10, 127, %r9;
; CHECK-NEXT:    shl.b64 %rd63, %rd42, %r10;
; CHECK-NEXT:    sub.s32 %r11, 64, %r10;
; CHECK-NEXT:    shr.u64 %rd64, %rd41, %r11;
; CHECK-NEXT:    or.b64 %rd65, %rd63, %rd64;
; CHECK-NEXT:    sub.s32 %r12, 63, %r9;
; CHECK-NEXT:    shl.b64 %rd66, %rd41, %r12;
; CHECK-NEXT:    setp.gt.s32 %p14, %r10, 63;
; CHECK-NEXT:    selp.b64 %rd110, %rd66, %rd65, %p14;
; CHECK-NEXT:    shl.b64 %rd109, %rd41, %r10;
; CHECK-NEXT:    mov.u64 %rd100, %rd103;
; CHECK-NEXT:    @%p13 bra $L__BB1_4;
; CHECK-NEXT:  // %bb.1: // %udiv-preheader
; CHECK-NEXT:    cvt.u32.u64 %r13, %rd105;
; CHECK-NEXT:    shr.u64 %rd69, %rd41, %r13;
; CHECK-NEXT:    sub.s32 %r14, 64, %r13;
; CHECK-NEXT:    shl.b64 %rd70, %rd42, %r14;
; CHECK-NEXT:    or.b64 %rd71, %rd69, %rd70;
; CHECK-NEXT:    add.s32 %r15, %r13, -64;
; CHECK-NEXT:    shr.u64 %rd72, %rd42, %r15;
; CHECK-NEXT:    setp.gt.s32 %p15, %r13, 63;
; CHECK-NEXT:    selp.b64 %rd107, %rd72, %rd71, %p15;
; CHECK-NEXT:    shr.u64 %rd108, %rd42, %r13;
; CHECK-NEXT:    add.cc.s64 %rd33, %rd3, -1;
; CHECK-NEXT:    addc.cc.s64 %rd34, %rd4, -1;
; CHECK-NEXT:    mov.b64 %rd100, 0;
; CHECK-NEXT:    mov.u64 %rd103, %rd100;
; CHECK-NEXT:  $L__BB1_2: // %udiv-do-while
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    shr.u64 %rd73, %rd107, 63;
; CHECK-NEXT:    shl.b64 %rd74, %rd108, 1;
; CHECK-NEXT:    or.b64 %rd75, %rd74, %rd73;
; CHECK-NEXT:    shl.b64 %rd76, %rd107, 1;
; CHECK-NEXT:    shr.u64 %rd77, %rd110, 63;
; CHECK-NEXT:    or.b64 %rd78, %rd76, %rd77;
; CHECK-NEXT:    shr.u64 %rd79, %rd109, 63;
; CHECK-NEXT:    shl.b64 %rd80, %rd110, 1;
; CHECK-NEXT:    or.b64 %rd81, %rd80, %rd79;
; CHECK-NEXT:    shl.b64 %rd82, %rd109, 1;
; CHECK-NEXT:    or.b64 %rd109, %rd103, %rd82;
; CHECK-NEXT:    or.b64 %rd110, %rd100, %rd81;
; CHECK-NEXT:    sub.cc.s64 %rd83, %rd33, %rd78;
; CHECK-NEXT:    subc.cc.s64 %rd84, %rd34, %rd75;
; CHECK-NEXT:    shr.s64 %rd85, %rd84, 63;
; CHECK-NEXT:    and.b64 %rd103, %rd85, 1;
; CHECK-NEXT:    and.b64 %rd86, %rd85, %rd3;
; CHECK-NEXT:    and.b64 %rd87, %rd85, %rd4;
; CHECK-NEXT:    sub.cc.s64 %rd107, %rd78, %rd86;
; CHECK-NEXT:    subc.cc.s64 %rd108, %rd75, %rd87;
; CHECK-NEXT:    add.cc.s64 %rd105, %rd105, -1;
; CHECK-NEXT:    addc.cc.s64 %rd106, %rd106, -1;
; CHECK-NEXT:    or.b64 %rd88, %rd105, %rd106;
; CHECK-NEXT:    setp.eq.s64 %p16, %rd88, 0;
; CHECK-NEXT:    @%p16 bra $L__BB1_4;
; CHECK-NEXT:    bra.uni $L__BB1_2;
; CHECK-NEXT:  $L__BB1_4: // %udiv-loop-exit
; CHECK-NEXT:    shr.u64 %rd89, %rd109, 63;
; CHECK-NEXT:    shl.b64 %rd90, %rd110, 1;
; CHECK-NEXT:    or.b64 %rd91, %rd90, %rd89;
; CHECK-NEXT:    shl.b64 %rd92, %rd109, 1;
; CHECK-NEXT:    or.b64 %rd111, %rd103, %rd92;
; CHECK-NEXT:    or.b64 %rd112, %rd100, %rd91;
; CHECK-NEXT:  $L__BB1_5: // %udiv-end
; CHECK-NEXT:    mul.hi.u64 %rd93, %rd3, %rd111;
; CHECK-NEXT:    mad.lo.s64 %rd94, %rd3, %rd112, %rd93;
; CHECK-NEXT:    mad.lo.s64 %rd95, %rd4, %rd111, %rd94;
; CHECK-NEXT:    mul.lo.s64 %rd96, %rd3, %rd111;
; CHECK-NEXT:    sub.cc.s64 %rd97, %rd41, %rd96;
; CHECK-NEXT:    subc.cc.s64 %rd98, %rd42, %rd95;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd97, %rd98};
; CHECK-NEXT:    ret;
  %rem = urem i128 %lhs, %rhs
  ret i128 %rem
}
277
; Signed 128-bit remainder by the constant 8589934592 (2^33).
; The expected PTX contains no division loop: the low word is biased by the
; sign mask shifted right by 31, the sum is masked with -8589934592 to clear
; the low 33 bits, and that rounded value is subtracted from the original
; operand.
define i128 @srem_i128_pow2k(i128 %lhs) {
; CHECK-LABEL: srem_i128_pow2k(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<10>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.u64 {%rd1, %rd2}, [srem_i128_pow2k_param_0];
; CHECK-NEXT:    shr.s64 %rd3, %rd2, 63;
; CHECK-NEXT:    shr.u64 %rd4, %rd3, 31;
; CHECK-NEXT:    add.cc.s64 %rd5, %rd1, %rd4;
; CHECK-NEXT:    addc.cc.s64 %rd6, %rd2, 0;
; CHECK-NEXT:    and.b64 %rd7, %rd5, -8589934592;
; CHECK-NEXT:    sub.cc.s64 %rd8, %rd1, %rd7;
; CHECK-NEXT:    subc.cc.s64 %rd9, %rd2, %rd6;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd8, %rd9};
; CHECK-NEXT:    ret;
  %rem = srem i128 %lhs, 8589934592
  ret i128 %rem
}
297
; Unsigned 128-bit remainder by the constant 8589934592 (2^33).
; The expected PTX reduces this to a single mask: the low word is ANDed with
; 8589934591 (2^33 - 1) and the high word of the result is the constant 0.
define i128 @urem_i128_pow2k(i128 %lhs) {
; CHECK-LABEL: urem_i128_pow2k(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.u64 {%rd1, %rd2}, [urem_i128_pow2k_param_0];
; CHECK-NEXT:    and.b64 %rd3, %rd1, 8589934591;
; CHECK-NEXT:    mov.b64 %rd4, 0;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd3, %rd4};
; CHECK-NEXT:    ret;
  %rem = urem i128 %lhs, 8589934592
  ret i128 %rem
}
312
; Signed 128-bit division with a variable divisor.
; The expected PTX below shows the operation expanded inline (blocks named
; udiv-*): both operands are replaced by their absolute values, an unsigned
; shift-and-subtract loop produces the quotient, and the quotient is then
; conditionally negated using a sign mask derived from the xor of the two
; operands' high words.
define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: sdiv_i128(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<19>;
; CHECK-NEXT:    .reg .b32 %r<16>;
; CHECK-NEXT:    .reg .b64 %rd<122>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %_udiv-special-cases
; CHECK-NEXT:    ld.param.v2.u64 {%rd45, %rd46}, [sdiv_i128_param_0];
; CHECK-NEXT:    ld.param.v2.u64 {%rd49, %rd50}, [sdiv_i128_param_1];
; CHECK-NEXT:    mov.b64 %rd112, 0;
; CHECK-NEXT:    sub.cc.s64 %rd52, %rd112, %rd45;
; CHECK-NEXT:    subc.cc.s64 %rd53, %rd112, %rd46;
; CHECK-NEXT:    setp.lt.s64 %p1, %rd46, 0;
; CHECK-NEXT:    selp.b64 %rd2, %rd53, %rd46, %p1;
; CHECK-NEXT:    selp.b64 %rd1, %rd52, %rd45, %p1;
; CHECK-NEXT:    sub.cc.s64 %rd54, %rd112, %rd49;
; CHECK-NEXT:    subc.cc.s64 %rd55, %rd112, %rd50;
; CHECK-NEXT:    setp.lt.s64 %p2, %rd50, 0;
; CHECK-NEXT:    selp.b64 %rd4, %rd55, %rd50, %p2;
; CHECK-NEXT:    selp.b64 %rd3, %rd54, %rd49, %p2;
; CHECK-NEXT:    xor.b64 %rd56, %rd50, %rd46;
; CHECK-NEXT:    shr.s64 %rd5, %rd56, 63;
; CHECK-NEXT:    or.b64 %rd57, %rd3, %rd4;
; CHECK-NEXT:    setp.eq.s64 %p3, %rd57, 0;
; CHECK-NEXT:    or.b64 %rd58, %rd1, %rd2;
; CHECK-NEXT:    setp.eq.s64 %p4, %rd58, 0;
; CHECK-NEXT:    or.pred %p5, %p3, %p4;
; CHECK-NEXT:    setp.ne.s64 %p6, %rd4, 0;
; CHECK-NEXT:    clz.b64 %r1, %rd4;
; CHECK-NEXT:    cvt.u64.u32 %rd59, %r1;
; CHECK-NEXT:    clz.b64 %r2, %rd3;
; CHECK-NEXT:    cvt.u64.u32 %rd60, %r2;
; CHECK-NEXT:    add.s64 %rd61, %rd60, 64;
; CHECK-NEXT:    selp.b64 %rd62, %rd59, %rd61, %p6;
; CHECK-NEXT:    setp.ne.s64 %p7, %rd2, 0;
; CHECK-NEXT:    clz.b64 %r3, %rd2;
; CHECK-NEXT:    cvt.u64.u32 %rd63, %r3;
; CHECK-NEXT:    clz.b64 %r4, %rd1;
; CHECK-NEXT:    cvt.u64.u32 %rd64, %r4;
; CHECK-NEXT:    add.s64 %rd65, %rd64, 64;
; CHECK-NEXT:    selp.b64 %rd66, %rd63, %rd65, %p7;
; CHECK-NEXT:    sub.cc.s64 %rd67, %rd62, %rd66;
; CHECK-NEXT:    subc.cc.s64 %rd68, %rd112, 0;
; CHECK-NEXT:    setp.eq.s64 %p8, %rd68, 0;
; CHECK-NEXT:    setp.ne.s64 %p9, %rd68, 0;
; CHECK-NEXT:    selp.u32 %r5, -1, 0, %p9;
; CHECK-NEXT:    setp.gt.u64 %p10, %rd67, 127;
; CHECK-NEXT:    selp.u32 %r6, -1, 0, %p10;
; CHECK-NEXT:    selp.b32 %r7, %r6, %r5, %p8;
; CHECK-NEXT:    and.b32 %r8, %r7, 1;
; CHECK-NEXT:    setp.eq.b32 %p11, %r8, 1;
; CHECK-NEXT:    or.pred %p12, %p5, %p11;
; CHECK-NEXT:    xor.b64 %rd69, %rd67, 127;
; CHECK-NEXT:    or.b64 %rd70, %rd69, %rd68;
; CHECK-NEXT:    setp.eq.s64 %p13, %rd70, 0;
; CHECK-NEXT:    selp.b64 %rd121, 0, %rd2, %p12;
; CHECK-NEXT:    selp.b64 %rd120, 0, %rd1, %p12;
; CHECK-NEXT:    or.pred %p14, %p12, %p13;
; CHECK-NEXT:    @%p14 bra $L__BB4_5;
; CHECK-NEXT:  // %bb.3: // %udiv-bb1
; CHECK-NEXT:    add.cc.s64 %rd114, %rd67, 1;
; CHECK-NEXT:    addc.cc.s64 %rd115, %rd68, 0;
; CHECK-NEXT:    or.b64 %rd73, %rd114, %rd115;
; CHECK-NEXT:    setp.eq.s64 %p15, %rd73, 0;
; CHECK-NEXT:    cvt.u32.u64 %r9, %rd67;
; CHECK-NEXT:    sub.s32 %r10, 127, %r9;
; CHECK-NEXT:    shl.b64 %rd74, %rd2, %r10;
; CHECK-NEXT:    sub.s32 %r11, 64, %r10;
; CHECK-NEXT:    shr.u64 %rd75, %rd1, %r11;
; CHECK-NEXT:    or.b64 %rd76, %rd74, %rd75;
; CHECK-NEXT:    sub.s32 %r12, 63, %r9;
; CHECK-NEXT:    shl.b64 %rd77, %rd1, %r12;
; CHECK-NEXT:    setp.gt.s32 %p16, %r10, 63;
; CHECK-NEXT:    selp.b64 %rd119, %rd77, %rd76, %p16;
; CHECK-NEXT:    shl.b64 %rd118, %rd1, %r10;
; CHECK-NEXT:    mov.u64 %rd109, %rd112;
; CHECK-NEXT:    @%p15 bra $L__BB4_4;
; CHECK-NEXT:  // %bb.1: // %udiv-preheader
; CHECK-NEXT:    cvt.u32.u64 %r13, %rd114;
; CHECK-NEXT:    shr.u64 %rd80, %rd1, %r13;
; CHECK-NEXT:    sub.s32 %r14, 64, %r13;
; CHECK-NEXT:    shl.b64 %rd81, %rd2, %r14;
; CHECK-NEXT:    or.b64 %rd82, %rd80, %rd81;
; CHECK-NEXT:    add.s32 %r15, %r13, -64;
; CHECK-NEXT:    shr.u64 %rd83, %rd2, %r15;
; CHECK-NEXT:    setp.gt.s32 %p17, %r13, 63;
; CHECK-NEXT:    selp.b64 %rd116, %rd83, %rd82, %p17;
; CHECK-NEXT:    shr.u64 %rd117, %rd2, %r13;
; CHECK-NEXT:    add.cc.s64 %rd35, %rd3, -1;
; CHECK-NEXT:    addc.cc.s64 %rd36, %rd4, -1;
; CHECK-NEXT:    mov.b64 %rd109, 0;
; CHECK-NEXT:    mov.u64 %rd112, %rd109;
; CHECK-NEXT:  $L__BB4_2: // %udiv-do-while
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    shr.u64 %rd84, %rd116, 63;
; CHECK-NEXT:    shl.b64 %rd85, %rd117, 1;
; CHECK-NEXT:    or.b64 %rd86, %rd85, %rd84;
; CHECK-NEXT:    shl.b64 %rd87, %rd116, 1;
; CHECK-NEXT:    shr.u64 %rd88, %rd119, 63;
; CHECK-NEXT:    or.b64 %rd89, %rd87, %rd88;
; CHECK-NEXT:    shr.u64 %rd90, %rd118, 63;
; CHECK-NEXT:    shl.b64 %rd91, %rd119, 1;
; CHECK-NEXT:    or.b64 %rd92, %rd91, %rd90;
; CHECK-NEXT:    shl.b64 %rd93, %rd118, 1;
; CHECK-NEXT:    or.b64 %rd118, %rd112, %rd93;
; CHECK-NEXT:    or.b64 %rd119, %rd109, %rd92;
; CHECK-NEXT:    sub.cc.s64 %rd94, %rd35, %rd89;
; CHECK-NEXT:    subc.cc.s64 %rd95, %rd36, %rd86;
; CHECK-NEXT:    shr.s64 %rd96, %rd95, 63;
; CHECK-NEXT:    and.b64 %rd112, %rd96, 1;
; CHECK-NEXT:    and.b64 %rd97, %rd96, %rd3;
; CHECK-NEXT:    and.b64 %rd98, %rd96, %rd4;
; CHECK-NEXT:    sub.cc.s64 %rd116, %rd89, %rd97;
; CHECK-NEXT:    subc.cc.s64 %rd117, %rd86, %rd98;
; CHECK-NEXT:    add.cc.s64 %rd114, %rd114, -1;
; CHECK-NEXT:    addc.cc.s64 %rd115, %rd115, -1;
; CHECK-NEXT:    or.b64 %rd99, %rd114, %rd115;
; CHECK-NEXT:    setp.eq.s64 %p18, %rd99, 0;
; CHECK-NEXT:    @%p18 bra $L__BB4_4;
; CHECK-NEXT:    bra.uni $L__BB4_2;
; CHECK-NEXT:  $L__BB4_4: // %udiv-loop-exit
; CHECK-NEXT:    shr.u64 %rd100, %rd118, 63;
; CHECK-NEXT:    shl.b64 %rd101, %rd119, 1;
; CHECK-NEXT:    or.b64 %rd102, %rd101, %rd100;
; CHECK-NEXT:    shl.b64 %rd103, %rd118, 1;
; CHECK-NEXT:    or.b64 %rd120, %rd112, %rd103;
; CHECK-NEXT:    or.b64 %rd121, %rd109, %rd102;
; CHECK-NEXT:  $L__BB4_5: // %udiv-end
; CHECK-NEXT:    xor.b64 %rd104, %rd120, %rd5;
; CHECK-NEXT:    xor.b64 %rd105, %rd121, %rd5;
; CHECK-NEXT:    sub.cc.s64 %rd106, %rd104, %rd5;
; CHECK-NEXT:    subc.cc.s64 %rd107, %rd105, %rd5;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd106, %rd107};
; CHECK-NEXT:    ret;
  %div = sdiv i128 %lhs, %rhs
  ret i128 %div
}
451
; Unsigned 128-bit division with a variable divisor.
; The expected PTX below shows the operation expanded inline (blocks named
; udiv-*): an unsigned shift-and-subtract loop produces the quotient, which is
; stored to the return slot directly with no sign fix-up.
define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: udiv_i128(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<17>;
; CHECK-NEXT:    .reg .b32 %r<16>;
; CHECK-NEXT:    .reg .b64 %rd<107>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %_udiv-special-cases
; CHECK-NEXT:    ld.param.v2.u64 {%rd41, %rd42}, [udiv_i128_param_0];
; CHECK-NEXT:    ld.param.v2.u64 {%rd43, %rd44}, [udiv_i128_param_1];
; CHECK-NEXT:    or.b64 %rd45, %rd43, %rd44;
; CHECK-NEXT:    setp.eq.s64 %p1, %rd45, 0;
; CHECK-NEXT:    or.b64 %rd46, %rd41, %rd42;
; CHECK-NEXT:    setp.eq.s64 %p2, %rd46, 0;
; CHECK-NEXT:    or.pred %p3, %p1, %p2;
; CHECK-NEXT:    setp.ne.s64 %p4, %rd44, 0;
; CHECK-NEXT:    clz.b64 %r1, %rd44;
; CHECK-NEXT:    cvt.u64.u32 %rd47, %r1;
; CHECK-NEXT:    clz.b64 %r2, %rd43;
; CHECK-NEXT:    cvt.u64.u32 %rd48, %r2;
; CHECK-NEXT:    add.s64 %rd49, %rd48, 64;
; CHECK-NEXT:    selp.b64 %rd50, %rd47, %rd49, %p4;
; CHECK-NEXT:    setp.ne.s64 %p5, %rd42, 0;
; CHECK-NEXT:    clz.b64 %r3, %rd42;
; CHECK-NEXT:    cvt.u64.u32 %rd51, %r3;
; CHECK-NEXT:    clz.b64 %r4, %rd41;
; CHECK-NEXT:    cvt.u64.u32 %rd52, %r4;
; CHECK-NEXT:    add.s64 %rd53, %rd52, 64;
; CHECK-NEXT:    selp.b64 %rd54, %rd51, %rd53, %p5;
; CHECK-NEXT:    mov.b64 %rd97, 0;
; CHECK-NEXT:    sub.cc.s64 %rd56, %rd50, %rd54;
; CHECK-NEXT:    subc.cc.s64 %rd57, %rd97, 0;
; CHECK-NEXT:    setp.eq.s64 %p6, %rd57, 0;
; CHECK-NEXT:    setp.ne.s64 %p7, %rd57, 0;
; CHECK-NEXT:    selp.u32 %r5, -1, 0, %p7;
; CHECK-NEXT:    setp.gt.u64 %p8, %rd56, 127;
; CHECK-NEXT:    selp.u32 %r6, -1, 0, %p8;
; CHECK-NEXT:    selp.b32 %r7, %r6, %r5, %p6;
; CHECK-NEXT:    and.b32 %r8, %r7, 1;
; CHECK-NEXT:    setp.eq.b32 %p9, %r8, 1;
; CHECK-NEXT:    or.pred %p10, %p3, %p9;
; CHECK-NEXT:    xor.b64 %rd58, %rd56, 127;
; CHECK-NEXT:    or.b64 %rd59, %rd58, %rd57;
; CHECK-NEXT:    setp.eq.s64 %p11, %rd59, 0;
; CHECK-NEXT:    selp.b64 %rd106, 0, %rd42, %p10;
; CHECK-NEXT:    selp.b64 %rd105, 0, %rd41, %p10;
; CHECK-NEXT:    or.pred %p12, %p10, %p11;
; CHECK-NEXT:    @%p12 bra $L__BB5_5;
; CHECK-NEXT:  // %bb.3: // %udiv-bb1
; CHECK-NEXT:    add.cc.s64 %rd99, %rd56, 1;
; CHECK-NEXT:    addc.cc.s64 %rd100, %rd57, 0;
; CHECK-NEXT:    or.b64 %rd62, %rd99, %rd100;
; CHECK-NEXT:    setp.eq.s64 %p13, %rd62, 0;
; CHECK-NEXT:    cvt.u32.u64 %r9, %rd56;
; CHECK-NEXT:    sub.s32 %r10, 127, %r9;
; CHECK-NEXT:    shl.b64 %rd63, %rd42, %r10;
; CHECK-NEXT:    sub.s32 %r11, 64, %r10;
; CHECK-NEXT:    shr.u64 %rd64, %rd41, %r11;
; CHECK-NEXT:    or.b64 %rd65, %rd63, %rd64;
; CHECK-NEXT:    sub.s32 %r12, 63, %r9;
; CHECK-NEXT:    shl.b64 %rd66, %rd41, %r12;
; CHECK-NEXT:    setp.gt.s32 %p14, %r10, 63;
; CHECK-NEXT:    selp.b64 %rd104, %rd66, %rd65, %p14;
; CHECK-NEXT:    shl.b64 %rd103, %rd41, %r10;
; CHECK-NEXT:    mov.u64 %rd94, %rd97;
; CHECK-NEXT:    @%p13 bra $L__BB5_4;
; CHECK-NEXT:  // %bb.1: // %udiv-preheader
; CHECK-NEXT:    cvt.u32.u64 %r13, %rd99;
; CHECK-NEXT:    shr.u64 %rd69, %rd41, %r13;
; CHECK-NEXT:    sub.s32 %r14, 64, %r13;
; CHECK-NEXT:    shl.b64 %rd70, %rd42, %r14;
; CHECK-NEXT:    or.b64 %rd71, %rd69, %rd70;
; CHECK-NEXT:    add.s32 %r15, %r13, -64;
; CHECK-NEXT:    shr.u64 %rd72, %rd42, %r15;
; CHECK-NEXT:    setp.gt.s32 %p15, %r13, 63;
; CHECK-NEXT:    selp.b64 %rd101, %rd72, %rd71, %p15;
; CHECK-NEXT:    shr.u64 %rd102, %rd42, %r13;
; CHECK-NEXT:    add.cc.s64 %rd33, %rd43, -1;
; CHECK-NEXT:    addc.cc.s64 %rd34, %rd44, -1;
; CHECK-NEXT:    mov.b64 %rd94, 0;
; CHECK-NEXT:    mov.u64 %rd97, %rd94;
; CHECK-NEXT:  $L__BB5_2: // %udiv-do-while
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    shr.u64 %rd73, %rd101, 63;
; CHECK-NEXT:    shl.b64 %rd74, %rd102, 1;
; CHECK-NEXT:    or.b64 %rd75, %rd74, %rd73;
; CHECK-NEXT:    shl.b64 %rd76, %rd101, 1;
; CHECK-NEXT:    shr.u64 %rd77, %rd104, 63;
; CHECK-NEXT:    or.b64 %rd78, %rd76, %rd77;
; CHECK-NEXT:    shr.u64 %rd79, %rd103, 63;
; CHECK-NEXT:    shl.b64 %rd80, %rd104, 1;
; CHECK-NEXT:    or.b64 %rd81, %rd80, %rd79;
; CHECK-NEXT:    shl.b64 %rd82, %rd103, 1;
; CHECK-NEXT:    or.b64 %rd103, %rd97, %rd82;
; CHECK-NEXT:    or.b64 %rd104, %rd94, %rd81;
; CHECK-NEXT:    sub.cc.s64 %rd83, %rd33, %rd78;
; CHECK-NEXT:    subc.cc.s64 %rd84, %rd34, %rd75;
; CHECK-NEXT:    shr.s64 %rd85, %rd84, 63;
; CHECK-NEXT:    and.b64 %rd97, %rd85, 1;
; CHECK-NEXT:    and.b64 %rd86, %rd85, %rd43;
; CHECK-NEXT:    and.b64 %rd87, %rd85, %rd44;
; CHECK-NEXT:    sub.cc.s64 %rd101, %rd78, %rd86;
; CHECK-NEXT:    subc.cc.s64 %rd102, %rd75, %rd87;
; CHECK-NEXT:    add.cc.s64 %rd99, %rd99, -1;
; CHECK-NEXT:    addc.cc.s64 %rd100, %rd100, -1;
; CHECK-NEXT:    or.b64 %rd88, %rd99, %rd100;
; CHECK-NEXT:    setp.eq.s64 %p16, %rd88, 0;
; CHECK-NEXT:    @%p16 bra $L__BB5_4;
; CHECK-NEXT:    bra.uni $L__BB5_2;
; CHECK-NEXT:  $L__BB5_4: // %udiv-loop-exit
; CHECK-NEXT:    shr.u64 %rd89, %rd103, 63;
; CHECK-NEXT:    shl.b64 %rd90, %rd104, 1;
; CHECK-NEXT:    or.b64 %rd91, %rd90, %rd89;
; CHECK-NEXT:    shl.b64 %rd92, %rd103, 1;
; CHECK-NEXT:    or.b64 %rd105, %rd97, %rd92;
; CHECK-NEXT:    or.b64 %rd106, %rd94, %rd91;
; CHECK-NEXT:  $L__BB5_5: // %udiv-end
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd105, %rd106};
; CHECK-NEXT:    ret;
  %div = udiv i128 %lhs, %rhs
  ret i128 %div
}
574
; Signed 128-bit division by the constant 8589934592 (2^33).
; The expected PTX contains no division loop: the operand is biased by the
; sign mask shifted right by 31, then shifted right by 33 across both 64-bit
; halves, with an arithmetic shift on the high word.
define i128 @sdiv_i128_pow2k(i128 %lhs) {
; CHECK-LABEL: sdiv_i128_pow2k(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<11>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.u64 {%rd1, %rd2}, [sdiv_i128_pow2k_param_0];
; CHECK-NEXT:    shr.s64 %rd3, %rd2, 63;
; CHECK-NEXT:    shr.u64 %rd4, %rd3, 31;
; CHECK-NEXT:    add.cc.s64 %rd5, %rd1, %rd4;
; CHECK-NEXT:    addc.cc.s64 %rd6, %rd2, 0;
; CHECK-NEXT:    shl.b64 %rd7, %rd6, 31;
; CHECK-NEXT:    shr.u64 %rd8, %rd5, 33;
; CHECK-NEXT:    or.b64 %rd9, %rd8, %rd7;
; CHECK-NEXT:    shr.s64 %rd10, %rd6, 33;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd9, %rd10};
; CHECK-NEXT:    ret;
  %div = sdiv i128 %lhs, 8589934592
  ret i128 %div
}
595
; Unsigned 128-bit division by the constant 8589934592 (2^33).
; The expected PTX reduces this to a logical right shift by 33 spanning both
; 64-bit halves: the low result word combines bits from both input words and
; the high result word is the high input word shifted right.
define i128 @udiv_i128_pow2k(i128 %lhs) {
; CHECK-LABEL: udiv_i128_pow2k(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<7>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.u64 {%rd1, %rd2}, [udiv_i128_pow2k_param_0];
; CHECK-NEXT:    shl.b64 %rd3, %rd2, 31;
; CHECK-NEXT:    shr.u64 %rd4, %rd1, 33;
; CHECK-NEXT:    or.b64 %rd5, %rd4, %rd3;
; CHECK-NEXT:    shr.u64 %rd6, %rd2, 33;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd5, %rd6};
; CHECK-NEXT:    ret;
  %div = udiv i128 %lhs, 8589934592
  ret i128 %div
}
612
; 128-bit addition of two variable operands.
; The expected PTX loads each operand as a pair of 64-bit words and adds them
; with an add.cc / addc.cc pair so the carry out of the low words feeds the
; high-word addition.
define i128 @add_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: add_i128(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<7>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.u64 {%rd1, %rd2}, [add_i128_param_0];
; CHECK-NEXT:    ld.param.v2.u64 {%rd3, %rd4}, [add_i128_param_1];
; CHECK-NEXT:    add.cc.s64 %rd5, %rd1, %rd3;
; CHECK-NEXT:    addc.cc.s64 %rd6, %rd2, %rd4;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd5, %rd6};
; CHECK-NEXT:    ret;
  %result = add i128 %lhs, %rhs
  ret i128 %result
}
628