1a288d8daSJoseph Huber; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 2a288d8daSJoseph Huber; RUN: llc < %s -mtriple=nvptx64-- 2>&1 | FileCheck %s 3a288d8daSJoseph Huber; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-- | %ptxas-verify %} 4a288d8daSJoseph Huber 5a288d8daSJoseph Huberdefine i128 @srem_i128(i128 %lhs, i128 %rhs) { 6a288d8daSJoseph Huber; CHECK-LABEL: srem_i128( 7a288d8daSJoseph Huber; CHECK: { 8a288d8daSJoseph Huber; CHECK-NEXT: .reg .pred %p<19>; 9273a94b3SAlex MacLean; CHECK-NEXT: .reg .b32 %r<16>; 10*0068078dSpeterbell10; CHECK-NEXT: .reg .b64 %rd<127>; 11a288d8daSJoseph Huber; CHECK-EMPTY: 12a288d8daSJoseph Huber; CHECK-NEXT: // %bb.0: // %_udiv-special-cases 13a288d8daSJoseph Huber; CHECK-NEXT: ld.param.v2.u64 {%rd45, %rd46}, [srem_i128_param_0]; 14a288d8daSJoseph Huber; CHECK-NEXT: ld.param.v2.u64 {%rd49, %rd50}, [srem_i128_param_1]; 15a288d8daSJoseph Huber; CHECK-NEXT: shr.s64 %rd2, %rd46, 63; 16*0068078dSpeterbell10; CHECK-NEXT: mov.b64 %rd117, 0; 17*0068078dSpeterbell10; CHECK-NEXT: sub.cc.s64 %rd52, %rd117, %rd45; 18*0068078dSpeterbell10; CHECK-NEXT: subc.cc.s64 %rd53, %rd117, %rd46; 19a288d8daSJoseph Huber; CHECK-NEXT: setp.lt.s64 %p1, %rd46, 0; 20a288d8daSJoseph Huber; CHECK-NEXT: selp.b64 %rd4, %rd53, %rd46, %p1; 21a288d8daSJoseph Huber; CHECK-NEXT: selp.b64 %rd3, %rd52, %rd45, %p1; 22*0068078dSpeterbell10; CHECK-NEXT: sub.cc.s64 %rd54, %rd117, %rd49; 23*0068078dSpeterbell10; CHECK-NEXT: subc.cc.s64 %rd55, %rd117, %rd50; 24a288d8daSJoseph Huber; CHECK-NEXT: setp.lt.s64 %p2, %rd50, 0; 25a288d8daSJoseph Huber; CHECK-NEXT: selp.b64 %rd6, %rd55, %rd50, %p2; 26a288d8daSJoseph Huber; CHECK-NEXT: selp.b64 %rd5, %rd54, %rd49, %p2; 27a288d8daSJoseph Huber; CHECK-NEXT: or.b64 %rd56, %rd5, %rd6; 28a288d8daSJoseph Huber; CHECK-NEXT: setp.eq.s64 %p3, %rd56, 0; 29a288d8daSJoseph Huber; CHECK-NEXT: or.b64 %rd57, %rd3, %rd4; 30a288d8daSJoseph Huber; CHECK-NEXT: setp.eq.s64 %p4, %rd57, 0; 31a288d8daSJoseph Huber; CHECK-NEXT: or.pred %p5, %p3, %p4; 32a288d8daSJoseph Huber; CHECK-NEXT: setp.ne.s64 %p6, %rd6, 0; 33a288d8daSJoseph Huber; CHECK-NEXT: clz.b64 %r1, %rd6; 34a288d8daSJoseph Huber; CHECK-NEXT: cvt.u64.u32 %rd58, %r1; 35a288d8daSJoseph Huber; CHECK-NEXT: clz.b64 %r2, %rd5; 36a288d8daSJoseph Huber; CHECK-NEXT: cvt.u64.u32 %rd59, %r2; 37a288d8daSJoseph Huber; CHECK-NEXT: add.s64 %rd60, %rd59, 64; 38a288d8daSJoseph Huber; CHECK-NEXT: selp.b64 %rd61, %rd58, %rd60, %p6; 39a288d8daSJoseph Huber; CHECK-NEXT: setp.ne.s64 %p7, %rd4, 0; 40a288d8daSJoseph Huber; CHECK-NEXT: clz.b64 %r3, %rd4; 41a288d8daSJoseph Huber; CHECK-NEXT: cvt.u64.u32 %rd62, %r3; 42a288d8daSJoseph Huber; CHECK-NEXT: clz.b64 %r4, %rd3; 43a288d8daSJoseph Huber; CHECK-NEXT: cvt.u64.u32 %rd63, %r4; 44a288d8daSJoseph Huber; CHECK-NEXT: add.s64 %rd64, %rd63, 64; 45a288d8daSJoseph Huber; CHECK-NEXT: selp.b64 %rd65, %rd62, %rd64, %p7; 462b78303eSBjörn Pettersson; CHECK-NEXT: sub.cc.s64 %rd66, %rd61, %rd65; 47*0068078dSpeterbell10; CHECK-NEXT: subc.cc.s64 %rd67, %rd117, 0; 482b78303eSBjörn Pettersson; CHECK-NEXT: setp.eq.s64 %p8, %rd67, 0; 492b78303eSBjörn Pettersson; CHECK-NEXT: setp.ne.s64 %p9, %rd67, 0; 50a288d8daSJoseph Huber; CHECK-NEXT: selp.u32 %r5, -1, 0, %p9; 512b78303eSBjörn Pettersson; CHECK-NEXT: setp.gt.u64 %p10, %rd66, 127; 52a288d8daSJoseph Huber; CHECK-NEXT: selp.u32 %r6, -1, 0, %p10; 53a288d8daSJoseph Huber; CHECK-NEXT: selp.b32 %r7, %r6, %r5, %p8; 54a288d8daSJoseph Huber; CHECK-NEXT: and.b32 %r8, %r7, 1; 55a288d8daSJoseph Huber; CHECK-NEXT: setp.eq.b32 %p11, %r8, 1; 56a288d8daSJoseph Huber; CHECK-NEXT: or.pred %p12, %p5, %p11; 572b78303eSBjörn Pettersson; CHECK-NEXT: xor.b64 %rd68, %rd66, 127; 582b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd69, %rd68, %rd67; 592b78303eSBjörn Pettersson; CHECK-NEXT: setp.eq.s64 %p13, %rd69, 0; 60*0068078dSpeterbell10; CHECK-NEXT: selp.b64 %rd126, 0, %rd4, %p12; 61*0068078dSpeterbell10; CHECK-NEXT: selp.b64 %rd125, 0, %rd3, %p12; 62a288d8daSJoseph Huber; CHECK-NEXT: or.pred %p14, %p12, %p13; 63a288d8daSJoseph Huber; CHECK-NEXT: @%p14 bra $L__BB0_5; 64a288d8daSJoseph Huber; CHECK-NEXT: // %bb.3: // %udiv-bb1 65*0068078dSpeterbell10; CHECK-NEXT: add.cc.s64 %rd119, %rd66, 1; 66*0068078dSpeterbell10; CHECK-NEXT: addc.cc.s64 %rd120, %rd67, 0; 67*0068078dSpeterbell10; CHECK-NEXT: or.b64 %rd72, %rd119, %rd120; 682b78303eSBjörn Pettersson; CHECK-NEXT: setp.eq.s64 %p15, %rd72, 0; 692b78303eSBjörn Pettersson; CHECK-NEXT: cvt.u32.u64 %r9, %rd66; 70273a94b3SAlex MacLean; CHECK-NEXT: sub.s32 %r10, 127, %r9; 71273a94b3SAlex MacLean; CHECK-NEXT: shl.b64 %rd73, %rd4, %r10; 72273a94b3SAlex MacLean; CHECK-NEXT: sub.s32 %r11, 64, %r10; 73273a94b3SAlex MacLean; CHECK-NEXT: shr.u64 %rd74, %rd3, %r11; 742b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd75, %rd73, %rd74; 75273a94b3SAlex MacLean; CHECK-NEXT: sub.s32 %r12, 63, %r9; 76273a94b3SAlex MacLean; CHECK-NEXT: shl.b64 %rd76, %rd3, %r12; 77273a94b3SAlex MacLean; CHECK-NEXT: setp.gt.s32 %p16, %r10, 63; 78*0068078dSpeterbell10; CHECK-NEXT: selp.b64 %rd124, %rd76, %rd75, %p16; 79*0068078dSpeterbell10; CHECK-NEXT: shl.b64 %rd123, %rd3, %r10; 80*0068078dSpeterbell10; CHECK-NEXT: mov.u64 %rd114, %rd117; 81a288d8daSJoseph Huber; CHECK-NEXT: @%p15 bra $L__BB0_4; 82a288d8daSJoseph Huber; CHECK-NEXT: // %bb.1: // %udiv-preheader 83*0068078dSpeterbell10; CHECK-NEXT: cvt.u32.u64 %r13, %rd119; 84273a94b3SAlex MacLean; CHECK-NEXT: shr.u64 %rd79, %rd3, %r13; 85273a94b3SAlex MacLean; CHECK-NEXT: sub.s32 %r14, 64, %r13; 86273a94b3SAlex MacLean; CHECK-NEXT: shl.b64 %rd80, %rd4, %r14; 872b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd81, %rd79, %rd80; 88273a94b3SAlex MacLean; CHECK-NEXT: add.s32 %r15, %r13, -64; 89273a94b3SAlex MacLean; CHECK-NEXT: shr.u64 %rd82, %rd4, %r15; 90273a94b3SAlex MacLean; CHECK-NEXT: setp.gt.s32 %p17, %r13, 63; 91*0068078dSpeterbell10; CHECK-NEXT: selp.b64 %rd121, %rd82, %rd81, %p17; 92*0068078dSpeterbell10; CHECK-NEXT: shr.u64 %rd122, %rd4, %r13; 93a288d8daSJoseph Huber; CHECK-NEXT: add.cc.s64 %rd35, %rd5, -1; 94a288d8daSJoseph Huber; CHECK-NEXT: addc.cc.s64 %rd36, %rd6, -1; 95*0068078dSpeterbell10; CHECK-NEXT: mov.b64 %rd114, 0; 96*0068078dSpeterbell10; CHECK-NEXT: mov.u64 %rd117, %rd114; 97a288d8daSJoseph Huber; CHECK-NEXT: $L__BB0_2: // %udiv-do-while 98a288d8daSJoseph Huber; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 99*0068078dSpeterbell10; CHECK-NEXT: shr.u64 %rd83, %rd121, 63; 100*0068078dSpeterbell10; CHECK-NEXT: shl.b64 %rd84, %rd122, 1; 1012b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd85, %rd84, %rd83; 102*0068078dSpeterbell10; CHECK-NEXT: shl.b64 %rd86, %rd121, 1; 103*0068078dSpeterbell10; CHECK-NEXT: shr.u64 %rd87, %rd124, 63; 1042b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd88, %rd86, %rd87; 105*0068078dSpeterbell10; CHECK-NEXT: shr.u64 %rd89, %rd123, 63; 106*0068078dSpeterbell10; CHECK-NEXT: shl.b64 %rd90, %rd124, 1; 1072b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd91, %rd90, %rd89; 108*0068078dSpeterbell10; CHECK-NEXT: shl.b64 %rd92, %rd123, 1; 109*0068078dSpeterbell10; CHECK-NEXT: or.b64 %rd123, %rd117, %rd92; 110*0068078dSpeterbell10; CHECK-NEXT: or.b64 %rd124, %rd114, %rd91; 1112b78303eSBjörn Pettersson; CHECK-NEXT: sub.cc.s64 %rd93, %rd35, %rd88; 1122b78303eSBjörn Pettersson; CHECK-NEXT: subc.cc.s64 %rd94, %rd36, %rd85; 1132b78303eSBjörn Pettersson; CHECK-NEXT: shr.s64 %rd95, %rd94, 63; 114*0068078dSpeterbell10; CHECK-NEXT: and.b64 %rd117, %rd95, 1; 1152b78303eSBjörn Pettersson; CHECK-NEXT: and.b64 %rd96, %rd95, %rd5; 1162b78303eSBjörn Pettersson; CHECK-NEXT: and.b64 %rd97, %rd95, %rd6; 117*0068078dSpeterbell10; CHECK-NEXT: sub.cc.s64 %rd121, %rd88, %rd96; 118*0068078dSpeterbell10; CHECK-NEXT: subc.cc.s64 %rd122, %rd85, %rd97; 119*0068078dSpeterbell10; CHECK-NEXT: add.cc.s64 %rd119, %rd119, -1; 120*0068078dSpeterbell10; CHECK-NEXT: addc.cc.s64 %rd120, %rd120, -1; 121*0068078dSpeterbell10; CHECK-NEXT: or.b64 %rd98, %rd119, %rd120; 1222b78303eSBjörn Pettersson; CHECK-NEXT: setp.eq.s64 %p18, %rd98, 0; 123a288d8daSJoseph Huber; CHECK-NEXT: @%p18 bra $L__BB0_4; 124a288d8daSJoseph Huber; CHECK-NEXT: bra.uni $L__BB0_2; 125a288d8daSJoseph Huber; CHECK-NEXT: $L__BB0_4: // %udiv-loop-exit 126*0068078dSpeterbell10; CHECK-NEXT: shr.u64 %rd99, %rd123, 63; 127*0068078dSpeterbell10; CHECK-NEXT: shl.b64 %rd100, %rd124, 1; 1282b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd101, %rd100, %rd99; 129*0068078dSpeterbell10; CHECK-NEXT: shl.b64 %rd102, %rd123, 1; 130*0068078dSpeterbell10; CHECK-NEXT: or.b64 %rd125, %rd117, %rd102; 131*0068078dSpeterbell10; CHECK-NEXT: or.b64 %rd126, %rd114, %rd101; 132a288d8daSJoseph Huber; CHECK-NEXT: $L__BB0_5: // %udiv-end 133*0068078dSpeterbell10; CHECK-NEXT: mul.hi.u64 %rd103, %rd5, %rd125; 134*0068078dSpeterbell10; CHECK-NEXT: mad.lo.s64 %rd104, %rd5, %rd126, %rd103; 135*0068078dSpeterbell10; CHECK-NEXT: mad.lo.s64 %rd105, %rd6, %rd125, %rd104; 136*0068078dSpeterbell10; CHECK-NEXT: mul.lo.s64 %rd106, %rd5, %rd125; 137*0068078dSpeterbell10; CHECK-NEXT: sub.cc.s64 %rd107, %rd3, %rd106; 138*0068078dSpeterbell10; CHECK-NEXT: subc.cc.s64 %rd108, %rd4, %rd105; 139*0068078dSpeterbell10; CHECK-NEXT: xor.b64 %rd109, %rd107, %rd2; 140*0068078dSpeterbell10; CHECK-NEXT: xor.b64 %rd110, %rd108, %rd2; 141*0068078dSpeterbell10; CHECK-NEXT: sub.cc.s64 %rd111, %rd109, %rd2; 142*0068078dSpeterbell10; CHECK-NEXT: subc.cc.s64 %rd112, %rd110, %rd2; 143*0068078dSpeterbell10; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd111, %rd112}; 144a288d8daSJoseph Huber; CHECK-NEXT: ret; 145a288d8daSJoseph Huber %div = srem i128 %lhs, %rhs 146a288d8daSJoseph Huber ret i128 %div 147a288d8daSJoseph Huber} 148a288d8daSJoseph Huber 149a288d8daSJoseph Huberdefine i128 @urem_i128(i128 %lhs, i128 %rhs) { 150a288d8daSJoseph Huber; CHECK-LABEL: urem_i128( 151a288d8daSJoseph Huber; CHECK: { 152a288d8daSJoseph Huber; CHECK-NEXT: .reg .pred %p<17>; 153273a94b3SAlex MacLean; CHECK-NEXT: .reg .b32 %r<16>; 154*0068078dSpeterbell10; CHECK-NEXT: .reg .b64 %rd<113>; 155a288d8daSJoseph Huber; CHECK-EMPTY: 156a288d8daSJoseph Huber; CHECK-NEXT: // %bb.0: // %_udiv-special-cases 157a288d8daSJoseph Huber; CHECK-NEXT: ld.param.v2.u64 {%rd41, %rd42}, [urem_i128_param_0]; 158a288d8daSJoseph Huber; CHECK-NEXT: ld.param.v2.u64 {%rd3, %rd4}, [urem_i128_param_1]; 159a288d8daSJoseph Huber; CHECK-NEXT: or.b64 %rd45, %rd3, %rd4; 160a288d8daSJoseph Huber; CHECK-NEXT: setp.eq.s64 %p1, %rd45, 0; 161a288d8daSJoseph Huber; CHECK-NEXT: or.b64 %rd46, %rd41, %rd42; 162a288d8daSJoseph Huber; CHECK-NEXT: setp.eq.s64 %p2, %rd46, 0; 163a288d8daSJoseph Huber; CHECK-NEXT: or.pred %p3, %p1, %p2; 164a288d8daSJoseph Huber; CHECK-NEXT: setp.ne.s64 %p4, %rd4, 0; 165a288d8daSJoseph Huber; CHECK-NEXT: clz.b64 %r1, %rd4; 166a288d8daSJoseph Huber; CHECK-NEXT: cvt.u64.u32 %rd47, %r1; 167a288d8daSJoseph Huber; CHECK-NEXT: clz.b64 %r2, %rd3; 168a288d8daSJoseph Huber; CHECK-NEXT: cvt.u64.u32 %rd48, %r2; 169a288d8daSJoseph Huber; CHECK-NEXT: add.s64 %rd49, %rd48, 64; 170a288d8daSJoseph Huber; CHECK-NEXT: selp.b64 %rd50, %rd47, %rd49, %p4; 171a288d8daSJoseph Huber; CHECK-NEXT: setp.ne.s64 %p5, %rd42, 0; 172a288d8daSJoseph Huber; CHECK-NEXT: clz.b64 %r3, %rd42; 173a288d8daSJoseph Huber; CHECK-NEXT: cvt.u64.u32 %rd51, %r3; 174a288d8daSJoseph Huber; CHECK-NEXT: clz.b64 %r4, %rd41; 175a288d8daSJoseph Huber; CHECK-NEXT: cvt.u64.u32 %rd52, %r4; 176a288d8daSJoseph Huber; CHECK-NEXT: add.s64 %rd53, %rd52, 64; 177a288d8daSJoseph Huber; CHECK-NEXT: selp.b64 %rd54, %rd51, %rd53, %p5; 178*0068078dSpeterbell10; CHECK-NEXT: mov.b64 %rd103, 0; 1792b78303eSBjörn Pettersson; CHECK-NEXT: sub.cc.s64 %rd56, %rd50, %rd54; 180*0068078dSpeterbell10; CHECK-NEXT: subc.cc.s64 %rd57, %rd103, 0; 1812b78303eSBjörn Pettersson; CHECK-NEXT: setp.eq.s64 %p6, %rd57, 0; 1822b78303eSBjörn Pettersson; CHECK-NEXT: setp.ne.s64 %p7, %rd57, 0; 183a288d8daSJoseph Huber; CHECK-NEXT: selp.u32 %r5, -1, 0, %p7; 1842b78303eSBjörn Pettersson; CHECK-NEXT: setp.gt.u64 %p8, %rd56, 127; 185a288d8daSJoseph Huber; CHECK-NEXT: selp.u32 %r6, -1, 0, %p8; 186a288d8daSJoseph Huber; CHECK-NEXT: selp.b32 %r7, %r6, %r5, %p6; 187a288d8daSJoseph Huber; CHECK-NEXT: and.b32 %r8, %r7, 1; 188a288d8daSJoseph Huber; CHECK-NEXT: setp.eq.b32 %p9, %r8, 1; 189a288d8daSJoseph Huber; CHECK-NEXT: or.pred %p10, %p3, %p9; 1902b78303eSBjörn Pettersson; CHECK-NEXT: xor.b64 %rd58, %rd56, 127; 1912b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd59, %rd58, %rd57; 1922b78303eSBjörn Pettersson; CHECK-NEXT: setp.eq.s64 %p11, %rd59, 0; 193*0068078dSpeterbell10; CHECK-NEXT: selp.b64 %rd112, 0, %rd42, %p10; 194*0068078dSpeterbell10; CHECK-NEXT: selp.b64 %rd111, 0, %rd41, %p10; 195a288d8daSJoseph Huber; CHECK-NEXT: or.pred %p12, %p10, %p11; 196a288d8daSJoseph Huber; CHECK-NEXT: @%p12 bra $L__BB1_5; 197a288d8daSJoseph Huber; CHECK-NEXT: // %bb.3: // %udiv-bb1 198*0068078dSpeterbell10; CHECK-NEXT: add.cc.s64 %rd105, %rd56, 1; 199*0068078dSpeterbell10; CHECK-NEXT: addc.cc.s64 %rd106, %rd57, 0; 200*0068078dSpeterbell10; CHECK-NEXT: or.b64 %rd62, %rd105, %rd106; 2012b78303eSBjörn Pettersson; CHECK-NEXT: setp.eq.s64 %p13, %rd62, 0; 2022b78303eSBjörn Pettersson; CHECK-NEXT: cvt.u32.u64 %r9, %rd56; 203273a94b3SAlex MacLean; CHECK-NEXT: sub.s32 %r10, 127, %r9; 204273a94b3SAlex MacLean; CHECK-NEXT: shl.b64 %rd63, %rd42, %r10; 205273a94b3SAlex MacLean; CHECK-NEXT: sub.s32 %r11, 64, %r10; 206273a94b3SAlex MacLean; CHECK-NEXT: shr.u64 %rd64, %rd41, %r11; 2072b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd65, %rd63, %rd64; 208273a94b3SAlex MacLean; CHECK-NEXT: sub.s32 %r12, 63, %r9; 209273a94b3SAlex MacLean; CHECK-NEXT: shl.b64 %rd66, %rd41, %r12; 210273a94b3SAlex MacLean; CHECK-NEXT: setp.gt.s32 %p14, %r10, 63; 211*0068078dSpeterbell10; CHECK-NEXT: selp.b64 %rd110, %rd66, %rd65, %p14; 212*0068078dSpeterbell10; CHECK-NEXT: shl.b64 %rd109, %rd41, %r10; 213*0068078dSpeterbell10; CHECK-NEXT: mov.u64 %rd100, %rd103; 214a288d8daSJoseph Huber; CHECK-NEXT: @%p13 bra $L__BB1_4; 215a288d8daSJoseph Huber; CHECK-NEXT: // %bb.1: // %udiv-preheader 216*0068078dSpeterbell10; CHECK-NEXT: cvt.u32.u64 %r13, %rd105; 217273a94b3SAlex MacLean; CHECK-NEXT: shr.u64 %rd69, %rd41, %r13; 218273a94b3SAlex MacLean; CHECK-NEXT: sub.s32 %r14, 64, %r13; 219273a94b3SAlex MacLean; CHECK-NEXT: shl.b64 %rd70, %rd42, %r14; 2202b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd71, %rd69, %rd70; 221273a94b3SAlex MacLean; CHECK-NEXT: add.s32 %r15, %r13, -64; 222273a94b3SAlex MacLean; CHECK-NEXT: shr.u64 %rd72, %rd42, %r15; 223273a94b3SAlex MacLean; CHECK-NEXT: setp.gt.s32 %p15, %r13, 63; 224*0068078dSpeterbell10; CHECK-NEXT: selp.b64 %rd107, %rd72, %rd71, %p15; 225*0068078dSpeterbell10; CHECK-NEXT: shr.u64 %rd108, %rd42, %r13; 226a288d8daSJoseph Huber; CHECK-NEXT: add.cc.s64 %rd33, %rd3, -1; 227a288d8daSJoseph Huber; CHECK-NEXT: addc.cc.s64 %rd34, %rd4, -1; 228*0068078dSpeterbell10; CHECK-NEXT: mov.b64 %rd100, 0; 229*0068078dSpeterbell10; CHECK-NEXT: mov.u64 %rd103, %rd100; 230a288d8daSJoseph Huber; CHECK-NEXT: $L__BB1_2: // %udiv-do-while 231a288d8daSJoseph Huber; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 232*0068078dSpeterbell10; CHECK-NEXT: shr.u64 %rd73, %rd107, 63; 233*0068078dSpeterbell10; CHECK-NEXT: shl.b64 %rd74, %rd108, 1; 2342b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd75, %rd74, %rd73; 235*0068078dSpeterbell10; CHECK-NEXT: shl.b64 %rd76, %rd107, 1; 236*0068078dSpeterbell10; CHECK-NEXT: shr.u64 %rd77, %rd110, 63; 2372b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd78, %rd76, %rd77; 238*0068078dSpeterbell10; CHECK-NEXT: shr.u64 %rd79, %rd109, 63; 239*0068078dSpeterbell10; CHECK-NEXT: shl.b64 %rd80, %rd110, 1; 2402b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd81, %rd80, %rd79; 241*0068078dSpeterbell10; CHECK-NEXT: shl.b64 %rd82, %rd109, 1; 242*0068078dSpeterbell10; CHECK-NEXT: or.b64 %rd109, %rd103, %rd82; 243*0068078dSpeterbell10; CHECK-NEXT: or.b64 %rd110, %rd100, %rd81; 2442b78303eSBjörn Pettersson; CHECK-NEXT: sub.cc.s64 %rd83, %rd33, %rd78; 2452b78303eSBjörn Pettersson; CHECK-NEXT: subc.cc.s64 %rd84, %rd34, %rd75; 2462b78303eSBjörn Pettersson; CHECK-NEXT: shr.s64 %rd85, %rd84, 63; 247*0068078dSpeterbell10; CHECK-NEXT: and.b64 %rd103, %rd85, 1; 2482b78303eSBjörn Pettersson; CHECK-NEXT: and.b64 %rd86, %rd85, %rd3; 2492b78303eSBjörn Pettersson; CHECK-NEXT: and.b64 %rd87, %rd85, %rd4; 250*0068078dSpeterbell10; CHECK-NEXT: sub.cc.s64 %rd107, %rd78, %rd86; 251*0068078dSpeterbell10; CHECK-NEXT: subc.cc.s64 %rd108, %rd75, %rd87; 252*0068078dSpeterbell10; CHECK-NEXT: add.cc.s64 %rd105, %rd105, -1; 253*0068078dSpeterbell10; CHECK-NEXT: addc.cc.s64 %rd106, %rd106, -1; 254*0068078dSpeterbell10; CHECK-NEXT: or.b64 %rd88, %rd105, %rd106; 2552b78303eSBjörn Pettersson; CHECK-NEXT: setp.eq.s64 %p16, %rd88, 0; 256a288d8daSJoseph Huber; CHECK-NEXT: @%p16 bra $L__BB1_4; 257a288d8daSJoseph Huber; CHECK-NEXT: bra.uni $L__BB1_2; 258a288d8daSJoseph Huber; CHECK-NEXT: $L__BB1_4: // %udiv-loop-exit 259*0068078dSpeterbell10; CHECK-NEXT: shr.u64 %rd89, %rd109, 63; 260*0068078dSpeterbell10; CHECK-NEXT: shl.b64 %rd90, %rd110, 1; 2612b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd91, %rd90, %rd89; 262*0068078dSpeterbell10; CHECK-NEXT: shl.b64 %rd92, %rd109, 1; 263*0068078dSpeterbell10; CHECK-NEXT: or.b64 %rd111, %rd103, %rd92; 264*0068078dSpeterbell10; CHECK-NEXT: or.b64 %rd112, %rd100, %rd91; 265a288d8daSJoseph Huber; CHECK-NEXT: $L__BB1_5: // %udiv-end 266*0068078dSpeterbell10; CHECK-NEXT: mul.hi.u64 %rd93, %rd3, %rd111; 267*0068078dSpeterbell10; CHECK-NEXT: mad.lo.s64 %rd94, %rd3, %rd112, %rd93; 268*0068078dSpeterbell10; CHECK-NEXT: mad.lo.s64 %rd95, %rd4, %rd111, %rd94; 269*0068078dSpeterbell10; CHECK-NEXT: mul.lo.s64 %rd96, %rd3, %rd111; 270*0068078dSpeterbell10; CHECK-NEXT: sub.cc.s64 %rd97, %rd41, %rd96; 271*0068078dSpeterbell10; CHECK-NEXT: subc.cc.s64 %rd98, %rd42, %rd95; 272*0068078dSpeterbell10; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd97, %rd98}; 273a288d8daSJoseph Huber; CHECK-NEXT: ret; 274a288d8daSJoseph Huber %div = urem i128 %lhs, %rhs 275a288d8daSJoseph Huber ret i128 %div 276a288d8daSJoseph Huber} 277a288d8daSJoseph Huber 278a288d8daSJoseph Huberdefine i128 @srem_i128_pow2k(i128 %lhs) { 279a288d8daSJoseph Huber; CHECK-LABEL: srem_i128_pow2k( 280a288d8daSJoseph Huber; CHECK: { 281a288d8daSJoseph Huber; CHECK-NEXT: .reg .b64 %rd<10>; 282a288d8daSJoseph Huber; CHECK-EMPTY: 283a288d8daSJoseph Huber; CHECK-NEXT: // %bb.0: 284a288d8daSJoseph Huber; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [srem_i128_pow2k_param_0]; 285a288d8daSJoseph Huber; CHECK-NEXT: shr.s64 %rd3, %rd2, 63; 286a288d8daSJoseph Huber; CHECK-NEXT: shr.u64 %rd4, %rd3, 31; 287a288d8daSJoseph Huber; CHECK-NEXT: add.cc.s64 %rd5, %rd1, %rd4; 288a288d8daSJoseph Huber; CHECK-NEXT: addc.cc.s64 %rd6, %rd2, 0; 289a288d8daSJoseph Huber; CHECK-NEXT: and.b64 %rd7, %rd5, -8589934592; 290a288d8daSJoseph Huber; CHECK-NEXT: sub.cc.s64 %rd8, %rd1, %rd7; 291a288d8daSJoseph Huber; CHECK-NEXT: subc.cc.s64 %rd9, %rd2, %rd6; 2920f0a96b8SYoungsuk Kim; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd8, %rd9}; 293a288d8daSJoseph Huber; CHECK-NEXT: ret; 294a288d8daSJoseph Huber %div = srem i128 %lhs, 8589934592 295a288d8daSJoseph Huber ret i128 %div 296a288d8daSJoseph Huber} 297a288d8daSJoseph Huber 298a288d8daSJoseph Huberdefine i128 @urem_i128_pow2k(i128 %lhs) { 299a288d8daSJoseph Huber; CHECK-LABEL: urem_i128_pow2k( 300a288d8daSJoseph Huber; CHECK: { 301a288d8daSJoseph Huber; CHECK-NEXT: .reg .b64 %rd<5>; 302a288d8daSJoseph Huber; CHECK-EMPTY: 303a288d8daSJoseph Huber; CHECK-NEXT: // %bb.0: 304a288d8daSJoseph Huber; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [urem_i128_pow2k_param_0]; 305a288d8daSJoseph Huber; CHECK-NEXT: and.b64 %rd3, %rd1, 8589934591; 306310e7987SAlex MacLean; CHECK-NEXT: mov.b64 %rd4, 0; 3070f0a96b8SYoungsuk Kim; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd4}; 308a288d8daSJoseph Huber; CHECK-NEXT: ret; 309a288d8daSJoseph Huber %div = urem i128 %lhs, 8589934592 310a288d8daSJoseph Huber ret i128 %div 311a288d8daSJoseph Huber} 312a288d8daSJoseph Huber 313a288d8daSJoseph Huberdefine i128 @sdiv_i128(i128 %lhs, i128 %rhs) { 314a288d8daSJoseph Huber; CHECK-LABEL: sdiv_i128( 315a288d8daSJoseph Huber; CHECK: { 316a288d8daSJoseph Huber; CHECK-NEXT: .reg .pred %p<19>; 317273a94b3SAlex MacLean; CHECK-NEXT: .reg .b32 %r<16>; 3182b78303eSBjörn Pettersson; CHECK-NEXT: .reg .b64 %rd<122>; 319a288d8daSJoseph Huber; CHECK-EMPTY: 320a288d8daSJoseph Huber; CHECK-NEXT: // %bb.0: // %_udiv-special-cases 321a288d8daSJoseph Huber; CHECK-NEXT: ld.param.v2.u64 {%rd45, %rd46}, [sdiv_i128_param_0]; 322a288d8daSJoseph Huber; CHECK-NEXT: ld.param.v2.u64 {%rd49, %rd50}, [sdiv_i128_param_1]; 323310e7987SAlex MacLean; CHECK-NEXT: mov.b64 %rd112, 0; 3242b78303eSBjörn Pettersson; CHECK-NEXT: sub.cc.s64 %rd52, %rd112, %rd45; 3252b78303eSBjörn Pettersson; CHECK-NEXT: subc.cc.s64 %rd53, %rd112, %rd46; 326a288d8daSJoseph Huber; CHECK-NEXT: setp.lt.s64 %p1, %rd46, 0; 327a288d8daSJoseph Huber; CHECK-NEXT: selp.b64 %rd2, %rd53, %rd46, %p1; 328a288d8daSJoseph Huber; CHECK-NEXT: selp.b64 %rd1, %rd52, %rd45, %p1; 3292b78303eSBjörn Pettersson; CHECK-NEXT: sub.cc.s64 %rd54, %rd112, %rd49; 3302b78303eSBjörn Pettersson; CHECK-NEXT: subc.cc.s64 %rd55, %rd112, %rd50; 331a288d8daSJoseph Huber; CHECK-NEXT: setp.lt.s64 %p2, %rd50, 0; 332a288d8daSJoseph Huber; CHECK-NEXT: selp.b64 %rd4, %rd55, %rd50, %p2; 333a288d8daSJoseph Huber; CHECK-NEXT: selp.b64 %rd3, %rd54, %rd49, %p2; 334a288d8daSJoseph Huber; CHECK-NEXT: xor.b64 %rd56, %rd50, %rd46; 335a288d8daSJoseph Huber; CHECK-NEXT: shr.s64 %rd5, %rd56, 63; 336a288d8daSJoseph Huber; CHECK-NEXT: or.b64 %rd57, %rd3, %rd4; 337a288d8daSJoseph Huber; CHECK-NEXT: setp.eq.s64 %p3, %rd57, 0; 338a288d8daSJoseph Huber; CHECK-NEXT: or.b64 %rd58, %rd1, %rd2; 339a288d8daSJoseph Huber; CHECK-NEXT: setp.eq.s64 %p4, %rd58, 0; 340a288d8daSJoseph Huber; CHECK-NEXT: or.pred %p5, %p3, %p4; 341a288d8daSJoseph Huber; CHECK-NEXT: setp.ne.s64 %p6, %rd4, 0; 342a288d8daSJoseph Huber; CHECK-NEXT: clz.b64 %r1, %rd4; 343a288d8daSJoseph Huber; CHECK-NEXT: cvt.u64.u32 %rd59, %r1; 344a288d8daSJoseph Huber; CHECK-NEXT: clz.b64 %r2, %rd3; 345a288d8daSJoseph Huber; CHECK-NEXT: cvt.u64.u32 %rd60, %r2; 346a288d8daSJoseph Huber; CHECK-NEXT: add.s64 %rd61, %rd60, 64; 347a288d8daSJoseph Huber; CHECK-NEXT: selp.b64 %rd62, %rd59, %rd61, %p6; 348a288d8daSJoseph Huber; CHECK-NEXT: setp.ne.s64 %p7, %rd2, 0; 349a288d8daSJoseph Huber; CHECK-NEXT: clz.b64 %r3, %rd2; 350a288d8daSJoseph Huber; CHECK-NEXT: cvt.u64.u32 %rd63, %r3; 351a288d8daSJoseph Huber; CHECK-NEXT: clz.b64 %r4, %rd1; 352a288d8daSJoseph Huber; CHECK-NEXT: cvt.u64.u32 %rd64, %r4; 353a288d8daSJoseph Huber; CHECK-NEXT: add.s64 %rd65, %rd64, 64; 354a288d8daSJoseph Huber; CHECK-NEXT: selp.b64 %rd66, %rd63, %rd65, %p7; 3552b78303eSBjörn Pettersson; CHECK-NEXT: sub.cc.s64 %rd67, %rd62, %rd66; 3562b78303eSBjörn Pettersson; CHECK-NEXT: subc.cc.s64 %rd68, %rd112, 0; 3572b78303eSBjörn Pettersson; CHECK-NEXT: setp.eq.s64 %p8, %rd68, 0; 3582b78303eSBjörn Pettersson; CHECK-NEXT: setp.ne.s64 %p9, %rd68, 0; 359a288d8daSJoseph Huber; CHECK-NEXT: selp.u32 %r5, -1, 0, %p9; 3602b78303eSBjörn Pettersson; CHECK-NEXT: setp.gt.u64 %p10, %rd67, 127; 361a288d8daSJoseph Huber; CHECK-NEXT: selp.u32 %r6, -1, 0, %p10; 362a288d8daSJoseph Huber; CHECK-NEXT: selp.b32 %r7, %r6, %r5, %p8; 363a288d8daSJoseph Huber; CHECK-NEXT: and.b32 %r8, %r7, 1; 364a288d8daSJoseph Huber; CHECK-NEXT: setp.eq.b32 %p11, %r8, 1; 365a288d8daSJoseph Huber; CHECK-NEXT: or.pred %p12, %p5, %p11; 3662b78303eSBjörn Pettersson; CHECK-NEXT: xor.b64 %rd69, %rd67, 127; 3672b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd70, %rd69, %rd68; 3682b78303eSBjörn Pettersson; CHECK-NEXT: setp.eq.s64 %p13, %rd70, 0; 3692b78303eSBjörn Pettersson; CHECK-NEXT: selp.b64 %rd121, 0, %rd2, %p12; 3702b78303eSBjörn Pettersson; CHECK-NEXT: selp.b64 %rd120, 0, %rd1, %p12; 371a288d8daSJoseph Huber; CHECK-NEXT: or.pred %p14, %p12, %p13; 372a288d8daSJoseph Huber; CHECK-NEXT: @%p14 bra $L__BB4_5; 373a288d8daSJoseph Huber; CHECK-NEXT: // %bb.3: // %udiv-bb1 3742b78303eSBjörn Pettersson; CHECK-NEXT: add.cc.s64 %rd114, %rd67, 1; 3752b78303eSBjörn Pettersson; CHECK-NEXT: addc.cc.s64 %rd115, %rd68, 0; 3762b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd73, %rd114, %rd115; 3772b78303eSBjörn Pettersson; CHECK-NEXT: setp.eq.s64 %p15, %rd73, 0; 3782b78303eSBjörn Pettersson; CHECK-NEXT: cvt.u32.u64 %r9, %rd67; 379273a94b3SAlex MacLean; CHECK-NEXT: sub.s32 %r10, 127, %r9; 380273a94b3SAlex MacLean; CHECK-NEXT: shl.b64 %rd74, %rd2, %r10; 381273a94b3SAlex MacLean; CHECK-NEXT: sub.s32 %r11, 64, %r10; 382273a94b3SAlex MacLean; CHECK-NEXT: shr.u64 %rd75, %rd1, %r11; 3832b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd76, %rd74, %rd75; 384273a94b3SAlex MacLean; CHECK-NEXT: sub.s32 %r12, 63, %r9; 385273a94b3SAlex MacLean; CHECK-NEXT: shl.b64 %rd77, %rd1, %r12; 386273a94b3SAlex MacLean; CHECK-NEXT: setp.gt.s32 %p16, %r10, 63; 3872b78303eSBjörn Pettersson; CHECK-NEXT: selp.b64 %rd119, %rd77, %rd76, %p16; 388273a94b3SAlex MacLean; CHECK-NEXT: shl.b64 %rd118, %rd1, %r10; 3892b78303eSBjörn Pettersson; CHECK-NEXT: mov.u64 %rd109, %rd112; 390a288d8daSJoseph Huber; CHECK-NEXT: @%p15 bra $L__BB4_4; 391a288d8daSJoseph Huber; CHECK-NEXT: // %bb.1: // %udiv-preheader 392273a94b3SAlex MacLean; CHECK-NEXT: cvt.u32.u64 %r13, %rd114; 393273a94b3SAlex MacLean; CHECK-NEXT: shr.u64 %rd80, %rd1, %r13; 394273a94b3SAlex MacLean; CHECK-NEXT: sub.s32 %r14, 64, %r13; 395273a94b3SAlex MacLean; CHECK-NEXT: shl.b64 %rd81, %rd2, %r14; 3962b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd82, %rd80, %rd81; 397273a94b3SAlex MacLean; CHECK-NEXT: add.s32 %r15, %r13, -64; 398273a94b3SAlex MacLean; CHECK-NEXT: shr.u64 %rd83, %rd2, %r15; 399273a94b3SAlex MacLean; CHECK-NEXT: setp.gt.s32 %p17, %r13, 63; 4002b78303eSBjörn Pettersson; CHECK-NEXT: selp.b64 %rd116, %rd83, %rd82, %p17; 401273a94b3SAlex MacLean; CHECK-NEXT: shr.u64 %rd117, %rd2, %r13; 402a288d8daSJoseph Huber; CHECK-NEXT: add.cc.s64 %rd35, %rd3, -1; 403a288d8daSJoseph Huber; CHECK-NEXT: addc.cc.s64 %rd36, %rd4, -1; 404310e7987SAlex MacLean; CHECK-NEXT: mov.b64 %rd109, 0; 4052b78303eSBjörn Pettersson; CHECK-NEXT: mov.u64 %rd112, %rd109; 406a288d8daSJoseph Huber; CHECK-NEXT: $L__BB4_2: // %udiv-do-while 407a288d8daSJoseph Huber; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 4082b78303eSBjörn Pettersson; CHECK-NEXT: shr.u64 %rd84, %rd116, 63; 4092b78303eSBjörn Pettersson; CHECK-NEXT: shl.b64 %rd85, %rd117, 1; 4102b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd86, %rd85, %rd84; 4112b78303eSBjörn Pettersson; CHECK-NEXT: shl.b64 %rd87, %rd116, 1; 4122b78303eSBjörn Pettersson; CHECK-NEXT: shr.u64 %rd88, %rd119, 63; 4132b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd89, %rd87, %rd88; 4142b78303eSBjörn Pettersson; CHECK-NEXT: shr.u64 %rd90, %rd118, 63; 4152b78303eSBjörn Pettersson; CHECK-NEXT: shl.b64 %rd91, %rd119, 1; 4162b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd92, %rd91, %rd90; 4172b78303eSBjörn Pettersson; CHECK-NEXT: shl.b64 %rd93, %rd118, 1; 4182b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd118, %rd112, %rd93; 4192b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd119, %rd109, %rd92; 4202b78303eSBjörn Pettersson; CHECK-NEXT: sub.cc.s64 %rd94, %rd35, %rd89; 4212b78303eSBjörn Pettersson; CHECK-NEXT: subc.cc.s64 %rd95, %rd36, %rd86; 4222b78303eSBjörn Pettersson; CHECK-NEXT: shr.s64 %rd96, %rd95, 63; 4232b78303eSBjörn Pettersson; CHECK-NEXT: and.b64 %rd112, %rd96, 1; 4242b78303eSBjörn Pettersson; CHECK-NEXT: and.b64 %rd97, %rd96, %rd3; 4252b78303eSBjörn Pettersson; CHECK-NEXT: and.b64 %rd98, %rd96, %rd4; 4262b78303eSBjörn Pettersson; CHECK-NEXT: sub.cc.s64 %rd116, %rd89, %rd97; 4272b78303eSBjörn Pettersson; CHECK-NEXT: subc.cc.s64 %rd117, %rd86, %rd98; 4282b78303eSBjörn Pettersson; CHECK-NEXT: add.cc.s64 %rd114, %rd114, -1; 4292b78303eSBjörn Pettersson; CHECK-NEXT: addc.cc.s64 %rd115, %rd115, -1; 4302b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd99, %rd114, %rd115; 4312b78303eSBjörn Pettersson; CHECK-NEXT: setp.eq.s64 %p18, %rd99, 0; 432a288d8daSJoseph Huber; CHECK-NEXT: @%p18 bra $L__BB4_4; 433a288d8daSJoseph Huber; CHECK-NEXT: bra.uni $L__BB4_2; 434a288d8daSJoseph Huber; CHECK-NEXT: $L__BB4_4: // %udiv-loop-exit 4352b78303eSBjörn Pettersson; CHECK-NEXT: shr.u64 %rd100, %rd118, 63; 4362b78303eSBjörn Pettersson; CHECK-NEXT: shl.b64 %rd101, %rd119, 1; 4372b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd102, %rd101, %rd100; 4382b78303eSBjörn Pettersson; CHECK-NEXT: shl.b64 %rd103, %rd118, 1; 4392b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd120, %rd112, %rd103; 4402b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd121, %rd109, %rd102; 441a288d8daSJoseph Huber; CHECK-NEXT: $L__BB4_5: // %udiv-end 4422b78303eSBjörn Pettersson; CHECK-NEXT: xor.b64 %rd104, %rd120, %rd5; 4432b78303eSBjörn Pettersson; CHECK-NEXT: xor.b64 %rd105, %rd121, %rd5; 4442b78303eSBjörn Pettersson; CHECK-NEXT: sub.cc.s64 %rd106, %rd104, %rd5; 4452b78303eSBjörn Pettersson; CHECK-NEXT: subc.cc.s64 %rd107, %rd105, %rd5; 4460f0a96b8SYoungsuk Kim; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd106, %rd107}; 447a288d8daSJoseph Huber; CHECK-NEXT: ret; 448a288d8daSJoseph Huber %div = sdiv i128 %lhs, %rhs 449a288d8daSJoseph Huber ret i128 %div 450a288d8daSJoseph Huber} 451a288d8daSJoseph Huber 452a288d8daSJoseph Huberdefine i128 @udiv_i128(i128 %lhs, i128 %rhs) { 453a288d8daSJoseph Huber; CHECK-LABEL: udiv_i128( 454a288d8daSJoseph Huber; CHECK: { 455a288d8daSJoseph Huber; CHECK-NEXT: .reg .pred %p<17>; 456273a94b3SAlex MacLean; CHECK-NEXT: .reg .b32 %r<16>; 4572b78303eSBjörn Pettersson; CHECK-NEXT: .reg .b64 %rd<107>; 458a288d8daSJoseph Huber; CHECK-EMPTY: 459a288d8daSJoseph Huber; CHECK-NEXT: // %bb.0: // %_udiv-special-cases 460a288d8daSJoseph Huber; CHECK-NEXT: ld.param.v2.u64 {%rd41, %rd42}, [udiv_i128_param_0]; 461a288d8daSJoseph Huber; CHECK-NEXT: ld.param.v2.u64 {%rd43, %rd44}, [udiv_i128_param_1]; 462a288d8daSJoseph Huber; CHECK-NEXT: or.b64 %rd45, %rd43, %rd44; 463a288d8daSJoseph Huber; CHECK-NEXT: setp.eq.s64 %p1, %rd45, 0; 464a288d8daSJoseph Huber; CHECK-NEXT: or.b64 %rd46, %rd41, %rd42; 465a288d8daSJoseph Huber; CHECK-NEXT: setp.eq.s64 %p2, %rd46, 0; 466a288d8daSJoseph Huber; CHECK-NEXT: or.pred %p3, %p1, %p2; 467a288d8daSJoseph Huber; CHECK-NEXT: setp.ne.s64 %p4, %rd44, 0; 468a288d8daSJoseph Huber; CHECK-NEXT: clz.b64 %r1, %rd44; 469a288d8daSJoseph Huber; CHECK-NEXT: cvt.u64.u32 %rd47, %r1; 470a288d8daSJoseph Huber; CHECK-NEXT: clz.b64 %r2, %rd43; 471a288d8daSJoseph Huber; CHECK-NEXT: cvt.u64.u32 %rd48, %r2; 472a288d8daSJoseph Huber; CHECK-NEXT: add.s64 %rd49, %rd48, 64; 473a288d8daSJoseph Huber; CHECK-NEXT: selp.b64 %rd50, %rd47, %rd49, %p4; 474a288d8daSJoseph Huber; CHECK-NEXT: setp.ne.s64 %p5, %rd42, 0; 475a288d8daSJoseph Huber; CHECK-NEXT: clz.b64 %r3, %rd42; 476a288d8daSJoseph Huber; CHECK-NEXT: cvt.u64.u32 %rd51, %r3; 477a288d8daSJoseph Huber; CHECK-NEXT: clz.b64 %r4, %rd41; 478a288d8daSJoseph Huber; CHECK-NEXT: cvt.u64.u32 %rd52, %r4; 479a288d8daSJoseph Huber; CHECK-NEXT: add.s64 %rd53, %rd52, 64; 480a288d8daSJoseph Huber; CHECK-NEXT: selp.b64 %rd54, %rd51, %rd53, %p5; 481310e7987SAlex MacLean; CHECK-NEXT: mov.b64 %rd97, 0; 4822b78303eSBjörn Pettersson; CHECK-NEXT: sub.cc.s64 %rd56, %rd50, %rd54; 4832b78303eSBjörn Pettersson; CHECK-NEXT: subc.cc.s64 %rd57, %rd97, 0; 4842b78303eSBjörn Pettersson; CHECK-NEXT: setp.eq.s64 %p6, %rd57, 0; 4852b78303eSBjörn Pettersson; CHECK-NEXT: setp.ne.s64 %p7, %rd57, 0; 486a288d8daSJoseph Huber; CHECK-NEXT: selp.u32 %r5, -1, 0, %p7; 4872b78303eSBjörn Pettersson; CHECK-NEXT: setp.gt.u64 %p8, %rd56, 127; 488a288d8daSJoseph Huber; CHECK-NEXT: selp.u32 %r6, -1, 0, %p8; 489a288d8daSJoseph Huber; CHECK-NEXT: selp.b32 %r7, %r6, %r5, %p6; 490a288d8daSJoseph Huber; CHECK-NEXT: and.b32 %r8, %r7, 1; 491a288d8daSJoseph Huber; CHECK-NEXT: setp.eq.b32 %p9, %r8, 1; 492a288d8daSJoseph Huber; CHECK-NEXT: or.pred %p10, %p3, %p9; 4932b78303eSBjörn Pettersson; CHECK-NEXT: xor.b64 %rd58, %rd56, 127; 4942b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd59, %rd58, %rd57; 4952b78303eSBjörn Pettersson; CHECK-NEXT: setp.eq.s64 %p11, %rd59, 0; 4962b78303eSBjörn Pettersson; CHECK-NEXT: selp.b64 %rd106, 0, %rd42, %p10; 4972b78303eSBjörn Pettersson; CHECK-NEXT: selp.b64 %rd105, 0, %rd41, %p10; 498a288d8daSJoseph Huber; CHECK-NEXT: or.pred %p12, %p10, %p11; 499a288d8daSJoseph Huber; CHECK-NEXT: @%p12 bra $L__BB5_5; 500a288d8daSJoseph Huber; CHECK-NEXT: // %bb.3: // %udiv-bb1 5012b78303eSBjörn Pettersson; CHECK-NEXT: add.cc.s64 %rd99, %rd56, 1; 5022b78303eSBjörn Pettersson; CHECK-NEXT: addc.cc.s64 %rd100, %rd57, 0; 5032b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd62, %rd99, %rd100; 5042b78303eSBjörn Pettersson; CHECK-NEXT: setp.eq.s64 %p13, %rd62, 0; 5052b78303eSBjörn Pettersson; CHECK-NEXT: cvt.u32.u64 %r9, %rd56; 506273a94b3SAlex MacLean; CHECK-NEXT: sub.s32 %r10, 127, %r9; 507273a94b3SAlex MacLean; CHECK-NEXT: shl.b64 %rd63, %rd42, %r10; 508273a94b3SAlex MacLean; CHECK-NEXT: sub.s32 %r11, 64, %r10; 509273a94b3SAlex MacLean; CHECK-NEXT: shr.u64 %rd64, %rd41, %r11; 5102b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd65, %rd63, %rd64; 511273a94b3SAlex MacLean; CHECK-NEXT: sub.s32 %r12, 63, %r9; 512273a94b3SAlex MacLean; CHECK-NEXT: shl.b64 %rd66, %rd41, %r12; 513273a94b3SAlex MacLean; CHECK-NEXT: setp.gt.s32 %p14, %r10, 63; 5142b78303eSBjörn Pettersson; CHECK-NEXT: selp.b64 %rd104, %rd66, %rd65, %p14; 515273a94b3SAlex MacLean; CHECK-NEXT: shl.b64 %rd103, %rd41, %r10; 5162b78303eSBjörn Pettersson; CHECK-NEXT: mov.u64 %rd94, %rd97; 517a288d8daSJoseph Huber; CHECK-NEXT: @%p13 bra $L__BB5_4; 518a288d8daSJoseph Huber; CHECK-NEXT: // %bb.1: // %udiv-preheader 519273a94b3SAlex MacLean; CHECK-NEXT: cvt.u32.u64 %r13, %rd99; 520273a94b3SAlex MacLean; CHECK-NEXT: shr.u64 %rd69, %rd41, %r13; 521273a94b3SAlex MacLean; CHECK-NEXT: sub.s32 %r14, 64, %r13; 522273a94b3SAlex MacLean; CHECK-NEXT: shl.b64 %rd70, %rd42, %r14; 5232b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd71, %rd69, %rd70; 524273a94b3SAlex MacLean; CHECK-NEXT: add.s32 %r15, %r13, -64; 525273a94b3SAlex MacLean; CHECK-NEXT: shr.u64 %rd72, %rd42, %r15; 526273a94b3SAlex MacLean; CHECK-NEXT: setp.gt.s32 %p15, %r13, 63; 5272b78303eSBjörn Pettersson; CHECK-NEXT: selp.b64 %rd101, %rd72, %rd71, %p15; 528273a94b3SAlex MacLean; CHECK-NEXT: shr.u64 %rd102, %rd42, %r13; 529a288d8daSJoseph Huber; CHECK-NEXT: add.cc.s64 %rd33, %rd43, -1; 530a288d8daSJoseph Huber; CHECK-NEXT: addc.cc.s64 %rd34, %rd44, -1; 531310e7987SAlex MacLean; CHECK-NEXT: mov.b64 %rd94, 0; 5322b78303eSBjörn Pettersson; CHECK-NEXT: mov.u64 %rd97, %rd94; 533a288d8daSJoseph Huber; CHECK-NEXT: $L__BB5_2: // %udiv-do-while 534a288d8daSJoseph Huber; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 5352b78303eSBjörn Pettersson; CHECK-NEXT: shr.u64 %rd73, %rd101, 63; 5362b78303eSBjörn Pettersson; CHECK-NEXT: shl.b64 %rd74, %rd102, 1; 5372b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd75, %rd74, %rd73; 5382b78303eSBjörn Pettersson; CHECK-NEXT: shl.b64 %rd76, %rd101, 1; 5392b78303eSBjörn Pettersson; CHECK-NEXT: shr.u64 %rd77, %rd104, 63; 5402b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd78, %rd76, %rd77; 5412b78303eSBjörn Pettersson; CHECK-NEXT: shr.u64 %rd79, %rd103, 63; 5422b78303eSBjörn Pettersson; CHECK-NEXT: shl.b64 %rd80, %rd104, 1; 5432b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd81, %rd80, %rd79; 5442b78303eSBjörn Pettersson; CHECK-NEXT: shl.b64 %rd82, %rd103, 1; 5452b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd103, %rd97, %rd82; 5462b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd104, %rd94, %rd81; 5472b78303eSBjörn Pettersson; CHECK-NEXT: sub.cc.s64 %rd83, %rd33, %rd78; 5482b78303eSBjörn Pettersson; CHECK-NEXT: subc.cc.s64 %rd84, %rd34, %rd75; 5492b78303eSBjörn Pettersson; CHECK-NEXT: shr.s64 %rd85, %rd84, 63; 5502b78303eSBjörn Pettersson; CHECK-NEXT: and.b64 %rd97, %rd85, 1; 5512b78303eSBjörn Pettersson; CHECK-NEXT: and.b64 %rd86, %rd85, %rd43; 5522b78303eSBjörn Pettersson; CHECK-NEXT: and.b64 %rd87, %rd85, %rd44; 5532b78303eSBjörn Pettersson; CHECK-NEXT: sub.cc.s64 %rd101, %rd78, %rd86; 5542b78303eSBjörn Pettersson; CHECK-NEXT: subc.cc.s64 %rd102, %rd75, %rd87; 5552b78303eSBjörn Pettersson; CHECK-NEXT: add.cc.s64 %rd99, %rd99, -1; 5562b78303eSBjörn Pettersson; CHECK-NEXT: addc.cc.s64 %rd100, %rd100, -1; 5572b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd88, %rd99, %rd100; 5582b78303eSBjörn Pettersson; CHECK-NEXT: setp.eq.s64 %p16, %rd88, 0; 559a288d8daSJoseph Huber; CHECK-NEXT: @%p16 bra $L__BB5_4; 560a288d8daSJoseph Huber; CHECK-NEXT: bra.uni $L__BB5_2; 561a288d8daSJoseph Huber; CHECK-NEXT: $L__BB5_4: // %udiv-loop-exit 5622b78303eSBjörn Pettersson; CHECK-NEXT: shr.u64 %rd89, %rd103, 63; 5632b78303eSBjörn Pettersson; CHECK-NEXT: shl.b64 %rd90, %rd104, 1; 5642b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd91, %rd90, %rd89; 5652b78303eSBjörn Pettersson; CHECK-NEXT: shl.b64 %rd92, %rd103, 1; 5662b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd105, %rd97, %rd92; 5672b78303eSBjörn Pettersson; CHECK-NEXT: or.b64 %rd106, %rd94, %rd91; 568a288d8daSJoseph Huber; CHECK-NEXT: $L__BB5_5: // %udiv-end 5690f0a96b8SYoungsuk Kim; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd105, %rd106}; 570a288d8daSJoseph Huber; CHECK-NEXT: ret; 571a288d8daSJoseph Huber %div = udiv i128 %lhs, %rhs 572a288d8daSJoseph Huber ret i128 %div 573a288d8daSJoseph Huber} 574a288d8daSJoseph Huber 575a288d8daSJoseph Huberdefine i128 @sdiv_i128_pow2k(i128 %lhs) { 576a288d8daSJoseph Huber; CHECK-LABEL: sdiv_i128_pow2k( 577a288d8daSJoseph Huber; CHECK: { 578a288d8daSJoseph Huber; CHECK-NEXT: .reg .b64 %rd<11>; 579a288d8daSJoseph Huber; CHECK-EMPTY: 580a288d8daSJoseph Huber; CHECK-NEXT: // %bb.0: 581a288d8daSJoseph Huber; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [sdiv_i128_pow2k_param_0]; 582a288d8daSJoseph Huber; CHECK-NEXT: shr.s64 %rd3, %rd2, 63; 583a288d8daSJoseph Huber; CHECK-NEXT: shr.u64 %rd4, %rd3, 31; 584a288d8daSJoseph Huber; CHECK-NEXT: add.cc.s64 %rd5, %rd1, %rd4; 585a288d8daSJoseph Huber; CHECK-NEXT: addc.cc.s64 %rd6, %rd2, 0; 586a288d8daSJoseph Huber; CHECK-NEXT: shl.b64 %rd7, %rd6, 31; 587a288d8daSJoseph Huber; CHECK-NEXT: shr.u64 %rd8, %rd5, 33; 588a288d8daSJoseph Huber; CHECK-NEXT: or.b64 %rd9, %rd8, %rd7; 589a288d8daSJoseph Huber; CHECK-NEXT: shr.s64 %rd10, %rd6, 33; 5900f0a96b8SYoungsuk Kim; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd9, %rd10}; 591a288d8daSJoseph Huber; CHECK-NEXT: ret; 592a288d8daSJoseph Huber %div = sdiv i128 %lhs, 8589934592 593a288d8daSJoseph Huber ret i128 %div 594a288d8daSJoseph Huber} 595a288d8daSJoseph Huber 596a288d8daSJoseph Huberdefine i128 @udiv_i128_pow2k(i128 %lhs) { 597a288d8daSJoseph Huber; CHECK-LABEL: udiv_i128_pow2k( 598a288d8daSJoseph Huber; CHECK: { 599a288d8daSJoseph Huber; CHECK-NEXT: .reg .b64 %rd<7>; 600a288d8daSJoseph Huber; CHECK-EMPTY: 601a288d8daSJoseph Huber; CHECK-NEXT: // %bb.0: 602a288d8daSJoseph Huber; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [udiv_i128_pow2k_param_0]; 603a288d8daSJoseph Huber; CHECK-NEXT: shl.b64 %rd3, %rd2, 31; 604a288d8daSJoseph Huber; CHECK-NEXT: shr.u64 %rd4, %rd1, 33; 605a288d8daSJoseph Huber; CHECK-NEXT: or.b64 %rd5, %rd4, %rd3; 606a288d8daSJoseph Huber; CHECK-NEXT: shr.u64 %rd6, %rd2, 33; 6070f0a96b8SYoungsuk Kim; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd5, %rd6}; 608a288d8daSJoseph Huber; CHECK-NEXT: ret; 609a288d8daSJoseph Huber %div = udiv i128 %lhs, 8589934592 610a288d8daSJoseph Huber ret i128 %div 611a288d8daSJoseph Huber} 612a288d8daSJoseph Huber 613a288d8daSJoseph Huberdefine i128 @add_i128(i128 %lhs, i128 %rhs) { 614a288d8daSJoseph Huber; CHECK-LABEL: add_i128( 615a288d8daSJoseph Huber; CHECK: { 616a288d8daSJoseph Huber; CHECK-NEXT: .reg .b64 %rd<7>; 617a288d8daSJoseph Huber; CHECK-EMPTY: 618a288d8daSJoseph Huber; CHECK-NEXT: // %bb.0: 619a288d8daSJoseph Huber; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [add_i128_param_0]; 620a288d8daSJoseph Huber; CHECK-NEXT: ld.param.v2.u64 {%rd3, %rd4}, [add_i128_param_1]; 621a288d8daSJoseph Huber; CHECK-NEXT: add.cc.s64 %rd5, %rd1, %rd3; 622a288d8daSJoseph Huber; CHECK-NEXT: addc.cc.s64 %rd6, %rd2, %rd4; 6230f0a96b8SYoungsuk Kim; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd5, %rd6}; 624a288d8daSJoseph Huber; CHECK-NEXT: ret; 625a288d8daSJoseph Huber %result = add i128 %lhs, %rhs 626a288d8daSJoseph Huber ret i128 %result 627a288d8daSJoseph Huber} 628