; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64-- 2>&1 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-- | %ptxas-verify %}

; Signed remainder of two variable i128 operands. The expected PTX takes the
; absolute value of both operands, runs the inline division expansion (blocks
; _udiv-special-cases .. udiv-end), computes |lhs| - q * |rhs|, and finally
; re-applies the sign of %lhs with an xor/sub pair.
define i128 @srem_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: srem_i128(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<19>;
; CHECK-NEXT:    .reg .b32 %r<16>;
; CHECK-NEXT:    .reg .b64 %rd<127>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %_udiv-special-cases
; CHECK-NEXT:    ld.param.v2.u64 {%rd45, %rd46}, [srem_i128_param_0];
; CHECK-NEXT:    ld.param.v2.u64 {%rd49, %rd50}, [srem_i128_param_1];
; CHECK-NEXT:    shr.s64 %rd2, %rd46, 63;
; CHECK-NEXT:    mov.b64 %rd117, 0;
; CHECK-NEXT:    sub.cc.s64 %rd52, %rd117, %rd45;
; CHECK-NEXT:    subc.cc.s64 %rd53, %rd117, %rd46;
; CHECK-NEXT:    setp.lt.s64 %p1, %rd46, 0;
; CHECK-NEXT:    selp.b64 %rd4, %rd53, %rd46, %p1;
; CHECK-NEXT:    selp.b64 %rd3, %rd52, %rd45, %p1;
; CHECK-NEXT:    sub.cc.s64 %rd54, %rd117, %rd49;
; CHECK-NEXT:    subc.cc.s64 %rd55, %rd117, %rd50;
; CHECK-NEXT:    setp.lt.s64 %p2, %rd50, 0;
; CHECK-NEXT:    selp.b64 %rd6, %rd55, %rd50, %p2;
; CHECK-NEXT:    selp.b64 %rd5, %rd54, %rd49, %p2;
; CHECK-NEXT:    or.b64 %rd56, %rd5, %rd6;
; CHECK-NEXT:    setp.eq.s64 %p3, %rd56, 0;
; CHECK-NEXT:    or.b64 %rd57, %rd3, %rd4;
; CHECK-NEXT:    setp.eq.s64 %p4, %rd57, 0;
; CHECK-NEXT:    or.pred %p5, %p3, %p4;
; CHECK-NEXT:    setp.ne.s64 %p6, %rd6, 0;
; CHECK-NEXT:    clz.b64 %r1, %rd6;
; CHECK-NEXT:    cvt.u64.u32 %rd58, %r1;
; CHECK-NEXT:    clz.b64 %r2, %rd5;
; CHECK-NEXT:    cvt.u64.u32 %rd59, %r2;
; CHECK-NEXT:    add.s64 %rd60, %rd59, 64;
; CHECK-NEXT:    selp.b64 %rd61, %rd58, %rd60, %p6;
; CHECK-NEXT:    setp.ne.s64 %p7, %rd4, 0;
; CHECK-NEXT:    clz.b64 %r3, %rd4;
; CHECK-NEXT:    cvt.u64.u32 %rd62, %r3;
; CHECK-NEXT:    clz.b64 %r4, %rd3;
; CHECK-NEXT:    cvt.u64.u32 %rd63, %r4;
; CHECK-NEXT:    add.s64 %rd64, %rd63, 64;
; CHECK-NEXT:    selp.b64 %rd65, %rd62, %rd64, %p7;
; CHECK-NEXT:    sub.cc.s64 %rd66, %rd61, %rd65;
; CHECK-NEXT:    subc.cc.s64 %rd67, %rd117, 0;
; CHECK-NEXT:    setp.eq.s64 %p8, %rd67, 0;
; CHECK-NEXT:    setp.ne.s64 %p9, %rd67, 0;
; CHECK-NEXT:    selp.u32 %r5, -1, 0, %p9;
; CHECK-NEXT:    setp.gt.u64 %p10, %rd66, 127;
; CHECK-NEXT:    selp.u32 %r6, -1, 0, %p10;
; CHECK-NEXT:    selp.b32 %r7, %r6, %r5, %p8;
; CHECK-NEXT:    and.b32 %r8, %r7, 1;
; CHECK-NEXT:    setp.eq.b32 %p11, %r8, 1;
; CHECK-NEXT:    or.pred %p12, %p5, %p11;
; CHECK-NEXT:    xor.b64 %rd68, %rd66, 127;
; CHECK-NEXT:    or.b64 %rd69, %rd68, %rd67;
; CHECK-NEXT:    setp.eq.s64 %p13, %rd69, 0;
; CHECK-NEXT:    selp.b64 %rd126, 0, %rd4, %p12;
; CHECK-NEXT:    selp.b64 %rd125, 0, %rd3, %p12;
; CHECK-NEXT:    or.pred %p14, %p12, %p13;
; CHECK-NEXT:    @%p14 bra $L__BB0_5;
; CHECK-NEXT:  // %bb.3: // %udiv-bb1
; CHECK-NEXT:    add.cc.s64 %rd119, %rd66, 1;
; CHECK-NEXT:    addc.cc.s64 %rd120, %rd67, 0;
; CHECK-NEXT:    or.b64 %rd72, %rd119, %rd120;
; CHECK-NEXT:    setp.eq.s64 %p15, %rd72, 0;
; CHECK-NEXT:    cvt.u32.u64 %r9, %rd66;
; CHECK-NEXT:    sub.s32 %r10, 127, %r9;
; CHECK-NEXT:    shl.b64 %rd73, %rd4, %r10;
; CHECK-NEXT:    sub.s32 %r11, 64, %r10;
; CHECK-NEXT:    shr.u64 %rd74, %rd3, %r11;
; CHECK-NEXT:    or.b64 %rd75, %rd73, %rd74;
; CHECK-NEXT:    sub.s32 %r12, 63, %r9;
; CHECK-NEXT:    shl.b64 %rd76, %rd3, %r12;
; CHECK-NEXT:    setp.gt.s32 %p16, %r10, 63;
; CHECK-NEXT:    selp.b64 %rd124, %rd76, %rd75, %p16;
; CHECK-NEXT:    shl.b64 %rd123, %rd3, %r10;
; CHECK-NEXT:    mov.u64 %rd114, %rd117;
; CHECK-NEXT:    @%p15 bra $L__BB0_4;
; CHECK-NEXT:  // %bb.1: // %udiv-preheader
; CHECK-NEXT:    cvt.u32.u64 %r13, %rd119;
; CHECK-NEXT:    shr.u64 %rd79, %rd3, %r13;
; CHECK-NEXT:    sub.s32 %r14, 64, %r13;
; CHECK-NEXT:    shl.b64 %rd80, %rd4, %r14;
; CHECK-NEXT:    or.b64 %rd81, %rd79, %rd80;
; CHECK-NEXT:    add.s32 %r15, %r13, -64;
; CHECK-NEXT:    shr.u64 %rd82, %rd4, %r15;
; CHECK-NEXT:    setp.gt.s32 %p17, %r13, 63;
; CHECK-NEXT:    selp.b64 %rd121, %rd82, %rd81, %p17;
; CHECK-NEXT:    shr.u64 %rd122, %rd4, %r13;
; CHECK-NEXT:    add.cc.s64 %rd35, %rd5, -1;
; CHECK-NEXT:    addc.cc.s64 %rd36, %rd6, -1;
; CHECK-NEXT:    mov.b64 %rd114, 0;
; CHECK-NEXT:    mov.u64 %rd117, %rd114;
; CHECK-NEXT:  $L__BB0_2: // %udiv-do-while
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    shr.u64 %rd83, %rd121, 63;
; CHECK-NEXT:    shl.b64 %rd84, %rd122, 1;
; CHECK-NEXT:    or.b64 %rd85, %rd84, %rd83;
; CHECK-NEXT:    shl.b64 %rd86, %rd121, 1;
; CHECK-NEXT:    shr.u64 %rd87, %rd124, 63;
; CHECK-NEXT:    or.b64 %rd88, %rd86, %rd87;
; CHECK-NEXT:    shr.u64 %rd89, %rd123, 63;
; CHECK-NEXT:    shl.b64 %rd90, %rd124, 1;
; CHECK-NEXT:    or.b64 %rd91, %rd90, %rd89;
; CHECK-NEXT:    shl.b64 %rd92, %rd123, 1;
; CHECK-NEXT:    or.b64 %rd123, %rd117, %rd92;
; CHECK-NEXT:    or.b64 %rd124, %rd114, %rd91;
; CHECK-NEXT:    sub.cc.s64 %rd93, %rd35, %rd88;
; CHECK-NEXT:    subc.cc.s64 %rd94, %rd36, %rd85;
; CHECK-NEXT:    shr.s64 %rd95, %rd94, 63;
; CHECK-NEXT:    and.b64 %rd117, %rd95, 1;
; CHECK-NEXT:    and.b64 %rd96, %rd95, %rd5;
; CHECK-NEXT:    and.b64 %rd97, %rd95, %rd6;
; CHECK-NEXT:    sub.cc.s64 %rd121, %rd88, %rd96;
; CHECK-NEXT:    subc.cc.s64 %rd122, %rd85, %rd97;
; CHECK-NEXT:    add.cc.s64 %rd119, %rd119, -1;
; CHECK-NEXT:    addc.cc.s64 %rd120, %rd120, -1;
; CHECK-NEXT:    or.b64 %rd98, %rd119, %rd120;
; CHECK-NEXT:    setp.eq.s64 %p18, %rd98, 0;
; CHECK-NEXT:    @%p18 bra $L__BB0_4;
; CHECK-NEXT:    bra.uni $L__BB0_2;
; CHECK-NEXT:  $L__BB0_4: // %udiv-loop-exit
; CHECK-NEXT:    shr.u64 %rd99, %rd123, 63;
; CHECK-NEXT:    shl.b64 %rd100, %rd124, 1;
; CHECK-NEXT:    or.b64 %rd101, %rd100, %rd99;
; CHECK-NEXT:    shl.b64 %rd102, %rd123, 1;
; CHECK-NEXT:    or.b64 %rd125, %rd117, %rd102;
; CHECK-NEXT:    or.b64 %rd126, %rd114, %rd101;
; CHECK-NEXT:  $L__BB0_5: // %udiv-end
; CHECK-NEXT:    mul.hi.u64 %rd103, %rd5, %rd125;
; CHECK-NEXT:    mad.lo.s64 %rd104, %rd5, %rd126, %rd103;
; CHECK-NEXT:    mad.lo.s64 %rd105, %rd6, %rd125, %rd104;
; CHECK-NEXT:    mul.lo.s64 %rd106, %rd5, %rd125;
; CHECK-NEXT:    sub.cc.s64 %rd107, %rd3, %rd106;
; CHECK-NEXT:    subc.cc.s64 %rd108, %rd4, %rd105;
; CHECK-NEXT:    xor.b64 %rd109, %rd107, %rd2;
; CHECK-NEXT:    xor.b64 %rd110, %rd108, %rd2;
; CHECK-NEXT:    sub.cc.s64 %rd111, %rd109, %rd2;
; CHECK-NEXT:    subc.cc.s64 %rd112, %rd110, %rd2;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd111, %rd112};
; CHECK-NEXT:    ret;
  %rem = srem i128 %lhs, %rhs
  ret i128 %rem
}

; Unsigned remainder of two variable i128 operands. Same inline division
; expansion as the signed case but without any sign handling; the remainder is
; formed after udiv-end as lhs - q * rhs.
define i128 @urem_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: urem_i128(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<17>;
; CHECK-NEXT:    .reg .b32 %r<16>;
; CHECK-NEXT:    .reg .b64 %rd<113>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %_udiv-special-cases
; CHECK-NEXT:    ld.param.v2.u64 {%rd41, %rd42}, [urem_i128_param_0];
; CHECK-NEXT:    ld.param.v2.u64 {%rd3, %rd4}, [urem_i128_param_1];
; CHECK-NEXT:    or.b64 %rd45, %rd3, %rd4;
; CHECK-NEXT:    setp.eq.s64 %p1, %rd45, 0;
; CHECK-NEXT:    or.b64 %rd46, %rd41, %rd42;
; CHECK-NEXT:    setp.eq.s64 %p2, %rd46, 0;
; CHECK-NEXT:    or.pred %p3, %p1, %p2;
; CHECK-NEXT:    setp.ne.s64 %p4, %rd4, 0;
; CHECK-NEXT:    clz.b64 %r1, %rd4;
; CHECK-NEXT:    cvt.u64.u32 %rd47, %r1;
; CHECK-NEXT:    clz.b64 %r2, %rd3;
; CHECK-NEXT:    cvt.u64.u32 %rd48, %r2;
; CHECK-NEXT:    add.s64 %rd49, %rd48, 64;
; CHECK-NEXT:    selp.b64 %rd50, %rd47, %rd49, %p4;
; CHECK-NEXT:    setp.ne.s64 %p5, %rd42, 0;
; CHECK-NEXT:    clz.b64 %r3, %rd42;
; CHECK-NEXT:    cvt.u64.u32 %rd51, %r3;
; CHECK-NEXT:    clz.b64 %r4, %rd41;
; CHECK-NEXT:    cvt.u64.u32 %rd52, %r4;
; CHECK-NEXT:    add.s64 %rd53, %rd52, 64;
; CHECK-NEXT:    selp.b64 %rd54, %rd51, %rd53, %p5;
; CHECK-NEXT:    mov.b64 %rd103, 0;
; CHECK-NEXT:    sub.cc.s64 %rd56, %rd50, %rd54;
; CHECK-NEXT:    subc.cc.s64 %rd57, %rd103, 0;
; CHECK-NEXT:    setp.eq.s64 %p6, %rd57, 0;
; CHECK-NEXT:    setp.ne.s64 %p7, %rd57, 0;
; CHECK-NEXT:    selp.u32 %r5, -1, 0, %p7;
; CHECK-NEXT:    setp.gt.u64 %p8, %rd56, 127;
; CHECK-NEXT:    selp.u32 %r6, -1, 0, %p8;
; CHECK-NEXT:    selp.b32 %r7, %r6, %r5, %p6;
; CHECK-NEXT:    and.b32 %r8, %r7, 1;
; CHECK-NEXT:    setp.eq.b32 %p9, %r8, 1;
; CHECK-NEXT:    or.pred %p10, %p3, %p9;
; CHECK-NEXT:    xor.b64 %rd58, %rd56, 127;
; CHECK-NEXT:    or.b64 %rd59, %rd58, %rd57;
; CHECK-NEXT:    setp.eq.s64 %p11, %rd59, 0;
; CHECK-NEXT:    selp.b64 %rd112, 0, %rd42, %p10;
; CHECK-NEXT:    selp.b64 %rd111, 0, %rd41, %p10;
; CHECK-NEXT:    or.pred %p12, %p10, %p11;
; CHECK-NEXT:    @%p12 bra $L__BB1_5;
; CHECK-NEXT:  // %bb.3: // %udiv-bb1
; CHECK-NEXT:    add.cc.s64 %rd105, %rd56, 1;
; CHECK-NEXT:    addc.cc.s64 %rd106, %rd57, 0;
; CHECK-NEXT:    or.b64 %rd62, %rd105, %rd106;
; CHECK-NEXT:    setp.eq.s64 %p13, %rd62, 0;
; CHECK-NEXT:    cvt.u32.u64 %r9, %rd56;
; CHECK-NEXT:    sub.s32 %r10, 127, %r9;
; CHECK-NEXT:    shl.b64 %rd63, %rd42, %r10;
; CHECK-NEXT:    sub.s32 %r11, 64, %r10;
; CHECK-NEXT:    shr.u64 %rd64, %rd41, %r11;
; CHECK-NEXT:    or.b64 %rd65, %rd63, %rd64;
; CHECK-NEXT:    sub.s32 %r12, 63, %r9;
; CHECK-NEXT:    shl.b64 %rd66, %rd41, %r12;
; CHECK-NEXT:    setp.gt.s32 %p14, %r10, 63;
; CHECK-NEXT:    selp.b64 %rd110, %rd66, %rd65, %p14;
; CHECK-NEXT:    shl.b64 %rd109, %rd41, %r10;
; CHECK-NEXT:    mov.u64 %rd100, %rd103;
; CHECK-NEXT:    @%p13 bra $L__BB1_4;
; CHECK-NEXT:  // %bb.1: // %udiv-preheader
; CHECK-NEXT:    cvt.u32.u64 %r13, %rd105;
; CHECK-NEXT:    shr.u64 %rd69, %rd41, %r13;
; CHECK-NEXT:    sub.s32 %r14, 64, %r13;
; CHECK-NEXT:    shl.b64 %rd70, %rd42, %r14;
; CHECK-NEXT:    or.b64 %rd71, %rd69, %rd70;
; CHECK-NEXT:    add.s32 %r15, %r13, -64;
; CHECK-NEXT:    shr.u64 %rd72, %rd42, %r15;
; CHECK-NEXT:    setp.gt.s32 %p15, %r13, 63;
; CHECK-NEXT:    selp.b64 %rd107, %rd72, %rd71, %p15;
; CHECK-NEXT:    shr.u64 %rd108, %rd42, %r13;
; CHECK-NEXT:    add.cc.s64 %rd33, %rd3, -1;
; CHECK-NEXT:    addc.cc.s64 %rd34, %rd4, -1;
; CHECK-NEXT:    mov.b64 %rd100, 0;
; CHECK-NEXT:    mov.u64 %rd103, %rd100;
; CHECK-NEXT:  $L__BB1_2: // %udiv-do-while
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    shr.u64 %rd73, %rd107, 63;
; CHECK-NEXT:    shl.b64 %rd74, %rd108, 1;
; CHECK-NEXT:    or.b64 %rd75, %rd74, %rd73;
; CHECK-NEXT:    shl.b64 %rd76, %rd107, 1;
; CHECK-NEXT:    shr.u64 %rd77, %rd110, 63;
; CHECK-NEXT:    or.b64 %rd78, %rd76, %rd77;
; CHECK-NEXT:    shr.u64 %rd79, %rd109, 63;
; CHECK-NEXT:    shl.b64 %rd80, %rd110, 1;
; CHECK-NEXT:    or.b64 %rd81, %rd80, %rd79;
; CHECK-NEXT:    shl.b64 %rd82, %rd109, 1;
; CHECK-NEXT:    or.b64 %rd109, %rd103, %rd82;
; CHECK-NEXT:    or.b64 %rd110, %rd100, %rd81;
; CHECK-NEXT:    sub.cc.s64 %rd83, %rd33, %rd78;
; CHECK-NEXT:    subc.cc.s64 %rd84, %rd34, %rd75;
; CHECK-NEXT:    shr.s64 %rd85, %rd84, 63;
; CHECK-NEXT:    and.b64 %rd103, %rd85, 1;
; CHECK-NEXT:    and.b64 %rd86, %rd85, %rd3;
; CHECK-NEXT:    and.b64 %rd87, %rd85, %rd4;
; CHECK-NEXT:    sub.cc.s64 %rd107, %rd78, %rd86;
; CHECK-NEXT:    subc.cc.s64 %rd108, %rd75, %rd87;
; CHECK-NEXT:    add.cc.s64 %rd105, %rd105, -1;
; CHECK-NEXT:    addc.cc.s64 %rd106, %rd106, -1;
; CHECK-NEXT:    or.b64 %rd88, %rd105, %rd106;
; CHECK-NEXT:    setp.eq.s64 %p16, %rd88, 0;
; CHECK-NEXT:    @%p16 bra $L__BB1_4;
; CHECK-NEXT:    bra.uni $L__BB1_2;
; CHECK-NEXT:  $L__BB1_4: // %udiv-loop-exit
; CHECK-NEXT:    shr.u64 %rd89, %rd109, 63;
; CHECK-NEXT:    shl.b64 %rd90, %rd110, 1;
; CHECK-NEXT:    or.b64 %rd91, %rd90, %rd89;
; CHECK-NEXT:    shl.b64 %rd92, %rd109, 1;
; CHECK-NEXT:    or.b64 %rd111, %rd103, %rd92;
; CHECK-NEXT:    or.b64 %rd112, %rd100, %rd91;
; CHECK-NEXT:  $L__BB1_5: // %udiv-end
; CHECK-NEXT:    mul.hi.u64 %rd93, %rd3, %rd111;
; CHECK-NEXT:    mad.lo.s64 %rd94, %rd3, %rd112, %rd93;
; CHECK-NEXT:    mad.lo.s64 %rd95, %rd4, %rd111, %rd94;
; CHECK-NEXT:    mul.lo.s64 %rd96, %rd3, %rd111;
; CHECK-NEXT:    sub.cc.s64 %rd97, %rd41, %rd96;
; CHECK-NEXT:    subc.cc.s64 %rd98, %rd42, %rd95;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd97, %rd98};
; CHECK-NEXT:    ret;
  %rem = urem i128 %lhs, %rhs
  ret i128 %rem
}

; Signed remainder by the constant 8589934592 (2^33). The expected PTX is
; straight-line code (no branches or loop): add a sign-derived bias, mask off
; the low 33 bits, and subtract from the original value.
define i128 @srem_i128_pow2k(i128 %lhs) {
; CHECK-LABEL: srem_i128_pow2k(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<10>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.u64 {%rd1, %rd2}, [srem_i128_pow2k_param_0];
; CHECK-NEXT:    shr.s64 %rd3, %rd2, 63;
; CHECK-NEXT:    shr.u64 %rd4, %rd3, 31;
; CHECK-NEXT:    add.cc.s64 %rd5, %rd1, %rd4;
; CHECK-NEXT:    addc.cc.s64 %rd6, %rd2, 0;
; CHECK-NEXT:    and.b64 %rd7, %rd5, -8589934592;
; CHECK-NEXT:    sub.cc.s64 %rd8, %rd1, %rd7;
; CHECK-NEXT:    subc.cc.s64 %rd9, %rd2, %rd6;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd8, %rd9};
; CHECK-NEXT:    ret;
  %rem = srem i128 %lhs, 8589934592
  ret i128 %rem
}

; Unsigned remainder by the constant 8589934592 (2^33). The expected PTX is a
; single mask of the low word with 8589934591 and a zero high word.
define i128 @urem_i128_pow2k(i128 %lhs) {
; CHECK-LABEL: urem_i128_pow2k(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.u64 {%rd1, %rd2}, [urem_i128_pow2k_param_0];
; CHECK-NEXT:    and.b64 %rd3, %rd1, 8589934591;
; CHECK-NEXT:    mov.b64 %rd4, 0;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd3, %rd4};
; CHECK-NEXT:    ret;
  %rem = urem i128 %lhs, 8589934592
  ret i128 %rem
}

; Signed division of two variable i128 operands. The expected PTX takes the
; absolute value of both operands, derives the result sign from the xor of the
; two high words, runs the inline division expansion, and conditionally negates
; the quotient with an xor/sub pair after udiv-end.
define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: sdiv_i128(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<19>;
; CHECK-NEXT:    .reg .b32 %r<16>;
; CHECK-NEXT:    .reg .b64 %rd<122>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %_udiv-special-cases
; CHECK-NEXT:    ld.param.v2.u64 {%rd45, %rd46}, [sdiv_i128_param_0];
; CHECK-NEXT:    ld.param.v2.u64 {%rd49, %rd50}, [sdiv_i128_param_1];
; CHECK-NEXT:    mov.b64 %rd112, 0;
; CHECK-NEXT:    sub.cc.s64 %rd52, %rd112, %rd45;
; CHECK-NEXT:    subc.cc.s64 %rd53, %rd112, %rd46;
; CHECK-NEXT:    setp.lt.s64 %p1, %rd46, 0;
; CHECK-NEXT:    selp.b64 %rd2, %rd53, %rd46, %p1;
; CHECK-NEXT:    selp.b64 %rd1, %rd52, %rd45, %p1;
; CHECK-NEXT:    sub.cc.s64 %rd54, %rd112, %rd49;
; CHECK-NEXT:    subc.cc.s64 %rd55, %rd112, %rd50;
; CHECK-NEXT:    setp.lt.s64 %p2, %rd50, 0;
; CHECK-NEXT:    selp.b64 %rd4, %rd55, %rd50, %p2;
; CHECK-NEXT:    selp.b64 %rd3, %rd54, %rd49, %p2;
; CHECK-NEXT:    xor.b64 %rd56, %rd50, %rd46;
; CHECK-NEXT:    shr.s64 %rd5, %rd56, 63;
; CHECK-NEXT:    or.b64 %rd57, %rd3, %rd4;
; CHECK-NEXT:    setp.eq.s64 %p3, %rd57, 0;
; CHECK-NEXT:    or.b64 %rd58, %rd1, %rd2;
; CHECK-NEXT:    setp.eq.s64 %p4, %rd58, 0;
; CHECK-NEXT:    or.pred %p5, %p3, %p4;
; CHECK-NEXT:    setp.ne.s64 %p6, %rd4, 0;
; CHECK-NEXT:    clz.b64 %r1, %rd4;
; CHECK-NEXT:    cvt.u64.u32 %rd59, %r1;
; CHECK-NEXT:    clz.b64 %r2, %rd3;
; CHECK-NEXT:    cvt.u64.u32 %rd60, %r2;
; CHECK-NEXT:    add.s64 %rd61, %rd60, 64;
; CHECK-NEXT:    selp.b64 %rd62, %rd59, %rd61, %p6;
; CHECK-NEXT:    setp.ne.s64 %p7, %rd2, 0;
; CHECK-NEXT:    clz.b64 %r3, %rd2;
; CHECK-NEXT:    cvt.u64.u32 %rd63, %r3;
; CHECK-NEXT:    clz.b64 %r4, %rd1;
; CHECK-NEXT:    cvt.u64.u32 %rd64, %r4;
; CHECK-NEXT:    add.s64 %rd65, %rd64, 64;
; CHECK-NEXT:    selp.b64 %rd66, %rd63, %rd65, %p7;
; CHECK-NEXT:    sub.cc.s64 %rd67, %rd62, %rd66;
; CHECK-NEXT:    subc.cc.s64 %rd68, %rd112, 0;
; CHECK-NEXT:    setp.eq.s64 %p8, %rd68, 0;
; CHECK-NEXT:    setp.ne.s64 %p9, %rd68, 0;
; CHECK-NEXT:    selp.u32 %r5, -1, 0, %p9;
; CHECK-NEXT:    setp.gt.u64 %p10, %rd67, 127;
; CHECK-NEXT:    selp.u32 %r6, -1, 0, %p10;
; CHECK-NEXT:    selp.b32 %r7, %r6, %r5, %p8;
; CHECK-NEXT:    and.b32 %r8, %r7, 1;
; CHECK-NEXT:    setp.eq.b32 %p11, %r8, 1;
; CHECK-NEXT:    or.pred %p12, %p5, %p11;
; CHECK-NEXT:    xor.b64 %rd69, %rd67, 127;
; CHECK-NEXT:    or.b64 %rd70, %rd69, %rd68;
; CHECK-NEXT:    setp.eq.s64 %p13, %rd70, 0;
; CHECK-NEXT:    selp.b64 %rd121, 0, %rd2, %p12;
; CHECK-NEXT:    selp.b64 %rd120, 0, %rd1, %p12;
; CHECK-NEXT:    or.pred %p14, %p12, %p13;
; CHECK-NEXT:    @%p14 bra $L__BB4_5;
; CHECK-NEXT:  // %bb.3: // %udiv-bb1
; CHECK-NEXT:    add.cc.s64 %rd114, %rd67, 1;
; CHECK-NEXT:    addc.cc.s64 %rd115, %rd68, 0;
; CHECK-NEXT:    or.b64 %rd73, %rd114, %rd115;
; CHECK-NEXT:    setp.eq.s64 %p15, %rd73, 0;
; CHECK-NEXT:    cvt.u32.u64 %r9, %rd67;
; CHECK-NEXT:    sub.s32 %r10, 127, %r9;
; CHECK-NEXT:    shl.b64 %rd74, %rd2, %r10;
; CHECK-NEXT:    sub.s32 %r11, 64, %r10;
; CHECK-NEXT:    shr.u64 %rd75, %rd1, %r11;
; CHECK-NEXT:    or.b64 %rd76, %rd74, %rd75;
; CHECK-NEXT:    sub.s32 %r12, 63, %r9;
; CHECK-NEXT:    shl.b64 %rd77, %rd1, %r12;
; CHECK-NEXT:    setp.gt.s32 %p16, %r10, 63;
; CHECK-NEXT:    selp.b64 %rd119, %rd77, %rd76, %p16;
; CHECK-NEXT:    shl.b64 %rd118, %rd1, %r10;
; CHECK-NEXT:    mov.u64 %rd109, %rd112;
; CHECK-NEXT:    @%p15 bra $L__BB4_4;
; CHECK-NEXT:  // %bb.1: // %udiv-preheader
; CHECK-NEXT:    cvt.u32.u64 %r13, %rd114;
; CHECK-NEXT:    shr.u64 %rd80, %rd1, %r13;
; CHECK-NEXT:    sub.s32 %r14, 64, %r13;
; CHECK-NEXT:    shl.b64 %rd81, %rd2, %r14;
; CHECK-NEXT:    or.b64 %rd82, %rd80, %rd81;
; CHECK-NEXT:    add.s32 %r15, %r13, -64;
; CHECK-NEXT:    shr.u64 %rd83, %rd2, %r15;
; CHECK-NEXT:    setp.gt.s32 %p17, %r13, 63;
; CHECK-NEXT:    selp.b64 %rd116, %rd83, %rd82, %p17;
; CHECK-NEXT:    shr.u64 %rd117, %rd2, %r13;
; CHECK-NEXT:    add.cc.s64 %rd35, %rd3, -1;
; CHECK-NEXT:    addc.cc.s64 %rd36, %rd4, -1;
; CHECK-NEXT:    mov.b64 %rd109, 0;
; CHECK-NEXT:    mov.u64 %rd112, %rd109;
; CHECK-NEXT:  $L__BB4_2: // %udiv-do-while
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    shr.u64 %rd84, %rd116, 63;
; CHECK-NEXT:    shl.b64 %rd85, %rd117, 1;
; CHECK-NEXT:    or.b64 %rd86, %rd85, %rd84;
; CHECK-NEXT:    shl.b64 %rd87, %rd116, 1;
; CHECK-NEXT:    shr.u64 %rd88, %rd119, 63;
; CHECK-NEXT:    or.b64 %rd89, %rd87, %rd88;
; CHECK-NEXT:    shr.u64 %rd90, %rd118, 63;
; CHECK-NEXT:    shl.b64 %rd91, %rd119, 1;
; CHECK-NEXT:    or.b64 %rd92, %rd91, %rd90;
; CHECK-NEXT:    shl.b64 %rd93, %rd118, 1;
; CHECK-NEXT:    or.b64 %rd118, %rd112, %rd93;
; CHECK-NEXT:    or.b64 %rd119, %rd109, %rd92;
; CHECK-NEXT:    sub.cc.s64 %rd94, %rd35, %rd89;
; CHECK-NEXT:    subc.cc.s64 %rd95, %rd36, %rd86;
; CHECK-NEXT:    shr.s64 %rd96, %rd95, 63;
; CHECK-NEXT:    and.b64 %rd112, %rd96, 1;
; CHECK-NEXT:    and.b64 %rd97, %rd96, %rd3;
; CHECK-NEXT:    and.b64 %rd98, %rd96, %rd4;
; CHECK-NEXT:    sub.cc.s64 %rd116, %rd89, %rd97;
; CHECK-NEXT:    subc.cc.s64 %rd117, %rd86, %rd98;
; CHECK-NEXT:    add.cc.s64 %rd114, %rd114, -1;
; CHECK-NEXT:    addc.cc.s64 %rd115, %rd115, -1;
; CHECK-NEXT:    or.b64 %rd99, %rd114, %rd115;
; CHECK-NEXT:    setp.eq.s64 %p18, %rd99, 0;
; CHECK-NEXT:    @%p18 bra $L__BB4_4;
; CHECK-NEXT:    bra.uni $L__BB4_2;
; CHECK-NEXT:  $L__BB4_4: // %udiv-loop-exit
; CHECK-NEXT:    shr.u64 %rd100, %rd118, 63;
; CHECK-NEXT:    shl.b64 %rd101, %rd119, 1;
; CHECK-NEXT:    or.b64 %rd102, %rd101, %rd100;
; CHECK-NEXT:    shl.b64 %rd103, %rd118, 1;
; CHECK-NEXT:    or.b64 %rd120, %rd112, %rd103;
; CHECK-NEXT:    or.b64 %rd121, %rd109, %rd102;
; CHECK-NEXT:  $L__BB4_5: // %udiv-end
; CHECK-NEXT:    xor.b64 %rd104, %rd120, %rd5;
; CHECK-NEXT:    xor.b64 %rd105, %rd121, %rd5;
; CHECK-NEXT:    sub.cc.s64 %rd106, %rd104, %rd5;
; CHECK-NEXT:    subc.cc.s64 %rd107, %rd105, %rd5;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd106, %rd107};
; CHECK-NEXT:    ret;
  %div = sdiv i128 %lhs, %rhs
  ret i128 %div
}

; Unsigned division of two variable i128 operands. The expected PTX is the
; inline division expansion alone; the quotient produced at udiv-end is stored
; directly as the return value.
define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: udiv_i128(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<17>;
; CHECK-NEXT:    .reg .b32 %r<16>;
; CHECK-NEXT:    .reg .b64 %rd<107>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %_udiv-special-cases
; CHECK-NEXT:    ld.param.v2.u64 {%rd41, %rd42}, [udiv_i128_param_0];
; CHECK-NEXT:    ld.param.v2.u64 {%rd43, %rd44}, [udiv_i128_param_1];
; CHECK-NEXT:    or.b64 %rd45, %rd43, %rd44;
; CHECK-NEXT:    setp.eq.s64 %p1, %rd45, 0;
; CHECK-NEXT:    or.b64 %rd46, %rd41, %rd42;
; CHECK-NEXT:    setp.eq.s64 %p2, %rd46, 0;
; CHECK-NEXT:    or.pred %p3, %p1, %p2;
; CHECK-NEXT:    setp.ne.s64 %p4, %rd44, 0;
; CHECK-NEXT:    clz.b64 %r1, %rd44;
; CHECK-NEXT:    cvt.u64.u32 %rd47, %r1;
; CHECK-NEXT:    clz.b64 %r2, %rd43;
; CHECK-NEXT:    cvt.u64.u32 %rd48, %r2;
; CHECK-NEXT:    add.s64 %rd49, %rd48, 64;
; CHECK-NEXT:    selp.b64 %rd50, %rd47, %rd49, %p4;
; CHECK-NEXT:    setp.ne.s64 %p5, %rd42, 0;
; CHECK-NEXT:    clz.b64 %r3, %rd42;
; CHECK-NEXT:    cvt.u64.u32 %rd51, %r3;
; CHECK-NEXT:    clz.b64 %r4, %rd41;
; CHECK-NEXT:    cvt.u64.u32 %rd52, %r4;
; CHECK-NEXT:    add.s64 %rd53, %rd52, 64;
; CHECK-NEXT:    selp.b64 %rd54, %rd51, %rd53, %p5;
; CHECK-NEXT:    mov.b64 %rd97, 0;
; CHECK-NEXT:    sub.cc.s64 %rd56, %rd50, %rd54;
; CHECK-NEXT:    subc.cc.s64 %rd57, %rd97, 0;
; CHECK-NEXT:    setp.eq.s64 %p6, %rd57, 0;
; CHECK-NEXT:    setp.ne.s64 %p7, %rd57, 0;
; CHECK-NEXT:    selp.u32 %r5, -1, 0, %p7;
; CHECK-NEXT:    setp.gt.u64 %p8, %rd56, 127;
; CHECK-NEXT:    selp.u32 %r6, -1, 0, %p8;
; CHECK-NEXT:    selp.b32 %r7, %r6, %r5, %p6;
; CHECK-NEXT:    and.b32 %r8, %r7, 1;
; CHECK-NEXT:    setp.eq.b32 %p9, %r8, 1;
; CHECK-NEXT:    or.pred %p10, %p3, %p9;
; CHECK-NEXT:    xor.b64 %rd58, %rd56, 127;
; CHECK-NEXT:    or.b64 %rd59, %rd58, %rd57;
; CHECK-NEXT:    setp.eq.s64 %p11, %rd59, 0;
; CHECK-NEXT:    selp.b64 %rd106, 0, %rd42, %p10;
; CHECK-NEXT:    selp.b64 %rd105, 0, %rd41, %p10;
; CHECK-NEXT:    or.pred %p12, %p10, %p11;
; CHECK-NEXT:    @%p12 bra $L__BB5_5;
; CHECK-NEXT:  // %bb.3: // %udiv-bb1
; CHECK-NEXT:    add.cc.s64 %rd99, %rd56, 1;
; CHECK-NEXT:    addc.cc.s64 %rd100, %rd57, 0;
; CHECK-NEXT:    or.b64 %rd62, %rd99, %rd100;
; CHECK-NEXT:    setp.eq.s64 %p13, %rd62, 0;
; CHECK-NEXT:    cvt.u32.u64 %r9, %rd56;
; CHECK-NEXT:    sub.s32 %r10, 127, %r9;
; CHECK-NEXT:    shl.b64 %rd63, %rd42, %r10;
; CHECK-NEXT:    sub.s32 %r11, 64, %r10;
; CHECK-NEXT:    shr.u64 %rd64, %rd41, %r11;
; CHECK-NEXT:    or.b64 %rd65, %rd63, %rd64;
; CHECK-NEXT:    sub.s32 %r12, 63, %r9;
; CHECK-NEXT:    shl.b64 %rd66, %rd41, %r12;
; CHECK-NEXT:    setp.gt.s32 %p14, %r10, 63;
; CHECK-NEXT:    selp.b64 %rd104, %rd66, %rd65, %p14;
; CHECK-NEXT:    shl.b64 %rd103, %rd41, %r10;
; CHECK-NEXT:    mov.u64 %rd94, %rd97;
; CHECK-NEXT:    @%p13 bra $L__BB5_4;
; CHECK-NEXT:  // %bb.1: // %udiv-preheader
; CHECK-NEXT:    cvt.u32.u64 %r13, %rd99;
; CHECK-NEXT:    shr.u64 %rd69, %rd41, %r13;
; CHECK-NEXT:    sub.s32 %r14, 64, %r13;
; CHECK-NEXT:    shl.b64 %rd70, %rd42, %r14;
; CHECK-NEXT:    or.b64 %rd71, %rd69, %rd70;
; CHECK-NEXT:    add.s32 %r15, %r13, -64;
; CHECK-NEXT:    shr.u64 %rd72, %rd42, %r15;
; CHECK-NEXT:    setp.gt.s32 %p15, %r13, 63;
; CHECK-NEXT:    selp.b64 %rd101, %rd72, %rd71, %p15;
; CHECK-NEXT:    shr.u64 %rd102, %rd42, %r13;
; CHECK-NEXT:    add.cc.s64 %rd33, %rd43, -1;
; CHECK-NEXT:    addc.cc.s64 %rd34, %rd44, -1;
; CHECK-NEXT:    mov.b64 %rd94, 0;
; CHECK-NEXT:    mov.u64 %rd97, %rd94;
; CHECK-NEXT:  $L__BB5_2: // %udiv-do-while
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    shr.u64 %rd73, %rd101, 63;
; CHECK-NEXT:    shl.b64 %rd74, %rd102, 1;
; CHECK-NEXT:    or.b64 %rd75, %rd74, %rd73;
; CHECK-NEXT:    shl.b64 %rd76, %rd101, 1;
; CHECK-NEXT:    shr.u64 %rd77, %rd104, 63;
; CHECK-NEXT:    or.b64 %rd78, %rd76, %rd77;
; CHECK-NEXT:    shr.u64 %rd79, %rd103, 63;
; CHECK-NEXT:    shl.b64 %rd80, %rd104, 1;
; CHECK-NEXT:    or.b64 %rd81, %rd80, %rd79;
; CHECK-NEXT:    shl.b64 %rd82, %rd103, 1;
; CHECK-NEXT:    or.b64 %rd103, %rd97, %rd82;
; CHECK-NEXT:    or.b64 %rd104, %rd94, %rd81;
; CHECK-NEXT:    sub.cc.s64 %rd83, %rd33, %rd78;
; CHECK-NEXT:    subc.cc.s64 %rd84, %rd34, %rd75;
; CHECK-NEXT:    shr.s64 %rd85, %rd84, 63;
; CHECK-NEXT:    and.b64 %rd97, %rd85, 1;
; CHECK-NEXT:    and.b64 %rd86, %rd85, %rd43;
; CHECK-NEXT:    and.b64 %rd87, %rd85, %rd44;
; CHECK-NEXT:    sub.cc.s64 %rd101, %rd78, %rd86;
; CHECK-NEXT:    subc.cc.s64 %rd102, %rd75, %rd87;
; CHECK-NEXT:    add.cc.s64 %rd99, %rd99, -1;
; CHECK-NEXT:    addc.cc.s64 %rd100, %rd100, -1;
; CHECK-NEXT:    or.b64 %rd88, %rd99, %rd100;
; CHECK-NEXT:    setp.eq.s64 %p16, %rd88, 0;
; CHECK-NEXT:    @%p16 bra $L__BB5_4;
; CHECK-NEXT:    bra.uni $L__BB5_2;
; CHECK-NEXT:  $L__BB5_4: // %udiv-loop-exit
; CHECK-NEXT:    shr.u64 %rd89, %rd103, 63;
; CHECK-NEXT:    shl.b64 %rd90, %rd104, 1;
; CHECK-NEXT:    or.b64 %rd91, %rd90, %rd89;
; CHECK-NEXT:    shl.b64 %rd92, %rd103, 1;
; CHECK-NEXT:    or.b64 %rd105, %rd97, %rd92;
; CHECK-NEXT:    or.b64 %rd106, %rd94, %rd91;
; CHECK-NEXT:  $L__BB5_5: // %udiv-end
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd105, %rd106};
; CHECK-NEXT:    ret;
  %div = udiv i128 %lhs, %rhs
  ret i128 %div
}

; Signed division by the constant 8589934592 (2^33). The expected PTX is
; straight-line code: add a sign-derived bias, then shift the 128-bit value
; right by 33 using 64-bit shl/shr/or pieces with an arithmetic shift on the
; high word.
define i128 @sdiv_i128_pow2k(i128 %lhs) {
; CHECK-LABEL: sdiv_i128_pow2k(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<11>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.u64 {%rd1, %rd2}, [sdiv_i128_pow2k_param_0];
; CHECK-NEXT:    shr.s64 %rd3, %rd2, 63;
; CHECK-NEXT:    shr.u64 %rd4, %rd3, 31;
; CHECK-NEXT:    add.cc.s64 %rd5, %rd1, %rd4;
; CHECK-NEXT:    addc.cc.s64 %rd6, %rd2, 0;
; CHECK-NEXT:    shl.b64 %rd7, %rd6, 31;
; CHECK-NEXT:    shr.u64 %rd8, %rd5, 33;
; CHECK-NEXT:    or.b64 %rd9, %rd8, %rd7;
; CHECK-NEXT:    shr.s64 %rd10, %rd6, 33;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd9, %rd10};
; CHECK-NEXT:    ret;
  %div = sdiv i128 %lhs, 8589934592
  ret i128 %div
}

; Unsigned division by the constant 8589934592 (2^33). The expected PTX is a
; 128-bit logical shift right by 33 built from 64-bit shl/shr/or pieces.
define i128 @udiv_i128_pow2k(i128 %lhs) {
; CHECK-LABEL: udiv_i128_pow2k(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<7>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.u64 {%rd1, %rd2}, [udiv_i128_pow2k_param_0];
; CHECK-NEXT:    shl.b64 %rd3, %rd2, 31;
; CHECK-NEXT:    shr.u64 %rd4, %rd1, 33;
; CHECK-NEXT:    or.b64 %rd5, %rd4, %rd3;
; CHECK-NEXT:    shr.u64 %rd6, %rd2, 33;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd5, %rd6};
; CHECK-NEXT:    ret;
  %div = udiv i128 %lhs, 8589934592
  ret i128 %div
}

; Addition of two i128 operands. The expected PTX is a two-instruction carry
; chain (add.cc on the low words, addc.cc on the high words).
define i128 @add_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: add_i128(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<7>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.u64 {%rd1, %rd2}, [add_i128_param_0];
; CHECK-NEXT:    ld.param.v2.u64 {%rd3, %rd4}, [add_i128_param_1];
; CHECK-NEXT:    add.cc.s64 %rd5, %rd1, %rd3;
; CHECK-NEXT:    addc.cc.s64 %rd6, %rd2, %rd4;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd5, %rd6};
; CHECK-NEXT:    ret;
  %result = add i128 %lhs, %rhs
  ret i128 %result
}