1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s 3 4; 64-bit divides and rems should be split into a fast and slow path 5; where the fast path uses a 32-bit operation. 6 7define i64 @sdiv64(i64 %a, i64 %b) { 8; GFX9-LABEL: sdiv64: 9; GFX9: ; %bb.0: 10; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11; GFX9-NEXT: v_or_b32_e32 v5, v1, v3 12; GFX9-NEXT: v_mov_b32_e32 v4, 0 13; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] 14; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5 15; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc 16; GFX9-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 17; GFX9-NEXT: s_cbranch_execz .LBB0_2 18; GFX9-NEXT: ; %bb.1: 19; GFX9-NEXT: v_ashrrev_i32_e32 v9, 31, v3 20; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v9 21; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v9, vcc 22; GFX9-NEXT: v_xor_b32_e32 v10, v3, v9 23; GFX9-NEXT: v_xor_b32_e32 v11, v2, v9 24; GFX9-NEXT: v_cvt_f32_u32_e32 v2, v11 25; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v10 26; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, 0, v11 27; GFX9-NEXT: v_subb_co_u32_e32 v8, vcc, 0, v10, vcc 28; GFX9-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2 29; GFX9-NEXT: v_rcp_f32_e32 v2, v2 30; GFX9-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 31; GFX9-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 32; GFX9-NEXT: v_trunc_f32_e32 v3, v3 33; GFX9-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2 34; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v2 35; GFX9-NEXT: v_cvt_u32_f32_e32 v12, v3 36; GFX9-NEXT: v_mul_lo_u32 v4, v8, v6 37; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v6, 0 38; GFX9-NEXT: v_mul_lo_u32 v5, v7, v12 39; GFX9-NEXT: v_mul_hi_u32 v13, v6, v2 40; GFX9-NEXT: v_add3_u32 v5, v3, v5, v4 41; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0 42; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v13, v3 43; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v2, 0 44; GFX9-NEXT: v_addc_co_u32_e32 v14, vcc, 0, v4, vcc 45; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v5, 0 46; 
GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v13, v2 47; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v14, v3, vcc 48; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v5, vcc 49; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 50; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc 51; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v6, v2 52; GFX9-NEXT: v_addc_co_u32_e32 v12, vcc, v12, v3, vcc 53; GFX9-NEXT: v_mul_lo_u32 v4, v7, v12 54; GFX9-NEXT: v_mul_lo_u32 v5, v8, v13 55; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v13, 0 56; GFX9-NEXT: v_add3_u32 v5, v3, v4, v5 57; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v5, 0 58; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v5, 0 59; GFX9-NEXT: v_mul_hi_u32 v14, v13, v2 60; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v12, v2, 0 61; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v14, v5 62; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v6, vcc 63; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7 64; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v5, v8, vcc 65; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc 66; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 67; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc 68; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v13, v2 69; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v12, v3, vcc 70; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v1 71; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v4 72; GFX9-NEXT: v_xor_b32_e32 v6, v0, v4 73; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v4, vcc 74; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v3, 0 75; GFX9-NEXT: v_mul_hi_u32 v7, v6, v2 76; GFX9-NEXT: v_xor_b32_e32 v5, v5, v4 77; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v0 78; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v1, vcc 79; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v2, 0 80; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v3, 0 81; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0 82; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v8, v1, vcc 83; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc 84; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2 85; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc 86; 
GFX9-NEXT: v_mul_lo_u32 v7, v10, v2 87; GFX9-NEXT: v_mul_lo_u32 v8, v11, v3 88; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v11, v2, 0 89; GFX9-NEXT: v_add3_u32 v1, v1, v8, v7 90; GFX9-NEXT: v_sub_u32_e32 v7, v5, v1 91; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v6, v0 92; GFX9-NEXT: v_subb_co_u32_e64 v6, s[4:5], v7, v10, vcc 93; GFX9-NEXT: v_sub_co_u32_e64 v7, s[4:5], v0, v11 94; GFX9-NEXT: v_subbrev_co_u32_e64 v6, s[4:5], 0, v6, s[4:5] 95; GFX9-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v10 96; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] 97; GFX9-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v11 98; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] 99; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], v6, v10 100; GFX9-NEXT: v_cndmask_b32_e64 v6, v8, v7, s[4:5] 101; GFX9-NEXT: v_add_co_u32_e64 v7, s[4:5], 2, v2 102; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v5, v1, vcc 103; GFX9-NEXT: v_addc_co_u32_e64 v8, s[4:5], 0, v3, s[4:5] 104; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10 105; GFX9-NEXT: v_add_co_u32_e64 v12, s[4:5], 1, v2 106; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc 107; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v11 108; GFX9-NEXT: v_addc_co_u32_e64 v13, s[4:5], 0, v3, s[4:5] 109; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc 110; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v10 111; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v6 112; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc 113; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 114; GFX9-NEXT: v_cndmask_b32_e64 v1, v12, v7, s[4:5] 115; GFX9-NEXT: v_cndmask_b32_e64 v6, v13, v8, s[4:5] 116; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 117; GFX9-NEXT: v_xor_b32_e32 v2, v4, v9 118; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v6, vcc 119; GFX9-NEXT: v_xor_b32_e32 v1, v1, v2 120; GFX9-NEXT: v_xor_b32_e32 v0, v0, v2 121; GFX9-NEXT: v_sub_co_u32_e32 v4, vcc, v1, v2 122; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v0, v2, vcc 123; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3 124; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1 125; GFX9-NEXT: .LBB0_2: ; %Flow 126; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] 
127; GFX9-NEXT: s_cbranch_execz .LBB0_4 128; GFX9-NEXT: ; %bb.3: 129; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2 130; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2 131; GFX9-NEXT: v_mov_b32_e32 v5, 0 132; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 133; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 134; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 135; GFX9-NEXT: v_mul_lo_u32 v3, v3, v1 136; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 137; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 138; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 139; GFX9-NEXT: v_mul_lo_u32 v3, v1, v2 140; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 141; GFX9-NEXT: v_sub_u32_e32 v0, v0, v3 142; GFX9-NEXT: v_sub_u32_e32 v3, v0, v2 143; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 144; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 145; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 146; GFX9-NEXT: v_add_u32_e32 v3, 1, v1 147; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 148; GFX9-NEXT: v_cndmask_b32_e32 v4, v1, v3, vcc 149; GFX9-NEXT: .LBB0_4: 150; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 151; GFX9-NEXT: v_mov_b32_e32 v0, v4 152; GFX9-NEXT: v_mov_b32_e32 v1, v5 153; GFX9-NEXT: s_setpc_b64 s[30:31] 154 %d = sdiv i64 %a, %b 155 ret i64 %d 156} 157 158define i64 @udiv64(i64 %a, i64 %b) { 159; GFX9-LABEL: udiv64: 160; GFX9: ; %bb.0: 161; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 162; GFX9-NEXT: v_or_b32_e32 v5, v1, v3 163; GFX9-NEXT: v_mov_b32_e32 v4, 0 164; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] 165; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5 166; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc 167; GFX9-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 168; GFX9-NEXT: s_cbranch_execz .LBB1_2 169; GFX9-NEXT: ; %bb.1: 170; GFX9-NEXT: v_cvt_f32_u32_e32 v4, v2 171; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v3 172; GFX9-NEXT: v_sub_co_u32_e32 v10, vcc, 0, v2 173; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, 0, v3, vcc 174; GFX9-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4 175; GFX9-NEXT: v_rcp_f32_e32 v4, v4 176; GFX9-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 177; GFX9-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 178; 
GFX9-NEXT: v_trunc_f32_e32 v5, v5 179; GFX9-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4 180; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v5 181; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v4 182; GFX9-NEXT: v_mul_lo_u32 v6, v10, v8 183; GFX9-NEXT: v_mul_lo_u32 v7, v11, v9 184; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v9, 0 185; GFX9-NEXT: v_add3_u32 v7, v5, v6, v7 186; GFX9-NEXT: v_mul_hi_u32 v12, v9, v4 187; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v7, 0 188; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v12, v5 189; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v8, v4, 0 190; GFX9-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v6, vcc 191; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v8, v7, 0 192; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v12, v4 193; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v13, v5, vcc 194; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v7, vcc 195; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v6 196; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc 197; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v9, v4 198; GFX9-NEXT: v_addc_co_u32_e32 v13, vcc, v8, v5, vcc 199; GFX9-NEXT: v_mul_lo_u32 v6, v10, v13 200; GFX9-NEXT: v_mul_lo_u32 v7, v11, v12 201; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v12, 0 202; GFX9-NEXT: v_add3_u32 v7, v5, v6, v7 203; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v7, 0 204; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v12, v7, 0 205; GFX9-NEXT: v_mul_hi_u32 v11, v12, v4 206; GFX9-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v4, 0 207; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v11, v7 208; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v8, vcc 209; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v9 210; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v7, v10, vcc 211; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc 212; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v5 213; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v6, vcc 214; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v12, v4 215; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v13, v5, vcc 216; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v7, 0 217; GFX9-NEXT: v_mul_hi_u32 v8, v0, v6 
218; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v8, v4 219; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v5, vcc 220; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v6, 0 221; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v1, v7, 0 222; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v8, v4 223; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v9, v5, vcc 224; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v7, vcc 225; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v4, v6 226; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v5, vcc 227; GFX9-NEXT: v_mul_lo_u32 v8, v3, v6 228; GFX9-NEXT: v_mul_lo_u32 v9, v2, v7 229; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, 0 230; GFX9-NEXT: v_add3_u32 v5, v5, v9, v8 231; GFX9-NEXT: v_sub_u32_e32 v8, v1, v5 232; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4 233; GFX9-NEXT: v_subb_co_u32_e64 v4, s[4:5], v8, v3, vcc 234; GFX9-NEXT: v_sub_co_u32_e64 v8, s[4:5], v0, v2 235; GFX9-NEXT: v_subbrev_co_u32_e64 v4, s[4:5], 0, v4, s[4:5] 236; GFX9-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v3 237; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 238; GFX9-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v2 239; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] 240; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v3 241; GFX9-NEXT: v_cndmask_b32_e64 v4, v9, v8, s[4:5] 242; GFX9-NEXT: v_add_co_u32_e64 v8, s[4:5], 2, v6 243; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v5, vcc 244; GFX9-NEXT: v_addc_co_u32_e64 v9, s[4:5], 0, v7, s[4:5] 245; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 246; GFX9-NEXT: v_add_co_u32_e64 v10, s[4:5], 1, v6 247; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc 248; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 249; GFX9-NEXT: v_addc_co_u32_e64 v11, s[4:5], 0, v7, s[4:5] 250; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc 251; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 252; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4 253; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc 254; GFX9-NEXT: v_cndmask_b32_e64 v4, v11, v9, s[4:5] 255; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 256; GFX9-NEXT: v_cndmask_b32_e64 v0, v10, v8, s[4:5] 257; GFX9-NEXT: 
v_cndmask_b32_e32 v5, v7, v4, vcc 258; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc 259; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3 260; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1 261; GFX9-NEXT: .LBB1_2: ; %Flow 262; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] 263; GFX9-NEXT: s_cbranch_execz .LBB1_4 264; GFX9-NEXT: ; %bb.3: 265; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2 266; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2 267; GFX9-NEXT: v_mov_b32_e32 v5, 0 268; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 269; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 270; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 271; GFX9-NEXT: v_mul_lo_u32 v3, v3, v1 272; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 273; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 274; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 275; GFX9-NEXT: v_mul_lo_u32 v3, v1, v2 276; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 277; GFX9-NEXT: v_sub_u32_e32 v0, v0, v3 278; GFX9-NEXT: v_sub_u32_e32 v3, v0, v2 279; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 280; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 281; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 282; GFX9-NEXT: v_add_u32_e32 v3, 1, v1 283; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 284; GFX9-NEXT: v_cndmask_b32_e32 v4, v1, v3, vcc 285; GFX9-NEXT: .LBB1_4: 286; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 287; GFX9-NEXT: v_mov_b32_e32 v0, v4 288; GFX9-NEXT: v_mov_b32_e32 v1, v5 289; GFX9-NEXT: s_setpc_b64 s[30:31] 290 %d = udiv i64 %a, %b 291 ret i64 %d 292} 293 294define i64 @srem64(i64 %a, i64 %b) { 295; GFX9-LABEL: srem64: 296; GFX9: ; %bb.0: 297; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 298; GFX9-NEXT: v_or_b32_e32 v5, v1, v3 299; GFX9-NEXT: v_mov_b32_e32 v4, 0 300; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] 301; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5 302; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc 303; GFX9-NEXT: s_xor_b64 s[8:9], exec, s[4:5] 304; GFX9-NEXT: s_cbranch_execz .LBB2_2 305; GFX9-NEXT: ; %bb.1: 306; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v3 307; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 308; GFX9-NEXT: 
v_addc_co_u32_e32 v3, vcc, v3, v4, vcc 309; GFX9-NEXT: v_xor_b32_e32 v9, v3, v4 310; GFX9-NEXT: v_xor_b32_e32 v10, v2, v4 311; GFX9-NEXT: v_cvt_f32_u32_e32 v2, v10 312; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v9 313; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, 0, v10 314; GFX9-NEXT: v_subb_co_u32_e32 v8, vcc, 0, v9, vcc 315; GFX9-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2 316; GFX9-NEXT: v_rcp_f32_e32 v2, v2 317; GFX9-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 318; GFX9-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 319; GFX9-NEXT: v_trunc_f32_e32 v3, v3 320; GFX9-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2 321; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v2 322; GFX9-NEXT: v_cvt_u32_f32_e32 v11, v3 323; GFX9-NEXT: v_mul_lo_u32 v4, v8, v6 324; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v6, 0 325; GFX9-NEXT: v_mul_lo_u32 v5, v7, v11 326; GFX9-NEXT: v_mul_hi_u32 v12, v6, v2 327; GFX9-NEXT: v_add3_u32 v5, v3, v5, v4 328; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0 329; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v12, v3 330; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v11, v2, 0 331; GFX9-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v4, vcc 332; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v5, 0 333; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v12, v2 334; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v13, v3, vcc 335; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v5, vcc 336; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 337; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc 338; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v6, v2 339; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, v11, v3, vcc 340; GFX9-NEXT: v_mul_lo_u32 v4, v7, v11 341; GFX9-NEXT: v_mul_lo_u32 v5, v8, v12 342; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v12, 0 343; GFX9-NEXT: v_add3_u32 v5, v3, v4, v5 344; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v5, 0 345; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v12, v5, 0 346; GFX9-NEXT: v_mul_hi_u32 v13, v12, v2 347; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v2, 0 348; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v13, v5 349; GFX9-NEXT: 
v_addc_co_u32_e32 v5, vcc, 0, v6, vcc 350; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7 351; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v5, v8, vcc 352; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc 353; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 354; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc 355; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v12, v2 356; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v11, v3, vcc 357; GFX9-NEXT: v_ashrrev_i32_e32 v5, 31, v1 358; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5 359; GFX9-NEXT: v_xor_b32_e32 v6, v0, v5 360; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v1, v5, vcc 361; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v3, 0 362; GFX9-NEXT: v_mul_hi_u32 v7, v6, v2 363; GFX9-NEXT: v_xor_b32_e32 v4, v4, v5 364; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v0 365; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v1, vcc 366; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0 367; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0 368; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0 369; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v8, v1, vcc 370; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc 371; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 372; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 373; GFX9-NEXT: v_mul_lo_u32 v2, v9, v0 374; GFX9-NEXT: v_mul_lo_u32 v3, v10, v1 375; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v0, 0 376; GFX9-NEXT: v_add3_u32 v1, v1, v3, v2 377; GFX9-NEXT: v_sub_u32_e32 v2, v4, v1 378; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v6, v0 379; GFX9-NEXT: v_subb_co_u32_e64 v2, s[4:5], v2, v9, vcc 380; GFX9-NEXT: v_sub_co_u32_e64 v3, s[4:5], v0, v10 381; GFX9-NEXT: v_subbrev_co_u32_e64 v6, s[6:7], 0, v2, s[4:5] 382; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v6, v9 383; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[6:7] 384; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v3, v10 385; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[6:7] 386; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], v6, v9 387; GFX9-NEXT: v_subb_co_u32_e64 v2, s[4:5], v2, v9, s[4:5] 388; GFX9-NEXT: v_cndmask_b32_e64 v7, 
v7, v8, s[6:7] 389; GFX9-NEXT: v_sub_co_u32_e64 v8, s[4:5], v3, v10 390; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v4, v1, vcc 391; GFX9-NEXT: v_subbrev_co_u32_e64 v2, s[4:5], 0, v2, s[4:5] 392; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v9 393; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v7 394; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc 395; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v10 396; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5] 397; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc 398; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v9 399; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 400; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 401; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 402; GFX9-NEXT: v_cndmask_b32_e64 v2, v3, v8, s[4:5] 403; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 404; GFX9-NEXT: v_xor_b32_e32 v0, v0, v5 405; GFX9-NEXT: v_xor_b32_e32 v1, v1, v5 406; GFX9-NEXT: v_sub_co_u32_e32 v4, vcc, v0, v5 407; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v5, vcc 408; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3 409; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1 410; GFX9-NEXT: .LBB2_2: ; %Flow 411; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] 412; GFX9-NEXT: s_cbranch_execz .LBB2_4 413; GFX9-NEXT: ; %bb.3: 414; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2 415; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2 416; GFX9-NEXT: v_mov_b32_e32 v5, 0 417; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 418; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 419; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 420; GFX9-NEXT: v_mul_lo_u32 v3, v3, v1 421; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 422; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 423; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 424; GFX9-NEXT: v_mul_lo_u32 v1, v1, v2 425; GFX9-NEXT: v_sub_u32_e32 v0, v0, v1 426; GFX9-NEXT: v_sub_u32_e32 v1, v0, v2 427; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 428; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 429; GFX9-NEXT: v_sub_u32_e32 v1, v0, v2 430; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 431; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc 432; GFX9-NEXT: .LBB2_4: 433; GFX9-NEXT: 
s_or_b64 exec, exec, s[4:5] 434; GFX9-NEXT: v_mov_b32_e32 v0, v4 435; GFX9-NEXT: v_mov_b32_e32 v1, v5 436; GFX9-NEXT: s_setpc_b64 s[30:31] 437 %d = srem i64 %a, %b 438 ret i64 %d 439} 440 441define i64 @urem64(i64 %a, i64 %b) { 442; GFX9-LABEL: urem64: 443; GFX9: ; %bb.0: 444; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 445; GFX9-NEXT: v_or_b32_e32 v5, v1, v3 446; GFX9-NEXT: v_mov_b32_e32 v4, 0 447; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] 448; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5 449; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc 450; GFX9-NEXT: s_xor_b64 s[8:9], exec, s[4:5] 451; GFX9-NEXT: s_cbranch_execz .LBB3_2 452; GFX9-NEXT: ; %bb.1: 453; GFX9-NEXT: v_cvt_f32_u32_e32 v4, v2 454; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v3 455; GFX9-NEXT: v_sub_co_u32_e32 v10, vcc, 0, v2 456; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, 0, v3, vcc 457; GFX9-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4 458; GFX9-NEXT: v_rcp_f32_e32 v4, v4 459; GFX9-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 460; GFX9-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 461; GFX9-NEXT: v_trunc_f32_e32 v5, v5 462; GFX9-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4 463; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v5 464; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v4 465; GFX9-NEXT: v_mul_lo_u32 v6, v10, v8 466; GFX9-NEXT: v_mul_lo_u32 v7, v11, v9 467; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v9, 0 468; GFX9-NEXT: v_add3_u32 v7, v5, v6, v7 469; GFX9-NEXT: v_mul_hi_u32 v12, v9, v4 470; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v7, 0 471; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v12, v5 472; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v8, v4, 0 473; GFX9-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v6, vcc 474; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v8, v7, 0 475; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v12, v4 476; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v13, v5, vcc 477; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v7, vcc 478; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v6 479; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc 480; GFX9-NEXT: v_add_co_u32_e32 v12, 
vcc, v9, v4 481; GFX9-NEXT: v_addc_co_u32_e32 v13, vcc, v8, v5, vcc 482; GFX9-NEXT: v_mul_lo_u32 v6, v10, v13 483; GFX9-NEXT: v_mul_lo_u32 v7, v11, v12 484; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v12, 0 485; GFX9-NEXT: v_add3_u32 v7, v5, v6, v7 486; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v7, 0 487; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v12, v7, 0 488; GFX9-NEXT: v_mul_hi_u32 v11, v12, v4 489; GFX9-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v4, 0 490; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v11, v7 491; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v8, vcc 492; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v9 493; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v7, v10, vcc 494; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc 495; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v5 496; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v6, vcc 497; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v12, v4 498; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v13, v5, vcc 499; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v7, 0 500; GFX9-NEXT: v_mul_hi_u32 v8, v0, v6 501; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v8, v4 502; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v5, vcc 503; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v6, 0 504; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v1, v7, 0 505; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v8, v4 506; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v9, v5, vcc 507; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v7, vcc 508; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v6 509; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc 510; GFX9-NEXT: v_mul_lo_u32 v6, v3, v4 511; GFX9-NEXT: v_mul_lo_u32 v7, v2, v5 512; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v4, 0 513; GFX9-NEXT: v_add3_u32 v5, v5, v7, v6 514; GFX9-NEXT: v_sub_u32_e32 v6, v1, v5 515; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4 516; GFX9-NEXT: v_subb_co_u32_e64 v4, s[4:5], v6, v3, vcc 517; GFX9-NEXT: v_sub_co_u32_e64 v6, s[4:5], v0, v2 518; GFX9-NEXT: v_subbrev_co_u32_e64 v7, s[6:7], 0, v4, s[4:5] 519; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v7, v3 
520; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[6:7] 521; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v6, v2 522; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v5, vcc 523; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7] 524; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], v7, v3 525; GFX9-NEXT: v_subb_co_u32_e64 v4, s[4:5], v4, v3, s[4:5] 526; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 527; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[6:7] 528; GFX9-NEXT: v_sub_co_u32_e64 v9, s[4:5], v6, v2 529; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc 530; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 531; GFX9-NEXT: v_subbrev_co_u32_e64 v4, s[4:5], 0, v4, s[4:5] 532; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 533; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 534; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v8 535; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc 536; GFX9-NEXT: v_cndmask_b32_e64 v4, v7, v4, s[4:5] 537; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 538; GFX9-NEXT: v_cndmask_b32_e32 v5, v1, v4, vcc 539; GFX9-NEXT: v_cndmask_b32_e64 v1, v6, v9, s[4:5] 540; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc 541; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3 542; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1 543; GFX9-NEXT: .LBB3_2: ; %Flow 544; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] 545; GFX9-NEXT: s_cbranch_execz .LBB3_4 546; GFX9-NEXT: ; %bb.3: 547; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2 548; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2 549; GFX9-NEXT: v_mov_b32_e32 v5, 0 550; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 551; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 552; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 553; GFX9-NEXT: v_mul_lo_u32 v3, v3, v1 554; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 555; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 556; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 557; GFX9-NEXT: v_mul_lo_u32 v1, v1, v2 558; GFX9-NEXT: v_sub_u32_e32 v0, v0, v1 559; GFX9-NEXT: v_sub_u32_e32 v1, v0, v2 560; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 561; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 562; GFX9-NEXT: v_sub_u32_e32 v1, v0, v2 563; GFX9-NEXT: 
v_cmp_ge_u32_e32 vcc, v0, v2 564; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc 565; GFX9-NEXT: .LBB3_4: 566; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 567; GFX9-NEXT: v_mov_b32_e32 v0, v4 568; GFX9-NEXT: v_mov_b32_e32 v1, v5 569; GFX9-NEXT: s_setpc_b64 s[30:31] 570 %d = urem i64 %a, %b 571 ret i64 %d 572} 573 574define i32 @sdiv32(i32 %a, i32 %b) { 575; GFX9-LABEL: sdiv32: 576; GFX9: ; %bb.0: 577; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 578; GFX9-NEXT: v_sub_u32_e32 v2, 0, v1 579; GFX9-NEXT: v_max_i32_e32 v2, v1, v2 580; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v2 581; GFX9-NEXT: v_sub_u32_e32 v4, 0, v2 582; GFX9-NEXT: v_sub_u32_e32 v5, 0, v0 583; GFX9-NEXT: v_max_i32_e32 v5, v0, v5 584; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 585; GFX9-NEXT: v_xor_b32_e32 v0, v0, v1 586; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v0 587; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 588; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 589; GFX9-NEXT: v_mul_lo_u32 v4, v4, v3 590; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4 591; GFX9-NEXT: v_add_u32_e32 v3, v3, v4 592; GFX9-NEXT: v_mul_hi_u32 v3, v5, v3 593; GFX9-NEXT: v_mul_lo_u32 v4, v3, v2 594; GFX9-NEXT: v_add_u32_e32 v1, 1, v3 595; GFX9-NEXT: v_sub_u32_e32 v4, v5, v4 596; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v4, v2 597; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 598; GFX9-NEXT: v_sub_u32_e32 v3, v4, v2 599; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc 600; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 601; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2 602; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 603; GFX9-NEXT: v_xor_b32_e32 v1, v1, v0 604; GFX9-NEXT: v_sub_u32_e32 v0, v1, v0 605; GFX9-NEXT: s_setpc_b64 s[30:31] 606 %d = sdiv i32 %a, %b 607 ret i32 %d 608} 609 610define i32 @udiv32(i32 %a, i32 %b) { 611; GFX9-LABEL: udiv32: 612; GFX9: ; %bb.0: 613; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 614; GFX9-NEXT: v_cvt_f32_u32_e32 v2, v1 615; GFX9-NEXT: v_sub_u32_e32 v3, 0, v1 616; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2 617; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 
618; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 619; GFX9-NEXT: v_mul_lo_u32 v3, v3, v2 620; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3 621; GFX9-NEXT: v_add_u32_e32 v2, v2, v3 622; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 623; GFX9-NEXT: v_mul_lo_u32 v3, v2, v1 624; GFX9-NEXT: v_add_u32_e32 v4, 1, v2 625; GFX9-NEXT: v_sub_u32_e32 v0, v0, v3 626; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 627; GFX9-NEXT: v_sub_u32_e32 v3, v0, v1 628; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 629; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 630; GFX9-NEXT: v_add_u32_e32 v3, 1, v2 631; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 632; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 633; GFX9-NEXT: s_setpc_b64 s[30:31] 634 %d = udiv i32 %a, %b 635 ret i32 %d 636} 637 638define i32 @srem32(i32 %a, i32 %b) { 639; GFX9-LABEL: srem32: 640; GFX9: ; %bb.0: 641; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 642; GFX9-NEXT: v_sub_u32_e32 v2, 0, v1 643; GFX9-NEXT: v_max_i32_e32 v1, v1, v2 644; GFX9-NEXT: v_cvt_f32_u32_e32 v2, v1 645; GFX9-NEXT: v_sub_u32_e32 v3, 0, v1 646; GFX9-NEXT: v_sub_u32_e32 v4, 0, v0 647; GFX9-NEXT: v_max_i32_e32 v4, v0, v4 648; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2 649; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v0 650; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 651; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 652; GFX9-NEXT: v_mul_lo_u32 v3, v3, v2 653; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3 654; GFX9-NEXT: v_add_u32_e32 v2, v2, v3 655; GFX9-NEXT: v_mul_hi_u32 v2, v4, v2 656; GFX9-NEXT: v_mul_lo_u32 v2, v2, v1 657; GFX9-NEXT: v_sub_u32_e32 v2, v4, v2 658; GFX9-NEXT: v_sub_u32_e32 v3, v2, v1 659; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v2, v1 660; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 661; GFX9-NEXT: v_sub_u32_e32 v3, v2, v1 662; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v2, v1 663; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc 664; GFX9-NEXT: v_xor_b32_e32 v1, v1, v0 665; GFX9-NEXT: v_sub_u32_e32 v0, v1, v0 666; GFX9-NEXT: s_setpc_b64 s[30:31] 667 %d = srem i32 %a, %b 668 ret i32 %d 669} 670 671define i32 @urem32(i32 
%a, i32 %b) { 672; GFX9-LABEL: urem32: 673; GFX9: ; %bb.0: 674; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 675; GFX9-NEXT: v_cvt_f32_u32_e32 v2, v1 676; GFX9-NEXT: v_sub_u32_e32 v3, 0, v1 677; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2 678; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 679; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 680; GFX9-NEXT: v_mul_lo_u32 v3, v3, v2 681; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3 682; GFX9-NEXT: v_add_u32_e32 v2, v2, v3 683; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 684; GFX9-NEXT: v_mul_lo_u32 v2, v2, v1 685; GFX9-NEXT: v_sub_u32_e32 v0, v0, v2 686; GFX9-NEXT: v_sub_u32_e32 v2, v0, v1 687; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 688; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 689; GFX9-NEXT: v_sub_u32_e32 v2, v0, v1 690; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 691; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 692; GFX9-NEXT: s_setpc_b64 s[30:31] 693 %d = urem i32 %a, %b 694 ret i32 %d 695} 696 697define <2 x i64> @sdivrem64(i64 %a, i64 %b) { 698; GFX9-LABEL: sdivrem64: 699; GFX9: ; %bb.0: 700; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 701; GFX9-NEXT: v_or_b32_e32 v5, v1, v3 702; GFX9-NEXT: v_mov_b32_e32 v4, 0 703; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] 704; GFX9-NEXT: ; implicit-def: $vgpr6_vgpr7 705; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5 706; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc 707; GFX9-NEXT: s_xor_b64 s[10:11], exec, s[4:5] 708; GFX9-NEXT: s_cbranch_execz .LBB8_2 709; GFX9-NEXT: ; %bb.1: 710; GFX9-NEXT: v_ashrrev_i32_e32 v9, 31, v3 711; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v9 712; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v9, vcc 713; GFX9-NEXT: v_xor_b32_e32 v10, v3, v9 714; GFX9-NEXT: v_xor_b32_e32 v11, v2, v9 715; GFX9-NEXT: v_cvt_f32_u32_e32 v2, v11 716; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v10 717; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, 0, v11 718; GFX9-NEXT: v_subb_co_u32_e32 v8, vcc, 0, v10, vcc 719; GFX9-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2 720; GFX9-NEXT: v_rcp_f32_e32 v2, v2 721; GFX9-NEXT: v_mul_f32_e32 v2, 
0x5f7ffffc, v2 722; GFX9-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 723; GFX9-NEXT: v_trunc_f32_e32 v3, v3 724; GFX9-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2 725; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v2 726; GFX9-NEXT: v_cvt_u32_f32_e32 v12, v3 727; GFX9-NEXT: v_mul_lo_u32 v4, v8, v6 728; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v6, 0 729; GFX9-NEXT: v_mul_lo_u32 v5, v7, v12 730; GFX9-NEXT: v_mul_hi_u32 v13, v6, v2 731; GFX9-NEXT: v_add3_u32 v5, v3, v5, v4 732; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0 733; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v13, v3 734; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v2, 0 735; GFX9-NEXT: v_addc_co_u32_e32 v14, vcc, 0, v4, vcc 736; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v5, 0 737; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v13, v2 738; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v14, v3, vcc 739; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v5, vcc 740; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 741; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc 742; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v6, v2 743; GFX9-NEXT: v_addc_co_u32_e32 v12, vcc, v12, v3, vcc 744; GFX9-NEXT: v_mul_lo_u32 v4, v7, v12 745; GFX9-NEXT: v_mul_lo_u32 v5, v8, v13 746; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v13, 0 747; GFX9-NEXT: v_add3_u32 v5, v3, v4, v5 748; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v5, 0 749; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v5, 0 750; GFX9-NEXT: v_mul_hi_u32 v14, v13, v2 751; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v12, v2, 0 752; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v14, v5 753; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v6, vcc 754; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7 755; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v5, v8, vcc 756; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc 757; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 758; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc 759; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v13, v2 760; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v12, v3, vcc 761; GFX9-NEXT: 
v_ashrrev_i32_e32 v7, 31, v1 762; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v7 763; GFX9-NEXT: v_xor_b32_e32 v5, v0, v7 764; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v1, v7, vcc 765; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v3, 0 766; GFX9-NEXT: v_mul_hi_u32 v6, v5, v2 767; GFX9-NEXT: v_xor_b32_e32 v4, v4, v7 768; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v6, v0 769; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v1, vcc 770; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0 771; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0 772; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0 773; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v8, v1, vcc 774; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc 775; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2 776; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc 777; GFX9-NEXT: v_mul_lo_u32 v6, v10, v2 778; GFX9-NEXT: v_mul_lo_u32 v8, v11, v3 779; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v11, v2, 0 780; GFX9-NEXT: v_add3_u32 v1, v1, v8, v6 781; GFX9-NEXT: v_sub_u32_e32 v6, v4, v1 782; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v5, v0 783; GFX9-NEXT: v_subb_co_u32_e64 v6, s[4:5], v6, v10, vcc 784; GFX9-NEXT: v_sub_co_u32_e64 v8, s[4:5], v0, v11 785; GFX9-NEXT: v_subbrev_co_u32_e64 v12, s[6:7], 0, v6, s[4:5] 786; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v10 787; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[6:7] 788; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v8, v11 789; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[6:7] 790; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], v12, v10 791; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[6:7] 792; GFX9-NEXT: v_add_co_u32_e64 v13, s[6:7], 2, v2 793; GFX9-NEXT: v_addc_co_u32_e64 v14, s[6:7], 0, v3, s[6:7] 794; GFX9-NEXT: v_add_co_u32_e64 v15, s[6:7], 1, v2 795; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v4, v1, vcc 796; GFX9-NEXT: v_addc_co_u32_e64 v16, s[6:7], 0, v3, s[6:7] 797; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10 798; GFX9-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v5 799; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc 800; GFX9-NEXT: v_cmp_ge_u32_e32 
vcc, v0, v11 801; GFX9-NEXT: v_cndmask_b32_e64 v5, v16, v14, s[6:7] 802; GFX9-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc 803; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v10 804; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc 805; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 806; GFX9-NEXT: v_cndmask_b32_e64 v4, v15, v13, s[6:7] 807; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 808; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 809; GFX9-NEXT: v_xor_b32_e32 v5, v7, v9 810; GFX9-NEXT: v_xor_b32_e32 v2, v2, v5 811; GFX9-NEXT: v_xor_b32_e32 v3, v3, v5 812; GFX9-NEXT: v_sub_co_u32_e64 v4, s[8:9], v2, v5 813; GFX9-NEXT: v_subb_co_u32_e64 v2, s[4:5], v6, v10, s[4:5] 814; GFX9-NEXT: v_subb_co_u32_e64 v5, s[8:9], v3, v5, s[8:9] 815; GFX9-NEXT: v_sub_co_u32_e64 v3, s[4:5], v8, v11 816; GFX9-NEXT: v_subbrev_co_u32_e64 v2, s[4:5], 0, v2, s[4:5] 817; GFX9-NEXT: v_cndmask_b32_e64 v2, v12, v2, s[6:7] 818; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 819; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, v3, s[6:7] 820; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 821; GFX9-NEXT: v_xor_b32_e32 v0, v0, v7 822; GFX9-NEXT: v_xor_b32_e32 v1, v1, v7 823; GFX9-NEXT: v_sub_co_u32_e32 v6, vcc, v0, v7 824; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v1, v7, vcc 825; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3 826; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1 827; GFX9-NEXT: .LBB8_2: ; %Flow 828; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11] 829; GFX9-NEXT: s_cbranch_execz .LBB8_4 830; GFX9-NEXT: ; %bb.3: 831; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2 832; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2 833; GFX9-NEXT: v_mov_b32_e32 v5, 0 834; GFX9-NEXT: v_mov_b32_e32 v7, v5 835; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 836; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 837; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 838; GFX9-NEXT: v_mul_lo_u32 v3, v3, v1 839; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 840; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 841; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 842; GFX9-NEXT: v_mul_lo_u32 v3, v1, v2 843; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 844; 
GFX9-NEXT: v_sub_u32_e32 v0, v0, v3 845; GFX9-NEXT: v_sub_u32_e32 v3, v0, v2 846; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 847; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 848; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 849; GFX9-NEXT: v_sub_u32_e32 v3, v0, v2 850; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 851; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 852; GFX9-NEXT: v_cndmask_b32_e32 v6, v0, v3, vcc 853; GFX9-NEXT: v_cndmask_b32_e32 v4, v1, v4, vcc 854; GFX9-NEXT: .LBB8_4: 855; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 856; GFX9-NEXT: v_mov_b32_e32 v0, v4 857; GFX9-NEXT: v_mov_b32_e32 v1, v5 858; GFX9-NEXT: v_mov_b32_e32 v2, v6 859; GFX9-NEXT: v_mov_b32_e32 v3, v7 860; GFX9-NEXT: s_setpc_b64 s[30:31] 861 %d = sdiv i64 %a, %b 862 %r = srem i64 %a, %b 863 %ins.0 = insertelement <2 x i64> undef, i64 %d, i32 0 864 %ins.1 = insertelement <2 x i64> %ins.0, i64 %r, i32 1 865 ret <2 x i64> %ins.1 866} 867 868define <2 x i64> @udivrem64(i64 %a, i64 %b) { 869; GFX9-LABEL: udivrem64: 870; GFX9: ; %bb.0: 871; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 872; GFX9-NEXT: v_or_b32_e32 v5, v1, v3 873; GFX9-NEXT: v_mov_b32_e32 v4, 0 874; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] 875; GFX9-NEXT: ; implicit-def: $vgpr6_vgpr7 876; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5 877; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc 878; GFX9-NEXT: s_xor_b64 s[8:9], exec, s[4:5] 879; GFX9-NEXT: s_cbranch_execz .LBB9_2 880; GFX9-NEXT: ; %bb.1: 881; GFX9-NEXT: v_cvt_f32_u32_e32 v4, v2 882; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v3 883; GFX9-NEXT: v_sub_co_u32_e32 v10, vcc, 0, v2 884; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, 0, v3, vcc 885; GFX9-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4 886; GFX9-NEXT: v_rcp_f32_e32 v4, v4 887; GFX9-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 888; GFX9-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 889; GFX9-NEXT: v_trunc_f32_e32 v5, v5 890; GFX9-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4 891; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v5 892; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v4 893; GFX9-NEXT: 
v_mul_lo_u32 v6, v10, v8 894; GFX9-NEXT: v_mul_lo_u32 v7, v11, v9 895; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v9, 0 896; GFX9-NEXT: v_add3_u32 v7, v5, v6, v7 897; GFX9-NEXT: v_mul_hi_u32 v12, v9, v4 898; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v7, 0 899; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v12, v5 900; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v8, v4, 0 901; GFX9-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v6, vcc 902; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v8, v7, 0 903; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v12, v4 904; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v13, v5, vcc 905; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v7, vcc 906; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v6 907; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc 908; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v9, v4 909; GFX9-NEXT: v_addc_co_u32_e32 v13, vcc, v8, v5, vcc 910; GFX9-NEXT: v_mul_lo_u32 v6, v10, v13 911; GFX9-NEXT: v_mul_lo_u32 v7, v11, v12 912; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v12, 0 913; GFX9-NEXT: v_add3_u32 v7, v5, v6, v7 914; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v7, 0 915; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v12, v7, 0 916; GFX9-NEXT: v_mul_hi_u32 v11, v12, v4 917; GFX9-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v4, 0 918; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v11, v7 919; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v8, vcc 920; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v9 921; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v7, v10, vcc 922; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc 923; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v5 924; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v6, vcc 925; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v12, v4 926; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v13, v5, vcc 927; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v7, 0 928; GFX9-NEXT: v_mul_hi_u32 v8, v0, v6 929; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v8, v4 930; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v5, vcc 931; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v6, 0 932; GFX9-NEXT: 
v_mad_u64_u32 v[6:7], s[4:5], v1, v7, 0 933; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v8, v4 934; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v9, v5, vcc 935; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v7, vcc 936; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v4, v6 937; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v5, vcc 938; GFX9-NEXT: v_mul_lo_u32 v8, v3, v6 939; GFX9-NEXT: v_mul_lo_u32 v9, v2, v7 940; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, 0 941; GFX9-NEXT: v_add3_u32 v5, v5, v9, v8 942; GFX9-NEXT: v_sub_u32_e32 v8, v1, v5 943; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4 944; GFX9-NEXT: v_subb_co_u32_e64 v8, s[4:5], v8, v3, vcc 945; GFX9-NEXT: v_sub_co_u32_e64 v9, s[4:5], v0, v2 946; GFX9-NEXT: v_subbrev_co_u32_e64 v10, s[6:7], 0, v8, s[4:5] 947; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v10, v3 948; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[6:7] 949; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v9, v2 950; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[6:7] 951; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], v10, v3 952; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v11, s[6:7] 953; GFX9-NEXT: v_add_co_u32_e64 v11, s[6:7], 2, v6 954; GFX9-NEXT: v_addc_co_u32_e64 v12, s[6:7], 0, v7, s[6:7] 955; GFX9-NEXT: v_add_co_u32_e64 v13, s[6:7], 1, v6 956; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v5, vcc 957; GFX9-NEXT: v_addc_co_u32_e64 v14, s[6:7], 0, v7, s[6:7] 958; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 959; GFX9-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v4 960; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc 961; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 962; GFX9-NEXT: v_cndmask_b32_e64 v4, v14, v12, s[6:7] 963; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc 964; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 965; GFX9-NEXT: v_subb_co_u32_e64 v3, s[4:5], v8, v3, s[4:5] 966; GFX9-NEXT: v_sub_co_u32_e64 v2, s[4:5], v9, v2 967; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v12, vcc 968; GFX9-NEXT: v_subbrev_co_u32_e64 v3, s[4:5], 0, v3, s[4:5] 969; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 970; GFX9-NEXT: v_cndmask_b32_e64 v3, v10, v3, s[6:7] 971; 
GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v4, vcc 972; GFX9-NEXT: v_cndmask_b32_e64 v4, v13, v11, s[6:7] 973; GFX9-NEXT: v_cndmask_b32_e32 v7, v1, v3, vcc 974; GFX9-NEXT: v_cndmask_b32_e64 v1, v9, v2, s[6:7] 975; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc 976; GFX9-NEXT: v_cndmask_b32_e32 v6, v0, v1, vcc 977; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3 978; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1 979; GFX9-NEXT: .LBB9_2: ; %Flow 980; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] 981; GFX9-NEXT: s_cbranch_execz .LBB9_4 982; GFX9-NEXT: ; %bb.3: 983; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2 984; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2 985; GFX9-NEXT: v_mov_b32_e32 v5, 0 986; GFX9-NEXT: v_mov_b32_e32 v7, v5 987; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 988; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 989; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 990; GFX9-NEXT: v_mul_lo_u32 v3, v3, v1 991; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 992; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 993; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 994; GFX9-NEXT: v_mul_lo_u32 v3, v1, v2 995; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 996; GFX9-NEXT: v_sub_u32_e32 v0, v0, v3 997; GFX9-NEXT: v_sub_u32_e32 v3, v0, v2 998; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 999; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1000; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 1001; GFX9-NEXT: v_sub_u32_e32 v3, v0, v2 1002; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 1003; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 1004; GFX9-NEXT: v_cndmask_b32_e32 v6, v0, v3, vcc 1005; GFX9-NEXT: v_cndmask_b32_e32 v4, v1, v4, vcc 1006; GFX9-NEXT: .LBB9_4: 1007; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 1008; GFX9-NEXT: v_mov_b32_e32 v0, v4 1009; GFX9-NEXT: v_mov_b32_e32 v1, v5 1010; GFX9-NEXT: v_mov_b32_e32 v2, v6 1011; GFX9-NEXT: v_mov_b32_e32 v3, v7 1012; GFX9-NEXT: s_setpc_b64 s[30:31] 1013 %d = udiv i64 %a, %b 1014 %r = urem i64 %a, %b 1015 %ins.0 = insertelement <2 x i64> undef, i64 %d, i32 0 1016 %ins.1 = insertelement <2 x i64> %ins.0, i64 %r, i32 1 1017 ret <2 x i64> %ins.1 1018} 1019 
; Review notes for @sdivrem64 and @udivrem64 (both defined above):
;   * Each function computes the quotient and the remainder of the same pair
;     of i64 operands (sdiv/srem and udiv/urem respectively) and returns them
;     as <2 x i64>, quotient in element 0 and remainder in element 1.
;   * The expected code tests the OR of the two high operand words (v1 | v3)
;     against zero. The branch taken when it is non-zero (%bb.1) is the full
;     64-bit expansion; the other branch (%bb.3) is a 32-bit sequence that
;     yields both results (v4 = quotient, v6 = remainder) and zeroes the high
;     result words (v5, v7).
;   * NOTE(review): the insertelement chain starts from `undef`; both lanes
;     are overwritten afterwards. If this is ever switched to `poison`,
;     regenerate the assertions with utils/update_llc_test_checks.py rather
;     than editing them by hand.
; The next two functions divide i64 values that are derived from 32-bit
; quantities.
;
; @sdiv64_known32: both operands are `ashr i64 %x, 32`, i.e. the high word
; sign-extended to 64 bits. The expected code still contains both the 64-bit
; path (%bb.1) and the 32-bit path (%bb.3), selected on the OR of the two
; sign words.
; NOTE(review): despite the "sdiv" in the name, the IR below performs `udiv`.
; If a signed divide was intended, change the opcode and regenerate the
; assertions with utils/update_llc_test_checks.py; do not hand-edit them.
;
; @udiv64_known32: both operands are masked with 4294967295 (the low 32 bits),
; and the expected code is a single branch-free 32-bit divide sequence whose
; high result word is the constant 0.
1020define i64 @sdiv64_known32(i64 %a, i64 %b) { 1021; GFX9-LABEL: sdiv64_known32: 1022; GFX9: ; %bb.0: 1023; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1024; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v1 1025; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v3 1026; GFX9-NEXT: v_or_b32_e32 v5, v2, v0 1027; GFX9-NEXT: v_mov_b32_e32 v4, 0 1028; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] 1029; GFX9-NEXT: v_mov_b32_e32 v7, v1 1030; GFX9-NEXT: v_mov_b32_e32 v6, v3 1031; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5 1032; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc 1033; GFX9-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 1034; GFX9-NEXT: s_cbranch_execz .LBB10_2 1035; GFX9-NEXT: ; %bb.1: 1036; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v6 1037; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v0 1038; GFX9-NEXT: v_sub_co_u32_e32 v11, vcc, 0, v6 1039; GFX9-NEXT: v_subb_co_u32_e32 v12, vcc, 0, v0, vcc 1040; GFX9-NEXT: v_madmk_f32 v1, v3, 0x4f800000, v1 1041; GFX9-NEXT: v_rcp_f32_e32 v1, v1 1042; GFX9-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 1043; GFX9-NEXT: v_mul_f32_e32 v3, 0x2f800000, v1 1044; GFX9-NEXT: v_trunc_f32_e32 v3, v3 1045; GFX9-NEXT: v_madmk_f32 v1, v3, 0xcf800000, v1 1046; GFX9-NEXT: v_cvt_u32_f32_e32 v10, v3 1047; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 1048; GFX9-NEXT: v_mul_lo_u32 v5, v11, v10 1049; GFX9-NEXT: v_mul_lo_u32 v8, v12, v1 1050; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v1, 0 1051; GFX9-NEXT: v_add3_u32 v8, v4, v5, v8 1052; GFX9-NEXT: v_mul_hi_u32 v9, v1, v3 1053; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v8, 0 1054; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v9, v4 1055; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v3, 0 1056; GFX9-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v10, v8, 0 1057; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc 1058; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v13, v3 1059; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v5, v4, vcc 1060; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v9, vcc 1061; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v8 1062; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc 
1063; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v1, v3 1064; GFX9-NEXT: v_addc_co_u32_e32 v13, vcc, v10, v4, vcc 1065; GFX9-NEXT: v_mul_lo_u32 v5, v11, v13 1066; GFX9-NEXT: v_mul_lo_u32 v8, v12, v1 1067; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v1, 0 1068; GFX9-NEXT: v_add3_u32 v8, v4, v5, v8 1069; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v8, 0 1070; GFX9-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v1, v8, 0 1071; GFX9-NEXT: v_mul_hi_u32 v12, v1, v3 1072; GFX9-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v13, v3, 0 1073; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v12, v8 1074; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v9, vcc 1075; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v10 1076; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v8, v11, vcc 1077; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc 1078; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v4 1079; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v5, vcc 1080; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v1, v3 1081; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v13, v4, vcc 1082; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v5, 0 1083; GFX9-NEXT: v_mul_hi_u32 v8, v7, v1 1084; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v8, v3 1085; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v4, vcc 1086; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v2, v1, 0 1087; GFX9-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v2, v5, 0 1088; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v10, v3 1089; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v11, v4, vcc 1090; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v9, vcc 1091; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v1, v8 1092; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v3, vcc 1093; GFX9-NEXT: v_mul_lo_u32 v8, v0, v1 1094; GFX9-NEXT: v_mul_lo_u32 v9, v6, v5 1095; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v1, 0 1096; GFX9-NEXT: v_add3_u32 v4, v4, v9, v8 1097; GFX9-NEXT: v_sub_u32_e32 v8, v2, v4 1098; GFX9-NEXT: v_sub_co_u32_e32 v3, vcc, v7, v3 1099; GFX9-NEXT: v_subb_co_u32_e64 v7, s[4:5], v8, v0, vcc 1100; GFX9-NEXT: v_sub_co_u32_e64 v8, s[4:5], v3, v6 1101; GFX9-NEXT: 
v_subbrev_co_u32_e64 v7, s[4:5], 0, v7, s[4:5] 1102; GFX9-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v0 1103; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 1104; GFX9-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v6 1105; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] 1106; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v0 1107; GFX9-NEXT: v_cndmask_b32_e64 v7, v9, v8, s[4:5] 1108; GFX9-NEXT: v_add_co_u32_e64 v8, s[4:5], 2, v1 1109; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v4, vcc 1110; GFX9-NEXT: v_addc_co_u32_e64 v9, s[4:5], 0, v5, s[4:5] 1111; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v2, v0 1112; GFX9-NEXT: v_add_co_u32_e64 v10, s[4:5], 1, v1 1113; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc 1114; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6 1115; GFX9-NEXT: v_addc_co_u32_e64 v11, s[4:5], 0, v5, s[4:5] 1116; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc 1117; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v0 1118; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v7 1119; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 1120; GFX9-NEXT: v_cndmask_b32_e64 v7, v11, v9, s[4:5] 1121; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 1122; GFX9-NEXT: v_cndmask_b32_e64 v0, v10, v8, s[4:5] 1123; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc 1124; GFX9-NEXT: v_cndmask_b32_e32 v4, v1, v0, vcc 1125; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3 1126; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1 1127; GFX9-NEXT: .LBB10_2: ; %Flow 1128; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] 1129; GFX9-NEXT: s_cbranch_execz .LBB10_4 1130; GFX9-NEXT: ; %bb.3: 1131; GFX9-NEXT: v_cvt_f32_u32_e32 v0, v3 1132; GFX9-NEXT: v_sub_u32_e32 v2, 0, v3 1133; GFX9-NEXT: v_mov_b32_e32 v5, 0 1134; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 1135; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1136; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 1137; GFX9-NEXT: v_mul_lo_u32 v2, v2, v0 1138; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 1139; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 1140; GFX9-NEXT: v_mul_hi_u32 v0, v1, v0 1141; GFX9-NEXT: v_mul_lo_u32 v2, v0, v3 1142; GFX9-NEXT: v_add_u32_e32 v4, 1, v0 1143; GFX9-NEXT: 
v_sub_u32_e32 v1, v1, v2 1144; GFX9-NEXT: v_sub_u32_e32 v2, v1, v3 1145; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 1146; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1147; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1148; GFX9-NEXT: v_add_u32_e32 v2, 1, v0 1149; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 1150; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v2, vcc 1151; GFX9-NEXT: .LBB10_4: 1152; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 1153; GFX9-NEXT: v_mov_b32_e32 v0, v4 1154; GFX9-NEXT: v_mov_b32_e32 v1, v5 1155; GFX9-NEXT: s_setpc_b64 s[30:31] 1156 %a.ext = ashr i64 %a, 32 1157 %b.ext = ashr i64 %b, 32 1158 %d = udiv i64 %a.ext, %b.ext 1159 ret i64 %d 1160} 1161 1162define i64 @udiv64_known32(i64 %a, i64 %b) { 1163; GFX9-LABEL: udiv64_known32: 1164; GFX9: ; %bb.0: 1165; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1166; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2 1167; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2 1168; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 1169; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 1170; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 1171; GFX9-NEXT: v_mul_lo_u32 v3, v3, v1 1172; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 1173; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 1174; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 1175; GFX9-NEXT: v_mul_lo_u32 v3, v1, v2 1176; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 1177; GFX9-NEXT: v_sub_u32_e32 v0, v0, v3 1178; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 1179; GFX9-NEXT: v_sub_u32_e32 v3, v0, v2 1180; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 1181; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1182; GFX9-NEXT: v_add_u32_e32 v3, 1, v1 1183; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 1184; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 1185; GFX9-NEXT: v_mov_b32_e32 v1, 0 1186; GFX9-NEXT: s_setpc_b64 s[30:31] 1187 %a.mask = and i64 %a, 4294967295 1188 %b.mask = and i64 %b, 4294967295 1189 %d = udiv i64 %a.mask, %b.mask 1190 ret i64 %d 1191} 1192