1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s 3; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s 4 5; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare. 6 7define i64 @v_urem_i64(i64 %num, i64 %den) { 8; CHECK-LABEL: v_urem_i64: 9; CHECK: ; %bb.0: 10; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11; CHECK-NEXT: v_mov_b32_e32 v4, v0 12; CHECK-NEXT: v_mov_b32_e32 v5, v1 13; CHECK-NEXT: v_or_b32_e32 v1, v5, v3 14; CHECK-NEXT: v_mov_b32_e32 v0, 0 15; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] 16; CHECK-NEXT: v_cvt_f32_u32_e32 v6, v2 17; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 18; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc 19; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 20; CHECK-NEXT: s_cbranch_execnz .LBB0_3 21; CHECK-NEXT: ; %bb.1: ; %Flow 22; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] 23; CHECK-NEXT: s_cbranch_execnz .LBB0_4 24; CHECK-NEXT: .LBB0_2: 25; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] 26; CHECK-NEXT: s_setpc_b64 s[30:31] 27; CHECK-NEXT: .LBB0_3: 28; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v3 29; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v2 30; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v3, vcc 31; CHECK-NEXT: v_mac_f32_e32 v6, 0x4f800000, v0 32; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v6 33; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 34; CHECK-NEXT: v_mul_f32_e32 v6, 0x2f800000, v0 35; CHECK-NEXT: v_trunc_f32_e32 v6, v6 36; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v6 37; CHECK-NEXT: v_cvt_u32_f32_e32 v6, v6 38; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 39; CHECK-NEXT: v_mul_lo_u32 v8, v1, v6 40; CHECK-NEXT: v_mul_lo_u32 v9, v1, v0 41; 
; NOTE(review): v_urem_i64 (defined on the line above) returns `urem i64 %num, %den`.
; Its expected asm ORs the high dwords of both operands (v_or_b32 v1, v5, v3) and
; compares the {0, or} pair against zero: lanes where it is non-zero run the full
; 64-bit expansion at .LBB0_3, the remaining lanes run the 32-bit path at .LBB0_4,
; which finishes by writing 0 to v1. These assertions are autogenerated (see the
; note on the first line of the file); regenerate them with the script instead of
; editing individual instructions by hand.
CHECK-NEXT: v_mul_lo_u32 v10, v7, v0 42; CHECK-NEXT: v_mul_hi_u32 v11, v1, v0 43; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 44; CHECK-NEXT: v_mul_lo_u32 v10, v6, v9 45; CHECK-NEXT: v_mul_hi_u32 v12, v0, v9 46; CHECK-NEXT: v_mul_hi_u32 v9, v6, v9 47; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11 48; CHECK-NEXT: v_mul_lo_u32 v11, v0, v8 49; CHECK-NEXT: v_mul_lo_u32 v13, v6, v8 50; CHECK-NEXT: v_mul_hi_u32 v14, v0, v8 51; CHECK-NEXT: v_mul_hi_u32 v8, v6, v8 52; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11 53; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 54; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9 55; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 56; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 57; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 58; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14 59; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 60; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 61; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12 62; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 63; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 64; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 65; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 66; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9 67; CHECK-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc 68; CHECK-NEXT: v_mul_lo_u32 v8, v1, v0 69; CHECK-NEXT: v_mul_lo_u32 v7, v7, v0 70; CHECK-NEXT: v_mul_hi_u32 v9, v1, v0 71; CHECK-NEXT: v_mul_lo_u32 v1, v1, v6 72; CHECK-NEXT: v_mul_lo_u32 v10, v6, v8 73; CHECK-NEXT: v_mul_hi_u32 v11, v0, v8 74; CHECK-NEXT: v_mul_hi_u32 v8, v6, v8 75; CHECK-NEXT: v_add_i32_e32 v1, vcc, v7, v1 76; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v9 77; CHECK-NEXT: v_mul_lo_u32 v7, v0, v1 78; CHECK-NEXT: v_mul_lo_u32 v9, v6, v1 79; CHECK-NEXT: v_mul_hi_u32 v12, v0, v1 80; CHECK-NEXT: v_mul_hi_u32 v1, v6, v1 81; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 82; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 83; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 84; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 85; CHECK-NEXT: v_add_i32_e32 v7, vcc, 
v7, v11 86; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 87; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v12 88; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 89; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 90; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 91; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 92; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 93; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 94; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 95; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7 96; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v6, v1, vcc 97; CHECK-NEXT: v_mul_lo_u32 v6, v5, v0 98; CHECK-NEXT: v_mul_hi_u32 v7, v4, v0 99; CHECK-NEXT: v_mul_hi_u32 v0, v5, v0 100; CHECK-NEXT: v_mul_lo_u32 v8, v4, v1 101; CHECK-NEXT: v_mul_lo_u32 v9, v5, v1 102; CHECK-NEXT: v_mul_hi_u32 v10, v4, v1 103; CHECK-NEXT: v_mul_hi_u32 v1, v5, v1 104; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 105; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 106; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0 107; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 108; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 109; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 110; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v10 111; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 112; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 113; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 114; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v6 115; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 116; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 117; CHECK-NEXT: v_mul_lo_u32 v7, v2, v0 118; CHECK-NEXT: v_mul_lo_u32 v8, v3, v0 119; CHECK-NEXT: v_mul_hi_u32 v0, v2, v0 120; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6 121; CHECK-NEXT: v_mul_lo_u32 v1, v2, v1 122; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1 123; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0 124; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v7 125; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v5, v0, vcc 126; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v5, v0 127; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2 128; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] 
129; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v3 130; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] 131; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v0, v3, vcc 132; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 133; CHECK-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc 134; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v1, v2 135; CHECK-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v0, vcc 136; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v2 137; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] 138; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v0, v3, vcc 139; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v7, v3 140; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc 141; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v6, v2 142; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc 143; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v7, v3 144; CHECK-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc 145; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 146; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc 147; CHECK-NEXT: v_cndmask_b32_e32 v3, v7, v0, vcc 148; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 149; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc 150; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc 151; CHECK-NEXT: ; implicit-def: $vgpr6 152; CHECK-NEXT: ; implicit-def: $vgpr2 153; CHECK-NEXT: ; implicit-def: $vgpr4 154; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] 155; CHECK-NEXT: s_cbranch_execz .LBB0_2 156; CHECK-NEXT: .LBB0_4: 157; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v6 158; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v2 159; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 160; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 161; CHECK-NEXT: v_mul_lo_u32 v1, v1, v0 162; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 163; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 164; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0 165; CHECK-NEXT: v_mul_lo_u32 v0, v0, v2 166; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0 167; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v2 168; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 169; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 170; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v2 171; CHECK-NEXT: 
v_cmp_ge_u32_e32 vcc, v0, v2 172; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 173; CHECK-NEXT: v_mov_b32_e32 v1, 0 174; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] 175; CHECK-NEXT: s_setpc_b64 s[30:31] 176 %result = urem i64 %num, %den 177 ret i64 %result 178} 179 180; FIXME: This is a workaround for not handling uniform VGPR case. 181declare i32 @llvm.amdgcn.readfirstlane(i32) 182 183define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { 184; CHECK-LABEL: s_urem_i64: 185; CHECK: ; %bb.0: 186; CHECK-NEXT: s_or_b64 s[4:5], s[0:1], s[2:3] 187; CHECK-NEXT: s_mov_b32 s6, 0 188; CHECK-NEXT: s_mov_b32 s7, -1 189; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] 190; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[4:5], 0 191; CHECK-NEXT: s_mov_b32 s6, 1 192; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s2 193; CHECK-NEXT: s_cbranch_vccz .LBB1_2 194; CHECK-NEXT: ; %bb.1: 195; CHECK-NEXT: v_mov_b32_e32 v0, s3 196; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s3 197; CHECK-NEXT: s_sub_u32 s4, 0, s2 198; CHECK-NEXT: s_mov_b32 s6, 0 199; CHECK-NEXT: v_mov_b32_e32 v3, s1 200; CHECK-NEXT: v_madmk_f32 v1, v1, 0x4f800000, v2 201; CHECK-NEXT: s_subb_u32 s5, 0, s3 202; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 203; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 204; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v1 205; CHECK-NEXT: v_trunc_f32_e32 v4, v4 206; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v4 207; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 208; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 209; CHECK-NEXT: v_mul_lo_u32 v5, s4, v4 210; CHECK-NEXT: v_mul_lo_u32 v6, s4, v1 211; CHECK-NEXT: v_mul_lo_u32 v7, s5, v1 212; CHECK-NEXT: v_mul_hi_u32 v8, s4, v1 213; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 214; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6 215; CHECK-NEXT: v_mul_hi_u32 v9, v1, v6 216; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 217; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 218; CHECK-NEXT: v_mul_lo_u32 v8, v1, v5 219; CHECK-NEXT: v_mul_lo_u32 v10, v4, v5 220; CHECK-NEXT: v_mul_hi_u32 v11, v1, v5 221; CHECK-NEXT: v_mul_hi_u32 v5, v4, 
v5 222; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 223; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 224; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6 225; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 226; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 227; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 228; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11 229; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 230; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 231; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9 232; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 233; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 234; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 235; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 236; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6 237; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc 238; CHECK-NEXT: v_mul_lo_u32 v5, s4, v1 239; CHECK-NEXT: v_mul_lo_u32 v6, s5, v1 240; CHECK-NEXT: v_mul_hi_u32 v7, s4, v1 241; CHECK-NEXT: v_mul_lo_u32 v8, s4, v4 242; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 243; CHECK-NEXT: v_mul_hi_u32 v10, v1, v5 244; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 245; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 246; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 247; CHECK-NEXT: v_mul_lo_u32 v7, v1, v6 248; CHECK-NEXT: v_mul_lo_u32 v8, v4, v6 249; CHECK-NEXT: v_mul_hi_u32 v11, v1, v6 250; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 251; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 252; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 253; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v5 254; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 255; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v10 256; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 257; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v11 258; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 259; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 260; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 261; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 262; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 263; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 264; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 265; 
CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 266; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc 267; CHECK-NEXT: v_mul_lo_u32 v5, s1, v1 268; CHECK-NEXT: v_mul_hi_u32 v6, s0, v1 269; CHECK-NEXT: v_mul_hi_u32 v1, s1, v1 270; CHECK-NEXT: v_mul_lo_u32 v7, s0, v4 271; CHECK-NEXT: v_mul_lo_u32 v8, s1, v4 272; CHECK-NEXT: v_mul_hi_u32 v9, s0, v4 273; CHECK-NEXT: v_mul_hi_u32 v4, s1, v4 274; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 275; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 276; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1 277; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 278; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 279; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 280; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v9 281; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 282; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 283; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 284; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 285; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 286; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 287; CHECK-NEXT: v_mul_lo_u32 v6, s2, v1 288; CHECK-NEXT: v_mul_lo_u32 v7, s3, v1 289; CHECK-NEXT: v_mul_hi_u32 v1, s2, v1 290; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 291; CHECK-NEXT: v_mul_lo_u32 v4, s2, v4 292; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 293; CHECK-NEXT: v_add_i32_e32 v1, vcc, v4, v1 294; CHECK-NEXT: v_sub_i32_e32 v4, vcc, s0, v6 295; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v1, vcc 296; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], s1, v1 297; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v4 298; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] 299; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s3, v3 300; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] 301; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v1, v0, vcc 302; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v3 303; CHECK-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc 304; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s2, v4 305; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc 306; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v3 307; CHECK-NEXT: 
v_cndmask_b32_e64 v5, 0, -1, vcc 308; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, s2, v3 309; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 310; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc 311; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v0 312; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v5, vcc 313; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 314; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v6, vcc 315; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 316; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 317; CHECK-NEXT: s_branch .LBB1_3 318; CHECK-NEXT: .LBB1_2: 319; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 320; CHECK-NEXT: .LBB1_3: ; %Flow 321; CHECK-NEXT: s_xor_b32 s1, s6, 1 322; CHECK-NEXT: s_and_b32 s1, s1, 1 323; CHECK-NEXT: s_cmp_lg_u32 s1, 0 324; CHECK-NEXT: s_cbranch_scc1 .LBB1_5 325; CHECK-NEXT: ; %bb.4: 326; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2 327; CHECK-NEXT: s_sub_i32 s1, 0, s2 328; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 329; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 330; CHECK-NEXT: v_mul_lo_u32 v1, s1, v0 331; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 332; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 333; CHECK-NEXT: v_mul_hi_u32 v0, s0, v0 334; CHECK-NEXT: v_mul_lo_u32 v0, v0, s2 335; CHECK-NEXT: v_sub_i32_e32 v0, vcc, s0, v0 336; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s2, v0 337; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v0 338; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 339; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s2, v0 340; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v0 341; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 342; CHECK-NEXT: .LBB1_5: 343; CHECK-NEXT: v_readfirstlane_b32 s0, v0 344; CHECK-NEXT: s_mov_b32 s1, s0 345; CHECK-NEXT: ; return to shader part epilog 346 %result = urem i64 %num, %den 347 %cast = bitcast i64 %result to <2 x i32> 348 %elt.0 = extractelement <2 x i32> %cast, i32 0 349 %elt.1 = extractelement <2 x i32> %cast, i32 1 350 %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0) 351 %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1) 352 %ins.0 = insertelement <2 x 
; NOTE(review): s_urem_i64 computes `urem i64 %num, %den` on inreg operands and
; passes both 32-bit halves of the result through llvm.amdgcn.readfirstlane.
; The %ins.1 insertelement on the next line puts %res.0 (not %res.1) into lane 1,
; so %res.1 is unused and the returned i64 repeats the low half. The expected asm
; agrees with that (v_readfirstlane_b32 s0, v0 followed by s_mov_b32 s1, s0), so
; the high dword of the remainder is not covered by this function's assertions.
; This looks like a copy/paste slip - TODO confirm. If %res.1 was intended, change
; the IR and regenerate the assertions with utils/update_llc_test_checks.py; do
; not patch the expected instructions by hand.
i32> undef, i32 %res.0, i32 0 353 %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1 354 %cast.back = bitcast <2 x i32> %ins.1 to i64 355 ret i64 %cast.back 356} 357 358define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { 359; GISEL-LABEL: v_urem_v2i64: 360; GISEL: ; %bb.0: 361; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 362; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v4 363; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v5 364; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v9 365; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8 366; GISEL-NEXT: v_mul_f32_e32 v8, 0x5f7ffffc, v8 367; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v8 368; GISEL-NEXT: v_trunc_f32_e32 v9, v9 369; GISEL-NEXT: v_mac_f32_e32 v8, 0xcf800000, v9 370; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 371; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 372; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v4 373; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc 374; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 375; GISEL-NEXT: v_mul_lo_u32 v13, v11, v8 376; GISEL-NEXT: v_mul_lo_u32 v14, v10, v9 377; GISEL-NEXT: v_mul_hi_u32 v15, v10, v8 378; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 379; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 380; GISEL-NEXT: v_mul_lo_u32 v14, v9, v12 381; GISEL-NEXT: v_mul_lo_u32 v15, v8, v13 382; GISEL-NEXT: v_mul_hi_u32 v16, v8, v12 383; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 384; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 385; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 386; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 387; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 388; GISEL-NEXT: v_mul_lo_u32 v15, v9, v13 389; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 390; GISEL-NEXT: v_mul_hi_u32 v16, v8, v13 391; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 392; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 393; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 394; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 395; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 396; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 397; 
GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 398; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 399; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 400; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 401; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 402; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v13, vcc 403; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 404; GISEL-NEXT: v_mul_lo_u32 v11, v11, v8 405; GISEL-NEXT: v_mul_lo_u32 v13, v10, v9 406; GISEL-NEXT: v_mul_hi_u32 v10, v10, v8 407; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 408; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 409; GISEL-NEXT: v_mul_lo_u32 v11, v9, v12 410; GISEL-NEXT: v_mul_lo_u32 v13, v8, v10 411; GISEL-NEXT: v_mul_hi_u32 v14, v8, v12 412; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 413; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 414; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14 415; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 416; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11 417; GISEL-NEXT: v_mul_lo_u32 v13, v9, v10 418; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 419; GISEL-NEXT: v_mul_hi_u32 v14, v8, v10 420; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 421; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 422; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 423; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 424; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 425; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 426; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 427; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 428; GISEL-NEXT: v_mul_hi_u32 v10, v9, v10 429; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 430; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 431; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v10, vcc 432; GISEL-NEXT: v_mul_lo_u32 v10, v1, v8 433; GISEL-NEXT: v_mul_lo_u32 v11, v0, v9 434; GISEL-NEXT: v_mul_hi_u32 v12, v0, v8 435; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 436; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 437; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 438; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 439; 
GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 440; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9 441; GISEL-NEXT: v_mul_hi_u32 v8, v1, v8 442; GISEL-NEXT: v_mul_hi_u32 v12, v0, v9 443; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 444; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 445; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 446; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 447; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 448; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 449; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 450; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 451; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9 452; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 453; GISEL-NEXT: v_mul_lo_u32 v10, v4, v8 454; GISEL-NEXT: v_mul_lo_u32 v11, v5, v8 455; GISEL-NEXT: v_mul_lo_u32 v9, v4, v9 456; GISEL-NEXT: v_mul_hi_u32 v8, v4, v8 457; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 458; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 459; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 460; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], v1, v8, vcc 461; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v8 462; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v5 463; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] 464; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 465; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] 466; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v5 467; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[4:5] 468; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], v0, v4 469; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc 470; GISEL-NEXT: v_subbrev_u32_e64 v11, vcc, 0, v1, s[4:5] 471; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v5 472; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc 473; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v10, v4 474; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc 475; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v11, v5 476; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc 477; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v10, v4 478; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v1, v5, s[4:5] 479; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 
480; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 481; GISEL-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc 482; GISEL-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc 483; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 484; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 485; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc 486; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v6 487; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v7 488; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 489; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 490; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 491; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 492; GISEL-NEXT: v_trunc_f32_e32 v5, v5 493; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 494; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 495; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 496; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v6 497; GISEL-NEXT: v_subb_u32_e32 v9, vcc, 0, v7, vcc 498; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 499; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4 500; GISEL-NEXT: v_mul_lo_u32 v12, v8, v5 501; GISEL-NEXT: v_mul_hi_u32 v13, v8, v4 502; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 503; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 504; GISEL-NEXT: v_mul_lo_u32 v12, v5, v10 505; GISEL-NEXT: v_mul_lo_u32 v13, v4, v11 506; GISEL-NEXT: v_mul_hi_u32 v14, v4, v10 507; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 508; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 509; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 510; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 511; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 512; GISEL-NEXT: v_mul_lo_u32 v13, v5, v11 513; GISEL-NEXT: v_mul_hi_u32 v10, v5, v10 514; GISEL-NEXT: v_mul_hi_u32 v14, v4, v11 515; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 516; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 517; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 518; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 519; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 520; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 521; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 522; GISEL-NEXT: 
v_add_i32_e32 v12, vcc, v13, v12 523; GISEL-NEXT: v_mul_hi_u32 v11, v5, v11 524; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 525; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 526; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v11, vcc 527; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 528; GISEL-NEXT: v_mul_lo_u32 v9, v9, v4 529; GISEL-NEXT: v_mul_lo_u32 v11, v8, v5 530; GISEL-NEXT: v_mul_hi_u32 v8, v8, v4 531; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 532; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 533; GISEL-NEXT: v_mul_lo_u32 v9, v5, v10 534; GISEL-NEXT: v_mul_lo_u32 v11, v4, v8 535; GISEL-NEXT: v_mul_hi_u32 v12, v4, v10 536; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 537; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 538; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 539; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 540; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 541; GISEL-NEXT: v_mul_lo_u32 v11, v5, v8 542; GISEL-NEXT: v_mul_hi_u32 v10, v5, v10 543; GISEL-NEXT: v_mul_hi_u32 v12, v4, v8 544; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 545; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 546; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 547; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 548; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 549; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 550; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 551; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 552; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8 553; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 554; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 555; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc 556; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4 557; GISEL-NEXT: v_mul_lo_u32 v9, v2, v5 558; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4 559; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 560; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 561; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 562; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 563; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 564; GISEL-NEXT: v_mul_lo_u32 v9, v3, v5 565; GISEL-NEXT: 
v_mul_hi_u32 v4, v3, v4 566; GISEL-NEXT: v_mul_hi_u32 v10, v2, v5 567; GISEL-NEXT: v_add_i32_e32 v4, vcc, v9, v4 568; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 569; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 570; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 571; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 572; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 573; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 574; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 575; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 576; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 577; GISEL-NEXT: v_mul_lo_u32 v8, v6, v4 578; GISEL-NEXT: v_mul_lo_u32 v9, v7, v4 579; GISEL-NEXT: v_mul_lo_u32 v5, v6, v5 580; GISEL-NEXT: v_mul_hi_u32 v4, v6, v4 581; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5 582; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 583; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 584; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], v3, v4, vcc 585; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 586; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v7 587; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] 588; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 589; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] 590; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v7 591; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[4:5] 592; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v2, v6 593; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc 594; GISEL-NEXT: v_subbrev_u32_e64 v9, vcc, 0, v3, s[4:5] 595; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v9, v7 596; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc 597; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v6 598; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc 599; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v7 600; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc 601; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v8, v6 602; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v7, s[4:5] 603; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 604; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 605; GISEL-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc 606; GISEL-NEXT: 
v_cndmask_b32_e32 v3, v9, v3, vcc 607; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 608; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 609; GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc 610; GISEL-NEXT: s_setpc_b64 s[30:31] 611; 612; CGP-LABEL: v_urem_v2i64: 613; CGP: ; %bb.0: 614; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 615; CGP-NEXT: v_mov_b32_e32 v10, v0 616; CGP-NEXT: v_mov_b32_e32 v11, v1 617; CGP-NEXT: v_mov_b32_e32 v8, v2 618; CGP-NEXT: v_mov_b32_e32 v9, v3 619; CGP-NEXT: v_or_b32_e32 v1, v11, v5 620; CGP-NEXT: v_mov_b32_e32 v0, 0 621; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] 622; CGP-NEXT: v_cvt_f32_u32_e32 v2, v4 623; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 624; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc 625; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 626; CGP-NEXT: s_cbranch_execz .LBB2_2 627; CGP-NEXT: ; %bb.1: 628; CGP-NEXT: v_cvt_f32_u32_e32 v0, v5 629; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4 630; CGP-NEXT: v_subb_u32_e32 v3, vcc, 0, v5, vcc 631; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v0 632; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v2 633; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 634; CGP-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0 635; CGP-NEXT: v_trunc_f32_e32 v2, v2 636; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 637; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 638; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 639; CGP-NEXT: v_mul_lo_u32 v12, v1, v2 640; CGP-NEXT: v_mul_lo_u32 v13, v1, v0 641; CGP-NEXT: v_mul_lo_u32 v14, v3, v0 642; CGP-NEXT: v_mul_hi_u32 v15, v1, v0 643; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 644; CGP-NEXT: v_mul_lo_u32 v14, v2, v13 645; CGP-NEXT: v_mul_hi_u32 v16, v0, v13 646; CGP-NEXT: v_mul_hi_u32 v13, v2, v13 647; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 648; CGP-NEXT: v_mul_lo_u32 v15, v0, v12 649; CGP-NEXT: v_mul_lo_u32 v17, v2, v12 650; CGP-NEXT: v_mul_hi_u32 v18, v0, v12 651; CGP-NEXT: v_mul_hi_u32 v12, v2, v12 652; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 653; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 654; CGP-NEXT: v_add_i32_e32 v13, vcc, 
v17, v13 655; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 656; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 657; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 658; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18 659; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 660; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 661; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16 662; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 663; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 664; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 665; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 666; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 667; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v12, vcc 668; CGP-NEXT: v_mul_lo_u32 v12, v1, v0 669; CGP-NEXT: v_mul_lo_u32 v3, v3, v0 670; CGP-NEXT: v_mul_hi_u32 v13, v1, v0 671; CGP-NEXT: v_mul_lo_u32 v1, v1, v2 672; CGP-NEXT: v_mul_lo_u32 v14, v2, v12 673; CGP-NEXT: v_mul_hi_u32 v15, v0, v12 674; CGP-NEXT: v_mul_hi_u32 v12, v2, v12 675; CGP-NEXT: v_add_i32_e32 v1, vcc, v3, v1 676; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v13 677; CGP-NEXT: v_mul_lo_u32 v3, v0, v1 678; CGP-NEXT: v_mul_lo_u32 v13, v2, v1 679; CGP-NEXT: v_mul_hi_u32 v16, v0, v1 680; CGP-NEXT: v_mul_hi_u32 v1, v2, v1 681; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3 682; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 683; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 684; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 685; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v15 686; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 687; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16 688; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 689; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3 690; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 691; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v3 692; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 693; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 694; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v12 695; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v3 696; CGP-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc 697; CGP-NEXT: v_mul_lo_u32 v2, v11, v0 698; CGP-NEXT: v_mul_hi_u32 v3, v10, 
v0 699; CGP-NEXT: v_mul_hi_u32 v0, v11, v0 700; CGP-NEXT: v_mul_lo_u32 v12, v10, v1 701; CGP-NEXT: v_mul_lo_u32 v13, v11, v1 702; CGP-NEXT: v_mul_hi_u32 v14, v10, v1 703; CGP-NEXT: v_mul_hi_u32 v1, v11, v1 704; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 705; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 706; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0 707; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 708; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 709; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 710; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 711; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 712; CGP-NEXT: v_add_i32_e32 v2, vcc, v12, v2 713; CGP-NEXT: v_add_i32_e32 v3, vcc, v13, v3 714; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 715; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 716; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 717; CGP-NEXT: v_mul_lo_u32 v3, v4, v0 718; CGP-NEXT: v_mul_lo_u32 v12, v5, v0 719; CGP-NEXT: v_mul_hi_u32 v0, v4, v0 720; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 721; CGP-NEXT: v_mul_lo_u32 v1, v4, v1 722; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 723; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 724; CGP-NEXT: v_sub_i32_e32 v1, vcc, v10, v3 725; CGP-NEXT: v_subb_u32_e64 v2, s[4:5], v11, v0, vcc 726; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v11, v0 727; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v4 728; CGP-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5] 729; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 730; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] 731; CGP-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc 732; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v2, v5 733; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc 734; CGP-NEXT: v_sub_i32_e32 v10, vcc, v1, v4 735; CGP-NEXT: v_subbrev_u32_e64 v11, s[4:5], 0, v0, vcc 736; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v4 737; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] 738; CGP-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc 739; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v11, v5 740; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc 741; CGP-NEXT: v_sub_i32_e32 v4, vcc, v10, v4 742; 
CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc 743; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v11, v5 744; CGP-NEXT: v_cndmask_b32_e32 v5, v13, v12, vcc 745; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 746; CGP-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc 747; CGP-NEXT: v_cndmask_b32_e32 v5, v11, v0, vcc 748; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 749; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc 750; CGP-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc 751; CGP-NEXT: ; implicit-def: $vgpr2 752; CGP-NEXT: ; implicit-def: $vgpr4 753; CGP-NEXT: ; implicit-def: $vgpr10 754; CGP-NEXT: .LBB2_2: ; %Flow1 755; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] 756; CGP-NEXT: s_cbranch_execz .LBB2_4 757; CGP-NEXT: ; %bb.3: 758; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v2 759; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4 760; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 761; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 762; CGP-NEXT: v_mul_lo_u32 v1, v1, v0 763; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 764; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 765; CGP-NEXT: v_mul_hi_u32 v0, v10, v0 766; CGP-NEXT: v_mul_lo_u32 v0, v0, v4 767; CGP-NEXT: v_sub_i32_e32 v0, vcc, v10, v0 768; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v4 769; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 770; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 771; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v4 772; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 773; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 774; CGP-NEXT: v_mov_b32_e32 v1, 0 775; CGP-NEXT: .LBB2_4: 776; CGP-NEXT: s_or_b64 exec, exec, s[4:5] 777; CGP-NEXT: v_or_b32_e32 v3, v9, v7 778; CGP-NEXT: v_mov_b32_e32 v2, 0 779; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] 780; CGP-NEXT: v_cvt_f32_u32_e32 v4, v6 781; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 782; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc 783; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 784; CGP-NEXT: s_cbranch_execnz .LBB2_7 785; CGP-NEXT: ; %bb.5: ; %Flow 786; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] 787; CGP-NEXT: s_cbranch_execnz .LBB2_8 788; CGP-NEXT: .LBB2_6: 789; CGP-NEXT: s_or_b64 
exec, exec, s[4:5] 790; CGP-NEXT: s_setpc_b64 s[30:31] 791; CGP-NEXT: .LBB2_7: 792; CGP-NEXT: v_cvt_f32_u32_e32 v2, v7 793; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v6 794; CGP-NEXT: v_subb_u32_e32 v5, vcc, 0, v7, vcc 795; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v2 796; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v4 797; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 798; CGP-NEXT: v_mul_f32_e32 v4, 0x2f800000, v2 799; CGP-NEXT: v_trunc_f32_e32 v4, v4 800; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4 801; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 802; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 803; CGP-NEXT: v_mul_lo_u32 v10, v3, v4 804; CGP-NEXT: v_mul_lo_u32 v11, v3, v2 805; CGP-NEXT: v_mul_lo_u32 v12, v5, v2 806; CGP-NEXT: v_mul_hi_u32 v13, v3, v2 807; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 808; CGP-NEXT: v_mul_lo_u32 v12, v4, v11 809; CGP-NEXT: v_mul_hi_u32 v14, v2, v11 810; CGP-NEXT: v_mul_hi_u32 v11, v4, v11 811; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 812; CGP-NEXT: v_mul_lo_u32 v13, v2, v10 813; CGP-NEXT: v_mul_lo_u32 v15, v4, v10 814; CGP-NEXT: v_mul_hi_u32 v16, v2, v10 815; CGP-NEXT: v_mul_hi_u32 v10, v4, v10 816; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 817; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 818; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 819; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 820; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 821; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 822; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 823; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 824; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 825; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14 826; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 827; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 828; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 829; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 830; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11 831; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v10, vcc 832; CGP-NEXT: v_mul_lo_u32 v10, v3, v2 833; CGP-NEXT: v_mul_lo_u32 v5, v5, v2 834; CGP-NEXT: v_mul_hi_u32 v11, v3, v2 
835; CGP-NEXT: v_mul_lo_u32 v3, v3, v4 836; CGP-NEXT: v_mul_lo_u32 v12, v4, v10 837; CGP-NEXT: v_mul_hi_u32 v13, v2, v10 838; CGP-NEXT: v_mul_hi_u32 v10, v4, v10 839; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3 840; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v11 841; CGP-NEXT: v_mul_lo_u32 v5, v2, v3 842; CGP-NEXT: v_mul_lo_u32 v11, v4, v3 843; CGP-NEXT: v_mul_hi_u32 v14, v2, v3 844; CGP-NEXT: v_mul_hi_u32 v3, v4, v3 845; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 846; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 847; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 848; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 849; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13 850; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 851; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 852; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 853; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 854; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 855; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 856; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 857; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 858; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v10 859; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5 860; CGP-NEXT: v_addc_u32_e32 v3, vcc, v4, v3, vcc 861; CGP-NEXT: v_mul_lo_u32 v4, v9, v2 862; CGP-NEXT: v_mul_hi_u32 v5, v8, v2 863; CGP-NEXT: v_mul_hi_u32 v2, v9, v2 864; CGP-NEXT: v_mul_lo_u32 v10, v8, v3 865; CGP-NEXT: v_mul_lo_u32 v11, v9, v3 866; CGP-NEXT: v_mul_hi_u32 v12, v8, v3 867; CGP-NEXT: v_mul_hi_u32 v3, v9, v3 868; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 869; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 870; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2 871; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 872; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 873; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 874; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 875; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 876; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 877; CGP-NEXT: v_add_i32_e32 v5, vcc, v11, v5 878; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 879; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, 
vcc 880; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 881; CGP-NEXT: v_mul_lo_u32 v5, v6, v2 882; CGP-NEXT: v_mul_lo_u32 v10, v7, v2 883; CGP-NEXT: v_mul_hi_u32 v2, v6, v2 884; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 885; CGP-NEXT: v_mul_lo_u32 v3, v6, v3 886; CGP-NEXT: v_add_i32_e32 v3, vcc, v10, v3 887; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 888; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v5 889; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v9, v2, vcc 890; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v9, v2 891; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v6 892; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] 893; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v7 894; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] 895; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v7, vcc 896; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 897; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc 898; CGP-NEXT: v_sub_i32_e32 v8, vcc, v3, v6 899; CGP-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v2, vcc 900; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v6 901; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] 902; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v7, vcc 903; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v9, v7 904; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc 905; CGP-NEXT: v_sub_i32_e32 v6, vcc, v8, v6 906; CGP-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc 907; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v9, v7 908; CGP-NEXT: v_cndmask_b32_e32 v7, v11, v10, vcc 909; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 910; CGP-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc 911; CGP-NEXT: v_cndmask_b32_e32 v7, v9, v2, vcc 912; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 913; CGP-NEXT: v_cndmask_b32_e32 v2, v3, v6, vcc 914; CGP-NEXT: v_cndmask_b32_e32 v3, v4, v7, vcc 915; CGP-NEXT: ; implicit-def: $vgpr4 916; CGP-NEXT: ; implicit-def: $vgpr6 917; CGP-NEXT: ; implicit-def: $vgpr8 918; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] 919; CGP-NEXT: s_cbranch_execz .LBB2_6 920; CGP-NEXT: .LBB2_8: 921; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v4 922; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v6 923; CGP-NEXT: v_mul_f32_e32 v2, 
0x4f7ffffe, v2 924; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 925; CGP-NEXT: v_mul_lo_u32 v3, v3, v2 926; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 927; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 928; CGP-NEXT: v_mul_hi_u32 v2, v8, v2 929; CGP-NEXT: v_mul_lo_u32 v2, v2, v6 930; CGP-NEXT: v_sub_i32_e32 v2, vcc, v8, v2 931; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v6 932; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 933; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 934; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v6 935; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 936; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 937; CGP-NEXT: v_mov_b32_e32 v3, 0 938; CGP-NEXT: s_or_b64 exec, exec, s[4:5] 939; CGP-NEXT: s_setpc_b64 s[30:31] 940 %result = urem <2 x i64> %num, %den 941 ret <2 x i64> %result 942} 943
; urem of an i64 by the constant 4096 (0x1000, a power of two). Both llc
; invocations share the CHECK prefix for this function and expect no division
; expansion: v0 is ANDed with 0xfff and v1 is set to 0.
944define i64 @v_urem_i64_pow2k_denom(i64 %num) { 945; CHECK-LABEL: v_urem_i64_pow2k_denom: 946; CHECK: ; %bb.0: 947; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 948; CHECK-NEXT: v_and_b32_e32 v0, 0xfff, v0 949; CHECK-NEXT: v_mov_b32_e32 v1, 0 950; CHECK-NEXT: s_setpc_b64 s[30:31] 951 %result = urem i64 %num, 4096 952 ret i64 %result 953} 954
; <2 x i64> form of the power-of-two test, using the splat constant 4096.
; Expected output, again shared by both llc invocations: v0 and v2 are ANDed
; with 0xfff, and v1 and v3 are set to 0.
955define <2 x i64> @v_urem_v2i64_pow2k_denom(<2 x i64> %num) { 956; CHECK-LABEL: v_urem_v2i64_pow2k_denom: 957; CHECK: ; %bb.0: 958; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 959; CHECK-NEXT: v_and_b32_e32 v0, 0xfff, v0 960; CHECK-NEXT: v_and_b32_e32 v2, 0xfff, v2 961; CHECK-NEXT: v_mov_b32_e32 v1, 0 962; CHECK-NEXT: v_mov_b32_e32 v3, 0 963; CHECK-NEXT: s_setpc_b64 s[30:31] 964 %result = urem <2 x i64> %num, <i64 4096, i64 4096> 965 ret <2 x i64> %result 966} 967
; urem of an i64 by the constant 1235195 (0x12d8fb), which is odd and so cannot
; be reduced to a mask. The expected sequence, shared by both llc invocations
; under the CHECK prefix, starts from a v_rcp_iflag_f32 estimate and continues
; with integer multiply, add-with-carry, compare and select instructions.
; 0xffed2705 is the 32-bit two's-complement negation of 0x12d8fb.
968define i64 @v_urem_i64_oddk_denom(i64 %num) { 969; CHECK-LABEL: v_urem_i64_oddk_denom: 970; CHECK: ; %bb.0: 971; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 972; CHECK-NEXT: v_mov_b32_e32 v2, 0x12d8fb 973; CHECK-NEXT: v_cvt_f32_u32_e32 v3, 0x12d8fb 974; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 975; CHECK-NEXT: v_mov_b32_e32 v5, 0xffed2705 976; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4
977; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 978; CHECK-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 979; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 980; CHECK-NEXT: v_trunc_f32_e32 v4, v4 981; CHECK-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4 982; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 983; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 984; CHECK-NEXT: v_mul_lo_u32 v6, v4, v5 985; CHECK-NEXT: v_mul_lo_u32 v7, v3, v5 986; CHECK-NEXT: v_mul_hi_u32 v8, v3, v5 987; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v6, v3 988; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 989; CHECK-NEXT: v_mul_lo_u32 v8, v4, v7 990; CHECK-NEXT: v_mul_hi_u32 v9, v3, v7 991; CHECK-NEXT: v_mul_hi_u32 v7, v4, v7 992; CHECK-NEXT: v_mul_lo_u32 v10, v3, v6 993; CHECK-NEXT: v_mul_lo_u32 v11, v4, v6 994; CHECK-NEXT: v_mul_hi_u32 v12, v3, v6 995; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 996; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 997; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 998; CHECK-NEXT: v_add_i32_e32 v7, vcc, v11, v7 999; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1000; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 1001; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1002; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v12 1003; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1004; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 1005; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9 1006; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 1007; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1008; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 1009; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 1010; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7 1011; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc 1012; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 1013; CHECK-NEXT: v_mul_hi_u32 v7, v3, v5 1014; CHECK-NEXT: v_mul_lo_u32 v5, v4, v5 1015; CHECK-NEXT: v_mul_lo_u32 v8, v4, v6 1016; CHECK-NEXT: v_mul_hi_u32 v9, v3, v6 1017; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 1018; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v5, v3 1019; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 1020; CHECK-NEXT: v_mul_lo_u32 v7, v3, 
v5 1021; CHECK-NEXT: v_mul_lo_u32 v10, v4, v5 1022; CHECK-NEXT: v_mul_hi_u32 v11, v3, v5 1023; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 1024; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 1025; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1026; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6 1027; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1028; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 1029; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1030; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11 1031; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1032; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 1033; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9 1034; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 1035; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1036; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 1037; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 1038; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 1039; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc 1040; CHECK-NEXT: v_mul_lo_u32 v5, v1, v3 1041; CHECK-NEXT: v_mul_hi_u32 v6, v0, v3 1042; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 1043; CHECK-NEXT: v_mul_lo_u32 v7, v0, v4 1044; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 1045; CHECK-NEXT: v_mul_hi_u32 v9, v0, v4 1046; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 1047; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 1048; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1049; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3 1050; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1051; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 1052; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1053; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v9 1054; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1055; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 1056; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 1057; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 1058; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1059; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 1060; CHECK-NEXT: v_mul_lo_u32 v6, v3, v2 1061; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2 1062; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 1063; CHECK-NEXT: 
v_mul_lo_u32 v4, v4, v2 1064; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 1065; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v6 1066; CHECK-NEXT: v_subb_u32_e64 v4, vcc, v1, v3, s[4:5] 1067; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 1068; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 1069; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc 1070; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v2 1071; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v4 1072; CHECK-NEXT: v_cndmask_b32_e64 v3, -1, v3, s[6:7] 1073; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] 1074; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v2 1075; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] 1076; CHECK-NEXT: s_mov_b64 s[4:5], vcc 1077; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, 0x12d8fb, v5 1078; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] 1079; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 1080; CHECK-NEXT: v_cndmask_b32_e64 v2, -1, v2, s[4:5] 1081; CHECK-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc 1082; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 1083; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc 1084; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 1085; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 1086; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1087; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc 1088; CHECK-NEXT: s_setpc_b64 s[30:31] 1089 %result = urem i64 %num, 1235195 1090 ret i64 %result 1091} 1092
; <2 x i64> urem by the splat constant 1235195 (0x12d8fb). For this function the
; two llc invocations produce different code, so the assertions are split into
; one group under the GISEL prefix and one under the CGP prefix instead of the
; shared CHECK prefix. The GISEL group contains scalar s_mov_b32, s_cmp_lg_u32
; and s_subb_u32 instructions that do not appear in the CGP group.
1093define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { 1094; GISEL-LABEL: v_urem_v2i64_oddk_denom: 1095; GISEL: ; %bb.0: 1096; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1097; GISEL-NEXT: v_mov_b32_e32 v4, 0x12d8fb 1098; GISEL-NEXT: v_cvt_f32_u32_e32 v6, 0x12d8fb 1099; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v7, 0 1100; GISEL-NEXT: s_mov_b32 s4, 1 1101; GISEL-NEXT: v_mov_b32_e32 v5, 0xffed2705 1102; GISEL-NEXT: s_mov_b32 s5, 1 1103; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v7 1104; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 1105; GISEL-NEXT: s_cmp_lg_u32 s4, 0 1106; GISEL-NEXT: s_subb_u32 s6, 0, 0 1107;
GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 1108; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v6 1109; GISEL-NEXT: s_cmp_lg_u32 s5, 0 1110; GISEL-NEXT: s_subb_u32 s7, 0, 0 1111; GISEL-NEXT: v_trunc_f32_e32 v7, v7 1112; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v7 1113; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 1114; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 1115; GISEL-NEXT: v_mul_lo_u32 v8, v7, v5 1116; GISEL-NEXT: v_mul_lo_u32 v9, v6, v5 1117; GISEL-NEXT: v_mul_lo_u32 v10, s6, v6 1118; GISEL-NEXT: v_mul_hi_u32 v11, v6, v5 1119; GISEL-NEXT: v_mul_lo_u32 v12, s7, v6 1120; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v8 1121; GISEL-NEXT: v_mul_lo_u32 v13, v7, v9 1122; GISEL-NEXT: v_mul_hi_u32 v14, v6, v9 1123; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9 1124; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 1125; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 1126; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 1127; GISEL-NEXT: v_mul_lo_u32 v11, v6, v10 1128; GISEL-NEXT: v_mul_lo_u32 v12, v7, v10 1129; GISEL-NEXT: v_mul_hi_u32 v15, v6, v10 1130; GISEL-NEXT: v_mul_hi_u32 v10, v7, v10 1131; GISEL-NEXT: v_mul_lo_u32 v16, v6, v8 1132; GISEL-NEXT: v_mul_lo_u32 v17, v7, v8 1133; GISEL-NEXT: v_mul_hi_u32 v18, v6, v8 1134; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8 1135; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11 1136; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc 1137; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 1138; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 1139; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 1140; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v9 1141; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 1142; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v17, v9 1143; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] 1144; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 1145; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 1146; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v15 1147; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 1148; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1149; GISEL-NEXT: 
v_add_i32_e32 v9, vcc, v9, v18 1150; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 1151; GISEL-NEXT: v_add_i32_e32 v11, vcc, v19, v11 1152; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 1153; GISEL-NEXT: v_add_i32_e32 v14, vcc, v16, v15 1154; GISEL-NEXT: v_add_i32_e32 v15, vcc, v17, v18 1155; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 1156; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1157; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v14 1158; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1159; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 1160; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v14 1161; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 1162; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v13 1163; GISEL-NEXT: v_add_i32_e32 v11, vcc, v6, v11 1164; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v7, v10, vcc 1165; GISEL-NEXT: v_mul_lo_u32 v12, v11, v5 1166; GISEL-NEXT: v_mul_lo_u32 v13, s6, v11 1167; GISEL-NEXT: v_mul_hi_u32 v14, v11, v5 1168; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 1169; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc 1170; GISEL-NEXT: v_mul_lo_u32 v8, v6, v5 1171; GISEL-NEXT: v_mul_lo_u32 v9, s7, v6 1172; GISEL-NEXT: v_mul_hi_u32 v15, v6, v5 1173; GISEL-NEXT: v_mul_lo_u32 v16, v10, v5 1174; GISEL-NEXT: v_mul_lo_u32 v17, v10, v12 1175; GISEL-NEXT: v_mul_hi_u32 v18, v11, v12 1176; GISEL-NEXT: v_mul_hi_u32 v12, v10, v12 1177; GISEL-NEXT: v_mul_lo_u32 v5, v7, v5 1178; GISEL-NEXT: v_mul_lo_u32 v19, v7, v8 1179; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 1180; GISEL-NEXT: v_mul_hi_u32 v16, v6, v8 1181; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8 1182; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5 1183; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v14 1184; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v15 1185; GISEL-NEXT: v_mul_lo_u32 v13, v11, v9 1186; GISEL-NEXT: v_mul_lo_u32 v14, v10, v9 1187; GISEL-NEXT: v_mul_hi_u32 v15, v11, v9 1188; GISEL-NEXT: v_mul_hi_u32 v9, v10, v9 1189; GISEL-NEXT: v_add_i32_e32 v13, vcc, v17, v13 1190; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 1191; GISEL-NEXT: 
v_add_i32_e32 v13, vcc, v13, v18 1192; GISEL-NEXT: v_mul_lo_u32 v13, v6, v5 1193; GISEL-NEXT: v_mul_lo_u32 v18, v7, v5 1194; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v19, v13 1195; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] 1196; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v16 1197; GISEL-NEXT: v_mul_hi_u32 v13, v6, v5 1198; GISEL-NEXT: v_mul_hi_u32 v5, v7, v5 1199; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v14, v12 1200; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] 1201; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v18, v8 1202; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] 1203; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 1204; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 1205; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1206; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18 1207; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] 1208; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v13 1209; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1210; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 1211; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v18 1212; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13 1213; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 1214; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 1215; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v15 1216; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1217; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 1218; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 1219; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v14 1220; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v13 1221; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 1222; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v10, v9, vcc 1223; GISEL-NEXT: v_mul_lo_u32 v10, v1, v11 1224; GISEL-NEXT: v_mul_hi_u32 v12, v0, v11 1225; GISEL-NEXT: v_mul_hi_u32 v11, v1, v11 1226; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 1227; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v7, v5, vcc 1228; GISEL-NEXT: v_mul_lo_u32 v7, v3, v6 1229; GISEL-NEXT: v_mul_hi_u32 v8, v2, v6 1230; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 1231; GISEL-NEXT: v_mul_lo_u32 v13, v0, 
v9 1232; GISEL-NEXT: v_mul_lo_u32 v14, v1, v9 1233; GISEL-NEXT: v_mul_hi_u32 v15, v0, v9 1234; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9 1235; GISEL-NEXT: v_mul_lo_u32 v16, v2, v5 1236; GISEL-NEXT: v_mul_lo_u32 v17, v3, v5 1237; GISEL-NEXT: v_mul_hi_u32 v18, v2, v5 1238; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 1239; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13 1240; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1241; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v11 1242; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1243; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v16 1244; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 1245; GISEL-NEXT: v_add_i32_e32 v6, vcc, v17, v6 1246; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 1247; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 1248; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1249; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15 1250; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1251; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 1252; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1253; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v18 1254; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1255; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 1256; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v12 1257; GISEL-NEXT: v_add_i32_e32 v7, vcc, v16, v7 1258; GISEL-NEXT: v_add_i32_e32 v8, vcc, v17, v8 1259; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 1260; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1261; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 1262; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1263; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 1264; GISEL-NEXT: v_mul_lo_u32 v12, v10, v4 1265; GISEL-NEXT: v_mul_hi_u32 v10, v10, v4 1266; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 1267; GISEL-NEXT: v_mul_lo_u32 v8, v6, v4 1268; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4 1269; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 1270; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 1271; GISEL-NEXT: v_mul_lo_u32 v7, v9, v4 1272; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4 1273; GISEL-NEXT: v_add_i32_e32 v7, vcc, 
v7, v10 1274; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 1275; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v12 1276; GISEL-NEXT: v_subb_u32_e64 v6, vcc, v1, v7, s[4:5] 1277; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 1278; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 1279; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc 1280; GISEL-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v8 1281; GISEL-NEXT: v_subb_u32_e64 v8, vcc, v3, v5, s[6:7] 1282; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v5 1283; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 1284; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc 1285; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v2, v4 1286; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v6 1287; GISEL-NEXT: v_cndmask_b32_e64 v7, -1, v7, s[8:9] 1288; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] 1289; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 1290; GISEL-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[4:5] 1291; GISEL-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[6:7] 1292; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v4 1293; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] 1294; GISEL-NEXT: s_mov_b64 s[4:5], vcc 1295; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, 0x12d8fb, v9 1296; GISEL-NEXT: v_sub_i32_e64 v12, s[6:7], v0, v4 1297; GISEL-NEXT: v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7] 1298; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v4 1299; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[6:7] 1300; GISEL-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5] 1301; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 1302; GISEL-NEXT: v_cndmask_b32_e64 v13, -1, v13, s[4:5] 1303; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], v12, v4 1304; GISEL-NEXT: v_subbrev_u32_e64 v14, s[4:5], 0, v1, s[4:5] 1305; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 1306; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v10, s[4:5] 1307; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v3, vcc 1308; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 1309; GISEL-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc 1310; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v10 1311; GISEL-NEXT: v_cndmask_b32_e64 
v9, v9, v11, s[4:5] 1312; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc 1313; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 1314; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1315; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v15, s[4:5] 1316; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 1317; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[4:5] 1318; GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc 1319; GISEL-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5] 1320; GISEL-NEXT: s_setpc_b64 s[30:31] 1321; 1322; CGP-LABEL: v_urem_v2i64_oddk_denom: 1323; CGP: ; %bb.0: 1324; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1325; CGP-NEXT: v_mov_b32_e32 v4, 0x12d8fb 1326; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0x12d8fb 1327; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 1328; CGP-NEXT: v_mov_b32_e32 v7, 0xffed2705 1329; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 1330; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 1331; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 1332; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 1333; CGP-NEXT: v_trunc_f32_e32 v6, v6 1334; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 1335; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 1336; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 1337; CGP-NEXT: v_mul_lo_u32 v8, v6, v7 1338; CGP-NEXT: v_mul_lo_u32 v9, v5, v7 1339; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 1340; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v5 1341; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 1342; CGP-NEXT: v_mul_lo_u32 v10, v6, v9 1343; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 1344; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 1345; CGP-NEXT: v_mul_lo_u32 v12, v5, v8 1346; CGP-NEXT: v_mul_lo_u32 v13, v6, v8 1347; CGP-NEXT: v_mul_hi_u32 v14, v5, v8 1348; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 1349; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 1350; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1351; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9 1352; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1353; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 1354; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1355; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v14 1356; CGP-NEXT: 
v_cndmask_b32_e64 v11, 0, 1, vcc 1357; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 1358; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 1359; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 1360; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1361; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 1362; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 1363; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 1364; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc 1365; CGP-NEXT: v_mul_lo_u32 v8, v5, v7 1366; CGP-NEXT: v_mul_hi_u32 v9, v5, v7 1367; CGP-NEXT: v_mul_lo_u32 v7, v6, v7 1368; CGP-NEXT: v_mul_lo_u32 v10, v6, v8 1369; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 1370; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 1371; CGP-NEXT: v_sub_i32_e32 v7, vcc, v7, v5 1372; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 1373; CGP-NEXT: v_mul_lo_u32 v9, v5, v7 1374; CGP-NEXT: v_mul_lo_u32 v12, v6, v7 1375; CGP-NEXT: v_mul_hi_u32 v13, v5, v7 1376; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 1377; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 1378; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1379; CGP-NEXT: v_add_i32_e32 v8, vcc, v12, v8 1380; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1381; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 1382; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1383; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v13 1384; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1385; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 1386; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v11 1387; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 1388; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1389; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 1390; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 1391; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 1392; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc 1393; CGP-NEXT: v_mul_lo_u32 v7, v1, v5 1394; CGP-NEXT: v_mul_hi_u32 v8, v0, v5 1395; CGP-NEXT: v_mul_hi_u32 v9, v1, v5 1396; CGP-NEXT: v_mul_lo_u32 v10, v3, v5 1397; CGP-NEXT: v_mul_hi_u32 v11, v2, v5 1398; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 1399; CGP-NEXT: v_mul_lo_u32 v12, v0, v6 1400; CGP-NEXT: v_mul_lo_u32 v13, 
v1, v6 1401; CGP-NEXT: v_mul_hi_u32 v14, v0, v6 1402; CGP-NEXT: v_mul_hi_u32 v15, v1, v6 1403; CGP-NEXT: v_mul_lo_u32 v16, v2, v6 1404; CGP-NEXT: v_mul_lo_u32 v17, v3, v6 1405; CGP-NEXT: v_mul_hi_u32 v18, v2, v6 1406; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 1407; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12 1408; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1409; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9 1410; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1411; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v16 1412; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 1413; CGP-NEXT: v_add_i32_e32 v5, vcc, v17, v5 1414; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 1415; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 1416; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1417; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v14 1418; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1419; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 1420; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1421; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v18 1422; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1423; CGP-NEXT: v_add_i32_e32 v7, vcc, v12, v7 1424; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9 1425; CGP-NEXT: v_add_i32_e32 v10, vcc, v16, v10 1426; CGP-NEXT: v_add_i32_e32 v11, vcc, v17, v11 1427; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 1428; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1429; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10 1430; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1431; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 1432; CGP-NEXT: v_mul_lo_u32 v9, v7, v4 1433; CGP-NEXT: v_mul_hi_u32 v7, v7, v4 1434; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 1435; CGP-NEXT: v_mul_lo_u32 v11, v5, v4 1436; CGP-NEXT: v_mul_hi_u32 v5, v5, v4 1437; CGP-NEXT: v_add_i32_e32 v8, vcc, v15, v8 1438; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 1439; CGP-NEXT: v_mul_lo_u32 v8, v8, v4 1440; CGP-NEXT: v_mul_lo_u32 v6, v6, v4 1441; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 1442; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 1443; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v9 1444; CGP-NEXT: 
v_subb_u32_e64 v6, vcc, v1, v7, s[4:5] 1445; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 1446; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 1447; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc 1448; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v11 1449; CGP-NEXT: v_subb_u32_e64 v8, vcc, v3, v5, s[6:7] 1450; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v5 1451; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 1452; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc 1453; CGP-NEXT: v_sub_i32_e32 v9, vcc, v2, v4 1454; CGP-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v6 1455; CGP-NEXT: v_cndmask_b32_e64 v7, -1, v7, s[8:9] 1456; CGP-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] 1457; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 1458; CGP-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[4:5] 1459; CGP-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[6:7] 1460; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v4 1461; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] 1462; CGP-NEXT: s_mov_b64 s[4:5], vcc 1463; CGP-NEXT: v_subrev_i32_e32 v11, vcc, 0x12d8fb, v9 1464; CGP-NEXT: v_sub_i32_e64 v12, s[6:7], v0, v4 1465; CGP-NEXT: v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7] 1466; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v4 1467; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[6:7] 1468; CGP-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5] 1469; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 1470; CGP-NEXT: v_cndmask_b32_e64 v13, -1, v13, s[4:5] 1471; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v12, v4 1472; CGP-NEXT: v_subbrev_u32_e64 v14, s[4:5], 0, v1, s[4:5] 1473; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 1474; CGP-NEXT: v_cndmask_b32_e64 v10, -1, v10, s[4:5] 1475; CGP-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v3, vcc 1476; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 1477; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc 1478; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v10 1479; CGP-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[4:5] 1480; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc 1481; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 1482; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1483; CGP-NEXT: 
v_cndmask_b32_e64 v3, v3, v15, s[4:5] 1484; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 1485; CGP-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[4:5] 1486; CGP-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc 1487; CGP-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5] 1488; CGP-NEXT: s_setpc_b64 s[30:31] 1489 %result = urem <2 x i64> %num, <i64 1235195, i64 1235195> 1490 ret <2 x i64> %result 1491} 1492 1493define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) { 1494; CHECK-LABEL: v_urem_i64_pow2_shl_denom: 1495; CHECK: ; %bb.0: 1496; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1497; CHECK-NEXT: v_mov_b32_e32 v3, v0 1498; CHECK-NEXT: v_mov_b32_e32 v4, v1 1499; CHECK-NEXT: v_mov_b32_e32 v0, 0x1000 1500; CHECK-NEXT: v_mov_b32_e32 v1, 0 1501; CHECK-NEXT: v_mov_b32_e32 v7, 0 1502; CHECK-NEXT: v_lshl_b64 v[5:6], v[0:1], v2 1503; CHECK-NEXT: v_or_b32_e32 v8, v4, v6 1504; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[7:8] 1505; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v5 1506; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 1507; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc 1508; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 1509; CHECK-NEXT: s_cbranch_execnz .LBB7_3 1510; CHECK-NEXT: ; %bb.1: ; %Flow 1511; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] 1512; CHECK-NEXT: s_cbranch_execnz .LBB7_4 1513; CHECK-NEXT: .LBB7_2: 1514; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] 1515; CHECK-NEXT: s_setpc_b64 s[30:31] 1516; CHECK-NEXT: .LBB7_3: 1517; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v6 1518; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v5 1519; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v6, vcc 1520; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v0 1521; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2 1522; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 1523; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0 1524; CHECK-NEXT: v_trunc_f32_e32 v2, v2 1525; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 1526; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 1527; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 1528; CHECK-NEXT: v_mul_lo_u32 v8, v1, v2 1529; CHECK-NEXT: v_mul_lo_u32 v9, 
v1, v0 1530; CHECK-NEXT: v_mul_lo_u32 v10, v7, v0 1531; CHECK-NEXT: v_mul_hi_u32 v11, v1, v0 1532; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 1533; CHECK-NEXT: v_mul_lo_u32 v10, v2, v9 1534; CHECK-NEXT: v_mul_hi_u32 v12, v0, v9 1535; CHECK-NEXT: v_mul_hi_u32 v9, v2, v9 1536; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11 1537; CHECK-NEXT: v_mul_lo_u32 v11, v0, v8 1538; CHECK-NEXT: v_mul_lo_u32 v13, v2, v8 1539; CHECK-NEXT: v_mul_hi_u32 v14, v0, v8 1540; CHECK-NEXT: v_mul_hi_u32 v8, v2, v8 1541; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11 1542; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1543; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9 1544; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1545; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 1546; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1547; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14 1548; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1549; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 1550; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12 1551; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 1552; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1553; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 1554; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 1555; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9 1556; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v8, vcc 1557; CHECK-NEXT: v_mul_lo_u32 v8, v1, v0 1558; CHECK-NEXT: v_mul_lo_u32 v7, v7, v0 1559; CHECK-NEXT: v_mul_hi_u32 v9, v1, v0 1560; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2 1561; CHECK-NEXT: v_mul_lo_u32 v10, v2, v8 1562; CHECK-NEXT: v_mul_hi_u32 v11, v0, v8 1563; CHECK-NEXT: v_mul_hi_u32 v8, v2, v8 1564; CHECK-NEXT: v_add_i32_e32 v1, vcc, v7, v1 1565; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v9 1566; CHECK-NEXT: v_mul_lo_u32 v7, v0, v1 1567; CHECK-NEXT: v_mul_lo_u32 v9, v2, v1 1568; CHECK-NEXT: v_mul_hi_u32 v12, v0, v1 1569; CHECK-NEXT: v_mul_hi_u32 v1, v2, v1 1570; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 1571; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1572; CHECK-NEXT: v_add_i32_e32 v8, vcc, 
v9, v8 1573; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1574; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v11 1575; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1576; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v12 1577; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1578; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 1579; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 1580; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 1581; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1582; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 1583; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 1584; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7 1585; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc 1586; CHECK-NEXT: v_mul_lo_u32 v2, v4, v0 1587; CHECK-NEXT: v_mul_hi_u32 v7, v3, v0 1588; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0 1589; CHECK-NEXT: v_mul_lo_u32 v8, v3, v1 1590; CHECK-NEXT: v_mul_lo_u32 v9, v4, v1 1591; CHECK-NEXT: v_mul_hi_u32 v10, v3, v1 1592; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1 1593; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 1594; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1595; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0 1596; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1597; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 1598; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1599; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v10 1600; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1601; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 1602; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 1603; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 1604; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1605; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 1606; CHECK-NEXT: v_mul_lo_u32 v7, v5, v0 1607; CHECK-NEXT: v_mul_lo_u32 v8, v6, v0 1608; CHECK-NEXT: v_mul_hi_u32 v0, v5, v0 1609; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 1610; CHECK-NEXT: v_mul_lo_u32 v1, v5, v1 1611; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1 1612; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0 1613; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v3, v7 1614; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v4, v0, vcc 
1615; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v4, v0 1616; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v5 1617; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5] 1618; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 1619; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] 1620; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v0, v6, vcc 1621; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v2, v6 1622; CHECK-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc 1623; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v1, v5 1624; CHECK-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v0, vcc 1625; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v5 1626; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] 1627; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v0, v6, vcc 1628; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v7, v6 1629; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc 1630; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v4, v5 1631; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc 1632; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v7, v6 1633; CHECK-NEXT: v_cndmask_b32_e32 v6, v9, v8, vcc 1634; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 1635; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc 1636; CHECK-NEXT: v_cndmask_b32_e32 v5, v7, v0, vcc 1637; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 1638; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc 1639; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc 1640; CHECK-NEXT: ; implicit-def: $vgpr2 1641; CHECK-NEXT: ; implicit-def: $vgpr5_vgpr6 1642; CHECK-NEXT: ; implicit-def: $vgpr3 1643; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] 1644; CHECK-NEXT: s_cbranch_execz .LBB7_2 1645; CHECK-NEXT: .LBB7_4: 1646; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2 1647; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v5 1648; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1649; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 1650; CHECK-NEXT: v_mul_lo_u32 v1, v1, v0 1651; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 1652; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 1653; CHECK-NEXT: v_mul_hi_u32 v0, v3, v0 1654; CHECK-NEXT: v_mul_lo_u32 v0, v0, v5 1655; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v3, v0 1656; CHECK-NEXT: 
v_sub_i32_e32 v1, vcc, v0, v5 1657; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 1658; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1659; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v5 1660; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 1661; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1662; CHECK-NEXT: v_mov_b32_e32 v1, 0 1663; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] 1664; CHECK-NEXT: s_setpc_b64 s[30:31] 1665 %shl.y = shl i64 4096, %y 1666 %r = urem i64 %x, %shl.y 1667 ret i64 %r 1668} 1669 1670define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { 1671; GISEL-LABEL: v_urem_v2i64_pow2_shl_denom: 1672; GISEL: ; %bb.0: 1673; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1674; GISEL-NEXT: v_mov_b32_e32 v9, 0x1000 1675; GISEL-NEXT: v_mov_b32_e32 v10, 0 1676; GISEL-NEXT: v_lshl_b64 v[7:8], v[9:10], v4 1677; GISEL-NEXT: v_lshl_b64 v[4:5], v[9:10], v6 1678; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v7 1679; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v8 1680; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v9 1681; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 1682; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 1683; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v6 1684; GISEL-NEXT: v_trunc_f32_e32 v9, v9 1685; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v9 1686; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 1687; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 1688; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v7 1689; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v8, vcc 1690; GISEL-NEXT: v_mul_lo_u32 v12, v10, v6 1691; GISEL-NEXT: v_mul_lo_u32 v13, v11, v6 1692; GISEL-NEXT: v_mul_lo_u32 v14, v10, v9 1693; GISEL-NEXT: v_mul_hi_u32 v15, v10, v6 1694; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 1695; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 1696; GISEL-NEXT: v_mul_lo_u32 v14, v9, v12 1697; GISEL-NEXT: v_mul_lo_u32 v15, v6, v13 1698; GISEL-NEXT: v_mul_hi_u32 v16, v6, v12 1699; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 1700; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1701; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, 
v16 1702; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1703; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 1704; GISEL-NEXT: v_mul_lo_u32 v15, v9, v13 1705; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 1706; GISEL-NEXT: v_mul_hi_u32 v16, v6, v13 1707; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 1708; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1709; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 1710; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 1711; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 1712; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 1713; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1714; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 1715; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 1716; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 1717; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12 1718; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v13, vcc 1719; GISEL-NEXT: v_mul_lo_u32 v12, v10, v6 1720; GISEL-NEXT: v_mul_lo_u32 v11, v11, v6 1721; GISEL-NEXT: v_mul_lo_u32 v13, v10, v9 1722; GISEL-NEXT: v_mul_hi_u32 v10, v10, v6 1723; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 1724; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 1725; GISEL-NEXT: v_mul_lo_u32 v11, v9, v12 1726; GISEL-NEXT: v_mul_lo_u32 v13, v6, v10 1727; GISEL-NEXT: v_mul_hi_u32 v14, v6, v12 1728; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 1729; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1730; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14 1731; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1732; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11 1733; GISEL-NEXT: v_mul_lo_u32 v13, v9, v10 1734; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 1735; GISEL-NEXT: v_mul_hi_u32 v14, v6, v10 1736; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 1737; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1738; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 1739; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1740; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 1741; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 1742; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 
1743; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 1744; GISEL-NEXT: v_mul_hi_u32 v10, v9, v10 1745; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 1746; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v11 1747; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v10, vcc 1748; GISEL-NEXT: v_mul_lo_u32 v10, v1, v6 1749; GISEL-NEXT: v_mul_lo_u32 v11, v0, v9 1750; GISEL-NEXT: v_mul_hi_u32 v12, v0, v6 1751; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 1752; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1753; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 1754; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1755; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 1756; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9 1757; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6 1758; GISEL-NEXT: v_mul_hi_u32 v12, v0, v9 1759; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v6 1760; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1761; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12 1762; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1763; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 1764; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 1765; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1766; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 1767; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9 1768; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 1769; GISEL-NEXT: v_mul_lo_u32 v10, v7, v6 1770; GISEL-NEXT: v_mul_lo_u32 v11, v8, v6 1771; GISEL-NEXT: v_mul_lo_u32 v9, v7, v9 1772; GISEL-NEXT: v_mul_hi_u32 v6, v7, v6 1773; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 1774; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6 1775; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 1776; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], v1, v6, vcc 1777; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6 1778; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v8 1779; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] 1780; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7 1781; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] 1782; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v8 1783; GISEL-NEXT: v_cndmask_b32_e64 v6, v6, v10, s[4:5] 1784; 
GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], v0, v7 1785; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc 1786; GISEL-NEXT: v_subbrev_u32_e64 v11, vcc, 0, v1, s[4:5] 1787; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v8 1788; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc 1789; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v10, v7 1790; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc 1791; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v11, v8 1792; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc 1793; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v10, v7 1794; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v1, v8, s[4:5] 1795; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 1796; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 1797; GISEL-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc 1798; GISEL-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc 1799; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 1800; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 1801; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc 1802; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v4 1803; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v5 1804; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v7 1805; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 1806; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 1807; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v6 1808; GISEL-NEXT: v_trunc_f32_e32 v7, v7 1809; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v7 1810; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 1811; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 1812; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v4 1813; GISEL-NEXT: v_subb_u32_e32 v9, vcc, 0, v5, vcc 1814; GISEL-NEXT: v_mul_lo_u32 v10, v8, v6 1815; GISEL-NEXT: v_mul_lo_u32 v11, v9, v6 1816; GISEL-NEXT: v_mul_lo_u32 v12, v8, v7 1817; GISEL-NEXT: v_mul_hi_u32 v13, v8, v6 1818; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 1819; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 1820; GISEL-NEXT: v_mul_lo_u32 v12, v7, v10 1821; GISEL-NEXT: v_mul_lo_u32 v13, v6, v11 1822; GISEL-NEXT: v_mul_hi_u32 v14, v6, v10 1823; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 1824; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, 
vcc 1825; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 1826; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1827; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 1828; GISEL-NEXT: v_mul_lo_u32 v13, v7, v11 1829; GISEL-NEXT: v_mul_hi_u32 v10, v7, v10 1830; GISEL-NEXT: v_mul_hi_u32 v14, v6, v11 1831; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 1832; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1833; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 1834; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1835; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 1836; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 1837; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1838; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 1839; GISEL-NEXT: v_mul_hi_u32 v11, v7, v11 1840; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 1841; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 1842; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc 1843; GISEL-NEXT: v_mul_lo_u32 v10, v8, v6 1844; GISEL-NEXT: v_mul_lo_u32 v9, v9, v6 1845; GISEL-NEXT: v_mul_lo_u32 v11, v8, v7 1846; GISEL-NEXT: v_mul_hi_u32 v8, v8, v6 1847; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 1848; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 1849; GISEL-NEXT: v_mul_lo_u32 v9, v7, v10 1850; GISEL-NEXT: v_mul_lo_u32 v11, v6, v8 1851; GISEL-NEXT: v_mul_hi_u32 v12, v6, v10 1852; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 1853; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1854; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 1855; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1856; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 1857; GISEL-NEXT: v_mul_lo_u32 v11, v7, v8 1858; GISEL-NEXT: v_mul_hi_u32 v10, v7, v10 1859; GISEL-NEXT: v_mul_hi_u32 v12, v6, v8 1860; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 1861; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1862; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 1863; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1864; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 1865; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 1866; GISEL-NEXT: 
v_cndmask_b32_e64 v10, 0, 1, vcc 1867; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 1868; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8 1869; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 1870; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 1871; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc 1872; GISEL-NEXT: v_mul_lo_u32 v8, v3, v6 1873; GISEL-NEXT: v_mul_lo_u32 v9, v2, v7 1874; GISEL-NEXT: v_mul_hi_u32 v10, v2, v6 1875; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 1876; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1877; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 1878; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1879; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 1880; GISEL-NEXT: v_mul_lo_u32 v9, v3, v7 1881; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 1882; GISEL-NEXT: v_mul_hi_u32 v10, v2, v7 1883; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6 1884; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1885; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 1886; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1887; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 1888; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 1889; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1890; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 1891; GISEL-NEXT: v_mul_hi_u32 v7, v3, v7 1892; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 1893; GISEL-NEXT: v_mul_lo_u32 v8, v4, v6 1894; GISEL-NEXT: v_mul_lo_u32 v9, v5, v6 1895; GISEL-NEXT: v_mul_lo_u32 v7, v4, v7 1896; GISEL-NEXT: v_mul_hi_u32 v6, v4, v6 1897; GISEL-NEXT: v_add_i32_e32 v7, vcc, v9, v7 1898; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 1899; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 1900; GISEL-NEXT: v_subb_u32_e64 v7, s[4:5], v3, v6, vcc 1901; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v6 1902; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v5 1903; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] 1904; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 1905; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] 1906; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v5 1907; GISEL-NEXT: v_cndmask_b32_e64 v6, v6, v8, s[4:5] 1908; 
GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v2, v4 1909; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v5, vcc 1910; GISEL-NEXT: v_subbrev_u32_e64 v9, vcc, 0, v3, s[4:5] 1911; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v9, v5 1912; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc 1913; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4 1914; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc 1915; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v5 1916; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc 1917; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v8, v4 1918; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v5, s[4:5] 1919; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 1920; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 1921; GISEL-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc 1922; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc 1923; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 1924; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 1925; GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc 1926; GISEL-NEXT: s_setpc_b64 s[30:31] 1927; 1928; CGP-LABEL: v_urem_v2i64_pow2_shl_denom: 1929; CGP: ; %bb.0: 1930; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1931; CGP-NEXT: v_mov_b32_e32 v8, v0 1932; CGP-NEXT: v_mov_b32_e32 v9, v1 1933; CGP-NEXT: v_mov_b32_e32 v5, v2 1934; CGP-NEXT: v_mov_b32_e32 v7, v3 1935; CGP-NEXT: v_mov_b32_e32 v10, 0x1000 1936; CGP-NEXT: v_mov_b32_e32 v11, 0 1937; CGP-NEXT: v_mov_b32_e32 v0, 0 1938; CGP-NEXT: v_lshl_b64 v[2:3], v[10:11], v4 1939; CGP-NEXT: v_or_b32_e32 v1, v9, v3 1940; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] 1941; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 1942; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 1943; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc 1944; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 1945; CGP-NEXT: s_cbranch_execz .LBB8_2 1946; CGP-NEXT: ; %bb.1: 1947; CGP-NEXT: v_cvt_f32_u32_e32 v0, v3 1948; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v2 1949; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v3, vcc 1950; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v0 1951; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v4 1952; CGP-NEXT: v_mul_f32_e32 v0, 
0x5f7ffffc, v0 1953; CGP-NEXT: v_mul_f32_e32 v4, 0x2f800000, v0 1954; CGP-NEXT: v_trunc_f32_e32 v4, v4 1955; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v4 1956; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 1957; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 1958; CGP-NEXT: v_mul_lo_u32 v13, v1, v4 1959; CGP-NEXT: v_mul_lo_u32 v14, v1, v0 1960; CGP-NEXT: v_mul_lo_u32 v15, v12, v0 1961; CGP-NEXT: v_mul_hi_u32 v16, v1, v0 1962; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 1963; CGP-NEXT: v_mul_lo_u32 v15, v4, v14 1964; CGP-NEXT: v_mul_hi_u32 v17, v0, v14 1965; CGP-NEXT: v_mul_hi_u32 v14, v4, v14 1966; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 1967; CGP-NEXT: v_mul_lo_u32 v16, v0, v13 1968; CGP-NEXT: v_mul_lo_u32 v18, v4, v13 1969; CGP-NEXT: v_mul_hi_u32 v19, v0, v13 1970; CGP-NEXT: v_mul_hi_u32 v13, v4, v13 1971; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16 1972; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 1973; CGP-NEXT: v_add_i32_e32 v14, vcc, v18, v14 1974; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 1975; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 1976; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1977; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19 1978; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 1979; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 1980; CGP-NEXT: v_add_i32_e32 v16, vcc, v18, v17 1981; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 1982; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1983; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 1984; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 1985; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 1986; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v13, vcc 1987; CGP-NEXT: v_mul_lo_u32 v13, v1, v0 1988; CGP-NEXT: v_mul_lo_u32 v12, v12, v0 1989; CGP-NEXT: v_mul_hi_u32 v14, v1, v0 1990; CGP-NEXT: v_mul_lo_u32 v1, v1, v4 1991; CGP-NEXT: v_mul_lo_u32 v15, v4, v13 1992; CGP-NEXT: v_mul_hi_u32 v16, v0, v13 1993; CGP-NEXT: v_mul_hi_u32 v13, v4, v13 1994; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 1995; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v14 1996; CGP-NEXT: v_mul_lo_u32 v12, v0, 
v1 1997; CGP-NEXT: v_mul_lo_u32 v14, v4, v1 1998; CGP-NEXT: v_mul_hi_u32 v17, v0, v1 1999; CGP-NEXT: v_mul_hi_u32 v1, v4, v1 2000; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 2001; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 2002; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 2003; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 2004; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16 2005; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2006; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 2007; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 2008; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 2009; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 2010; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 2011; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 2012; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 2013; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v13 2014; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v12 2015; CGP-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc 2016; CGP-NEXT: v_mul_lo_u32 v4, v9, v0 2017; CGP-NEXT: v_mul_hi_u32 v12, v8, v0 2018; CGP-NEXT: v_mul_hi_u32 v0, v9, v0 2019; CGP-NEXT: v_mul_lo_u32 v13, v8, v1 2020; CGP-NEXT: v_mul_lo_u32 v14, v9, v1 2021; CGP-NEXT: v_mul_hi_u32 v15, v8, v1 2022; CGP-NEXT: v_mul_hi_u32 v1, v9, v1 2023; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 2024; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 2025; CGP-NEXT: v_add_i32_e32 v0, vcc, v14, v0 2026; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 2027; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 2028; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 2029; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v15 2030; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2031; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 2032; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 2033; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 2034; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 2035; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4 2036; CGP-NEXT: v_mul_lo_u32 v12, v2, v0 2037; CGP-NEXT: v_mul_lo_u32 v13, v3, v0 2038; CGP-NEXT: v_mul_hi_u32 v0, v2, v0 2039; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 2040; CGP-NEXT: 
v_mul_lo_u32 v1, v2, v1 2041; CGP-NEXT: v_add_i32_e32 v1, vcc, v13, v1 2042; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 2043; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v12 2044; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v9, v0, vcc 2045; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v9, v0 2046; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2 2047; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] 2048; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v3 2049; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 2050; CGP-NEXT: v_subb_u32_e32 v0, vcc, v0, v3, vcc 2051; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 2052; CGP-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc 2053; CGP-NEXT: v_sub_i32_e32 v9, vcc, v1, v2 2054; CGP-NEXT: v_subbrev_u32_e64 v12, s[4:5], 0, v0, vcc 2055; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v2 2056; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] 2057; CGP-NEXT: v_subb_u32_e32 v0, vcc, v0, v3, vcc 2058; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v12, v3 2059; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc 2060; CGP-NEXT: v_sub_i32_e32 v2, vcc, v9, v2 2061; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc 2062; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v12, v3 2063; CGP-NEXT: v_cndmask_b32_e32 v3, v14, v13, vcc 2064; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 2065; CGP-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc 2066; CGP-NEXT: v_cndmask_b32_e32 v3, v12, v0, vcc 2067; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 2068; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc 2069; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc 2070; CGP-NEXT: ; implicit-def: $vgpr4 2071; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 2072; CGP-NEXT: ; implicit-def: $vgpr8 2073; CGP-NEXT: .LBB8_2: ; %Flow1 2074; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] 2075; CGP-NEXT: v_lshl_b64 v[9:10], v[10:11], v6 2076; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] 2077; CGP-NEXT: s_cbranch_execz .LBB8_4 2078; CGP-NEXT: ; %bb.3: 2079; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v4 2080; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v2 2081; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2082; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 
2083; CGP-NEXT: v_mul_lo_u32 v1, v1, v0 2084; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 2085; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 2086; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 2087; CGP-NEXT: v_mul_lo_u32 v0, v0, v2 2088; CGP-NEXT: v_sub_i32_e32 v0, vcc, v8, v0 2089; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v2 2090; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 2091; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2092; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v2 2093; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 2094; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2095; CGP-NEXT: v_mov_b32_e32 v1, 0 2096; CGP-NEXT: .LBB8_4: 2097; CGP-NEXT: s_or_b64 exec, exec, s[4:5] 2098; CGP-NEXT: v_or_b32_e32 v3, v7, v10 2099; CGP-NEXT: v_mov_b32_e32 v2, 0 2100; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] 2101; CGP-NEXT: v_cvt_f32_u32_e32 v4, v9 2102; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 2103; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc 2104; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 2105; CGP-NEXT: s_cbranch_execnz .LBB8_7 2106; CGP-NEXT: ; %bb.5: ; %Flow 2107; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] 2108; CGP-NEXT: s_cbranch_execnz .LBB8_8 2109; CGP-NEXT: .LBB8_6: 2110; CGP-NEXT: s_or_b64 exec, exec, s[4:5] 2111; CGP-NEXT: s_setpc_b64 s[30:31] 2112; CGP-NEXT: .LBB8_7: 2113; CGP-NEXT: v_cvt_f32_u32_e32 v2, v10 2114; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v9 2115; CGP-NEXT: v_subb_u32_e32 v6, vcc, 0, v10, vcc 2116; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v2 2117; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v4 2118; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 2119; CGP-NEXT: v_mul_f32_e32 v4, 0x2f800000, v2 2120; CGP-NEXT: v_trunc_f32_e32 v4, v4 2121; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4 2122; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 2123; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 2124; CGP-NEXT: v_mul_lo_u32 v8, v3, v4 2125; CGP-NEXT: v_mul_lo_u32 v11, v3, v2 2126; CGP-NEXT: v_mul_lo_u32 v12, v6, v2 2127; CGP-NEXT: v_mul_hi_u32 v13, v3, v2 2128; CGP-NEXT: v_add_i32_e32 v8, vcc, v12, v8 2129; CGP-NEXT: v_mul_lo_u32 v12, v4, v11 2130; 
CGP-NEXT: v_mul_hi_u32 v14, v2, v11 2131; CGP-NEXT: v_mul_hi_u32 v11, v4, v11 2132; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v13 2133; CGP-NEXT: v_mul_lo_u32 v13, v2, v8 2134; CGP-NEXT: v_mul_lo_u32 v15, v4, v8 2135; CGP-NEXT: v_mul_hi_u32 v16, v2, v8 2136; CGP-NEXT: v_mul_hi_u32 v8, v4, v8 2137; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 2138; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 2139; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 2140; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 2141; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 2142; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2143; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 2144; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 2145; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 2146; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14 2147; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 2148; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2149; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 2150; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12 2151; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11 2152; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc 2153; CGP-NEXT: v_mul_lo_u32 v8, v3, v2 2154; CGP-NEXT: v_mul_lo_u32 v6, v6, v2 2155; CGP-NEXT: v_mul_hi_u32 v11, v3, v2 2156; CGP-NEXT: v_mul_lo_u32 v3, v3, v4 2157; CGP-NEXT: v_mul_lo_u32 v12, v4, v8 2158; CGP-NEXT: v_mul_hi_u32 v13, v2, v8 2159; CGP-NEXT: v_mul_hi_u32 v8, v4, v8 2160; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v3 2161; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v11 2162; CGP-NEXT: v_mul_lo_u32 v6, v2, v3 2163; CGP-NEXT: v_mul_lo_u32 v11, v4, v3 2164; CGP-NEXT: v_mul_hi_u32 v14, v2, v3 2165; CGP-NEXT: v_mul_hi_u32 v3, v4, v3 2166; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6 2167; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2168; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 2169; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 2170; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v13 2171; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 2172; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v14 2173; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 2174; 
CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6 2175; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 2176; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 2177; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2178; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 2179; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8 2180; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 2181; CGP-NEXT: v_addc_u32_e32 v3, vcc, v4, v3, vcc 2182; CGP-NEXT: v_mul_lo_u32 v4, v7, v2 2183; CGP-NEXT: v_mul_hi_u32 v6, v5, v2 2184; CGP-NEXT: v_mul_hi_u32 v2, v7, v2 2185; CGP-NEXT: v_mul_lo_u32 v8, v5, v3 2186; CGP-NEXT: v_mul_lo_u32 v11, v7, v3 2187; CGP-NEXT: v_mul_hi_u32 v12, v5, v3 2188; CGP-NEXT: v_mul_hi_u32 v3, v7, v3 2189; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 2190; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2191; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2 2192; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 2193; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 2194; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 2195; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 2196; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 2197; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 2198; CGP-NEXT: v_add_i32_e32 v6, vcc, v11, v6 2199; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 2200; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 2201; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 2202; CGP-NEXT: v_mul_lo_u32 v6, v9, v2 2203; CGP-NEXT: v_mul_lo_u32 v8, v10, v2 2204; CGP-NEXT: v_mul_hi_u32 v2, v9, v2 2205; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 2206; CGP-NEXT: v_mul_lo_u32 v3, v9, v3 2207; CGP-NEXT: v_add_i32_e32 v3, vcc, v8, v3 2208; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 2209; CGP-NEXT: v_sub_i32_e32 v3, vcc, v5, v6 2210; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v7, v2, vcc 2211; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v7, v2 2212; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v9 2213; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] 2214; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v10 2215; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] 2216; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v10, vcc 2217; CGP-NEXT: 
v_cmp_eq_u32_e32 vcc, v4, v10 2218; CGP-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc 2219; CGP-NEXT: v_sub_i32_e32 v6, vcc, v3, v9 2220; CGP-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v2, vcc 2221; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v9 2222; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] 2223; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v10, vcc 2224; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v10 2225; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc 2226; CGP-NEXT: v_sub_i32_e32 v9, vcc, v6, v9 2227; CGP-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc 2228; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v7, v10 2229; CGP-NEXT: v_cndmask_b32_e32 v8, v11, v8, vcc 2230; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 2231; CGP-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc 2232; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v2, vcc 2233; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 2234; CGP-NEXT: v_cndmask_b32_e32 v2, v3, v6, vcc 2235; CGP-NEXT: v_cndmask_b32_e32 v3, v4, v7, vcc 2236; CGP-NEXT: ; implicit-def: $vgpr4 2237; CGP-NEXT: ; implicit-def: $vgpr9_vgpr10 2238; CGP-NEXT: ; implicit-def: $vgpr5 2239; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] 2240; CGP-NEXT: s_cbranch_execz .LBB8_6 2241; CGP-NEXT: .LBB8_8: 2242; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v4 2243; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v9 2244; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 2245; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 2246; CGP-NEXT: v_mul_lo_u32 v3, v3, v2 2247; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 2248; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 2249; CGP-NEXT: v_mul_hi_u32 v2, v5, v2 2250; CGP-NEXT: v_mul_lo_u32 v2, v2, v9 2251; CGP-NEXT: v_sub_i32_e32 v2, vcc, v5, v2 2252; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v9 2253; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v9 2254; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 2255; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v9 2256; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v9 2257; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 2258; CGP-NEXT: v_mov_b32_e32 v3, 0 2259; CGP-NEXT: s_or_b64 exec, exec, s[4:5] 2260; CGP-NEXT: s_setpc_b64 s[30:31] 2261 %shl.y 
= shl <2 x i64> <i64 4096, i64 4096>, %y 2262 %r = urem <2 x i64> %x, %shl.y 2263 ret <2 x i64> %r 2264} 2265
; Scalar i64 urem where both operands are first masked to their low 24 bits
; (and with 16777215). The assertions below expect a single 32-bit remainder
; sequence and a zero high result dword (v1 = 0) for both RUN lines:
;  * GISEL prefix (-amdgpu-codegenprepare-disable-idiv-expansion=1): a
;    v_rcp_iflag_f32 estimate refined with v_mul_lo_u32/v_mul_hi_u32, then two
;    compare-and-subtract correction steps.
;  * CGP prefix (-amdgpu-codegenprepare-disable-idiv-expansion=0): a float
;    quotient (v_rcp_f32, v_trunc_f32, v_mad_f32), then multiply/subtract, with
;    the result masked to 24 bits again.
; These assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
2266define i64 @v_urem_i64_24bit(i64 %num, i64 %den) { 2267; GISEL-LABEL: v_urem_i64_24bit: 2268; GISEL: ; %bb.0: 2269; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2270; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0 2271; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v2 2272; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1 2273; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 2274; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 2275; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 2276; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 2277; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2 2278; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3 2279; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 2280; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2 2281; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1 2282; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 2283; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 2284; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 2285; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2286; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 2287; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 2288; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2289; GISEL-NEXT: v_mov_b32_e32 v1, 0 2290; GISEL-NEXT: s_setpc_b64 s[30:31] 2291; 2292; CGP-LABEL: v_urem_i64_24bit: 2293; CGP: ; %bb.0: 2294; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2295; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0 2296; CGP-NEXT: v_and_b32_e32 v1, 0xffffff, v2 2297; CGP-NEXT: v_cvt_f32_u32_e32 v2, v0 2298; CGP-NEXT: v_cvt_f32_u32_e32 v3, v1 2299; CGP-NEXT: v_rcp_f32_e32 v4, v3 2300; CGP-NEXT: v_mul_f32_e32 v4, v2, v4 2301; CGP-NEXT: v_trunc_f32_e32 v4, v4 2302; CGP-NEXT: v_mad_f32 v2, -v4, v3, v2 2303; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 2304; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v2|, v3 2305; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] 2306; CGP-NEXT: v_add_i32_e32 v2, vcc, v4, v2 2307; CGP-NEXT: v_mul_lo_u32 v1, v2, v1 2308; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 2309; CGP-NEXT: v_and_b32_e32 
v0, 0xffffff, v0 2310; CGP-NEXT: v_mov_b32_e32 v1, 0 2311; CGP-NEXT: s_setpc_b64 s[30:31] 2312 %num.mask = and i64 %num, 16777215 2313 %den.mask = and i64 %den, 16777215 2314 %result = urem i64 %num.mask, %den.mask 2315 ret i64 %result 2316} 2317
; <2 x i64> variant of the 24-bit-masked urem: every lane of both operands is
; masked with 16777215 before the vector urem.
;  * GISEL prefix (-amdgpu-codegenprepare-disable-idiv-expansion=1): the
;    expected output is still the long 64-bit remainder sequence for each
;    element, with the known-zero high halves showing up as literal 0 operands
;    (e.g. "v_mul_lo_u32 v4, 0, v8", "v_subb_u32_e64 v5, s[4:5], 0, 0, vcc").
;    NOTE(review): presumably the 24-bit narrowing is not applied to the vector
;    case on this path - confirm before relying on it.
;  * CGP prefix (-amdgpu-codegenprepare-disable-idiv-expansion=0): each element
;    uses the short float-based sequence (v_rcp_f32, v_trunc_f32, v_mad_f32),
;    the results are masked to 24 bits, and both high dwords are set to 0.
; These assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
2318define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { 2319; GISEL-LABEL: v_urem_v2i64_24bit: 2320; GISEL: ; %bb.0: 2321; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2322; GISEL-NEXT: v_and_b32_e32 v3, 0xffffff, v4 2323; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v6 2324; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 2325; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v3 2326; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v3 2327; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], 0, 0, vcc 2328; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v1 2329; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v1 2330; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], 0, 0, vcc 2331; GISEL-NEXT: v_mac_f32_e32 v7, 0x4f800000, v6 2332; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v6 2333; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v7 2334; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v8 2335; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 2336; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 2337; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 2338; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v7 2339; GISEL-NEXT: v_trunc_f32_e32 v8, v8 2340; GISEL-NEXT: v_trunc_f32_e32 v11, v11 2341; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 2342; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 2343; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v11 2344; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 2345; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v6 2346; GISEL-NEXT: v_mul_lo_u32 v6, v4, v8 2347; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 2348; GISEL-NEXT: v_mul_lo_u32 v13, v9, v11 2349; GISEL-NEXT: v_mul_lo_u32 v14, v4, v12 2350; GISEL-NEXT: v_mul_lo_u32 v15, v5, v12 2351; GISEL-NEXT: v_mul_hi_u32 v16, v4, v12 2352; GISEL-NEXT: v_mul_lo_u32 v17, v9, v7 2353; GISEL-NEXT: v_mul_lo_u32 v18, v10, v7 2354; GISEL-NEXT: v_mul_hi_u32 v19, v9, v7 2355; GISEL-NEXT: 
v_add_i32_e32 v6, vcc, v15, v6 2356; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13 2357; GISEL-NEXT: v_mul_lo_u32 v15, v11, v17 2358; GISEL-NEXT: v_mul_hi_u32 v18, v7, v17 2359; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v19 2360; GISEL-NEXT: v_mul_lo_u32 v19, v7, v13 2361; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19 2362; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc 2363; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v18 2364; GISEL-NEXT: v_mul_lo_u32 v15, v8, v14 2365; GISEL-NEXT: v_mul_hi_u32 v18, v12, v14 2366; GISEL-NEXT: v_mul_hi_u32 v14, v8, v14 2367; GISEL-NEXT: v_mul_hi_u32 v17, v11, v17 2368; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v6, v16 2369; GISEL-NEXT: v_mul_lo_u32 v6, v12, v16 2370; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v15, v6 2371; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 2372; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v18 2373; GISEL-NEXT: v_mul_lo_u32 v6, v8, v16 2374; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] 2375; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18 2376; GISEL-NEXT: v_mul_hi_u32 v18, v12, v16 2377; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v14 2378; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 2379; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v6, v18 2380; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] 2381; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v6 2382; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 2383; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v6 2384; GISEL-NEXT: v_mul_lo_u32 v6, v11, v13 2385; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v17 2386; GISEL-NEXT: v_mul_hi_u32 v17, v7, v13 2387; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc 2388; GISEL-NEXT: v_add_i32_e32 v17, vcc, v6, v17 2389; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 2390; GISEL-NEXT: v_add_i32_e32 v20, vcc, v20, v6 2391; GISEL-NEXT: v_and_b32_e32 v6, 0xffffff, v0 2392; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v2 2393; GISEL-NEXT: v_mul_hi_u32 v2, v8, v16 2394; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13 2395; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v15 2396; 
GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 2397; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19 2398; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 2399; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 2400; GISEL-NEXT: v_add_i32_e32 v16, vcc, v20, v18 2401; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v14 2402; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 2403; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 2404; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc 2405; GISEL-NEXT: v_mul_lo_u32 v8, v4, v12 2406; GISEL-NEXT: v_mul_lo_u32 v5, v5, v12 2407; GISEL-NEXT: v_mul_hi_u32 v14, v4, v12 2408; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17 2409; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v13, vcc 2410; GISEL-NEXT: v_mul_lo_u32 v13, v9, v7 2411; GISEL-NEXT: v_mul_lo_u32 v10, v10, v7 2412; GISEL-NEXT: v_mul_hi_u32 v15, v9, v7 2413; GISEL-NEXT: v_mul_lo_u32 v4, v4, v2 2414; GISEL-NEXT: v_mul_lo_u32 v16, v2, v8 2415; GISEL-NEXT: v_mul_hi_u32 v17, v12, v8 2416; GISEL-NEXT: v_mul_hi_u32 v8, v2, v8 2417; GISEL-NEXT: v_mul_lo_u32 v9, v9, v11 2418; GISEL-NEXT: v_mul_lo_u32 v18, v11, v13 2419; GISEL-NEXT: v_mul_hi_u32 v19, v7, v13 2420; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13 2421; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 2422; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v9 2423; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14 2424; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v15 2425; GISEL-NEXT: v_mul_lo_u32 v9, v12, v4 2426; GISEL-NEXT: v_mul_lo_u32 v10, v2, v4 2427; GISEL-NEXT: v_mul_hi_u32 v14, v12, v4 2428; GISEL-NEXT: v_mul_hi_u32 v4, v2, v4 2429; GISEL-NEXT: v_mul_lo_u32 v15, v7, v5 2430; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v15 2431; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 2432; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19 2433; GISEL-NEXT: v_mul_lo_u32 v15, v11, v5 2434; GISEL-NEXT: v_mul_hi_u32 v19, v7, v5 2435; GISEL-NEXT: v_mul_hi_u32 v5, v11, v5 2436; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v16, v9 2437; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] 2438; GISEL-NEXT: v_add_i32_e64 
v8, s[4:5], v10, v8 2439; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] 2440; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v15, v13 2441; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 2442; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v17 2443; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 2444; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v14 2445; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 2446; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 2447; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v19 2448; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc 2449; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9 2450; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 2451; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v17 2452; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19 2453; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 2454; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 2455; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 2456; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 2457; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 2458; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v14 2459; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 2460; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 2461; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 2462; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc 2463; GISEL-NEXT: v_mul_lo_u32 v4, 0, v8 2464; GISEL-NEXT: v_mul_hi_u32 v9, v6, v8 2465; GISEL-NEXT: v_mul_hi_u32 v8, 0, v8 2466; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13 2467; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v11, v5, vcc 2468; GISEL-NEXT: v_mul_lo_u32 v10, 0, v7 2469; GISEL-NEXT: v_mul_hi_u32 v11, v0, v7 2470; GISEL-NEXT: v_mul_hi_u32 v7, 0, v7 2471; GISEL-NEXT: v_mul_lo_u32 v12, v6, v2 2472; GISEL-NEXT: v_mul_lo_u32 v13, 0, v2 2473; GISEL-NEXT: v_mul_hi_u32 v14, v6, v2 2474; GISEL-NEXT: v_mul_hi_u32 v2, 0, v2 2475; GISEL-NEXT: v_mul_lo_u32 v15, v0, v5 2476; GISEL-NEXT: v_mul_lo_u32 v16, 0, v5 2477; GISEL-NEXT: v_mul_hi_u32 v17, v0, v5 2478; GISEL-NEXT: v_mul_hi_u32 v5, 0, v5 2479; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12 2480; 
GISEL-NEXT: v_add_i32_e32 v8, vcc, v13, v8 2481; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 2482; GISEL-NEXT: v_add_i32_e32 v7, vcc, v16, v7 2483; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 2484; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 2485; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v14 2486; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 2487; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 2488; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2489; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17 2490; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 2491; GISEL-NEXT: v_add_i32_e32 v4, vcc, v8, v4 2492; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2493; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 2494; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2495; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 2496; GISEL-NEXT: v_mul_lo_u32 v9, v3, v4 2497; GISEL-NEXT: v_mul_lo_u32 v12, 0, v4 2498; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 2499; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 2500; GISEL-NEXT: v_mul_lo_u32 v11, v1, v7 2501; GISEL-NEXT: v_mul_lo_u32 v13, 0, v7 2502; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 2503; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v8 2504; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 2505; GISEL-NEXT: v_mul_lo_u32 v2, v3, v2 2506; GISEL-NEXT: v_mul_lo_u32 v5, v1, v5 2507; GISEL-NEXT: v_add_i32_e32 v2, vcc, v12, v2 2508; GISEL-NEXT: v_add_i32_e32 v5, vcc, v13, v5 2509; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 2510; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v7 2511; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v6, v9 2512; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], 0, v2, vcc 2513; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], 0, v2 2514; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v3 2515; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] 2516; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v0, v11 2517; GISEL-NEXT: v_subb_u32_e64 v9, s[6:7], 0, v4, s[4:5] 2518; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], 0, v4 2519; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v8, v1 2520; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[6:7] 2521; 
GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v6 2522; GISEL-NEXT: v_cndmask_b32_e64 v7, -1, v7, s[6:7] 2523; GISEL-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc 2524; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 2525; GISEL-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc 2526; GISEL-NEXT: v_subbrev_u32_e64 v0, vcc, 0, v0, s[4:5] 2527; GISEL-NEXT: v_sub_i32_e32 v10, vcc, v5, v3 2528; GISEL-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc 2529; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v10, v3 2530; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc 2531; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v8, v1 2532; GISEL-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v0, vcc 2533; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v12, v1 2534; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc 2535; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 2536; GISEL-NEXT: v_cndmask_b32_e32 v11, -1, v11, vcc 2537; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v10, v3 2538; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v2, vcc 2539; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13 2540; GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc 2541; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v12, v1 2542; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v13, vcc 2543; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 2544; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc 2545; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 2546; GISEL-NEXT: v_cndmask_b32_e64 v1, v12, v1, s[4:5] 2547; GISEL-NEXT: v_cndmask_b32_e32 v10, v2, v14, vcc 2548; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 2549; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc 2550; GISEL-NEXT: v_cndmask_b32_e64 v3, v13, v15, s[4:5] 2551; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4 2552; GISEL-NEXT: v_cndmask_b32_e64 v2, v8, v1, s[4:5] 2553; GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v10, vcc 2554; GISEL-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5] 2555; GISEL-NEXT: s_setpc_b64 s[30:31] 2556; 2557; CGP-LABEL: v_urem_v2i64_24bit: 2558; CGP: ; %bb.0: 2559; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2560; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0 2561; CGP-NEXT: v_and_b32_e32 v1, 
0xffffff, v4 2562; CGP-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2563; CGP-NEXT: v_and_b32_e32 v3, 0xffffff, v6 2564; CGP-NEXT: v_cvt_f32_u32_e32 v4, v0 2565; CGP-NEXT: v_cvt_f32_u32_e32 v5, v1 2566; CGP-NEXT: v_cvt_f32_u32_e32 v6, v2 2567; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3 2568; CGP-NEXT: v_rcp_f32_e32 v8, v5 2569; CGP-NEXT: v_rcp_f32_e32 v9, v7 2570; CGP-NEXT: v_mul_f32_e32 v8, v4, v8 2571; CGP-NEXT: v_mul_f32_e32 v9, v6, v9 2572; CGP-NEXT: v_trunc_f32_e32 v8, v8 2573; CGP-NEXT: v_trunc_f32_e32 v9, v9 2574; CGP-NEXT: v_mad_f32 v4, -v8, v5, v4 2575; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 2576; CGP-NEXT: v_mad_f32 v6, -v9, v7, v6 2577; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9 2578; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v4|, v5 2579; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] 2580; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v6|, v7 2581; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] 2582; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 2583; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5 2584; CGP-NEXT: v_mul_lo_u32 v1, v4, v1 2585; CGP-NEXT: v_mul_lo_u32 v3, v5, v3 2586; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 2587; CGP-NEXT: v_sub_i32_e32 v1, vcc, v2, v3 2588; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0 2589; CGP-NEXT: v_and_b32_e32 v2, 0xffffff, v1 2590; CGP-NEXT: v_mov_b32_e32 v1, 0 2591; CGP-NEXT: v_mov_b32_e32 v3, 0 2592; CGP-NEXT: s_setpc_b64 s[30:31] 2593 %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215> 2594 %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215> 2595 %result = urem <2 x i64> %num.mask, %den.mask 2596 ret <2 x i64> %result 2597} 2598