; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI %s
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX1100 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX1150 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX1100 %s

; On GFX11, ensure vdst and src2 do not partially overlap. Full overlap is ok.

; (sext i32) * (sext i32) + i64, with the product as the first add operand.
define i64 @mad_i64_i32_sextops(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
; CI-LABEL: mad_i64_i32_sextops:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; SI-LABEL: mad_i64_i32_sextops:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_mul_lo_u32 v4, v0, v1
; SI-NEXT:    v_mul_hi_i32 v1, v0, v1
; SI-NEXT:    v_add_i32_e32 v0, vcc, v4, v2
; SI-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: mad_i64_i32_sextops:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: mad_i64_i32_sextops:
; GFX1100:       ; %bb.0:
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-NEXT:    v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1150-LABEL: mad_i64_i32_sextops:
; GFX1150:       ; %bb.0:
; GFX1150-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1150-NEXT:    v_mad_i64_i32 v[0:1], null, v0, v1, v[2:3]
; GFX1150-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: mad_i64_i32_sextops:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mad_co_i64_i32 v[0:1], null, v0, v1, v[2:3]
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %sext0 = sext i32 %arg0 to i64
  %sext1 = sext i32 %arg1 to i64
  %mul = mul i64 %sext0, %sext1
  %mad = add i64 %mul, %arg2
  ret i64 %mad
}

; (sext i32) * (sext i32) + i64, with the addend as the first add operand.
define i64 @mad_i64_i32_sextops_commute(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
; CI-LABEL: mad_i64_i32_sextops_commute:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; SI-LABEL: mad_i64_i32_sextops_commute:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_mul_lo_u32 v4, v0, v1
; SI-NEXT:    v_mul_hi_i32 v1, v0, v1
; SI-NEXT:    v_add_i32_e32 v0, vcc, v2, v4
; SI-NEXT:    v_addc_u32_e32 v1, vcc, v3, v1, vcc
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: mad_i64_i32_sextops_commute:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: mad_i64_i32_sextops_commute:
; GFX1100:       ; %bb.0:
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-NEXT:    v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1150-LABEL: mad_i64_i32_sextops_commute:
; GFX1150:       ; %bb.0:
; GFX1150-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1150-NEXT:    v_mad_i64_i32 v[0:1], null, v0, v1, v[2:3]
; GFX1150-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: mad_i64_i32_sextops_commute:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mad_co_i64_i32 v[0:1], null, v0, v1, v[2:3]
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %sext0 = sext i32 %arg0 to i64
  %sext1 = sext i32 %arg1 to i64
  %mul = mul i64 %sext0, %sext1
  %mad = add i64 %arg2, %mul
  ret i64 %mad
}

; (zext i32) * (zext i32) + i64. The IR values are named %sext*, but the
; extensions performed are zext.
define i64 @mad_u64_u32_zextops(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
; CI-LABEL: mad_u64_u32_zextops:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3]
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; SI-LABEL: mad_u64_u32_zextops:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_mul_lo_u32 v4, v0, v1
; SI-NEXT:    v_mul_hi_u32 v1, v0, v1
; SI-NEXT:    v_add_i32_e32 v0, vcc, v4, v2
; SI-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: mad_u64_u32_zextops:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: mad_u64_u32_zextops:
; GFX1100:       ; %bb.0:
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-NEXT:    v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1150-LABEL: mad_u64_u32_zextops:
; GFX1150:       ; %bb.0:
; GFX1150-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1150-NEXT:    v_mad_u64_u32 v[0:1], null, v0, v1, v[2:3]
; GFX1150-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: mad_u64_u32_zextops:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mad_co_u64_u32 v[0:1], null, v0, v1, v[2:3]
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %sext0 = zext i32 %arg0 to i64
  %sext1 = zext i32 %arg1 to i64
  %mul = mul i64 %sext0, %sext1
  %mad = add i64 %mul, %arg2
  ret i64 %mad
}

; (zext i32) * (zext i32) + i64, with the addend as the first add operand.
define i64 @mad_u64_u32_zextops_commute(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
; CI-LABEL: mad_u64_u32_zextops_commute:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3]
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; SI-LABEL: mad_u64_u32_zextops_commute:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_mul_lo_u32 v4, v0, v1
; SI-NEXT:    v_mul_hi_u32 v1, v0, v1
; SI-NEXT:    v_add_i32_e32 v0, vcc, v2, v4
; SI-NEXT:    v_addc_u32_e32 v1, vcc, v3, v1, vcc
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: mad_u64_u32_zextops_commute:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: mad_u64_u32_zextops_commute:
; GFX1100:       ; %bb.0:
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-NEXT:    v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1150-LABEL: mad_u64_u32_zextops_commute:
; GFX1150:       ; %bb.0:
; GFX1150-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1150-NEXT:    v_mad_u64_u32 v[0:1], null, v0, v1, v[2:3]
; GFX1150-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: mad_u64_u32_zextops_commute:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mad_co_u64_u32 v[0:1], null, v0, v1, v[2:3]
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %sext0 = zext i32 %arg0 to i64
  %sext1 = zext i32 %arg1 to i64
  %mul = mul i64 %sext0, %sext1
  %mad = add i64 %arg2, %mul
  ret i64 %mad
}

; i32 operands sign-extended to i128, multiplied and accumulated at i128 width.
define i128 @mad_i64_i32_sextops_i32_i128(i32 %arg0, i32 %arg1, i128 %arg2) #0 {
; CI-LABEL: mad_i64_i32_sextops_i32_i128:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v0, v1, 0
; CI-NEXT:    v_ashrrev_i32_e32 v12, 31, v0
; CI-NEXT:    v_mov_b32_e32 v8, 0
; CI-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v12, v1, v[7:8]
; CI-NEXT:    v_ashrrev_i32_e32 v13, 31, v1
; CI-NEXT:    v_mov_b32_e32 v11, v10
; CI-NEXT:    v_mov_b32_e32 v10, v8
; CI-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v0, v13, v[9:10]
; CI-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
; CI-NEXT:    v_mad_i64_i32 v[10:11], s[4:5], v1, v12, 0
; CI-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, vcc
; CI-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v13, v[8:9]
; CI-NEXT:    v_mad_i64_i32 v[0:1], s[4:5], v13, v0, v[10:11]
; CI-NEXT:    v_add_i32_e32 v8, vcc, v8, v0
; CI-NEXT:    v_addc_u32_e32 v9, vcc, v9, v1, vcc
; CI-NEXT:    v_mov_b32_e32 v1, v7
; CI-NEXT:    v_add_i32_e32 v0, vcc, v6, v2
; CI-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
; CI-NEXT:    v_addc_u32_e32 v2, vcc, v8, v4, vcc
; CI-NEXT:    v_addc_u32_e32 v3, vcc, v9, v5, vcc
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; SI-LABEL: mad_i64_i32_sextops_i32_i128:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_ashrrev_i32_e32 v6, 31, v0
; SI-NEXT:    v_mul_lo_u32 v11, v6, v1
; SI-NEXT:    v_mul_hi_u32 v12, v0, v1
; SI-NEXT:    v_ashrrev_i32_e32 v7, 31, v1
; SI-NEXT:    v_mul_hi_u32 v14, v6, v1
; SI-NEXT:    v_mul_lo_u32 v13, v0, v7
; SI-NEXT:    v_mul_hi_u32 v10, v0, v7
; SI-NEXT:    v_add_i32_e32 v12, vcc, v11, v12
; SI-NEXT:    v_addc_u32_e32 v14, vcc, 0, v14, vcc
; SI-NEXT:    v_mul_hi_u32 v8, v6, v7
; SI-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
; SI-NEXT:    v_addc_u32_e32 v10, vcc, 0, v10, vcc
; SI-NEXT:    v_mul_i32_i24_e32 v9, v6, v7
; SI-NEXT:    v_add_i32_e32 v10, vcc, v14, v10
; SI-NEXT:    v_mul_hi_i32 v6, v1, v6
; SI-NEXT:    v_mul_hi_i32 v7, v7, v0
; SI-NEXT:    v_addc_u32_e64 v14, s[4:5], 0, 0, vcc
; SI-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
; SI-NEXT:    v_addc_u32_e32 v8, vcc, v8, v14, vcc
; SI-NEXT:    v_add_i32_e32 v10, vcc, v13, v11
; SI-NEXT:    v_mul_lo_u32 v0, v0, v1
; SI-NEXT:    v_addc_u32_e32 v6, vcc, v7, v6, vcc
; SI-NEXT:    v_add_i32_e32 v7, vcc, v9, v10
; SI-NEXT:    v_addc_u32_e32 v6, vcc, v8, v6, vcc
; SI-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; SI-NEXT:    v_addc_u32_e32 v1, vcc, v12, v3, vcc
; SI-NEXT:    v_addc_u32_e32 v2, vcc, v7, v4, vcc
; SI-NEXT:    v_addc_u32_e32 v3, vcc, v6, v5, vcc
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: mad_i64_i32_sextops_i32_i128:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v0, v1, 0
; GFX9-NEXT:    v_ashrrev_i32_e32 v13, 31, v0
; GFX9-NEXT:    v_mov_b32_e32 v9, 0
; GFX9-NEXT:    v_mov_b32_e32 v8, v7
; GFX9-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v13, v1, v[8:9]
; GFX9-NEXT:    v_ashrrev_i32_e32 v14, 31, v1
; GFX9-NEXT:    v_mov_b32_e32 v8, v11
; GFX9-NEXT:    v_mov_b32_e32 v11, v9
; GFX9-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v0, v14, v[10:11]
; GFX9-NEXT:    v_mov_b32_e32 v12, v11
; GFX9-NEXT:    v_add_co_u32_e32 v8, vcc, v8, v12
; GFX9-NEXT:    v_addc_co_u32_e64 v9, s[4:5], 0, 0, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v13, v14, v[8:9]
; GFX9-NEXT:    v_mad_i64_i32 v[12:13], s[4:5], v1, v13, 0
; GFX9-NEXT:    v_mad_i64_i32 v[0:1], s[4:5], v14, v0, v[12:13]
; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, v8, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v8, vcc, v9, v1, vcc
; GFX9-NEXT:    v_mov_b32_e32 v1, v10
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v6, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v7, v4, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v8, v5, vcc
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: mad_i64_i32_sextops_i32_i128:
; GFX1100:       ; %bb.0:
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT:    v_mad_u64_u32 v[6:7], null, v0, v1, 0
; GFX1100-NEXT:    v_mov_b32_e32 v8, 0
; GFX1100-NEXT:    v_ashrrev_i32_e32 v14, 31, v0
; GFX1100-NEXT:    v_ashrrev_i32_e32 v15, 31, v1
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-NEXT:    v_mad_u64_u32 v[9:10], null, v14, v1, v[7:8]
; GFX1100-NEXT:    v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v10, v8
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-NEXT:    v_mad_u64_u32 v[7:8], null, v0, v15, v[9:10]
; GFX1100-NEXT:    v_mov_b32_e32 v10, v8
; GFX1100-NEXT:    v_mad_i64_i32 v[8:9], null, v1, v14, 0
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-NEXT:    v_add_co_u32 v10, s0, v11, v10
; GFX1100-NEXT:    v_add_co_ci_u32_e64 v11, null, 0, 0, s0
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-NEXT:    v_mad_i64_i32 v[12:13], null, v15, v0, v[8:9]
; GFX1100-NEXT:    v_mad_u64_u32 v[0:1], null, v14, v15, v[10:11]
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-NEXT:    v_add_co_u32 v8, vcc_lo, v0, v12
; GFX1100-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, v1, v13, vcc_lo
; GFX1100-NEXT:    v_add_co_u32 v0, vcc_lo, v6, v2
; GFX1100-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v7, v3, vcc_lo
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1100-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v8, v4, vcc_lo
; GFX1100-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v9, v5, vcc_lo
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1150-LABEL: mad_i64_i32_sextops_i32_i128:
; GFX1150:       ; %bb.0:
; GFX1150-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1150-NEXT:    v_mad_u64_u32 v[6:7], null, v0, v1, 0
; GFX1150-NEXT:    v_mov_b32_e32 v8, 0
; GFX1150-NEXT:    v_ashrrev_i32_e32 v12, 31, v0
; GFX1150-NEXT:    v_ashrrev_i32_e32 v13, 31, v1
; GFX1150-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-NEXT:    v_mad_u64_u32 v[9:10], null, v12, v1, v[7:8]
; GFX1150-NEXT:    v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v10, v8
; GFX1150-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-NEXT:    v_mad_u64_u32 v[7:8], null, v0, v13, v[9:10]
; GFX1150-NEXT:    v_mov_b32_e32 v10, v8
; GFX1150-NEXT:    v_mad_i64_i32 v[8:9], null, v1, v12, 0
; GFX1150-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-NEXT:    v_add_co_u32 v10, s0, v11, v10
; GFX1150-NEXT:    v_add_co_ci_u32_e64 v11, null, 0, 0, s0
; GFX1150-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1150-NEXT:    v_mad_i64_i32 v[0:1], null, v13, v0, v[8:9]
; GFX1150-NEXT:    v_mad_u64_u32 v[8:9], null, v12, v13, v[10:11]
; GFX1150-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1150-NEXT:    v_add_co_u32 v8, vcc_lo, v8, v0
; GFX1150-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, v9, v1, vcc_lo
; GFX1150-NEXT:    v_add_co_u32 v0, vcc_lo, v6, v2
; GFX1150-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v7, v3, vcc_lo
; GFX1150-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1150-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v8, v4, vcc_lo
; GFX1150-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v9, v5, vcc_lo
; GFX1150-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: mad_i64_i32_sextops_i32_i128:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mad_co_u64_u32 v[6:7], null, v0, v1, 0
; GFX12-NEXT:    v_mov_b32_e32 v8, 0
; GFX12-NEXT:    v_ashrrev_i32_e32 v12, 31, v0
; GFX12-NEXT:    v_ashrrev_i32_e32 v13, 31, v1
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT:    v_mad_co_u64_u32 v[9:10], null, v12, v1, v[7:8]
; GFX12-NEXT:    v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v10, v8
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT:    v_mad_co_u64_u32 v[7:8], null, v0, v13, v[9:10]
; GFX12-NEXT:    v_mov_b32_e32 v10, v8
; GFX12-NEXT:    v_mad_co_i64_i32 v[8:9], null, v1, v12, 0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT:    v_add_co_u32 v10, s0, v11, v10
; GFX12-NEXT:    v_add_co_ci_u32_e64 v11, null, 0, 0, s0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT:    v_mad_co_i64_i32 v[0:1], null, v13, v0, v[8:9]
; GFX12-NEXT:    v_mad_co_u64_u32 v[8:9], null, v12, v13, v[10:11]
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT:    v_add_co_u32 v8, vcc_lo, v8, v0
; GFX12-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, v9, v1, vcc_lo
; GFX12-NEXT:    v_add_co_u32 v0, vcc_lo, v6, v2
; GFX12-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v7, v3, vcc_lo
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v8, v4, vcc_lo
; GFX12-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v9, v5, vcc_lo
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %sext0 = sext i32 %arg0 to i128
  %sext1 = sext i32 %arg1 to i128
  %mul = mul i128 %sext0, %sext1
  %mad = add i128 %mul, %arg2
  ret i128 %mad
}

; i32 operands sign-extended to i63, multiplied and accumulated at i63 width.
define i63 @mad_i64_i32_sextops_i32_i63(i32 %arg0, i32 %arg1, i63 %arg2) #0 {
; CI-LABEL: mad_i64_i32_sextops_i32_i63:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; SI-LABEL: mad_i64_i32_sextops_i32_i63:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_mul_lo_u32 v4, v0, v1
; SI-NEXT:    v_mul_hi_i32 v1, v0, v1
; SI-NEXT:    v_add_i32_e32 v0, vcc, v4, v2
; SI-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: mad_i64_i32_sextops_i32_i63:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: mad_i64_i32_sextops_i32_i63:
; GFX1100:       ; %bb.0:
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-NEXT:    v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1150-LABEL: mad_i64_i32_sextops_i32_i63:
; GFX1150:       ; %bb.0:
; GFX1150-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1150-NEXT:    v_mad_i64_i32 v[0:1], null, v0, v1, v[2:3]
; GFX1150-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: mad_i64_i32_sextops_i32_i63:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mad_co_i64_i32 v[0:1], null, v0, v1, v[2:3]
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %sext0 = sext i32 %arg0 to i63
  %sext1 = sext i32 %arg1 to i63
  %mul = mul i63 %sext0, %sext1
  %mad = add i63 %mul, %arg2
  ret i63 %mad
}

; i31 operands sign-extended to i63, multiplied and accumulated at i63 width.
define i63 @mad_i64_i32_sextops_i31_i63(i31 %arg0, i31 %arg1, i63 %arg2) #0 {
; CI-LABEL: mad_i64_i32_sextops_i31_i63:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_bfe_i32 v1, v1, 0, 31
; CI-NEXT:    v_bfe_i32 v0, v0, 0, 31
; CI-NEXT:    v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; SI-LABEL: mad_i64_i32_sextops_i31_i63:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_lshlrev_b32_e32 v4, 1, v0
; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
; SI-NEXT:    v_ashr_i64 v[4:5], v[3:4], 33
; SI-NEXT:    v_ashr_i64 v[0:1], v[0:1], 33
; SI-NEXT:    v_mul_lo_u32 v1, v4, v0
; SI-NEXT:    v_mul_hi_i32 v4, v4, v0
; SI-NEXT:    v_add_i32_e32 v0, vcc, v1, v2
; SI-NEXT:    v_addc_u32_e32 v1, vcc, v4, v3, vcc
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: mad_i64_i32_sextops_i31_i63:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_bfe_i32 v1, v1, 0, 31
; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 31
; GFX9-NEXT:    v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: mad_i64_i32_sextops_i31_i63:
; GFX1100:       ; %bb.0:
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT:    v_bfe_i32 v4, v1, 0, 31
; GFX1100-NEXT:    v_bfe_i32 v5, v0, 0, 31
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-NEXT:    v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1150-LABEL: mad_i64_i32_sextops_i31_i63:
; GFX1150:       ; %bb.0:
; GFX1150-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1150-NEXT:    v_bfe_i32 v1, v1, 0, 31
; GFX1150-NEXT:    v_bfe_i32 v0, v0, 0, 31
; GFX1150-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1150-NEXT:    v_mad_i64_i32 v[0:1], null, v0, v1, v[2:3]
; GFX1150-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: mad_i64_i32_sextops_i31_i63:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_bfe_i32 v1, v1, 0, 31
; GFX12-NEXT:    v_bfe_i32 v0, v0, 0, 31
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_mad_co_i64_i32 v[0:1], null, v0, v1, v[2:3]
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %sext0 = sext i31 %arg0 to i63
  %sext1 = sext i31 %arg1 to i63
  %mul = mul i63 %sext0, %sext1
  %mad = add i63 %mul, %arg2
  ret i63 %mad
}

; Mixed extensions: %arg0 is sign-extended while %arg1 is zero-extended.
define i64 @mad_i64_i32_extops_i32_i64(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
; CI-LABEL: mad_i64_i32_extops_i32_i64:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
; CI-NEXT:    v_mul_lo_u32 v4, v4, v1
; CI-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3]
; CI-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; SI-LABEL: mad_i64_i32_extops_i32_i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
; SI-NEXT:    v_mul_hi_u32 v5, v0, v1
; SI-NEXT:    v_mul_lo_u32 v4, v4, v1
; SI-NEXT:    v_mul_lo_u32 v0, v0, v1
; SI-NEXT:    v_add_i32_e32 v1, vcc, v5, v4
; SI-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; SI-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: mad_i64_i32_extops_i32_i64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v4, v1
; GFX9-NEXT:    v_ashrrev_i32_e32 v5, 31, v0
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v4, v[2:3]
; GFX9-NEXT:    v_mov_b32_e32 v2, v1
; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v4, v[2:3]
; GFX9-NEXT:    v_mov_b32_e32 v1, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: mad_i64_i32_extops_i32_i64:
; GFX1100:       ; %bb.0:
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-NEXT:    v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
; GFX1100-NEXT:    v_ashrrev_i32_e32 v5, 31, v5
; GFX1100-NEXT:    v_mov_b32_e32 v3, v1
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-NEXT:    v_mad_u64_u32 v[1:2], null, v5, v4, v[3:4]
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1150-LABEL: mad_i64_i32_extops_i32_i64:
; GFX1150:       ; %bb.0:
; GFX1150-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1150-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
; GFX1150-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1150-NEXT:    v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
; GFX1150-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
; GFX1150-NEXT:    v_mad_u64_u32 v[1:2], null, v2, v4, v[1:2]
; GFX1150-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: mad_i64_i32_extops_i32_i64:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX12-NEXT:    v_mad_co_u64_u32 v[0:1], null, v5, v4, v[2:3]
; GFX12-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
; GFX12-NEXT:    v_mad_co_u64_u32 v[1:2], null, v2, v4, v[1:2]
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %ext0 = sext i32 %arg0 to i64
  %ext1 = zext i32 %arg1 to i64
  %mul = mul i64 %ext0, %ext1
  %mad = add i64 %mul, %arg2
  ret i64 %mad
}

; Both i64 operands are masked to their low 32 bits (0xffffffff) before the
; multiply.
define i64 @mad_u64_u32_bitops(i64 %arg0, i64 %arg1, i64 %arg2) #0 {
; CI-LABEL: mad_u64_u32_bitops:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[4:5]
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; SI-LABEL: mad_u64_u32_bitops:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_mul_lo_u32 v1, v0, v2
; SI-NEXT:    v_mul_hi_u32 v2, v0, v2
; SI-NEXT:    v_add_i32_e32 v0, vcc, v1, v4
; SI-NEXT:    v_addc_u32_e32 v1, vcc, v2, v5, vcc
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: mad_u64_u32_bitops:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[4:5]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: mad_u64_u32_bitops:
; GFX1100:       ; %bb.0:
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT:    v_mov_b32_e32 v3, v0
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v2, v[4:5]
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1150-LABEL: mad_u64_u32_bitops:
; GFX1150:       ; %bb.0:
; GFX1150-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1150-NEXT:    v_mad_u64_u32 v[0:1], null, v0, v2, v[4:5]
; GFX1150-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: mad_u64_u32_bitops:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mad_co_u64_u32 v[0:1], null, v0, v2, v[4:5]
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %trunc.lhs = and i64 %arg0, 4294967295
  %trunc.rhs = and i64 %arg1, 4294967295
  %mul = mul i64 %trunc.lhs, %trunc.rhs
  %add = add i64 %mul, %arg2
  ret i64 %add
}

; The lhs mask is 8589934591 (2^33 - 1), i.e. it keeps 33 bits, so the lhs is
; not a zero-extended 32-bit value. The rhs mask keeps the low 32 bits.
define i64 @mad_u64_u32_bitops_lhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) #0 {
; CI-LABEL: mad_u64_u32_bitops_lhs_mask_small:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_and_b32_e32 v3, 1, v1
; CI-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[4:5]
; CI-NEXT:    v_mul_lo_u32 v2, v3, v2
; CI-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; SI-LABEL: mad_u64_u32_bitops_lhs_mask_small:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_and_b32_e32 v1, 1, v1
; SI-NEXT:    v_mul_hi_u32 v3, v0, v2
; SI-NEXT:    v_mul_lo_u32 v1, v1, v2
; SI-NEXT:    v_mul_lo_u32 v0, v0, v2
; SI-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
; SI-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; SI-NEXT:    v_addc_u32_e32 v1, vcc, v1, v5, vcc
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: mad_u64_u32_bitops_lhs_mask_small:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_and_b32_e32 v3, 1, v1
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[4:5]
; GFX9-NEXT:    v_mov_b32_e32 v4, v1
; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v3, v2, v[4:5]
; GFX9-NEXT:    v_mov_b32_e32 v1, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: mad_u64_u32_bitops_lhs_mask_small:
; GFX1100:       ; %bb.0:
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v0
; GFX1100-NEXT:    v_mov_b32_e32 v6, v1
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-NEXT:    v_mad_u64_u32 v[0:1], null, v2, v3, v[4:5]
; GFX1100-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_and_b32 v5, 1, v6
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-NEXT:    v_mad_u64_u32 v[1:2], null, v5, v3, v[4:5]
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1150-LABEL: mad_u64_u32_bitops_lhs_mask_small:
; GFX1150:       ; %bb.0:
; GFX1150-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1150-NEXT:    v_mov_b32_e32 v3, v1
; GFX1150-NEXT:    v_mad_u64_u32 v[0:1], null, v0, v2, v[4:5]
; GFX1150-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-NEXT:    v_and_b32_e32 v3, 1, v3
; GFX1150-NEXT:    v_mad_u64_u32 v[1:2], null, v3, v2, v[1:2]
; GFX1150-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: mad_u64_u32_bitops_lhs_mask_small:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mov_b32_e32 v3, v1
; GFX12-NEXT:    v_mad_co_u64_u32 v[0:1], null, v0, v2, v[4:5]
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT:    v_and_b32_e32 v3, 1, v3
; GFX12-NEXT:    v_mad_co_u64_u32 v[1:2], null, v3, v2, v[1:2]
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %trunc.lhs = and i64 %arg0, 8589934591
  %trunc.rhs = and i64 %arg1, 4294967295
  %mul = mul i64 %trunc.lhs, %trunc.rhs
  %add = add i64 %mul, %arg2
  ret i64 %add
}

; The rhs mask is 8589934591 (2^33 - 1), i.e. it keeps 33 bits, so the rhs is
; not a zero-extended 32-bit value. The lhs mask keeps the low 32 bits.
define i64 @mad_u64_u32_bitops_rhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) #0 {
; CI-LABEL: mad_u64_u32_bitops_rhs_mask_small:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_mov_b32_e32 v6, v0
; CI-NEXT:    v_and_b32_e32 v3, 1, v3
; CI-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v2, v[4:5]
; CI-NEXT:    v_mul_lo_u32 v2, v6, v3
; CI-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; SI-LABEL: mad_u64_u32_bitops_rhs_mask_small:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_and_b32_e32 v1, 1, v3
; SI-NEXT:    v_mul_hi_u32 v3, v0, v2
; SI-NEXT:    v_mul_lo_u32 v1, v0, v1
; SI-NEXT:    v_mul_lo_u32 v0, v0, v2
; SI-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
; SI-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; SI-NEXT:    v_addc_u32_e32 v1, vcc, v1, v5, vcc
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: mad_u64_u32_bitops_rhs_mask_small:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v6, v0
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v2, v[4:5]
; GFX9-NEXT:    v_and_b32_e32 v3, 1, v3
; GFX9-NEXT:    v_mov_b32_e32 v2, v1
; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v3, v[2:3]
; GFX9-NEXT:    v_mov_b32_e32 v1, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: mad_u64_u32_bitops_rhs_mask_small:
; GFX1100:       ; %bb.0:
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT:    v_mov_b32_e32 v6, v0
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-NEXT:    v_mad_u64_u32 v[0:1], null, v6, v2, v[4:5]
; GFX1100-NEXT:    v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v4, 1, v3
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-NEXT:    v_mad_u64_u32 v[1:2], null, v6, v4, v[3:4]
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1150-LABEL: mad_u64_u32_bitops_rhs_mask_small:
; GFX1150:       ; %bb.0:
; GFX1150-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1150-NEXT:    v_mov_b32_e32 v6, v0
; GFX1150-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1150-NEXT:    v_mad_u64_u32 v[0:1], null, v6, v2, v[4:5]
; GFX1150-NEXT:    v_and_b32_e32 v2, 1, v3
; GFX1150-NEXT:    v_mad_u64_u32 v[1:2], null, v6, v2, v[1:2]
; GFX1150-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: mad_u64_u32_bitops_rhs_mask_small:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mov_b32_e32 v6, v0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX12-NEXT:    v_mad_co_u64_u32 v[0:1], null, v6, v2, v[4:5]
; GFX12-NEXT:    v_and_b32_e32 v2, 1, v3
; GFX12-NEXT:    v_mad_co_u64_u32 v[1:2], null, v6, v2, v[1:2]
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %trunc.lhs = and i64 %arg0, 4294967295
  %trunc.rhs = and i64 %arg1, 8589934591
  %mul = mul i64 %trunc.lhs, %trunc.rhs
  %add = add i64 %mul, %arg2
  ret i64 %add
}

; The lhs is sign-extended in place from its low 32 bits with a shl/ashr by 32
; pair; the rhs starts the same way with a shl by 32.
define i64 @mad_i64_i32_bitops(i64 %arg0, i64 %arg1, i64 %arg2) #0 {
; CI-LABEL: mad_i64_i32_bitops:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_mad_i64_i32 v[0:1], s[4:5], v0, v2, v[4:5]
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; SI-LABEL: mad_i64_i32_bitops:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_mul_lo_u32 v1, v0, v2
; SI-NEXT:    v_mul_hi_i32 v2, v0, v2
; SI-NEXT:    v_add_i32_e32 v0, vcc, v1, v4
; SI-NEXT:    v_addc_u32_e32 v1, vcc, v2, v5, vcc
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: mad_i64_i32_bitops:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mad_i64_i32 v[0:1], s[4:5], v0, v2, v[4:5]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: mad_i64_i32_bitops:
; GFX1100:       ; %bb.0:
; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT:    v_mov_b32_e32 v3, v0
; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-NEXT:    v_mad_i64_i32 v[0:1], null, v3, v2, v[4:5]
; GFX1100-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1150-LABEL: mad_i64_i32_bitops:
; GFX1150:       ; %bb.0:
; GFX1150-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1150-NEXT:    v_mad_i64_i32 v[0:1], null, v0, v2, v[4:5]
; GFX1150-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: mad_i64_i32_bitops:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mad_co_i64_i32 v[0:1], null, v0, v2, v[4:5]
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %shl.lhs = shl i64 %arg0, 32
  %trunc.lhs = ashr i64 %shl.lhs, 32
  %shl.rhs = shl i64 %arg1, 32
  %trunc.rhs = ashr
i64 %shl.rhs, 32 850 %mul = mul i64 %trunc.lhs, %trunc.rhs 851 %add = add i64 %mul, %arg2 852 ret i64 %add 853} 854 855; Example from bug report 856define i64 @mad_i64_i32_unpack_i64ops(i64 %arg0) #0 { 857; CI-LABEL: mad_i64_i32_unpack_i64ops: 858; CI: ; %bb.0: 859; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 860; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, v0, v[0:1] 861; CI-NEXT: s_setpc_b64 s[30:31] 862; 863; SI-LABEL: mad_i64_i32_unpack_i64ops: 864; SI: ; %bb.0: 865; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 866; SI-NEXT: v_mul_lo_u32 v2, v1, v0 867; SI-NEXT: v_mul_hi_u32 v3, v1, v0 868; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v0 869; SI-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc 870; SI-NEXT: s_setpc_b64 s[30:31] 871; 872; GFX9-LABEL: mad_i64_i32_unpack_i64ops: 873; GFX9: ; %bb.0: 874; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 875; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, v0, v[0:1] 876; GFX9-NEXT: s_setpc_b64 s[30:31] 877; 878; GFX1100-LABEL: mad_i64_i32_unpack_i64ops: 879; GFX1100: ; %bb.0: 880; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 881; GFX1100-NEXT: v_mad_u64_u32 v[2:3], null, v1, v0, v[0:1] 882; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) 883; GFX1100-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3 884; GFX1100-NEXT: s_setpc_b64 s[30:31] 885; 886; GFX1150-LABEL: mad_i64_i32_unpack_i64ops: 887; GFX1150: ; %bb.0: 888; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 889; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v1, v0, v[0:1] 890; GFX1150-NEXT: s_setpc_b64 s[30:31] 891; 892; GFX12-LABEL: mad_i64_i32_unpack_i64ops: 893; GFX12: ; %bb.0: 894; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 895; GFX12-NEXT: s_wait_expcnt 0x0 896; GFX12-NEXT: s_wait_samplecnt 0x0 897; GFX12-NEXT: s_wait_bvhcnt 0x0 898; GFX12-NEXT: s_wait_kmcnt 0x0 899; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v1, v0, v[0:1] 900; GFX12-NEXT: s_setpc_b64 s[30:31] 901 %tmp4 = lshr i64 %arg0, 32 902 %tmp5 = and i64 %arg0, 4294967295 903 %mul = mul nuw i64 
%tmp4, %tmp5 904 %mad = add i64 %mul, %arg0 905 ret i64 %mad 906} 907 908define amdgpu_kernel void @mad_i64_i32_uniform(ptr addrspace(1) %out, i32 %arg0, i32 %arg1, i64 %arg2) #0 { 909; CI-LABEL: mad_i64_i32_uniform: 910; CI: ; %bb.0: 911; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 912; CI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd 913; CI-NEXT: s_mov_b32 s7, 0xf000 914; CI-NEXT: s_mov_b32 s6, -1 915; CI-NEXT: s_waitcnt lgkmcnt(0) 916; CI-NEXT: v_mov_b32_e32 v2, s3 917; CI-NEXT: v_mov_b32_e32 v0, s4 918; CI-NEXT: v_mov_b32_e32 v1, s5 919; CI-NEXT: v_mad_u64_u32 v[0:1], s[2:3], s2, v2, v[0:1] 920; CI-NEXT: s_mov_b32 s4, s0 921; CI-NEXT: s_mov_b32 s5, s1 922; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 923; CI-NEXT: s_endpgm 924; 925; SI-LABEL: mad_i64_i32_uniform: 926; SI: ; %bb.0: 927; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 928; SI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd 929; SI-NEXT: s_mov_b32 s7, 0xf000 930; SI-NEXT: s_mov_b32 s6, -1 931; SI-NEXT: s_waitcnt lgkmcnt(0) 932; SI-NEXT: v_mov_b32_e32 v0, s3 933; SI-NEXT: v_mul_hi_u32 v1, s2, v0 934; SI-NEXT: s_mov_b32 s4, s0 935; SI-NEXT: s_mul_i32 s0, s2, s3 936; SI-NEXT: v_mov_b32_e32 v0, s0 937; SI-NEXT: v_mov_b32_e32 v2, s9 938; SI-NEXT: v_add_i32_e32 v0, vcc, s8, v0 939; SI-NEXT: s_mov_b32 s5, s1 940; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc 941; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 942; SI-NEXT: s_endpgm 943; 944; GFX9-LABEL: mad_i64_i32_uniform: 945; GFX9: ; %bb.0: 946; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 947; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 948; GFX9-NEXT: v_mov_b32_e32 v2, 0 949; GFX9-NEXT: s_waitcnt lgkmcnt(0) 950; GFX9-NEXT: s_mul_hi_u32 s4, s2, s3 951; GFX9-NEXT: s_mul_i32 s2, s2, s3 952; GFX9-NEXT: s_add_u32 s2, s2, s6 953; GFX9-NEXT: s_addc_u32 s3, s4, s7 954; GFX9-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 955; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 956; GFX9-NEXT: s_endpgm 957; 958; GFX11-LABEL: mad_i64_i32_uniform: 
959; GFX11: ; %bb.0: 960; GFX11-NEXT: s_clause 0x1 961; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 962; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 963; GFX11-NEXT: s_waitcnt lgkmcnt(0) 964; GFX11-NEXT: s_mul_i32 s6, s2, s3 965; GFX11-NEXT: s_mul_hi_u32 s3, s2, s3 966; GFX11-NEXT: s_add_u32 s2, s6, s4 967; GFX11-NEXT: s_addc_u32 s3, s3, s5 968; GFX11-NEXT: v_mov_b32_e32 v0, s2 969; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 970; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 971; GFX11-NEXT: s_endpgm 972; 973; GFX12-LABEL: mad_i64_i32_uniform: 974; GFX12: ; %bb.0: 975; GFX12-NEXT: s_clause 0x1 976; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 977; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 978; GFX12-NEXT: s_mov_b32 s7, 0 979; GFX12-NEXT: s_wait_kmcnt 0x0 980; GFX12-NEXT: s_mov_b32 s6, s2 981; GFX12-NEXT: s_mov_b32 s2, s3 982; GFX12-NEXT: s_mov_b32 s3, s7 983; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 984; GFX12-NEXT: s_mul_u64 s[2:3], s[6:7], s[2:3] 985; GFX12-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[4:5] 986; GFX12-NEXT: v_mov_b32_e32 v2, 0 987; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 988; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1] 989; GFX12-NEXT: s_endpgm 990 %ext0 = zext i32 %arg0 to i64 991 %ext1 = zext i32 %arg1 to i64 992 %mul = mul i64 %ext0, %ext1 993 %mad = add i64 %mul, %arg2 994 store i64 %mad, ptr addrspace(1) %out 995 ret void 996} 997 998define i64 @mad_i64_i32_twice(i32 %arg0, i32 %arg1, i64 %arg2, i64 %arg3) #0 { 999; CI-LABEL: mad_i64_i32_twice: 1000; CI: ; %bb.0: 1001; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1002; CI-NEXT: v_mad_i64_i32 v[2:3], s[4:5], v0, v1, v[2:3] 1003; CI-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[4:5] 1004; CI-NEXT: v_xor_b32_e32 v1, v3, v1 1005; CI-NEXT: v_xor_b32_e32 v0, v2, v0 1006; CI-NEXT: s_setpc_b64 s[30:31] 1007; 1008; SI-LABEL: mad_i64_i32_twice: 1009; SI: ; %bb.0: 1010; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
1011; SI-NEXT: v_mul_lo_u32 v6, v0, v1 1012; SI-NEXT: v_mul_hi_i32 v0, v0, v1 1013; SI-NEXT: v_add_i32_e32 v2, vcc, v6, v2 1014; SI-NEXT: v_addc_u32_e32 v1, vcc, v0, v3, vcc 1015; SI-NEXT: v_add_i32_e32 v3, vcc, v6, v4 1016; SI-NEXT: v_addc_u32_e32 v0, vcc, v0, v5, vcc 1017; SI-NEXT: v_xor_b32_e32 v1, v1, v0 1018; SI-NEXT: v_xor_b32_e32 v0, v2, v3 1019; SI-NEXT: s_setpc_b64 s[30:31] 1020; 1021; GFX9-LABEL: mad_i64_i32_twice: 1022; GFX9: ; %bb.0: 1023; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1024; GFX9-NEXT: v_mad_i64_i32 v[2:3], s[4:5], v0, v1, v[2:3] 1025; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[4:5] 1026; GFX9-NEXT: v_xor_b32_e32 v1, v3, v1 1027; GFX9-NEXT: v_xor_b32_e32 v0, v2, v0 1028; GFX9-NEXT: s_setpc_b64 s[30:31] 1029; 1030; GFX1100-LABEL: mad_i64_i32_twice: 1031; GFX1100: ; %bb.0: 1032; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1033; GFX1100-NEXT: v_mad_i64_i32 v[6:7], null, v0, v1, v[2:3] 1034; GFX1100-NEXT: v_mad_i64_i32 v[2:3], null, v0, v1, v[4:5] 1035; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1036; GFX1100-NEXT: v_xor_b32_e32 v0, v6, v2 1037; GFX1100-NEXT: v_xor_b32_e32 v1, v7, v3 1038; GFX1100-NEXT: s_setpc_b64 s[30:31] 1039; 1040; GFX1150-LABEL: mad_i64_i32_twice: 1041; GFX1150: ; %bb.0: 1042; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1043; GFX1150-NEXT: v_mad_i64_i32 v[2:3], null, v0, v1, v[2:3] 1044; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v0, v1, v[4:5] 1045; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1046; GFX1150-NEXT: v_xor_b32_e32 v0, v2, v0 1047; GFX1150-NEXT: v_xor_b32_e32 v1, v3, v1 1048; GFX1150-NEXT: s_setpc_b64 s[30:31] 1049; 1050; GFX12-LABEL: mad_i64_i32_twice: 1051; GFX12: ; %bb.0: 1052; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1053; GFX12-NEXT: s_wait_expcnt 0x0 1054; GFX12-NEXT: s_wait_samplecnt 0x0 1055; GFX12-NEXT: s_wait_bvhcnt 0x0 1056; GFX12-NEXT: s_wait_kmcnt 0x0 1057; GFX12-NEXT: v_mad_co_i64_i32 
v[2:3], null, v0, v1, v[2:3] 1058; GFX12-NEXT: v_mad_co_i64_i32 v[0:1], null, v0, v1, v[4:5] 1059; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1060; GFX12-NEXT: v_xor_b32_e32 v0, v2, v0 1061; GFX12-NEXT: v_xor_b32_e32 v1, v3, v1 1062; GFX12-NEXT: s_setpc_b64 s[30:31] 1063 %sext0 = sext i32 %arg0 to i64 1064 %sext1 = sext i32 %arg1 to i64 1065 %mul = mul i64 %sext0, %sext1 1066 %mad1 = add i64 %mul, %arg2 1067 %mad2 = add i64 %mul, %arg3 1068 %out = xor i64 %mad1, %mad2 1069 ret i64 %out 1070} 1071 1072define i64 @mad_i64_i32_thrice(i32 %arg0, i32 %arg1, i64 %arg2, i64 %arg3, i64 %arg4) #0 { 1073; CI-LABEL: mad_i64_i32_thrice: 1074; CI: ; %bb.0: 1075; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1076; CI-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, 0 1077; CI-NEXT: v_add_i32_e32 v2, vcc, v0, v2 1078; CI-NEXT: v_addc_u32_e32 v3, vcc, v1, v3, vcc 1079; CI-NEXT: v_add_i32_e32 v4, vcc, v0, v4 1080; CI-NEXT: v_addc_u32_e32 v5, vcc, v1, v5, vcc 1081; CI-NEXT: v_add_i32_e32 v0, vcc, v0, v6 1082; CI-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc 1083; CI-NEXT: v_xor_b32_e32 v3, v3, v5 1084; CI-NEXT: v_xor_b32_e32 v2, v2, v4 1085; CI-NEXT: v_xor_b32_e32 v1, v3, v1 1086; CI-NEXT: v_xor_b32_e32 v0, v2, v0 1087; CI-NEXT: s_setpc_b64 s[30:31] 1088; 1089; SI-LABEL: mad_i64_i32_thrice: 1090; SI: ; %bb.0: 1091; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1092; SI-NEXT: v_mul_lo_u32 v8, v0, v1 1093; SI-NEXT: v_mul_hi_i32 v0, v0, v1 1094; SI-NEXT: v_add_i32_e32 v1, vcc, v8, v2 1095; SI-NEXT: v_addc_u32_e32 v2, vcc, v0, v3, vcc 1096; SI-NEXT: v_add_i32_e32 v3, vcc, v8, v4 1097; SI-NEXT: v_addc_u32_e32 v4, vcc, v0, v5, vcc 1098; SI-NEXT: v_add_i32_e32 v5, vcc, v8, v6 1099; SI-NEXT: v_addc_u32_e32 v0, vcc, v0, v7, vcc 1100; SI-NEXT: v_xor_b32_e32 v2, v2, v4 1101; SI-NEXT: v_xor_b32_e32 v3, v1, v3 1102; SI-NEXT: v_xor_b32_e32 v1, v2, v0 1103; SI-NEXT: v_xor_b32_e32 v0, v3, v5 1104; SI-NEXT: s_setpc_b64 s[30:31] 1105; 1106; GFX9-LABEL: 
mad_i64_i32_thrice: 1107; GFX9: ; %bb.0: 1108; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1109; GFX9-NEXT: v_mad_i64_i32 v[2:3], s[4:5], v0, v1, v[2:3] 1110; GFX9-NEXT: v_mad_i64_i32 v[4:5], s[4:5], v0, v1, v[4:5] 1111; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[6:7] 1112; GFX9-NEXT: v_xor_b32_e32 v3, v3, v5 1113; GFX9-NEXT: v_xor_b32_e32 v2, v2, v4 1114; GFX9-NEXT: v_xor_b32_e32 v1, v3, v1 1115; GFX9-NEXT: v_xor_b32_e32 v0, v2, v0 1116; GFX9-NEXT: s_setpc_b64 s[30:31] 1117; 1118; GFX1100-LABEL: mad_i64_i32_thrice: 1119; GFX1100: ; %bb.0: 1120; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1121; GFX1100-NEXT: v_mad_i64_i32 v[8:9], null, v0, v1, 0 1122; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1123; GFX1100-NEXT: v_add_co_u32 v0, vcc_lo, v8, v2 1124; GFX1100-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v9, v3, vcc_lo 1125; GFX1100-NEXT: v_add_co_u32 v2, vcc_lo, v8, v4 1126; GFX1100-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v9, v5, vcc_lo 1127; GFX1100-NEXT: v_add_co_u32 v4, vcc_lo, v8, v6 1128; GFX1100-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v9, v7, vcc_lo 1129; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1130; GFX1100-NEXT: v_xor_b32_e32 v0, v0, v2 1131; GFX1100-NEXT: v_xor_b32_e32 v1, v1, v3 1132; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1133; GFX1100-NEXT: v_xor_b32_e32 v0, v0, v4 1134; GFX1100-NEXT: v_xor_b32_e32 v1, v1, v5 1135; GFX1100-NEXT: s_setpc_b64 s[30:31] 1136; 1137; GFX1150-LABEL: mad_i64_i32_thrice: 1138; GFX1150: ; %bb.0: 1139; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1140; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v0, v1, 0 1141; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1142; GFX1150-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2 1143; GFX1150-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo 1144; GFX1150-NEXT: v_add_co_u32 v4, vcc_lo, v0, v4 1145; 
GFX1150-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v5, vcc_lo 1146; GFX1150-NEXT: v_add_co_u32 v0, vcc_lo, v0, v6 1147; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v7, vcc_lo 1148; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1149; GFX1150-NEXT: v_xor_b32_e32 v2, v2, v4 1150; GFX1150-NEXT: v_xor_b32_e32 v3, v3, v5 1151; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1152; GFX1150-NEXT: v_xor_b32_e32 v0, v2, v0 1153; GFX1150-NEXT: v_xor_b32_e32 v1, v3, v1 1154; GFX1150-NEXT: s_setpc_b64 s[30:31] 1155; 1156; GFX12-LABEL: mad_i64_i32_thrice: 1157; GFX12: ; %bb.0: 1158; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1159; GFX12-NEXT: s_wait_expcnt 0x0 1160; GFX12-NEXT: s_wait_samplecnt 0x0 1161; GFX12-NEXT: s_wait_bvhcnt 0x0 1162; GFX12-NEXT: s_wait_kmcnt 0x0 1163; GFX12-NEXT: v_mad_co_i64_i32 v[0:1], null, v0, v1, 0 1164; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1165; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2 1166; GFX12-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo 1167; GFX12-NEXT: v_add_co_u32 v4, vcc_lo, v0, v4 1168; GFX12-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v5, vcc_lo 1169; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v6 1170; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v7, vcc_lo 1171; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1172; GFX12-NEXT: v_xor_b32_e32 v2, v2, v4 1173; GFX12-NEXT: v_xor_b32_e32 v3, v3, v5 1174; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1175; GFX12-NEXT: v_xor_b32_e32 v0, v2, v0 1176; GFX12-NEXT: v_xor_b32_e32 v1, v3, v1 1177; GFX12-NEXT: s_setpc_b64 s[30:31] 1178 %sext0 = sext i32 %arg0 to i64 1179 %sext1 = sext i32 %arg1 to i64 1180 %mul = mul i64 %sext0, %sext1 1181 %mad1 = add i64 %mul, %arg2 1182 %mad2 = add i64 %mul, %arg3 1183 %mad3 = add i64 %mul, %arg4 1184 %out.p = xor i64 %mad1, %mad2 1185 %out = xor i64 %out.p, %mad3 
1186 ret i64 %out 1187} 1188 1189define i64 @mad_i64_i32_secondary_use(i32 %arg0, i32 %arg1, i64 %arg2) #0 { 1190; CI-LABEL: mad_i64_i32_secondary_use: 1191; CI: ; %bb.0: 1192; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1193; CI-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, 0 1194; CI-NEXT: v_add_i32_e32 v2, vcc, v0, v2 1195; CI-NEXT: v_addc_u32_e32 v3, vcc, v1, v3, vcc 1196; CI-NEXT: v_xor_b32_e32 v1, v3, v1 1197; CI-NEXT: v_xor_b32_e32 v0, v2, v0 1198; CI-NEXT: s_setpc_b64 s[30:31] 1199; 1200; SI-LABEL: mad_i64_i32_secondary_use: 1201; SI: ; %bb.0: 1202; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1203; SI-NEXT: v_mul_lo_u32 v4, v0, v1 1204; SI-NEXT: v_mul_hi_i32 v0, v0, v1 1205; SI-NEXT: v_add_i32_e32 v2, vcc, v4, v2 1206; SI-NEXT: v_addc_u32_e32 v1, vcc, v0, v3, vcc 1207; SI-NEXT: v_xor_b32_e32 v1, v1, v0 1208; SI-NEXT: v_xor_b32_e32 v0, v2, v4 1209; SI-NEXT: s_setpc_b64 s[30:31] 1210; 1211; GFX9-LABEL: mad_i64_i32_secondary_use: 1212; GFX9: ; %bb.0: 1213; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1214; GFX9-NEXT: v_mad_i64_i32 v[4:5], s[4:5], v0, v1, 0 1215; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3] 1216; GFX9-NEXT: v_xor_b32_e32 v1, v1, v5 1217; GFX9-NEXT: v_xor_b32_e32 v0, v0, v4 1218; GFX9-NEXT: s_setpc_b64 s[30:31] 1219; 1220; GFX1100-LABEL: mad_i64_i32_secondary_use: 1221; GFX1100: ; %bb.0: 1222; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1223; GFX1100-NEXT: v_mad_i64_i32 v[4:5], null, v0, v1, 0 1224; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1225; GFX1100-NEXT: v_add_co_u32 v0, vcc_lo, v4, v2 1226; GFX1100-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v5, v3, vcc_lo 1227; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1228; GFX1100-NEXT: v_xor_b32_e32 v0, v0, v4 1229; GFX1100-NEXT: v_xor_b32_e32 v1, v1, v5 1230; GFX1100-NEXT: s_setpc_b64 s[30:31] 1231; 1232; GFX1150-LABEL: mad_i64_i32_secondary_use: 1233; GFX1150: ; %bb.0: 1234; 
GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1235; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v0, v1, 0 1236; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1237; GFX1150-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2 1238; GFX1150-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo 1239; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1240; GFX1150-NEXT: v_xor_b32_e32 v0, v2, v0 1241; GFX1150-NEXT: v_xor_b32_e32 v1, v3, v1 1242; GFX1150-NEXT: s_setpc_b64 s[30:31] 1243; 1244; GFX12-LABEL: mad_i64_i32_secondary_use: 1245; GFX12: ; %bb.0: 1246; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1247; GFX12-NEXT: s_wait_expcnt 0x0 1248; GFX12-NEXT: s_wait_samplecnt 0x0 1249; GFX12-NEXT: s_wait_bvhcnt 0x0 1250; GFX12-NEXT: s_wait_kmcnt 0x0 1251; GFX12-NEXT: v_mad_co_i64_i32 v[0:1], null, v0, v1, 0 1252; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1253; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2 1254; GFX12-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo 1255; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1256; GFX12-NEXT: v_xor_b32_e32 v0, v2, v0 1257; GFX12-NEXT: v_xor_b32_e32 v1, v3, v1 1258; GFX12-NEXT: s_setpc_b64 s[30:31] 1259 %sext0 = sext i32 %arg0 to i64 1260 %sext1 = sext i32 %arg1 to i64 1261 %mul = mul i64 %sext0, %sext1 1262 %mad = add i64 %mul, %arg2 1263 %out = xor i64 %mad, %mul 1264 ret i64 %out 1265} 1266 1267define i48 @mad_i48_i48(i48 %arg0, i48 %arg1, i48 %arg2) #0 { 1268; CI-LABEL: mad_i48_i48: 1269; CI: ; %bb.0: 1270; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1271; CI-NEXT: v_mov_b32_e32 v6, v1 1272; CI-NEXT: v_mov_b32_e32 v7, v0 1273; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v7, v2, v[4:5] 1274; CI-NEXT: v_mul_lo_u32 v2, v6, v2 1275; CI-NEXT: v_mul_lo_u32 v3, v7, v3 1276; CI-NEXT: v_add_i32_e32 v1, vcc, v2, v1 1277; CI-NEXT: v_add_i32_e32 v1, vcc, v3, v1 1278; CI-NEXT: s_setpc_b64 s[30:31] 1279; 1280; 
SI-LABEL: mad_i48_i48: 1281; SI: ; %bb.0: 1282; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1283; SI-NEXT: v_mul_lo_u32 v3, v0, v3 1284; SI-NEXT: v_mul_hi_u32 v6, v0, v2 1285; SI-NEXT: v_mul_lo_u32 v1, v1, v2 1286; SI-NEXT: v_mul_lo_u32 v0, v0, v2 1287; SI-NEXT: v_add_i32_e32 v3, vcc, v6, v3 1288; SI-NEXT: v_add_i32_e32 v1, vcc, v3, v1 1289; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v4 1290; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc 1291; SI-NEXT: s_setpc_b64 s[30:31] 1292; 1293; GFX9-LABEL: mad_i48_i48: 1294; GFX9: ; %bb.0: 1295; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1296; GFX9-NEXT: v_mov_b32_e32 v6, v1 1297; GFX9-NEXT: v_mov_b32_e32 v7, v0 1298; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v7, v2, v[4:5] 1299; GFX9-NEXT: v_mul_lo_u32 v3, v7, v3 1300; GFX9-NEXT: v_mul_lo_u32 v2, v6, v2 1301; GFX9-NEXT: v_add3_u32 v1, v2, v1, v3 1302; GFX9-NEXT: s_setpc_b64 s[30:31] 1303; 1304; GFX11-LABEL: mad_i48_i48: 1305; GFX11: ; %bb.0: 1306; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1307; GFX11-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v0 1308; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) 1309; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v7, v2, v[4:5] 1310; GFX11-NEXT: v_mul_lo_u32 v3, v7, v3 1311; GFX11-NEXT: v_mul_lo_u32 v2, v6, v2 1312; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1313; GFX11-NEXT: v_add3_u32 v1, v2, v1, v3 1314; GFX11-NEXT: s_setpc_b64 s[30:31] 1315; 1316; GFX12-LABEL: mad_i48_i48: 1317; GFX12: ; %bb.0: 1318; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1319; GFX12-NEXT: s_wait_expcnt 0x0 1320; GFX12-NEXT: s_wait_samplecnt 0x0 1321; GFX12-NEXT: s_wait_bvhcnt 0x0 1322; GFX12-NEXT: s_wait_kmcnt 0x0 1323; GFX12-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v0 1324; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) 1325; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v7, v2, v[4:5] 1326; GFX12-NEXT: v_mul_lo_u32 v3, v7, v3 1327; GFX12-NEXT: v_mul_lo_u32 v2, v6, v2 
1328; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1329; GFX12-NEXT: v_add3_u32 v1, v2, v1, v3 1330; GFX12-NEXT: s_setpc_b64 s[30:31] 1331 %m = mul i48 %arg0, %arg1 1332 %a = add i48 %m, %arg2 1333 ret i48 %a 1334} 1335 1336define i64 @lshr_mad_i64_1(i64 %arg0, i64 %arg1) #0 { 1337; CI-LABEL: lshr_mad_i64_1: 1338; CI: ; %bb.0: 1339; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1340; CI-NEXT: v_mov_b32_e32 v2, v1 1341; CI-NEXT: v_mov_b32_e32 v1, 0 1342; CI-NEXT: s_movk_i32 s4, 0xfc19 1343; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, s4, v[0:1] 1344; CI-NEXT: s_setpc_b64 s[30:31] 1345; 1346; SI-LABEL: lshr_mad_i64_1: 1347; SI: ; %bb.0: 1348; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1349; SI-NEXT: s_movk_i32 s4, 0xfc19 1350; SI-NEXT: v_mul_hi_u32 v2, v1, s4 1351; SI-NEXT: v_mul_lo_u32 v3, v1, s4 1352; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v1 1353; SI-NEXT: v_add_i32_e32 v0, vcc, v3, v0 1354; SI-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc 1355; SI-NEXT: s_setpc_b64 s[30:31] 1356; 1357; GFX9-LABEL: lshr_mad_i64_1: 1358; GFX9: ; %bb.0: 1359; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1360; GFX9-NEXT: v_mov_b32_e32 v2, v1 1361; GFX9-NEXT: v_mov_b32_e32 v1, 0 1362; GFX9-NEXT: s_movk_i32 s4, 0xfc19 1363; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, s4, v[0:1] 1364; GFX9-NEXT: s_setpc_b64 s[30:31] 1365; 1366; GFX1100-LABEL: lshr_mad_i64_1: 1367; GFX1100: ; %bb.0: 1368; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1369; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, 0 1370; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1371; GFX1100-NEXT: v_mad_u64_u32 v[2:3], null, 0xfffffc19, v4, v[0:1] 1372; GFX1100-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3 1373; GFX1100-NEXT: s_setpc_b64 s[30:31] 1374; 1375; GFX1150-LABEL: lshr_mad_i64_1: 1376; GFX1150: ; %bb.0: 1377; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1378; GFX1150-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v1, 0 1379; GFX1150-NEXT: 
s_delay_alu instid0(VALU_DEP_1) 1380; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, 0xfffffc19, v2, v[0:1] 1381; GFX1150-NEXT: s_setpc_b64 s[30:31] 1382; 1383; GFX12-LABEL: lshr_mad_i64_1: 1384; GFX12: ; %bb.0: 1385; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1386; GFX12-NEXT: s_wait_expcnt 0x0 1387; GFX12-NEXT: s_wait_samplecnt 0x0 1388; GFX12-NEXT: s_wait_bvhcnt 0x0 1389; GFX12-NEXT: s_wait_kmcnt 0x0 1390; GFX12-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v1, 0 1391; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1392; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, 0xfffffc19, v2, v[0:1] 1393; GFX12-NEXT: s_setpc_b64 s[30:31] 1394 %lsh = lshr i64 %arg0, 32 1395 %mul = mul i64 %lsh, s0xfffffffffffffc19 1396 %mad = add i64 %mul, %arg0 1397 1398 ret i64 %mad 1399} 1400 1401define i64 @lshr_mad_i64_2(i64 %arg0) #0 { 1402; CI-LABEL: lshr_mad_i64_2: 1403; CI: ; %bb.0: 1404; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1405; CI-NEXT: v_mov_b32_e32 v2, v1 1406; CI-NEXT: v_mov_b32_e32 v1, 0 1407; CI-NEXT: s_movk_i32 s4, 0xd1 1408; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, s4, v[0:1] 1409; CI-NEXT: s_setpc_b64 s[30:31] 1410; 1411; SI-LABEL: lshr_mad_i64_2: 1412; SI: ; %bb.0: 1413; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1414; SI-NEXT: s_movk_i32 s4, 0xd1 1415; SI-NEXT: v_mul_hi_u32 v2, v1, s4 1416; SI-NEXT: v_mul_lo_u32 v3, v1, s4 1417; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v1 1418; SI-NEXT: v_add_i32_e32 v0, vcc, v3, v0 1419; SI-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc 1420; SI-NEXT: s_setpc_b64 s[30:31] 1421; 1422; GFX9-LABEL: lshr_mad_i64_2: 1423; GFX9: ; %bb.0: 1424; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1425; GFX9-NEXT: v_mov_b32_e32 v2, v1 1426; GFX9-NEXT: v_mov_b32_e32 v1, 0 1427; GFX9-NEXT: s_movk_i32 s4, 0xd1 1428; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, s4, v[0:1] 1429; GFX9-NEXT: s_setpc_b64 s[30:31] 1430; 1431; GFX1100-LABEL: lshr_mad_i64_2: 1432; GFX1100: ; %bb.0: 1433; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1434; GFX1100-NEXT: 
v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, 0 1435; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1436; GFX1100-NEXT: v_mad_u64_u32 v[2:3], null, 0xd1, v4, v[0:1] 1437; GFX1100-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3 1438; GFX1100-NEXT: s_setpc_b64 s[30:31] 1439; 1440; GFX1150-LABEL: lshr_mad_i64_2: 1441; GFX1150: ; %bb.0: 1442; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1443; GFX1150-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v1, 0 1444; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) 1445; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, 0xd1, v2, v[0:1] 1446; GFX1150-NEXT: s_setpc_b64 s[30:31] 1447; 1448; GFX12-LABEL: lshr_mad_i64_2: 1449; GFX12: ; %bb.0: 1450; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1451; GFX12-NEXT: s_wait_expcnt 0x0 1452; GFX12-NEXT: s_wait_samplecnt 0x0 1453; GFX12-NEXT: s_wait_bvhcnt 0x0 1454; GFX12-NEXT: s_wait_kmcnt 0x0 1455; GFX12-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v1, 0 1456; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1457; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, 0xd1, v2, v[0:1] 1458; GFX12-NEXT: s_setpc_b64 s[30:31] 1459 %lsh = lshr i64 %arg0, 32 1460 %mul = mul i64 %lsh, s0xffffffff000000d1 1461 %mad = add i64 %mul, %arg0 1462 1463 ret i64 %mad 1464} 1465 1466define i64 @lshr_mad_i64_3(i64 %arg0) #0 { 1467; CI-LABEL: lshr_mad_i64_3: 1468; CI: ; %bb.0: 1469; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1470; CI-NEXT: v_mov_b32_e32 v2, v1 1471; CI-NEXT: v_mov_b32_e32 v1, 0 1472; CI-NEXT: s_movk_i32 s4, 0xfc88 1473; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, s4, v[0:1] 1474; CI-NEXT: s_setpc_b64 s[30:31] 1475; 1476; SI-LABEL: lshr_mad_i64_3: 1477; SI: ; %bb.0: 1478; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1479; SI-NEXT: s_movk_i32 s4, 0xfc88 1480; SI-NEXT: v_mul_hi_u32 v2, v1, s4 1481; SI-NEXT: v_mul_lo_u32 v3, v1, s4 1482; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v1 1483; SI-NEXT: v_add_i32_e32 v0, vcc, v3, v0 1484; SI-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc 
1485; SI-NEXT: s_setpc_b64 s[30:31] 1486; 1487; GFX9-LABEL: lshr_mad_i64_3: 1488; GFX9: ; %bb.0: 1489; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1490; GFX9-NEXT: v_mov_b32_e32 v2, v1 1491; GFX9-NEXT: v_mov_b32_e32 v1, 0 1492; GFX9-NEXT: s_movk_i32 s4, 0xfc88 1493; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, s4, v[0:1] 1494; GFX9-NEXT: s_setpc_b64 s[30:31] 1495; 1496; GFX1100-LABEL: lshr_mad_i64_3: 1497; GFX1100: ; %bb.0: 1498; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1499; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, 0 1500; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1501; GFX1100-NEXT: v_mad_u64_u32 v[2:3], null, 0xfffffc88, v4, v[0:1] 1502; GFX1100-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3 1503; GFX1100-NEXT: s_setpc_b64 s[30:31] 1504; 1505; GFX1150-LABEL: lshr_mad_i64_3: 1506; GFX1150: ; %bb.0: 1507; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1508; GFX1150-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v1, 0 1509; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) 1510; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, 0xfffffc88, v2, v[0:1] 1511; GFX1150-NEXT: s_setpc_b64 s[30:31] 1512; 1513; GFX12-LABEL: lshr_mad_i64_3: 1514; GFX12: ; %bb.0: 1515; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1516; GFX12-NEXT: s_wait_expcnt 0x0 1517; GFX12-NEXT: s_wait_samplecnt 0x0 1518; GFX12-NEXT: s_wait_bvhcnt 0x0 1519; GFX12-NEXT: s_wait_kmcnt 0x0 1520; GFX12-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v1, 0 1521; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1522; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, 0xfffffc88, v2, v[0:1] 1523; GFX12-NEXT: s_setpc_b64 s[30:31] 1524 %lsh = lshr i64 %arg0, 32 1525 %mul = mul i64 s0xfffffffffffffc88, %lsh 1526 %mad = add i64 %mul, %arg0 1527 1528 ret i64 %mad 1529} 1530 1531define i64 @lshr_mad_i64_4(i32 %arg0, i64 %arg1) #0 { 1532; CI-LABEL: lshr_mad_i64_4: 1533; CI: ; %bb.0: 1534; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1535; CI-NEXT: v_mul_lo_u32 
v2, v2, v0 1536; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, v0, 0 1537; CI-NEXT: s_movk_i32 s4, 0xfc88 1538; CI-NEXT: v_add_i32_e32 v2, vcc, v1, v2 1539; CI-NEXT: v_mov_b32_e32 v1, 0 1540; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, s4, v[0:1] 1541; CI-NEXT: s_setpc_b64 s[30:31] 1542; 1543; SI-LABEL: lshr_mad_i64_4: 1544; SI: ; %bb.0: 1545; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1546; SI-NEXT: v_mul_lo_u32 v2, v2, v0 1547; SI-NEXT: v_mul_hi_u32 v3, v1, v0 1548; SI-NEXT: s_movk_i32 s4, 0xfc88 1549; SI-NEXT: v_mul_lo_u32 v0, v1, v0 1550; SI-NEXT: v_add_i32_e32 v2, vcc, v3, v2 1551; SI-NEXT: v_mul_hi_u32 v3, v2, s4 1552; SI-NEXT: v_mul_lo_u32 v1, v2, s4 1553; SI-NEXT: v_sub_i32_e32 v3, vcc, v3, v2 1554; SI-NEXT: v_add_i32_e32 v0, vcc, v1, v0 1555; SI-NEXT: v_addc_u32_e32 v1, vcc, v3, v2, vcc 1556; SI-NEXT: s_setpc_b64 s[30:31] 1557; 1558; GFX9-LABEL: lshr_mad_i64_4: 1559; GFX9: ; %bb.0: 1560; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1561; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v0, 0 1562; GFX9-NEXT: v_mov_b32_e32 v6, v5 1563; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v0, v[6:7] 1564; GFX9-NEXT: v_mov_b32_e32 v5, 0 1565; GFX9-NEXT: s_movk_i32 s4, 0xfc88 1566; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s4, v[4:5] 1567; GFX9-NEXT: s_setpc_b64 s[30:31] 1568; 1569; GFX1100-LABEL: lshr_mad_i64_4: 1570; GFX1100: ; %bb.0: 1571; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1572; GFX1100-NEXT: v_mad_u64_u32 v[3:4], null, v1, v0, 0 1573; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1574; GFX1100-NEXT: v_dual_mov_b32 v1, v4 :: v_dual_mov_b32 v4, 0 1575; GFX1100-NEXT: v_mad_u64_u32 v[5:6], null, v2, v0, v[1:2] 1576; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) 1577; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, 0xfffffc88, v5, v[3:4] 1578; GFX1100-NEXT: s_setpc_b64 s[30:31] 1579; 1580; GFX1150-LABEL: lshr_mad_i64_4: 1581; GFX1150: ; %bb.0: 1582; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1583; 
GFX1150-NEXT: v_mad_u64_u32 v[3:4], null, v1, v0, 0
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-NEXT: v_dual_mov_b32 v1, v4 :: v_dual_mov_b32 v4, 0
; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v2, v0, v[1:2]
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, 0xfffffc88, v0, v[3:4]
; GFX1150-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: lshr_mad_i64_4:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mad_co_u64_u32 v[3:4], null, v1, v0, 0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_dual_mov_b32 v1, v4 :: v_dual_mov_b32 v4, 0
; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v2, v0, v[1:2]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, 0xfffffc88, v0, v[3:4]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %ext = zext i32 %arg0 to i64
  %mul1 = mul i64 %arg1, %ext
  %lsh = lshr i64 %mul1, 32
  %mul2 = mul i64 %lsh, s0xfffffffffffffc88
  %mad = add i64 %mul2, %mul1
  ret i64 %mad
}

; Negative variant of the "(x >> 32) * C + x" shape: the shift amount here is
; 36, not 32, so the multiplicand is not exactly the high dword of %arg0.
; The expected code shifts the high dword right by 4 and then uses a signed
; 32-bit multiply-add (v_mul_lo_u32 / v_mul_hi_i32 plus add-with-carry on
; tahiti) with the full 64-bit %arg0 as the addend.
; NOTE(review): presumably guards a combine that requires a shift of exactly
; 32 -- confirm against the positive lshr_mad_i64_* tests earlier in the file.
define i64 @lshr_mad_i64_negative_1(i64 %arg0) #0 {
; CI-LABEL: lshr_mad_i64_negative_1:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_lshrrev_b32_e32 v2, 4, v1
; CI-NEXT: s_movk_i32 s4, 0xfc19
; CI-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v2, s4, v[0:1]
; CI-NEXT: s_setpc_b64 s[30:31]
;
; SI-LABEL: lshr_mad_i64_negative_1:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_lshrrev_b32_e32 v2, 4, v1
; SI-NEXT: s_movk_i32 s4, 0xfc19
; SI-NEXT: v_mul_lo_u32 v3, v2, s4
; SI-NEXT: v_mul_hi_i32 v2, v2, s4
; SI-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; SI-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
; SI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: lshr_mad_i64_negative_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 4, v1
; GFX9-NEXT: s_movk_i32 s4, 0xfc19
; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v2, s4, v[0:1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: lshr_mad_i64_negative_1:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_lshrrev_b32_e32 v4, 4, v1
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-NEXT: v_mad_i64_i32 v[2:3], null, 0xfffffc19, v4, v[0:1]
; GFX1100-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1150-LABEL: lshr_mad_i64_negative_1:
; GFX1150: ; %bb.0:
; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1150-NEXT: v_lshrrev_b32_e32 v2, 4, v1
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, 0xfffffc19, v2, v[0:1]
; GFX1150-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: lshr_mad_i64_negative_1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_lshrrev_b32_e32 v2, 4, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_mad_co_i64_i32 v[0:1], null, 0xfffffc19, v2, v[0:1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %lsh = lshr i64 %arg0, 36
  %mul = mul i64 %lsh, s0xfffffffffffffc19
  %mad = add i64 %mul, %arg0

  ret i64 %mad
}

; Negative variant: the shift is 32, but the multiplier 0xffffff00000000d1 does
; not have all ones in its upper 32 bits. The expected code multiplies the high
; dword by 0xd1 and handles the upper half of the constant separately: a left
; shift of the high dword by 8 that is subtracted from the high result dword.
define i64 @lshr_mad_i64_negative_2(i64 %arg0) #0 {
; CI-LABEL: lshr_mad_i64_negative_2:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_movk_i32 s4, 0xd1
; CI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s4, v[0:1]
; CI-NEXT: v_lshlrev_b32_e32 v0, 8, v1
; CI-NEXT: v_sub_i32_e32 v1, vcc, v3, v0
; CI-NEXT: v_mov_b32_e32 v0, v2
; CI-NEXT: s_setpc_b64 s[30:31]
;
; SI-LABEL: lshr_mad_i64_negative_2:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: s_movk_i32 s4, 0xd1
; SI-NEXT: v_mul_hi_u32 v2, v1, s4
; SI-NEXT: v_mul_lo_u32 v4, v1, s4
; SI-NEXT: v_lshlrev_b32_e32 v3, 8, v1
; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
; SI-NEXT: v_add_i32_e32 v0, vcc, v4, v0
; SI-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
; SI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: lshr_mad_i64_negative_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_movk_i32 s4, 0xd1
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s4, v[0:1]
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 8, v1
; GFX9-NEXT: v_sub_u32_e32 v1, v3, v0
; GFX9-NEXT: v_mov_b32_e32 v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: lshr_mad_i64_negative_2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, 0xd1, v1, v[0:1]
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 8, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_sub_nc_u32_e32 v1, v3, v0
; GFX11-NEXT: v_mov_b32_e32 v0, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: lshr_mad_i64_negative_2:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mad_co_u64_u32 v[2:3], null, 0xd1, v1, v[0:1]
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 8, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-NEXT: v_sub_nc_u32_e32 v1, v3, v0
; GFX12-NEXT: v_mov_b32_e32 v0, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %lsh = lshr i64 %arg0, 32
  %mul = mul i64 %lsh, s0xffffff00000000d1
  %mad = add i64 %mul, %arg0

  ret i64 %mad
}

; Negative variant: the addend is %arg0 + 1 (%op) rather than %arg0 itself, and
; the multiplier is -1024. None of the check blocks below contains a mad
; instruction; the expected code is a 64-bit shift right by 22, a mask with
; 0xfffffc00, a 64-bit subtract from %arg0 and then the 64-bit add of 1.
define i64 @lshr_mad_i64_negative_3(i64 %arg0) #0 {
; CI-LABEL: lshr_mad_i64_negative_3:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_lshr_b64 v[2:3], v[0:1], 22
; CI-NEXT: v_and_b32_e32 v2, 0xfffffc00, v2
; CI-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CI-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
; CI-NEXT: v_add_i32_e32 v0, vcc, 1, v0
; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; CI-NEXT: s_setpc_b64 s[30:31]
;
; SI-LABEL: lshr_mad_i64_negative_3:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_lshr_b64 v[2:3], v[0:1], 22
; SI-NEXT: v_and_b32_e32 v2, 0xfffffc00, v2
; SI-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; SI-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
; SI-NEXT: v_add_i32_e32 v0, vcc, 1, v0
; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; SI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: lshr_mad_i64_negative_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshrrev_b64 v[2:3], 22, v[0:1]
; GFX9-NEXT: v_and_b32_e32 v2, 0xfffffc00, v2
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: lshr_mad_i64_negative_3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_lshrrev_b64 v[2:3], 22, v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_and_b32_e32 v2, 0xfffffc00, v2
; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: lshr_mad_i64_negative_3:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_lshrrev_b64 v[2:3], 22, v[0:1]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_and_b32_e32 v2, 0xfffffc00, v2
; GFX12-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %op = add i64 %arg0, 1
  %lsh = lshr i64 %arg0, 32
  %mul = mul i64 %lsh, s0xfffffffffffffc00
  %mad = add i64 %mul, %op

  ret i64 %mad
}

; Negative variant: the multiplier is %arg0 itself rather than a constant.
; The expected code is the general 64-bit multiply-add expansion: a
; v_mad_u64_u32 of hi * lo with %arg0 as addend, plus a separate hi * hi term
; folded into the high result dword (three multiplies on tahiti).
define i64 @lshr_mad_i64_negative_4(i64 %arg0) #0 {
; CI-LABEL: lshr_mad_i64_negative_4:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, v0, v[0:1]
; CI-NEXT: v_mul_lo_u32 v0, v1, v1
; CI-NEXT: v_add_i32_e32 v1, vcc, v0, v3
; CI-NEXT: v_mov_b32_e32 v0, v2
; CI-NEXT: s_setpc_b64 s[30:31]
;
; SI-LABEL: lshr_mad_i64_negative_4:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_mul_hi_u32 v2, v1, v0
; SI-NEXT: v_mul_lo_u32 v3, v1, v1
; SI-NEXT: v_mul_lo_u32 v4, v1, v0
; SI-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; SI-NEXT: v_add_i32_e32 v0, vcc, v4, v0
; SI-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
; SI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: lshr_mad_i64_negative_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, v0, v[0:1]
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v1, v[0:1]
; GFX9-NEXT: v_mov_b32_e32 v0, v2
; GFX9-NEXT: v_mov_b32_e32 v1, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: lshr_mad_i64_negative_4:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_mad_u64_u32 v[2:3], null, v1, v0, v[0:1]
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-NEXT: v_mov_b32_e32 v0, v3
; GFX1100-NEXT: v_mad_u64_u32 v[3:4], null, v1, v1, v[0:1]
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1150-LABEL: lshr_mad_i64_negative_4:
; GFX1150: ; %bb.0:
; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1150-NEXT: v_mad_u64_u32 v[3:4], null, v1, v0, v[0:1]
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-NEXT: v_mov_b32_e32 v0, v4
; GFX1150-NEXT: v_mad_u64_u32 v[1:2], null, v1, v1, v[0:1]
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX1150-NEXT: v_mov_b32_e32 v0, v3
; GFX1150-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: lshr_mad_i64_negative_4:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mad_co_u64_u32 v[3:4], null, v1, v0, v[0:1]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, v4
; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], null, v1, v1, v[0:1]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-NEXT: v_mov_b32_e32 v0, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %lsh = lshr i64 %arg0, 32
  %mul = mul i64 %lsh, %arg0
  %mad = add i64 %mul, %arg0

  ret i64 %mad
}

; "(x >> 32) * C + x" with C = 0xffffffffffff1c18 in an amdgpu_ps function whose
; argument is passed inreg (the checks read it from s0/s1). Only the hawaii
; checks contain a v_mad_u64_u32, with the addend high dword set to 0 and the
; result moved back with v_readfirstlane_b32. The other targets' checks use
; scalar multiplies (plus one v_mul_hi_u32 on tahiti, s_mul_u64 on gfx1200).
define amdgpu_ps i64 @lshr_mad_i64_sgpr(i64 inreg %arg0) #0 {
; CI-LABEL: lshr_mad_i64_sgpr:
; CI: ; %bb.0:
; CI-NEXT: v_mov_b32_e32 v0, s0
; CI-NEXT: v_mov_b32_e32 v1, 0
; CI-NEXT: v_mov_b32_e32 v2, 0xffff1c18
; CI-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s1, v2, v[0:1]
; CI-NEXT: v_readfirstlane_b32 s0, v0
; CI-NEXT: v_readfirstlane_b32 s1, v1
; CI-NEXT: ; return to shader part epilog
;
; SI-LABEL: lshr_mad_i64_sgpr:
; SI: ; %bb.0:
; SI-NEXT: v_mov_b32_e32 v0, 0xffff1c18
; SI-NEXT: v_mul_hi_u32 v0, s1, v0
; SI-NEXT: s_mul_i32 s2, s1, 0xffff1c18
; SI-NEXT: v_readfirstlane_b32 s3, v0
; SI-NEXT: s_sub_i32 s3, s3, s1
; SI-NEXT: s_add_u32 s0, s2, s0
; SI-NEXT: s_addc_u32 s1, s3, s1
; SI-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: lshr_mad_i64_sgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mul_hi_u32 s2, s1, 0xffff1c18
; GFX9-NEXT: s_sub_i32 s2, s2, s1
; GFX9-NEXT: s_mul_i32 s3, s1, 0xffff1c18
; GFX9-NEXT: s_add_u32 s0, s3, s0
; GFX9-NEXT: s_addc_u32 s1, s2, s1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: lshr_mad_i64_sgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_mul_hi_u32 s2, s1, 0xffff1c18
; GFX11-NEXT: s_mul_i32 s3, s1, 0xffff1c18
; GFX11-NEXT: s_sub_i32 s2, s2, s1
; GFX11-NEXT: s_add_u32 s0, s3, s0
; GFX11-NEXT: s_addc_u32 s1, s2, s1
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: lshr_mad_i64_sgpr:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_mov_b32 s4, 0xffff1c18
; GFX12-NEXT: s_mov_b32 s3, 0
; GFX12-NEXT: s_mov_b32 s2, s1
; GFX12-NEXT: s_mov_b32 s5, -1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: s_mul_u64 s[2:3], s[2:3], s[4:5]
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1]
; GFX12-NEXT: ; return to shader part epilog
  %lsh = lshr i64 %arg0, 32
  %mul = mul i64 %lsh, s0xffffffffffff1c18
  %mad = add i64 %mul, %arg0

  ret i64 %mad
}

; <2 x i64> form of "(x >> 32) * C + x" with a different constant per element
; (low dwords 0xffff1c18 and 0xffff1118, upper dwords all ones). Except on
; tahiti, whose checks use mul_lo/mul_hi, each element is expected to become a
; single 32x32+64 unsigned mad of the element's high dword with the constant's
; low dword, where the addend is the element's low dword with a zeroed high
; dword (v1 = 0, copied to v3).
; Arithmetic behind that form: with the upper 32 bits of C all ones,
;   hi * C + (hi << 32) + lo == hi * lo32(C) + lo   (mod 2^64).
define <2 x i64> @lshr_mad_i64_vec(<2 x i64> %arg0) #0 {
; CI-LABEL: lshr_mad_i64_vec:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_mov_b32_e32 v6, v3
; CI-NEXT: v_mov_b32_e32 v3, v1
; CI-NEXT: v_mov_b32_e32 v1, 0
; CI-NEXT: s_mov_b32 s4, 0xffff1c18
; CI-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v3, s4, v[0:1]
; CI-NEXT: v_mov_b32_e32 v3, v1
; CI-NEXT: s_mov_b32 s4, 0xffff1118
; CI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s4, v[2:3]
; CI-NEXT: v_mov_b32_e32 v0, v4
; CI-NEXT: v_mov_b32_e32 v1, v5
; CI-NEXT: s_setpc_b64 s[30:31]
;
; SI-LABEL: lshr_mad_i64_vec:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: s_mov_b32 s4, 0xffff1118
; SI-NEXT: v_mul_lo_u32 v4, v3, s4
; SI-NEXT: v_mul_hi_u32 v5, v3, s4
; SI-NEXT: s_mov_b32 s4, 0xffff1c18
; SI-NEXT: v_mul_hi_u32 v6, v1, s4
; SI-NEXT: v_mul_lo_u32 v7, v1, s4
; SI-NEXT: v_sub_i32_e32 v5, vcc, v5, v3
; SI-NEXT: v_sub_i32_e32 v6, vcc, v6, v1
; SI-NEXT: v_add_i32_e32 v0, vcc, v7, v0
; SI-NEXT: v_addc_u32_e32 v1, vcc, v6, v1, vcc
; SI-NEXT: v_add_i32_e32 v2, vcc, v4, v2
; SI-NEXT: v_addc_u32_e32 v3, vcc, v5, v3, vcc
; SI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: lshr_mad_i64_vec:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v6, v3
; GFX9-NEXT: v_mov_b32_e32 v3, v1
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b32 s4, 0xffff1c18
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v3, s4, v[0:1]
; GFX9-NEXT: v_mov_b32_e32 v3, v1
; GFX9-NEXT: s_mov_b32 s4, 0xffff1118
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s4, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v0, v4
; GFX9-NEXT: v_mov_b32_e32 v1, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: lshr_mad_i64_vec:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_mov_b32_e32 v8, v3
; GFX1100-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v1, 0
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-NEXT: v_mad_u64_u32 v[4:5], null, 0xffff1c18, v6, v[0:1]
; GFX1100-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v0, v4
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-NEXT: v_mad_u64_u32 v[6:7], null, 0xffff1118, v8, v[2:3]
; GFX1100-NEXT: v_dual_mov_b32 v1, v5 :: v_dual_mov_b32 v2, v6
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-NEXT: v_mov_b32_e32 v3, v7
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1150-LABEL: lshr_mad_i64_vec:
; GFX1150: ; %bb.0:
; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1150-NEXT: v_dual_mov_b32 v4, v3 :: v_dual_mov_b32 v5, v1
; GFX1150-NEXT: v_mov_b32_e32 v1, 0
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1150-NEXT: v_mov_b32_e32 v3, v1
; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, 0xffff1c18, v5, v[0:1]
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1150-NEXT: v_mad_u64_u32 v[2:3], null, 0xffff1118, v4, v[2:3]
; GFX1150-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: lshr_mad_i64_vec:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_mov_b32 v4, v3 :: v_dual_mov_b32 v5, v1
; GFX12-NEXT: v_mov_b32_e32 v1, 0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-NEXT: v_mov_b32_e32 v3, v1
; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, 0xffff1c18, v5, v[0:1]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_mad_co_u64_u32 v[2:3], null, 0xffff1118, v4, v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %lsh = lshr <2 x i64> %arg0, <i64 32, i64 32>
  %mul = mul <2 x i64> %lsh, <i64 s0xffffffffffff1c18, i64 s0xffffffffffff1118>
  %mad = add <2 x i64> %mul, %arg0

  ret <2 x i64> %mad
}

; Attribute groups. #0 is the group attached to every function definition
; visible in this part of the file.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }