; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx600 < %s | FileCheck -check-prefixes=GFX67,GFX6,GFX67-SDAG,GFX6-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx600 < %s | FileCheck -check-prefixes=GFX67,GFX6,GFX67-GISEL,GFX6-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX67,GFX7,GFX67-SDAG,GFX7-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX67,GFX7,GFX67-GISEL,GFX7-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG,GFX900-SDAG,GFX900 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL,GFX900-GISEL,GFX900 %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90A,GFX9-SDAG,GFX90A-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90A,GFX9-GISEL,GFX90A-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX1200,GFX1200-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX1200,GFX1200-GISEL %s

; Test for integer mad formation for patterns used in clpeak

; Scalar i32 variant. With a = (x + 1) * y and b = (a + (x + 1)) * y, the
; function returns b * (a + 1) * (b + 1). In the SelectionDAG runs for gfx900
; and newer the last two add/mul pairs are expected as 64-bit multiply-add
; instructions; every other run below expects separate add and multiply
; instructions.
define i32 @clpeak_imad_pat_i32(i32 %x, i32 %y) {
; GFX67-SDAG-LABEL: clpeak_imad_pat_i32:
; GFX67-SDAG: ; %bb.0: ; %entry
; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v2, v0
; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v0, v2
; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v0
; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0
; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX67-GISEL-LABEL: clpeak_imad_pat_i32:
; GFX67-GISEL: ; %bb.0: ; %entry
; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0
; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v2
; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_imad_pat_i32:
; GFX8-SDAG: ; %bb.0: ; %entry
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, 1, v0
; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v2, v0
; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v0, v2
; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v0
; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0
; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: clpeak_imad_pat_i32:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0
; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v2, v0
; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v2
; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0
; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: clpeak_imad_pat_i32:
; GFX900-SDAG: ; %bb.0: ; %entry
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v2, v0
; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX900-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v0, v2, v[0:1]
; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, v0, v[1:2]
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-GISEL-LABEL: clpeak_imad_pat_i32:
; GFX900-GISEL: ; %bb.0: ; %entry
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v2, v0
; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v2
; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-SDAG-LABEL: clpeak_imad_pat_i32:
; GFX90A-SDAG: ; %bb.0: ; %entry
; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v2, v0
; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v2, v[0:1]
; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v0, v[2:3]
; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-GISEL-LABEL: clpeak_imad_pat_i32:
; GFX90A-GISEL: ; %bb.0: ; %entry
; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
; GFX90A-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v2, v0
; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v2
; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, 1, v0
; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: clpeak_imad_pat_i32:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0
; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX10-SDAG-NEXT: v_mad_u64_u32 v[1:2], null, v0, v2, v[0:1]
; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v1, v0, v[1:2]
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: clpeak_imad_pat_i32:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, v2, v0
; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2
; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: clpeak_imad_pat_i32:
; GFX11-SDAG: ; %bb.0: ; %entry
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_mul_lo_u32 v5, v0, v1
; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, v5, v0
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX11-SDAG-NEXT: v_mad_u64_u32 v[3:4], null, v2, v5, v[2:3]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, v[3:4]
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: clpeak_imad_pat_i32:
; GFX11-GISEL: ; %bb.0: ; %entry
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v2, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2
; GFX11-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1200-SDAG-LABEL: clpeak_imad_pat_i32:
; GFX1200-SDAG: ; %bb.0: ; %entry
; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0
; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0
; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0
; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0
; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[1:2], null, v0, v2, v[0:1]
; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[0:1], null, v1, v0, v[1:2]
; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1200-GISEL-LABEL: clpeak_imad_pat_i32:
; GFX1200-GISEL: ; %bb.0: ; %entry
; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0
; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, v2, v0
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2
; GFX1200-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
  %y18 = add i32 %x, 1
  %add = mul i32 %y18, %y
  %mul119 = add i32 %add, %y18
  %add2 = mul i32 %mul119, %y
  %add220 = add i32 %add, 1
  %add422 = add i32 %add2, 1
  %mul521 = mul i32 %add2, %add220
  %add6 = mul i32 %mul521, %add422
  ret i32 %add6
}

; Scalar i16 variant with signext arguments and return value. With
; a = (x + 1) * y and b = (y + 1) * a, the function returns
; (a + 1) * b * (b + 1). The SelectionDAG runs for gfx803 and newer expect the
; whole chain as four 16-bit mad instructions followed by a v_bfe_i32 that
; sign-extends the result.
define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) {
; GFX67-LABEL: clpeak_imad_pat_i16:
; GFX67: ; %bb.0: ; %entry
; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0
; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v1
; GFX67-NEXT: v_mul_u32_u24_e32 v3, v0, v2
; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-NEXT: v_mul_u32_u24_e32 v4, v1, v3
; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, 1
; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v4
; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, 1
; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
; GFX67-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_imad_pat_i16:
; GFX8-SDAG: ; %bb.0: ; %entry
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
; GFX8-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
; GFX8-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: clpeak_imad_pat_i16:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_add_u16_e32 v0, 1, v0
; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v0, v1
; GFX8-GISEL-NEXT: v_add_u16_e32 v3, 1, v1
; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v3, v2
; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v4
; GFX8-GISEL-NEXT: v_mad_u16 v1, v3, v2, 1
; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: clpeak_imad_pat_i16:
; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1
; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v0, v1, v0
; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1
; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
; GFX9-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: clpeak_imad_pat_i16:
; GFX9-GISEL: ; %bb.0: ; %entry
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_add_u16_e32 v0, 1, v0
; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v2, v0, v1
; GFX9-GISEL-NEXT: v_add_u16_e32 v3, 1, v1
; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v4, v3, v2
; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v1, 1
; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v4
; GFX9-GISEL-NEXT: v_mad_legacy_u16 v1, v3, v2, 1
; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v1
; GFX9-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: clpeak_imad_pat_i16:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: clpeak_imad_pat_i16:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: clpeak_imad_pat_i16:
; GFX11-SDAG: ; %bb.0: ; %entry
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
; GFX11-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: clpeak_imad_pat_i16:
; GFX11-GISEL: ; %bb.0: ; %entry
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1200-SDAG-LABEL: clpeak_imad_pat_i16:
; GFX1200-SDAG: ; %bb.0: ; %entry
; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0
; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0
; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0
; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
; GFX1200-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1200-GISEL-LABEL: clpeak_imad_pat_i16:
; GFX1200-GISEL: ; %bb.0: ; %entry
; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0
; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
  %conv33 = add i16 %x, 1
  %add = mul i16 %conv33, %y
  %conv434 = add i16 %y, 1
  %add8 = mul i16 %conv434, %add
  %conv1035 = add i16 %add, 1
  %add14 = mul i16 %conv1035, %add8
  %conv1636 = add i16 %add8, 1
  %add20 = mul i16 %add14, %conv1636
  ret i16 %add20
}

; <2 x i16> variant of the i32 pattern, applied lane-wise. With
; a = (x + 1) * y and b = (a + (x + 1)) * y, the function returns
; b * (a + 1) * (b + 1). For gfx900 and newer both instruction selectors share
; one set of checks, which use the packed v_pk_add_u16, v_pk_mad_u16 and
; v_pk_mul_lo_u16 instructions.
define <2 x i16> @clpeak_imad_pat_v2i16(<2 x i16> %x, <2 x i16> %y) {
; GFX67-SDAG-LABEL: clpeak_imad_pat_v2i16:
; GFX67-SDAG: ; %bb.0: ; %entry
; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0
; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v1
; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v4, v2, v0
; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v5, v3, v1
; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v6, v0, v2
; GFX67-SDAG-NEXT: v_mad_u32_u24 v4, v4, v2, 1
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v7, v1, v3
; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX67-SDAG-NEXT: v_mad_u32_u24 v5, v5, v3, 1
; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1
; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v6
; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v5
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v4
; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v7
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v4, v2
; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v3, v0
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v2, v1
; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX67-GISEL-LABEL: clpeak_imad_pat_v2i16:
; GFX67-GISEL: ; %bb.0: ; %entry
; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v0
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v5, v3, v1
; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v4, v2, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
; GFX67-GISEL-NEXT: v_mad_u32_u24 v5, v5, v3, 1
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX67-GISEL-NEXT: v_mad_u32_u24 v4, v4, v2, 1
; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v1, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, 1
; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v0, v2
; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, 1
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v6
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4
; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4
; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7
; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i16:
; GFX8-SDAG: ; %bb.0: ; %entry
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT: v_mov_b32_e32 v3, 1
; GFX8-SDAG-NEXT: v_add_u16_e32 v2, 1, v0
; GFX8-SDAG-NEXT: v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v4, v0, v3
; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v3, v0
; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v5, v2, v1
; GFX8-SDAG-NEXT: v_mad_u16 v2, v2, v1, v2
; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v0, v0, v3
; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v1, v2, v1
; GFX8-SDAG-NEXT: v_mad_u16 v2, v0, v4, v0
; GFX8-SDAG-NEXT: v_mad_u16 v3, v1, v5, v1
; GFX8-SDAG-NEXT: v_mad_u16 v0, v2, v0, v2
; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX8-SDAG-NEXT: v_mad_u16 v1, v3, v1, v3
; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: clpeak_imad_pat_v2i16:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, 1
; GFX8-GISEL-NEXT: v_add_u16_e32 v2, 1, v0
; GFX8-GISEL-NEXT: v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX8-GISEL-NEXT: v_mad_u16 v4, v2, v1, v2
; GFX8-GISEL-NEXT: v_mad_u16 v5, v0, v3, v0
; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v6, v4, v1
; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v7, v5, v3
; GFX8-GISEL-NEXT: v_mad_u16 v2, v2, v1, 1
; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v3, 1
; GFX8-GISEL-NEXT: v_mad_u16 v1, v4, v1, 1
; GFX8-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1
; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v6, v2
; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v7, v0
; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v2, v1
; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: clpeak_imad_pat_v2i16:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX9-NEXT: v_pk_mad_u16 v2, v0, v1, v0
; GFX9-NEXT: v_pk_mul_lo_u16 v3, v2, v1
; GFX9-NEXT: v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0]
; GFX9-NEXT: v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0]
; GFX9-NEXT: v_pk_mul_lo_u16 v0, v3, v0
; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: clpeak_imad_pat_v2i16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX10-NEXT: v_pk_mad_u16 v2, v0, v1, v0
; GFX10-NEXT: v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0]
; GFX10-NEXT: v_pk_mul_lo_u16 v3, v2, v1
; GFX10-NEXT: v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0]
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v3, v0
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: clpeak_imad_pat_v2i16:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_pk_mad_u16 v2, v0, v1, v0
; GFX11-NEXT: v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0]
; GFX11-NEXT: v_pk_mul_lo_u16 v3, v2, v1
; GFX11-NEXT: v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_pk_mul_lo_u16 v0, v3, v0
; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1200-LABEL: clpeak_imad_pat_v2i16:
; GFX1200: ; %bb.0: ; %entry
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1200-NEXT: s_wait_expcnt 0x0
; GFX1200-NEXT: s_wait_samplecnt 0x0
; GFX1200-NEXT: s_wait_bvhcnt 0x0
; GFX1200-NEXT: s_wait_kmcnt 0x0
; GFX1200-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1200-NEXT: v_pk_mad_u16 v2, v0, v1, v0
; GFX1200-NEXT: v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0]
; GFX1200-NEXT: v_pk_mul_lo_u16 v3, v2, v1
; GFX1200-NEXT: v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0]
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-NEXT: v_pk_mul_lo_u16 v0, v3, v0
; GFX1200-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX1200-NEXT: s_setpc_b64 s[30:31]
entry:
  %y18 = add <2 x i16> %x, <i16 1, i16 1>
  %add = mul <2 x i16> %y18, %y
  %mul119 = add <2 x i16> %add, %y18
  %add2 = mul <2 x i16> %mul119, %y
  %add220 = add <2 x i16> %add, <i16 1, i16 1>
  %add422 = add <2 x i16> %add2, <i16 1, i16 1>
  %mul521 = mul <2 x i16> %add2, %add220
  %add6 = mul <2 x i16> %mul521, %add422
  ret <2 x i16> %add6
}

define <3 x i16> @clpeak_imad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX67-SDAG-LABEL: clpeak_imad_pat_v3i16:
; GFX67-SDAG: ; %bb.0: ; %entry
; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v0
; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v1
; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v2
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v9, v8, v4
; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v6, v3, v0
; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v8, v4, v1
; GFX67-SDAG-NEXT: v_mad_u32_u24 v6, v6, v3, 1
; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v2
; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v9, 16, v9
; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v7, v5, v2
;
GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v8, v0, v3 620; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v4 621; GFX67-SDAG-NEXT: v_or_b32_e32 v6, v9, v6 622; GFX67-SDAG-NEXT: s_mov_b32 s4, 0x10000 623; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v3, 1 624; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 625; GFX67-SDAG-NEXT: v_add_i32_e32 v6, vcc, s4, v6 626; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 627; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v1 628; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v2, v5 629; GFX67-SDAG-NEXT: v_mad_u32_u24 v7, v7, v5, 1 630; GFX67-SDAG-NEXT: v_or_b32_e32 v0, v3, v0 631; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v6 632; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 633; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7 634; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v2, v5, 1 635; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v8 636; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6 637; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v3 638; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4 639; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, s4, v0 640; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v5, v5, v6 641; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v7 642; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 643; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v0 644; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5 645; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 646; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 647; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 648; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v5, v0 649; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v4 650; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v3, v2 651; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] 652; 653; GFX67-GISEL-LABEL: clpeak_imad_pat_v3i16: 654; GFX67-GISEL: ; %bb.0: ; %entry 655; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 656; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 657; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 658; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v0 659; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 
0xffff, v3 660; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2 661; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v1 662; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 663; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v6, v3, v0 664; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v2 665; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5 666; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v7, v4, v1 667; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 668; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v8, v5, v2 669; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v9, v0, v3 670; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 671; GFX67-GISEL-NEXT: v_mad_u32_u24 v6, v6, v3, 1 672; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v10, v1, v4 673; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 674; GFX67-GISEL-NEXT: v_mad_u32_u24 v7, v7, v4, 1 675; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v3, 1 676; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v4, 1 677; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v9 678; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v6 679; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v11, v2, v5 680; GFX67-GISEL-NEXT: v_mad_u32_u24 v8, v8, v5, 1 681; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v2, v5, 1 682; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v4 683; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v10 684; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v7 685; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v5 686; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v11 687; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v8 688; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 689; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 690; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v6 691; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v3, v0 692; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4 693; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 694; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1 695; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v5 696; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 697; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v3, v2 698; 
GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] 699; 700; GFX8-SDAG-LABEL: clpeak_imad_pat_v3i16: 701; GFX8-SDAG: ; %bb.0: ; %entry 702; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 703; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 1 704; GFX8-SDAG-NEXT: v_add_u16_e32 v4, 1, v0 705; GFX8-SDAG-NEXT: v_add_u16_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 706; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v2 707; GFX8-SDAG-NEXT: v_add_u16_e32 v1, 1, v1 708; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v6, v0, v5 709; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v5, v0 710; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v7, v1, v3 711; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v8, v4, v2 712; GFX8-SDAG-NEXT: v_mad_u16 v4, v4, v2, v4 713; GFX8-SDAG-NEXT: v_mad_u16 v1, v1, v3, v1 714; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v0, v0, v5 715; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v1, v1, v3 716; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v2, v4, v2 717; GFX8-SDAG-NEXT: v_mad_u16 v3, v0, v6, v0 718; GFX8-SDAG-NEXT: v_mad_u16 v4, v2, v8, v2 719; GFX8-SDAG-NEXT: v_mad_u16 v0, v3, v0, v3 720; GFX8-SDAG-NEXT: v_mad_u16 v5, v1, v7, v1 721; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 722; GFX8-SDAG-NEXT: v_mad_u16 v2, v4, v2, v4 723; GFX8-SDAG-NEXT: v_mad_u16 v1, v5, v1, v5 724; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v2, v0 725; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 726; 727; GFX8-GISEL-LABEL: clpeak_imad_pat_v3i16: 728; GFX8-GISEL: ; %bb.0: ; %entry 729; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 730; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, 1 731; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2 732; GFX8-GISEL-NEXT: v_add_u16_e32 v5, 1, v0 733; GFX8-GISEL-NEXT: v_add_u16_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 734; GFX8-GISEL-NEXT: v_add_u16_e32 v1, 1, v1 735; GFX8-GISEL-NEXT: v_mad_u16 v6, v5, v2, v5 736; GFX8-GISEL-NEXT: v_mad_u16 v7, v0, v4, v0 737; GFX8-GISEL-NEXT: v_mad_u16 v8, v1, v3, v1 738; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v9, v6, v2 739; GFX8-GISEL-NEXT: 
v_mul_lo_u16_e32 v10, v7, v4 740; GFX8-GISEL-NEXT: v_mad_u16 v5, v5, v2, 1 741; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v4, 1 742; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v11, v8, v3 743; GFX8-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1 744; GFX8-GISEL-NEXT: v_mad_u16 v2, v6, v2, 1 745; GFX8-GISEL-NEXT: v_mad_u16 v4, v7, v4, 1 746; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v5, v9, v5 747; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v10, v0 748; GFX8-GISEL-NEXT: v_mad_u16 v3, v8, v3, 1 749; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v11, v1 750; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v5, v2 751; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 752; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 753; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v1, v3 754; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 755; 756; GFX9-SDAG-LABEL: clpeak_imad_pat_v3i16: 757; GFX9-SDAG: ; %bb.0: ; %entry 758; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 759; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 760; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 761; GFX9-SDAG-NEXT: v_pk_mad_u16 v4, v1, v3, v1 762; GFX9-SDAG-NEXT: v_pk_mad_u16 v5, v0, v2, v0 763; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v6, v5, v2 764; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v7, v4, v3 765; GFX9-SDAG-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 766; GFX9-SDAG-NEXT: v_pk_mad_u16 v1, v1, v3, 1 767; GFX9-SDAG-NEXT: v_pk_mad_u16 v3, v4, v3, 1 768; GFX9-SDAG-NEXT: v_pk_mad_u16 v2, v5, v2, 1 op_sel_hi:[1,1,0] 769; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v7, v1 770; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v6, v0 771; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2 772; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3 773; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 774; 775; GFX9-GISEL-LABEL: clpeak_imad_pat_v3i16: 776; GFX9-GISEL: ; %bb.0: ; %entry 777; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 778; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 779; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 780; GFX9-GISEL-NEXT: 
v_pk_mad_u16 v4, v0, v2, v0 781; GFX9-GISEL-NEXT: v_pk_mad_u16 v5, v1, v3, v1 782; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v6, v4, v2 783; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v7, v5, v3 784; GFX9-GISEL-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 785; GFX9-GISEL-NEXT: v_pk_mad_u16 v1, v1, v3, 1 786; GFX9-GISEL-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 787; GFX9-GISEL-NEXT: v_pk_mad_u16 v3, v5, v3, 1 788; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v6, v0 789; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v1, v7, v1 790; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 791; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 792; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 793; 794; GFX10-SDAG-LABEL: clpeak_imad_pat_v3i16: 795; GFX10-SDAG: ; %bb.0: ; %entry 796; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 797; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 798; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 799; GFX10-SDAG-NEXT: v_pk_mad_u16 v4, v0, v2, v0 800; GFX10-SDAG-NEXT: v_pk_mad_u16 v5, v1, v3, v1 801; GFX10-SDAG-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 802; GFX10-SDAG-NEXT: v_pk_mad_u16 v1, v1, v3, 1 803; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v6, v4, v2 804; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v7, v5, v3 805; GFX10-SDAG-NEXT: v_pk_mad_u16 v3, v5, v3, 1 806; GFX10-SDAG-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 807; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v6, v0 808; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v7, v1 809; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2 810; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3 811; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 812; 813; GFX10-GISEL-LABEL: clpeak_imad_pat_v3i16: 814; GFX10-GISEL: ; %bb.0: ; %entry 815; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 816; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 817; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 818; GFX10-GISEL-NEXT: v_pk_mad_u16 v4, v0, v2, v0 819; GFX10-GISEL-NEXT: v_pk_mad_u16 v5, v1, v3, v1 820; GFX10-GISEL-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 
821; GFX10-GISEL-NEXT: v_pk_mad_u16 v1, v1, v3, 1 822; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v6, v4, v2 823; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v7, v5, v3 824; GFX10-GISEL-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 825; GFX10-GISEL-NEXT: v_pk_mad_u16 v3, v5, v3, 1 826; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v6, v0 827; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v7, v1 828; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 829; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 830; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 831; 832; GFX11-SDAG-LABEL: clpeak_imad_pat_v3i16: 833; GFX11-SDAG: ; %bb.0: ; %entry 834; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 835; GFX11-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 836; GFX11-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 837; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 838; GFX11-SDAG-NEXT: v_pk_mad_u16 v4, v0, v2, v0 839; GFX11-SDAG-NEXT: v_pk_mad_u16 v5, v1, v3, v1 840; GFX11-SDAG-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 841; GFX11-SDAG-NEXT: v_pk_mad_u16 v1, v1, v3, 1 842; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 843; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v6, v4, v2 844; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v7, v5, v3 845; GFX11-SDAG-NEXT: v_pk_mad_u16 v3, v5, v3, 1 846; GFX11-SDAG-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 847; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 848; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v6, v0 849; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v7, v1 850; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 851; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2 852; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3 853; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 854; 855; GFX11-GISEL-LABEL: clpeak_imad_pat_v3i16: 856; GFX11-GISEL: ; %bb.0: ; %entry 857; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 858; GFX11-GISEL-NEXT: v_pk_add_u16 v0, 
v0, 1 op_sel_hi:[1,0] 859; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 860; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 861; GFX11-GISEL-NEXT: v_pk_mad_u16 v4, v0, v2, v0 862; GFX11-GISEL-NEXT: v_pk_mad_u16 v5, v1, v3, v1 863; GFX11-GISEL-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 864; GFX11-GISEL-NEXT: v_pk_mad_u16 v1, v1, v3, 1 865; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 866; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v6, v4, v2 867; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v7, v5, v3 868; GFX11-GISEL-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 869; GFX11-GISEL-NEXT: v_pk_mad_u16 v3, v5, v3, 1 870; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 871; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v6, v0 872; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v1, v7, v1 873; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 874; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 875; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 876; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 877; 878; GFX1200-SDAG-LABEL: clpeak_imad_pat_v3i16: 879; GFX1200-SDAG: ; %bb.0: ; %entry 880; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 881; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0 882; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0 883; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0 884; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0 885; GFX1200-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 886; GFX1200-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 887; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 888; GFX1200-SDAG-NEXT: v_pk_mad_u16 v4, v0, v2, v0 889; GFX1200-SDAG-NEXT: v_pk_mad_u16 v5, v1, v3, v1 890; GFX1200-SDAG-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 891; GFX1200-SDAG-NEXT: v_pk_mad_u16 v1, v1, v3, 1 892; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 893; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 
v6, v4, v2 894; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v7, v5, v3 895; GFX1200-SDAG-NEXT: v_pk_mad_u16 v3, v5, v3, 1 896; GFX1200-SDAG-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 897; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 898; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v0, v6, v0 899; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v1, v7, v1 900; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 901; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2 902; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3 903; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31] 904; 905; GFX1200-GISEL-LABEL: clpeak_imad_pat_v3i16: 906; GFX1200-GISEL: ; %bb.0: ; %entry 907; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 908; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0 909; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 910; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 911; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 912; GFX1200-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 913; GFX1200-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 914; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 915; GFX1200-GISEL-NEXT: v_pk_mad_u16 v4, v0, v2, v0 916; GFX1200-GISEL-NEXT: v_pk_mad_u16 v5, v1, v3, v1 917; GFX1200-GISEL-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 918; GFX1200-GISEL-NEXT: v_pk_mad_u16 v1, v1, v3, 1 919; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 920; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v6, v4, v2 921; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v7, v5, v3 922; GFX1200-GISEL-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 923; GFX1200-GISEL-NEXT: v_pk_mad_u16 v3, v5, v3, 1 924; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 925; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v0, v6, v0 926; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v1, v7, v1 927; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 928; 
GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 929; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 930; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] 931entry: 932 %y48 = add <3 x i16> %x, <i16 1, i16 1, i16 1> 933 %add = mul <3 x i16> %y48, %y 934 %mul1249 = add <3 x i16> %add, %y48 935 %add15 = mul <3 x i16> %mul1249, %y 936 %add1550 = add <3 x i16> %add, <i16 1, i16 1, i16 1> 937 %add2452 = add <3 x i16> %add15, <i16 1, i16 1, i16 1> 938 %mul3051 = mul <3 x i16> %add15, %add1550 939 %add33 = mul <3 x i16> %mul3051, %add2452 940 ret <3 x i16> %add33 941} 942 943define <4 x i16> @clpeak_imad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) { 944; GFX67-SDAG-LABEL: clpeak_imad_pat_v4i16: 945; GFX67-SDAG: ; %bb.0: ; %entry 946; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 947; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, 1, v3 948; GFX67-SDAG-NEXT: v_and_b32_e32 v11, 0xffff, v3 949; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7 950; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v2 951; GFX67-SDAG-NEXT: v_mad_u32_u24 v3, v11, v7, v3 952; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 953; GFX67-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v2 954; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1 955; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6 956; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 957; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v0 958; GFX67-SDAG-NEXT: v_and_b32_e32 v10, 0xffff, v1 959; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4 960; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5 961; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v13, v11, v7 962; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v7 963; GFX67-SDAG-NEXT: v_mad_u32_u24 v7, v9, v6, 1 964; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v12, v10, v5 965; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v9, v6, v2 966; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v8, v4, v0 967; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v10, v5, v1 968; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7 969; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v9, 16, v13 970; GFX67-SDAG-NEXT: v_mad_u32_u24 v8, 
v8, v4, 1 971; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 972; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 973; GFX67-SDAG-NEXT: v_or_b32_e32 v7, v9, v7 974; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8 975; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v9, 16, v12 976; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 977; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v10, v0, v4 978; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v5 979; GFX67-SDAG-NEXT: s_mov_b32 s4, 0x10000 980; GFX67-SDAG-NEXT: v_or_b32_e32 v8, v9, v8 981; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v4, 1 982; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v5, v2, v6 983; GFX67-SDAG-NEXT: v_add_i32_e32 v8, vcc, s4, v8 984; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v2, v6, 1 985; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 986; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v4, 16, v1 987; GFX67-SDAG-NEXT: v_add_i32_e32 v7, vcc, s4, v7 988; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 989; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v6, 16, v3 990; GFX67-SDAG-NEXT: v_or_b32_e32 v0, v4, v0 991; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v8 992; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 993; GFX67-SDAG-NEXT: v_or_b32_e32 v2, v6, v2 994; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v7 995; GFX67-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v10 996; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8 997; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v4 998; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v5 999; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v7 1000; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 1001; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, s4, v2 1002; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, s4, v0 1003; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v8, v9, v8 1004; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v4, v5 1005; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v6 1006; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v0 1007; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v2 1008; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v8 1009; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 1010; 
GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 1011; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4 1012; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 1013; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 1014; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v7, v0 1015; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v5 1016; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v4, v2 1017; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v6 1018; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] 1019; 1020; GFX67-GISEL-LABEL: clpeak_imad_pat_v4i16: 1021; GFX67-GISEL: ; %bb.0: ; %entry 1022; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1023; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 1024; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 1025; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v3 1026; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v1 1027; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2 1028; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v0 1029; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v9, 16, v9 1030; GFX67-GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v3 1031; GFX67-GISEL-NEXT: v_or_b32_e32 v8, v8, v9 1032; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v2 1033; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10 1034; GFX67-GISEL-NEXT: v_or_b32_e32 v9, v9, v10 1035; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v10, 16, v8 1036; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5 1037; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8 1038; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 1039; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v10, v5, v1 1040; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v11, 16, v9 1041; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9 1042; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6 1043; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7 1044; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v8, v4, v0 1045; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 1046; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v9, v6, v2 1047; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v11, v7, v3 1048; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 1049; 
GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1050; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 1051; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v2 1052; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3 1053; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1054; GFX67-GISEL-NEXT: v_or_b32_e32 v1, v1, v2 1055; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 1056; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 1057; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v13, v2, v5 1058; GFX67-GISEL-NEXT: v_mad_u32_u24 v10, v10, v5, 1 1059; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v2, v5, 1 1060; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1 1061; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v12, v0, v4 1062; GFX67-GISEL-NEXT: v_mad_u32_u24 v8, v8, v4, 1 1063; GFX67-GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v10 1064; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v4, 1 1065; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 1066; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 1067; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v15, v3, v7 1068; GFX67-GISEL-NEXT: v_mad_u32_u24 v11, v11, v7, 1 1069; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8 1070; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10 1071; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v3, v7, 1 1072; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 1073; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1074; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v14, v1, v6 1075; GFX67-GISEL-NEXT: v_mad_u32_u24 v9, v9, v6, 1 1076; GFX67-GISEL-NEXT: v_or_b32_e32 v8, v8, v10 1077; GFX67-GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v11 1078; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v6, 1 1079; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v2 1080; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3 1081; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9 1082; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10 1083; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 1084; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1085; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v12 1086; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v8 
1087; GFX67-GISEL-NEXT: v_or_b32_e32 v9, v9, v10 1088; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v1, v2 1089; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v8 1090; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v5 1091; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v13 1092; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v5, v1 1093; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v14 1094; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v9 1095; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v9 1096; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v6 1097; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v15 1098; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v6, v3 1099; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v0 1100; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 1101; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 1102; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v2 1103; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v4, v0 1104; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 1105; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v5 1106; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 1107; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 1108; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v6 1109; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v4, v2 1110; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v7 1111; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] 1112; 1113; GFX8-SDAG-LABEL: clpeak_imad_pat_v4i16: 1114; GFX8-SDAG: ; %bb.0: ; %entry 1115; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1116; GFX8-SDAG-NEXT: v_mov_b32_e32 v6, 1 1117; GFX8-SDAG-NEXT: v_add_u16_e32 v5, 1, v0 1118; GFX8-SDAG-NEXT: v_add_u16_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1119; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v8, 16, v2 1120; GFX8-SDAG-NEXT: v_add_u16_e32 v4, 1, v1 1121; GFX8-SDAG-NEXT: v_add_u16_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1122; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v3 1123; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v9, v0, v8 1124; GFX8-SDAG-NEXT: 
v_mad_u16 v0, v0, v8, v0 1125; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v7, v1, v6 1126; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v10, v5, v2 1127; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v11, v4, v3 1128; GFX8-SDAG-NEXT: v_mad_u16 v1, v1, v6, v1 1129; GFX8-SDAG-NEXT: v_mad_u16 v4, v4, v3, v4 1130; GFX8-SDAG-NEXT: v_mad_u16 v5, v5, v2, v5 1131; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v0, v0, v8 1132; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v1, v1, v6 1133; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v2, v5, v2 1134; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v3, v4, v3 1135; GFX8-SDAG-NEXT: v_mad_u16 v4, v0, v9, v0 1136; GFX8-SDAG-NEXT: v_mad_u16 v5, v1, v7, v1 1137; GFX8-SDAG-NEXT: v_mad_u16 v7, v2, v10, v2 1138; GFX8-SDAG-NEXT: v_mad_u16 v0, v4, v0, v4 1139; GFX8-SDAG-NEXT: v_mad_u16 v6, v3, v11, v3 1140; GFX8-SDAG-NEXT: v_mad_u16 v1, v5, v1, v5 1141; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1142; GFX8-SDAG-NEXT: v_mad_u16 v2, v7, v2, v7 1143; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v2, v0 1144; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1145; GFX8-SDAG-NEXT: v_mad_u16 v2, v6, v3, v6 1146; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v2, v1 1147; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 1148; 1149; GFX8-GISEL-LABEL: clpeak_imad_pat_v4i16: 1150; GFX8-GISEL: ; %bb.0: ; %entry 1151; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1152; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 1 1153; GFX8-GISEL-NEXT: v_add_u16_e32 v4, 1, v0 1154; GFX8-GISEL-NEXT: v_add_u16_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1155; GFX8-GISEL-NEXT: v_add_u16_e32 v6, 1, v1 1156; GFX8-GISEL-NEXT: v_add_u16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1157; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v2 1158; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v3 1159; GFX8-GISEL-NEXT: v_mad_u16 v8, v4, v2, v4 1160; GFX8-GISEL-NEXT: v_mad_u16 v9, v0, v5, v0 1161; GFX8-GISEL-NEXT: v_mad_u16 v10, v6, v3, v6 1162; GFX8-GISEL-NEXT: v_mad_u16 v11, v1, v7, v1 1163; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v12, 
v8, v2 1164; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v13, v9, v5 1165; GFX8-GISEL-NEXT: v_mad_u16 v4, v4, v2, 1 1166; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v5, 1 1167; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v14, v10, v3 1168; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v15, v11, v7 1169; GFX8-GISEL-NEXT: v_mad_u16 v6, v6, v3, 1 1170; GFX8-GISEL-NEXT: v_mad_u16 v1, v1, v7, 1 1171; GFX8-GISEL-NEXT: v_mad_u16 v2, v8, v2, 1 1172; GFX8-GISEL-NEXT: v_mad_u16 v5, v9, v5, 1 1173; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v12, v4 1174; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v13, v0 1175; GFX8-GISEL-NEXT: v_mad_u16 v3, v10, v3, 1 1176; GFX8-GISEL-NEXT: v_mad_u16 v7, v11, v7, 1 1177; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v6, v14, v6 1178; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v15, v1 1179; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v4, v2 1180; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v0, v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1181; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 1182; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v6, v3 1183; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v1, v1, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1184; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1 1185; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 1186; 1187; GFX9-SDAG-LABEL: clpeak_imad_pat_v4i16: 1188; GFX9-SDAG: ; %bb.0: ; %entry 1189; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1190; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 1191; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] 1192; GFX9-SDAG-NEXT: v_pk_mad_u16 v4, v1, v3, v1 1193; GFX9-SDAG-NEXT: v_pk_mad_u16 v5, v0, v2, v0 1194; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v6, v5, v2 1195; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v7, v4, v3 1196; GFX9-SDAG-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 1197; GFX9-SDAG-NEXT: v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0] 1198; GFX9-SDAG-NEXT: v_pk_mad_u16 v3, v4, v3, 1 op_sel_hi:[1,1,0] 1199; GFX9-SDAG-NEXT: v_pk_mad_u16 v2, v5, v2, 1 op_sel_hi:[1,1,0] 1200; GFX9-SDAG-NEXT: 
v_pk_mul_lo_u16 v1, v7, v1 1201; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v6, v0 1202; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2 1203; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3 1204; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1205; 1206; GFX9-GISEL-LABEL: clpeak_imad_pat_v4i16: 1207; GFX9-GISEL: ; %bb.0: ; %entry 1208; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1209; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 1210; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] 1211; GFX9-GISEL-NEXT: v_pk_mad_u16 v4, v0, v2, v0 1212; GFX9-GISEL-NEXT: v_pk_mad_u16 v5, v1, v3, v1 1213; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v6, v4, v2 1214; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v7, v5, v3 1215; GFX9-GISEL-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 1216; GFX9-GISEL-NEXT: v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0] 1217; GFX9-GISEL-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 1218; GFX9-GISEL-NEXT: v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0] 1219; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v6, v0 1220; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v1, v7, v1 1221; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 1222; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 1223; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1224; 1225; GFX10-SDAG-LABEL: clpeak_imad_pat_v4i16: 1226; GFX10-SDAG: ; %bb.0: ; %entry 1227; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1228; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 1229; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] 1230; GFX10-SDAG-NEXT: v_pk_mad_u16 v4, v0, v2, v0 1231; GFX10-SDAG-NEXT: v_pk_mad_u16 v5, v1, v3, v1 1232; GFX10-SDAG-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 1233; GFX10-SDAG-NEXT: v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0] 1234; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v6, v4, v2 1235; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v7, v5, v3 1236; GFX10-SDAG-NEXT: v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0] 1237; GFX10-SDAG-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 1238; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 
v0, v6, v0 1239; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v7, v1 1240; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2 1241; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3 1242; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 1243; 1244; GFX10-GISEL-LABEL: clpeak_imad_pat_v4i16: 1245; GFX10-GISEL: ; %bb.0: ; %entry 1246; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1247; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 1248; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] 1249; GFX10-GISEL-NEXT: v_pk_mad_u16 v4, v0, v2, v0 1250; GFX10-GISEL-NEXT: v_pk_mad_u16 v5, v1, v3, v1 1251; GFX10-GISEL-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 1252; GFX10-GISEL-NEXT: v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0] 1253; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v6, v4, v2 1254; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v7, v5, v3 1255; GFX10-GISEL-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 1256; GFX10-GISEL-NEXT: v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0] 1257; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v6, v0 1258; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v7, v1 1259; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 1260; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 1261; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 1262; 1263; GFX11-SDAG-LABEL: clpeak_imad_pat_v4i16: 1264; GFX11-SDAG: ; %bb.0: ; %entry 1265; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1266; GFX11-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 1267; GFX11-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] 1268; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1269; GFX11-SDAG-NEXT: v_pk_mad_u16 v4, v0, v2, v0 1270; GFX11-SDAG-NEXT: v_pk_mad_u16 v5, v1, v3, v1 1271; GFX11-SDAG-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 1272; GFX11-SDAG-NEXT: v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0] 1273; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1274; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v6, v4, v2 1275; GFX11-SDAG-NEXT: 
v_pk_mul_lo_u16 v7, v5, v3 1276; GFX11-SDAG-NEXT: v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0] 1277; GFX11-SDAG-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 1278; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1279; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v6, v0 1280; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v7, v1 1281; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1282; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2 1283; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3 1284; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 1285; 1286; GFX11-GISEL-LABEL: clpeak_imad_pat_v4i16: 1287; GFX11-GISEL: ; %bb.0: ; %entry 1288; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1289; GFX11-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 1290; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] 1291; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1292; GFX11-GISEL-NEXT: v_pk_mad_u16 v4, v0, v2, v0 1293; GFX11-GISEL-NEXT: v_pk_mad_u16 v5, v1, v3, v1 1294; GFX11-GISEL-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 1295; GFX11-GISEL-NEXT: v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0] 1296; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1297; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v6, v4, v2 1298; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v7, v5, v3 1299; GFX11-GISEL-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 1300; GFX11-GISEL-NEXT: v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0] 1301; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1302; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v6, v0 1303; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v1, v7, v1 1304; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1305; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 1306; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 1307; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 1308; 1309; 
GFX1200-SDAG-LABEL: clpeak_imad_pat_v4i16: 1310; GFX1200-SDAG: ; %bb.0: ; %entry 1311; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1312; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0 1313; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0 1314; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0 1315; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0 1316; GFX1200-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 1317; GFX1200-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] 1318; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1319; GFX1200-SDAG-NEXT: v_pk_mad_u16 v4, v0, v2, v0 1320; GFX1200-SDAG-NEXT: v_pk_mad_u16 v5, v1, v3, v1 1321; GFX1200-SDAG-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 1322; GFX1200-SDAG-NEXT: v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0] 1323; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1324; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v6, v4, v2 1325; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v7, v5, v3 1326; GFX1200-SDAG-NEXT: v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0] 1327; GFX1200-SDAG-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 1328; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1329; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v0, v6, v0 1330; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v1, v7, v1 1331; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1332; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2 1333; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3 1334; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31] 1335; 1336; GFX1200-GISEL-LABEL: clpeak_imad_pat_v4i16: 1337; GFX1200-GISEL: ; %bb.0: ; %entry 1338; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1339; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0 1340; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 1341; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 1342; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 1343; GFX1200-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 1344; GFX1200-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 
op_sel_hi:[1,0] 1345; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1346; GFX1200-GISEL-NEXT: v_pk_mad_u16 v4, v0, v2, v0 1347; GFX1200-GISEL-NEXT: v_pk_mad_u16 v5, v1, v3, v1 1348; GFX1200-GISEL-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 1349; GFX1200-GISEL-NEXT: v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0] 1350; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1351; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v6, v4, v2 1352; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v7, v5, v3 1353; GFX1200-GISEL-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 1354; GFX1200-GISEL-NEXT: v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0] 1355; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1356; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v0, v6, v0 1357; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v1, v7, v1 1358; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1359; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 1360; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 1361; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] 1362entry: 1363 %y18 = add <4 x i16> %x, <i16 1, i16 1, i16 1, i16 1> 1364 %add = mul <4 x i16> %y18, %y 1365 %mul119 = add <4 x i16> %add, %y18 1366 %add2 = mul <4 x i16> %mul119, %y 1367 %add220 = add <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1> 1368 %add422 = add <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1> 1369 %mul521 = mul <4 x i16> %add2, %add220 1370 %add6 = mul <4 x i16> %mul521, %add422 1371 ret <4 x i16> %add6 1372} 1373
; clpeak unsigned mad pattern on a scalar i16; both arguments and the result
; carry the zeroext attribute. With a = (x + 1) * y and b = (y + 1) * a, the
; function body computes (a + 1) * b * (b + 1) in i16 arithmetic, written as a
; chain of separate add-by-1 and mul instructions.
; The per-target assertions that follow are autogenerated; regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.
1374define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) { 1375; GFX67-LABEL: clpeak_umad_pat_i16: 1376; GFX67: ; %bb.0: ; %entry 1377; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1378; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0 1379; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 1380; GFX67-NEXT: v_mul_u32_u24_e32 v2, v0, v1 1381; GFX67-NEXT: v_add_i32_e32 v3, vcc, 1, v1 1382; 
GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3 1383; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2 1384; GFX67-NEXT: v_mul_u32_u24_e32 v4, v3, v2 1385; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, 1 1386; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 1387; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v4 1388; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1 1389; GFX67-NEXT: v_mad_u32_u24 v1, v3, v2, 1 1390; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 1391; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1 1392; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1 1393; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 1394; GFX67-NEXT: s_setpc_b64 s[30:31] 1395; 1396; GFX8-SDAG-LABEL: clpeak_umad_pat_i16: 1397; GFX8-SDAG: ; %bb.0: ; %entry 1398; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1399; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 1400; GFX8-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 1401; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 1402; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 1403; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 1404; 1405; GFX8-GISEL-LABEL: clpeak_umad_pat_i16: 1406; GFX8-GISEL: ; %bb.0: ; %entry 1407; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1408; GFX8-GISEL-NEXT: v_add_u16_e32 v0, 1, v0 1409; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v0, v1 1410; GFX8-GISEL-NEXT: v_add_u16_e32 v3, 1, v1 1411; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v3, v2 1412; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 1413; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v4 1414; GFX8-GISEL-NEXT: v_mad_u16 v1, v3, v2, 1 1415; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v1 1416; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 1417; 1418; GFX9-SDAG-LABEL: clpeak_umad_pat_i16: 1419; GFX9-SDAG: ; %bb.0: ; %entry 1420; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1421; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1 1422; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v0, v1, v0 1423; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1 1424; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v0, v1, v0 1425; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1426; 1427; GFX9-GISEL-LABEL: 
clpeak_umad_pat_i16: 1428; GFX9-GISEL: ; %bb.0: ; %entry 1429; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1430; GFX9-GISEL-NEXT: v_add_u16_e32 v0, 1, v0 1431; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v2, v0, v1 1432; GFX9-GISEL-NEXT: v_add_u16_e32 v3, 1, v1 1433; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v4, v3, v2 1434; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v1, 1 1435; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v4 1436; GFX9-GISEL-NEXT: v_mad_legacy_u16 v1, v3, v2, 1 1437; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v1 1438; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1439; 1440; GFX10-SDAG-LABEL: clpeak_umad_pat_i16: 1441; GFX10-SDAG: ; %bb.0: ; %entry 1442; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1443; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 1444; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 1445; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 1446; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 1447; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 1448; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 1449; 1450; GFX10-GISEL-LABEL: clpeak_umad_pat_i16: 1451; GFX10-GISEL: ; %bb.0: ; %entry 1452; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1453; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 1454; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 1455; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 1456; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 1457; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 1458; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 1459; GFX10-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 1460; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 1461; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 1462; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 1463; 1464; GFX11-SDAG-LABEL: clpeak_umad_pat_i16: 1465; GFX11-SDAG: ; %bb.0: ; %entry 1466; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1467; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 1468; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1469; GFX11-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 1470; 
GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 1471; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1472; GFX11-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 1473; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 1474; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 1475; 1476; GFX11-GISEL-LABEL: clpeak_umad_pat_i16: 1477; GFX11-GISEL: ; %bb.0: ; %entry 1478; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1479; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 1480; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 1481; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1482; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 1483; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 1484; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 1485; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 1486; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 1487; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 1488; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 1489; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 1490; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 1491; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 1492; 1493; GFX1200-SDAG-LABEL: clpeak_umad_pat_i16: 1494; GFX1200-SDAG: ; %bb.0: ; %entry 1495; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1496; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0 1497; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0 1498; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0 1499; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0 1500; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 1501; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1502; GFX1200-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 1503; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 1504; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1505; GFX1200-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 1506; GFX1200-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 1507; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31] 1508; 1509; 
GFX1200-GISEL-LABEL: clpeak_umad_pat_i16: 1510; GFX1200-GISEL: ; %bb.0: ; %entry 1511; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1512; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0 1513; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 1514; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 1515; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 1516; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 1517; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 1518; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1519; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 1520; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 1521; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 1522; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 1523; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 1524; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 1525; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 1526; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 1527; GFX1200-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 1528; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] 1529entry: 1530 %conv33 = add i16 %x, 1 1531 %add = mul i16 %conv33, %y 1532 %conv434 = add i16 %y, 1 1533 %add8 = mul i16 %conv434, %add 1534 %conv1035 = add i16 %add, 1 1535 %add14 = mul i16 %conv1035, %add8 1536 %conv1636 = add i16 %add8, 1 1537 %add20 = mul i16 %add14, %conv1636 1538 ret i16 %add20 1539} 1540
; clpeak unsigned mad pattern on <2 x i16>. Per lane, with x1 = x + 1,
; a = x1 * y and b = (a + x1) * y, the function body computes
; b * (a + 1) * (b + 1), written as separate add and mul instructions.
; The per-target assertions that follow are autogenerated; regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.
1541define <2 x i16> @clpeak_umad_pat_v2i16(<2 x i16> %x, <2 x i16> %y) { 1542; GFX67-SDAG-LABEL: clpeak_umad_pat_v2i16: 1543; GFX67-SDAG: ; %bb.0: ; %entry 1544; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1545; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 1546; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0 1547; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1 1548; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 1549; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v1 1550; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 1551; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v4, v2, v0 1552; GFX67-SDAG-NEXT: 
v_and_b32_e32 v0, 0xffff, v0 1553; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v5, v3, v1 1554; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 1555; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v6, v0, v2 1556; GFX67-SDAG-NEXT: v_mad_u32_u24 v4, v4, v2, 1 1557; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v7, v1, v3 1558; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4 1559; GFX67-SDAG-NEXT: v_mad_u32_u24 v5, v5, v3, 1 1560; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1 1561; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v6 1562; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1 1563; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v5 1564; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v4 1565; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v7 1566; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v4, v2 1567; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 1568; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 1569; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 1570; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 1571; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v3, v0 1572; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v2, v1 1573; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] 1574; 1575; GFX67-GISEL-LABEL: clpeak_umad_pat_v2i16: 1576; GFX67-GISEL: ; %bb.0: ; %entry 1577; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1578; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 1579; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 1580; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v1 1581; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v0 1582; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 1583; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5 1584; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4 1585; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 1586; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 1587; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 1588; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v5, v3, v1 1589; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v4, v2, v0 1590; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 1591; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, 
v0 1592; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1593; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 1594; GFX67-GISEL-NEXT: v_mad_u32_u24 v5, v5, v3, 1 1595; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1596; GFX67-GISEL-NEXT: v_mad_u32_u24 v4, v4, v2, 1 1597; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5 1598; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 1599; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v1, v3 1600; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 1601; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 1602; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, 1 1603; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v0, v2 1604; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5 1605; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, 1 1606; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 1607; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 1608; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1609; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v6 1610; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4 1611; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 1612; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4 1613; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3 1614; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7 1615; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1 1616; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0 1617; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 1618; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 1619; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 1620; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0 1621; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3 1622; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] 1623; 1624; GFX8-SDAG-LABEL: clpeak_umad_pat_v2i16: 1625; GFX8-SDAG: ; %bb.0: ; %entry 1626; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1627; GFX8-SDAG-NEXT: v_mov_b32_e32 v3, 1 1628; GFX8-SDAG-NEXT: v_add_u16_e32 v2, 1, v0 1629; GFX8-SDAG-NEXT: v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1630; GFX8-SDAG-NEXT: 
v_lshrrev_b32_e32 v3, 16, v1 1631; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v4, v0, v3 1632; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v3, v0 1633; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v5, v2, v1 1634; GFX8-SDAG-NEXT: v_mad_u16 v2, v2, v1, v2 1635; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v0, v0, v3 1636; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v1, v2, v1 1637; GFX8-SDAG-NEXT: v_mad_u16 v2, v0, v4, v0 1638; GFX8-SDAG-NEXT: v_mad_u16 v3, v1, v5, v1 1639; GFX8-SDAG-NEXT: v_mad_u16 v0, v2, v0, v2 1640; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1641; GFX8-SDAG-NEXT: v_mad_u16 v1, v3, v1, v3 1642; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v1, v0 1643; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 1644; 1645; GFX8-GISEL-LABEL: clpeak_umad_pat_v2i16: 1646; GFX8-GISEL: ; %bb.0: ; %entry 1647; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1648; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, 1 1649; GFX8-GISEL-NEXT: v_add_u16_e32 v2, 1, v0 1650; GFX8-GISEL-NEXT: v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1651; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1 1652; GFX8-GISEL-NEXT: v_mad_u16 v4, v2, v1, v2 1653; GFX8-GISEL-NEXT: v_mad_u16 v5, v0, v3, v0 1654; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v6, v4, v1 1655; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v7, v5, v3 1656; GFX8-GISEL-NEXT: v_mad_u16 v2, v2, v1, 1 1657; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v3, 1 1658; GFX8-GISEL-NEXT: v_mad_u16 v1, v4, v1, 1 1659; GFX8-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1 1660; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v6, v2 1661; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v7, v0 1662; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v2, v1 1663; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1664; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 1665; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 1666; 1667; GFX9-LABEL: clpeak_umad_pat_v2i16: 1668; GFX9: ; %bb.0: ; %entry 1669; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1670; GFX9-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 
1671; GFX9-NEXT: v_pk_mad_u16 v2, v0, v1, v0 1672; GFX9-NEXT: v_pk_mul_lo_u16 v3, v2, v1 1673; GFX9-NEXT: v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0] 1674; GFX9-NEXT: v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0] 1675; GFX9-NEXT: v_pk_mul_lo_u16 v0, v3, v0 1676; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1 1677; GFX9-NEXT: s_setpc_b64 s[30:31] 1678; 1679; GFX10-LABEL: clpeak_umad_pat_v2i16: 1680; GFX10: ; %bb.0: ; %entry 1681; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1682; GFX10-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 1683; GFX10-NEXT: v_pk_mad_u16 v2, v0, v1, v0 1684; GFX10-NEXT: v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0] 1685; GFX10-NEXT: v_pk_mul_lo_u16 v3, v2, v1 1686; GFX10-NEXT: v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0] 1687; GFX10-NEXT: v_pk_mul_lo_u16 v0, v3, v0 1688; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1 1689; GFX10-NEXT: s_setpc_b64 s[30:31] 1690; 1691; GFX11-LABEL: clpeak_umad_pat_v2i16: 1692; GFX11: ; %bb.0: ; %entry 1693; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1694; GFX11-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 1695; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1696; GFX11-NEXT: v_pk_mad_u16 v2, v0, v1, v0 1697; GFX11-NEXT: v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0] 1698; GFX11-NEXT: v_pk_mul_lo_u16 v3, v2, v1 1699; GFX11-NEXT: v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0] 1700; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1701; GFX11-NEXT: v_pk_mul_lo_u16 v0, v3, v0 1702; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v1 1703; GFX11-NEXT: s_setpc_b64 s[30:31] 1704; 1705; GFX1200-LABEL: clpeak_umad_pat_v2i16: 1706; GFX1200: ; %bb.0: ; %entry 1707; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 1708; GFX1200-NEXT: s_wait_expcnt 0x0 1709; GFX1200-NEXT: s_wait_samplecnt 0x0 1710; GFX1200-NEXT: s_wait_bvhcnt 0x0 1711; GFX1200-NEXT: s_wait_kmcnt 0x0 1712; GFX1200-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 1713; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | 
instskip(SKIP_1) | instid1(VALU_DEP_2) 1714; GFX1200-NEXT: v_pk_mad_u16 v2, v0, v1, v0 1715; GFX1200-NEXT: v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0] 1716; GFX1200-NEXT: v_pk_mul_lo_u16 v3, v2, v1 1717; GFX1200-NEXT: v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0] 1718; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1719; GFX1200-NEXT: v_pk_mul_lo_u16 v0, v3, v0 1720; GFX1200-NEXT: v_pk_mul_lo_u16 v0, v0, v1 1721; GFX1200-NEXT: s_setpc_b64 s[30:31] 1722entry: 1723 %y18 = add <2 x i16> %x, <i16 1, i16 1> 1724 %add = mul <2 x i16> %y18, %y 1725 %mul119 = add <2 x i16> %add, %y18 1726 %add2 = mul <2 x i16> %mul119, %y 1727 %add220 = add <2 x i16> %add, <i16 1, i16 1> 1728 %add422 = add <2 x i16> %add2, <i16 1, i16 1> 1729 %mul521 = mul <2 x i16> %add2, %add220 1730 %add6 = mul <2 x i16> %mul521, %add422 1731 ret <2 x i16> %add6 1732} 1733
; clpeak unsigned mad pattern on <3 x i16>. Per lane, with x1 = x + 1,
; a = x1 * y and b = (a + x1) * y, the function body computes
; b * (a + 1) * (b + 1), written as separate add and mul instructions.
; The per-target assertions that follow are autogenerated; regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.
1734define <3 x i16> @clpeak_umad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) { 1735; GFX67-SDAG-LABEL: clpeak_umad_pat_v3i16: 1736; GFX67-SDAG: ; %bb.0: ; %entry 1737; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1738; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 1739; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1 1740; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v0 1741; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v1 1742; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 1743; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4 1744; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v2 1745; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v9, v8, v4 1746; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v6, v3, v0 1747; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v8, v4, v1 1748; GFX67-SDAG-NEXT: v_mad_u32_u24 v6, v6, v3, 1 1749; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v2 1750; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5 1751; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 1752; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 1753; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6 1754; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v9, 16, v9 1755; 
GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v7, v5, v2 1756; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v8, v0, v3 1757; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v4 1758; GFX67-SDAG-NEXT: v_or_b32_e32 v6, v9, v6 1759; GFX67-SDAG-NEXT: s_mov_b32 s4, 0x10000 1760; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v3, 1 1761; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 1762; GFX67-SDAG-NEXT: v_add_i32_e32 v6, vcc, s4, v6 1763; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 1764; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v1 1765; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v2, v5 1766; GFX67-SDAG-NEXT: v_mad_u32_u24 v7, v7, v5, 1 1767; GFX67-SDAG-NEXT: v_or_b32_e32 v0, v3, v0 1768; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v6 1769; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 1770; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7 1771; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v2, v5, 1 1772; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v8 1773; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6 1774; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v3 1775; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4 1776; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, s4, v0 1777; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v5, v5, v6 1778; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v7 1779; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 1780; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v0 1781; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5 1782; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 1783; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 1784; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 1785; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v5, v0 1786; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v4 1787; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v3, v2 1788; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] 1789; 1790; GFX67-GISEL-LABEL: clpeak_umad_pat_v3i16: 1791; GFX67-GISEL: ; %bb.0: ; %entry 1792; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1793; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 1794; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 
1795; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v0 1796; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 1797; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2 1798; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v1 1799; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 1800; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v6, v3, v0 1801; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v2 1802; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5 1803; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v7, v4, v1 1804; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 1805; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v8, v5, v2 1806; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v9, v0, v3 1807; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 1808; GFX67-GISEL-NEXT: v_mad_u32_u24 v6, v6, v3, 1 1809; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v10, v1, v4 1810; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 1811; GFX67-GISEL-NEXT: v_mad_u32_u24 v7, v7, v4, 1 1812; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v3, 1 1813; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v4, 1 1814; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v9 1815; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v6 1816; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v11, v2, v5 1817; GFX67-GISEL-NEXT: v_mad_u32_u24 v8, v8, v5, 1 1818; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v2, v5, 1 1819; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v4 1820; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v10 1821; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v7 1822; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v5 1823; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v11 1824; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v8 1825; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 1826; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 1827; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v6 1828; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v3, v0 1829; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4 1830; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 1831; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1 1832; GFX67-GISEL-NEXT: v_and_b32_e32 
v3, 0xffff, v5 1833; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 1834; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v3, v2 1835; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] 1836; 1837; GFX8-SDAG-LABEL: clpeak_umad_pat_v3i16: 1838; GFX8-SDAG: ; %bb.0: ; %entry 1839; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1840; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 1 1841; GFX8-SDAG-NEXT: v_add_u16_e32 v4, 1, v0 1842; GFX8-SDAG-NEXT: v_add_u16_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1843; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v2 1844; GFX8-SDAG-NEXT: v_add_u16_e32 v1, 1, v1 1845; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v6, v0, v5 1846; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v5, v0 1847; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v7, v1, v3 1848; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v8, v4, v2 1849; GFX8-SDAG-NEXT: v_mad_u16 v4, v4, v2, v4 1850; GFX8-SDAG-NEXT: v_mad_u16 v1, v1, v3, v1 1851; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v0, v0, v5 1852; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v1, v1, v3 1853; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v2, v4, v2 1854; GFX8-SDAG-NEXT: v_mad_u16 v3, v0, v6, v0 1855; GFX8-SDAG-NEXT: v_mad_u16 v4, v2, v8, v2 1856; GFX8-SDAG-NEXT: v_mad_u16 v0, v3, v0, v3 1857; GFX8-SDAG-NEXT: v_mad_u16 v5, v1, v7, v1 1858; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1859; GFX8-SDAG-NEXT: v_mad_u16 v2, v4, v2, v4 1860; GFX8-SDAG-NEXT: v_mad_u16 v1, v5, v1, v5 1861; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v2, v0 1862; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 1863; 1864; GFX8-GISEL-LABEL: clpeak_umad_pat_v3i16: 1865; GFX8-GISEL: ; %bb.0: ; %entry 1866; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1867; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, 1 1868; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2 1869; GFX8-GISEL-NEXT: v_add_u16_e32 v5, 1, v0 1870; GFX8-GISEL-NEXT: v_add_u16_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1871; GFX8-GISEL-NEXT: v_add_u16_e32 v1, 1, v1 1872; GFX8-GISEL-NEXT: v_mad_u16 v6, v5, v2, v5 1873; 
GFX8-GISEL-NEXT: v_mad_u16 v7, v0, v4, v0 1874; GFX8-GISEL-NEXT: v_mad_u16 v8, v1, v3, v1 1875; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v9, v6, v2 1876; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v10, v7, v4 1877; GFX8-GISEL-NEXT: v_mad_u16 v5, v5, v2, 1 1878; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v4, 1 1879; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v11, v8, v3 1880; GFX8-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1 1881; GFX8-GISEL-NEXT: v_mad_u16 v2, v6, v2, 1 1882; GFX8-GISEL-NEXT: v_mad_u16 v4, v7, v4, 1 1883; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v5, v9, v5 1884; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v10, v0 1885; GFX8-GISEL-NEXT: v_mad_u16 v3, v8, v3, 1 1886; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v11, v1 1887; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v5, v2 1888; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1889; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 1890; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v1, v3 1891; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 1892; 1893; GFX9-SDAG-LABEL: clpeak_umad_pat_v3i16: 1894; GFX9-SDAG: ; %bb.0: ; %entry 1895; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1896; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 1897; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 1898; GFX9-SDAG-NEXT: v_pk_mad_u16 v4, v1, v3, v1 1899; GFX9-SDAG-NEXT: v_pk_mad_u16 v5, v0, v2, v0 1900; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v6, v5, v2 1901; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v7, v4, v3 1902; GFX9-SDAG-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 1903; GFX9-SDAG-NEXT: v_pk_mad_u16 v1, v1, v3, 1 1904; GFX9-SDAG-NEXT: v_pk_mad_u16 v3, v4, v3, 1 1905; GFX9-SDAG-NEXT: v_pk_mad_u16 v2, v5, v2, 1 op_sel_hi:[1,1,0] 1906; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v7, v1 1907; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v6, v0 1908; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2 1909; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3 1910; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1911; 1912; GFX9-GISEL-LABEL: clpeak_umad_pat_v3i16: 1913; GFX9-GISEL: ; %bb.0: ; 
%entry 1914; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1915; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 1916; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 1917; GFX9-GISEL-NEXT: v_pk_mad_u16 v4, v0, v2, v0 1918; GFX9-GISEL-NEXT: v_pk_mad_u16 v5, v1, v3, v1 1919; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v6, v4, v2 1920; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v7, v5, v3 1921; GFX9-GISEL-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 1922; GFX9-GISEL-NEXT: v_pk_mad_u16 v1, v1, v3, 1 1923; GFX9-GISEL-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 1924; GFX9-GISEL-NEXT: v_pk_mad_u16 v3, v5, v3, 1 1925; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v6, v0 1926; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v1, v7, v1 1927; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 1928; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 1929; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1930; 1931; GFX10-SDAG-LABEL: clpeak_umad_pat_v3i16: 1932; GFX10-SDAG: ; %bb.0: ; %entry 1933; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1934; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 1935; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 1936; GFX10-SDAG-NEXT: v_pk_mad_u16 v4, v0, v2, v0 1937; GFX10-SDAG-NEXT: v_pk_mad_u16 v5, v1, v3, v1 1938; GFX10-SDAG-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 1939; GFX10-SDAG-NEXT: v_pk_mad_u16 v1, v1, v3, 1 1940; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v6, v4, v2 1941; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v7, v5, v3 1942; GFX10-SDAG-NEXT: v_pk_mad_u16 v3, v5, v3, 1 1943; GFX10-SDAG-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 1944; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v6, v0 1945; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v7, v1 1946; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2 1947; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3 1948; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 1949; 1950; GFX10-GISEL-LABEL: clpeak_umad_pat_v3i16: 1951; GFX10-GISEL: ; %bb.0: ; %entry 1952; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1953; GFX10-GISEL-NEXT: v_pk_add_u16 v0, 
v0, 1 op_sel_hi:[1,0] 1954; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 1955; GFX10-GISEL-NEXT: v_pk_mad_u16 v4, v0, v2, v0 1956; GFX10-GISEL-NEXT: v_pk_mad_u16 v5, v1, v3, v1 1957; GFX10-GISEL-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 1958; GFX10-GISEL-NEXT: v_pk_mad_u16 v1, v1, v3, 1 1959; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v6, v4, v2 1960; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v7, v5, v3 1961; GFX10-GISEL-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 1962; GFX10-GISEL-NEXT: v_pk_mad_u16 v3, v5, v3, 1 1963; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v6, v0 1964; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v7, v1 1965; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 1966; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 1967; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 1968; 1969; GFX11-SDAG-LABEL: clpeak_umad_pat_v3i16: 1970; GFX11-SDAG: ; %bb.0: ; %entry 1971; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1972; GFX11-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 1973; GFX11-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 1974; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1975; GFX11-SDAG-NEXT: v_pk_mad_u16 v4, v0, v2, v0 1976; GFX11-SDAG-NEXT: v_pk_mad_u16 v5, v1, v3, v1 1977; GFX11-SDAG-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 1978; GFX11-SDAG-NEXT: v_pk_mad_u16 v1, v1, v3, 1 1979; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1980; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v6, v4, v2 1981; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v7, v5, v3 1982; GFX11-SDAG-NEXT: v_pk_mad_u16 v3, v5, v3, 1 1983; GFX11-SDAG-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 1984; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1985; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v6, v0 1986; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v7, v1 1987; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1988; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2 1989; 
GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3 1990; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 1991; 1992; GFX11-GISEL-LABEL: clpeak_umad_pat_v3i16: 1993; GFX11-GISEL: ; %bb.0: ; %entry 1994; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1995; GFX11-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 1996; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 1997; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1998; GFX11-GISEL-NEXT: v_pk_mad_u16 v4, v0, v2, v0 1999; GFX11-GISEL-NEXT: v_pk_mad_u16 v5, v1, v3, v1 2000; GFX11-GISEL-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 2001; GFX11-GISEL-NEXT: v_pk_mad_u16 v1, v1, v3, 1 2002; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 2003; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v6, v4, v2 2004; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v7, v5, v3 2005; GFX11-GISEL-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 2006; GFX11-GISEL-NEXT: v_pk_mad_u16 v3, v5, v3, 1 2007; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 2008; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v6, v0 2009; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v1, v7, v1 2010; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2011; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 2012; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 2013; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 2014; 2015; GFX1200-SDAG-LABEL: clpeak_umad_pat_v3i16: 2016; GFX1200-SDAG: ; %bb.0: ; %entry 2017; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 2018; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0 2019; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0 2020; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0 2021; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0 2022; GFX1200-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 2023; GFX1200-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 2024; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2025; GFX1200-SDAG-NEXT: v_pk_mad_u16 v4, v0, 
v2, v0 2026; GFX1200-SDAG-NEXT: v_pk_mad_u16 v5, v1, v3, v1 2027; GFX1200-SDAG-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 2028; GFX1200-SDAG-NEXT: v_pk_mad_u16 v1, v1, v3, 1 2029; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 2030; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v6, v4, v2 2031; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v7, v5, v3 2032; GFX1200-SDAG-NEXT: v_pk_mad_u16 v3, v5, v3, 1 2033; GFX1200-SDAG-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 2034; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 2035; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v0, v6, v0 2036; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v1, v7, v1 2037; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2038; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2 2039; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3 2040; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31] 2041; 2042; GFX1200-GISEL-LABEL: clpeak_umad_pat_v3i16: 2043; GFX1200-GISEL: ; %bb.0: ; %entry 2044; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 2045; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0 2046; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 2047; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 2048; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 2049; GFX1200-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 2050; GFX1200-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 2051; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2052; GFX1200-GISEL-NEXT: v_pk_mad_u16 v4, v0, v2, v0 2053; GFX1200-GISEL-NEXT: v_pk_mad_u16 v5, v1, v3, v1 2054; GFX1200-GISEL-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 2055; GFX1200-GISEL-NEXT: v_pk_mad_u16 v1, v1, v3, 1 2056; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 2057; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v6, v4, v2 2058; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v7, v5, v3 2059; GFX1200-GISEL-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 2060; 
GFX1200-GISEL-NEXT: v_pk_mad_u16 v3, v5, v3, 1 2061; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 2062; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v0, v6, v0 2063; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v1, v7, v1 2064; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2065; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 2066; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 2067; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] 2068entry: 2069 %y48 = add <3 x i16> %x, <i16 1, i16 1, i16 1> 2070 %add = mul <3 x i16> %y48, %y 2071 %mul1249 = add <3 x i16> %add, %y48 2072 %add15 = mul <3 x i16> %mul1249, %y 2073 %add1550 = add <3 x i16> %add, <i16 1, i16 1, i16 1> 2074 %add2452 = add <3 x i16> %add15, <i16 1, i16 1, i16 1> 2075 %mul3051 = mul <3 x i16> %add15, %add1550 2076 %add33 = mul <3 x i16> %mul3051, %add2452 2077 ret <3 x i16> %add33 2078} 2079; Vector <4 x i16> variant of the clpeak mad pattern. Per element, with a = x + 1 and b = a * y and c = (b + a) * y, the function returns (c * (b + 1)) * (c + 1). The assertion groups that follow the define are autogenerated by utils/update_llc_test_checks.py (see the note at the top of the file). 2080define <4 x i16> @clpeak_umad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) { 2081; GFX67-SDAG-LABEL: clpeak_umad_pat_v4i16: 2082; GFX67-SDAG: ; %bb.0: ; %entry 2083; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2084; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, 1, v3 2085; GFX67-SDAG-NEXT: v_and_b32_e32 v11, 0xffff, v3 2086; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7 2087; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v2 2088; GFX67-SDAG-NEXT: v_mad_u32_u24 v3, v11, v7, v3 2089; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 2090; GFX67-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v2 2091; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1 2092; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6 2093; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 2094; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v0 2095; GFX67-SDAG-NEXT: v_and_b32_e32 v10, 0xffff, v1 2096; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4 2097; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5 2098; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v13, v11, v7 2099; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v7 2100; 
GFX67-SDAG-NEXT: v_mad_u32_u24 v7, v9, v6, 1 2101; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v12, v10, v5 2102; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v9, v6, v2 2103; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v8, v4, v0 2104; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v10, v5, v1 2105; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7 2106; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v9, 16, v13 2107; GFX67-SDAG-NEXT: v_mad_u32_u24 v8, v8, v4, 1 2108; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 2109; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 2110; GFX67-SDAG-NEXT: v_or_b32_e32 v7, v9, v7 2111; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8 2112; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v9, 16, v12 2113; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 2114; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v10, v0, v4 2115; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v5 2116; GFX67-SDAG-NEXT: s_mov_b32 s4, 0x10000 2117; GFX67-SDAG-NEXT: v_or_b32_e32 v8, v9, v8 2118; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v4, 1 2119; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v5, v2, v6 2120; GFX67-SDAG-NEXT: v_add_i32_e32 v8, vcc, s4, v8 2121; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v2, v6, 1 2122; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 2123; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v4, 16, v1 2124; GFX67-SDAG-NEXT: v_add_i32_e32 v7, vcc, s4, v7 2125; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 2126; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v6, 16, v3 2127; GFX67-SDAG-NEXT: v_or_b32_e32 v0, v4, v0 2128; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v8 2129; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 2130; GFX67-SDAG-NEXT: v_or_b32_e32 v2, v6, v2 2131; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v7 2132; GFX67-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v10 2133; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8 2134; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v4 2135; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v5 2136; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v7 2137; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 2138; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, s4, 
v2 2139; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, s4, v0 2140; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v8, v9, v8 2141; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v4, v5 2142; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v6 2143; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v0 2144; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v2 2145; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v8 2146; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 2147; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 2148; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4 2149; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 2150; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 2151; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v7, v0 2152; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v5 2153; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v4, v2 2154; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v6 2155; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] 2156; 2157; GFX67-GISEL-LABEL: clpeak_umad_pat_v4i16: 2158; GFX67-GISEL: ; %bb.0: ; %entry 2159; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2160; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 2161; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 2162; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v3 2163; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v1 2164; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2 2165; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v0 2166; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v9, 16, v9 2167; GFX67-GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v3 2168; GFX67-GISEL-NEXT: v_or_b32_e32 v8, v8, v9 2169; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v2 2170; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10 2171; GFX67-GISEL-NEXT: v_or_b32_e32 v9, v9, v10 2172; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v10, 16, v8 2173; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5 2174; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8 2175; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 2176; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v10, v5, v1 2177; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v11, 16, v9 2178; 
GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9 2179; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6 2180; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7 2181; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v8, v4, v0 2182; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 2183; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v9, v6, v2 2184; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v11, v7, v3 2185; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 2186; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 2187; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 2188; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v2 2189; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3 2190; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 2191; GFX67-GISEL-NEXT: v_or_b32_e32 v1, v1, v2 2192; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 2193; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 2194; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v13, v2, v5 2195; GFX67-GISEL-NEXT: v_mad_u32_u24 v10, v10, v5, 1 2196; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v2, v5, 1 2197; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1 2198; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v12, v0, v4 2199; GFX67-GISEL-NEXT: v_mad_u32_u24 v8, v8, v4, 1 2200; GFX67-GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v10 2201; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v4, 1 2202; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 2203; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 2204; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v15, v3, v7 2205; GFX67-GISEL-NEXT: v_mad_u32_u24 v11, v11, v7, 1 2206; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8 2207; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10 2208; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v3, v7, 1 2209; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 2210; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 2211; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v14, v1, v6 2212; GFX67-GISEL-NEXT: v_mad_u32_u24 v9, v9, v6, 1 2213; GFX67-GISEL-NEXT: v_or_b32_e32 v8, v8, v10 2214; GFX67-GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v11 2215; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v6, 
1 2216; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v2 2217; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3 2218; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9 2219; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10 2220; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 2221; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 2222; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v12 2223; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v8 2224; GFX67-GISEL-NEXT: v_or_b32_e32 v9, v9, v10 2225; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v1, v2 2226; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v8 2227; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v5 2228; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v13 2229; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v5, v1 2230; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v14 2231; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v9 2232; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v9 2233; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v6 2234; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v15 2235; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v6, v3 2236; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v0 2237; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 2238; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 2239; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v2 2240; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v4, v0 2241; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 2242; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v5 2243; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 2244; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 2245; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v6 2246; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v4, v2 2247; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v7 2248; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] 2249; 2250; GFX8-SDAG-LABEL: clpeak_umad_pat_v4i16: 2251; GFX8-SDAG: ; %bb.0: ; %entry 2252; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2253; GFX8-SDAG-NEXT: v_mov_b32_e32 v6, 1 2254; GFX8-SDAG-NEXT: v_add_u16_e32 v5, 1, v0 2255; GFX8-SDAG-NEXT: 
v_add_u16_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2256; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v8, 16, v2 2257; GFX8-SDAG-NEXT: v_add_u16_e32 v4, 1, v1 2258; GFX8-SDAG-NEXT: v_add_u16_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2259; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v3 2260; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v9, v0, v8 2261; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v8, v0 2262; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v7, v1, v6 2263; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v10, v5, v2 2264; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v11, v4, v3 2265; GFX8-SDAG-NEXT: v_mad_u16 v1, v1, v6, v1 2266; GFX8-SDAG-NEXT: v_mad_u16 v4, v4, v3, v4 2267; GFX8-SDAG-NEXT: v_mad_u16 v5, v5, v2, v5 2268; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v0, v0, v8 2269; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v1, v1, v6 2270; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v2, v5, v2 2271; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v3, v4, v3 2272; GFX8-SDAG-NEXT: v_mad_u16 v4, v0, v9, v0 2273; GFX8-SDAG-NEXT: v_mad_u16 v5, v1, v7, v1 2274; GFX8-SDAG-NEXT: v_mad_u16 v7, v2, v10, v2 2275; GFX8-SDAG-NEXT: v_mad_u16 v0, v4, v0, v4 2276; GFX8-SDAG-NEXT: v_mad_u16 v6, v3, v11, v3 2277; GFX8-SDAG-NEXT: v_mad_u16 v1, v5, v1, v5 2278; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 2279; GFX8-SDAG-NEXT: v_mad_u16 v2, v7, v2, v7 2280; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v2, v0 2281; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 2282; GFX8-SDAG-NEXT: v_mad_u16 v2, v6, v3, v6 2283; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v2, v1 2284; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 2285; 2286; GFX8-GISEL-LABEL: clpeak_umad_pat_v4i16: 2287; GFX8-GISEL: ; %bb.0: ; %entry 2288; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2289; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 1 2290; GFX8-GISEL-NEXT: v_add_u16_e32 v4, 1, v0 2291; GFX8-GISEL-NEXT: v_add_u16_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2292; GFX8-GISEL-NEXT: v_add_u16_e32 v6, 1, v1 2293; GFX8-GISEL-NEXT: 
v_add_u16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2294; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v2 2295; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v3 2296; GFX8-GISEL-NEXT: v_mad_u16 v8, v4, v2, v4 2297; GFX8-GISEL-NEXT: v_mad_u16 v9, v0, v5, v0 2298; GFX8-GISEL-NEXT: v_mad_u16 v10, v6, v3, v6 2299; GFX8-GISEL-NEXT: v_mad_u16 v11, v1, v7, v1 2300; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v12, v8, v2 2301; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v13, v9, v5 2302; GFX8-GISEL-NEXT: v_mad_u16 v4, v4, v2, 1 2303; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v5, 1 2304; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v14, v10, v3 2305; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v15, v11, v7 2306; GFX8-GISEL-NEXT: v_mad_u16 v6, v6, v3, 1 2307; GFX8-GISEL-NEXT: v_mad_u16 v1, v1, v7, 1 2308; GFX8-GISEL-NEXT: v_mad_u16 v2, v8, v2, 1 2309; GFX8-GISEL-NEXT: v_mad_u16 v5, v9, v5, 1 2310; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v12, v4 2311; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v13, v0 2312; GFX8-GISEL-NEXT: v_mad_u16 v3, v10, v3, 1 2313; GFX8-GISEL-NEXT: v_mad_u16 v7, v11, v7, 1 2314; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v6, v14, v6 2315; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v15, v1 2316; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v4, v2 2317; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v0, v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 2318; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 2319; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v6, v3 2320; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v1, v1, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 2321; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1 2322; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 2323; 2324; GFX9-SDAG-LABEL: clpeak_umad_pat_v4i16: 2325; GFX9-SDAG: ; %bb.0: ; %entry 2326; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2327; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 2328; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] 2329; GFX9-SDAG-NEXT: v_pk_mad_u16 v4, v1, v3, v1 2330; 
GFX9-SDAG-NEXT: v_pk_mad_u16 v5, v0, v2, v0 2331; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v6, v5, v2 2332; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v7, v4, v3 2333; GFX9-SDAG-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 2334; GFX9-SDAG-NEXT: v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0] 2335; GFX9-SDAG-NEXT: v_pk_mad_u16 v3, v4, v3, 1 op_sel_hi:[1,1,0] 2336; GFX9-SDAG-NEXT: v_pk_mad_u16 v2, v5, v2, 1 op_sel_hi:[1,1,0] 2337; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v7, v1 2338; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v6, v0 2339; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2 2340; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3 2341; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 2342; 2343; GFX9-GISEL-LABEL: clpeak_umad_pat_v4i16: 2344; GFX9-GISEL: ; %bb.0: ; %entry 2345; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2346; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 2347; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] 2348; GFX9-GISEL-NEXT: v_pk_mad_u16 v4, v0, v2, v0 2349; GFX9-GISEL-NEXT: v_pk_mad_u16 v5, v1, v3, v1 2350; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v6, v4, v2 2351; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v7, v5, v3 2352; GFX9-GISEL-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 2353; GFX9-GISEL-NEXT: v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0] 2354; GFX9-GISEL-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 2355; GFX9-GISEL-NEXT: v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0] 2356; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v6, v0 2357; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v1, v7, v1 2358; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 2359; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 2360; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 2361; 2362; GFX10-SDAG-LABEL: clpeak_umad_pat_v4i16: 2363; GFX10-SDAG: ; %bb.0: ; %entry 2364; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2365; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 2366; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] 2367; GFX10-SDAG-NEXT: v_pk_mad_u16 v4, v0, v2, v0 2368; GFX10-SDAG-NEXT: 
v_pk_mad_u16 v5, v1, v3, v1 2369; GFX10-SDAG-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 2370; GFX10-SDAG-NEXT: v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0] 2371; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v6, v4, v2 2372; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v7, v5, v3 2373; GFX10-SDAG-NEXT: v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0] 2374; GFX10-SDAG-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 2375; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v6, v0 2376; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v7, v1 2377; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2 2378; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3 2379; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 2380; 2381; GFX10-GISEL-LABEL: clpeak_umad_pat_v4i16: 2382; GFX10-GISEL: ; %bb.0: ; %entry 2383; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2384; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 2385; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] 2386; GFX10-GISEL-NEXT: v_pk_mad_u16 v4, v0, v2, v0 2387; GFX10-GISEL-NEXT: v_pk_mad_u16 v5, v1, v3, v1 2388; GFX10-GISEL-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 2389; GFX10-GISEL-NEXT: v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0] 2390; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v6, v4, v2 2391; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v7, v5, v3 2392; GFX10-GISEL-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 2393; GFX10-GISEL-NEXT: v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0] 2394; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v6, v0 2395; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v7, v1 2396; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 2397; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 2398; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 2399; 2400; GFX11-SDAG-LABEL: clpeak_umad_pat_v4i16: 2401; GFX11-SDAG: ; %bb.0: ; %entry 2402; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2403; GFX11-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 2404; GFX11-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] 2405; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_2) 2406; GFX11-SDAG-NEXT: v_pk_mad_u16 v4, v0, v2, v0 2407; GFX11-SDAG-NEXT: v_pk_mad_u16 v5, v1, v3, v1 2408; GFX11-SDAG-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 2409; GFX11-SDAG-NEXT: v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0] 2410; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 2411; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v6, v4, v2 2412; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v7, v5, v3 2413; GFX11-SDAG-NEXT: v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0] 2414; GFX11-SDAG-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 2415; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 2416; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v6, v0 2417; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v7, v1 2418; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2419; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2 2420; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3 2421; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 2422; 2423; GFX11-GISEL-LABEL: clpeak_umad_pat_v4i16: 2424; GFX11-GISEL: ; %bb.0: ; %entry 2425; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2426; GFX11-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 2427; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] 2428; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2429; GFX11-GISEL-NEXT: v_pk_mad_u16 v4, v0, v2, v0 2430; GFX11-GISEL-NEXT: v_pk_mad_u16 v5, v1, v3, v1 2431; GFX11-GISEL-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 2432; GFX11-GISEL-NEXT: v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0] 2433; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 2434; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v6, v4, v2 2435; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v7, v5, v3 2436; GFX11-GISEL-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 2437; GFX11-GISEL-NEXT: v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0] 2438; 
GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 2439; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v6, v0 2440; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v1, v7, v1 2441; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2442; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 2443; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 2444; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 2445; 2446; GFX1200-SDAG-LABEL: clpeak_umad_pat_v4i16: 2447; GFX1200-SDAG: ; %bb.0: ; %entry 2448; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 2449; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0 2450; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0 2451; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0 2452; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0 2453; GFX1200-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 2454; GFX1200-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] 2455; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2456; GFX1200-SDAG-NEXT: v_pk_mad_u16 v4, v0, v2, v0 2457; GFX1200-SDAG-NEXT: v_pk_mad_u16 v5, v1, v3, v1 2458; GFX1200-SDAG-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 2459; GFX1200-SDAG-NEXT: v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0] 2460; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 2461; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v6, v4, v2 2462; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v7, v5, v3 2463; GFX1200-SDAG-NEXT: v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0] 2464; GFX1200-SDAG-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 2465; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 2466; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v0, v6, v0 2467; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v1, v7, v1 2468; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2469; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2 2470; GFX1200-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3 2471; GFX1200-SDAG-NEXT: s_setpc_b64 
s[30:31] 2472; 2473; GFX1200-GISEL-LABEL: clpeak_umad_pat_v4i16: 2474; GFX1200-GISEL: ; %bb.0: ; %entry 2475; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 2476; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0 2477; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 2478; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 2479; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 2480; GFX1200-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 2481; GFX1200-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] 2482; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2483; GFX1200-GISEL-NEXT: v_pk_mad_u16 v4, v0, v2, v0 2484; GFX1200-GISEL-NEXT: v_pk_mad_u16 v5, v1, v3, v1 2485; GFX1200-GISEL-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] 2486; GFX1200-GISEL-NEXT: v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0] 2487; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 2488; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v6, v4, v2 2489; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v7, v5, v3 2490; GFX1200-GISEL-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] 2491; GFX1200-GISEL-NEXT: v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0] 2492; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 2493; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v0, v6, v0 2494; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v1, v7, v1 2495; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2496; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 2497; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 2498; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] 2499entry: 2500 %y18 = add <4 x i16> %x, <i16 1, i16 1, i16 1, i16 1> 2501 %add = mul <4 x i16> %y18, %y 2502 %mul119 = add <4 x i16> %add, %y18 2503 %add2 = mul <4 x i16> %mul119, %y 2504 %add220 = add <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1> 2505 %add422 = add <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1> 2506 %mul521 = mul <4 x i16> %add2, %add220 2507 %add6 = mul <4 x i16> 
%mul521, %add422 2508 ret <4 x i16> %add6 2509} 2510 2511define <2 x i32> @clpeak_imad_pat_v2i32(<2 x i32> %x, <2 x i32> %y) { 2512; GFX67-SDAG-LABEL: clpeak_imad_pat_v2i32: 2513; GFX67-SDAG: ; %bb.0: ; %entry 2514; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2515; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 2516; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1 2517; GFX67-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 2518; GFX67-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3 2519; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v4, v0 2520; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v5, v1 2521; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2 2522; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v1, v3 2523; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v0, v4 2524; GFX67-SDAG-NEXT: v_mul_lo_u32 v3, v1, v5 2525; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, v2, v0 2526; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, v3, v1 2527; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0 2528; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v3, v1 2529; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v2 2530; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v3 2531; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] 2532; 2533; GFX67-GISEL-LABEL: clpeak_imad_pat_v2i32: 2534; GFX67-GISEL: ; %bb.0: ; %entry 2535; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2536; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 2537; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 2538; GFX67-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 2539; GFX67-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 2540; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 2541; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1 2542; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 2543; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3 2544; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4 2545; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 2546; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v0 2547; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 2548; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v1, v3 2549; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 2550; 
GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4 2551; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v2, v1 2552; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] 2553; 2554; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i32: 2555; GFX8-SDAG: ; %bb.0: ; %entry 2556; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2557; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, 1, v0 2558; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, 1, v1 2559; GFX8-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 2560; GFX8-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3 2561; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v4, v0 2562; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v5, v1 2563; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2 2564; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v1, v3 2565; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v0, v4 2566; GFX8-SDAG-NEXT: v_mul_lo_u32 v3, v1, v5 2567; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v2, v0 2568; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, v3, v1 2569; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0 2570; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v3, v1 2571; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v2 2572; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v3 2573; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 2574; 2575; GFX8-GISEL-LABEL: clpeak_imad_pat_v2i32: 2576; GFX8-GISEL: ; %bb.0: ; %entry 2577; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2578; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0 2579; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v1 2580; GFX8-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 2581; GFX8-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 2582; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v4, v0 2583; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, v5, v1 2584; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 2585; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3 2586; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 1, v4 2587; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 1, v5 2588; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 1, v0 2589; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 2590; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v1, v3 2591; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v1 2592; GFX8-GISEL-NEXT: v_mul_lo_u32 
v0, v0, v4 2593; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v2, v1 2594; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 2595; 2596; GFX900-SDAG-LABEL: clpeak_imad_pat_v2i32: 2597; GFX900-SDAG: ; %bb.0: ; %entry 2598; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2599; GFX900-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 2600; GFX900-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 2601; GFX900-SDAG-NEXT: v_add_u32_e32 v1, 1, v1 2602; GFX900-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3 2603; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v4, v0 2604; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2 2605; GFX900-SDAG-NEXT: v_add_u32_e32 v1, v5, v1 2606; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v1, v3 2607; GFX900-SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v4, v[0:1] 2608; GFX900-SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v5, v[2:3] 2609; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v0, v[3:4] 2610; GFX900-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v2, v[4:5] 2611; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 2612; 2613; GFX900-GISEL-LABEL: clpeak_imad_pat_v2i32: 2614; GFX900-GISEL: ; %bb.0: ; %entry 2615; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2616; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 2617; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v1 2618; GFX900-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 2619; GFX900-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 2620; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v4, v0 2621; GFX900-GISEL-NEXT: v_add_u32_e32 v1, v5, v1 2622; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 2623; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3 2624; GFX900-GISEL-NEXT: v_add_u32_e32 v2, 1, v4 2625; GFX900-GISEL-NEXT: v_add_u32_e32 v3, 1, v5 2626; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v0, v2 2627; GFX900-GISEL-NEXT: v_mul_lo_u32 v3, v1, v3 2628; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 2629; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v1 2630; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 2631; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 2632; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 2633; 2634; GFX90A-SDAG-LABEL: 
clpeak_imad_pat_v2i32: 2635; GFX90A-SDAG: ; %bb.0: ; %entry 2636; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2637; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, 1, v1 2638; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 2639; GFX90A-SDAG-NEXT: v_mul_lo_u32 v6, v0, v2 2640; GFX90A-SDAG-NEXT: v_mul_lo_u32 v4, v1, v3 2641; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, v4, v1 2642; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v6, v0 2643; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2 2644; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v1, v3 2645; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v4, v[2:3] 2646; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v0, v6, v[0:1] 2647; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v0, v[6:7] 2648; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v2, v[4:5] 2649; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, v2 2650; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31] 2651; 2652; GFX90A-GISEL-LABEL: clpeak_imad_pat_v2i32: 2653; GFX90A-GISEL: ; %bb.0: ; %entry 2654; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2655; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 2656; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v1 2657; GFX90A-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 2658; GFX90A-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 2659; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v4, v0 2660; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, v5, v1 2661; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 2662; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3 2663; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, 1, v4 2664; GFX90A-GISEL-NEXT: v_add_u32_e32 v3, 1, v5 2665; GFX90A-GISEL-NEXT: v_add_u32_e32 v4, 1, v0 2666; GFX90A-GISEL-NEXT: v_add_u32_e32 v5, 1, v1 2667; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 2668; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3 2669; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4 2670; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5 2671; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31] 2672; 2673; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i32: 2674; GFX10-SDAG: ; %bb.0: ; %entry 2675; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 2676; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 2677; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1 2678; GFX10-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 2679; GFX10-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3 2680; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v4, v0 2681; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, v5, v1 2682; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2 2683; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v1, v3 2684; GFX10-SDAG-NEXT: v_mad_u64_u32 v[3:4], null, v0, v4, v[0:1] 2685; GFX10-SDAG-NEXT: v_mad_u64_u32 v[4:5], null, v2, v5, v[2:3] 2686; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v3, v0, v[3:4] 2687; GFX10-SDAG-NEXT: v_mad_u64_u32 v[1:2], null, v4, v2, v[4:5] 2688; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 2689; 2690; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i32: 2691; GFX10-GISEL: ; %bb.0: ; %entry 2692; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2693; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 2694; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 2695; GFX10-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 2696; GFX10-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 2697; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, v4, v0 2698; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, v5, v1 2699; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 2700; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3 2701; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v4 2702; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v5 2703; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v0, v2 2704; GFX10-GISEL-NEXT: v_mul_lo_u32 v3, v1, v3 2705; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 2706; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 2707; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 2708; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 2709; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 2710; 2711; GFX11-SDAG-LABEL: clpeak_imad_pat_v2i32: 2712; GFX11-SDAG: ; %bb.0: ; %entry 2713; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2714; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 2715; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1 2716; GFX11-SDAG-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2717; GFX11-SDAG-NEXT: v_mul_lo_u32 v6, v0, v2 2718; GFX11-SDAG-NEXT: v_mul_lo_u32 v7, v1, v3 2719; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2720; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, v6, v0 2721; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v1, v7, v1 2722; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2723; GFX11-SDAG-NEXT: v_mul_lo_u32 v2, v0, v2 2724; GFX11-SDAG-NEXT: v_mul_lo_u32 v3, v1, v3 2725; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2726; GFX11-SDAG-NEXT: v_mad_u64_u32 v[4:5], null, v2, v6, v[2:3] 2727; GFX11-SDAG-NEXT: v_mad_u64_u32 v[5:6], null, v3, v7, v[3:4] 2728; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 2729; GFX11-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, v[4:5] 2730; GFX11-SDAG-NEXT: v_mad_u64_u32 v[1:2], null, v5, v3, v[5:6] 2731; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 2732; 2733; GFX11-GISEL-LABEL: clpeak_imad_pat_v2i32: 2734; GFX11-GISEL: ; %bb.0: ; %entry 2735; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2736; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 2737; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 2738; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2739; GFX11-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 2740; GFX11-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 2741; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2742; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v4, v0 2743; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, v5, v1 2744; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2745; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 2746; GFX11-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3 2747; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v4 2748; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v5 2749; GFX11-GISEL-NEXT: 
s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2750; GFX11-GISEL-NEXT: v_mul_lo_u32 v2, v0, v2 2751; GFX11-GISEL-NEXT: v_mul_lo_u32 v3, v1, v3 2752; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 2753; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 2754; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2755; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 2756; GFX11-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 2757; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 2758; 2759; GFX1200-SDAG-LABEL: clpeak_imad_pat_v2i32: 2760; GFX1200-SDAG: ; %bb.0: ; %entry 2761; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 2762; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0 2763; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0 2764; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0 2765; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0 2766; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 2767; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1 2768; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2769; GFX1200-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 2770; GFX1200-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3 2771; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2772; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v0, v4, v0 2773; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v1, v5, v1 2774; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2775; GFX1200-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2 2776; GFX1200-SDAG-NEXT: v_mul_lo_u32 v2, v1, v3 2777; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 2778; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[3:4], null, v0, v4, v[0:1] 2779; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[4:5], null, v2, v5, v[2:3] 2780; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 2781; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[0:1], null, v3, v0, v[3:4] 2782; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[1:2], null, v4, v2, v[4:5] 2783; 
GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31] 2784; 2785; GFX1200-GISEL-LABEL: clpeak_imad_pat_v2i32: 2786; GFX1200-GISEL: ; %bb.0: ; %entry 2787; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 2788; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0 2789; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 2790; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 2791; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 2792; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 2793; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 2794; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2795; GFX1200-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 2796; GFX1200-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 2797; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2798; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, v4, v0 2799; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v1, v5, v1 2800; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2801; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 2802; GFX1200-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3 2803; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v4 2804; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v5 2805; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2806; GFX1200-GISEL-NEXT: v_mul_lo_u32 v2, v0, v2 2807; GFX1200-GISEL-NEXT: v_mul_lo_u32 v3, v1, v3 2808; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 2809; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 2810; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2811; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 2812; GFX1200-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 2813; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] 2814entry: 2815 %y18 = add <2 x i32> %x, <i32 1, i32 1> 2816 %add = mul <2 x i32> %y18, %y 2817 %mul119 = add <2 x i32> %add, %y18 2818 %add2 = mul <2 x i32> %mul119, %y 2819 %add220 = add <2 x i32> %add, <i32 1, i32 1> 2820 %add422 = add <2 x i32> %add2, <i32 1, i32 1> 2821 
%mul521 = mul <2 x i32> %add2, %add220 2822 %add6 = mul <2 x i32> %mul521, %add422 2823 ret <2 x i32> %add6 2824} 2825; <3 x i32> case of the clpeak integer mul/add chain: per lane, with t = (x + 1) * y and a = (t + (x + 1)) * y, the function returns a * (t + 1) * (a + 1). 2826define <3 x i32> @clpeak_imad_pat_v3i32(<3 x i32> %x, <3 x i32> %y) { 2827; GFX67-SDAG-LABEL: clpeak_imad_pat_v3i32: 2828; GFX67-SDAG: ; %bb.0: ; %entry 2829; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2830; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v2 2831; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1 2832; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 2833; GFX67-SDAG-NEXT: v_mul_lo_u32 v6, v2, v5 2834; GFX67-SDAG-NEXT: v_mul_lo_u32 v7, v0, v3 2835; GFX67-SDAG-NEXT: v_mul_lo_u32 v8, v1, v4 2836; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, v6, v2 2837; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v7, v0 2838; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v8, v1 2839; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v2, v5 2840; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v0, v3 2841; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v1, v4 2842; GFX67-SDAG-NEXT: v_mul_lo_u32 v3, v2, v6 2843; GFX67-SDAG-NEXT: v_mul_lo_u32 v4, v0, v7 2844; GFX67-SDAG-NEXT: v_mul_lo_u32 v5, v1, v8 2845; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, v3, v2 2846; GFX67-SDAG-NEXT: v_add_i32_e32 v4, vcc, v4, v0 2847; GFX67-SDAG-NEXT: v_add_i32_e32 v5, vcc, v5, v1 2848; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v4, v0 2849; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v5, v1 2850; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v3, v2 2851; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v4 2852; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v5 2853; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, v2, v3 2854; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] 2855; 2856; GFX67-GISEL-LABEL: clpeak_imad_pat_v3i32: 2857; GFX67-GISEL: ; %bb.0: ; %entry 2858; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2859; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 2860; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 2861; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2 2862; GFX67-GISEL-NEXT: v_mul_lo_u32 v6, v0, v3 2863; GFX67-GISEL-NEXT: v_mul_lo_u32 v7, v1, v4 2864; 
GFX67-GISEL-NEXT: v_mul_lo_u32 v8, v2, v5 2865; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 2866; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v7, v1 2867; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v8, v2 2868; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3 2869; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4 2870; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v2, v5 2871; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v6 2872; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v7 2873; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v8 2874; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0 2875; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v1 2876; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3 2877; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4 2878; GFX67-GISEL-NEXT: v_mul_lo_u32 v3, v2, v5 2879; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2 2880; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v6 2881; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v1, v7 2882; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v3, v2 2883; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] 2884; 2885; GFX8-SDAG-LABEL: clpeak_imad_pat_v3i32: 2886; GFX8-SDAG: ; %bb.0: ; %entry 2887; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2888; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, 1, v2 2889; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, 1, v1 2890; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, 1, v0 2891; GFX8-SDAG-NEXT: v_mul_lo_u32 v6, v2, v5 2892; GFX8-SDAG-NEXT: v_mul_lo_u32 v7, v0, v3 2893; GFX8-SDAG-NEXT: v_mul_lo_u32 v8, v1, v4 2894; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v6, v2 2895; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v7, v0 2896; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v8, v1 2897; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v2, v5 2898; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v0, v3 2899; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v1, v4 2900; GFX8-SDAG-NEXT: v_mul_lo_u32 v3, v2, v6 2901; GFX8-SDAG-NEXT: v_mul_lo_u32 v4, v0, v7 2902; GFX8-SDAG-NEXT: v_mul_lo_u32 v5, v1, v8 2903; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, v3, v2 2904; GFX8-SDAG-NEXT: v_add_u32_e32 v4, vcc, v4, v0 2905; GFX8-SDAG-NEXT: 
v_add_u32_e32 v5, vcc, v5, v1 2906; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v4, v0 2907; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v5, v1 2908; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v3, v2 2909; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v4 2910; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v5 2911; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v2, v3 2912; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 2913; 2914; GFX8-GISEL-LABEL: clpeak_imad_pat_v3i32: 2915; GFX8-GISEL: ; %bb.0: ; %entry 2916; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2917; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0 2918; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v1 2919; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 1, v2 2920; GFX8-GISEL-NEXT: v_mul_lo_u32 v6, v0, v3 2921; GFX8-GISEL-NEXT: v_mul_lo_u32 v7, v1, v4 2922; GFX8-GISEL-NEXT: v_mul_lo_u32 v8, v2, v5 2923; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v6, v0 2924; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, v7, v1 2925; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, v8, v2 2926; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3 2927; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4 2928; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v2, v5 2929; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 1, v6 2930; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 1, v7 2931; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 1, v8 2932; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 1, v0 2933; GFX8-GISEL-NEXT: v_add_u32_e32 v7, vcc, 1, v1 2934; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3 2935; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4 2936; GFX8-GISEL-NEXT: v_mul_lo_u32 v3, v2, v5 2937; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 1, v2 2938; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v6 2939; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v1, v7 2940; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v3, v2 2941; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 2942; 2943; GFX900-SDAG-LABEL: clpeak_imad_pat_v3i32: 2944; GFX900-SDAG: ; %bb.0: ; %entry 2945; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2946; GFX900-SDAG-NEXT: v_add_u32_e32 v2, 1, v2 2947; GFX900-SDAG-NEXT: v_mul_lo_u32 v6, 
v2, v5 2948; GFX900-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 2949; GFX900-SDAG-NEXT: v_add_u32_e32 v1, 1, v1 2950; GFX900-SDAG-NEXT: v_mul_lo_u32 v7, v0, v3 2951; GFX900-SDAG-NEXT: v_mul_lo_u32 v8, v1, v4 2952; GFX900-SDAG-NEXT: v_add_u32_e32 v2, v6, v2 2953; GFX900-SDAG-NEXT: v_mul_lo_u32 v5, v2, v5 2954; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v7, v0 2955; GFX900-SDAG-NEXT: v_add_u32_e32 v1, v8, v1 2956; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v3 2957; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v1, v4 2958; GFX900-SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v6, v[5:6] 2959; GFX900-SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v0, v7, v[0:1] 2960; GFX900-SDAG-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v2, v8, v[2:3] 2961; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v0, v[6:7] 2962; GFX900-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v7, v2, v[7:8] 2963; GFX900-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v3, v5, v[3:4] 2964; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 2965; 2966; GFX900-GISEL-LABEL: clpeak_imad_pat_v3i32: 2967; GFX900-GISEL: ; %bb.0: ; %entry 2968; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2969; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 2970; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v1 2971; GFX900-GISEL-NEXT: v_add_u32_e32 v2, 1, v2 2972; GFX900-GISEL-NEXT: v_mul_lo_u32 v6, v0, v3 2973; GFX900-GISEL-NEXT: v_mul_lo_u32 v7, v1, v4 2974; GFX900-GISEL-NEXT: v_mul_lo_u32 v8, v2, v5 2975; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v6, v0 2976; GFX900-GISEL-NEXT: v_add_u32_e32 v1, v7, v1 2977; GFX900-GISEL-NEXT: v_add_u32_e32 v2, v8, v2 2978; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3 2979; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4 2980; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v2, v5 2981; GFX900-GISEL-NEXT: v_add_u32_e32 v3, 1, v6 2982; GFX900-GISEL-NEXT: v_add_u32_e32 v4, 1, v7 2983; GFX900-GISEL-NEXT: v_add_u32_e32 v5, 1, v8 2984; GFX900-GISEL-NEXT: v_add_u32_e32 v6, 1, v0 2985; GFX900-GISEL-NEXT: v_add_u32_e32 v7, 1, v1 2986; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3 2987; 
GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4 2988; GFX900-GISEL-NEXT: v_mul_lo_u32 v3, v2, v5 2989; GFX900-GISEL-NEXT: v_add_u32_e32 v2, 1, v2 2990; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v6 2991; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v1, v7 2992; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v3, v2 2993; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 2994; 2995; GFX90A-SDAG-LABEL: clpeak_imad_pat_v3i32: 2996; GFX90A-SDAG: ; %bb.0: ; %entry 2997; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2998; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, 1, v1 2999; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 3000; GFX90A-SDAG-NEXT: v_add_u32_e32 v2, 1, v2 3001; GFX90A-SDAG-NEXT: v_mul_lo_u32 v10, v0, v3 3002; GFX90A-SDAG-NEXT: v_mul_lo_u32 v8, v1, v4 3003; GFX90A-SDAG-NEXT: v_mul_lo_u32 v6, v2, v5 3004; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, v8, v1 3005; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v10, v0 3006; GFX90A-SDAG-NEXT: v_add_u32_e32 v7, v6, v2 3007; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v3 3008; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v1, v4 3009; GFX90A-SDAG-NEXT: v_mul_lo_u32 v4, v7, v5 3010; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v2, v8, v[2:3] 3011; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v0, v10, v[0:1] 3012; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v4, v6, v[4:5] 3013; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v0, v[10:11] 3014; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v8, v2, v[8:9] 3015; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v4, v[6:7] 3016; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, v8 3017; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31] 3018; 3019; GFX90A-GISEL-LABEL: clpeak_imad_pat_v3i32: 3020; GFX90A-GISEL: ; %bb.0: ; %entry 3021; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3022; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 3023; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v1 3024; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, 1, v2 3025; GFX90A-GISEL-NEXT: v_mul_lo_u32 v6, v0, v3 3026; GFX90A-GISEL-NEXT: v_mul_lo_u32 v7, v1, v4 3027; 
GFX90A-GISEL-NEXT: v_mul_lo_u32 v8, v2, v5 3028; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v6, v0 3029; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, v7, v1 3030; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, v8, v2 3031; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3 3032; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4 3033; GFX90A-GISEL-NEXT: v_mul_lo_u32 v2, v2, v5 3034; GFX90A-GISEL-NEXT: v_add_u32_e32 v3, 1, v6 3035; GFX90A-GISEL-NEXT: v_add_u32_e32 v4, 1, v7 3036; GFX90A-GISEL-NEXT: v_add_u32_e32 v5, 1, v8 3037; GFX90A-GISEL-NEXT: v_add_u32_e32 v6, 1, v0 3038; GFX90A-GISEL-NEXT: v_add_u32_e32 v7, 1, v1 3039; GFX90A-GISEL-NEXT: v_add_u32_e32 v8, 1, v2 3040; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3 3041; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4 3042; GFX90A-GISEL-NEXT: v_mul_lo_u32 v2, v2, v5 3043; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v6 3044; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v1, v7 3045; GFX90A-GISEL-NEXT: v_mul_lo_u32 v2, v2, v8 3046; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31] 3047; 3048; GFX10-SDAG-LABEL: clpeak_imad_pat_v3i32: 3049; GFX10-SDAG: ; %bb.0: ; %entry 3050; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3051; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 3052; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1 3053; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v2 3054; GFX10-SDAG-NEXT: v_mul_lo_u32 v6, v0, v3 3055; GFX10-SDAG-NEXT: v_mul_lo_u32 v7, v1, v4 3056; GFX10-SDAG-NEXT: v_mul_lo_u32 v8, v2, v5 3057; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v6, v0 3058; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, v7, v1 3059; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v9, v8, v2 3060; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v0, v3 3061; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v1, v4 3062; GFX10-SDAG-NEXT: v_mul_lo_u32 v3, v9, v5 3063; GFX10-SDAG-NEXT: v_mad_u64_u32 v[4:5], null, v0, v6, v[0:1] 3064; GFX10-SDAG-NEXT: v_mad_u64_u32 v[5:6], null, v2, v7, v[2:3] 3065; GFX10-SDAG-NEXT: v_mad_u64_u32 v[6:7], null, v3, v8, v[3:4] 3066; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v4, v0, v[4:5] 3067; GFX10-SDAG-NEXT: 
v_mad_u64_u32 v[1:2], null, v5, v2, v[5:6] 3068; GFX10-SDAG-NEXT: v_mad_u64_u32 v[2:3], null, v6, v3, v[6:7] 3069; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 3070; 3071; GFX10-GISEL-LABEL: clpeak_imad_pat_v3i32: 3072; GFX10-GISEL: ; %bb.0: ; %entry 3073; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3074; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 3075; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 3076; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2 3077; GFX10-GISEL-NEXT: v_mul_lo_u32 v6, v0, v3 3078; GFX10-GISEL-NEXT: v_mul_lo_u32 v7, v1, v4 3079; GFX10-GISEL-NEXT: v_mul_lo_u32 v8, v2, v5 3080; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, v6, v0 3081; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, v7, v1 3082; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, v8, v2 3083; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3 3084; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4 3085; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v2, v5 3086; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v6 3087; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v7 3088; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v5, 1, v8 3089; GFX10-GISEL-NEXT: v_mul_lo_u32 v3, v0, v3 3090; GFX10-GISEL-NEXT: v_mul_lo_u32 v4, v1, v4 3091; GFX10-GISEL-NEXT: v_mul_lo_u32 v5, v2, v5 3092; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 3093; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 3094; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2 3095; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v3, v0 3096; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v4, v1 3097; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v5, v2 3098; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 3099; 3100; GFX11-SDAG-LABEL: clpeak_imad_pat_v3i32: 3101; GFX11-SDAG: ; %bb.0: ; %entry 3102; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3103; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 3104; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1 3105; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v2 3106; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3107; GFX11-SDAG-NEXT: v_mul_lo_u32 v7, v0, v3 3108; 
GFX11-SDAG-NEXT: v_mul_lo_u32 v8, v1, v4 3109; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3110; GFX11-SDAG-NEXT: v_mul_lo_u32 v9, v2, v5 3111; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, v7, v0 3112; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3113; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v1, v8, v1 3114; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v6, v9, v2 3115; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3116; GFX11-SDAG-NEXT: v_mul_lo_u32 v2, v0, v3 3117; GFX11-SDAG-NEXT: v_mul_lo_u32 v3, v1, v4 3118; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 3119; GFX11-SDAG-NEXT: v_mul_lo_u32 v4, v6, v5 3120; GFX11-SDAG-NEXT: v_mad_u64_u32 v[5:6], null, v2, v7, v[2:3] 3121; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 3122; GFX11-SDAG-NEXT: v_mad_u64_u32 v[6:7], null, v3, v8, v[3:4] 3123; GFX11-SDAG-NEXT: v_mad_u64_u32 v[7:8], null, v4, v9, v[4:5] 3124; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 3125; GFX11-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v5, v2, v[5:6] 3126; GFX11-SDAG-NEXT: v_mad_u64_u32 v[1:2], null, v6, v3, v[6:7] 3127; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) 3128; GFX11-SDAG-NEXT: v_mad_u64_u32 v[2:3], null, v7, v4, v[7:8] 3129; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 3130; 3131; GFX11-GISEL-LABEL: clpeak_imad_pat_v3i32: 3132; GFX11-GISEL: ; %bb.0: ; %entry 3133; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3134; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 3135; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 3136; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2 3137; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3138; GFX11-GISEL-NEXT: v_mul_lo_u32 v6, v0, v3 3139; GFX11-GISEL-NEXT: v_mul_lo_u32 v7, v1, v4 3140; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | 
instskip(NEXT) | instid1(VALU_DEP_3) 3141; GFX11-GISEL-NEXT: v_mul_lo_u32 v8, v2, v5 3142; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v6, v0 3143; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3144; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, v7, v1 3145; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v2, v8, v2 3146; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3147; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3 3148; GFX11-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4 3149; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3) 3150; GFX11-GISEL-NEXT: v_mul_lo_u32 v2, v2, v5 3151; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v6 3152; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v7 3153; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v5, 1, v8 3154; GFX11-GISEL-NEXT: v_mul_lo_u32 v3, v0, v3 3155; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3156; GFX11-GISEL-NEXT: v_mul_lo_u32 v4, v1, v4 3157; GFX11-GISEL-NEXT: v_mul_lo_u32 v5, v2, v5 3158; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 3159; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 3160; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2 3161; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3162; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v3, v0 3163; GFX11-GISEL-NEXT: v_mul_lo_u32 v1, v4, v1 3164; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) 3165; GFX11-GISEL-NEXT: v_mul_lo_u32 v2, v5, v2 3166; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 3167; 3168; GFX1200-SDAG-LABEL: clpeak_imad_pat_v3i32: 3169; GFX1200-SDAG: ; %bb.0: ; %entry 3170; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 3171; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0 3172; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0 3173; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0 3174; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0 3175; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 3176; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1 3177; 
GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v2 3178; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3179; GFX1200-SDAG-NEXT: v_mul_lo_u32 v6, v0, v3 3180; GFX1200-SDAG-NEXT: v_mul_lo_u32 v7, v1, v4 3181; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3182; GFX1200-SDAG-NEXT: v_mul_lo_u32 v8, v2, v5 3183; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v0, v6, v0 3184; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3185; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v1, v7, v1 3186; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v9, v8, v2 3187; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3188; GFX1200-SDAG-NEXT: v_mul_lo_u32 v0, v0, v3 3189; GFX1200-SDAG-NEXT: v_mul_lo_u32 v2, v1, v4 3190; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3191; GFX1200-SDAG-NEXT: v_mul_lo_u32 v3, v9, v5 3192; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[4:5], null, v0, v6, v[0:1] 3193; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 3194; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[5:6], null, v2, v7, v[2:3] 3195; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[6:7], null, v3, v8, v[3:4] 3196; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 3197; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[0:1], null, v4, v0, v[4:5] 3198; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[1:2], null, v5, v2, v[5:6] 3199; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) 3200; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[2:3], null, v6, v3, v[6:7] 3201; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31] 3202; 3203; GFX1200-GISEL-LABEL: clpeak_imad_pat_v3i32: 3204; GFX1200-GISEL: ; %bb.0: ; %entry 3205; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 3206; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0 3207; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 3208; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 3209; 
GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 3210; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 3211; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 3212; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2 3213; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3214; GFX1200-GISEL-NEXT: v_mul_lo_u32 v6, v0, v3 3215; GFX1200-GISEL-NEXT: v_mul_lo_u32 v7, v1, v4 3216; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3217; GFX1200-GISEL-NEXT: v_mul_lo_u32 v8, v2, v5 3218; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, v6, v0 3219; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3220; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v1, v7, v1 3221; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v2, v8, v2 3222; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3223; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3 3224; GFX1200-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4 3225; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3) 3226; GFX1200-GISEL-NEXT: v_mul_lo_u32 v2, v2, v5 3227; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v6 3228; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v7 3229; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v5, 1, v8 3230; GFX1200-GISEL-NEXT: v_mul_lo_u32 v3, v0, v3 3231; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3232; GFX1200-GISEL-NEXT: v_mul_lo_u32 v4, v1, v4 3233; GFX1200-GISEL-NEXT: v_mul_lo_u32 v5, v2, v5 3234; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 3235; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 3236; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2 3237; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3238; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v3, v0 3239; GFX1200-GISEL-NEXT: v_mul_lo_u32 v1, v4, v1 3240; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) 3241; GFX1200-GISEL-NEXT: 
v_mul_lo_u32 v2, v5, v2 3242; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] 3243entry: 3244 %y48 = add <3 x i32> %x, <i32 1, i32 1, i32 1> 3245 %add = mul <3 x i32> %y48, %y 3246 %mul1249 = add <3 x i32> %add, %y48 3247 %add15 = mul <3 x i32> %mul1249, %y 3248 %add1550 = add <3 x i32> %add, <i32 1, i32 1, i32 1> 3249 %add2452 = add <3 x i32> %add15, <i32 1, i32 1, i32 1> 3250 %mul3051 = mul <3 x i32> %add15, %add1550 3251 %add33 = mul <3 x i32> %mul3051, %add2452 3252 ret <3 x i32> %add33 3253} 3254 3255define <4 x i32> @clpeak_imad_pat_v4i32(<4 x i32> %x, <4 x i32> %y) { 3256; GFX67-SDAG-LABEL: clpeak_imad_pat_v4i32: 3257; GFX67-SDAG: ; %bb.0: ; %entry 3258; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3259; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, 1, v3 3260; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v2 3261; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1 3262; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 3263; GFX67-SDAG-NEXT: v_mul_lo_u32 v8, v0, v4 3264; GFX67-SDAG-NEXT: v_mul_lo_u32 v9, v3, v7 3265; GFX67-SDAG-NEXT: v_mul_lo_u32 v10, v1, v5 3266; GFX67-SDAG-NEXT: v_mul_lo_u32 v11, v2, v6 3267; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v8, v0 3268; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, v9, v3 3269; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, v11, v2 3270; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v10, v1 3271; GFX67-SDAG-NEXT: v_mul_lo_u32 v3, v3, v7 3272; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v2, v6 3273; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v0, v4 3274; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v1, v5 3275; GFX67-SDAG-NEXT: v_mul_lo_u32 v4, v3, v9 3276; GFX67-SDAG-NEXT: v_mul_lo_u32 v5, v2, v11 3277; GFX67-SDAG-NEXT: v_mul_lo_u32 v6, v0, v8 3278; GFX67-SDAG-NEXT: v_mul_lo_u32 v7, v1, v10 3279; GFX67-SDAG-NEXT: v_add_i32_e32 v4, vcc, v4, v3 3280; GFX67-SDAG-NEXT: v_add_i32_e32 v5, vcc, v5, v2 3281; GFX67-SDAG-NEXT: v_add_i32_e32 v6, vcc, v6, v0 3282; GFX67-SDAG-NEXT: v_add_i32_e32 v7, vcc, v7, v1 3283; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v6, v0 3284; GFX67-SDAG-NEXT: 
v_mul_lo_u32 v1, v7, v1 3285; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v5, v2 3286; GFX67-SDAG-NEXT: v_mul_lo_u32 v3, v4, v3 3287; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v6 3288; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v7 3289; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, v2, v5 3290; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, v3, v4 3291; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] 3292; 3293; GFX67-GISEL-LABEL: clpeak_imad_pat_v4i32: 3294; GFX67-GISEL: ; %bb.0: ; %entry 3295; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3296; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 3297; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 3298; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2 3299; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v3 3300; GFX67-GISEL-NEXT: v_mul_lo_u32 v8, v0, v4 3301; GFX67-GISEL-NEXT: v_mul_lo_u32 v9, v1, v5 3302; GFX67-GISEL-NEXT: v_mul_lo_u32 v10, v2, v6 3303; GFX67-GISEL-NEXT: v_mul_lo_u32 v11, v3, v7 3304; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 3305; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 3306; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v10, v2 3307; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v11, v3 3308; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4 3309; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5 3310; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v2, v6 3311; GFX67-GISEL-NEXT: v_mul_lo_u32 v3, v3, v7 3312; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v8 3313; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v9 3314; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v10 3315; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v11 3316; GFX67-GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v0 3317; GFX67-GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v1 3318; GFX67-GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v2 3319; GFX67-GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v3 3320; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4 3321; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5 3322; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v2, v6 3323; GFX67-GISEL-NEXT: v_mul_lo_u32 v3, v3, v7 3324; GFX67-GISEL-NEXT: 
v_mul_lo_u32 v0, v0, v8 3325; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v1, v9 3326; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v2, v10 3327; GFX67-GISEL-NEXT: v_mul_lo_u32 v3, v3, v11 3328; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] 3329; 3330; GFX8-SDAG-LABEL: clpeak_imad_pat_v4i32: 3331; GFX8-SDAG: ; %bb.0: ; %entry 3332; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3333; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, 1, v3 3334; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, 1, v2 3335; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, 1, v1 3336; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, 1, v0 3337; GFX8-SDAG-NEXT: v_mul_lo_u32 v8, v0, v4 3338; GFX8-SDAG-NEXT: v_mul_lo_u32 v9, v3, v7 3339; GFX8-SDAG-NEXT: v_mul_lo_u32 v10, v1, v5 3340; GFX8-SDAG-NEXT: v_mul_lo_u32 v11, v2, v6 3341; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v8, v0 3342; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, v9, v3 3343; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v11, v2 3344; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v10, v1 3345; GFX8-SDAG-NEXT: v_mul_lo_u32 v3, v3, v7 3346; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v2, v6 3347; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v0, v4 3348; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v1, v5 3349; GFX8-SDAG-NEXT: v_mul_lo_u32 v4, v3, v9 3350; GFX8-SDAG-NEXT: v_mul_lo_u32 v5, v2, v11 3351; GFX8-SDAG-NEXT: v_mul_lo_u32 v6, v0, v8 3352; GFX8-SDAG-NEXT: v_mul_lo_u32 v7, v1, v10 3353; GFX8-SDAG-NEXT: v_add_u32_e32 v4, vcc, v4, v3 3354; GFX8-SDAG-NEXT: v_add_u32_e32 v5, vcc, v5, v2 3355; GFX8-SDAG-NEXT: v_add_u32_e32 v6, vcc, v6, v0 3356; GFX8-SDAG-NEXT: v_add_u32_e32 v7, vcc, v7, v1 3357; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v6, v0 3358; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v7, v1 3359; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v5, v2 3360; GFX8-SDAG-NEXT: v_mul_lo_u32 v3, v4, v3 3361; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v6 3362; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v7 3363; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v2, v5 3364; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, v3, v4 3365; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 3366; 3367; 
GFX8-GISEL-LABEL: clpeak_imad_pat_v4i32: 3368; GFX8-GISEL: ; %bb.0: ; %entry 3369; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3370; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0 3371; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v1 3372; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 1, v2 3373; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 1, v3 3374; GFX8-GISEL-NEXT: v_mul_lo_u32 v8, v0, v4 3375; GFX8-GISEL-NEXT: v_mul_lo_u32 v9, v1, v5 3376; GFX8-GISEL-NEXT: v_mul_lo_u32 v10, v2, v6 3377; GFX8-GISEL-NEXT: v_mul_lo_u32 v11, v3, v7 3378; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v8, v0 3379; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, v9, v1 3380; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, v10, v2 3381; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, v11, v3 3382; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4 3383; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5 3384; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v2, v6 3385; GFX8-GISEL-NEXT: v_mul_lo_u32 v3, v3, v7 3386; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 1, v8 3387; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 1, v9 3388; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 1, v10 3389; GFX8-GISEL-NEXT: v_add_u32_e32 v7, vcc, 1, v11 3390; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 1, v0 3391; GFX8-GISEL-NEXT: v_add_u32_e32 v9, vcc, 1, v1 3392; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 1, v2 3393; GFX8-GISEL-NEXT: v_add_u32_e32 v11, vcc, 1, v3 3394; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4 3395; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5 3396; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v2, v6 3397; GFX8-GISEL-NEXT: v_mul_lo_u32 v3, v3, v7 3398; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v8 3399; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v1, v9 3400; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v2, v10 3401; GFX8-GISEL-NEXT: v_mul_lo_u32 v3, v3, v11 3402; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 3403; 3404; GFX900-SDAG-LABEL: clpeak_imad_pat_v4i32: 3405; GFX900-SDAG: ; %bb.0: ; %entry 3406; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3407; GFX900-SDAG-NEXT: v_add_u32_e32 v3, 1, v3 3408; 
GFX900-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 3409; GFX900-SDAG-NEXT: v_add_u32_e32 v2, 1, v2 3410; GFX900-SDAG-NEXT: v_mul_lo_u32 v8, v3, v7 3411; GFX900-SDAG-NEXT: v_mul_lo_u32 v11, v0, v4 3412; GFX900-SDAG-NEXT: v_mul_lo_u32 v9, v2, v6 3413; GFX900-SDAG-NEXT: v_add_u32_e32 v1, 1, v1 3414; GFX900-SDAG-NEXT: v_mul_lo_u32 v10, v1, v5 3415; GFX900-SDAG-NEXT: v_add_u32_e32 v3, v8, v3 3416; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v11, v0 3417; GFX900-SDAG-NEXT: v_add_u32_e32 v12, v9, v2 3418; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v4 3419; GFX900-SDAG-NEXT: v_mul_lo_u32 v4, v3, v7 3420; GFX900-SDAG-NEXT: v_mul_lo_u32 v3, v12, v6 3421; GFX900-SDAG-NEXT: v_add_u32_e32 v1, v10, v1 3422; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v1, v5 3423; GFX900-SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v8, v[4:5] 3424; GFX900-SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v3, v9, v[3:4] 3425; GFX900-SDAG-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v0, v11, v[0:1] 3426; GFX900-SDAG-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v2, v10, v[2:3] 3427; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v7, v0, v[7:8] 3428; GFX900-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v8, v2, v[8:9] 3429; GFX900-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v3, v[6:7] 3430; GFX900-SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v4, v[5:6] 3431; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 3432; 3433; GFX9-GISEL-LABEL: clpeak_imad_pat_v4i32: 3434; GFX9-GISEL: ; %bb.0: ; %entry 3435; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3436; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 3437; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 1, v1 3438; GFX9-GISEL-NEXT: v_add_u32_e32 v2, 1, v2 3439; GFX9-GISEL-NEXT: v_add_u32_e32 v3, 1, v3 3440; GFX9-GISEL-NEXT: v_mul_lo_u32 v8, v0, v4 3441; GFX9-GISEL-NEXT: v_mul_lo_u32 v9, v1, v5 3442; GFX9-GISEL-NEXT: v_mul_lo_u32 v10, v2, v6 3443; GFX9-GISEL-NEXT: v_mul_lo_u32 v11, v3, v7 3444; GFX9-GISEL-NEXT: v_add_u32_e32 v0, v8, v0 3445; GFX9-GISEL-NEXT: v_add_u32_e32 v1, v9, v1 3446; GFX9-GISEL-NEXT: v_add_u32_e32 v2, v10, v2 
3447; GFX9-GISEL-NEXT: v_add_u32_e32 v3, v11, v3 3448; GFX9-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4 3449; GFX9-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5 3450; GFX9-GISEL-NEXT: v_mul_lo_u32 v2, v2, v6 3451; GFX9-GISEL-NEXT: v_mul_lo_u32 v3, v3, v7 3452; GFX9-GISEL-NEXT: v_add_u32_e32 v4, 1, v8 3453; GFX9-GISEL-NEXT: v_add_u32_e32 v5, 1, v9 3454; GFX9-GISEL-NEXT: v_add_u32_e32 v6, 1, v10 3455; GFX9-GISEL-NEXT: v_add_u32_e32 v7, 1, v11 3456; GFX9-GISEL-NEXT: v_add_u32_e32 v8, 1, v0 3457; GFX9-GISEL-NEXT: v_add_u32_e32 v9, 1, v1 3458; GFX9-GISEL-NEXT: v_add_u32_e32 v10, 1, v2 3459; GFX9-GISEL-NEXT: v_add_u32_e32 v11, 1, v3 3460; GFX9-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4 3461; GFX9-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5 3462; GFX9-GISEL-NEXT: v_mul_lo_u32 v2, v2, v6 3463; GFX9-GISEL-NEXT: v_mul_lo_u32 v3, v3, v7 3464; GFX9-GISEL-NEXT: v_mul_lo_u32 v0, v0, v8 3465; GFX9-GISEL-NEXT: v_mul_lo_u32 v1, v1, v9 3466; GFX9-GISEL-NEXT: v_mul_lo_u32 v2, v2, v10 3467; GFX9-GISEL-NEXT: v_mul_lo_u32 v3, v3, v11 3468; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 3469; 3470; GFX90A-SDAG-LABEL: clpeak_imad_pat_v4i32: 3471; GFX90A-SDAG: ; %bb.0: ; %entry 3472; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3473; GFX90A-SDAG-NEXT: v_add_u32_e32 v3, 1, v3 3474; GFX90A-SDAG-NEXT: v_add_u32_e32 v2, 1, v2 3475; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, 1, v1 3476; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 3477; GFX90A-SDAG-NEXT: v_mul_lo_u32 v14, v0, v4 3478; GFX90A-SDAG-NEXT: v_mul_lo_u32 v12, v1, v5 3479; GFX90A-SDAG-NEXT: v_mul_lo_u32 v10, v2, v6 3480; GFX90A-SDAG-NEXT: v_mul_lo_u32 v8, v3, v7 3481; GFX90A-SDAG-NEXT: v_add_u32_e32 v3, v8, v3 3482; GFX90A-SDAG-NEXT: v_add_u32_e32 v9, v10, v2 3483; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, v12, v1 3484; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v14, v0 3485; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v4 3486; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v1, v5 3487; GFX90A-SDAG-NEXT: v_mul_lo_u32 v4, v9, v6 3488; GFX90A-SDAG-NEXT: v_mul_lo_u32 v6, v3, v7 3489; GFX90A-SDAG-NEXT: 
v_mad_u64_u32 v[8:9], s[4:5], v6, v8, v[6:7] 3490; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v4, v10, v[4:5] 3491; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v2, v12, v[2:3] 3492; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v0, v14, v[0:1] 3493; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v14, v0, v[14:15] 3494; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v12, v2, v[12:13] 3495; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v4, v[10:11] 3496; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v8, v6, v[8:9] 3497; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, v12 3498; GFX90A-SDAG-NEXT: v_mov_b32_e32 v3, v4 3499; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31] 3500; 3501; GFX10-SDAG-LABEL: clpeak_imad_pat_v4i32: 3502; GFX10-SDAG: ; %bb.0: ; %entry 3503; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3504; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 3505; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1 3506; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v2 3507; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v3, 1, v3 3508; GFX10-SDAG-NEXT: v_mul_lo_u32 v8, v0, v4 3509; GFX10-SDAG-NEXT: v_mul_lo_u32 v9, v1, v5 3510; GFX10-SDAG-NEXT: v_mul_lo_u32 v10, v2, v6 3511; GFX10-SDAG-NEXT: v_mul_lo_u32 v11, v3, v7 3512; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v8, v0 3513; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, v9, v1 3514; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v12, v10, v2 3515; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v0, v4 3516; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v4, v11, v3 3517; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v1, v5 3518; GFX10-SDAG-NEXT: v_mul_lo_u32 v3, v12, v6 3519; GFX10-SDAG-NEXT: v_mul_lo_u32 v4, v4, v7 3520; GFX10-SDAG-NEXT: v_mad_u64_u32 v[5:6], null, v0, v8, v[0:1] 3521; GFX10-SDAG-NEXT: v_mad_u64_u32 v[6:7], null, v2, v9, v[2:3] 3522; GFX10-SDAG-NEXT: v_mad_u64_u32 v[7:8], null, v3, v10, v[3:4] 3523; GFX10-SDAG-NEXT: v_mad_u64_u32 v[8:9], null, v4, v11, v[4:5] 3524; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v5, v0, v[5:6] 3525; GFX10-SDAG-NEXT: v_mad_u64_u32 v[1:2], 
null, v6, v2, v[6:7] 3526; GFX10-SDAG-NEXT: v_mad_u64_u32 v[2:3], null, v7, v3, v[7:8] 3527; GFX10-SDAG-NEXT: v_mad_u64_u32 v[3:4], null, v8, v4, v[8:9] 3528; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 3529; 3530; GFX10-GISEL-LABEL: clpeak_imad_pat_v4i32: 3531; GFX10-GISEL: ; %bb.0: ; %entry 3532; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3533; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 3534; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 3535; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2 3536; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v3 3537; GFX10-GISEL-NEXT: v_mul_lo_u32 v8, v0, v4 3538; GFX10-GISEL-NEXT: v_mul_lo_u32 v9, v1, v5 3539; GFX10-GISEL-NEXT: v_mul_lo_u32 v10, v2, v6 3540; GFX10-GISEL-NEXT: v_mul_lo_u32 v11, v3, v7 3541; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, v8, v0 3542; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, v9, v1 3543; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, v10, v2 3544; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, v11, v3 3545; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4 3546; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5 3547; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v2, v6 3548; GFX10-GISEL-NEXT: v_mul_lo_u32 v3, v3, v7 3549; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v8 3550; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v5, 1, v9 3551; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v6, 1, v10 3552; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v7, 1, v11 3553; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v8, 1, v0 3554; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4 3555; GFX10-GISEL-NEXT: v_mul_lo_u32 v4, v1, v5 3556; GFX10-GISEL-NEXT: v_mul_lo_u32 v5, v2, v6 3557; GFX10-GISEL-NEXT: v_mul_lo_u32 v6, v3, v7 3558; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 3559; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2 3560; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v3 3561; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v0, v8 3562; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v4, v1 3563; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v5, v2 3564; GFX10-GISEL-NEXT: v_mul_lo_u32 v3, v6, v3 3565; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 3566; 
3567; GFX11-SDAG-LABEL: clpeak_imad_pat_v4i32: 3568; GFX11-SDAG: ; %bb.0: ; %entry 3569; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3570; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 3571; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1 3572; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v2 3573; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v3, 1, v3 3574; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3575; GFX11-SDAG-NEXT: v_mul_lo_u32 v8, v0, v4 3576; GFX11-SDAG-NEXT: v_mul_lo_u32 v9, v1, v5 3577; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3578; GFX11-SDAG-NEXT: v_mul_lo_u32 v10, v2, v6 3579; GFX11-SDAG-NEXT: v_mul_lo_u32 v11, v3, v7 3580; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3581; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, v8, v0 3582; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v1, v9, v1 3583; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) 3584; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v12, v10, v2 3585; GFX11-SDAG-NEXT: v_mul_lo_u32 v2, v0, v4 3586; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, v11, v3 3587; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3588; GFX11-SDAG-NEXT: v_mul_lo_u32 v3, v1, v5 3589; GFX11-SDAG-NEXT: v_mul_lo_u32 v4, v12, v6 3590; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3591; GFX11-SDAG-NEXT: v_mul_lo_u32 v5, v0, v7 3592; GFX11-SDAG-NEXT: v_mad_u64_u32 v[6:7], null, v2, v8, v[2:3] 3593; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3594; GFX11-SDAG-NEXT: v_mad_u64_u32 v[7:8], null, v3, v9, v[3:4] 3595; GFX11-SDAG-NEXT: v_mad_u64_u32 v[8:9], null, v4, v10, v[4:5] 3596; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3597; GFX11-SDAG-NEXT: v_mad_u64_u32 v[9:10], null, v5, v11, v[5:6] 3598; GFX11-SDAG-NEXT: v_mad_u64_u32 v[0:1], 
null, v6, v2, v[6:7] 3599; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3600; GFX11-SDAG-NEXT: v_mad_u64_u32 v[1:2], null, v7, v3, v[7:8] 3601; GFX11-SDAG-NEXT: v_mad_u64_u32 v[2:3], null, v8, v4, v[8:9] 3602; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) 3603; GFX11-SDAG-NEXT: v_mad_u64_u32 v[3:4], null, v9, v5, v[9:10] 3604; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 3605; 3606; GFX11-GISEL-LABEL: clpeak_imad_pat_v4i32: 3607; GFX11-GISEL: ; %bb.0: ; %entry 3608; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3609; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 3610; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 3611; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2 3612; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v3 3613; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3614; GFX11-GISEL-NEXT: v_mul_lo_u32 v8, v0, v4 3615; GFX11-GISEL-NEXT: v_mul_lo_u32 v9, v1, v5 3616; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3617; GFX11-GISEL-NEXT: v_mul_lo_u32 v10, v2, v6 3618; GFX11-GISEL-NEXT: v_mul_lo_u32 v11, v3, v7 3619; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3620; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v8, v0 3621; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, v9, v1 3622; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3623; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v2, v10, v2 3624; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v3, v11, v3 3625; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3626; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4 3627; GFX11-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5 3628; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3629; GFX11-GISEL-NEXT: v_mul_lo_u32 v2, v2, v6 3630; GFX11-GISEL-NEXT: v_mul_lo_u32 v3, v3, v7 3631; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v8 3632; 
GFX11-GISEL-NEXT: v_add_nc_u32_e32 v5, 1, v9 3633; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v6, 1, v10 3634; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v7, 1, v11 3635; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v8, 1, v0 3636; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4 3637; GFX11-GISEL-NEXT: v_mul_lo_u32 v4, v1, v5 3638; GFX11-GISEL-NEXT: v_mul_lo_u32 v5, v2, v6 3639; GFX11-GISEL-NEXT: v_mul_lo_u32 v6, v3, v7 3640; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 3641; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2 3642; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v3 3643; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v0, v8 3644; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3645; GFX11-GISEL-NEXT: v_mul_lo_u32 v1, v4, v1 3646; GFX11-GISEL-NEXT: v_mul_lo_u32 v2, v5, v2 3647; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) 3648; GFX11-GISEL-NEXT: v_mul_lo_u32 v3, v6, v3 3649; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 3650; 3651; GFX1200-SDAG-LABEL: clpeak_imad_pat_v4i32: 3652; GFX1200-SDAG: ; %bb.0: ; %entry 3653; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 3654; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0 3655; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0 3656; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0 3657; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0 3658; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 3659; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1 3660; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v2 3661; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v3, 1, v3 3662; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3663; GFX1200-SDAG-NEXT: v_mul_lo_u32 v8, v0, v4 3664; GFX1200-SDAG-NEXT: v_mul_lo_u32 v9, v1, v5 3665; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3666; GFX1200-SDAG-NEXT: v_mul_lo_u32 v10, v2, v6 3667; GFX1200-SDAG-NEXT: v_mul_lo_u32 v11, v3, v7 3668; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3669; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v0, v8, v0 
3670; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v1, v9, v1 3671; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) 3672; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v12, v10, v2 3673; GFX1200-SDAG-NEXT: v_mul_lo_u32 v0, v0, v4 3674; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v4, v11, v3 3675; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3676; GFX1200-SDAG-NEXT: v_mul_lo_u32 v2, v1, v5 3677; GFX1200-SDAG-NEXT: v_mul_lo_u32 v3, v12, v6 3678; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) 3679; GFX1200-SDAG-NEXT: v_mul_lo_u32 v4, v4, v7 3680; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[5:6], null, v0, v8, v[0:1] 3681; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[6:7], null, v2, v9, v[2:3] 3682; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3683; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[7:8], null, v3, v10, v[3:4] 3684; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[8:9], null, v4, v11, v[4:5] 3685; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3686; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[0:1], null, v5, v0, v[5:6] 3687; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[1:2], null, v6, v2, v[6:7] 3688; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) 3689; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[2:3], null, v7, v3, v[7:8] 3690; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[3:4], null, v8, v4, v[8:9] 3691; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31] 3692; 3693; GFX1200-GISEL-LABEL: clpeak_imad_pat_v4i32: 3694; GFX1200-GISEL: ; %bb.0: ; %entry 3695; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 3696; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0 3697; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 3698; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 3699; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 3700; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 3701; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 3702; 
GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2 3703; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v3 3704; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3705; GFX1200-GISEL-NEXT: v_mul_lo_u32 v8, v0, v4 3706; GFX1200-GISEL-NEXT: v_mul_lo_u32 v9, v1, v5 3707; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3708; GFX1200-GISEL-NEXT: v_mul_lo_u32 v10, v2, v6 3709; GFX1200-GISEL-NEXT: v_mul_lo_u32 v11, v3, v7 3710; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3711; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, v8, v0 3712; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v1, v9, v1 3713; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3714; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v2, v10, v2 3715; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v3, v11, v3 3716; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3717; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4 3718; GFX1200-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5 3719; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3720; GFX1200-GISEL-NEXT: v_mul_lo_u32 v2, v2, v6 3721; GFX1200-GISEL-NEXT: v_mul_lo_u32 v3, v3, v7 3722; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v8 3723; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v5, 1, v9 3724; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v6, 1, v10 3725; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v7, 1, v11 3726; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v8, 1, v0 3727; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4 3728; GFX1200-GISEL-NEXT: v_mul_lo_u32 v4, v1, v5 3729; GFX1200-GISEL-NEXT: v_mul_lo_u32 v5, v2, v6 3730; GFX1200-GISEL-NEXT: v_mul_lo_u32 v6, v3, v7 3731; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 3732; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2 3733; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v3 3734; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v0, v8 3735; 
GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 3736; GFX1200-GISEL-NEXT: v_mul_lo_u32 v1, v4, v1 3737; GFX1200-GISEL-NEXT: v_mul_lo_u32 v2, v5, v2 3738; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) 3739; GFX1200-GISEL-NEXT: v_mul_lo_u32 v3, v6, v3 3740; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] 3741entry: 3742 %y18 = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> 3743 %add = mul <4 x i32> %y18, %y 3744 %mul119 = add <4 x i32> %add, %y18 3745 %add2 = mul <4 x i32> %mul119, %y 3746 %add220 = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1> 3747 %add422 = add <4 x i32> %add2, <i32 1, i32 1, i32 1, i32 1> 3748 %mul521 = mul <4 x i32> %add2, %add220 3749 %add6 = mul <4 x i32> %mul521, %add422 3750 ret <4 x i32> %add6 3751} 3752; Signed 24-bit form of the chain, where %x and %y are first sign-extended from their low 24 bits (shl 8, then ashr exact 8) and then a = y * (x + 1), b = (a + (x + 1)) * y, result = b * (a + 1) * (b + 1). The assertions that follow are autogenerated (see the NOTE at the top of the file), so regenerate them with the update script rather than editing them by hand. 3753define i32 @clpeak_imad_pat_i24(i32 %x, i32 %y) { 3754; GFX67-SDAG-LABEL: clpeak_imad_pat_i24: 3755; GFX67-SDAG: ; %bb.0: ; %entry 3756; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3757; GFX67-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 24 3758; GFX67-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 24 3759; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 3760; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0 3761; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v2, v0 3762; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 3763; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v0, v2 3764; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v0 3765; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 3766; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v1 3767; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] 3768; 3769; GFX67-GISEL-LABEL: clpeak_imad_pat_i24: 3770; GFX67-GISEL: ; %bb.0: ; %entry 3771; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3772; GFX67-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 24 3773; GFX67-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 24 3774; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 3775; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0 3776; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 3777; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 3778; GFX67-GISEL-NEXT: 
v_add_i32_e32 v1, vcc, 1, v2 3779; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 3780; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 3781; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 3782; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] 3783; 3784; GFX8-SDAG-LABEL: clpeak_imad_pat_i24: 3785; GFX8-SDAG: ; %bb.0: ; %entry 3786; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3787; GFX8-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 24 3788; GFX8-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 24 3789; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, 1, v0 3790; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0 3791; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v2, v0 3792; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 3793; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v0, v2 3794; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v0 3795; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 3796; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1 3797; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 3798; 3799; GFX8-GISEL-LABEL: clpeak_imad_pat_i24: 3800; GFX8-GISEL: ; %bb.0: ; %entry 3801; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3802; GFX8-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 24 3803; GFX8-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 24 3804; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0 3805; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0 3806; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v2, v0 3807; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 3808; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v2 3809; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 3810; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0 3811; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 3812; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 3813; 3814; GFX900-SDAG-LABEL: clpeak_imad_pat_i24: 3815; GFX900-SDAG: ; %bb.0: ; %entry 3816; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3817; GFX900-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 24 3818; GFX900-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 24 3819; GFX900-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 3820; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0 3821; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v2, v0 3822; GFX900-SDAG-NEXT: 
v_mul_lo_u32 v0, v0, v1 3823; GFX900-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v0, v2, v[0:1] 3824; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, v0, v[1:2] 3825; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 3826; 3827; GFX900-GISEL-LABEL: clpeak_imad_pat_i24: 3828; GFX900-GISEL: ; %bb.0: ; %entry 3829; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3830; GFX900-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 24 3831; GFX900-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 24 3832; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 3833; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0 3834; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v2, v0 3835; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 3836; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v2 3837; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 3838; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 3839; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 3840; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 3841; 3842; GFX90A-SDAG-LABEL: clpeak_imad_pat_i24: 3843; GFX90A-SDAG: ; %bb.0: ; %entry 3844; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3845; GFX90A-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 24 3846; GFX90A-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 24 3847; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 3848; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0 3849; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v2, v0 3850; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 3851; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v2, v[0:1] 3852; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v0, v[2:3] 3853; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31] 3854; 3855; GFX90A-GISEL-LABEL: clpeak_imad_pat_i24: 3856; GFX90A-GISEL: ; %bb.0: ; %entry 3857; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3858; GFX90A-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 24 3859; GFX90A-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 24 3860; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 3861; GFX90A-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0 3862; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v2, v0 3863; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 3864; 
GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v2 3865; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 3866; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 3867; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 3868; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31] 3869; 3870; GFX10-SDAG-LABEL: clpeak_imad_pat_i24: 3871; GFX10-SDAG: ; %bb.0: ; %entry 3872; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3873; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 24 3874; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 24 3875; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 3876; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0 3877; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0 3878; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 3879; GFX10-SDAG-NEXT: v_mad_u64_u32 v[1:2], null, v0, v2, v[0:1] 3880; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v1, v0, v[1:2] 3881; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 3882; 3883; GFX10-GISEL-LABEL: clpeak_imad_pat_i24: 3884; GFX10-GISEL: ; %bb.0: ; %entry 3885; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3886; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 24 3887; GFX10-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 24 3888; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 3889; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0 3890; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, v2, v0 3891; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 3892; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 3893; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 3894; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 3895; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 3896; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 3897; 3898; GFX11-SDAG-LABEL: clpeak_imad_pat_i24: 3899; GFX11-SDAG: ; %bb.0: ; %entry 3900; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3901; GFX11-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 24 3902; GFX11-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 24 3903; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 3904; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 3905; GFX11-SDAG-NEXT: v_mul_lo_u32 v5, v1, v0 3906; GFX11-SDAG-NEXT: 
s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3907; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, v5, v0 3908; GFX11-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 3909; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3910; GFX11-SDAG-NEXT: v_mad_u64_u32 v[3:4], null, v2, v5, v[2:3] 3911; GFX11-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, v[3:4] 3912; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 3913; 3914; GFX11-GISEL-LABEL: clpeak_imad_pat_i24: 3915; GFX11-GISEL: ; %bb.0: ; %entry 3916; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3917; GFX11-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 24 3918; GFX11-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 24 3919; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 3920; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 3921; GFX11-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0 3922; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3923; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v2, v0 3924; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 3925; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 3926; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 3927; GFX11-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 3928; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 3929; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 3930; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 3931; 3932; GFX1200-SDAG-LABEL: clpeak_imad_pat_i24: 3933; GFX1200-SDAG: ; %bb.0: ; %entry 3934; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 3935; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0 3936; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0 3937; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0 3938; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0 3939; GFX1200-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 24 3940; GFX1200-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 24 3941; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 3942; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 3943; 
GFX1200-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0 3944; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3945; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0 3946; GFX1200-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 3947; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3948; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[1:2], null, v0, v2, v[0:1] 3949; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[0:1], null, v1, v0, v[1:2] 3950; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31] 3951; 3952; GFX1200-GISEL-LABEL: clpeak_imad_pat_i24: 3953; GFX1200-GISEL: ; %bb.0: ; %entry 3954; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 3955; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0 3956; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 3957; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 3958; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 3959; GFX1200-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 24 3960; GFX1200-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 24 3961; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 3962; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 3963; GFX1200-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0 3964; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3965; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, v2, v0 3966; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 3967; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 3968; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 3969; GFX1200-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 3970; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 3971; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 3972; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] 3973entry: 3974 %shl = shl i32 %x, 8 3975 %shr = ashr exact i32 %shl, 8 3976 %shl1 = shl i32 %y, 8 3977 %shr2 = ashr exact i32 %shl1, 8 3978 %shr222 = add nsw i32 %shr, 1 3979 %add = mul i32 %shr2, %shr222 3980 %mul323 = add i32 %add, %shr222 3981 %add4 = mul i32 %mul323, %shr2 3982 %add424 = add 
i32 %add, 1 3983 %add626 = add i32 %add4, 1 3984 %mul725 = mul i32 %add4, %add424 3985 %add8 = mul i32 %mul725, %add626 3986 ret i32 %add8 3987} 3988 3989define i32 @clpeak_imad_pat_u24(i32 %x, i32 %y) { 3990; GFX67-SDAG-LABEL: clpeak_imad_pat_u24: 3991; GFX67-SDAG: ; %bb.0: ; %entry 3992; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3993; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffffff, v0 3994; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffffff, v1 3995; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 3996; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0 3997; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v2, v0 3998; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 3999; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v0, v2 4000; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v0 4001; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 4002; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v1 4003; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] 4004; 4005; GFX67-GISEL-LABEL: clpeak_imad_pat_u24: 4006; GFX67-GISEL: ; %bb.0: ; %entry 4007; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4008; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0 4009; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1 4010; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 4011; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0 4012; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 4013; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 4014; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v2 4015; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 4016; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 4017; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 4018; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] 4019; 4020; GFX8-SDAG-LABEL: clpeak_imad_pat_u24: 4021; GFX8-SDAG: ; %bb.0: ; %entry 4022; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4023; GFX8-SDAG-NEXT: v_and_b32_e32 v0, 0xffffff, v0 4024; GFX8-SDAG-NEXT: v_and_b32_e32 v1, 0xffffff, v1 4025; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, 1, v0 4026; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0 4027; GFX8-SDAG-NEXT: v_add_u32_e32 
v0, vcc, v2, v0 4028; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 4029; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v0, v2 4030; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v0 4031; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 4032; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1 4033; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 4034; 4035; GFX8-GISEL-LABEL: clpeak_imad_pat_u24: 4036; GFX8-GISEL: ; %bb.0: ; %entry 4037; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4038; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0 4039; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1 4040; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0 4041; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0 4042; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v2, v0 4043; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 4044; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v2 4045; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 4046; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0 4047; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 4048; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 4049; 4050; GFX900-SDAG-LABEL: clpeak_imad_pat_u24: 4051; GFX900-SDAG: ; %bb.0: ; %entry 4052; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4053; GFX900-SDAG-NEXT: v_and_b32_e32 v0, 0xffffff, v0 4054; GFX900-SDAG-NEXT: v_and_b32_e32 v1, 0xffffff, v1 4055; GFX900-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 4056; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0 4057; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v2, v0 4058; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 4059; GFX900-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v0, v2, v[0:1] 4060; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, v0, v[1:2] 4061; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 4062; 4063; GFX900-GISEL-LABEL: clpeak_imad_pat_u24: 4064; GFX900-GISEL: ; %bb.0: ; %entry 4065; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4066; GFX900-GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0 4067; GFX900-GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1 4068; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 4069; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, 
v1, v0 4070; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v2, v0 4071; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 4072; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v2 4073; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 4074; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 4075; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 4076; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 4077; 4078; GFX90A-SDAG-LABEL: clpeak_imad_pat_u24: 4079; GFX90A-SDAG: ; %bb.0: ; %entry 4080; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4081; GFX90A-SDAG-NEXT: v_and_b32_e32 v0, 0xffffff, v0 4082; GFX90A-SDAG-NEXT: v_and_b32_e32 v1, 0xffffff, v1 4083; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 4084; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0 4085; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v2, v0 4086; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 4087; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v2, v[0:1] 4088; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v0, v[2:3] 4089; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31] 4090; 4091; GFX90A-GISEL-LABEL: clpeak_imad_pat_u24: 4092; GFX90A-GISEL: ; %bb.0: ; %entry 4093; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4094; GFX90A-GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0 4095; GFX90A-GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1 4096; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 4097; GFX90A-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0 4098; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v2, v0 4099; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 4100; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v2 4101; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 4102; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 4103; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 4104; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31] 4105; 4106; GFX10-SDAG-LABEL: clpeak_imad_pat_u24: 4107; GFX10-SDAG: ; %bb.0: ; %entry 4108; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4109; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xffffff, v0 4110; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xffffff, v1 4111; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, 
v0 4112; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0 4113; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0 4114; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 4115; GFX10-SDAG-NEXT: v_mad_u64_u32 v[1:2], null, v0, v2, v[0:1] 4116; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v1, v0, v[1:2] 4117; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 4118; 4119; GFX10-GISEL-LABEL: clpeak_imad_pat_u24: 4120; GFX10-GISEL: ; %bb.0: ; %entry 4121; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4122; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0 4123; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1 4124; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 4125; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0 4126; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, v2, v0 4127; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 4128; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 4129; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 4130; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 4131; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 4132; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 4133; 4134; GFX11-SDAG-LABEL: clpeak_imad_pat_u24: 4135; GFX11-SDAG: ; %bb.0: ; %entry 4136; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4137; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffffff, v0 4138; GFX11-SDAG-NEXT: v_and_b32_e32 v1, 0xffffff, v1 4139; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 4140; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 4141; GFX11-SDAG-NEXT: v_mul_lo_u32 v5, v1, v0 4142; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4143; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, v5, v0 4144; GFX11-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 4145; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4146; GFX11-SDAG-NEXT: v_mad_u64_u32 v[3:4], null, v2, v5, v[2:3] 4147; GFX11-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, v[3:4] 4148; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 4149; 4150; GFX11-GISEL-LABEL: clpeak_imad_pat_u24: 4151; 
GFX11-GISEL: ; %bb.0: ; %entry 4152; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4153; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0 4154; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1 4155; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 4156; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 4157; GFX11-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0 4158; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4159; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v2, v0 4160; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 4161; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 4162; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 4163; GFX11-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 4164; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 4165; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 4166; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 4167; 4168; GFX1200-SDAG-LABEL: clpeak_imad_pat_u24: 4169; GFX1200-SDAG: ; %bb.0: ; %entry 4170; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 4171; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0 4172; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0 4173; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0 4174; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0 4175; GFX1200-SDAG-NEXT: v_and_b32_e32 v0, 0xffffff, v0 4176; GFX1200-SDAG-NEXT: v_and_b32_e32 v1, 0xffffff, v1 4177; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 4178; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 4179; GFX1200-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0 4180; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4181; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0 4182; GFX1200-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 4183; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4184; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[1:2], null, v0, v2, v[0:1] 4185; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[0:1], null, v1, v0, v[1:2] 4186; 
GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31] 4187; 4188; GFX1200-GISEL-LABEL: clpeak_imad_pat_u24: 4189; GFX1200-GISEL: ; %bb.0: ; %entry 4190; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 4191; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0 4192; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 4193; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 4194; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 4195; GFX1200-GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0 4196; GFX1200-GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1 4197; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 4198; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 4199; GFX1200-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0 4200; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4201; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, v2, v0 4202; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 4203; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 4204; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 4205; GFX1200-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 4206; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 4207; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 4208; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] 4209entry: 4210 %shl = and i32 %x, 16777215 4211 %shl1 = and i32 %y, 16777215 4212 %shl122 = add nuw nsw i32 %shl, 1 4213 %add = mul i32 %shl1, %shl122 4214 %mul323 = add i32 %add, %shl122 4215 %add4 = mul i32 %mul323, %shl1 4216 %add424 = add i32 %add, 1 4217 %add626 = add i32 %add4, 1 4218 %mul725 = mul i32 %add4, %add424 4219 %add8 = mul i32 %mul725, %add626 4220 ret i32 %add8 4221} 4222 4223define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) { 4224; GFX67-LABEL: clpeak_imad_pat_i8: 4225; GFX67: ; %bb.0: ; %entry 4226; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4227; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0 4228; GFX67-NEXT: v_and_b32_e32 v0, 0xff, v0 4229; GFX67-NEXT: v_and_b32_e32 v2, 0xff, v1 4230; GFX67-NEXT: v_mul_u32_u24_e32 
v3, v0, v2 4231; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1 4232; GFX67-NEXT: v_and_b32_e32 v1, 0xff, v1 4233; GFX67-NEXT: v_and_b32_e32 v3, 0xff, v3 4234; GFX67-NEXT: v_mul_u32_u24_e32 v4, v1, v3 4235; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, 1 4236; GFX67-NEXT: v_and_b32_e32 v0, 0xff, v0 4237; GFX67-NEXT: v_and_b32_e32 v2, 0xff, v4 4238; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2 4239; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, 1 4240; GFX67-NEXT: v_and_b32_e32 v0, 0xff, v0 4241; GFX67-NEXT: v_and_b32_e32 v1, 0xff, v1 4242; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1 4243; GFX67-NEXT: v_bfe_i32 v0, v0, 0, 8 4244; GFX67-NEXT: s_setpc_b64 s[30:31] 4245; 4246; GFX8-SDAG-LABEL: clpeak_imad_pat_i8: 4247; GFX8-SDAG: ; %bb.0: ; %entry 4248; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4249; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 4250; GFX8-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 4251; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 4252; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 4253; GFX8-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 4254; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 4255; 4256; GFX8-GISEL-LABEL: clpeak_imad_pat_i8: 4257; GFX8-GISEL: ; %bb.0: ; %entry 4258; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4259; GFX8-GISEL-NEXT: v_add_u16_e32 v0, 1, v0 4260; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v0, v1 4261; GFX8-GISEL-NEXT: v_add_u16_e32 v3, 1, v1 4262; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v3, v2 4263; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 4264; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v4 4265; GFX8-GISEL-NEXT: v_mad_u16 v1, v3, v2, 1 4266; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v1 4267; GFX8-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8 4268; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 4269; 4270; GFX9-SDAG-LABEL: clpeak_imad_pat_i8: 4271; GFX9-SDAG: ; %bb.0: ; %entry 4272; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4273; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1 4274; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v0, v1, v0 4275; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, 
v1 4276; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v0, v1, v0 4277; GFX9-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 4278; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 4279; 4280; GFX9-GISEL-LABEL: clpeak_imad_pat_i8: 4281; GFX9-GISEL: ; %bb.0: ; %entry 4282; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4283; GFX9-GISEL-NEXT: v_add_u16_e32 v0, 1, v0 4284; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v2, v0, v1 4285; GFX9-GISEL-NEXT: v_add_u16_e32 v3, 1, v1 4286; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v4, v3, v2 4287; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v1, 1 4288; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v4 4289; GFX9-GISEL-NEXT: v_mad_legacy_u16 v1, v3, v2, 1 4290; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v1 4291; GFX9-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8 4292; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 4293; 4294; GFX10-SDAG-LABEL: clpeak_imad_pat_i8: 4295; GFX10-SDAG: ; %bb.0: ; %entry 4296; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4297; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 4298; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 4299; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 4300; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 4301; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 4302; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 4303; 4304; GFX10-GISEL-LABEL: clpeak_imad_pat_i8: 4305; GFX10-GISEL: ; %bb.0: ; %entry 4306; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4307; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 4308; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 4309; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 4310; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 4311; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 4312; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 4313; GFX10-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 4314; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 4315; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8 4316; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 4317; 4318; GFX11-SDAG-LABEL: clpeak_imad_pat_i8: 4319; GFX11-SDAG: ; %bb.0: ; %entry 4320; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
4321; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 4322; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4323; GFX11-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 4324; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 4325; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4326; GFX11-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 4327; GFX11-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 4328; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 4329; 4330; GFX11-GISEL-LABEL: clpeak_imad_pat_i8: 4331; GFX11-GISEL: ; %bb.0: ; %entry 4332; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4333; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 4334; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 4335; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 4336; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 4337; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 4338; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 4339; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 4340; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 4341; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 4342; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 4343; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 4344; GFX11-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8 4345; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 4346; 4347; GFX1200-SDAG-LABEL: clpeak_imad_pat_i8: 4348; GFX1200-SDAG: ; %bb.0: ; %entry 4349; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 4350; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0 4351; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0 4352; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0 4353; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0 4354; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 4355; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 4356; GFX1200-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 4357; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 4358; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | 
instid1(VALU_DEP_1) 4359; GFX1200-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 4360; GFX1200-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 4361; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31] 4362; 4363; GFX1200-GISEL-LABEL: clpeak_imad_pat_i8: 4364; GFX1200-GISEL: ; %bb.0: ; %entry 4365; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 4366; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0 4367; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 4368; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 4369; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 4370; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 4371; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 4372; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 4373; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 4374; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 4375; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 4376; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 4377; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 4378; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 4379; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 4380; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 4381; GFX1200-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8 4382; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] 4383entry: 4384 %conv33 = add i8 %x, 1 4385 %add = mul i8 %conv33, %y 4386 %conv434 = add i8 %y, 1 4387 %add8 = mul i8 %conv434, %add 4388 %conv1035 = add i8 %add, 1 4389 %add14 = mul i8 %conv1035, %add8 4390 %conv1636 = add i8 %add8, 1 4391 %add20 = mul i8 %add14, %conv1636 4392 ret i8 %add20 4393} 4394 4395define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) { 4396; GFX67-SDAG-LABEL: clpeak_imad_pat_v2i8: 4397; GFX67-SDAG: ; %bb.0: ; %entry 4398; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4399; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1 4400; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v1 4401; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 4402; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 4403; GFX67-SDAG-NEXT: v_mad_u32_u24 
v1, v5, v3, v1 4404; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v0 4405; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 4406; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 4407; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v6, v5, v3 4408; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v4, v2, v0 4409; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v3 4410; GFX67-SDAG-NEXT: v_mad_u32_u24 v3, v4, v2, 1 4411; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 4412; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 4413; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v4, 8, v6 4414; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v5, v0, v2 4415; GFX67-SDAG-NEXT: v_or_b32_e32 v3, v4, v3 4416; GFX67-SDAG-NEXT: s_movk_i32 s4, 0x100 4417; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1 4418; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, s4, v3 4419; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 4420; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v1 4421; GFX67-SDAG-NEXT: v_or_b32_e32 v0, v2, v0 4422; GFX67-SDAG-NEXT: v_bfe_u32 v2, v3, 8, 8 4423; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v5 4424; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 4425; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 4426; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 0x100, v0 4427; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v4, v3 4428; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v2 4429; GFX67-SDAG-NEXT: v_bfe_u32 v2, v0, 8, 8 4430; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 4431; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 4432; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 4433; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v3, v0 4434; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v2 4435; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] 4436; 4437; GFX67-GISEL-LABEL: clpeak_imad_pat_v2i8: 4438; GFX67-GISEL: ; %bb.0: ; %entry 4439; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4440; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 4441; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 4442; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v0 4443; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2 4444; 
GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v1 4445; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v3 4446; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v4, v2, v0 4447; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v5, v3, v1 4448; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 4449; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v0, v2 4450; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 4451; GFX67-GISEL-NEXT: v_mad_u32_u24 v4, v4, v2, 1 4452; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v1, v3 4453; GFX67-GISEL-NEXT: v_mad_u32_u24 v5, v5, v3, 1 4454; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, 1 4455; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, 1 4456; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v6 4457; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v4 4458; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3 4459; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v7 4460; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v5 4461; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v4 4462; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2 4463; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 4464; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0 4465; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v3 4466; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 4467; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v2, v1 4468; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] 4469; 4470; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i8: 4471; GFX8-SDAG: ; %bb.0: ; %entry 4472; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4473; GFX8-SDAG-NEXT: v_add_u16_e32 v1, 1, v1 4474; GFX8-SDAG-NEXT: v_add_u16_e32 v0, 1, v0 4475; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v4, v1, v3 4476; GFX8-SDAG-NEXT: v_mad_u16 v1, v1, v3, v1 4477; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v5, v0, v2 4478; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v2, v0 4479; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v1, v1, v3 4480; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v0, v0, v2 4481; GFX8-SDAG-NEXT: v_mad_u16 v3, v1, v4, v1 4482; GFX8-SDAG-NEXT: v_mad_u16 v2, v0, v5, v0 4483; GFX8-SDAG-NEXT: v_mad_u16 v1, v3, v1, v3 4484; GFX8-SDAG-NEXT: v_lshlrev_b16_e32 
v3, 8, v1 4485; GFX8-SDAG-NEXT: v_mad_u16 v0, v2, v0, v2 4486; GFX8-SDAG-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 4487; GFX8-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 4488; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 4489; 4490; GFX8-GISEL-LABEL: clpeak_imad_pat_v2i8: 4491; GFX8-GISEL: ; %bb.0: ; %entry 4492; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4493; GFX8-GISEL-NEXT: v_add_u16_e32 v0, 1, v0 4494; GFX8-GISEL-NEXT: v_add_u16_e32 v1, 1, v1 4495; GFX8-GISEL-NEXT: v_mad_u16 v4, v0, v2, v0 4496; GFX8-GISEL-NEXT: v_mad_u16 v5, v1, v3, v1 4497; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v6, v4, v2 4498; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v7, v5, v3 4499; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v2, 1 4500; GFX8-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1 4501; GFX8-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1 4502; GFX8-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1 4503; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v6, v0 4504; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v7, v1 4505; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v2 4506; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v1, v3 4507; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 4508; 4509; GFX9-SDAG-LABEL: clpeak_imad_pat_v2i8: 4510; GFX9-SDAG: ; %bb.0: ; %entry 4511; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4512; GFX9-SDAG-NEXT: v_add_u16_e32 v1, 1, v1 4513; GFX9-SDAG-NEXT: v_add_u16_e32 v0, 1, v0 4514; GFX9-SDAG-NEXT: v_mul_lo_u16_e32 v4, v1, v3 4515; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v1, v3, v1 4516; GFX9-SDAG-NEXT: v_mul_lo_u16_e32 v5, v0, v2 4517; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v0, v2, v0 4518; GFX9-SDAG-NEXT: v_mul_lo_u16_e32 v1, v1, v3 4519; GFX9-SDAG-NEXT: v_mul_lo_u16_e32 v0, v0, v2 4520; GFX9-SDAG-NEXT: v_mad_legacy_u16 v3, v1, v4, v1 4521; GFX9-SDAG-NEXT: v_mad_legacy_u16 v2, v0, v5, v0 4522; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v3, v1, v3 4523; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v3, 8, v1 4524; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v2, v0, v2 4525; GFX9-SDAG-NEXT: v_or_b32_sdwa v0, v0, 
v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 4526; GFX9-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 4527; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 4528; 4529; GFX9-GISEL-LABEL: clpeak_imad_pat_v2i8: 4530; GFX9-GISEL: ; %bb.0: ; %entry 4531; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4532; GFX9-GISEL-NEXT: v_add_u16_e32 v0, 1, v0 4533; GFX9-GISEL-NEXT: v_add_u16_e32 v1, 1, v1 4534; GFX9-GISEL-NEXT: v_mad_legacy_u16 v4, v0, v2, v0 4535; GFX9-GISEL-NEXT: v_mad_legacy_u16 v5, v1, v3, v1 4536; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v6, v4, v2 4537; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v7, v5, v3 4538; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v2, 1 4539; GFX9-GISEL-NEXT: v_mad_legacy_u16 v1, v1, v3, 1 4540; GFX9-GISEL-NEXT: v_mad_legacy_u16 v2, v4, v2, 1 4541; GFX9-GISEL-NEXT: v_mad_legacy_u16 v3, v5, v3, 1 4542; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v6, v0 4543; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v1, v7, v1 4544; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v2 4545; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v1, v1, v3 4546; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 4547; 4548; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i8: 4549; GFX10-SDAG: ; %bb.0: ; %entry 4550; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4551; GFX10-SDAG-NEXT: v_add_nc_u16 v1, v1, 1 4552; GFX10-SDAG-NEXT: v_add_nc_u16 v0, v0, 1 4553; GFX10-SDAG-NEXT: v_mad_u16 v4, v1, v3, v1 4554; GFX10-SDAG-NEXT: v_mul_lo_u16 v1, v1, v3 4555; GFX10-SDAG-NEXT: v_mad_u16 v5, v0, v2, v0 4556; GFX10-SDAG-NEXT: v_mul_lo_u16 v0, v0, v2 4557; GFX10-SDAG-NEXT: v_mul_lo_u16 v3, v4, v3 4558; GFX10-SDAG-NEXT: v_mul_lo_u16 v2, v5, v2 4559; GFX10-SDAG-NEXT: v_mad_u16 v1, v3, v1, v3 4560; GFX10-SDAG-NEXT: v_mad_u16 v0, v2, v0, v2 4561; GFX10-SDAG-NEXT: v_mad_u16 v1, v1, v3, v1 4562; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v2, v0 4563; GFX10-SDAG-NEXT: v_lshlrev_b16 v2, 8, v1 4564; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 4565; GFX10-SDAG-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 
src1_sel:DWORD 4566; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 4567; 4568; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i8: 4569; GFX10-GISEL: ; %bb.0: ; %entry 4570; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4571; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 4572; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 4573; GFX10-GISEL-NEXT: v_mad_u16 v4, v0, v2, v0 4574; GFX10-GISEL-NEXT: v_mad_u16 v5, v1, v3, v1 4575; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v2, 1 4576; GFX10-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1 4577; GFX10-GISEL-NEXT: v_mul_lo_u16 v6, v4, v2 4578; GFX10-GISEL-NEXT: v_mul_lo_u16 v7, v5, v3 4579; GFX10-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1 4580; GFX10-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1 4581; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v6, v0 4582; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v7, v1 4583; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2 4584; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3 4585; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 4586; 4587; GFX11-SDAG-LABEL: clpeak_imad_pat_v2i8: 4588; GFX11-SDAG: ; %bb.0: ; %entry 4589; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4590; GFX11-SDAG-NEXT: v_add_nc_u16 v1, v1, 1 4591; GFX11-SDAG-NEXT: v_add_nc_u16 v0, v0, 1 4592; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 4593; GFX11-SDAG-NEXT: v_mad_u16 v4, v1, v3, v1 4594; GFX11-SDAG-NEXT: v_mad_u16 v5, v0, v2, v0 4595; GFX11-SDAG-NEXT: v_mul_lo_u16 v1, v1, v3 4596; GFX11-SDAG-NEXT: v_mul_lo_u16 v0, v0, v2 4597; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 4598; GFX11-SDAG-NEXT: v_mul_lo_u16 v3, v4, v3 4599; GFX11-SDAG-NEXT: v_mul_lo_u16 v2, v5, v2 4600; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 4601; GFX11-SDAG-NEXT: v_mad_u16 v1, v3, v1, v3 4602; GFX11-SDAG-NEXT: v_mad_u16 v0, v2, v0, v2 4603; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 4604; GFX11-SDAG-NEXT: v_mad_u16 v1, v1, v3, v1 4605; GFX11-SDAG-NEXT: 
v_mad_u16 v0, v0, v2, v0 4606; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 4607; GFX11-SDAG-NEXT: v_lshlrev_b16 v2, 8, v1 4608; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 4609; GFX11-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 4610; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) 4611; GFX11-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 4612; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 4613; 4614; GFX11-GISEL-LABEL: clpeak_imad_pat_v2i8: 4615; GFX11-GISEL: ; %bb.0: ; %entry 4616; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4617; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 4618; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 4619; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 4620; GFX11-GISEL-NEXT: v_mad_u16 v4, v0, v2, v0 4621; GFX11-GISEL-NEXT: v_mad_u16 v5, v1, v3, v1 4622; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v2, 1 4623; GFX11-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1 4624; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 4625; GFX11-GISEL-NEXT: v_mul_lo_u16 v6, v4, v2 4626; GFX11-GISEL-NEXT: v_mul_lo_u16 v7, v5, v3 4627; GFX11-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1 4628; GFX11-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1 4629; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 4630; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v6, v0 4631; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v7, v1 4632; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 4633; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2 4634; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3 4635; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 4636; 4637; GFX1200-SDAG-LABEL: clpeak_imad_pat_v2i8: 4638; GFX1200-SDAG: ; %bb.0: ; %entry 4639; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 4640; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0 4641; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0 4642; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0 4643; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0 4644; 
GFX1200-SDAG-NEXT: v_add_nc_u16 v1, v1, 1 4645; GFX1200-SDAG-NEXT: v_add_nc_u16 v0, v0, 1 4646; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 4647; GFX1200-SDAG-NEXT: v_mad_u16 v4, v1, v3, v1 4648; GFX1200-SDAG-NEXT: v_mad_u16 v5, v0, v2, v0 4649; GFX1200-SDAG-NEXT: v_mul_lo_u16 v1, v1, v3 4650; GFX1200-SDAG-NEXT: v_mul_lo_u16 v0, v0, v2 4651; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 4652; GFX1200-SDAG-NEXT: v_mul_lo_u16 v3, v4, v3 4653; GFX1200-SDAG-NEXT: v_mul_lo_u16 v2, v5, v2 4654; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 4655; GFX1200-SDAG-NEXT: v_mad_u16 v1, v3, v1, v3 4656; GFX1200-SDAG-NEXT: v_mad_u16 v0, v2, v0, v2 4657; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 4658; GFX1200-SDAG-NEXT: v_mad_u16 v1, v1, v3, v1 4659; GFX1200-SDAG-NEXT: v_mad_u16 v0, v0, v2, v0 4660; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 4661; GFX1200-SDAG-NEXT: v_lshlrev_b16 v2, 8, v1 4662; GFX1200-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 4663; GFX1200-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 4664; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) 4665; GFX1200-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 4666; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31] 4667; 4668; GFX1200-GISEL-LABEL: clpeak_imad_pat_v2i8: 4669; GFX1200-GISEL: ; %bb.0: ; %entry 4670; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 4671; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0 4672; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 4673; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 4674; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 4675; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 4676; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 4677; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 4678; GFX1200-GISEL-NEXT: v_mad_u16 v4, v0, v2, v0 4679; GFX1200-GISEL-NEXT: v_mad_u16 v5, v1, v3, v1 4680; 
GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v2, 1 4681; GFX1200-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1 4682; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 4683; GFX1200-GISEL-NEXT: v_mul_lo_u16 v6, v4, v2 4684; GFX1200-GISEL-NEXT: v_mul_lo_u16 v7, v5, v3 4685; GFX1200-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1 4686; GFX1200-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1 4687; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 4688; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v6, v0 4689; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v7, v1 4690; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 4691; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2 4692; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3 4693; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] 4694entry: 4695 %y18 = add <2 x i8> %x, <i8 1, i8 1> 4696 %add = mul <2 x i8> %y18, %y 4697 %mul119 = add <2 x i8> %add, %y18 4698 %add2 = mul <2 x i8> %mul119, %y 4699 %add220 = add <2 x i8> %add, <i8 1, i8 1> 4700 %add422 = add <2 x i8> %add2, <i8 1, i8 1> 4701 %mul521 = mul <2 x i8> %add2, %add220 4702 %add6 = mul <2 x i8> %mul521, %add422 4703 ret <2 x i8> %add6 4704} 4705; i64 scalar case of the clpeak pattern: with t0 = (x + 1) * y and t1 = (t0 + (x + 1)) * y, the function returns t1 * (t0 + 1) * (t1 + 1). The per-target CHECK lines that follow are autogenerated; regenerate them with utils/update_llc_test_checks.py instead of editing by hand. 4706define i64 @clpeak_imad_pat_i64(i64 %x, i64 %y) { 4707; GFX6-SDAG-LABEL: clpeak_imad_pat_i64: 4708; GFX6-SDAG: ; %bb.0: ; %entry 4709; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4710; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 4711; GFX6-SDAG-NEXT: v_mul_lo_u32 v4, v0, v3 4712; GFX6-SDAG-NEXT: v_mul_hi_u32 v5, v0, v2 4713; GFX6-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4714; GFX6-SDAG-NEXT: v_mul_lo_u32 v6, v1, v2 4715; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, v5, v4 4716; GFX6-SDAG-NEXT: v_mul_lo_u32 v5, v0, v2 4717; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, v4, v6 4718; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, v5, v0 4719; GFX6-SDAG-NEXT: v_mul_lo_u32 v3, v0, v3 4720; GFX6-SDAG-NEXT: v_mul_hi_u32 v6, v0, v2 4721; GFX6-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2 4722; 
GFX6-SDAG-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc 4723; GFX6-SDAG-NEXT: v_mul_lo_u32 v1, v1, v2 4724; GFX6-SDAG-NEXT: v_mul_lo_u32 v2, v0, v4 4725; GFX6-SDAG-NEXT: v_mul_hi_u32 v4, v0, v5 4726; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, v6, v3 4727; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, v3, v1 4728; GFX6-SDAG-NEXT: v_mul_lo_u32 v3, v1, v5 4729; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, v4, v2 4730; GFX6-SDAG-NEXT: v_mul_lo_u32 v4, v0, v5 4731; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, v2, v3 4732; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, v4, v0 4733; GFX6-SDAG-NEXT: v_addc_u32_e32 v2, vcc, v2, v1, vcc 4734; GFX6-SDAG-NEXT: v_mul_lo_u32 v1, v3, v1 4735; GFX6-SDAG-NEXT: v_mul_hi_u32 v4, v3, v0 4736; GFX6-SDAG-NEXT: v_mul_lo_u32 v5, v2, v0 4737; GFX6-SDAG-NEXT: v_mul_lo_u32 v0, v3, v0 4738; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, v4, v1 4739; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v5 4740; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v3 4741; GFX6-SDAG-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc 4742; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] 4743; 4744; GFX6-GISEL-LABEL: clpeak_imad_pat_i64: 4745; GFX6-GISEL: ; %bb.0: ; %entry 4746; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4747; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 4748; GFX6-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4749; GFX6-GISEL-NEXT: v_mul_lo_u32 v4, v1, v2 4750; GFX6-GISEL-NEXT: v_mul_lo_u32 v5, v0, v3 4751; GFX6-GISEL-NEXT: v_mul_hi_u32 v7, v0, v2 4752; GFX6-GISEL-NEXT: v_mul_lo_u32 v6, v0, v2 4753; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 4754; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 4755; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 4756; GFX6-GISEL-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc 4757; GFX6-GISEL-NEXT: v_mul_lo_u32 v1, v1, v2 4758; GFX6-GISEL-NEXT: v_mul_lo_u32 v3, v0, v3 4759; GFX6-GISEL-NEXT: v_mul_lo_u32 v5, v0, v2 4760; GFX6-GISEL-NEXT: v_mul_hi_u32 v0, v0, v2 4761; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3 4762; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, 
v0 4763; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v6 4764; GFX6-GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v4, vcc 4765; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 4766; GFX6-GISEL-NEXT: v_addc_u32_e32 v4, vcc, 0, v0, vcc 4767; GFX6-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 4768; GFX6-GISEL-NEXT: v_mul_lo_u32 v2, v5, v2 4769; GFX6-GISEL-NEXT: v_mul_lo_u32 v6, v5, v1 4770; GFX6-GISEL-NEXT: v_mul_hi_u32 v1, v5, v1 4771; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 4772; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 4773; GFX6-GISEL-NEXT: v_mul_lo_u32 v1, v0, v3 4774; GFX6-GISEL-NEXT: v_mul_lo_u32 v2, v6, v4 4775; GFX6-GISEL-NEXT: v_mul_lo_u32 v0, v6, v3 4776; GFX6-GISEL-NEXT: v_mul_hi_u32 v3, v6, v3 4777; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v2 4778; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3 4779; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] 4780; 4781; GFX7-SDAG-LABEL: clpeak_imad_pat_i64: 4782; GFX7-SDAG: ; %bb.0: ; %entry 4783; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4784; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, 1, v0 4785; GFX7-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 4786; GFX7-SDAG-NEXT: v_mul_lo_u32 v6, v4, v3 4787; GFX7-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0 4788; GFX7-SDAG-NEXT: v_mul_lo_u32 v7, v5, v2 4789; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v6 4790; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v7 4791; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, v0, v4 4792; GFX7-SDAG-NEXT: v_addc_u32_e32 v5, vcc, v1, v5, vcc 4793; GFX7-SDAG-NEXT: v_mul_lo_u32 v6, v4, v3 4794; GFX7-SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, v2, 0 4795; GFX7-SDAG-NEXT: v_mul_lo_u32 v2, v5, v2 4796; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, v4, v6 4797; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, v4, v2 4798; GFX7-SDAG-NEXT: v_mul_lo_u32 v2, v4, v0 4799; GFX7-SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v0, v[3:4] 4800; GFX7-SDAG-NEXT: v_mul_lo_u32 v0, v3, v1 4801; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, v2, v6 4802; GFX7-SDAG-NEXT: v_add_i32_e32 v6, vcc, v0, v1 
4803; GFX7-SDAG-NEXT: v_mul_lo_u32 v2, v6, v3 4804; GFX7-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v3, v[5:6] 4805; GFX7-SDAG-NEXT: v_mul_lo_u32 v3, v5, v4 4806; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, v2, v1 4807; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, v3, v1 4808; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] 4809; 4810; GFX7-GISEL-LABEL: clpeak_imad_pat_i64: 4811; GFX7-GISEL: ; %bb.0: ; %entry 4812; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4813; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0 4814; GFX7-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc 4815; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v2, 0 4816; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v3, v[1:2] 4817; GFX7-GISEL-NEXT: v_add_i32_e32 v8, vcc, v0, v6 4818; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v2, v[4:5] 4819; GFX7-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v2, 0 4820; GFX7-GISEL-NEXT: v_addc_u32_e32 v9, vcc, v4, v7, vcc 4821; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v6 4822; GFX7-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v8, v3, v[1:2] 4823; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v2, v[6:7] 4824; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0 4825; GFX7-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v4, vcc 4826; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v6, 0 4827; GFX7-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5 4828; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v4 4829; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v1, v[0:1] 4830; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v7, 0 4831; GFX7-GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v2, vcc 4832; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5] 4833; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v8, v[1:2] 4834; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v7, v[1:2] 4835; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] 4836; 4837; GFX8-SDAG-LABEL: clpeak_imad_pat_i64: 4838; GFX8-SDAG: ; %bb.0: ; %entry 4839; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4840; GFX8-SDAG-NEXT: 
v_add_u32_e32 v4, vcc, 1, v0 4841; GFX8-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc 4842; GFX8-SDAG-NEXT: v_mul_lo_u32 v6, v4, v3 4843; GFX8-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0 4844; GFX8-SDAG-NEXT: v_mul_lo_u32 v7, v5, v2 4845; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v6 4846; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v7 4847; GFX8-SDAG-NEXT: v_add_u32_e32 v4, vcc, v0, v4 4848; GFX8-SDAG-NEXT: v_addc_u32_e32 v5, vcc, v1, v5, vcc 4849; GFX8-SDAG-NEXT: v_mul_lo_u32 v6, v4, v3 4850; GFX8-SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, v2, 0 4851; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v5, v2 4852; GFX8-SDAG-NEXT: v_add_u32_e32 v4, vcc, v4, v6 4853; GFX8-SDAG-NEXT: v_add_u32_e32 v4, vcc, v4, v2 4854; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v4, v0 4855; GFX8-SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v0, v[3:4] 4856; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v3, v1 4857; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v2, v6 4858; GFX8-SDAG-NEXT: v_add_u32_e32 v6, vcc, v0, v1 4859; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v6, v3 4860; GFX8-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v3, v[5:6] 4861; GFX8-SDAG-NEXT: v_mul_lo_u32 v3, v5, v4 4862; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v2, v1 4863; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v3, v1 4864; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 4865; 4866; GFX8-GISEL-LABEL: clpeak_imad_pat_i64: 4867; GFX8-GISEL: ; %bb.0: ; %entry 4868; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4869; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 1, v0 4870; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc 4871; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v2, 0 4872; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v3, v[1:2] 4873; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, v0, v6 4874; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v2, v[4:5] 4875; GFX8-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v2, 0 4876; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, v4, v7, vcc 4877; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v6 4878; GFX8-GISEL-NEXT: v_mad_u64_u32 
v[6:7], s[4:5], v8, v3, v[1:2] 4879; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v2, v[6:7] 4880; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 1, v0 4881; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v4, vcc 4882; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v6, 0 4883; GFX8-GISEL-NEXT: v_add_u32_e32 v7, vcc, 1, v5 4884; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v4 4885; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v1, v[0:1] 4886; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v7, 0 4887; GFX8-GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v2, vcc 4888; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5] 4889; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v8, v[1:2] 4890; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v7, v[1:2] 4891; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 4892; 4893; GFX900-SDAG-LABEL: clpeak_imad_pat_i64: 4894; GFX900-SDAG: ; %bb.0: ; %entry 4895; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4896; GFX900-SDAG-NEXT: v_add_co_u32_e32 v4, vcc, 1, v0 4897; GFX900-SDAG-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc 4898; GFX900-SDAG-NEXT: v_mul_lo_u32 v6, v5, v2 4899; GFX900-SDAG-NEXT: v_mul_lo_u32 v7, v4, v3 4900; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0 4901; GFX900-SDAG-NEXT: v_add3_u32 v6, v1, v7, v6 4902; GFX900-SDAG-NEXT: v_add_co_u32_e32 v1, vcc, v0, v4 4903; GFX900-SDAG-NEXT: v_addc_co_u32_e32 v4, vcc, v6, v5, vcc 4904; GFX900-SDAG-NEXT: v_mul_lo_u32 v4, v4, v2 4905; GFX900-SDAG-NEXT: v_mul_lo_u32 v3, v1, v3 4906; GFX900-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v1, v2, 0 4907; GFX900-SDAG-NEXT: v_add3_u32 v2, v2, v3, v4 4908; GFX900-SDAG-NEXT: v_mul_lo_u32 v5, v2, v0 4909; GFX900-SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v1, v0, v[1:2] 4910; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v1, v6 4911; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v3, v2 4912; GFX900-SDAG-NEXT: v_add3_u32 v4, v5, v4, v0 4913; GFX900-SDAG-NEXT: v_mul_lo_u32 v5, v4, v1 4914; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v1, v[3:4] 
4915; GFX900-SDAG-NEXT: v_add3_u32 v1, v5, v1, v2 4916; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 4917; 4918; GFX900-GISEL-LABEL: clpeak_imad_pat_i64: 4919; GFX900-GISEL: ; %bb.0: ; %entry 4920; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4921; GFX900-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 1, v0 4922; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc 4923; GFX900-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v2, 0 4924; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v3, v[1:2] 4925; GFX900-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, v0, v6 4926; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v2, v[4:5] 4927; GFX900-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v2, 0 4928; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, v4, v7, vcc 4929; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, v6 4930; GFX900-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v8, v3, v[1:2] 4931; GFX900-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v2, v[6:7] 4932; GFX900-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 1, v0 4933; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v4, vcc 4934; GFX900-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v6, 0 4935; GFX900-GISEL-NEXT: v_add_co_u32_e32 v7, vcc, 1, v5 4936; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, v4 4937; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v1, v[0:1] 4938; GFX900-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v7, 0 4939; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v2, vcc 4940; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5] 4941; GFX900-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v8, v[1:2] 4942; GFX900-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v7, v[1:2] 4943; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 4944; 4945; GFX90A-SDAG-LABEL: clpeak_imad_pat_i64: 4946; GFX90A-SDAG: ; %bb.0: ; %entry 4947; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4948; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0 4949; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 4950; GFX90A-SDAG-NEXT: v_mul_lo_u32 
v6, v1, v2 4951; GFX90A-SDAG-NEXT: v_mul_lo_u32 v7, v0, v3 4952; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, 0 4953; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[0:1] 4954; GFX90A-SDAG-NEXT: v_add3_u32 v1, v6, v1, v7 4955; GFX90A-SDAG-NEXT: v_add3_u32 v5, v5, v7, v6 4956; GFX90A-SDAG-NEXT: v_mul_lo_u32 v3, v0, v3 4957; GFX90A-SDAG-NEXT: v_mul_lo_u32 v6, v1, v2 4958; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0 4959; GFX90A-SDAG-NEXT: v_add3_u32 v1, v1, v3, v6 4960; GFX90A-SDAG-NEXT: v_mul_lo_u32 v6, v1, v4 4961; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v4, v[0:1] 4962; GFX90A-SDAG-NEXT: v_mul_lo_u32 v4, v0, v5 4963; GFX90A-SDAG-NEXT: v_add3_u32 v3, v6, v3, v4 4964; GFX90A-SDAG-NEXT: v_mul_lo_u32 v4, v2, v1 4965; GFX90A-SDAG-NEXT: v_mul_lo_u32 v5, v3, v0 4966; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v0, v[2:3] 4967; GFX90A-SDAG-NEXT: v_add3_u32 v1, v5, v1, v4 4968; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31] 4969; 4970; GFX90A-GISEL-LABEL: clpeak_imad_pat_i64: 4971; GFX90A-GISEL: ; %bb.0: ; %entry 4972; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4973; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 1, v0 4974; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc 4975; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v3, 0 4976; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v2, 0 4977; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v2, v[4:5] 4978; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, v1, v4 4979; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, v0, v6 4980; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v8, vcc, v1, v7, vcc 4981; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v2, 0 4982; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v6, v3, 0 4983; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v2, v[6:7] 4984; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 1, v0 4985; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v1, vcc 4986; GFX90A-GISEL-NEXT: v_add_u32_e32 v5, v5, v2 
4987; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v7, vcc, 1, v4 4988; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0 4989; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v5, vcc 4990; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v6, 0 4991; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v6, v[0:1] 4992; GFX90A-GISEL-NEXT: v_add_u32_e32 v4, v3, v0 4993; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v7, 0 4994; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, v8, 0 4995; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v7, v[2:3] 4996; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, v1, v2 4997; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31] 4998; 4999; GFX10-SDAG-LABEL: clpeak_imad_pat_i64: 5000; GFX10-SDAG: ; %bb.0: ; %entry 5001; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5002; GFX10-SDAG-NEXT: v_add_co_u32 v4, vcc_lo, v0, 1 5003; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v1, vcc_lo 5004; GFX10-SDAG-NEXT: v_mul_lo_u32 v7, v4, v3 5005; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, 0 5006; GFX10-SDAG-NEXT: v_mul_lo_u32 v6, v5, v2 5007; GFX10-SDAG-NEXT: v_add_co_u32 v4, vcc_lo, v0, v4 5008; GFX10-SDAG-NEXT: v_add3_u32 v1, v1, v7, v6 5009; GFX10-SDAG-NEXT: v_mul_lo_u32 v6, v4, v3 5010; GFX10-SDAG-NEXT: v_mad_u64_u32 v[3:4], null, v4, v2, 0 5011; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v5, vcc_lo 5012; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v5, v2 5013; GFX10-SDAG-NEXT: v_add3_u32 v4, v4, v6, v2 5014; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v3, v1 5015; GFX10-SDAG-NEXT: v_mul_lo_u32 v5, v4, v0 5016; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v3, v0, v[3:4] 5017; GFX10-SDAG-NEXT: v_add3_u32 v1, v5, v1, v2 5018; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v0, v4 5019; GFX10-SDAG-NEXT: v_mul_lo_u32 v4, v1, v3 5020; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v0, v3, v[0:1] 5021; GFX10-SDAG-NEXT: v_add3_u32 v1, v4, v1, v2 5022; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 5023; 5024; GFX10-GISEL-LABEL: clpeak_imad_pat_i64: 5025; 
GFX10-GISEL: ; %bb.0: ; %entry 5026; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5027; GFX10-GISEL-NEXT: v_add_co_u32 v6, vcc_lo, v0, 1 5028; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v1, vcc_lo 5029; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v6, v2, 0 5030; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v6, v3, v[1:2] 5031; GFX10-GISEL-NEXT: v_add_co_u32 v8, vcc_lo, v0, v6 5032; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v7, v2, v[4:5] 5033; GFX10-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v8, v2, 0 5034; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v4, v7, vcc_lo 5035; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, v6 5036; GFX10-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v8, v3, v[1:2] 5037; GFX10-GISEL-NEXT: v_add_co_u32 v8, vcc_lo, v0, 1 5038; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, 0, v4, vcc_lo 5039; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v5, v8, 0 5040; GFX10-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v9, v2, v[6:7] 5041; GFX10-GISEL-NEXT: v_add_co_u32 v7, vcc_lo, v5, 1 5042; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, v4 5043; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v3, v7, 0 5044; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, 0, v6, vcc_lo 5045; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v5, v10, v[2:3] 5046; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v3, v9, v[1:2] 5047; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v6, v8, v[4:5] 5048; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v4, v7, v[1:2] 5049; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 5050; 5051; GFX11-SDAG-LABEL: clpeak_imad_pat_i64: 5052; GFX11-SDAG: ; %bb.0: ; %entry 5053; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5054; GFX11-SDAG-NEXT: v_add_co_u32 v4, vcc_lo, v0, 1 5055; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v1, vcc_lo 5056; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) 5057; GFX11-SDAG-NEXT: v_mul_lo_u32 v7, v4, v3 5058; GFX11-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, 0 
5059; GFX11-SDAG-NEXT: v_mul_lo_u32 v6, v5, v2 5060; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) 5061; GFX11-SDAG-NEXT: v_add3_u32 v1, v1, v7, v6 5062; GFX11-SDAG-NEXT: v_add_co_u32 v6, vcc_lo, v0, v4 5063; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 5064; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v5, vcc_lo 5065; GFX11-SDAG-NEXT: v_mul_lo_u32 v7, v6, v3 5066; GFX11-SDAG-NEXT: v_mad_u64_u32 v[3:4], null, v6, v2, 0 5067; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 5068; GFX11-SDAG-NEXT: v_mul_lo_u32 v2, v5, v2 5069; GFX11-SDAG-NEXT: v_mul_lo_u32 v1, v3, v1 5070; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 5071; GFX11-SDAG-NEXT: v_add3_u32 v4, v4, v7, v2 5072; GFX11-SDAG-NEXT: v_mul_lo_u32 v2, v4, v0 5073; GFX11-SDAG-NEXT: v_mad_u64_u32 v[5:6], null, v3, v0, v[3:4] 5074; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 5075; GFX11-SDAG-NEXT: v_add3_u32 v6, v2, v6, v1 5076; GFX11-SDAG-NEXT: v_mul_lo_u32 v2, v5, v4 5077; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) 5078; GFX11-SDAG-NEXT: v_mul_lo_u32 v4, v6, v3 5079; GFX11-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v5, v3, v[5:6] 5080; GFX11-SDAG-NEXT: v_add3_u32 v1, v4, v1, v2 5081; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 5082; 5083; GFX11-GISEL-LABEL: clpeak_imad_pat_i64: 5084; GFX11-GISEL: ; %bb.0: ; %entry 5085; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5086; GFX11-GISEL-NEXT: v_add_co_u32 v7, vcc_lo, v0, 1 5087; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v1, vcc_lo 5088; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 5089; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v7, v2, 0 5090; GFX11-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v7, v3, v[1:2] 5091; GFX11-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) 5092; GFX11-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v8, v2, v[4:5] 5093; GFX11-GISEL-NEXT: v_add_co_u32 v4, vcc_lo, v0, v7 5094; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) 5095; GFX11-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v4, v2, 0 5096; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v5, v8, vcc_lo 5097; GFX11-GISEL-NEXT: v_add_co_u32 v11, vcc_lo, v0, 1 5098; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v12, vcc_lo, 0, v5, vcc_lo 5099; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) 5100; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, v7 5101; GFX11-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v4, v3, v[1:2] 5102; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) 5103; GFX11-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v6, v11, 0 5104; GFX11-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v10, v2, v[7:8] 5105; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 5106; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, v4 5107; GFX11-GISEL-NEXT: v_add_co_u32 v9, vcc_lo, v6, 1 5108; GFX11-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v6, v12, v[2:3] 5109; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) 5110; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v3, v9, 0 5111; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, 0, v8, vcc_lo 5112; GFX11-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v8, v11, v[4:5] 5113; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 5114; GFX11-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v3, v10, v[1:2] 5115; GFX11-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v5, v9, v[6:7] 5116; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 5117; 5118; GFX1200-SDAG-LABEL: clpeak_imad_pat_i64: 5119; GFX1200-SDAG: ; %bb.0: ; %entry 5120; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 5121; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0 5122; 
GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0 5123; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0 5124; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0 5125; GFX1200-SDAG-NEXT: v_add_co_u32 v4, vcc_lo, v0, 1 5126; GFX1200-SDAG-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v1, vcc_lo 5127; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) 5128; GFX1200-SDAG-NEXT: v_mul_lo_u32 v7, v4, v3 5129; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[0:1], null, v4, v2, 0 5130; GFX1200-SDAG-NEXT: v_mul_lo_u32 v6, v5, v2 5131; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 5132; GFX1200-SDAG-NEXT: v_add_co_u32 v4, vcc_lo, v0, v4 5133; GFX1200-SDAG-NEXT: v_add3_u32 v1, v1, v7, v6 5134; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) 5135; GFX1200-SDAG-NEXT: v_mul_lo_u32 v6, v4, v3 5136; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[3:4], null, v4, v2, 0 5137; GFX1200-SDAG-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v5, vcc_lo 5138; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 5139; GFX1200-SDAG-NEXT: v_mul_lo_u32 v2, v5, v2 5140; GFX1200-SDAG-NEXT: v_add3_u32 v4, v4, v6, v2 5141; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) 5142; GFX1200-SDAG-NEXT: v_mul_lo_u32 v2, v3, v1 5143; GFX1200-SDAG-NEXT: v_mul_lo_u32 v5, v4, v0 5144; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[0:1], null, v3, v0, v[3:4] 5145; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 5146; GFX1200-SDAG-NEXT: v_add3_u32 v1, v5, v1, v2 5147; GFX1200-SDAG-NEXT: v_mul_lo_u32 v2, v0, v4 5148; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) 5149; GFX1200-SDAG-NEXT: v_mul_lo_u32 v4, v1, v3 5150; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[0:1], null, v0, v3, v[0:1] 5151; GFX1200-SDAG-NEXT: v_add3_u32 v1, v4, v1, v2 5152; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31] 5153; 5154; 
GFX1200-GISEL-LABEL: clpeak_imad_pat_i64: 5155; GFX1200-GISEL: ; %bb.0: ; %entry 5156; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 5157; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0 5158; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 5159; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 5160; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 5161; GFX1200-GISEL-NEXT: v_add_co_u32 v4, vcc_lo, v0, 1 5162; GFX1200-GISEL-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v1, vcc_lo 5163; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 5164; GFX1200-GISEL-NEXT: v_mul_hi_u32 v0, v4, v2 5165; GFX1200-GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 5166; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[0:1], null, v4, v3, v[0:1] 5167; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 5168; GFX1200-GISEL-NEXT: v_add_co_u32 v4, vcc_lo, v6, v4 5169; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[0:1], null, v5, v2, v[0:1] 5170; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) 5171; GFX1200-GISEL-NEXT: v_mov_b32_e32 v7, v0 5172; GFX1200-GISEL-NEXT: v_mul_hi_u32 v0, v4, v2 5173; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 5174; GFX1200-GISEL-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v7, v5, vcc_lo 5175; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[0:1], null, v4, v3, v[0:1] 5176; GFX1200-GISEL-NEXT: v_mul_lo_u32 v3, v4, v2 5177; GFX1200-GISEL-NEXT: v_add_co_u32 v4, vcc_lo, v6, 1 5178; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 5179; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[0:1], null, v5, v2, v[0:1] 5180; GFX1200-GISEL-NEXT: v_mul_hi_u32 v1, v3, v4 5181; GFX1200-GISEL-NEXT: v_mul_lo_u32 v5, v3, v4 5182; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3) 5183; GFX1200-GISEL-NEXT: v_mov_b32_e32 v6, v0 5184; GFX1200-GISEL-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v7, vcc_lo 5185; GFX1200-GISEL-NEXT: 
v_add_co_u32 v7, vcc_lo, v3, 1 5186; GFX1200-GISEL-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, 0, v6, vcc_lo 5187; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 5188; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[1:2], null, v3, v2, v[1:2] 5189; GFX1200-GISEL-NEXT: v_mul_hi_u32 v2, v5, v7 5190; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 5191; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[3:4], null, v0, v4, v[1:2] 5192; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v5, v7 5193; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[1:2], null, v5, v6, v[2:3] 5194; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 5195; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[1:2], null, v3, v7, v[1:2] 5196; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] 5197entry: 5198 %y18 = add i64 %x, 1 5199 %add = mul i64 %y18, %y 5200 %mul119 = add i64 %add, %y18 5201 %add2 = mul i64 %mul119, %y 5202 %add220 = add i64 %add, 1 5203 %add422 = add i64 %add2, 1 5204 %mul521 = mul i64 %add2, %add220 5205 %add6 = mul i64 %mul521, %add422 5206 ret i64 %add6 5207} 5208 5209define <2 x i64> @clpeak_imad_pat_v2i64(<2 x i64> %x, <2 x i64> %y) { 5210; GFX6-SDAG-LABEL: clpeak_imad_pat_v2i64: 5211; GFX6-SDAG: ; %bb.0: ; %entry 5212; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5213; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 5214; GFX6-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5215; GFX6-SDAG-NEXT: v_mul_lo_u32 v8, v0, v5 5216; GFX6-SDAG-NEXT: v_mul_hi_u32 v9, v0, v4 5217; GFX6-SDAG-NEXT: v_mul_lo_u32 v10, v1, v4 5218; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v2 5219; GFX6-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 5220; GFX6-SDAG-NEXT: v_add_i32_e32 v8, vcc, v9, v8 5221; GFX6-SDAG-NEXT: v_add_i32_e32 v8, vcc, v8, v10 5222; GFX6-SDAG-NEXT: v_mul_lo_u32 v9, v2, v7 5223; GFX6-SDAG-NEXT: v_mul_hi_u32 v10, v2, v6 5224; GFX6-SDAG-NEXT: v_mul_lo_u32 v12, v3, v6 5225; GFX6-SDAG-NEXT: v_mul_lo_u32 v11, v0, v4 5226; GFX6-SDAG-NEXT: v_add_i32_e32 v9, 
vcc, v10, v9 5227; GFX6-SDAG-NEXT: v_add_i32_e32 v9, vcc, v9, v12 5228; GFX6-SDAG-NEXT: v_mul_lo_u32 v10, v2, v6 5229; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, v11, v0 5230; GFX6-SDAG-NEXT: v_addc_u32_e32 v1, vcc, v8, v1, vcc 5231; GFX6-SDAG-NEXT: v_mul_lo_u32 v5, v0, v5 5232; GFX6-SDAG-NEXT: v_mul_hi_u32 v12, v0, v4 5233; GFX6-SDAG-NEXT: v_mul_lo_u32 v1, v1, v4 5234; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, v10, v2 5235; GFX6-SDAG-NEXT: v_addc_u32_e32 v3, vcc, v9, v3, vcc 5236; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, v12, v5 5237; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, v5, v1 5238; GFX6-SDAG-NEXT: v_mul_lo_u32 v5, v2, v7 5239; GFX6-SDAG-NEXT: v_mul_hi_u32 v7, v2, v6 5240; GFX6-SDAG-NEXT: v_mul_lo_u32 v2, v2, v6 5241; GFX6-SDAG-NEXT: v_mul_lo_u32 v3, v3, v6 5242; GFX6-SDAG-NEXT: v_mul_lo_u32 v0, v0, v4 5243; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, v7, v5 5244; GFX6-SDAG-NEXT: v_mul_lo_u32 v5, v2, v9 5245; GFX6-SDAG-NEXT: v_mul_hi_u32 v6, v2, v10 5246; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, v4, v3 5247; GFX6-SDAG-NEXT: v_mul_lo_u32 v4, v3, v10 5248; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, v6, v5 5249; GFX6-SDAG-NEXT: v_mul_hi_u32 v6, v0, v11 5250; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, v5, v4 5251; GFX6-SDAG-NEXT: v_mul_lo_u32 v5, v0, v8 5252; GFX6-SDAG-NEXT: v_mul_lo_u32 v8, v1, v11 5253; GFX6-SDAG-NEXT: v_mul_lo_u32 v7, v2, v10 5254; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, v6, v5 5255; GFX6-SDAG-NEXT: v_mul_lo_u32 v6, v0, v11 5256; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, v5, v8 5257; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, v6, v0 5258; GFX6-SDAG-NEXT: v_addc_u32_e32 v5, vcc, v5, v1, vcc 5259; GFX6-SDAG-NEXT: v_mul_lo_u32 v1, v6, v1 5260; GFX6-SDAG-NEXT: v_mul_hi_u32 v8, v6, v0 5261; GFX6-SDAG-NEXT: v_mul_lo_u32 v9, v5, v0 5262; GFX6-SDAG-NEXT: v_mul_lo_u32 v0, v6, v0 5263; GFX6-SDAG-NEXT: v_add_i32_e32 v7, vcc, v7, v2 5264; GFX6-SDAG-NEXT: v_addc_u32_e32 v4, vcc, v4, v3, vcc 5265; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, v8, v1 5266; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, 
v9 5267; GFX6-SDAG-NEXT: v_mul_lo_u32 v3, v7, v3 5268; GFX6-SDAG-NEXT: v_mul_hi_u32 v8, v7, v2 5269; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v6 5270; GFX6-SDAG-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc 5271; GFX6-SDAG-NEXT: v_mul_lo_u32 v5, v4, v2 5272; GFX6-SDAG-NEXT: v_mul_lo_u32 v2, v7, v2 5273; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, v8, v3 5274; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, v3, v5 5275; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, v2, v7 5276; GFX6-SDAG-NEXT: v_addc_u32_e32 v3, vcc, v3, v4, vcc 5277; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] 5278; 5279; GFX6-GISEL-LABEL: clpeak_imad_pat_v2i64: 5280; GFX6-GISEL: ; %bb.0: ; %entry 5281; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5282; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 5283; GFX6-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5284; GFX6-GISEL-NEXT: v_mul_lo_u32 v8, v1, v4 5285; GFX6-GISEL-NEXT: v_mul_lo_u32 v9, v0, v5 5286; GFX6-GISEL-NEXT: v_mul_hi_u32 v11, v0, v4 5287; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2 5288; GFX6-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 5289; GFX6-GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 5290; GFX6-GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 5291; GFX6-GISEL-NEXT: v_mul_lo_u32 v9, v3, v6 5292; GFX6-GISEL-NEXT: v_mul_lo_u32 v11, v2, v7 5293; GFX6-GISEL-NEXT: v_mul_hi_u32 v13, v2, v6 5294; GFX6-GISEL-NEXT: v_mul_lo_u32 v10, v0, v4 5295; GFX6-GISEL-NEXT: v_mul_lo_u32 v12, v2, v6 5296; GFX6-GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 5297; GFX6-GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 5298; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 5299; GFX6-GISEL-NEXT: v_addc_u32_e32 v1, vcc, v8, v1, vcc 5300; GFX6-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4 5301; GFX6-GISEL-NEXT: v_mul_lo_u32 v5, v0, v5 5302; GFX6-GISEL-NEXT: v_mul_lo_u32 v11, v0, v4 5303; GFX6-GISEL-NEXT: v_mul_hi_u32 v0, v0, v4 5304; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, v12, v2 5305; GFX6-GISEL-NEXT: v_addc_u32_e32 v3, vcc, v9, v3, vcc 5306; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5 5307; 
GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 5308; GFX6-GISEL-NEXT: v_mul_lo_u32 v1, v3, v6 5309; GFX6-GISEL-NEXT: v_mul_lo_u32 v3, v2, v7 5310; GFX6-GISEL-NEXT: v_mul_lo_u32 v4, v2, v6 5311; GFX6-GISEL-NEXT: v_mul_hi_u32 v2, v2, v6 5312; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3 5313; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v2 5314; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v10 5315; GFX6-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v8, vcc 5316; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v12 5317; GFX6-GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v9, vcc 5318; GFX6-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v11 5319; GFX6-GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v0, vcc 5320; GFX6-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 5321; GFX6-GISEL-NEXT: v_mul_lo_u32 v3, v11, v3 5322; GFX6-GISEL-NEXT: v_mul_lo_u32 v12, v11, v2 5323; GFX6-GISEL-NEXT: v_mul_hi_u32 v2, v11, v2 5324; GFX6-GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v4 5325; GFX6-GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v1, vcc 5326; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v3 5327; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 5328; GFX6-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5 5329; GFX6-GISEL-NEXT: v_mul_lo_u32 v2, v4, v6 5330; GFX6-GISEL-NEXT: v_mul_lo_u32 v3, v4, v5 5331; GFX6-GISEL-NEXT: v_mul_hi_u32 v4, v4, v5 5332; GFX6-GISEL-NEXT: v_mul_hi_u32 v5, v12, v7 5333; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v2 5334; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, v1, v4 5335; GFX6-GISEL-NEXT: v_mul_lo_u32 v1, v0, v7 5336; GFX6-GISEL-NEXT: v_mul_lo_u32 v4, v12, v8 5337; GFX6-GISEL-NEXT: v_mul_lo_u32 v0, v12, v7 5338; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v4 5339; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5 5340; GFX6-GISEL-NEXT: v_mul_lo_u32 v4, v2, v9 5341; GFX6-GISEL-NEXT: v_mul_lo_u32 v5, v3, v10 5342; GFX6-GISEL-NEXT: v_mul_lo_u32 v2, v3, v9 5343; GFX6-GISEL-NEXT: v_mul_hi_u32 v3, v3, v9 5344; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 5345; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, v4, v3 5346; GFX6-GISEL-NEXT: 
s_setpc_b64 s[30:31] 5347; 5348; GFX7-SDAG-LABEL: clpeak_imad_pat_v2i64: 5349; GFX7-SDAG: ; %bb.0: ; %entry 5350; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5351; GFX7-SDAG-NEXT: v_add_i32_e32 v8, vcc, 1, v0 5352; GFX7-SDAG-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 5353; GFX7-SDAG-NEXT: v_mul_lo_u32 v10, v8, v5 5354; GFX7-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v4, 0 5355; GFX7-SDAG-NEXT: v_add_i32_e32 v11, vcc, 1, v2 5356; GFX7-SDAG-NEXT: v_addc_u32_e32 v12, vcc, 0, v3, vcc 5357; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, v1, v10 5358; GFX7-SDAG-NEXT: v_mul_lo_u32 v10, v9, v4 5359; GFX7-SDAG-NEXT: v_mul_lo_u32 v13, v11, v7 5360; GFX7-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v11, v6, 0 5361; GFX7-SDAG-NEXT: v_add_i32_e32 v14, vcc, v3, v10 5362; GFX7-SDAG-NEXT: v_mul_lo_u32 v3, v12, v6 5363; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, v2, v13 5364; GFX7-SDAG-NEXT: v_add_i32_e32 v13, vcc, v2, v3 5365; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, v0, v8 5366; GFX7-SDAG-NEXT: v_mul_lo_u32 v5, v2, v5 5367; GFX7-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, v4, 0 5368; GFX7-SDAG-NEXT: v_addc_u32_e32 v9, vcc, v14, v9, vcc 5369; GFX7-SDAG-NEXT: v_add_i32_e32 v8, vcc, v1, v11 5370; GFX7-SDAG-NEXT: v_addc_u32_e32 v10, vcc, v13, v12, vcc 5371; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, v3, v5 5372; GFX7-SDAG-NEXT: v_mul_lo_u32 v5, v8, v7 5373; GFX7-SDAG-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v8, v6, 0 5374; GFX7-SDAG-NEXT: v_mul_lo_u32 v9, v9, v4 5375; GFX7-SDAG-NEXT: v_mul_lo_u32 v4, v10, v6 5376; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, v8, v5 5377; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, v3, v9 5378; GFX7-SDAG-NEXT: v_add_i32_e32 v8, vcc, v5, v4 5379; GFX7-SDAG-NEXT: v_mul_lo_u32 v6, v8, v1 5380; GFX7-SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v1, v[7:8] 5381; GFX7-SDAG-NEXT: v_mul_lo_u32 v1, v3, v0 5382; GFX7-SDAG-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v2, v0, v[2:3] 5383; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, v6, v5 5384; GFX7-SDAG-NEXT: v_mul_lo_u32 v6, v7, v13 5385; 
GFX7-SDAG-NEXT: v_mul_lo_u32 v0, v2, v14 5386; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v10 5387; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, v6, v5 5388; GFX7-SDAG-NEXT: v_add_i32_e32 v10, vcc, v0, v1 5389; GFX7-SDAG-NEXT: v_mul_lo_u32 v11, v10, v2 5390; GFX7-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v9, v2, v[9:10] 5391; GFX7-SDAG-NEXT: v_mul_lo_u32 v6, v9, v3 5392; GFX7-SDAG-NEXT: v_mul_lo_u32 v9, v5, v7 5393; GFX7-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v7, v[4:5] 5394; GFX7-SDAG-NEXT: v_mul_lo_u32 v4, v4, v8 5395; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, v11, v1 5396; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, v9, v3 5397; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, v6, v1 5398; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, v4, v3 5399; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] 5400; 5401; GFX7-GISEL-LABEL: clpeak_imad_pat_v2i64: 5402; GFX7-GISEL: ; %bb.0: ; %entry 5403; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5404; GFX7-GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v0 5405; GFX7-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc 5406; GFX7-GISEL-NEXT: v_add_i32_e32 v14, vcc, 1, v2 5407; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v4, 0 5408; GFX7-GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v3, vcc 5409; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v14, v6, 0 5410; GFX7-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v5, v[1:2] 5411; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v3 5412; GFX7-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v7, v[1:2] 5413; GFX7-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v4, v[8:9] 5414; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, v0, v12 5415; GFX7-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v15, v6, v[10:11] 5416; GFX7-GISEL-NEXT: v_addc_u32_e32 v16, vcc, v8, v13, vcc 5417; GFX7-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v3, v4, 0 5418; GFX7-GISEL-NEXT: v_add_i32_e32 v17, vcc, v2, v14 5419; GFX7-GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v17, v6, 0 5420; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v11 5421; GFX7-GISEL-NEXT: v_mad_u64_u32 v[11:12], 
s[4:5], v3, v5, v[1:2] 5422; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v14 5423; GFX7-GISEL-NEXT: v_addc_u32_e32 v18, vcc, v9, v15, vcc 5424; GFX7-GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v17, v7, v[1:2] 5425; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v16, v4, v[11:12] 5426; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v18, v6, v[14:15] 5427; GFX7-GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v0 5428; GFX7-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v11, 0 5429; GFX7-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v8, vcc 5430; GFX7-GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v2 5431; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v6 5432; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v1, v[0:1] 5433; GFX7-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v13, v12, 0 5434; GFX7-GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v9, vcc 5435; GFX7-GISEL-NEXT: v_add_i32_e32 v14, vcc, 1, v10 5436; GFX7-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v3, v11, v[0:1] 5437; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v7 5438; GFX7-GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v3, vcc 5439; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v2, v[0:1] 5440; GFX7-GISEL-NEXT: v_add_i32_e32 v16, vcc, 1, v13 5441; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v14, 0 5442; GFX7-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v4, v12, v[2:3] 5443; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v16, 0 5444; GFX7-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v4, vcc 5445; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v15, v[1:2] 5446; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v3 5447; GFX7-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v17, v[1:2] 5448; GFX7-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v14, v[4:5] 5449; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v16, v[10:11] 5450; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v5 5451; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] 5452; 5453; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i64: 5454; GFX8-SDAG: ; %bb.0: ; %entry 5455; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5456; GFX8-SDAG-NEXT: 
v_add_u32_e32 v8, vcc, 1, v0 5457; GFX8-SDAG-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 5458; GFX8-SDAG-NEXT: v_mul_lo_u32 v10, v8, v5 5459; GFX8-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v4, 0 5460; GFX8-SDAG-NEXT: v_add_u32_e32 v11, vcc, 1, v2 5461; GFX8-SDAG-NEXT: v_addc_u32_e32 v12, vcc, 0, v3, vcc 5462; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, v1, v10 5463; GFX8-SDAG-NEXT: v_mul_lo_u32 v10, v9, v4 5464; GFX8-SDAG-NEXT: v_mul_lo_u32 v13, v11, v7 5465; GFX8-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v11, v6, 0 5466; GFX8-SDAG-NEXT: v_add_u32_e32 v14, vcc, v3, v10 5467; GFX8-SDAG-NEXT: v_mul_lo_u32 v3, v12, v6 5468; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v2, v13 5469; GFX8-SDAG-NEXT: v_add_u32_e32 v13, vcc, v2, v3 5470; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v0, v8 5471; GFX8-SDAG-NEXT: v_mul_lo_u32 v5, v2, v5 5472; GFX8-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, v4, 0 5473; GFX8-SDAG-NEXT: v_addc_u32_e32 v9, vcc, v14, v9, vcc 5474; GFX8-SDAG-NEXT: v_add_u32_e32 v8, vcc, v1, v11 5475; GFX8-SDAG-NEXT: v_addc_u32_e32 v10, vcc, v13, v12, vcc 5476; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, v3, v5 5477; GFX8-SDAG-NEXT: v_mul_lo_u32 v5, v8, v7 5478; GFX8-SDAG-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v8, v6, 0 5479; GFX8-SDAG-NEXT: v_mul_lo_u32 v9, v9, v4 5480; GFX8-SDAG-NEXT: v_mul_lo_u32 v4, v10, v6 5481; GFX8-SDAG-NEXT: v_add_u32_e32 v5, vcc, v8, v5 5482; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, v3, v9 5483; GFX8-SDAG-NEXT: v_add_u32_e32 v8, vcc, v5, v4 5484; GFX8-SDAG-NEXT: v_mul_lo_u32 v6, v8, v1 5485; GFX8-SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v1, v[7:8] 5486; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v3, v0 5487; GFX8-SDAG-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v2, v0, v[2:3] 5488; GFX8-SDAG-NEXT: v_add_u32_e32 v5, vcc, v6, v5 5489; GFX8-SDAG-NEXT: v_mul_lo_u32 v6, v7, v13 5490; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v2, v14 5491; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v10 5492; GFX8-SDAG-NEXT: v_add_u32_e32 v5, vcc, v6, v5 5493; GFX8-SDAG-NEXT: v_add_u32_e32 v10, vcc, v0, v1 
5494; GFX8-SDAG-NEXT: v_mul_lo_u32 v11, v10, v2 5495; GFX8-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v9, v2, v[9:10] 5496; GFX8-SDAG-NEXT: v_mul_lo_u32 v6, v9, v3 5497; GFX8-SDAG-NEXT: v_mul_lo_u32 v9, v5, v7 5498; GFX8-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v7, v[4:5] 5499; GFX8-SDAG-NEXT: v_mul_lo_u32 v4, v4, v8 5500; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v11, v1 5501; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, v9, v3 5502; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v6, v1 5503; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, v4, v3 5504; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 5505; 5506; GFX8-GISEL-LABEL: clpeak_imad_pat_v2i64: 5507; GFX8-GISEL: ; %bb.0: ; %entry 5508; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5509; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 1, v0 5510; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc 5511; GFX8-GISEL-NEXT: v_add_u32_e32 v14, vcc, 1, v2 5512; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v4, 0 5513; GFX8-GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v3, vcc 5514; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v14, v6, 0 5515; GFX8-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v5, v[1:2] 5516; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v3 5517; GFX8-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v7, v[1:2] 5518; GFX8-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v4, v[8:9] 5519; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, v0, v12 5520; GFX8-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v15, v6, v[10:11] 5521; GFX8-GISEL-NEXT: v_addc_u32_e32 v16, vcc, v8, v13, vcc 5522; GFX8-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v3, v4, 0 5523; GFX8-GISEL-NEXT: v_add_u32_e32 v17, vcc, v2, v14 5524; GFX8-GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v17, v6, 0 5525; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v11 5526; GFX8-GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v3, v5, v[1:2] 5527; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v14 5528; GFX8-GISEL-NEXT: v_addc_u32_e32 v18, vcc, v9, v15, vcc 5529; GFX8-GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v17, v7, v[1:2] 
5530; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v16, v4, v[11:12] 5531; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v18, v6, v[14:15] 5532; GFX8-GISEL-NEXT: v_add_u32_e32 v11, vcc, 1, v0 5533; GFX8-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v11, 0 5534; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v8, vcc 5535; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 1, v2 5536; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v6 5537; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v1, v[0:1] 5538; GFX8-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v13, v12, 0 5539; GFX8-GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v9, vcc 5540; GFX8-GISEL-NEXT: v_add_u32_e32 v14, vcc, 1, v10 5541; GFX8-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v3, v11, v[0:1] 5542; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v7 5543; GFX8-GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v3, vcc 5544; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v2, v[0:1] 5545; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 1, v13 5546; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v14, 0 5547; GFX8-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v4, v12, v[2:3] 5548; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v16, 0 5549; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v4, vcc 5550; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v15, v[1:2] 5551; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v3 5552; GFX8-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v17, v[1:2] 5553; GFX8-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v14, v[4:5] 5554; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v16, v[10:11] 5555; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v5 5556; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 5557; 5558; GFX900-SDAG-LABEL: clpeak_imad_pat_v2i64: 5559; GFX900-SDAG: ; %bb.0: ; %entry 5560; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5561; GFX900-SDAG-NEXT: v_add_co_u32_e32 v8, vcc, 1, v2 5562; GFX900-SDAG-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v3, vcc 5563; GFX900-SDAG-NEXT: v_add_co_u32_e32 v10, vcc, 1, v0 5564; GFX900-SDAG-NEXT: v_addc_co_u32_e32 
v11, vcc, 0, v1, vcc 5565; GFX900-SDAG-NEXT: v_mul_lo_u32 v12, v11, v4 5566; GFX900-SDAG-NEXT: v_mul_lo_u32 v13, v10, v5 5567; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v4, 0 5568; GFX900-SDAG-NEXT: v_mul_lo_u32 v14, v9, v6 5569; GFX900-SDAG-NEXT: v_mul_lo_u32 v15, v8, v7 5570; GFX900-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v6, 0 5571; GFX900-SDAG-NEXT: v_add3_u32 v12, v1, v13, v12 5572; GFX900-SDAG-NEXT: v_add3_u32 v1, v3, v15, v14 5573; GFX900-SDAG-NEXT: v_add_co_u32_e32 v8, vcc, v2, v8 5574; GFX900-SDAG-NEXT: v_addc_co_u32_e32 v9, vcc, v1, v9, vcc 5575; GFX900-SDAG-NEXT: v_add_co_u32_e32 v3, vcc, v0, v10 5576; GFX900-SDAG-NEXT: v_addc_co_u32_e32 v10, vcc, v12, v11, vcc 5577; GFX900-SDAG-NEXT: v_mul_lo_u32 v10, v10, v4 5578; GFX900-SDAG-NEXT: v_mul_lo_u32 v11, v3, v5 5579; GFX900-SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v3, v4, 0 5580; GFX900-SDAG-NEXT: v_mul_lo_u32 v9, v9, v6 5581; GFX900-SDAG-NEXT: v_mul_lo_u32 v7, v8, v7 5582; GFX900-SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v6, 0 5583; GFX900-SDAG-NEXT: v_add3_u32 v4, v4, v11, v10 5584; GFX900-SDAG-NEXT: v_mul_lo_u32 v10, v4, v0 5585; GFX900-SDAG-NEXT: v_add3_u32 v6, v6, v7, v9 5586; GFX900-SDAG-NEXT: v_mul_lo_u32 v9, v6, v2 5587; GFX900-SDAG-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v5, v2, v[5:6] 5588; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v5, v1 5589; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v0, v[3:4] 5590; GFX900-SDAG-NEXT: v_mul_lo_u32 v11, v3, v12 5591; GFX900-SDAG-NEXT: v_add3_u32 v8, v9, v8, v2 5592; GFX900-SDAG-NEXT: v_mul_lo_u32 v4, v0, v4 5593; GFX900-SDAG-NEXT: v_mul_lo_u32 v6, v7, v6 5594; GFX900-SDAG-NEXT: v_add3_u32 v1, v10, v1, v11 5595; GFX900-SDAG-NEXT: v_mul_lo_u32 v9, v1, v3 5596; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v3, v[0:1] 5597; GFX900-SDAG-NEXT: v_mul_lo_u32 v10, v8, v5 5598; GFX900-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v5, v[7:8] 5599; GFX900-SDAG-NEXT: v_add3_u32 v1, v9, v1, v4 5600; GFX900-SDAG-NEXT: v_add3_u32 v3, v10, v3, v6 5601; 
GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 5602; 5603; GFX900-GISEL-LABEL: clpeak_imad_pat_v2i64: 5604; GFX900-GISEL: ; %bb.0: ; %entry 5605; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5606; GFX900-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 1, v0 5607; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v1, vcc 5608; GFX900-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, 1, v2 5609; GFX900-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v4, 0 5610; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v15, vcc, 0, v3, vcc 5611; GFX900-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v14, v6, 0 5612; GFX900-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v5, v[1:2] 5613; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, v3 5614; GFX900-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v7, v[1:2] 5615; GFX900-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v4, v[8:9] 5616; GFX900-GISEL-NEXT: v_add_co_u32_e32 v3, vcc, v0, v12 5617; GFX900-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v15, v6, v[10:11] 5618; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v16, vcc, v8, v13, vcc 5619; GFX900-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v3, v4, 0 5620; GFX900-GISEL-NEXT: v_add_co_u32_e32 v17, vcc, v2, v14 5621; GFX900-GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v17, v6, 0 5622; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, v11 5623; GFX900-GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v3, v5, v[1:2] 5624; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, v14 5625; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v18, vcc, v9, v15, vcc 5626; GFX900-GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v17, v7, v[1:2] 5627; GFX900-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v16, v4, v[11:12] 5628; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v18, v6, v[14:15] 5629; GFX900-GISEL-NEXT: v_add_co_u32_e32 v11, vcc, 1, v0 5630; GFX900-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v11, 0 5631; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v8, vcc 5632; GFX900-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 1, v2 5633; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, v6 5634; GFX900-GISEL-NEXT: 
v_mad_u64_u32 v[0:1], s[4:5], v10, v1, v[0:1] 5635; GFX900-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v13, v12, 0 5636; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v9, vcc 5637; GFX900-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, 1, v10 5638; GFX900-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v3, v11, v[0:1] 5639; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, v7 5640; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v15, vcc, 0, v3, vcc 5641; GFX900-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v2, v[0:1] 5642; GFX900-GISEL-NEXT: v_add_co_u32_e32 v16, vcc, 1, v13 5643; GFX900-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v14, 0 5644; GFX900-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v4, v12, v[2:3] 5645; GFX900-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v16, 0 5646; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v17, vcc, 0, v4, vcc 5647; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v15, v[1:2] 5648; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, v3 5649; GFX900-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v17, v[1:2] 5650; GFX900-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v14, v[4:5] 5651; GFX900-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v16, v[10:11] 5652; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, v5 5653; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 5654; 5655; GFX90A-SDAG-LABEL: clpeak_imad_pat_v2i64: 5656; GFX90A-SDAG: ; %bb.0: ; %entry 5657; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5658; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v2, vcc, 1, v2 5659; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc 5660; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0 5661; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 5662; GFX90A-SDAG-NEXT: v_mul_lo_u32 v12, v1, v4 5663; GFX90A-SDAG-NEXT: v_mul_lo_u32 v13, v0, v5 5664; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v0, v4, 0 5665; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v4, v[0:1] 5666; GFX90A-SDAG-NEXT: v_mul_lo_u32 v14, v3, v6 5667; GFX90A-SDAG-NEXT: v_mul_lo_u32 v15, v2, v7 5668; GFX90A-SDAG-NEXT: v_mad_u64_u32 
v[10:11], s[4:5], v2, v6, 0 5669; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, v6, v[2:3] 5670; GFX90A-SDAG-NEXT: v_add3_u32 v1, v12, v1, v13 5671; GFX90A-SDAG-NEXT: v_add3_u32 v9, v9, v13, v12 5672; GFX90A-SDAG-NEXT: v_add3_u32 v3, v14, v3, v15 5673; GFX90A-SDAG-NEXT: v_mul_lo_u32 v5, v0, v5 5674; GFX90A-SDAG-NEXT: v_mul_lo_u32 v12, v1, v4 5675; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v4, 0 5676; GFX90A-SDAG-NEXT: v_add3_u32 v1, v1, v5, v12 5677; GFX90A-SDAG-NEXT: v_mul_lo_u32 v4, v2, v7 5678; GFX90A-SDAG-NEXT: v_mul_lo_u32 v5, v3, v6 5679; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, v6, 0 5680; GFX90A-SDAG-NEXT: v_add3_u32 v11, v11, v15, v14 5681; GFX90A-SDAG-NEXT: v_add3_u32 v3, v3, v4, v5 5682; GFX90A-SDAG-NEXT: v_mul_lo_u32 v6, v3, v10 5683; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v10, v[2:3] 5684; GFX90A-SDAG-NEXT: v_mul_lo_u32 v7, v2, v11 5685; GFX90A-SDAG-NEXT: v_add3_u32 v5, v6, v5, v7 5686; GFX90A-SDAG-NEXT: v_mul_lo_u32 v10, v1, v8 5687; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v0, v8, v[0:1] 5688; GFX90A-SDAG-NEXT: v_mul_lo_u32 v8, v0, v9 5689; GFX90A-SDAG-NEXT: v_add3_u32 v7, v10, v7, v8 5690; GFX90A-SDAG-NEXT: v_mul_lo_u32 v8, v6, v1 5691; GFX90A-SDAG-NEXT: v_mul_lo_u32 v9, v7, v0 5692; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v0, v[6:7] 5693; GFX90A-SDAG-NEXT: v_mul_lo_u32 v6, v4, v3 5694; GFX90A-SDAG-NEXT: v_mul_lo_u32 v7, v5, v2 5695; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v2, v[4:5] 5696; GFX90A-SDAG-NEXT: v_add3_u32 v1, v9, v1, v8 5697; GFX90A-SDAG-NEXT: v_add3_u32 v3, v7, v3, v6 5698; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31] 5699; 5700; GFX90A-GISEL-LABEL: clpeak_imad_pat_v2i64: 5701; GFX90A-GISEL: ; %bb.0: ; %entry 5702; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5703; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v10, vcc, 1, v0 5704; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v1, vcc 5705; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 1, v2 5706; 
GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v3, vcc 5707; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v5, 0 5708; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v4, 0 5709; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v11, v4, v[2:3] 5710; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, v1, v2 5711; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v7, 0 5712; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v10, vcc, v0, v10 5713; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v6, 0 5714; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v6, v[8:9] 5715; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v14, vcc, v1, v11, vcc 5716; GFX90A-GISEL-NEXT: v_add_u32_e32 v3, v3, v8 5717; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, v2, v12 5718; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v10, v4, 0 5719; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v10, v5, 0 5720; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, v3, v13, vcc 5721; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v14, v4, v[10:11] 5722; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v12, v7, 0 5723; GFX90A-GISEL-NEXT: v_add_u32_e32 v9, v9, v4 5724; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v6, 0 5725; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v13, v6, v[10:11] 5726; GFX90A-GISEL-NEXT: v_add_u32_e32 v5, v5, v6 5727; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 1, v0 5728; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v1, vcc 5729; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v10, vcc, 1, v2 5730; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v3, vcc 5731; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v0, 0 5732; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 1, v8 5733; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v6, 0 5734; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v9, v6, v[0:1] 5735; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v9, vcc 5736; GFX90A-GISEL-NEXT: v_add_u32_e32 v8, v3, v0 5737; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], 
v4, v11, 0 5738; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, 1, v4 5739; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v4, v10, 0 5740; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v10, v[0:1] 5741; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v15, vcc, 0, v5, vcc 5742; GFX90A-GISEL-NEXT: v_add_u32_e32 v7, v7, v0 5743; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v12, 0 5744; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, v13, 0 5745; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v12, v[2:3] 5746; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v15, 0 5747; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, v1, v2 5748; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v14, 0 5749; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v14, v[4:5] 5750; GFX90A-GISEL-NEXT: v_add_u32_e32 v3, v3, v4 5751; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31] 5752; 5753; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i64: 5754; GFX10-SDAG: ; %bb.0: ; %entry 5755; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5756; GFX10-SDAG-NEXT: v_add_co_u32 v8, vcc_lo, v0, 1 5757; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, 0, v1, vcc_lo 5758; GFX10-SDAG-NEXT: v_add_co_u32 v10, vcc_lo, v2, 1 5759; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v11, vcc_lo, 0, v3, vcc_lo 5760; GFX10-SDAG-NEXT: v_mul_lo_u32 v12, v9, v4 5761; GFX10-SDAG-NEXT: v_mul_lo_u32 v13, v8, v5 5762; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v8, v4, 0 5763; GFX10-SDAG-NEXT: v_mul_lo_u32 v14, v11, v6 5764; GFX10-SDAG-NEXT: v_mul_lo_u32 v15, v10, v7 5765; GFX10-SDAG-NEXT: v_mad_u64_u32 v[2:3], null, v10, v6, 0 5766; GFX10-SDAG-NEXT: v_add3_u32 v12, v1, v13, v12 5767; GFX10-SDAG-NEXT: v_add_co_u32 v1, vcc_lo, v0, v8 5768; GFX10-SDAG-NEXT: v_add3_u32 v13, v3, v15, v14 5769; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v12, v9, vcc_lo 5770; GFX10-SDAG-NEXT: v_add_co_u32 v8, vcc_lo, v2, v10 5771; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v13, v11, vcc_lo 5772; GFX10-SDAG-NEXT: v_mul_lo_u32 v10, v3, v4 
5773; GFX10-SDAG-NEXT: v_mul_lo_u32 v11, v1, v5 5774; GFX10-SDAG-NEXT: v_mul_lo_u32 v7, v8, v7 5775; GFX10-SDAG-NEXT: v_mul_lo_u32 v9, v9, v6 5776; GFX10-SDAG-NEXT: v_mad_u64_u32 v[5:6], null, v8, v6, 0 5777; GFX10-SDAG-NEXT: v_mad_u64_u32 v[3:4], null, v1, v4, 0 5778; GFX10-SDAG-NEXT: v_add3_u32 v6, v6, v7, v9 5779; GFX10-SDAG-NEXT: v_add3_u32 v4, v4, v11, v10 5780; GFX10-SDAG-NEXT: v_mul_lo_u32 v11, v3, v12 5781; GFX10-SDAG-NEXT: v_mul_lo_u32 v9, v6, v2 5782; GFX10-SDAG-NEXT: v_mad_u64_u32 v[7:8], null, v5, v2, v[5:6] 5783; GFX10-SDAG-NEXT: v_mul_lo_u32 v10, v4, v0 5784; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v3, v0, v[3:4] 5785; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v5, v13 5786; GFX10-SDAG-NEXT: v_mul_lo_u32 v6, v7, v6 5787; GFX10-SDAG-NEXT: v_add3_u32 v1, v10, v1, v11 5788; GFX10-SDAG-NEXT: v_mul_lo_u32 v4, v0, v4 5789; GFX10-SDAG-NEXT: v_add3_u32 v8, v9, v8, v2 5790; GFX10-SDAG-NEXT: v_mul_lo_u32 v9, v1, v3 5791; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v0, v3, v[0:1] 5792; GFX10-SDAG-NEXT: v_mul_lo_u32 v10, v8, v5 5793; GFX10-SDAG-NEXT: v_mad_u64_u32 v[2:3], null, v7, v5, v[7:8] 5794; GFX10-SDAG-NEXT: v_add3_u32 v1, v9, v1, v4 5795; GFX10-SDAG-NEXT: v_add3_u32 v3, v10, v3, v6 5796; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 5797; 5798; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i64: 5799; GFX10-GISEL: ; %bb.0: ; %entry 5800; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5801; GFX10-GISEL-NEXT: v_add_co_u32 v12, vcc_lo, v0, 1 5802; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, 0, v1, vcc_lo 5803; GFX10-GISEL-NEXT: v_add_co_u32 v14, vcc_lo, v2, 1 5804; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v12, v4, 0 5805; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v15, vcc_lo, 0, v3, vcc_lo 5806; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], null, v14, v6, 0 5807; GFX10-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v12, v5, v[1:2] 5808; GFX10-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v14, v7, v[3:4] 5809; GFX10-GISEL-NEXT: v_add_co_u32 v3, vcc_lo, v0, v12 5810; 
GFX10-GISEL-NEXT: v_mad_u64_u32 v[10:11], null, v13, v4, v[8:9] 5811; GFX10-GISEL-NEXT: v_mad_u64_u32 v[11:12], null, v3, v4, 0 5812; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v16, vcc_lo, v10, v13, vcc_lo 5813; GFX10-GISEL-NEXT: v_add_co_u32 v17, vcc_lo, v2, v14 5814; GFX10-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v15, v6, v[9:10] 5815; GFX10-GISEL-NEXT: v_mad_u64_u32 v[13:14], null, v17, v6, 0 5816; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v18, vcc_lo, v8, v15, vcc_lo 5817; GFX10-GISEL-NEXT: v_add_co_u32 v19, vcc_lo, v0, 1 5818; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, v12 5819; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, v14 5820; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, 0, v10, vcc_lo 5821; GFX10-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v11, v19, 0 5822; GFX10-GISEL-NEXT: v_mad_u64_u32 v[14:15], null, v3, v5, v[0:1] 5823; GFX10-GISEL-NEXT: v_add_co_u32 v15, vcc_lo, v2, 1 5824; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v17, v7, v[1:2] 5825; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v12, vcc_lo, 0, v8, vcc_lo 5826; GFX10-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v13, v15, 0 5827; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, v10 5828; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v16, v4, v[14:15] 5829; GFX10-GISEL-NEXT: v_add_co_u32 v14, vcc_lo, v11, 1 5830; GFX10-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v18, v6, v[0:1] 5831; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, v8 5832; GFX10-GISEL-NEXT: v_mad_u64_u32 v[10:11], null, v11, v20, v[1:2] 5833; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v16, vcc_lo, 0, v4, vcc_lo 5834; GFX10-GISEL-NEXT: v_mad_u64_u32 v[11:12], null, v13, v12, v[6:7] 5835; GFX10-GISEL-NEXT: v_add_co_u32 v17, vcc_lo, v13, 1 5836; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v9, v14, 0 5837; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v18, vcc_lo, 0, v5, vcc_lo 5838; GFX10-GISEL-NEXT: v_mad_u64_u32 v[12:13], null, v4, v19, v[10:11] 5839; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], null, v7, v17, 0 5840; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v5, v15, v[11:12] 5841; GFX10-GISEL-NEXT: 
v_mad_u64_u32 v[5:6], null, v9, v16, v[1:2] 5842; GFX10-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v7, v18, v[3:4] 5843; GFX10-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v12, v14, v[5:6] 5844; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v4, v17, v[7:8] 5845; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, v5 5846; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 5847; 5848; GFX11-SDAG-LABEL: clpeak_imad_pat_v2i64: 5849; GFX11-SDAG: ; %bb.0: ; %entry 5850; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5851; GFX11-SDAG-NEXT: v_add_co_u32 v8, vcc_lo, v0, 1 5852; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, 0, v1, vcc_lo 5853; GFX11-SDAG-NEXT: v_add_co_u32 v10, vcc_lo, v2, 1 5854; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v11, vcc_lo, 0, v3, vcc_lo 5855; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) 5856; GFX11-SDAG-NEXT: v_mul_lo_u32 v12, v9, v4 5857; GFX11-SDAG-NEXT: v_mul_lo_u32 v13, v8, v5 5858; GFX11-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v8, v4, 0 5859; GFX11-SDAG-NEXT: v_mul_lo_u32 v14, v11, v6 5860; GFX11-SDAG-NEXT: v_mul_lo_u32 v15, v10, v7 5861; GFX11-SDAG-NEXT: v_mad_u64_u32 v[2:3], null, v10, v6, 0 5862; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) 5863; GFX11-SDAG-NEXT: v_add3_u32 v1, v1, v13, v12 5864; GFX11-SDAG-NEXT: v_add3_u32 v12, v3, v15, v14 5865; GFX11-SDAG-NEXT: v_add_co_u32 v3, vcc_lo, v0, v8 5866; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) 5867; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v1, v9, vcc_lo 5868; GFX11-SDAG-NEXT: v_add_co_u32 v9, vcc_lo, v2, v10 5869; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v12, v11, vcc_lo 5870; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) 5871; GFX11-SDAG-NEXT: v_mul_lo_u32 v11, v8, v4 5872; GFX11-SDAG-NEXT: v_mul_lo_u32 v13, v3, v5 5873; GFX11-SDAG-NEXT: v_mul_lo_u32 v14, v9, v7 5874; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | 
instskip(SKIP_2) | instid1(VALU_DEP_2) 5875; GFX11-SDAG-NEXT: v_mul_lo_u32 v10, v10, v6 5876; GFX11-SDAG-NEXT: v_mad_u64_u32 v[7:8], null, v9, v6, 0 5877; GFX11-SDAG-NEXT: v_mad_u64_u32 v[5:6], null, v3, v4, 0 5878; GFX11-SDAG-NEXT: v_add3_u32 v8, v8, v14, v10 5879; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 5880; GFX11-SDAG-NEXT: v_add3_u32 v6, v6, v13, v11 5881; GFX11-SDAG-NEXT: v_mul_lo_u32 v11, v8, v2 5882; GFX11-SDAG-NEXT: v_mad_u64_u32 v[9:10], null, v7, v2, v[7:8] 5883; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3) 5884; GFX11-SDAG-NEXT: v_mul_lo_u32 v13, v6, v0 5885; GFX11-SDAG-NEXT: v_mad_u64_u32 v[3:4], null, v5, v0, v[5:6] 5886; GFX11-SDAG-NEXT: v_mul_lo_u32 v0, v5, v1 5887; GFX11-SDAG-NEXT: v_mul_lo_u32 v1, v7, v12 5888; GFX11-SDAG-NEXT: v_mul_lo_u32 v6, v3, v6 5889; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 5890; GFX11-SDAG-NEXT: v_add3_u32 v4, v13, v4, v0 5891; GFX11-SDAG-NEXT: v_add3_u32 v10, v11, v10, v1 5892; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4) 5893; GFX11-SDAG-NEXT: v_mul_lo_u32 v11, v4, v5 5894; GFX11-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v3, v5, v[3:4] 5895; GFX11-SDAG-NEXT: v_mul_lo_u32 v4, v9, v8 5896; GFX11-SDAG-NEXT: v_mul_lo_u32 v5, v10, v7 5897; GFX11-SDAG-NEXT: v_mad_u64_u32 v[2:3], null, v9, v7, v[9:10] 5898; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) 5899; GFX11-SDAG-NEXT: v_add3_u32 v1, v11, v1, v6 5900; GFX11-SDAG-NEXT: v_add3_u32 v3, v5, v3, v4 5901; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 5902; 5903; GFX11-GISEL-LABEL: clpeak_imad_pat_v2i64: 5904; GFX11-GISEL: ; %bb.0: ; %entry 5905; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5906; GFX11-GISEL-NEXT: v_add_co_u32 v13, vcc_lo, v0, 1 5907; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v14, vcc_lo, 0, v1, vcc_lo 5908; GFX11-GISEL-NEXT: v_add_co_u32 
v15, vcc_lo, v2, 1 5909; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) 5910; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v13, v4, 0 5911; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v16, vcc_lo, 0, v3, vcc_lo 5912; GFX11-GISEL-NEXT: v_mad_u64_u32 v[2:3], null, v15, v6, 0 5913; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 5914; GFX11-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v13, v5, v[1:2] 5915; GFX11-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v15, v7, v[3:4] 5916; GFX11-GISEL-NEXT: v_add_co_u32 v3, vcc_lo, v0, v13 5917; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 5918; GFX11-GISEL-NEXT: v_mad_u64_u32 v[10:11], null, v14, v4, v[8:9] 5919; GFX11-GISEL-NEXT: v_mad_u64_u32 v[11:12], null, v16, v6, v[9:10] 5920; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, v10, v14, vcc_lo 5921; GFX11-GISEL-NEXT: v_add_co_u32 v18, vcc_lo, v2, v15 5922; GFX11-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v3, v4, 0 5923; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) 5924; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v19, vcc_lo, v11, v16, vcc_lo 5925; GFX11-GISEL-NEXT: v_mad_u64_u32 v[12:13], null, v18, v6, 0 5926; GFX11-GISEL-NEXT: v_add_co_u32 v20, vcc_lo, v0, 1 5927; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0, v10, vcc_lo 5928; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, v9 5929; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1) 5930; GFX11-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v8, v20, 0 5931; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, v13 5932; GFX11-GISEL-NEXT: v_mad_u64_u32 v[13:14], null, v3, v5, v[0:1] 5933; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3) 5934; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, v10 5935; GFX11-GISEL-NEXT: v_mad_u64_u32 v[14:15], null, v18, v7, v[1:2] 5936; GFX11-GISEL-NEXT: v_add_co_u32 v18, vcc_lo, v2, 1 5937; 
GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v11, vcc_lo 5938; GFX11-GISEL-NEXT: v_mad_u64_u32 v[15:16], null, v17, v4, v[13:14] 5939; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 5940; GFX11-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v12, v18, 0 5941; GFX11-GISEL-NEXT: v_mad_u64_u32 v[10:11], null, v19, v6, v[14:15] 5942; GFX11-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v8, v21, v[0:1] 5943; GFX11-GISEL-NEXT: v_add_co_u32 v14, vcc_lo, v8, 1 5944; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v16, vcc_lo, 0, v15, vcc_lo 5945; GFX11-GISEL-NEXT: v_add_co_u32 v17, vcc_lo, v12, 1 5946; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 5947; GFX11-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v12, v22, v[5:6] 5948; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v9, v14, 0 5949; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) 5950; GFX11-GISEL-NEXT: v_mad_u64_u32 v[2:3], null, v4, v17, 0 5951; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v19, vcc_lo, 0, v10, vcc_lo 5952; GFX11-GISEL-NEXT: v_mad_u64_u32 v[11:12], null, v15, v20, v[6:7] 5953; GFX11-GISEL-NEXT: v_mad_u64_u32 v[12:13], null, v10, v18, v[7:8] 5954; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 5955; GFX11-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v9, v16, v[1:2] 5956; GFX11-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v4, v19, v[3:4] 5957; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 5958; GFX11-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v11, v14, v[7:8] 5959; GFX11-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v12, v17, v[8:9] 5960; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 5961; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, v5 5962; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 5963; 5964; GFX1200-SDAG-LABEL: clpeak_imad_pat_v2i64: 5965; GFX1200-SDAG: ; %bb.0: ; %entry 5966; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 5967; 
GFX1200-SDAG-NEXT: s_wait_expcnt 0x0 5968; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0 5969; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0 5970; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0 5971; GFX1200-SDAG-NEXT: v_add_co_u32 v8, vcc_lo, v0, 1 5972; GFX1200-SDAG-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, 0, v1, vcc_lo 5973; GFX1200-SDAG-NEXT: v_add_co_u32 v10, vcc_lo, v2, 1 5974; GFX1200-SDAG-NEXT: v_add_co_ci_u32_e32 v11, vcc_lo, 0, v3, vcc_lo 5975; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) 5976; GFX1200-SDAG-NEXT: v_mul_lo_u32 v12, v9, v4 5977; GFX1200-SDAG-NEXT: v_mul_lo_u32 v13, v8, v5 5978; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[0:1], null, v8, v4, 0 5979; GFX1200-SDAG-NEXT: v_mul_lo_u32 v14, v11, v6 5980; GFX1200-SDAG-NEXT: v_mul_lo_u32 v15, v10, v7 5981; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[2:3], null, v10, v6, 0 5982; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) 5983; GFX1200-SDAG-NEXT: v_add3_u32 v12, v1, v13, v12 5984; GFX1200-SDAG-NEXT: v_add_co_u32 v1, vcc_lo, v0, v8 5985; GFX1200-SDAG-NEXT: v_add3_u32 v13, v3, v15, v14 5986; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) 5987; GFX1200-SDAG-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v12, v9, vcc_lo 5988; GFX1200-SDAG-NEXT: v_add_co_u32 v8, vcc_lo, v2, v10 5989; GFX1200-SDAG-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v13, v11, vcc_lo 5990; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) 5991; GFX1200-SDAG-NEXT: v_mul_lo_u32 v10, v3, v4 5992; GFX1200-SDAG-NEXT: v_mul_lo_u32 v11, v1, v5 5993; GFX1200-SDAG-NEXT: v_mul_lo_u32 v7, v8, v7 5994; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2) 5995; GFX1200-SDAG-NEXT: v_mul_lo_u32 v9, v9, v6 5996; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[5:6], null, v8, v6, 0 5997; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[3:4], null, v1, v4, 0 5998; GFX1200-SDAG-NEXT: v_add3_u32 v6, v6, v7, v9 
5999; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) 6000; GFX1200-SDAG-NEXT: v_add3_u32 v4, v4, v11, v10 6001; GFX1200-SDAG-NEXT: v_mul_lo_u32 v11, v3, v12 6002; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) 6003; GFX1200-SDAG-NEXT: v_mul_lo_u32 v9, v6, v2 6004; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[7:8], null, v5, v2, v[5:6] 6005; GFX1200-SDAG-NEXT: v_mul_lo_u32 v10, v4, v0 6006; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[0:1], null, v3, v0, v[3:4] 6007; GFX1200-SDAG-NEXT: v_mul_lo_u32 v2, v5, v13 6008; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) 6009; GFX1200-SDAG-NEXT: v_mul_lo_u32 v6, v7, v6 6010; GFX1200-SDAG-NEXT: v_add3_u32 v1, v10, v1, v11 6011; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 6012; GFX1200-SDAG-NEXT: v_mul_lo_u32 v4, v0, v4 6013; GFX1200-SDAG-NEXT: v_add3_u32 v8, v9, v8, v2 6014; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) 6015; GFX1200-SDAG-NEXT: v_mul_lo_u32 v9, v1, v3 6016; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[0:1], null, v0, v3, v[0:1] 6017; GFX1200-SDAG-NEXT: v_mul_lo_u32 v10, v8, v5 6018; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[2:3], null, v7, v5, v[7:8] 6019; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 6020; GFX1200-SDAG-NEXT: v_add3_u32 v1, v9, v1, v4 6021; GFX1200-SDAG-NEXT: v_add3_u32 v3, v10, v3, v6 6022; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31] 6023; 6024; GFX1200-GISEL-LABEL: clpeak_imad_pat_v2i64: 6025; GFX1200-GISEL: ; %bb.0: ; %entry 6026; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 6027; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0 6028; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 6029; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 6030; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 6031; GFX1200-GISEL-NEXT: v_add_co_u32 v8, vcc_lo, v0, 1 6032; GFX1200-GISEL-NEXT: v_add_co_ci_u32_e32 v9, 
vcc_lo, 0, v1, vcc_lo 6033; GFX1200-GISEL-NEXT: v_add_co_u32 v10, vcc_lo, v2, 1 6034; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) 6035; GFX1200-GISEL-NEXT: v_mul_hi_u32 v0, v8, v4 6036; GFX1200-GISEL-NEXT: v_add_co_ci_u32_e32 v11, vcc_lo, 0, v3, vcc_lo 6037; GFX1200-GISEL-NEXT: v_mul_hi_u32 v1, v10, v6 6038; GFX1200-GISEL-NEXT: v_mul_lo_u32 v12, v8, v4 6039; GFX1200-GISEL-NEXT: v_mul_lo_u32 v13, v10, v6 6040; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) 6041; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[2:3], null, v8, v5, v[0:1] 6042; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[0:1], null, v10, v7, v[1:2] 6043; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 6044; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[1:2], null, v9, v4, v[2:3] 6045; GFX1200-GISEL-NEXT: v_mov_b32_e32 v14, v1 6046; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) 6047; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[2:3], null, v11, v6, v[0:1] 6048; GFX1200-GISEL-NEXT: v_add_co_u32 v3, vcc_lo, v12, v8 6049; GFX1200-GISEL-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v14, v9, vcc_lo 6050; GFX1200-GISEL-NEXT: v_add_co_u32 v10, vcc_lo, v13, v10 6051; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) 6052; GFX1200-GISEL-NEXT: v_mul_hi_u32 v0, v3, v4 6053; GFX1200-GISEL-NEXT: v_mov_b32_e32 v8, v2 6054; GFX1200-GISEL-NEXT: v_mul_lo_u32 v15, v3, v4 6055; GFX1200-GISEL-NEXT: v_mul_hi_u32 v1, v10, v6 6056; GFX1200-GISEL-NEXT: v_mul_lo_u32 v16, v10, v6 6057; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) 6058; GFX1200-GISEL-NEXT: v_add_co_ci_u32_e32 v11, vcc_lo, v8, v11, vcc_lo 6059; GFX1200-GISEL-NEXT: v_add_co_u32 v12, vcc_lo, v12, 1 6060; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[2:3], null, v3, v5, v[0:1] 6061; GFX1200-GISEL-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, 
v14, vcc_lo 6062; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 6063; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[0:1], null, v10, v7, v[1:2] 6064; GFX1200-GISEL-NEXT: v_mul_hi_u32 v1, v15, v12 6065; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) 6066; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[2:3], null, v9, v4, v[2:3] 6067; GFX1200-GISEL-NEXT: v_add_co_u32 v9, vcc_lo, v13, 1 6068; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[3:4], null, v11, v6, v[0:1] 6069; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) 6070; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[4:5], null, v15, v5, v[1:2] 6071; GFX1200-GISEL-NEXT: v_mov_b32_e32 v1, v2 6072; GFX1200-GISEL-NEXT: v_mul_hi_u32 v0, v16, v9 6073; GFX1200-GISEL-NEXT: v_mul_lo_u32 v14, v16, v9 6074; GFX1200-GISEL-NEXT: v_mov_b32_e32 v11, v3 6075; GFX1200-GISEL-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v8, vcc_lo 6076; GFX1200-GISEL-NEXT: v_mul_lo_u32 v8, v15, v12 6077; GFX1200-GISEL-NEXT: v_add_co_u32 v10, vcc_lo, v15, 1 6078; GFX1200-GISEL-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, 0, v1, vcc_lo 6079; GFX1200-GISEL-NEXT: v_add_co_u32 v15, vcc_lo, v16, 1 6080; GFX1200-GISEL-NEXT: v_add_co_ci_u32_e32 v11, vcc_lo, 0, v11, vcc_lo 6081; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) 6082; GFX1200-GISEL-NEXT: v_mul_hi_u32 v1, v8, v10 6083; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[5:6], null, v16, v7, v[0:1] 6084; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) 6085; GFX1200-GISEL-NEXT: v_mul_hi_u32 v0, v14, v15 6086; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[6:7], null, v2, v12, v[4:5] 6087; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[1:2], null, v8, v13, v[1:2] 6088; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 6089; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[3:4], null, v3, v9, v[5:6] 6090; 
GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[4:5], null, v14, v11, v[0:1] 6091; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) 6092; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[1:2], null, v6, v10, v[1:2] 6093; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v8, v10 6094; GFX1200-GISEL-NEXT: v_mul_lo_u32 v2, v14, v15 6095; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[3:4], null, v3, v15, v[4:5] 6096; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] 6097entry: 6098 %y18 = add <2 x i64> %x, <i64 1, i64 1> 6099 %add = mul <2 x i64> %y18, %y 6100 %mul119 = add <2 x i64> %add, %y18 6101 %add2 = mul <2 x i64> %mul119, %y 6102 %add220 = add <2 x i64> %add, <i64 1, i64 1> 6103 %add422 = add <2 x i64> %add2, <i64 1, i64 1> 6104 %mul521 = mul <2 x i64> %add2, %add220 6105 %add6 = mul <2 x i64> %mul521, %add422 6106 ret <2 x i64> %add6 6107} 6108 6109define i32 @v_multi_use_mul_chain_add_other_use_all(i32 %arg, i32 %arg1, i32 %arg2, ptr addrspace(1) %ptr) { 6110; GFX6-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_all: 6111; GFX6-SDAG: ; %bb.0: ; %bb 6112; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6113; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 6114; GFX6-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6115; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 6116; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 6117; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 6118; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, v2, v0 6119; GFX6-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 6120; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v2 6121; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 6122; GFX6-SDAG-NEXT: v_mul_lo_u32 v5, v0, v1 6123; GFX6-SDAG-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64 6124; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) 6125; GFX6-SDAG-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64 6126; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) 6127; GFX6-SDAG-NEXT: buffer_store_dword v5, v[3:4], s[4:7], 0 addr64 6128; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(1) 6129; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, v5, v1 6130; GFX6-SDAG-NEXT: 
s_waitcnt expcnt(0) 6131; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] 6132; 6133; GFX6-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_all: 6134; GFX6-GISEL: ; %bb.0: ; %bb 6135; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6136; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 6137; GFX6-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6138; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 6139; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 6140; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 6141; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 6142; GFX6-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6143; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v2 6144; GFX6-GISEL-NEXT: v_mul_lo_u32 v5, v0, v1 6145; GFX6-GISEL-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64 6146; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) 6147; GFX6-GISEL-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64 6148; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) 6149; GFX6-GISEL-NEXT: buffer_store_dword v5, v[3:4], s[4:7], 0 addr64 6150; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(1) 6151; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v1 6152; GFX6-GISEL-NEXT: s_waitcnt expcnt(0) 6153; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] 6154; 6155; GFX7-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_all: 6156; GFX7-SDAG: ; %bb.0: ; %bb 6157; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6158; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 6159; GFX7-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6160; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 6161; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 6162; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 6163; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, v2, v0 6164; GFX7-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 6165; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v2 6166; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 6167; GFX7-SDAG-NEXT: v_mul_lo_u32 v5, v0, v1 6168; GFX7-SDAG-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64 6169; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) 6170; GFX7-SDAG-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64 6171; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) 6172; 
GFX7-SDAG-NEXT: buffer_store_dword v5, v[3:4], s[4:7], 0 addr64 6173; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) 6174; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, v5, v1 6175; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] 6176; 6177; GFX7-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_all: 6178; GFX7-GISEL: ; %bb.0: ; %bb 6179; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6180; GFX7-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 6181; GFX7-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6182; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 6183; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 6184; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 6185; GFX7-GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 6186; GFX7-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6187; GFX7-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v2 6188; GFX7-GISEL-NEXT: v_mul_lo_u32 v5, v0, v1 6189; GFX7-GISEL-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64 6190; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) 6191; GFX7-GISEL-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64 6192; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) 6193; GFX7-GISEL-NEXT: buffer_store_dword v5, v[3:4], s[4:7], 0 addr64 6194; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) 6195; GFX7-GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v1 6196; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] 6197; 6198; GFX8-LABEL: v_multi_use_mul_chain_add_other_use_all: 6199; GFX8: ; %bb.0: ; %bb 6200; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6201; GFX8-NEXT: v_add_u32_e32 v0, vcc, 1, v0 6202; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1 6203; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 6204; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1 6205; GFX8-NEXT: v_add_u32_e32 v1, vcc, 1, v2 6206; GFX8-NEXT: v_mul_lo_u32 v5, v0, v1 6207; GFX8-NEXT: flat_store_dword v[3:4], v2 6208; GFX8-NEXT: s_waitcnt vmcnt(0) 6209; GFX8-NEXT: flat_store_dword v[3:4], v0 6210; GFX8-NEXT: s_waitcnt vmcnt(0) 6211; GFX8-NEXT: flat_store_dword v[3:4], v5 6212; GFX8-NEXT: s_waitcnt vmcnt(0) 6213; GFX8-NEXT: v_add_u32_e32 v0, vcc, v5, v1 6214; GFX8-NEXT: s_setpc_b64 s[30:31] 6215; 6216; GFX900-SDAG-LABEL: 
v_multi_use_mul_chain_add_other_use_all: 6217; GFX900-SDAG: ; %bb.0: ; %bb 6218; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6219; GFX900-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 6220; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6221; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v2, v0 6222; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 6223; GFX900-SDAG-NEXT: v_add_u32_e32 v1, 1, v2 6224; GFX900-SDAG-NEXT: v_mul_lo_u32 v5, v0, v1 6225; GFX900-SDAG-NEXT: global_store_dword v[3:4], v2, off 6226; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) 6227; GFX900-SDAG-NEXT: global_store_dword v[3:4], v0, off 6228; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) 6229; GFX900-SDAG-NEXT: global_store_dword v[3:4], v5, off 6230; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) 6231; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v5, v1 6232; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 6233; 6234; GFX900-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_all: 6235; GFX900-GISEL: ; %bb.0: ; %bb 6236; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6237; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 6238; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6239; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v2, v0 6240; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6241; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v2 6242; GFX900-GISEL-NEXT: v_mul_lo_u32 v5, v0, v1 6243; GFX900-GISEL-NEXT: global_store_dword v[3:4], v2, off 6244; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) 6245; GFX900-GISEL-NEXT: global_store_dword v[3:4], v0, off 6246; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) 6247; GFX900-GISEL-NEXT: global_store_dword v[3:4], v5, off 6248; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) 6249; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v5, v1 6250; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 6251; 6252; GFX90A-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_all: 6253; GFX90A-SDAG: ; %bb.0: ; %bb 6254; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6255; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 6256; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6257; GFX90A-SDAG-NEXT: 
v_add_u32_e32 v0, v2, v0 6258; GFX90A-SDAG-NEXT: v_mov_b32_e32 v5, v4 6259; GFX90A-SDAG-NEXT: v_mov_b32_e32 v4, v3 6260; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 6261; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, 1, v2 6262; GFX90A-SDAG-NEXT: global_store_dword v[4:5], v2, off 6263; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) 6264; GFX90A-SDAG-NEXT: global_store_dword v[4:5], v0, off 6265; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) 6266; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 6267; GFX90A-SDAG-NEXT: global_store_dword v[4:5], v0, off 6268; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) 6269; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v0, v1 6270; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31] 6271; 6272; GFX90A-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_all: 6273; GFX90A-GISEL: ; %bb.0: ; %bb 6274; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6275; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 6276; GFX90A-GISEL-NEXT: v_mov_b32_e32 v2, v3 6277; GFX90A-GISEL-NEXT: v_mov_b32_e32 v3, v4 6278; GFX90A-GISEL-NEXT: v_mul_lo_u32 v4, v0, v1 6279; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v4, v0 6280; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6281; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v4 6282; GFX90A-GISEL-NEXT: global_store_dword v[2:3], v4, off 6283; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) 6284; GFX90A-GISEL-NEXT: global_store_dword v[2:3], v0, off 6285; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) 6286; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6287; GFX90A-GISEL-NEXT: global_store_dword v[2:3], v0, off 6288; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) 6289; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v0, v1 6290; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31] 6291; 6292; GFX10-LABEL: v_multi_use_mul_chain_add_other_use_all: 6293; GFX10: ; %bb.0: ; %bb 6294; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6295; GFX10-NEXT: v_add_nc_u32_e32 v0, 1, v0 6296; GFX10-NEXT: v_mul_lo_u32 v2, v0, v1 6297; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0 6298; GFX10-NEXT: v_mul_lo_u32 v1, v0, v1 6299; GFX10-NEXT: v_add_nc_u32_e32 v0, 1, v2 
6300; GFX10-NEXT: v_mul_lo_u32 v5, v1, v0 6301; GFX10-NEXT: global_store_dword v[3:4], v2, off 6302; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 6303; GFX10-NEXT: global_store_dword v[3:4], v1, off 6304; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 6305; GFX10-NEXT: global_store_dword v[3:4], v5, off 6306; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 6307; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v0 6308; GFX10-NEXT: s_setpc_b64 s[30:31] 6309; 6310; GFX11-LABEL: v_multi_use_mul_chain_add_other_use_all: 6311; GFX11: ; %bb.0: ; %bb 6312; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6313; GFX11-NEXT: v_add_nc_u32_e32 v0, 1, v0 6314; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6315; GFX11-NEXT: v_mul_lo_u32 v2, v0, v1 6316; GFX11-NEXT: v_add_nc_u32_e32 v0, v2, v0 6317; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 6318; GFX11-NEXT: v_mul_lo_u32 v1, v0, v1 6319; GFX11-NEXT: v_add_nc_u32_e32 v0, 1, v2 6320; GFX11-NEXT: v_mul_lo_u32 v5, v1, v0 6321; GFX11-NEXT: global_store_b32 v[3:4], v2, off dlc 6322; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 6323; GFX11-NEXT: global_store_b32 v[3:4], v1, off dlc 6324; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 6325; GFX11-NEXT: global_store_b32 v[3:4], v5, off dlc 6326; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 6327; GFX11-NEXT: v_add_nc_u32_e32 v0, v5, v0 6328; GFX11-NEXT: s_setpc_b64 s[30:31] 6329; 6330; GFX1200-LABEL: v_multi_use_mul_chain_add_other_use_all: 6331; GFX1200: ; %bb.0: ; %bb 6332; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 6333; GFX1200-NEXT: s_wait_expcnt 0x0 6334; GFX1200-NEXT: s_wait_samplecnt 0x0 6335; GFX1200-NEXT: s_wait_bvhcnt 0x0 6336; GFX1200-NEXT: s_wait_kmcnt 0x0 6337; GFX1200-NEXT: v_add_nc_u32_e32 v0, 1, v0 6338; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6339; GFX1200-NEXT: v_mul_lo_u32 v2, v0, v1 6340; GFX1200-NEXT: v_add_nc_u32_e32 v0, v2, v0 6341; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | 
instid1(VALU_DEP_1) 6342; GFX1200-NEXT: v_mul_lo_u32 v1, v0, v1 6343; GFX1200-NEXT: v_add_nc_u32_e32 v0, 1, v2 6344; GFX1200-NEXT: v_mul_lo_u32 v5, v1, v0 6345; GFX1200-NEXT: s_wait_storecnt 0x0 6346; GFX1200-NEXT: global_store_b32 v[3:4], v2, off scope:SCOPE_SYS 6347; GFX1200-NEXT: s_wait_storecnt 0x0 6348; GFX1200-NEXT: global_store_b32 v[3:4], v1, off scope:SCOPE_SYS 6349; GFX1200-NEXT: s_wait_storecnt 0x0 6350; GFX1200-NEXT: global_store_b32 v[3:4], v5, off scope:SCOPE_SYS 6351; GFX1200-NEXT: s_wait_storecnt 0x0 6352; GFX1200-NEXT: v_add_nc_u32_e32 v0, v5, v0 6353; GFX1200-NEXT: s_setpc_b64 s[30:31] 6354bb: 6355 %i = add i32 %arg, 1 6356 %i3 = mul i32 %i, %arg1 6357 store volatile i32 %i3, ptr addrspace(1) %ptr 6358 %i4 = add i32 %i3, %i 6359 %i5 = mul i32 %i4, %arg1 6360 store volatile i32 %i5, ptr addrspace(1) %ptr 6361 %i6 = add i32 %i3, 1 6362 %i7 = mul i32 %i5, %i6 6363 store volatile i32 %i7, ptr addrspace(1) %ptr 6364 %i8 = add i32 %i7, %i6 6365 ret i32 %i8 6366} 6367 6368define i32 @v_multi_use_mul_chain_add_other_use_some(i32 %arg, i32 %arg1, i32 %arg2, ptr addrspace(1) %ptr) { 6369; GFX6-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_some: 6370; GFX6-SDAG: ; %bb.0: ; %bb 6371; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6372; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 6373; GFX6-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6374; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 6375; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 6376; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 6377; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, v2, v0 6378; GFX6-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 6379; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v2 6380; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 6381; GFX6-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 6382; GFX6-SDAG-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64 6383; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) 6384; GFX6-SDAG-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64 6385; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 6386; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, 
v1 6387; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] 6388; 6389; GFX6-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_some: 6390; GFX6-GISEL: ; %bb.0: ; %bb 6391; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6392; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 6393; GFX6-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6394; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 6395; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 6396; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 6397; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 6398; GFX6-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6399; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v2 6400; GFX6-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6401; GFX6-GISEL-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64 6402; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) 6403; GFX6-GISEL-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64 6404; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 6405; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 6406; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] 6407; 6408; GFX7-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_some: 6409; GFX7-SDAG: ; %bb.0: ; %bb 6410; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6411; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 6412; GFX7-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6413; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 6414; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 6415; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 6416; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, v2, v0 6417; GFX7-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 6418; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v2 6419; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 6420; GFX7-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 6421; GFX7-SDAG-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64 6422; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) 6423; GFX7-SDAG-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64 6424; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) 6425; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v1 6426; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] 6427; 6428; GFX7-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_some: 6429; GFX7-GISEL: ; 
%bb.0: ; %bb 6430; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6431; GFX7-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 6432; GFX7-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6433; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 6434; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 6435; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 6436; GFX7-GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 6437; GFX7-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6438; GFX7-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v2 6439; GFX7-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6440; GFX7-GISEL-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64 6441; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) 6442; GFX7-GISEL-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64 6443; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) 6444; GFX7-GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 6445; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] 6446; 6447; GFX8-LABEL: v_multi_use_mul_chain_add_other_use_some: 6448; GFX8: ; %bb.0: ; %bb 6449; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6450; GFX8-NEXT: v_add_u32_e32 v0, vcc, 1, v0 6451; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1 6452; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 6453; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1 6454; GFX8-NEXT: v_add_u32_e32 v1, vcc, 1, v2 6455; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1 6456; GFX8-NEXT: flat_store_dword v[3:4], v2 6457; GFX8-NEXT: s_waitcnt vmcnt(0) 6458; GFX8-NEXT: flat_store_dword v[3:4], v0 6459; GFX8-NEXT: s_waitcnt vmcnt(0) 6460; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 6461; GFX8-NEXT: s_setpc_b64 s[30:31] 6462; 6463; GFX900-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_some: 6464; GFX900-SDAG: ; %bb.0: ; %bb 6465; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6466; GFX900-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 6467; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6468; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v2, v0 6469; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 6470; GFX900-SDAG-NEXT: v_add_u32_e32 v1, 1, v2 6471; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 6472; GFX900-SDAG-NEXT: global_store_dword v[3:4], v2, 
off 6473; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) 6474; GFX900-SDAG-NEXT: global_store_dword v[3:4], v0, off 6475; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) 6476; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v0, v1 6477; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 6478; 6479; GFX900-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_some: 6480; GFX900-GISEL: ; %bb.0: ; %bb 6481; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6482; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 6483; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6484; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v2, v0 6485; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6486; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v2 6487; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6488; GFX900-GISEL-NEXT: global_store_dword v[3:4], v2, off 6489; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) 6490; GFX900-GISEL-NEXT: global_store_dword v[3:4], v0, off 6491; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) 6492; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v0, v1 6493; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 6494; 6495; GFX90A-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_some: 6496; GFX90A-SDAG: ; %bb.0: ; %bb 6497; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6498; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 6499; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6500; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v2, v0 6501; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 6502; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, 1, v2 6503; GFX90A-SDAG-NEXT: v_mov_b32_e32 v5, v4 6504; GFX90A-SDAG-NEXT: v_mov_b32_e32 v4, v3 6505; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 6506; GFX90A-SDAG-NEXT: global_store_dword v[4:5], v2, off 6507; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) 6508; GFX90A-SDAG-NEXT: global_store_dword v[4:5], v0, off 6509; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) 6510; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v0, v1 6511; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31] 6512; 6513; GFX90A-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_some: 6514; GFX90A-GISEL: ; %bb.0: ; %bb 6515; GFX90A-GISEL-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6516; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 6517; GFX90A-GISEL-NEXT: v_mov_b32_e32 v2, v3 6518; GFX90A-GISEL-NEXT: v_mov_b32_e32 v3, v4 6519; GFX90A-GISEL-NEXT: v_mul_lo_u32 v4, v0, v1 6520; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v4, v0 6521; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6522; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v4 6523; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6524; GFX90A-GISEL-NEXT: global_store_dword v[2:3], v4, off 6525; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) 6526; GFX90A-GISEL-NEXT: global_store_dword v[2:3], v0, off 6527; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) 6528; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v0, v1 6529; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31] 6530; 6531; GFX10-LABEL: v_multi_use_mul_chain_add_other_use_some: 6532; GFX10: ; %bb.0: ; %bb 6533; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6534; GFX10-NEXT: v_add_nc_u32_e32 v0, 1, v0 6535; GFX10-NEXT: v_mul_lo_u32 v2, v0, v1 6536; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0 6537; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1 6538; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v2 6539; GFX10-NEXT: v_mul_lo_u32 v5, v0, v1 6540; GFX10-NEXT: global_store_dword v[3:4], v2, off 6541; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 6542; GFX10-NEXT: global_store_dword v[3:4], v5, off 6543; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 6544; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v1 6545; GFX10-NEXT: s_setpc_b64 s[30:31] 6546; 6547; GFX11-LABEL: v_multi_use_mul_chain_add_other_use_some: 6548; GFX11: ; %bb.0: ; %bb 6549; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6550; GFX11-NEXT: v_add_nc_u32_e32 v0, 1, v0 6551; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6552; GFX11-NEXT: v_mul_lo_u32 v2, v0, v1 6553; GFX11-NEXT: v_add_nc_u32_e32 v0, v2, v0 6554; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 6555; GFX11-NEXT: v_mul_lo_u32 v0, v0, v1 6556; GFX11-NEXT: v_add_nc_u32_e32 v1, 1, v2 6557; GFX11-NEXT: 
v_mul_lo_u32 v5, v0, v1 6558; GFX11-NEXT: global_store_b32 v[3:4], v2, off dlc 6559; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 6560; GFX11-NEXT: global_store_b32 v[3:4], v5, off dlc 6561; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 6562; GFX11-NEXT: v_add_nc_u32_e32 v0, v5, v1 6563; GFX11-NEXT: s_setpc_b64 s[30:31] 6564; 6565; GFX1200-LABEL: v_multi_use_mul_chain_add_other_use_some: 6566; GFX1200: ; %bb.0: ; %bb 6567; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 6568; GFX1200-NEXT: s_wait_expcnt 0x0 6569; GFX1200-NEXT: s_wait_samplecnt 0x0 6570; GFX1200-NEXT: s_wait_bvhcnt 0x0 6571; GFX1200-NEXT: s_wait_kmcnt 0x0 6572; GFX1200-NEXT: v_add_nc_u32_e32 v0, 1, v0 6573; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6574; GFX1200-NEXT: v_mul_lo_u32 v2, v0, v1 6575; GFX1200-NEXT: v_add_nc_u32_e32 v0, v2, v0 6576; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 6577; GFX1200-NEXT: v_mul_lo_u32 v0, v0, v1 6578; GFX1200-NEXT: v_add_nc_u32_e32 v1, 1, v2 6579; GFX1200-NEXT: v_mul_lo_u32 v5, v0, v1 6580; GFX1200-NEXT: s_wait_storecnt 0x0 6581; GFX1200-NEXT: global_store_b32 v[3:4], v2, off scope:SCOPE_SYS 6582; GFX1200-NEXT: s_wait_storecnt 0x0 6583; GFX1200-NEXT: global_store_b32 v[3:4], v5, off scope:SCOPE_SYS 6584; GFX1200-NEXT: s_wait_storecnt 0x0 6585; GFX1200-NEXT: v_add_nc_u32_e32 v0, v5, v1 6586; GFX1200-NEXT: s_setpc_b64 s[30:31] 6587bb: 6588 %i = add i32 %arg, 1 6589 %i3 = mul i32 %i, %arg1 6590 store volatile i32 %i3, ptr addrspace(1) %ptr 6591 %i4 = add i32 %i3, %i 6592 %i5 = mul i32 %i4, %arg1 6593 %i6 = add i32 %i3, 1 6594 %i7 = mul i32 %i5, %i6 6595 store volatile i32 %i7, ptr addrspace(1) %ptr 6596 %i8 = add i32 %i7, %i6 6597 ret i32 %i8 6598} 6599 6600define i32 @clpeak_imad_pat_i32_x2(i32 %x, i32 %y) { 6601; GFX67-SDAG-LABEL: clpeak_imad_pat_i32_x2: 6602; GFX67-SDAG: ; %bb.0: ; %entry 6603; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6604; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 
; Reviewer note on @clpeak_imad_pat_i32_x2, the function the surrounding
; assertions belong to. Its IR body is a single `entry` block holding a
; straight-line chain of 16 dependent i32 operations (8 `add`, 8 `mul`) over
; %x and %y, finishing with `ret i32 %add14`.
; Per the assertions, SelectionDAG on gfx900, gfx90a, gfx1030, gfx1100 and
; gfx1200 is expected to emit the final two multiplies (each by a value of the
; form v + 1) as v_mad_u64_u32 / v_mad_co_u64_u32, whereas SelectionDAG on
; gfx600, gfx700 and gfx803 and every GlobalISel run is expected to emit only
; separate v_mul_lo_u32 and add instructions.
; The "_x2" suffix presumably means the clpeak_imad_pat_i32 pattern unrolled
; twice -- TODO confirm against the original clpeak kernel.
; The assertion lines are autogenerated (see the note on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
6605; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6606; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v2, v0 6607; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 6608; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v2 6609; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6610; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v2, v1 6611; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 6612; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v2 6613; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6614; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v2, v1 6615; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 6616; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v0, v2 6617; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v0 6618; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 6619; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v1 6620; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] 6621; 6622; GFX67-GISEL-LABEL: clpeak_imad_pat_i32_x2: 6623; GFX67-GISEL: ; %bb.0: ; %entry 6624; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6625; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 6626; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6627; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0 6628; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6629; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v2 6630; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6631; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v2, v1 6632; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 6633; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v2 6634; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6635; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v2, v1 6636; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 6637; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v2 6638; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 6639; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 6640; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 6641; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] 6642; 6643; GFX8-SDAG-LABEL: clpeak_imad_pat_i32_x2: 6644; GFX8-SDAG: ; %bb.0: ; %entry 6645; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6646; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, 1, 
v0 6647; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6648; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v2, v0 6649; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 6650; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, 1, v2 6651; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6652; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v2, v1 6653; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 6654; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, 1, v2 6655; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6656; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v2, v1 6657; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 6658; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v0, v2 6659; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v0 6660; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 6661; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1 6662; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 6663; 6664; GFX8-GISEL-LABEL: clpeak_imad_pat_i32_x2: 6665; GFX8-GISEL: ; %bb.0: ; %entry 6666; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6667; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0 6668; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6669; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v2, v0 6670; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6671; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v2 6672; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6673; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, v2, v1 6674; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 6675; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v2 6676; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6677; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, v2, v1 6678; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 6679; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v2 6680; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 6681; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0 6682; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 6683; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 6684; 6685; GFX900-SDAG-LABEL: clpeak_imad_pat_i32_x2: 6686; GFX900-SDAG: ; %bb.0: ; %entry 6687; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6688; GFX900-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 6689; GFX900-SDAG-NEXT: 
v_mul_lo_u32 v2, v0, v1 6690; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v2, v0 6691; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 6692; GFX900-SDAG-NEXT: v_add_u32_e32 v1, 1, v2 6693; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6694; GFX900-SDAG-NEXT: v_add_u32_e32 v1, v2, v1 6695; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 6696; GFX900-SDAG-NEXT: v_add_u32_e32 v1, 1, v2 6697; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6698; GFX900-SDAG-NEXT: v_add_u32_e32 v1, v2, v1 6699; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 6700; GFX900-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v0, v2, v[0:1] 6701; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, v0, v[1:2] 6702; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 6703; 6704; GFX900-GISEL-LABEL: clpeak_imad_pat_i32_x2: 6705; GFX900-GISEL: ; %bb.0: ; %entry 6706; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6707; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 6708; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6709; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v2, v0 6710; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6711; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v2 6712; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6713; GFX900-GISEL-NEXT: v_add_u32_e32 v1, v2, v1 6714; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 6715; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v2 6716; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6717; GFX900-GISEL-NEXT: v_add_u32_e32 v1, v2, v1 6718; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 6719; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v2 6720; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 6721; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 6722; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 6723; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 6724; 6725; GFX90A-SDAG-LABEL: clpeak_imad_pat_i32_x2: 6726; GFX90A-SDAG: ; %bb.0: ; %entry 6727; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6728; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 6729; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6730; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v2, v0 6731; 
GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 6732; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, 1, v2 6733; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6734; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, v2, v1 6735; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 6736; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, 1, v2 6737; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6738; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, v2, v1 6739; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 6740; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v2, v[0:1] 6741; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v0, v[2:3] 6742; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31] 6743; 6744; GFX90A-GISEL-LABEL: clpeak_imad_pat_i32_x2: 6745; GFX90A-GISEL: ; %bb.0: ; %entry 6746; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6747; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 6748; GFX90A-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6749; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v2, v0 6750; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6751; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v2 6752; GFX90A-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6753; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, v2, v1 6754; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 6755; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v2 6756; GFX90A-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6757; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, v2, v1 6758; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 6759; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v2 6760; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 6761; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6762; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 6763; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31] 6764; 6765; GFX10-SDAG-LABEL: clpeak_imad_pat_i32_x2: 6766; GFX10-SDAG: ; %bb.0: ; %entry 6767; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6768; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 6769; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6770; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0 6771; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 6772; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, 
v2 6773; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6774; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, v2, v1 6775; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 6776; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v2 6777; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6778; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, v2, v1 6779; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 6780; GFX10-SDAG-NEXT: v_mad_u64_u32 v[1:2], null, v0, v2, v[0:1] 6781; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v1, v0, v[1:2] 6782; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 6783; 6784; GFX10-GISEL-LABEL: clpeak_imad_pat_i32_x2: 6785; GFX10-GISEL: ; %bb.0: ; %entry 6786; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6787; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 6788; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6789; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, v2, v0 6790; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6791; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 6792; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6793; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, v2, v1 6794; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 6795; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 6796; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6797; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, v2, v1 6798; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 6799; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 6800; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 6801; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 6802; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 6803; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 6804; 6805; GFX11-SDAG-LABEL: clpeak_imad_pat_i32_x2: 6806; GFX11-SDAG: ; %bb.0: ; %entry 6807; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6808; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 6809; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6810; GFX11-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6811; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0 6812; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | 
instid1(VALU_DEP_1) 6813; GFX11-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 6814; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v2 6815; GFX11-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6816; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6817; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v1, v2, v1 6818; GFX11-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 6819; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v2 6820; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6821; GFX11-SDAG-NEXT: v_mul_lo_u32 v5, v0, v1 6822; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v1, v5, v1 6823; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6824; GFX11-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0 6825; GFX11-SDAG-NEXT: v_mad_u64_u32 v[3:4], null, v2, v5, v[2:3] 6826; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 6827; GFX11-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, v[3:4] 6828; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 6829; 6830; GFX11-GISEL-LABEL: clpeak_imad_pat_i32_x2: 6831; GFX11-GISEL: ; %bb.0: ; %entry 6832; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6833; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 6834; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6835; GFX11-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6836; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v2, v0 6837; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 6838; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6839; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 6840; GFX11-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6841; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6842; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, v2, v1 6843; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 6844; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 6845; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6846; GFX11-GISEL-NEXT: v_mul_lo_u32 v2, 
v0, v1 6847; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, v2, v1 6848; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 6849; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 6850; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 6851; GFX11-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 6852; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 6853; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 6854; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 6855; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 6856; 6857; GFX1200-SDAG-LABEL: clpeak_imad_pat_i32_x2: 6858; GFX1200-SDAG: ; %bb.0: ; %entry 6859; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 6860; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0 6861; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0 6862; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0 6863; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0 6864; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 6865; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6866; GFX1200-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6867; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0 6868; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 6869; GFX1200-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 6870; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v2 6871; GFX1200-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6872; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6873; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v1, v2, v1 6874; GFX1200-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 6875; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v2 6876; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6877; GFX1200-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 6878; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v1, v2, v1 6879; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6880; GFX1200-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 6881; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[1:2], null, v0, v2, v[0:1] 6882; 
GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 6883; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[0:1], null, v1, v0, v[1:2] 6884; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31] 6885; 6886; GFX1200-GISEL-LABEL: clpeak_imad_pat_i32_x2: 6887; GFX1200-GISEL: ; %bb.0: ; %entry 6888; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 6889; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0 6890; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 6891; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 6892; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 6893; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 6894; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6895; GFX1200-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6896; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, v2, v0 6897; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 6898; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 6899; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 6900; GFX1200-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6901; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6902; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v1, v2, v1 6903; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 6904; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 6905; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 6906; GFX1200-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 6907; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v1, v2, v1 6908; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 6909; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 6910; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 6911; GFX1200-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 6912; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 6913; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 6914; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 6915; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] 6916entry: 6917 %y38 = add i32 %x, 1 6918 %add = mul i32 %y38, %y 6919 %mul139 = add 
i32 %add, %y38 6920 %add2 = mul i32 %mul139, %y 6921 %add240 = add i32 %add, 1 6922 %add4 = mul i32 %add2, %add240 6923 %mul541 = add i32 %add4, %add240 6924 %add6 = mul i32 %mul541, %add2 6925 %add642 = add i32 %add4, 1 6926 %add8 = mul i32 %add6, %add642 6927 %mul943 = add i32 %add8, %add642 6928 %add10 = mul i32 %mul943, %add6 6929 %add1044 = add i32 %add8, 1 6930 %add1246 = add i32 %add10, 1 6931 %mul1345 = mul i32 %add10, %add1044 6932 %add14 = mul i32 %mul1345, %add1246 6933 ret i32 %add14 6934} 6935 6936define <2 x i32> @clpeak_imad_pat_v2i32_x2(<2 x i32> %x, <2 x i32> %y) { 6937; GFX67-SDAG-LABEL: clpeak_imad_pat_v2i32_x2: 6938; GFX67-SDAG: ; %bb.0: ; %entry 6939; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6940; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 6941; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1 6942; GFX67-SDAG-NEXT: v_mul_lo_u32 v5, v0, v2 6943; GFX67-SDAG-NEXT: v_mul_lo_u32 v4, v1, v3 6944; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v5, v0 6945; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v4, v1 6946; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2 6947; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v1, v3 6948; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v5 6949; GFX67-SDAG-NEXT: v_mul_lo_u32 v3, v0, v2 6950; GFX67-SDAG-NEXT: v_add_i32_e32 v4, vcc, 1, v4 6951; GFX67-SDAG-NEXT: v_mul_lo_u32 v5, v1, v4 6952; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, v3, v2 6953; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0 6954; GFX67-SDAG-NEXT: v_add_i32_e32 v4, vcc, v5, v4 6955; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v4, v1 6956; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v3 6957; GFX67-SDAG-NEXT: v_mul_lo_u32 v3, v0, v2 6958; GFX67-SDAG-NEXT: v_add_i32_e32 v4, vcc, 1, v5 6959; GFX67-SDAG-NEXT: v_mul_lo_u32 v5, v1, v4 6960; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, v3, v2 6961; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0 6962; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, v5, v4 6963; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v2, v1 6964; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v0, v3 6965; 
GFX67-SDAG-NEXT: v_mul_lo_u32 v3, v1, v5 6966; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, v2, v0 6967; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0 6968; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, v3, v1 6969; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v3, v1 6970; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v2 6971; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v3 6972; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] 6973; 6974; GFX67-GISEL-LABEL: clpeak_imad_pat_v2i32_x2: 6975; GFX67-GISEL: ; %bb.0: ; %entry 6976; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6977; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 6978; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 6979; GFX67-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 6980; GFX67-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 6981; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 6982; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1 6983; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 6984; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3 6985; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4 6986; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v5 6987; GFX67-GISEL-NEXT: v_mul_lo_u32 v3, v0, v2 6988; GFX67-GISEL-NEXT: v_mul_lo_u32 v5, v1, v4 6989; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 6990; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 6991; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 6992; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v4, v1 6993; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v3 6994; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v5 6995; GFX67-GISEL-NEXT: v_mul_lo_u32 v3, v0, v2 6996; GFX67-GISEL-NEXT: v_mul_lo_u32 v5, v1, v4 6997; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 6998; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 6999; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 7000; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v4, v1 7001; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v3 7002; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 7003; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v0 7004; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 7005; GFX67-GISEL-NEXT: 
v_mul_lo_u32 v2, v1, v3 7006; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 7007; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4 7008; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v2, v1 7009; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] 7010; 7011; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i32_x2: 7012; GFX8-SDAG: ; %bb.0: ; %entry 7013; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7014; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, 1, v0 7015; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, 1, v1 7016; GFX8-SDAG-NEXT: v_mul_lo_u32 v5, v0, v2 7017; GFX8-SDAG-NEXT: v_mul_lo_u32 v4, v1, v3 7018; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v5, v0 7019; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v4, v1 7020; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2 7021; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v1, v3 7022; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, 1, v5 7023; GFX8-SDAG-NEXT: v_mul_lo_u32 v3, v0, v2 7024; GFX8-SDAG-NEXT: v_add_u32_e32 v4, vcc, 1, v4 7025; GFX8-SDAG-NEXT: v_mul_lo_u32 v5, v1, v4 7026; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v3, v2 7027; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0 7028; GFX8-SDAG-NEXT: v_add_u32_e32 v4, vcc, v5, v4 7029; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v4, v1 7030; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, 1, v3 7031; GFX8-SDAG-NEXT: v_mul_lo_u32 v3, v0, v2 7032; GFX8-SDAG-NEXT: v_add_u32_e32 v4, vcc, 1, v5 7033; GFX8-SDAG-NEXT: v_mul_lo_u32 v5, v1, v4 7034; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v3, v2 7035; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0 7036; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v5, v4 7037; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v2, v1 7038; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v0, v3 7039; GFX8-SDAG-NEXT: v_mul_lo_u32 v3, v1, v5 7040; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v2, v0 7041; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0 7042; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, v3, v1 7043; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v3, v1 7044; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v2 7045; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v3 7046; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 7047; 7048; 
GFX8-GISEL-LABEL: clpeak_imad_pat_v2i32_x2: 7049; GFX8-GISEL: ; %bb.0: ; %entry 7050; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7051; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0 7052; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v1 7053; GFX8-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 7054; GFX8-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 7055; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v4, v0 7056; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, v5, v1 7057; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 7058; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3 7059; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 1, v4 7060; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 1, v5 7061; GFX8-GISEL-NEXT: v_mul_lo_u32 v3, v0, v2 7062; GFX8-GISEL-NEXT: v_mul_lo_u32 v5, v1, v4 7063; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, v3, v2 7064; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, v5, v4 7065; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 7066; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v4, v1 7067; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 1, v3 7068; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 1, v5 7069; GFX8-GISEL-NEXT: v_mul_lo_u32 v3, v0, v2 7070; GFX8-GISEL-NEXT: v_mul_lo_u32 v5, v1, v4 7071; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, v3, v2 7072; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, v5, v4 7073; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 7074; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v4, v1 7075; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 1, v3 7076; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 1, v5 7077; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 1, v0 7078; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 7079; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v1, v3 7080; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v1 7081; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4 7082; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v2, v1 7083; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 7084; 7085; GFX900-SDAG-LABEL: clpeak_imad_pat_v2i32_x2: 7086; GFX900-SDAG: ; %bb.0: ; %entry 7087; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7088; GFX900-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 7089; 
GFX900-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 7090; GFX900-SDAG-NEXT: v_add_u32_e32 v1, 1, v1 7091; GFX900-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3 7092; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v4, v0 7093; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2 7094; GFX900-SDAG-NEXT: v_add_u32_e32 v1, v5, v1 7095; GFX900-SDAG-NEXT: v_mul_lo_u32 v1, v1, v3 7096; GFX900-SDAG-NEXT: v_add_u32_e32 v2, 1, v4 7097; GFX900-SDAG-NEXT: v_mul_lo_u32 v3, v0, v2 7098; GFX900-SDAG-NEXT: v_add_u32_e32 v4, 1, v5 7099; GFX900-SDAG-NEXT: v_mul_lo_u32 v5, v1, v4 7100; GFX900-SDAG-NEXT: v_add_u32_e32 v2, v3, v2 7101; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0 7102; GFX900-SDAG-NEXT: v_add_u32_e32 v2, v5, v4 7103; GFX900-SDAG-NEXT: v_mul_lo_u32 v1, v2, v1 7104; GFX900-SDAG-NEXT: v_add_u32_e32 v2, 1, v3 7105; GFX900-SDAG-NEXT: v_mul_lo_u32 v3, v0, v2 7106; GFX900-SDAG-NEXT: v_add_u32_e32 v4, 1, v5 7107; GFX900-SDAG-NEXT: v_mul_lo_u32 v5, v1, v4 7108; GFX900-SDAG-NEXT: v_add_u32_e32 v2, v3, v2 7109; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0 7110; GFX900-SDAG-NEXT: v_add_u32_e32 v2, v5, v4 7111; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v2, v1 7112; GFX900-SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v3, v[0:1] 7113; GFX900-SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v5, v[2:3] 7114; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v0, v[3:4] 7115; GFX900-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v2, v[4:5] 7116; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 7117; 7118; GFX900-GISEL-LABEL: clpeak_imad_pat_v2i32_x2: 7119; GFX900-GISEL: ; %bb.0: ; %entry 7120; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7121; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 7122; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v1 7123; GFX900-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 7124; GFX900-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 7125; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v4, v0 7126; GFX900-GISEL-NEXT: v_add_u32_e32 v1, v5, v1 7127; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 7128; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3 7129; 
GFX900-GISEL-NEXT: v_add_u32_e32 v2, 1, v4 7130; GFX900-GISEL-NEXT: v_add_u32_e32 v3, 1, v5 7131; GFX900-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 7132; GFX900-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 7133; GFX900-GISEL-NEXT: v_add_u32_e32 v2, v4, v2 7134; GFX900-GISEL-NEXT: v_add_u32_e32 v3, v5, v3 7135; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 7136; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 7137; GFX900-GISEL-NEXT: v_add_u32_e32 v2, 1, v4 7138; GFX900-GISEL-NEXT: v_add_u32_e32 v3, 1, v5 7139; GFX900-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 7140; GFX900-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 7141; GFX900-GISEL-NEXT: v_add_u32_e32 v2, v4, v2 7142; GFX900-GISEL-NEXT: v_add_u32_e32 v3, v5, v3 7143; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 7144; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 7145; GFX900-GISEL-NEXT: v_add_u32_e32 v2, 1, v4 7146; GFX900-GISEL-NEXT: v_add_u32_e32 v3, 1, v5 7147; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v0, v2 7148; GFX900-GISEL-NEXT: v_mul_lo_u32 v3, v1, v3 7149; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 7150; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v1 7151; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 7152; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 7153; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 7154; 7155; GFX90A-SDAG-LABEL: clpeak_imad_pat_v2i32_x2: 7156; GFX90A-SDAG: ; %bb.0: ; %entry 7157; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7158; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, 1, v1 7159; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 7160; GFX90A-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 7161; GFX90A-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3 7162; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, v5, v1 7163; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v4, v0 7164; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2 7165; GFX90A-SDAG-NEXT: v_mul_lo_u32 v1, v1, v3 7166; GFX90A-SDAG-NEXT: v_add_u32_e32 v2, 1, v4 7167; GFX90A-SDAG-NEXT: v_add_u32_e32 v3, 1, v5 7168; GFX90A-SDAG-NEXT: v_mul_lo_u32 v4, v1, v3 7169; GFX90A-SDAG-NEXT: v_mul_lo_u32 v5, v0, v2 7170; GFX90A-SDAG-NEXT: v_add_u32_e32 v2, 
v5, v2 7171; GFX90A-SDAG-NEXT: v_add_u32_e32 v3, v4, v3 7172; GFX90A-SDAG-NEXT: v_mul_lo_u32 v1, v3, v1 7173; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0 7174; GFX90A-SDAG-NEXT: v_add_u32_e32 v2, 1, v4 7175; GFX90A-SDAG-NEXT: v_add_u32_e32 v3, 1, v5 7176; GFX90A-SDAG-NEXT: v_mul_lo_u32 v6, v0, v3 7177; GFX90A-SDAG-NEXT: v_mul_lo_u32 v4, v1, v2 7178; GFX90A-SDAG-NEXT: v_add_u32_e32 v2, v4, v2 7179; GFX90A-SDAG-NEXT: v_add_u32_e32 v3, v6, v3 7180; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v3, v0 7181; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v2, v1 7182; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v4, v[2:3] 7183; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v0, v6, v[0:1] 7184; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v0, v[6:7] 7185; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v2, v[4:5] 7186; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, v2 7187; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31] 7188; 7189; GFX90A-GISEL-LABEL: clpeak_imad_pat_v2i32_x2: 7190; GFX90A-GISEL: ; %bb.0: ; %entry 7191; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7192; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 7193; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v1 7194; GFX90A-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 7195; GFX90A-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 7196; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v4, v0 7197; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, v5, v1 7198; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 7199; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3 7200; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, 1, v4 7201; GFX90A-GISEL-NEXT: v_add_u32_e32 v3, 1, v5 7202; GFX90A-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 7203; GFX90A-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 7204; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, v4, v2 7205; GFX90A-GISEL-NEXT: v_add_u32_e32 v3, v5, v3 7206; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 7207; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 7208; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, 1, v4 7209; GFX90A-GISEL-NEXT: v_add_u32_e32 v3, 1, v5 7210; GFX90A-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 
7211; GFX90A-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 7212; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, v4, v2 7213; GFX90A-GISEL-NEXT: v_add_u32_e32 v3, v5, v3 7214; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 7215; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 7216; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, 1, v4 7217; GFX90A-GISEL-NEXT: v_add_u32_e32 v3, 1, v5 7218; GFX90A-GISEL-NEXT: v_add_u32_e32 v4, 1, v0 7219; GFX90A-GISEL-NEXT: v_add_u32_e32 v5, 1, v1 7220; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 7221; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3 7222; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4 7223; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5 7224; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31] 7225; 7226; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i32_x2: 7227; GFX10-SDAG: ; %bb.0: ; %entry 7228; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7229; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 7230; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1 7231; GFX10-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 7232; GFX10-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3 7233; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v4, v0 7234; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, v5, v1 7235; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2 7236; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v4 7237; GFX10-SDAG-NEXT: v_mul_lo_u32 v1, v1, v3 7238; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v3, 1, v5 7239; GFX10-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 7240; GFX10-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3 7241; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v2, v4, v2 7242; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v3, v5, v3 7243; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0 7244; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v4 7245; GFX10-SDAG-NEXT: v_mul_lo_u32 v1, v3, v1 7246; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v3, 1, v5 7247; GFX10-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 7248; GFX10-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3 7249; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v2, v4, v2 7250; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v3, v5, v3 7251; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0 7252; GFX10-SDAG-NEXT: v_mul_lo_u32 
v2, v3, v1 7253; GFX10-SDAG-NEXT: v_mad_u64_u32 v[3:4], null, v0, v4, v[0:1] 7254; GFX10-SDAG-NEXT: v_mad_u64_u32 v[4:5], null, v2, v5, v[2:3] 7255; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v3, v0, v[3:4] 7256; GFX10-SDAG-NEXT: v_mad_u64_u32 v[1:2], null, v4, v2, v[4:5] 7257; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 7258; 7259; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i32_x2: 7260; GFX10-GISEL: ; %bb.0: ; %entry 7261; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7262; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 7263; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 7264; GFX10-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 7265; GFX10-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 7266; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, v4, v0 7267; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, v5, v1 7268; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 7269; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3 7270; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v4 7271; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v5 7272; GFX10-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 7273; GFX10-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 7274; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, v4, v2 7275; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, v5, v3 7276; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 7277; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 7278; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v4 7279; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v5 7280; GFX10-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 7281; GFX10-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 7282; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, v4, v2 7283; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, v5, v3 7284; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 7285; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 7286; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v4 7287; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v5 7288; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v0, v2 7289; GFX10-GISEL-NEXT: v_mul_lo_u32 v3, v1, v3 7290; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 7291; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 7292; GFX10-GISEL-NEXT: 
v_mul_lo_u32 v0, v2, v0 7293; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 7294; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 7295; 7296; GFX11-SDAG-LABEL: clpeak_imad_pat_v2i32_x2: 7297; GFX11-SDAG: ; %bb.0: ; %entry 7298; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7299; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 7300; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1 7301; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7302; GFX11-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 7303; GFX11-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3 7304; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7305; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v0, v4, v0 7306; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v1, v5, v1 7307; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7308; GFX11-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2 7309; GFX11-SDAG-NEXT: v_mul_lo_u32 v1, v1, v3 7310; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v4 7311; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v3, 1, v5 7312; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7313; GFX11-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 7314; GFX11-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3 7315; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7316; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v2, v4, v2 7317; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v3, v5, v3 7318; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7319; GFX11-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0 7320; GFX11-SDAG-NEXT: v_mul_lo_u32 v1, v3, v1 7321; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v4 7322; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v3, 1, v5 7323; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7324; GFX11-SDAG-NEXT: v_mul_lo_u32 v6, v0, v2 7325; GFX11-SDAG-NEXT: v_mul_lo_u32 v7, v1, v3 7326; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 
7327; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v2, v6, v2 7328; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v3, v7, v3 7329; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7330; GFX11-SDAG-NEXT: v_mul_lo_u32 v2, v2, v0 7331; GFX11-SDAG-NEXT: v_mul_lo_u32 v3, v3, v1 7332; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 7333; GFX11-SDAG-NEXT: v_mad_u64_u32 v[4:5], null, v2, v6, v[2:3] 7334; GFX11-SDAG-NEXT: v_mad_u64_u32 v[5:6], null, v3, v7, v[3:4] 7335; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 7336; GFX11-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, v[4:5] 7337; GFX11-SDAG-NEXT: v_mad_u64_u32 v[1:2], null, v5, v3, v[5:6] 7338; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 7339; 7340; GFX11-GISEL-LABEL: clpeak_imad_pat_v2i32_x2: 7341; GFX11-GISEL: ; %bb.0: ; %entry 7342; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7343; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 7344; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 7345; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7346; GFX11-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 7347; GFX11-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 7348; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7349; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v4, v0 7350; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, v5, v1 7351; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7352; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 7353; GFX11-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3 7354; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v4 7355; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v5 7356; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7357; GFX11-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 7358; GFX11-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 7359; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_2) 7360; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v2, v4, v2 7361; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v3, v5, v3 7362; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7363; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 7364; GFX11-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 7365; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v4 7366; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v5 7367; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7368; GFX11-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 7369; GFX11-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 7370; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7371; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v2, v4, v2 7372; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v3, v5, v3 7373; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7374; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 7375; GFX11-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 7376; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v4 7377; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v5 7378; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7379; GFX11-GISEL-NEXT: v_mul_lo_u32 v2, v0, v2 7380; GFX11-GISEL-NEXT: v_mul_lo_u32 v3, v1, v3 7381; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 7382; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 7383; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7384; GFX11-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 7385; GFX11-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 7386; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 7387; 7388; GFX1200-SDAG-LABEL: clpeak_imad_pat_v2i32_x2: 7389; GFX1200-SDAG: ; %bb.0: ; %entry 7390; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 7391; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0 7392; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0 7393; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0 7394; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0 7395; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, 
v0 7396; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1 7397; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7398; GFX1200-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 7399; GFX1200-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3 7400; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7401; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v0, v4, v0 7402; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v1, v5, v1 7403; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) 7404; GFX1200-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2 7405; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v4 7406; GFX1200-SDAG-NEXT: v_mul_lo_u32 v1, v1, v3 7407; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v3, 1, v5 7408; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 7409; GFX1200-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 7410; GFX1200-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3 7411; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7412; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v2, v4, v2 7413; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v3, v5, v3 7414; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) 7415; GFX1200-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0 7416; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v4 7417; GFX1200-SDAG-NEXT: v_mul_lo_u32 v1, v3, v1 7418; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v3, 1, v5 7419; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 7420; GFX1200-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 7421; GFX1200-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3 7422; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7423; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v2, v4, v2 7424; GFX1200-SDAG-NEXT: v_add_nc_u32_e32 v3, v5, v3 7425; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7426; GFX1200-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0 7427; 
GFX1200-SDAG-NEXT: v_mul_lo_u32 v2, v3, v1 7428; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 7429; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[3:4], null, v0, v4, v[0:1] 7430; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[4:5], null, v2, v5, v[2:3] 7431; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 7432; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[0:1], null, v3, v0, v[3:4] 7433; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[1:2], null, v4, v2, v[4:5] 7434; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31] 7435; 7436; GFX1200-GISEL-LABEL: clpeak_imad_pat_v2i32_x2: 7437; GFX1200-GISEL: ; %bb.0: ; %entry 7438; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 7439; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0 7440; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 7441; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 7442; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 7443; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 7444; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 7445; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7446; GFX1200-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 7447; GFX1200-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 7448; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7449; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, v4, v0 7450; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v1, v5, v1 7451; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7452; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 7453; GFX1200-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3 7454; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v4 7455; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v5 7456; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7457; GFX1200-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 7458; GFX1200-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 7459; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 
7460; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v2, v4, v2 7461; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v3, v5, v3 7462; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7463; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 7464; GFX1200-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 7465; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v4 7466; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v5 7467; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7468; GFX1200-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 7469; GFX1200-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 7470; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7471; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v2, v4, v2 7472; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v3, v5, v3 7473; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7474; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 7475; GFX1200-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 7476; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v4 7477; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v5 7478; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7479; GFX1200-GISEL-NEXT: v_mul_lo_u32 v2, v0, v2 7480; GFX1200-GISEL-NEXT: v_mul_lo_u32 v3, v1, v3 7481; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 7482; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 7483; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7484; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 7485; GFX1200-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 7486; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] 7487entry: 7488 %y38 = add <2 x i32> %x, <i32 1, i32 1> 7489 %add = mul <2 x i32> %y38, %y 7490 %mul139 = add <2 x i32> %add, %y38 7491 %add2 = mul <2 x i32> %mul139, %y 7492 %add240 = add <2 x i32> %add, <i32 1, i32 1> 7493 %add4 = mul <2 x i32> %add2, %add240 7494 %mul541 = add <2 x i32> %add4, %add240 7495 %add6 = mul <2 x i32> 
%mul541, %add2 7496 %add642 = add <2 x i32> %add4, <i32 1, i32 1> 7497 %add8 = mul <2 x i32> %add6, %add642 7498 %mul943 = add <2 x i32> %add8, %add642 7499 %add10 = mul <2 x i32> %mul943, %add6 7500 %add1044 = add <2 x i32> %add8, <i32 1, i32 1> 7501 %add1246 = add <2 x i32> %add10, <i32 1, i32 1> 7502 %mul1345 = mul <2 x i32> %add10, %add1044 7503 %add14 = mul <2 x i32> %mul1345, %add1246 7504 ret <2 x i32> %add14 7505} 7506
; Signed i16 variant of the clpeak integer multiply-add chain. The IR body
; alternates `add i16 v, 1` with `mul i16` (eight of each), so every multiply
; has one operand of the form (v + 1); since (v + 1) * w == v * w + w, each
; add/mul pair has the shape of a multiply-add. The SDAG expectations for
; GFX8 and later consist of eight v_mad_u16 / v_mad_legacy_u16 instructions.
; Both arguments and the return value are `signext`; every expected sequence
; below ends with `v_bfe_i32 v0, v0, 0, 16` before the return.
; The assertions are produced by utils/update_llc_test_checks.py (see the
; first line of this file); regenerate them with that script rather than
; editing them by hand.
7507define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) { 7508; GFX67-LABEL: clpeak_imad_pat_i16_x2: 7509; GFX67: ; %bb.0: ; %entry 7510; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7511; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0 7512; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 7513; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v1 7514; GFX67-NEXT: v_mul_u32_u24_e32 v3, v0, v2 7515; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1 7516; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1 7517; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3 7518; GFX67-NEXT: v_mul_u32_u24_e32 v4, v1, v3 7519; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, 1 7520; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 7521; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v4 7522; GFX67-NEXT: v_mul_u32_u24_e32 v4, v0, v2 7523; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, 1 7524; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1 7525; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v4 7526; GFX67-NEXT: v_mul_u32_u24_e32 v4, v1, v3 7527; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, 1 7528; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 7529; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v4 7530; GFX67-NEXT: v_mul_u32_u24_e32 v4, v0, v2 7531; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, 1 7532; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1 7533; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v4 7534; GFX67-NEXT: v_mul_u32_u24_e32 v4, v1, v3 7535; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, 1 7536; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 7537; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v4 7538; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2 7539; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, 
1 7540; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 7541; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1 7542; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1 7543; GFX67-NEXT: v_bfe_i32 v0, v0, 0, 16 7544; GFX67-NEXT: s_setpc_b64 s[30:31] 7545; 7546; GFX8-SDAG-LABEL: clpeak_imad_pat_i16_x2: 7547; GFX8-SDAG: ; %bb.0: ; %entry 7548; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7549; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7550; GFX8-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7551; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7552; GFX8-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7553; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7554; GFX8-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7555; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7556; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 7557; GFX8-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16 7558; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 7559; 7560; GFX8-GISEL-LABEL: clpeak_imad_pat_i16_x2: 7561; GFX8-GISEL: ; %bb.0: ; %entry 7562; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7563; GFX8-GISEL-NEXT: v_add_u16_e32 v0, 1, v0 7564; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v0, v1 7565; GFX8-GISEL-NEXT: v_add_u16_e32 v3, 1, v1 7566; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v3, v2 7567; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 7568; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v0, v4 7569; GFX8-GISEL-NEXT: v_mad_u16 v2, v3, v2, 1 7570; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v3, v2, v1 7571; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v4, 1 7572; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v0, v3 7573; GFX8-GISEL-NEXT: v_mad_u16 v1, v2, v1, 1 7574; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v1, v4 7575; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v3, 1 7576; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v2 7577; GFX8-GISEL-NEXT: v_mad_u16 v1, v1, v4, 1 7578; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v1 7579; GFX8-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16 7580; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 7581; 7582; GFX9-SDAG-LABEL: clpeak_imad_pat_i16_x2: 7583; GFX9-SDAG: ; %bb.0: ; %entry 7584; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 7585; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1 7586; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v0, v1, v0 7587; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1 7588; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v0, v1, v0 7589; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1 7590; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v0, v1, v0 7591; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1 7592; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v0, v1, v0 7593; GFX9-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16 7594; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 7595; 7596; GFX9-GISEL-LABEL: clpeak_imad_pat_i16_x2: 7597; GFX9-GISEL: ; %bb.0: ; %entry 7598; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7599; GFX9-GISEL-NEXT: v_add_u16_e32 v0, 1, v0 7600; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v2, v0, v1 7601; GFX9-GISEL-NEXT: v_add_u16_e32 v3, 1, v1 7602; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v4, v3, v2 7603; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v1, 1 7604; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v1, v0, v4 7605; GFX9-GISEL-NEXT: v_mad_legacy_u16 v2, v3, v2, 1 7606; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v3, v2, v1 7607; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v4, 1 7608; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v4, v0, v3 7609; GFX9-GISEL-NEXT: v_mad_legacy_u16 v1, v2, v1, 1 7610; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v2, v1, v4 7611; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v3, 1 7612; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v2 7613; GFX9-GISEL-NEXT: v_mad_legacy_u16 v1, v1, v4, 1 7614; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v1 7615; GFX9-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16 7616; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 7617; 7618; GFX10-SDAG-LABEL: clpeak_imad_pat_i16_x2: 7619; GFX10-SDAG: ; %bb.0: ; %entry 7620; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7621; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7622; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7623; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7624; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7625; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, 
v0, v1 7626; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7627; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7628; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 7629; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16 7630; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 7631; 7632; GFX10-GISEL-LABEL: clpeak_imad_pat_i16_x2: 7633; GFX10-GISEL: ; %bb.0: ; %entry 7634; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7635; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 7636; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 7637; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 7638; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 7639; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 7640; GFX10-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 7641; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 7642; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 7643; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 7644; GFX10-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 7645; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 7646; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 7647; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 7648; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 7649; GFX10-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 7650; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 7651; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16 7652; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 7653; 7654; GFX11-SDAG-LABEL: clpeak_imad_pat_i16_x2: 7655; GFX11-SDAG: ; %bb.0: ; %entry 7656; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7657; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7658; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 7659; GFX11-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7660; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7661; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 7662; GFX11-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7663; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7664; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 7665; GFX11-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7666; 
GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7667; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 7668; GFX11-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 7669; GFX11-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16 7670; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 7671; 7672; GFX11-GISEL-LABEL: clpeak_imad_pat_i16_x2: 7673; GFX11-GISEL: ; %bb.0: ; %entry 7674; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7675; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 7676; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 7677; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 7678; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 7679; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 7680; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 7681; GFX11-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 7682; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 7683; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 7684; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 7685; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 7686; GFX11-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 7687; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 7688; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 7689; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 7690; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 7691; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 7692; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 7693; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 7694; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 7695; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 7696; GFX11-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16 7697; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 7698; 7699; GFX1200-SDAG-LABEL: clpeak_imad_pat_i16_x2: 7700; GFX1200-SDAG: ; %bb.0: ; %entry 7701; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 7702; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0 7703; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0 7704; GFX1200-SDAG-NEXT: 
s_wait_bvhcnt 0x0 7705; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0 7706; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7707; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 7708; GFX1200-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7709; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7710; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 7711; GFX1200-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7712; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7713; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 7714; GFX1200-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7715; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7716; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 7717; GFX1200-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 7718; GFX1200-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16 7719; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31] 7720; 7721; GFX1200-GISEL-LABEL: clpeak_imad_pat_i16_x2: 7722; GFX1200-GISEL: ; %bb.0: ; %entry 7723; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 7724; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0 7725; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 7726; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 7727; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 7728; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 7729; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 7730; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 7731; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 7732; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 7733; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 7734; GFX1200-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 7735; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 7736; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 7737; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 7738; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 7739; GFX1200-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 7740; GFX1200-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 7741; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 7742; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 7743; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 7744; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 7745; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 7746; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 7747; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 7748; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 7749; GFX1200-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16 7750; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] 7751entry: 7752 %conv69 = add i16 %x, 1 7753 %add = mul i16 %conv69, %y 7754 %conv470 = add i16 %y, 1 7755 %add8 = mul i16 %conv470, %add 7756 %conv1071 = add i16 %add, 1 7757 %add14 = mul i16 %conv1071, %add8 7758 %conv1672 = add i16 %add8, 1 7759 %add20 = mul i16 %conv1672, %add14 7760 %conv2273 = add i16 %add14, 1 7761 %add26 = mul i16 %conv2273, %add20 7762 %conv2874 = add i16 %add20, 1 7763 %add32 = mul i16 %conv2874, %add26 7764 %conv3475 = add i16 %add26, 1 7765 %add38 = mul i16 %conv3475, %add32 7766 %conv4076 = add i16 %add32, 1 7767 %add44 = mul i16 %add38, %conv4076 7768 ret i16 %add44 7769} 7770 7771define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) { 7772; GFX67-LABEL: clpeak_umad_pat_i16_x2: 7773; GFX67: ; %bb.0: ; %entry 7774; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7775; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0 7776; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 7777; GFX67-NEXT: v_mul_u32_u24_e32 v2, v0, v1 7778; GFX67-NEXT: v_add_i32_e32 v3, vcc, 1, v1 7779; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3 7780; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2 7781; GFX67-NEXT: v_mul_u32_u24_e32 v4, v3, v2 7782; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, 1 7783; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 7784; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v4 7785; GFX67-NEXT: v_mul_u32_u24_e32 v4, v0, v1 7786; GFX67-NEXT: v_mad_u32_u24 v2, v3, 
v2, 1 7787; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2 7788; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v4 7789; GFX67-NEXT: v_mul_u32_u24_e32 v4, v2, v3 7790; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, 1 7791; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 7792; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v4 7793; GFX67-NEXT: v_mul_u32_u24_e32 v4, v0, v1 7794; GFX67-NEXT: v_mad_u32_u24 v2, v2, v3, 1 7795; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2 7796; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v4 7797; GFX67-NEXT: v_mul_u32_u24_e32 v4, v2, v3 7798; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, 1 7799; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 7800; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v4 7801; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1 7802; GFX67-NEXT: v_mad_u32_u24 v1, v2, v3, 1 7803; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 7804; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1 7805; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1 7806; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 7807; GFX67-NEXT: s_setpc_b64 s[30:31] 7808; 7809; GFX8-SDAG-LABEL: clpeak_umad_pat_i16_x2: 7810; GFX8-SDAG: ; %bb.0: ; %entry 7811; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7812; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7813; GFX8-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7814; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7815; GFX8-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7816; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7817; GFX8-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7818; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7819; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 7820; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 7821; 7822; GFX8-GISEL-LABEL: clpeak_umad_pat_i16_x2: 7823; GFX8-GISEL: ; %bb.0: ; %entry 7824; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7825; GFX8-GISEL-NEXT: v_add_u16_e32 v0, 1, v0 7826; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v0, v1 7827; GFX8-GISEL-NEXT: v_add_u16_e32 v3, 1, v1 7828; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v3, v2 7829; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 7830; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v0, v4 
7831; GFX8-GISEL-NEXT: v_mad_u16 v2, v3, v2, 1 7832; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v3, v2, v1 7833; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v4, 1 7834; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v0, v3 7835; GFX8-GISEL-NEXT: v_mad_u16 v1, v2, v1, 1 7836; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v1, v4 7837; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v3, 1 7838; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v2 7839; GFX8-GISEL-NEXT: v_mad_u16 v1, v1, v4, 1 7840; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v1 7841; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 7842; 7843; GFX9-SDAG-LABEL: clpeak_umad_pat_i16_x2: 7844; GFX9-SDAG: ; %bb.0: ; %entry 7845; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7846; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1 7847; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v0, v1, v0 7848; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1 7849; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v0, v1, v0 7850; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1 7851; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v0, v1, v0 7852; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1 7853; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v0, v1, v0 7854; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 7855; 7856; GFX9-GISEL-LABEL: clpeak_umad_pat_i16_x2: 7857; GFX9-GISEL: ; %bb.0: ; %entry 7858; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7859; GFX9-GISEL-NEXT: v_add_u16_e32 v0, 1, v0 7860; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v2, v0, v1 7861; GFX9-GISEL-NEXT: v_add_u16_e32 v3, 1, v1 7862; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v4, v3, v2 7863; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v1, 1 7864; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v1, v0, v4 7865; GFX9-GISEL-NEXT: v_mad_legacy_u16 v2, v3, v2, 1 7866; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v3, v2, v1 7867; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v4, 1 7868; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v4, v0, v3 7869; GFX9-GISEL-NEXT: v_mad_legacy_u16 v1, v2, v1, 1 7870; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v2, v1, v4 7871; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v3, 1 7872; 
GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v2 7873; GFX9-GISEL-NEXT: v_mad_legacy_u16 v1, v1, v4, 1 7874; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v1 7875; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 7876; 7877; GFX10-SDAG-LABEL: clpeak_umad_pat_i16_x2: 7878; GFX10-SDAG: ; %bb.0: ; %entry 7879; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7880; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7881; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7882; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7883; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7884; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7885; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7886; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7887; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 7888; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 7889; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 7890; 7891; GFX10-GISEL-LABEL: clpeak_umad_pat_i16_x2: 7892; GFX10-GISEL: ; %bb.0: ; %entry 7893; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7894; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 7895; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 7896; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 7897; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 7898; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 7899; GFX10-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 7900; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 7901; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 7902; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 7903; GFX10-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 7904; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 7905; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 7906; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 7907; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 7908; GFX10-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 7909; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 7910; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 7911; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 7912; 7913; GFX11-SDAG-LABEL: clpeak_umad_pat_i16_x2: 7914; GFX11-SDAG: ; %bb.0: ; %entry 7915; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
7916; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7917; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 7918; GFX11-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7919; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7920; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 7921; GFX11-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7922; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7923; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 7924; GFX11-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7925; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7926; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 7927; GFX11-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 7928; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 7929; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 7930; 7931; GFX11-GISEL-LABEL: clpeak_umad_pat_i16_x2: 7932; GFX11-GISEL: ; %bb.0: ; %entry 7933; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7934; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 7935; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 7936; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 7937; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 7938; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 7939; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 7940; GFX11-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 7941; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 7942; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 7943; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 7944; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 7945; GFX11-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 7946; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 7947; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 7948; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 7949; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 7950; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) 
| instid1(VALU_DEP_1) 7951; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 7952; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 7953; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 7954; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 7955; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 7956; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 7957; 7958; GFX1200-SDAG-LABEL: clpeak_umad_pat_i16_x2: 7959; GFX1200-SDAG: ; %bb.0: ; %entry 7960; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 7961; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0 7962; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0 7963; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0 7964; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0 7965; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7966; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 7967; GFX1200-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7968; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7969; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 7970; GFX1200-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7971; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7972; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 7973; GFX1200-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 7974; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 7975; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 7976; GFX1200-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 7977; GFX1200-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 7978; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31] 7979; 7980; GFX1200-GISEL-LABEL: clpeak_umad_pat_i16_x2: 7981; GFX1200-GISEL: ; %bb.0: ; %entry 7982; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 7983; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0 7984; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 7985; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 7986; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 7987; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 7988; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 7989; GFX1200-GISEL-NEXT: s_delay_alu 
; The nine physical lines that follow contain, in order, the end of
; @clpeak_umad_pat_i16_x2, the complete test @clpeak_imad_pat_v2i16_x2, and
; the start of the @clpeak_umad_pat_v2i16_x2 signature.
;
; @clpeak_imad_pat_v2i16_x2 takes <2 x i16> %x and %y and returns <2 x i16>.
; Its IR body is straight-line code in the single block %entry that
; alternates add and mul on <2 x i16> values. It begins with %x plus a splat
; of 1 multiplied by %y; later adds use either a splat of 1 or an earlier
; value of the chain as the addend, and the final product %add14 is returned.
;
; Expected code per check prefix, as recorded in the assertions:
;   - the GFX67-SDAG and GFX67-GISEL prefixes expect v_mad_u32_u24 and
;     v_mul_u32_u24 together with v_and_b32 0xffff masks;
;   - the GFX8-SDAG and GFX8-GISEL prefixes expect v_mad_u16 and
;     v_mul_lo_u16, with the two halves combined by a final v_or_b32;
;   - the GFX9, GFX10, GFX11 and GFX1200 prefixes (each shared by the SDAG
;     and GISEL runs) expect packed v_pk_add_u16, v_pk_mad_u16 and
;     v_pk_mul_lo_u16.
;
; The assertions were autogenerated by utils/update_llc_test_checks.py (see
; the note at the top of the file); regenerate them with that script rather
; than editing them by hand.
instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 7990; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 7991; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 7992; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 7993; GFX1200-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 7994; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 7995; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 7996; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 7997; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 7998; GFX1200-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 7999; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 8000; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 8001; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 8002; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 8003; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 8004; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 8005; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 8006; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 8007; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 8008; GFX1200-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 8009; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] 8010entry: 8011 %conv69 = add i16 %x, 1 8012 %add = mul i16 %conv69, %y 8013 %conv470 = add i16 %y, 1 8014 %add8 = mul i16 %conv470, %add 8015 %conv1071 = add i16 %add, 1 8016 %add14 = mul i16 %conv1071, %add8 8017 %conv1672 = add i16 %add8, 1 8018 %add20 = mul i16 %conv1672, %add14 8019 %conv2273 = add i16 %add14, 1 8020 %add26 = mul i16 %conv2273, %add20 8021 %conv2874 = add i16 %add20, 1 8022 %add32 = mul i16 %conv2874, %add26 8023 %conv3475 = add i16 %add26, 1 8024 %add38 = mul i16 %conv3475, %add32 8025 %conv4076 = add i16 %add32, 1 8026 %add44 = mul i16 %add38, %conv4076 8027 ret i16 %add44 8028} 8029 8030define <2 x i16> @clpeak_imad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) { 8031; GFX67-SDAG-LABEL: clpeak_imad_pat_v2i16_x2: 8032; GFX67-SDAG: ; %bb.0: ; %entry 
8033; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8034; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 8035; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0 8036; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1 8037; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 8038; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v1 8039; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 8040; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v4, v2, v0 8041; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 8042; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v5, v3, v1 8043; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 8044; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v2 8045; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v4, v2, 1 8046; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v3 8047; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v2 8048; GFX67-SDAG-NEXT: v_mad_u32_u24 v3, v5, v3, 1 8049; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 8050; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v3 8051; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 8052; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v0, v4, v2 8053; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 8054; GFX67-SDAG-NEXT: v_mad_u32_u24 v3, v1, v5, v3 8055; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 8056; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v2, v0 8057; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v4, 1 8058; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v1 8059; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0 8060; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v5, 1 8061; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 8062; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v1 8063; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 8064; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v2, v4, v0 8065; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 8066; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v3, v5, v1 8067; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 8068; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v6, v0, v2 8069; GFX67-SDAG-NEXT: v_mad_u32_u24 v4, v2, v4, 1 8070; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v7, v1, v3 8071; 
GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4 8072; GFX67-SDAG-NEXT: v_mad_u32_u24 v5, v3, v5, 1 8073; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1 8074; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v6 8075; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1 8076; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v5 8077; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v4 8078; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v7 8079; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v4, v2 8080; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 8081; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 8082; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 8083; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 8084; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v3, v0 8085; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v2, v1 8086; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] 8087; 8088; GFX67-GISEL-LABEL: clpeak_imad_pat_v2i16_x2: 8089; GFX67-GISEL: ; %bb.0: ; %entry 8090; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8091; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 8092; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 8093; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v1 8094; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v0 8095; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 8096; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5 8097; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4 8098; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 8099; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 8100; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 8101; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v5, v3, v1 8102; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v4, v2, v0 8103; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 8104; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 8105; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 8106; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 8107; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 8108; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 8109; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3 8110; GFX67-GISEL-NEXT: 
v_mad_u32_u24 v3, v5, v3, 1 8111; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 8112; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v4, v2, 1 8113; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v3 8114; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v2 8115; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 8116; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5 8117; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4 8118; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 8119; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 8120; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 8121; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v1, v5, v3 8122; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v0, v4, v2 8123; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 8124; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 8125; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 8126; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 8127; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 8128; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 8129; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v1 8130; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v5, 1 8131; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v0 8132; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v4, 1 8133; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v1 8134; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v0 8135; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 8136; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5 8137; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4 8138; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 8139; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 8140; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 8141; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v3, v5, v1 8142; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v2, v4, v0 8143; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 8144; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 8145; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 8146; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 8147; GFX67-GISEL-NEXT: v_mad_u32_u24 v5, v3, v5, 1 8148; GFX67-GISEL-NEXT: 
v_lshrrev_b32_e32 v1, 16, v0 8149; GFX67-GISEL-NEXT: v_mad_u32_u24 v4, v2, v4, 1 8150; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5 8151; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 8152; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v1, v3 8153; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 8154; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 8155; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, 1 8156; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v0, v2 8157; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5 8158; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, 1 8159; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 8160; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 8161; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 8162; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v6 8163; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4 8164; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 8165; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4 8166; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3 8167; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7 8168; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1 8169; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0 8170; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 8171; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 8172; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 8173; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0 8174; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3 8175; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] 8176; 8177; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i16_x2: 8178; GFX8-SDAG: ; %bb.0: ; %entry 8179; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8180; GFX8-SDAG-NEXT: v_mov_b32_e32 v3, 1 8181; GFX8-SDAG-NEXT: v_add_u16_e32 v2, 1, v0 8182; GFX8-SDAG-NEXT: v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 8183; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1 8184; GFX8-SDAG-NEXT: v_mad_u16 v4, v0, v3, v0 8185; GFX8-SDAG-NEXT: v_mad_u16 v5, v2, v1, v2 8186; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v4, v4, v3 8187; 
GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v3, 1 8188; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v5, v5, v1 8189; GFX8-SDAG-NEXT: v_mad_u16 v1, v2, v1, 1 8190; GFX8-SDAG-NEXT: v_mad_u16 v2, v4, v0, v0 8191; GFX8-SDAG-NEXT: v_mad_u16 v3, v5, v1, v1 8192; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v2, v2, v4 8193; GFX8-SDAG-NEXT: v_mad_u16 v0, v4, v0, 1 8194; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v3, v3, v5 8195; GFX8-SDAG-NEXT: v_mad_u16 v1, v5, v1, 1 8196; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v4, v2, v0 8197; GFX8-SDAG-NEXT: v_mad_u16 v0, v2, v0, v0 8198; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v5, v3, v1 8199; GFX8-SDAG-NEXT: v_mad_u16 v1, v3, v1, v1 8200; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v0, v0, v2 8201; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v1, v1, v3 8202; GFX8-SDAG-NEXT: v_mad_u16 v2, v0, v4, v0 8203; GFX8-SDAG-NEXT: v_mad_u16 v3, v1, v5, v1 8204; GFX8-SDAG-NEXT: v_mad_u16 v0, v2, v0, v2 8205; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 8206; GFX8-SDAG-NEXT: v_mad_u16 v1, v3, v1, v3 8207; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v1, v0 8208; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 8209; 8210; GFX8-GISEL-LABEL: clpeak_imad_pat_v2i16_x2: 8211; GFX8-GISEL: ; %bb.0: ; %entry 8212; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8213; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, 1 8214; GFX8-GISEL-NEXT: v_add_u16_e32 v2, 1, v0 8215; GFX8-GISEL-NEXT: v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 8216; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1 8217; GFX8-GISEL-NEXT: v_mad_u16 v4, v2, v1, v2 8218; GFX8-GISEL-NEXT: v_mad_u16 v5, v0, v3, v0 8219; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v4, v1 8220; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 8221; GFX8-GISEL-NEXT: v_mad_u16 v1, v2, v1, 1 8222; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v3, 1 8223; GFX8-GISEL-NEXT: v_mad_u16 v2, v4, v1, v1 8224; GFX8-GISEL-NEXT: v_mad_u16 v3, v5, v0, v0 8225; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v2, v4 8226; GFX8-GISEL-NEXT: 
v_mul_lo_u16_e32 v3, v3, v5 8227; GFX8-GISEL-NEXT: v_mad_u16 v1, v4, v1, 1 8228; GFX8-GISEL-NEXT: v_mad_u16 v0, v5, v0, 1 8229; GFX8-GISEL-NEXT: v_mad_u16 v4, v2, v1, v1 8230; GFX8-GISEL-NEXT: v_mad_u16 v5, v3, v0, v0 8231; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v6, v4, v2 8232; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v7, v5, v3 8233; GFX8-GISEL-NEXT: v_mad_u16 v1, v2, v1, 1 8234; GFX8-GISEL-NEXT: v_mad_u16 v0, v3, v0, 1 8235; GFX8-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1 8236; GFX8-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1 8237; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v6, v1 8238; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v7, v0 8239; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v1, v2 8240; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 8241; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 8242; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 8243; 8244; GFX9-LABEL: clpeak_imad_pat_v2i16_x2: 8245; GFX9: ; %bb.0: ; %entry 8246; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8247; GFX9-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 8248; GFX9-NEXT: v_pk_mad_u16 v2, v0, v1, v0 8249; GFX9-NEXT: v_pk_mul_lo_u16 v2, v2, v1 8250; GFX9-NEXT: v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0] 8251; GFX9-NEXT: v_pk_mad_u16 v1, v2, v0, v0 8252; GFX9-NEXT: v_pk_mul_lo_u16 v1, v1, v2 8253; GFX9-NEXT: v_pk_mad_u16 v0, v2, v0, 1 op_sel_hi:[1,1,0] 8254; GFX9-NEXT: v_pk_mad_u16 v2, v1, v0, v0 8255; GFX9-NEXT: v_pk_mul_lo_u16 v3, v2, v1 8256; GFX9-NEXT: v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0] 8257; GFX9-NEXT: v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0] 8258; GFX9-NEXT: v_pk_mul_lo_u16 v0, v3, v0 8259; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1 8260; GFX9-NEXT: s_setpc_b64 s[30:31] 8261; 8262; GFX10-LABEL: clpeak_imad_pat_v2i16_x2: 8263; GFX10: ; %bb.0: ; %entry 8264; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8265; GFX10-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 8266; GFX10-NEXT: v_pk_mad_u16 v2, v0, v1, v0 8267; GFX10-NEXT: v_pk_mad_u16 v0, v0, v1, 1 
op_sel_hi:[1,1,0] 8268; GFX10-NEXT: v_pk_mul_lo_u16 v1, v2, v1 8269; GFX10-NEXT: v_pk_mad_u16 v2, v1, v0, v0 8270; GFX10-NEXT: v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0] 8271; GFX10-NEXT: v_pk_mul_lo_u16 v1, v2, v1 8272; GFX10-NEXT: v_pk_mad_u16 v2, v1, v0, v0 8273; GFX10-NEXT: v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0] 8274; GFX10-NEXT: v_pk_mul_lo_u16 v3, v2, v1 8275; GFX10-NEXT: v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0] 8276; GFX10-NEXT: v_pk_mul_lo_u16 v0, v3, v0 8277; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1 8278; GFX10-NEXT: s_setpc_b64 s[30:31] 8279; 8280; GFX11-LABEL: clpeak_imad_pat_v2i16_x2: 8281; GFX11: ; %bb.0: ; %entry 8282; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8283; GFX11-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 8284; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 8285; GFX11-NEXT: v_pk_mad_u16 v2, v0, v1, v0 8286; GFX11-NEXT: v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0] 8287; GFX11-NEXT: v_pk_mul_lo_u16 v1, v2, v1 8288; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 8289; GFX11-NEXT: v_pk_mad_u16 v2, v1, v0, v0 8290; GFX11-NEXT: v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0] 8291; GFX11-NEXT: v_pk_mul_lo_u16 v1, v2, v1 8292; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 8293; GFX11-NEXT: v_pk_mad_u16 v2, v1, v0, v0 8294; GFX11-NEXT: v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0] 8295; GFX11-NEXT: v_pk_mul_lo_u16 v3, v2, v1 8296; GFX11-NEXT: v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0] 8297; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 8298; GFX11-NEXT: v_pk_mul_lo_u16 v0, v3, v0 8299; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v1 8300; GFX11-NEXT: s_setpc_b64 s[30:31] 8301; 8302; GFX1200-LABEL: clpeak_imad_pat_v2i16_x2: 8303; GFX1200: ; %bb.0: ; %entry 8304; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 8305; GFX1200-NEXT: s_wait_expcnt 0x0 8306; GFX1200-NEXT: s_wait_samplecnt 0x0 8307; 
GFX1200-NEXT: s_wait_bvhcnt 0x0 8308; GFX1200-NEXT: s_wait_kmcnt 0x0 8309; GFX1200-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 8310; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 8311; GFX1200-NEXT: v_pk_mad_u16 v2, v0, v1, v0 8312; GFX1200-NEXT: v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0] 8313; GFX1200-NEXT: v_pk_mul_lo_u16 v1, v2, v1 8314; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 8315; GFX1200-NEXT: v_pk_mad_u16 v2, v1, v0, v0 8316; GFX1200-NEXT: v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0] 8317; GFX1200-NEXT: v_pk_mul_lo_u16 v1, v2, v1 8318; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 8319; GFX1200-NEXT: v_pk_mad_u16 v2, v1, v0, v0 8320; GFX1200-NEXT: v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0] 8321; GFX1200-NEXT: v_pk_mul_lo_u16 v3, v2, v1 8322; GFX1200-NEXT: v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0] 8323; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 8324; GFX1200-NEXT: v_pk_mul_lo_u16 v0, v3, v0 8325; GFX1200-NEXT: v_pk_mul_lo_u16 v0, v0, v1 8326; GFX1200-NEXT: s_setpc_b64 s[30:31] 8327entry: 8328 %y38 = add <2 x i16> %x, <i16 1, i16 1> 8329 %add = mul <2 x i16> %y38, %y 8330 %mul139 = add <2 x i16> %add, %y38 8331 %add2 = mul <2 x i16> %mul139, %y 8332 %add240 = add <2 x i16> %add, <i16 1, i16 1> 8333 %add4 = mul <2 x i16> %add2, %add240 8334 %mul541 = add <2 x i16> %add4, %add240 8335 %add6 = mul <2 x i16> %mul541, %add2 8336 %add642 = add <2 x i16> %add4, <i16 1, i16 1> 8337 %add8 = mul <2 x i16> %add6, %add642 8338 %mul943 = add <2 x i16> %add8, %add642 8339 %add10 = mul <2 x i16> %mul943, %add6 8340 %add1044 = add <2 x i16> %add8, <i16 1, i16 1> 8341 %add1246 = add <2 x i16> %add10, <i16 1, i16 1> 8342 %mul1345 = mul <2 x i16> %add10, %add1044 8343 %add14 = mul <2 x i16> %mul1345, %add1246 8344 ret <2 x i16> %add14 8345} 8346 8347define <2 x i16> @clpeak_umad_pat_v2i16_x2(<2 x i16> 
%x, <2 x i16> %y) { 8348; GFX67-SDAG-LABEL: clpeak_umad_pat_v2i16_x2: 8349; GFX67-SDAG: ; %bb.0: ; %entry 8350; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8351; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 8352; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0 8353; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1 8354; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 8355; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v1 8356; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 8357; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v4, v2, v0 8358; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 8359; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v5, v3, v1 8360; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 8361; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v2 8362; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v4, v2, 1 8363; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v3 8364; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v2 8365; GFX67-SDAG-NEXT: v_mad_u32_u24 v3, v5, v3, 1 8366; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 8367; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v3 8368; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 8369; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v0, v4, v2 8370; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 8371; GFX67-SDAG-NEXT: v_mad_u32_u24 v3, v1, v5, v3 8372; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 8373; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v2, v0 8374; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v4, 1 8375; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v1 8376; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0 8377; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v5, 1 8378; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 8379; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v1 8380; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 8381; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v2, v4, v0 8382; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 8383; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v3, v5, v1 8384; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 8385; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v6, v0, v2 8386; 
GFX67-SDAG-NEXT: v_mad_u32_u24 v4, v2, v4, 1 8387; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v7, v1, v3 8388; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4 8389; GFX67-SDAG-NEXT: v_mad_u32_u24 v5, v3, v5, 1 8390; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1 8391; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v6 8392; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1 8393; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v5 8394; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v4 8395; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v7 8396; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v4, v2 8397; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 8398; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 8399; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 8400; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 8401; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v3, v0 8402; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v2, v1 8403; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] 8404; 8405; GFX67-GISEL-LABEL: clpeak_umad_pat_v2i16_x2: 8406; GFX67-GISEL: ; %bb.0: ; %entry 8407; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8408; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 8409; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 8410; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v1 8411; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v0 8412; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 8413; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5 8414; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4 8415; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 8416; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 8417; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 8418; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v5, v3, v1 8419; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v4, v2, v0 8420; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 8421; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 8422; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 8423; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 8424; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 8425; GFX67-GISEL-NEXT: 
v_and_b32_e32 v0, 0xffff, v0 8426; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3 8427; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v5, v3, 1 8428; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 8429; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v4, v2, 1 8430; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v3 8431; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v2 8432; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 8433; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5 8434; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4 8435; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 8436; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 8437; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 8438; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v1, v5, v3 8439; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v0, v4, v2 8440; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 8441; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 8442; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 8443; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 8444; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 8445; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 8446; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v1 8447; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v5, 1 8448; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v0 8449; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v4, 1 8450; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v1 8451; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v0 8452; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 8453; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5 8454; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4 8455; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 8456; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 8457; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 8458; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v3, v5, v1 8459; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v2, v4, v0 8460; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 8461; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 8462; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 8463; GFX67-GISEL-NEXT: 
v_or_b32_e32 v0, v0, v1 8464; GFX67-GISEL-NEXT: v_mad_u32_u24 v5, v3, v5, 1 8465; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 8466; GFX67-GISEL-NEXT: v_mad_u32_u24 v4, v2, v4, 1 8467; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5 8468; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 8469; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v1, v3 8470; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 8471; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 8472; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, 1 8473; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v0, v2 8474; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5 8475; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, 1 8476; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 8477; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 8478; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 8479; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v6 8480; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4 8481; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 8482; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4 8483; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3 8484; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7 8485; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1 8486; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0 8487; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 8488; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 8489; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 8490; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0 8491; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3 8492; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] 8493; 8494; GFX8-SDAG-LABEL: clpeak_umad_pat_v2i16_x2: 8495; GFX8-SDAG: ; %bb.0: ; %entry 8496; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8497; GFX8-SDAG-NEXT: v_mov_b32_e32 v3, 1 8498; GFX8-SDAG-NEXT: v_add_u16_e32 v2, 1, v0 8499; GFX8-SDAG-NEXT: v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 8500; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1 8501; GFX8-SDAG-NEXT: v_mad_u16 v4, v0, v3, v0 8502; 
GFX8-SDAG-NEXT: v_mad_u16 v5, v2, v1, v2 8503; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v4, v4, v3 8504; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v3, 1 8505; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v5, v5, v1 8506; GFX8-SDAG-NEXT: v_mad_u16 v1, v2, v1, 1 8507; GFX8-SDAG-NEXT: v_mad_u16 v2, v4, v0, v0 8508; GFX8-SDAG-NEXT: v_mad_u16 v3, v5, v1, v1 8509; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v2, v2, v4 8510; GFX8-SDAG-NEXT: v_mad_u16 v0, v4, v0, 1 8511; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v3, v3, v5 8512; GFX8-SDAG-NEXT: v_mad_u16 v1, v5, v1, 1 8513; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v4, v2, v0 8514; GFX8-SDAG-NEXT: v_mad_u16 v0, v2, v0, v0 8515; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v5, v3, v1 8516; GFX8-SDAG-NEXT: v_mad_u16 v1, v3, v1, v1 8517; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v0, v0, v2 8518; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v1, v1, v3 8519; GFX8-SDAG-NEXT: v_mad_u16 v2, v0, v4, v0 8520; GFX8-SDAG-NEXT: v_mad_u16 v3, v1, v5, v1 8521; GFX8-SDAG-NEXT: v_mad_u16 v0, v2, v0, v2 8522; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 8523; GFX8-SDAG-NEXT: v_mad_u16 v1, v3, v1, v3 8524; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v1, v0 8525; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] 8526; 8527; GFX8-GISEL-LABEL: clpeak_umad_pat_v2i16_x2: 8528; GFX8-GISEL: ; %bb.0: ; %entry 8529; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8530; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, 1 8531; GFX8-GISEL-NEXT: v_add_u16_e32 v2, 1, v0 8532; GFX8-GISEL-NEXT: v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 8533; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1 8534; GFX8-GISEL-NEXT: v_mad_u16 v4, v2, v1, v2 8535; GFX8-GISEL-NEXT: v_mad_u16 v5, v0, v3, v0 8536; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v4, v1 8537; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 8538; GFX8-GISEL-NEXT: v_mad_u16 v1, v2, v1, 1 8539; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v3, 1 8540; GFX8-GISEL-NEXT: v_mad_u16 v2, v4, v1, v1 8541; GFX8-GISEL-NEXT: v_mad_u16 v3, 
v5, v0, v0 8542; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v2, v4 8543; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v3, v3, v5 8544; GFX8-GISEL-NEXT: v_mad_u16 v1, v4, v1, 1 8545; GFX8-GISEL-NEXT: v_mad_u16 v0, v5, v0, 1 8546; GFX8-GISEL-NEXT: v_mad_u16 v4, v2, v1, v1 8547; GFX8-GISEL-NEXT: v_mad_u16 v5, v3, v0, v0 8548; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v6, v4, v2 8549; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v7, v5, v3 8550; GFX8-GISEL-NEXT: v_mad_u16 v1, v2, v1, 1 8551; GFX8-GISEL-NEXT: v_mad_u16 v0, v3, v0, 1 8552; GFX8-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1 8553; GFX8-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1 8554; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v6, v1 8555; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v7, v0 8556; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v1, v2 8557; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 8558; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 8559; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 8560; 8561; GFX9-LABEL: clpeak_umad_pat_v2i16_x2: 8562; GFX9: ; %bb.0: ; %entry 8563; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8564; GFX9-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 8565; GFX9-NEXT: v_pk_mad_u16 v2, v0, v1, v0 8566; GFX9-NEXT: v_pk_mul_lo_u16 v2, v2, v1 8567; GFX9-NEXT: v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0] 8568; GFX9-NEXT: v_pk_mad_u16 v1, v2, v0, v0 8569; GFX9-NEXT: v_pk_mul_lo_u16 v1, v1, v2 8570; GFX9-NEXT: v_pk_mad_u16 v0, v2, v0, 1 op_sel_hi:[1,1,0] 8571; GFX9-NEXT: v_pk_mad_u16 v2, v1, v0, v0 8572; GFX9-NEXT: v_pk_mul_lo_u16 v3, v2, v1 8573; GFX9-NEXT: v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0] 8574; GFX9-NEXT: v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0] 8575; GFX9-NEXT: v_pk_mul_lo_u16 v0, v3, v0 8576; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1 8577; GFX9-NEXT: s_setpc_b64 s[30:31] 8578; 8579; GFX10-LABEL: clpeak_umad_pat_v2i16_x2: 8580; GFX10: ; %bb.0: ; %entry 8581; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8582; GFX10-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 8583; 
GFX10-NEXT: v_pk_mad_u16 v2, v0, v1, v0 8584; GFX10-NEXT: v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0] 8585; GFX10-NEXT: v_pk_mul_lo_u16 v1, v2, v1 8586; GFX10-NEXT: v_pk_mad_u16 v2, v1, v0, v0 8587; GFX10-NEXT: v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0] 8588; GFX10-NEXT: v_pk_mul_lo_u16 v1, v2, v1 8589; GFX10-NEXT: v_pk_mad_u16 v2, v1, v0, v0 8590; GFX10-NEXT: v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0] 8591; GFX10-NEXT: v_pk_mul_lo_u16 v3, v2, v1 8592; GFX10-NEXT: v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0] 8593; GFX10-NEXT: v_pk_mul_lo_u16 v0, v3, v0 8594; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1 8595; GFX10-NEXT: s_setpc_b64 s[30:31] 8596; 8597; GFX11-LABEL: clpeak_umad_pat_v2i16_x2: 8598; GFX11: ; %bb.0: ; %entry 8599; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8600; GFX11-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 8601; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 8602; GFX11-NEXT: v_pk_mad_u16 v2, v0, v1, v0 8603; GFX11-NEXT: v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0] 8604; GFX11-NEXT: v_pk_mul_lo_u16 v1, v2, v1 8605; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 8606; GFX11-NEXT: v_pk_mad_u16 v2, v1, v0, v0 8607; GFX11-NEXT: v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0] 8608; GFX11-NEXT: v_pk_mul_lo_u16 v1, v2, v1 8609; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 8610; GFX11-NEXT: v_pk_mad_u16 v2, v1, v0, v0 8611; GFX11-NEXT: v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0] 8612; GFX11-NEXT: v_pk_mul_lo_u16 v3, v2, v1 8613; GFX11-NEXT: v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0] 8614; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 8615; GFX11-NEXT: v_pk_mul_lo_u16 v0, v3, v0 8616; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v1 8617; GFX11-NEXT: s_setpc_b64 s[30:31] 8618; 8619; GFX1200-LABEL: clpeak_umad_pat_v2i16_x2: 8620; GFX1200: ; %bb.0: ; %entry 8621; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 8622; 
GFX1200-NEXT: s_wait_expcnt 0x0 8623; GFX1200-NEXT: s_wait_samplecnt 0x0 8624; GFX1200-NEXT: s_wait_bvhcnt 0x0 8625; GFX1200-NEXT: s_wait_kmcnt 0x0 8626; GFX1200-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] 8627; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 8628; GFX1200-NEXT: v_pk_mad_u16 v2, v0, v1, v0 8629; GFX1200-NEXT: v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0] 8630; GFX1200-NEXT: v_pk_mul_lo_u16 v1, v2, v1 8631; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 8632; GFX1200-NEXT: v_pk_mad_u16 v2, v1, v0, v0 8633; GFX1200-NEXT: v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0] 8634; GFX1200-NEXT: v_pk_mul_lo_u16 v1, v2, v1 8635; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 8636; GFX1200-NEXT: v_pk_mad_u16 v2, v1, v0, v0 8637; GFX1200-NEXT: v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0] 8638; GFX1200-NEXT: v_pk_mul_lo_u16 v3, v2, v1 8639; GFX1200-NEXT: v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0] 8640; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 8641; GFX1200-NEXT: v_pk_mul_lo_u16 v0, v3, v0 8642; GFX1200-NEXT: v_pk_mul_lo_u16 v0, v0, v1 8643; GFX1200-NEXT: s_setpc_b64 s[30:31] 8644entry: 8645 %y38 = add <2 x i16> %x, <i16 1, i16 1> 8646 %add = mul <2 x i16> %y38, %y 8647 %mul139 = add <2 x i16> %add, %y38 8648 %add2 = mul <2 x i16> %mul139, %y 8649 %add240 = add <2 x i16> %add, <i16 1, i16 1> 8650 %add4 = mul <2 x i16> %add2, %add240 8651 %mul541 = add <2 x i16> %add4, %add240 8652 %add6 = mul <2 x i16> %mul541, %add2 8653 %add642 = add <2 x i16> %add4, <i16 1, i16 1> 8654 %add8 = mul <2 x i16> %add6, %add642 8655 %mul943 = add <2 x i16> %add8, %add642 8656 %add10 = mul <2 x i16> %mul943, %add6 8657 %add1044 = add <2 x i16> %add8, <i16 1, i16 1> 8658 %add1246 = add <2 x i16> %add10, <i16 1, i16 1> 8659 %mul1345 = mul <2 x i16> %add10, %add1044 8660 %add14 = mul <2 x i16> %mul1345, %add1246 8661 ret <2 x 
i16> %add14
}

; Multiple uses of mul with different variable addend
; A single i32 %mul feeds two separate adds (%z0 and %z1); both sums are
; returned together as a <2 x i32>.
define <2 x i32> @multi_use_mul_mad_i32_var(i32 %x, i32 %y, i32 %z0, i32 %z1) {
; GFX67-LABEL: multi_use_mul_mad_i32_var:
; GFX67: ; %bb.0: ; %entry
; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX67-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX67-NEXT: v_add_i32_e32 v0, vcc, v1, v2
; GFX67-NEXT: v_add_i32_e32 v1, vcc, v1, v3
; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: multi_use_mul_mad_i32_var:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v1, v2
; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: multi_use_mul_mad_i32_var:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX9-NEXT: v_add_u32_e32 v0, v1, v2
; GFX9-NEXT: v_add_u32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: multi_use_mul_mad_i32_var:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX10-NEXT: v_add_nc_u32_e32 v0, v1, v2
; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: multi_use_mul_mad_i32_var:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_nc_u32_e32 v0, v1, v2
; GFX11-NEXT: v_add_nc_u32_e32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1200-LABEL: multi_use_mul_mad_i32_var:
; GFX1200: ; %bb.0: ; %entry
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1200-NEXT: s_wait_expcnt 0x0
; GFX1200-NEXT: s_wait_samplecnt 0x0
; GFX1200-NEXT: s_wait_bvhcnt 0x0
; GFX1200-NEXT: s_wait_kmcnt 0x0
; GFX1200-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-NEXT: v_add_nc_u32_e32 v0, v1, v2
; GFX1200-NEXT: v_add_nc_u32_e32 v1, v1, v3
; GFX1200-NEXT: s_setpc_b64 s[30:31]
entry:
  %mul = mul i32 %x, %y
  %add0 = add i32 %mul, %z0
  %add1 = add i32 %mul, %z1
  %insert.0 = insertelement <2 x i32> poison, i32 %add0, i32 0
  %insert.1 = insertelement <2 x i32> %insert.0, i32 %add1, i32 1
  ret <2 x i32> %insert.1
}

; i16 form of the multi-use pattern: one i16 %mul, two adds with different
; variable addends (%z0 and %z1), sums inserted into a <2 x i16> result.
define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) {
; GFX67-LABEL: multi_use_mul_mad_i16_var:
; GFX67: ; %bb.0: ; %entry
; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX67-NEXT: v_and_b32_e32 v4, 0xffff, v0
; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-NEXT: v_mad_u32_u24 v0, v4, v1, v2
; GFX67-NEXT: v_mad_u32_u24 v1, v4, v1, v3
; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: multi_use_mul_mad_i16_var:
; GFX8-SDAG: ; %bb.0: ; %entry
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT: v_mad_u16 v3, v0, v1, v3
; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v1, v2
; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v3
; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: multi_use_mul_mad_i16_var:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_mad_u16 v2, v0, v1, v2
; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v1, v3
; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: multi_use_mul_mad_i16_var:
; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_mad_legacy_u16 v2, v0, v1, v2
; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v0, v1, v3
; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x5040100
; GFX9-SDAG-NEXT: v_perm_b32 v0, v0, v2, s4
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: multi_use_mul_mad_i16_var:
; GFX9-GISEL: ; %bb.0: ; %entry
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_mad_legacy_u16 v2, v0, v1, v2
; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v1, v3
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: multi_use_mul_mad_i16_var:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_mad_u16 v2, v0, v1, v2
; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v1, v3
; GFX10-SDAG-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: multi_use_mul_mad_i16_var:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_mad_u16 v2, v0, v1, v2
; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, v3
; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: multi_use_mul_mad_i16_var:
; GFX11-SDAG: ; %bb.0: ; %entry
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_mad_u16 v2, v0, v1, v2
; GFX11-SDAG-NEXT: v_mad_u16 v0, v0, v1, v3
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: multi_use_mul_mad_i16_var:
; GFX11-GISEL: ; %bb.0: ; %entry
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_mad_u16 v2, v0, v1, v2
; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1200-SDAG-LABEL: multi_use_mul_mad_i16_var:
; GFX1200-SDAG: ; %bb.0: ; %entry
; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0
; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0
; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0
; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1200-SDAG-NEXT: v_mad_u16 v2, v0, v1, v2
; GFX1200-SDAG-NEXT: v_mad_u16 v0, v0, v1, v3
; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-SDAG-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1200-GISEL-LABEL: multi_use_mul_mad_i16_var:
; GFX1200-GISEL: ; %bb.0: ; %entry
; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0
; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1200-GISEL-NEXT: v_mad_u16 v2, v0, v1, v2
; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, v3
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX1200-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
  %mul = mul i16 %x, %y
  %add0 = add i16 %mul, %z0
  %add1 = add i16 %mul, %z1
  %insert.0 = insertelement <2 x i16> poison, i16 %add0, i16 0
  %insert.1 = insertelement <2 x i16> %insert.0, i16 %add1, i16 1
  ret <2 x i16> %insert.1
}

; The i32 %mul has one add use (with %z) and one non-add use: the bare product
; is also stored through the addrspace(3) pointer %ptr.
define i32 @other_use_mul_mad_i32_var(i32 %x, i32 %y, i32 %z, ptr addrspace(3) %ptr) {
; GFX67-LABEL: other_use_mul_mad_i32_var:
; GFX67: ; %bb.0: ; %entry
; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX67-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX67-NEXT: s_mov_b32 m0, -1
; GFX67-NEXT: v_add_i32_e32 v0, vcc, v1, v2
; GFX67-NEXT: ds_write_b32 v3, v1
; GFX67-NEXT: s_waitcnt lgkmcnt(0)
; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: other_use_mul_mad_i32_var:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v1, v2
; GFX8-NEXT: ds_write_b32 v3, v1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: other_use_mul_mad_i32_var:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX9-NEXT: v_add_u32_e32 v0, v1, v2
; GFX9-NEXT: ds_write_b32 v3, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: other_use_mul_mad_i32_var:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX10-NEXT: v_add_nc_u32_e32 v0, v1, v2
; GFX10-NEXT: ds_write_b32 v3, v1
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: other_use_mul_mad_i32_var:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_nc_u32_e32 v0, v1, v2
; GFX11-NEXT: ds_store_b32 v3, v1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1200-LABEL: other_use_mul_mad_i32_var:
; GFX1200: ; %bb.0: ; %entry
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1200-NEXT: s_wait_expcnt 0x0
; GFX1200-NEXT: s_wait_samplecnt 0x0
; GFX1200-NEXT: s_wait_bvhcnt 0x0
; GFX1200-NEXT: s_wait_kmcnt 0x0
; GFX1200-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-NEXT: v_add_nc_u32_e32 v0, v1, v2
; GFX1200-NEXT: ds_store_b32 v3, v1
; GFX1200-NEXT: s_wait_dscnt 0x0
; GFX1200-NEXT: s_setpc_b64 s[30:31]
entry:
  %mul = mul i32 %x, %y
  %add0 = add i32 %mul, %z
  store i32 %mul, ptr addrspace(3) %ptr
  ret i32 %add0
}

; i16 form of the add-plus-store pattern: %mul is added to %z and the bare
; product is also stored through the addrspace(3) pointer %ptr.
define i16 @other_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z, ptr addrspace(3) %ptr) {
; GFX67-LABEL: other_use_mul_mad_i16_var:
; GFX67: ; %bb.0: ; %entry
; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-NEXT: v_mul_u32_u24_e32 v4, v0, v1
; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, v2
; GFX67-NEXT: s_mov_b32 m0, -1
; GFX67-NEXT: ds_write_b16 v3, v4
; GFX67-NEXT: s_waitcnt lgkmcnt(0)
; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: other_use_mul_mad_i16_var:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_lo_u16_e32 v4, v0, v1
; GFX8-NEXT: v_mad_u16 v0, v0, v1, v2
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: ds_write_b16 v3, v4
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: other_use_mul_mad_i16_var:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_lo_u16_e32 v4, v0, v1
; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v1, v2
; GFX9-NEXT: ds_write_b16 v3, v4
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: other_use_mul_mad_i16_var:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_lo_u16 v4, v0, v1
; GFX10-NEXT: v_mad_u16 v0, v0, v1, v2
; GFX10-NEXT: ds_write_b16 v3, v4
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: other_use_mul_mad_i16_var:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_lo_u16 v4, v0, v1
; GFX11-NEXT: v_mad_u16 v0, v0, v1, v2
; GFX11-NEXT: ds_store_b16 v3, v4
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1200-LABEL: other_use_mul_mad_i16_var:
; GFX1200: ; %bb.0: ; %entry
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1200-NEXT: s_wait_expcnt 0x0
; GFX1200-NEXT: s_wait_samplecnt 0x0
; GFX1200-NEXT: s_wait_bvhcnt 0x0
; GFX1200-NEXT: s_wait_kmcnt 0x0
; GFX1200-NEXT: v_mul_lo_u16 v4, v0, v1
; GFX1200-NEXT: v_mad_u16 v0, v0, v1, v2
; GFX1200-NEXT: ds_store_b16 v3, v4
; GFX1200-NEXT: s_wait_dscnt 0x0
; GFX1200-NEXT: s_setpc_b64 s[30:31]
entry:
  %mul = mul i16 %x, %y
  %add0 = add i16 %mul, %z
  store i16 %mul, ptr addrspace(3) %ptr
  ret i16 %add0
}

; <2 x i16> form of the multi-use pattern: one vector %mul feeds two adds
; (%z0 and %z1); the shufflevector concatenates the two sums into a <4 x i16>.
define <4 x i16> @multi_use_mul_mad_v2i16_var(<2 x i16> %x, <2 x i16> %y, <2 x i16> %z0, <2 x i16> %z1) {
; GFX67-SDAG-LABEL: multi_use_mul_mad_v2i16_var:
; GFX67-SDAG: ; %bb.0: ; %entry
; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v0
; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-SDAG-NEXT: v_mad_u32_u24 v5, v1, v3, v5
; GFX67-SDAG-NEXT: v_mad_u32_u24 v4, v8, v2, v4
; GFX67-SDAG-NEXT: v_mad_u32_u24 v3, v1, v3, v7
; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v8, v2, v6
; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v5
; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v3
; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX67-SDAG-NEXT: v_or_b32_e32 v0, v4, v0
; GFX67-SDAG-NEXT: v_or_b32_e32 v2, v2, v1
; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v5
; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX67-GISEL-LABEL: multi_use_mul_mad_v2i16_var:
; GFX67-GISEL: ; %bb.0: ; %entry
; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v8, v2, v4
; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v9, v3, v5
; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v8, v2, v6
; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v9, v3, v7
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: multi_use_mul_mad_v2i16_var:
; GFX8-SDAG: ; %bb.0: ; %entry
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v1
; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX8-SDAG-NEXT: v_mad_u16 v6, v5, v4, v6
; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX8-SDAG-NEXT: v_mad_u16 v2, v0, v1, v2
; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v6
; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v3
; GFX8-SDAG-NEXT: v_mad_u16 v4, v5, v4, v6
; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v1, v3
; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v0, v4
; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, v2
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: multi_use_mul_mad_v2i16_var:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX8-GISEL-NEXT: v_mad_u16 v6, v4, v5, v6
; GFX8-GISEL-NEXT: v_mad_u16 v2, v0, v1, v2
; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v2, v6
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v3
; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v1, v3
; GFX8-GISEL-NEXT: v_mad_u16 v1, v4, v5, v6
; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v0, v1
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v2
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: multi_use_mul_mad_v2i16_var:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mad_u16 v2, v0, v1, v2
; GFX9-NEXT: v_pk_mad_u16 v1, v0, v1, v3
; GFX9-NEXT: v_mov_b32_e32 v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: multi_use_mul_mad_v2i16_var:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_mad_u16 v2, v0, v1, v2
; GFX10-NEXT: v_pk_mad_u16 v1, v0, v1, v3
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: multi_use_mul_mad_v2i16_var:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_mad_u16 v2, v0, v1, v2
; GFX11-NEXT: v_pk_mad_u16 v1, v0, v1, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_mov_b32_e32 v0, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1200-LABEL: multi_use_mul_mad_v2i16_var:
; GFX1200: ; %bb.0: ; %entry
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1200-NEXT: s_wait_expcnt 0x0
; GFX1200-NEXT: s_wait_samplecnt 0x0
; GFX1200-NEXT: s_wait_bvhcnt 0x0
; GFX1200-NEXT: s_wait_kmcnt 0x0
; GFX1200-NEXT: v_pk_mad_u16 v2, v0, v1, v2
; GFX1200-NEXT: v_pk_mad_u16 v1, v0, v1, v3
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1200-NEXT: v_mov_b32_e32 v0, v2
; GFX1200-NEXT: s_setpc_b64 s[30:31]
entry:
  %mul = mul <2 x i16> %x, %y
  %add0 = add <2 x i16> %mul, %z0
  %add1 = add <2 x i16> %mul, %z1
  %shuffle = shufflevector <2 x i16> %add0, <2 x i16> %add1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i16> %shuffle
}

; <2 x i16> form of the add-plus-store pattern: the vector %mul is added to %z
; and the bare product is also stored through the addrspace(3) pointer %ptr.
define <2 x i16> @other_use_mul_mad_v2i16_var(<2 x i16> %x, <2 x i16> %y, <2 x i16> %z, ptr addrspace(3) %ptr) {
; GFX67-SDAG-LABEL: other_use_mul_mad_v2i16_var:
; GFX67-SDAG: ; %bb.0: ; %entry
; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v7, v0, v2
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v8, v1, v3
; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, v5
; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, v4
; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v8, 16, v8
; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-SDAG-NEXT: v_or_b32_e32 v7, v7, v8
; GFX67-SDAG-NEXT: v_or_b32_e32 v0, v0, v3
; GFX67-SDAG-NEXT: s_mov_b32 m0, -1
; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-SDAG-NEXT: ds_write_b32 v6, v7
; GFX67-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX67-GISEL-LABEL: other_use_mul_mad_v2i16_var:
; GFX67-GISEL: ; %bb.0: ; %entry
; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v8, v1, v3
; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v0, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8
; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8
; GFX67-GISEL-NEXT: v_or_b32_e32 v7, v7, v8
; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, v4
; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, v5
; GFX67-GISEL-NEXT: s_mov_b32 m0, -1
; GFX67-GISEL-NEXT: ds_write_b32 v6, v7
; GFX67-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: other_use_mul_mad_v2i16_var:
; GFX8-SDAG: ; %bb.0: ; %entry
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v1
; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX8-SDAG-NEXT: v_mul_lo_u16_sdwa v6, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v7, v0, v1
; GFX8-SDAG-NEXT: v_or_b32_e32 v6, v7, v6
; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v7, 16, v2
; GFX8-SDAG-NEXT: v_mad_u16 v4, v5, v4, v7
; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v1, v2
; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-SDAG-NEXT: s_mov_b32 m0, -1
; GFX8-SDAG-NEXT: ds_write_b32 v3, v6
; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: other_use_mul_mad_v2i16_var:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v6, v0, v1
; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v7, v4, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-GISEL-NEXT: v_or_b32_e32 v6, v6, v7
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v2
; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v1, v2
; GFX8-GISEL-NEXT: v_mad_u16 v1, v4, v5, v7
; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_mov_b32 m0, -1
; GFX8-GISEL-NEXT: ds_write_b32 v3, v6
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: other_use_mul_mad_v2i16_var:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_lo_u16 v4, v0, v1
; GFX9-NEXT: v_pk_mad_u16 v0, v0, v1, v2
; GFX9-NEXT: ds_write_b32 v3, v4
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: other_use_mul_mad_v2i16_var:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_mul_lo_u16 v4, v0, v1
; GFX10-NEXT: v_pk_mad_u16 v0, v0, v1, v2
; GFX10-NEXT: ds_write_b32 v3, v4
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: other_use_mul_mad_v2i16_var:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_mul_lo_u16 v4, v0, v1
; GFX11-NEXT: v_pk_mad_u16 v0, v0, v1, v2
; GFX11-NEXT: ds_store_b32 v3, v4
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1200-LABEL: other_use_mul_mad_v2i16_var:
; GFX1200: ; %bb.0: ; %entry
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1200-NEXT: s_wait_expcnt 0x0
; GFX1200-NEXT: s_wait_samplecnt 0x0
; GFX1200-NEXT: s_wait_bvhcnt 0x0
; GFX1200-NEXT: s_wait_kmcnt 0x0
; GFX1200-NEXT: v_pk_mul_lo_u16 v4, v0, v1
; GFX1200-NEXT: v_pk_mad_u16 v0, v0, v1, v2
; GFX1200-NEXT: ds_store_b32 v3, v4
; GFX1200-NEXT: s_wait_dscnt 0x0
; GFX1200-NEXT: s_setpc_b64 s[30:31]
entry:
  %mul = mul <2 x i16> %x, %y
  %add0 = add <2 x i16> %mul, %z
  store <2 x i16> %mul, ptr addrspace(3) %ptr
  ret <2 x i16> %add0
}

; The i64 result of the llvm.amdgcn.mul.u24.i64 intrinsic is added directly to
; the i64 addend %z.
define i64 @mul_u24_add64(i32 %x, i32 %y, i64 %z) {
; GFX67-LABEL: mul_u24_add64:
; GFX67: ; %bb.0:
; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX67-NEXT: v_mul_hi_u32_u24_e32 v4, v0, v1
; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
; GFX67-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX67-NEXT: v_addc_u32_e32 v1, vcc, v4, v3, vcc
; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: mul_u24_add64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_hi_u32_u24_e32 v4, v0, v1
; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v3, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: mul_u24_add64:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3]
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: mul_u24_add64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_mul_hi_u32_u24_e32 v4, v0, v1
; GFX9-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v4, v3, vcc
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: mul_u24_add64:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[2:3]
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: mul_u24_add64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v1
; GFX10-GISEL-NEXT: v_mul_hi_u32_u24_e32 v1, v0, v1
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v4, v2
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: mul_u24_add64:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: mul_u24_add64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v1
; GFX11-GISEL-NEXT: v_mul_hi_u32_u24_e32 v1, v0, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v4, v2
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1200-LABEL: mul_u24_add64:
; GFX1200: ; %bb.0:
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1200-NEXT: s_wait_expcnt 0x0
; GFX1200-NEXT: s_wait_samplecnt 0x0
; GFX1200-NEXT: s_wait_bvhcnt 0x0
; GFX1200-NEXT: s_wait_kmcnt 0x0
; GFX1200-NEXT: v_mad_co_u64_u32 v[0:1], null, v0, v1, v[2:3]
; GFX1200-NEXT: s_setpc_b64 s[30:31]
  %mul = call i64 @llvm.amdgcn.mul.u24.i64(i32 %x, i32 %y)
  %add = add i64 %mul, %z
  ret i64 %add
}

; The i32 result of llvm.amdgcn.mul.u24 is zero-extended to i64 and then added
; to the i64 addend %z.
define i64 @mul_u24_zext_add64(i32 %x, i32 %y, i64 %z) {
; GFX67-LABEL: mul_u24_zext_add64:
; GFX67: ; %bb.0:
; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
; GFX67-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX67-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: mul_u24_zext_add64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: mul_u24_zext_add64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: mul_u24_zext_add64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: mul_u24_zext_add64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_u32_u24_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX1200-LABEL: mul_u24_zext_add64:
; GFX1200: ; %bb.0:
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1200-NEXT: s_wait_expcnt 0x0
; GFX1200-NEXT: s_wait_samplecnt 0x0
; GFX1200-NEXT: s_wait_bvhcnt 0x0
; GFX1200-NEXT: s_wait_kmcnt 0x0
; GFX1200-NEXT: v_mul_u32_u24_e32 v0, v0, v1
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1200-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
; GFX1200-NEXT: s_setpc_b64 s[30:31]
  %mul = call i32 @llvm.amdgcn.mul.u24(i32 %x, i32 %y)
  %mul.zext = zext i32 %mul to i64
  %add = add i64 %mul.zext, %z
  ret i64 %add
}

; Declarations of the two mul.u24 intrinsics called by the mul_u24_* tests.
declare i64 @llvm.amdgcn.mul.u24.i64(i32, i32)
declare i32 @llvm.amdgcn.mul.u24(i32, i32)

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX6: {{.*}}
; GFX7: {{.*}}
; GFX900: {{.*}}
; GFX90A: {{.*}}