; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-FAKE16 %s

; This test covers instruction selection for the 16-bit pattern
; (mul i16 a, b) + c, with operands coming from memory, from ordinary
; function arguments and from inreg arguments of amdgpu_ps functions, and
; with the i16 result optionally extended to i32 or i64.

; FIXME: GFX9 should be producing v_mad_u16 instead of v_mad_legacy_u16.

; Kernel form: volatile-loads a[tid], b[tid] and c[tid] (tid is the x
; workitem id) and stores a*b+c through the base pointer %r (the store is
; not indexed by tid).
define amdgpu_kernel void @mad_u16(
; GFX8-LABEL: mad_u16:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 1, v0
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s2, v4
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    v_mov_b32_e32 v3, s5
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, s4, v4
; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-NEXT:    v_mov_b32_e32 v5, s7
; GFX8-NEXT:    v_add_u32_e32 v4, vcc, s6, v4
; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
; GFX8-NEXT:    flat_load_ushort v6, v[0:1] glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    flat_load_ushort v2, v[2:3] glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    flat_load_ushort v3, v[4:5] glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    v_mad_u16 v2, v6, v2, v3
; GFX8-NEXT:    flat_store_short v[0:1], v2
; GFX8-NEXT:    s_endpgm
;
; GFX9-LABEL: mad_u16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ushort v1, v0, s[10:11] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_load_ushort v2, v0, s[12:13] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_load_ushort v3, v0, s[14:15] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mad_legacy_u16 v1, v1, v2, v3
; GFX9-NEXT:    global_store_short v0, v1, s[8:9]
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: mad_u16:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ushort v1, v0, s[10:11] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_load_ushort v2, v0, s[12:13] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_load_ushort v3, v0, s[14:15] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    v_mad_u16 v1, v1, v2, v3
; GFX10-NEXT:    global_store_short v0, v1, s[8:9]
; GFX10-NEXT:    s_endpgm
;
; GFX11-TRUE16-LABEL: mad_u16:
; GFX11-TRUE16:       ; %bb.0: ; %entry
; GFX11-TRUE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-TRUE16-NEXT:    global_load_u16 v1, v0, s[2:3] glc dlc
; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT:    global_load_u16 v2, v0, s[4:5] glc dlc
; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT:    global_load_u16 v3, v0, s[6:7] glc dlc
; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v1.l
; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v2.l
; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v3.l
; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT:    v_mad_u16 v0.l, v0.l, v0.h, v1.l
; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
; GFX11-TRUE16-NEXT:    global_store_b16 v1, v0, s[0:1]
; GFX11-TRUE16-NEXT:    s_endpgm
;
; GFX11-FAKE16-LABEL: mad_u16:
; GFX11-FAKE16:       ; %bb.0: ; %entry
; GFX11-FAKE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-FAKE16-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT:    global_load_u16 v1, v0, s[2:3] glc dlc
; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-FAKE16-NEXT:    global_load_u16 v2, v0, s[4:5] glc dlc
; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-FAKE16-NEXT:    global_load_u16 v0, v0, s[6:7] glc dlc
; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-FAKE16-NEXT:    v_mad_u16 v0, v1, v2, v0
; GFX11-FAKE16-NEXT:    global_store_b16 v3, v0, s[0:1]
; GFX11-FAKE16-NEXT:    s_endpgm
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a, i32 %tid
  %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b, i32 %tid
  %c.gep = getelementptr inbounds i16, ptr addrspace(1) %c, i32 %tid

  %a.val = load volatile i16, ptr addrspace(1) %a.gep
  %b.val = load volatile i16, ptr addrspace(1) %b.gep
  %c.val = load volatile i16, ptr addrspace(1) %c.gep

  %m.val = mul i16 %a.val, %b.val
  %r.val = add i16 %m.val, %c.val

  store i16 %r.val, ptr addrspace(1) %r
  ret void
}

; Plain function form: arg0*arg1+arg2 on i16 arguments, returned as i16.
; The expected output reads the operands from v0, v1 and v2.
define i16 @v_mad_u16(i16 %arg0, i16 %arg1, i16 %arg2) {
; GFX8-LABEL: v_mad_u16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mad_u16 v0, v0, v1, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_mad_u16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mad_legacy_u16 v0, v0, v1, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_mad_u16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mad_u16 v0, v0, v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_mad_u16:
; GFX11-TRUE16:       ; %bb.0:
; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT:    v_mad_u16 v0.l, v0.l, v0.h, v1.l
; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_mad_u16:
; GFX11-FAKE16:       ; %bb.0:
; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT:    v_mad_u16 v0, v0, v1, v2
; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
  %mul = mul i16 %arg0, %arg1
  %add = add i16 %mul, %arg2
  ret i16 %add
}

; Same multiply-add as @v_mad_u16, with the i16 result zero-extended to
; i32. The gfx803 and gfx900 expectations contain no separate mask
; instruction; the gfx1010 and gfx1100 expectations mask with 0xffff.
define i32 @v_mad_u16_zext(i16 %arg0, i16 %arg1, i16 %arg2) {
; GFX8-LABEL: v_mad_u16_zext:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mad_u16 v0, v0, v1, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_mad_u16_zext:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mad_legacy_u16 v0, v0, v1, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_mad_u16_zext:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mad_u16 v0, v0, v1, v2
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_mad_u16_zext:
; GFX11-TRUE16:       ; %bb.0:
; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT:    v_mad_u16 v0.l, v0.l, v0.h, v1.l
; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_mad_u16_zext:
; GFX11-FAKE16:       ; %bb.0:
; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT:    v_mad_u16 v0, v0, v1, v2
; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
  %mul = mul i16 %arg0, %arg1
  %add = add i16 %mul, %arg2
  %zext = zext i16 %add to i32
  ret i32 %zext
}

; Same multiply-add as @v_mad_u16, with the i16 result zero-extended to
; i64; every expectation sets the high half (v1) to 0.
define i64 @v_mad_u16_zext64(i16 %arg0, i16 %arg1, i16 %arg2) {
; GFX8-LABEL: v_mad_u16_zext64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mad_u16 v0, v0, v1, v2
; GFX8-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_mad_u16_zext64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mad_legacy_u16 v0, v0, v1, v2
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_mad_u16_zext64:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mad_u16 v0, v0, v1, v2
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_mad_u16_zext64:
; GFX11-TRUE16:       ; %bb.0:
; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT:    v_mad_u16 v0.l, v0.l, v0.h, v1.l
; GFX11-TRUE16-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0
; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_mad_u16_zext64:
; GFX11-FAKE16:       ; %bb.0:
; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT:    v_mad_u16 v0, v0, v1, v2
; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0
; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
  %mul = mul i16 %arg0, %arg1
  %add = add i16 %mul, %arg2
  %zext = zext i16 %add to i64
  ret i64 %zext
}

; amdgpu_ps form with inreg arguments: the expected output for every
; target is an s_mul_i32 followed by an s_add_i32 rather than a mad
; instruction.
define amdgpu_ps i16 @s_mad_u16(i16 inreg %arg0, i16 inreg %arg1, i16 inreg %arg2) {
; GFX8-LABEL: s_mad_u16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mul_i32 s0, s0, s1
; GFX8-NEXT:    s_add_i32 s0, s0, s2
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_mad_u16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mul_i32 s0, s0, s1
; GFX9-NEXT:    s_add_i32 s0, s0, s2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_mad_u16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mul_i32 s0, s0, s1
; GFX10-NEXT:    s_add_i32 s0, s0, s2
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: s_mad_u16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_mul_i32 s0, s0, s1
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_add_i32 s0, s0, s2
; GFX11-NEXT:    ; return to shader part epilog
  %mul = mul i16 %arg0, %arg1
  %add = add i16 %mul, %arg2
  ret i16 %add
}

; Same as @s_mad_u16 with the result zero-extended to i32; the expected
; output masks the sum with 0xffff.
define amdgpu_ps i32 @s_mad_u16_zext(i16 inreg %arg0, i16 inreg %arg1, i16 inreg %arg2) {
; GFX8-LABEL: s_mad_u16_zext:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mul_i32 s0, s0, s1
; GFX8-NEXT:    s_add_i32 s0, s0, s2
; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_mad_u16_zext:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mul_i32 s0, s0, s1
; GFX9-NEXT:    s_add_i32 s0, s0, s2
; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_mad_u16_zext:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mul_i32 s0, s0, s1
; GFX10-NEXT:    s_add_i32 s0, s0, s2
; GFX10-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: s_mad_u16_zext:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_mul_i32 s0, s0, s1
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_add_i32 s0, s0, s2
; GFX11-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX11-NEXT:    ; return to shader part epilog
  %mul = mul i16 %arg0, %arg1
  %add = add i16 %mul, %arg2
  %zext = zext i16 %add to i32
  ret i32 %zext
}

; Same as @s_mad_u16 with the result zero-extended to i64; the expected
; output masks the sum with 0xffff and sets the high half (s1) to 0.
define amdgpu_ps i64 @s_mad_u16_zext64(i16 inreg %arg0, i16 inreg %arg1, i16 inreg %arg2) {
; GFX8-LABEL: s_mad_u16_zext64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mul_i32 s0, s0, s1
; GFX8-NEXT:    s_add_i32 s0, s0, s2
; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX8-NEXT:    s_mov_b32 s1, 0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_mad_u16_zext64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mul_i32 s0, s0, s1
; GFX9-NEXT:    s_add_i32 s0, s0, s2
; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX9-NEXT:    s_mov_b32 s1, 0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_mad_u16_zext64:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mul_i32 s0, s0, s1
; GFX10-NEXT:    s_mov_b32 s1, 0
; GFX10-NEXT:    s_add_i32 s0, s0, s2
; GFX10-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: s_mad_u16_zext64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_mul_i32 s0, s0, s1
; GFX11-NEXT:    s_mov_b32 s1, 0
; GFX11-NEXT:    s_add_i32 s0, s0, s2
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX11-NEXT:    ; return to shader part epilog
  %mul = mul i16 %arg0, %arg1
  %add = add i16 %mul, %arg2
  %zext = zext i16 %add to i64
  ret i64 %zext
}

; Same as @s_mad_u16 with the result sign-extended to i32; the expected
; output applies s_sext_i32_i16 to the sum.
define amdgpu_ps i32 @s_mad_u16_sext(i16 inreg %arg0, i16 inreg %arg1, i16 inreg %arg2) {
; GFX8-LABEL: s_mad_u16_sext:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mul_i32 s0, s0, s1
; GFX8-NEXT:    s_add_i32 s0, s0, s2
; GFX8-NEXT:    s_sext_i32_i16 s0, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_mad_u16_sext:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mul_i32 s0, s0, s1
; GFX9-NEXT:    s_add_i32 s0, s0, s2
; GFX9-NEXT:    s_sext_i32_i16 s0, s0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_mad_u16_sext:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mul_i32 s0, s0, s1
; GFX10-NEXT:    s_add_i32 s0, s0, s2
; GFX10-NEXT:    s_sext_i32_i16 s0, s0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: s_mad_u16_sext:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_mul_i32 s0, s0, s1
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_add_i32 s0, s0, s2
; GFX11-NEXT:    s_sext_i32_i16 s0, s0
; GFX11-NEXT:    ; return to shader part epilog
  %mul = mul i16 %arg0, %arg1
  %add = add i16 %mul, %arg2
  %sext = sext i16 %add to i32
  ret i32 %sext
}

declare i32 @llvm.amdgcn.workitem.id.x()
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}