1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s 3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s 4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s 5; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s 6; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s 7 8define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) { 9; GFX9-LABEL: extractelement_sgpr_v4i16_sgpr_idx: 10; GFX9: ; %bb.0: 11; GFX9-NEXT: s_and_b32 s0, s4, 3 12; GFX9-NEXT: s_lshl_b32 s0, s0, 1 13; GFX9-NEXT: v_mov_b32_e32 v0, s0 14; GFX9-NEXT: global_load_ushort v0, v0, s[2:3] 15; GFX9-NEXT: s_waitcnt vmcnt(0) 16; GFX9-NEXT: v_readfirstlane_b32 s0, v0 17; GFX9-NEXT: ; return to shader part epilog 18; 19; GFX8-LABEL: extractelement_sgpr_v4i16_sgpr_idx: 20; GFX8: ; %bb.0: 21; GFX8-NEXT: s_and_b32 s0, s4, 3 22; GFX8-NEXT: s_lshl_b32 s0, s0, 1 23; GFX8-NEXT: s_add_u32 s0, s2, s0 24; GFX8-NEXT: s_addc_u32 s1, s3, 0 25; GFX8-NEXT: v_mov_b32_e32 v0, s0 26; GFX8-NEXT: v_mov_b32_e32 v1, s1 27; GFX8-NEXT: flat_load_ushort v0, v[0:1] 28; GFX8-NEXT: s_waitcnt vmcnt(0) 29; GFX8-NEXT: v_readfirstlane_b32 s0, v0 30; GFX8-NEXT: ; return to shader part epilog 31; 32; GFX7-LABEL: extractelement_sgpr_v4i16_sgpr_idx: 33; GFX7: ; %bb.0: 34; GFX7-NEXT: s_mov_b32 s0, s2 35; GFX7-NEXT: s_and_b32 s2, s4, 3 36; GFX7-NEXT: s_lshl_b32 s4, s2, 1 37; GFX7-NEXT: s_mov_b32 s5, 0 38; GFX7-NEXT: v_mov_b32_e32 v0, s4 39; GFX7-NEXT: s_mov_b32 s1, s3 40; GFX7-NEXT: s_mov_b32 s3, 0xf000 41; GFX7-NEXT: s_mov_b32 s2, s5 42; GFX7-NEXT: v_mov_b32_e32 v1, s5 43; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64 44; GFX7-NEXT: s_waitcnt vmcnt(0) 45; GFX7-NEXT: v_readfirstlane_b32 s0, v0 46; GFX7-NEXT: ; return to shader part epilog 47; 48; GFX10-LABEL: extractelement_sgpr_v4i16_sgpr_idx: 49; GFX10: ; %bb.0: 50; GFX10-NEXT: s_and_b32 s0, s4, 3 51; GFX10-NEXT: s_lshl_b32 s0, s0, 1 52; GFX10-NEXT: v_mov_b32_e32 v0, s0 53; GFX10-NEXT: global_load_ushort v0, v0, s[2:3] 54; GFX10-NEXT: s_waitcnt vmcnt(0) 55; GFX10-NEXT: v_readfirstlane_b32 s0, v0 56; GFX10-NEXT: ; return to shader part epilog 57; 58; GFX11-LABEL: extractelement_sgpr_v4i16_sgpr_idx: 59; GFX11: ; %bb.0: 60; GFX11-NEXT: s_and_b32 s0, s4, 3 61; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 62; GFX11-NEXT: s_lshl_b32 s0, s0, 1 63; GFX11-NEXT: v_mov_b32_e32 v0, s0 64; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] 65; GFX11-NEXT: s_waitcnt vmcnt(0) 66; GFX11-NEXT: v_readfirstlane_b32 s0, v0 67; GFX11-NEXT: ; return to shader part epilog 68 %vector = load <4 x i16>, ptr addrspace(4) %ptr 69 %element = extractelement <4 x i16> %vector, i32 %idx 70 ret i16 %element 71} 72 73define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) { 74; GFX9-LABEL: extractelement_vgpr_v4i16_sgpr_idx: 75; GFX9: ; %bb.0: 76; GFX9-NEXT: s_and_b32 s0, s2, 3 77; GFX9-NEXT: s_mov_b32 s1, 0 78; GFX9-NEXT: s_lshl_b32 s0, s0, 1 79; GFX9-NEXT: v_mov_b32_e32 v3, s1 80; GFX9-NEXT: v_mov_b32_e32 v2, s0 81; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 82; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc 83; GFX9-NEXT: global_load_ushort v0, v[0:1], off 84; GFX9-NEXT: s_waitcnt vmcnt(0) 85; GFX9-NEXT: v_readfirstlane_b32 s0, v0 86; GFX9-NEXT: ; return to shader part epilog 87; 88; GFX8-LABEL: extractelement_vgpr_v4i16_sgpr_idx: 89; GFX8: ; %bb.0: 90; GFX8-NEXT: s_and_b32 s0, s2, 3 91; GFX8-NEXT: s_mov_b32 s1, 0 92; GFX8-NEXT: s_lshl_b32 s0, s0, 1 93; GFX8-NEXT: v_mov_b32_e32 v3, s1 94; GFX8-NEXT: v_mov_b32_e32 v2, s0 95; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 96; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc 97; GFX8-NEXT: flat_load_ushort v0, v[0:1] 98; GFX8-NEXT: s_waitcnt vmcnt(0) 99; GFX8-NEXT: v_readfirstlane_b32 s0, v0 100; GFX8-NEXT: ; return to shader part epilog 101; 102; GFX7-LABEL: extractelement_vgpr_v4i16_sgpr_idx: 103; GFX7: ; %bb.0: 104; GFX7-NEXT: s_and_b32 s0, s2, 3 105; GFX7-NEXT: s_mov_b32 s1, 0 106; GFX7-NEXT: s_lshl_b32 s0, s0, 1 107; GFX7-NEXT: s_mov_b32 s3, 0xf000 108; GFX7-NEXT: s_mov_b32 s2, s1 109; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64 110; GFX7-NEXT: s_waitcnt vmcnt(0) 111; GFX7-NEXT: v_readfirstlane_b32 s0, v0 112; GFX7-NEXT: ; return to shader part epilog 113; 114; GFX10-LABEL: extractelement_vgpr_v4i16_sgpr_idx: 115; GFX10: ; %bb.0: 116; GFX10-NEXT: s_and_b32 s0, s2, 3 117; GFX10-NEXT: s_mov_b32 s1, 0 118; GFX10-NEXT: s_lshl_b32 s0, s0, 1 119; GFX10-NEXT: v_mov_b32_e32 v3, s1 120; GFX10-NEXT: v_mov_b32_e32 v2, s0 121; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 122; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo 123; GFX10-NEXT: global_load_ushort v0, v[0:1], off 124; GFX10-NEXT: s_waitcnt vmcnt(0) 125; GFX10-NEXT: v_readfirstlane_b32 s0, v0 126; GFX10-NEXT: ; return to shader part epilog 127; 128; GFX11-LABEL: extractelement_vgpr_v4i16_sgpr_idx: 129; GFX11: ; %bb.0: 130; GFX11-NEXT: s_and_b32 s0, s2, 3 131; GFX11-NEXT: s_mov_b32 s1, 0 132; GFX11-NEXT: s_lshl_b32 s0, s0, 1 133; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 134; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 135; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 136; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 137; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo 138; GFX11-NEXT: global_load_u16 v0, v[0:1], off 139; GFX11-NEXT: s_waitcnt vmcnt(0) 140; GFX11-NEXT: v_readfirstlane_b32 s0, v0 141; GFX11-NEXT: ; return to shader part epilog 142 %vector = load <4 x i16>, ptr addrspace(1) %ptr 143 %element = extractelement <4 x i16> %vector, i32 %idx 144 ret i16 %element 145} 146 147define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) { 148; GFX9-LABEL: extractelement_vgpr_v4i16_vgpr_idx: 149; GFX9: ; %bb.0: 150; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 151; GFX9-NEXT: v_and_b32_e32 v2, 3, v2 152; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v2 153; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 154; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 155; GFX9-NEXT: global_load_ushort v0, v[0:1], off 156; GFX9-NEXT: s_waitcnt vmcnt(0) 157; GFX9-NEXT: s_setpc_b64 s[30:31] 158; 159; GFX8-LABEL: extractelement_vgpr_v4i16_vgpr_idx: 160; GFX8: ; %bb.0: 161; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 162; GFX8-NEXT: v_and_b32_e32 v2, 3, v2 163; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v2 164; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 165; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 166; GFX8-NEXT: flat_load_ushort v0, v[0:1] 167; GFX8-NEXT: s_waitcnt vmcnt(0) 168; GFX8-NEXT: s_setpc_b64 s[30:31] 169; 170; GFX7-LABEL: extractelement_vgpr_v4i16_vgpr_idx: 171; GFX7: ; %bb.0: 172; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 173; GFX7-NEXT: v_and_b32_e32 v2, 3, v2 174; GFX7-NEXT: v_lshlrev_b32_e32 v2, 1, v2 175; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 176; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 177; GFX7-NEXT: s_mov_b32 s6, 0 178; GFX7-NEXT: s_mov_b32 s7, 0xf000 179; GFX7-NEXT: s_mov_b64 s[4:5], 0 180; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 181; GFX7-NEXT: s_waitcnt vmcnt(0) 182; GFX7-NEXT: s_setpc_b64 s[30:31] 183; 184; GFX10-LABEL: extractelement_vgpr_v4i16_vgpr_idx: 185; GFX10: ; %bb.0: 186; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 187; GFX10-NEXT: v_and_b32_e32 v2, 3, v2 188; GFX10-NEXT: v_lshlrev_b32_e32 v2, 1, v2 189; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 190; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 191; GFX10-NEXT: global_load_ushort v0, v[0:1], off 192; GFX10-NEXT: s_waitcnt vmcnt(0) 193; GFX10-NEXT: s_setpc_b64 s[30:31] 194; 195; GFX11-LABEL: extractelement_vgpr_v4i16_vgpr_idx: 196; GFX11: ; %bb.0: 197; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 198; GFX11-NEXT: v_and_b32_e32 v2, 3, v2 199; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 200; GFX11-NEXT: v_lshlrev_b32_e32 v2, 1, v2 201; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 202; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 203; GFX11-NEXT: global_load_u16 v0, v[0:1], off 204; GFX11-NEXT: s_waitcnt vmcnt(0) 205; GFX11-NEXT: s_setpc_b64 s[30:31] 206 %vector = load <4 x i16>, ptr addrspace(1) %ptr 207 %element = extractelement <4 x i16> %vector, i32 %idx 208 ret i16 %element 209} 210 211define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) { 212; GFX9-LABEL: extractelement_sgpr_v4i16_vgpr_idx: 213; GFX9: ; %bb.0: 214; GFX9-NEXT: v_and_b32_e32 v0, 3, v0 215; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 216; GFX9-NEXT: global_load_ushort v0, v0, s[2:3] 217; GFX9-NEXT: s_waitcnt vmcnt(0) 218; GFX9-NEXT: v_readfirstlane_b32 s0, v0 219; GFX9-NEXT: ; return to shader part epilog 220; 221; GFX8-LABEL: extractelement_sgpr_v4i16_vgpr_idx: 222; GFX8: ; %bb.0: 223; GFX8-NEXT: v_and_b32_e32 v0, 3, v0 224; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v0 225; GFX8-NEXT: v_mov_b32_e32 v0, s2 226; GFX8-NEXT: v_mov_b32_e32 v1, s3 227; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 228; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 229; GFX8-NEXT: flat_load_ushort v0, v[0:1] 230; GFX8-NEXT: s_waitcnt vmcnt(0) 231; GFX8-NEXT: v_readfirstlane_b32 s0, v0 232; GFX8-NEXT: ; return to shader part epilog 233; 234; GFX7-LABEL: extractelement_sgpr_v4i16_vgpr_idx: 235; GFX7: ; %bb.0: 236; GFX7-NEXT: v_and_b32_e32 v0, 3, v0 237; GFX7-NEXT: s_mov_b32 s0, s2 238; GFX7-NEXT: s_mov_b32 s1, s3 239; GFX7-NEXT: v_lshlrev_b32_e32 v0, 1, v0 240; GFX7-NEXT: v_mov_b32_e32 v1, 0 241; GFX7-NEXT: s_mov_b32 s2, 0 242; GFX7-NEXT: s_mov_b32 s3, 0xf000 243; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64 244; GFX7-NEXT: s_waitcnt vmcnt(0) 245; GFX7-NEXT: v_readfirstlane_b32 s0, v0 246; GFX7-NEXT: ; return to shader part epilog 247; 248; GFX10-LABEL: extractelement_sgpr_v4i16_vgpr_idx: 249; GFX10: ; %bb.0: 250; GFX10-NEXT: v_and_b32_e32 v0, 3, v0 251; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0 252; GFX10-NEXT: global_load_ushort v0, v0, s[2:3] 253; GFX10-NEXT: s_waitcnt vmcnt(0) 254; GFX10-NEXT: v_readfirstlane_b32 s0, v0 255; GFX10-NEXT: ; return to shader part epilog 256; 257; GFX11-LABEL: extractelement_sgpr_v4i16_vgpr_idx: 258; GFX11: ; %bb.0: 259; GFX11-NEXT: v_and_b32_e32 v0, 3, v0 260; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 261; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0 262; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] 263; GFX11-NEXT: s_waitcnt vmcnt(0) 264; GFX11-NEXT: v_readfirstlane_b32 s0, v0 265; GFX11-NEXT: ; return to shader part epilog 266 %vector = load <4 x i16>, ptr addrspace(4) %ptr 267 %element = extractelement <4 x i16> %vector, i32 %idx 268 ret i16 %element 269} 270 271define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx0(ptr addrspace(4) inreg %ptr) { 272; GFX9-LABEL: extractelement_sgpr_v4i16_idx0: 273; GFX9: ; %bb.0: 274; GFX9-NEXT: v_mov_b32_e32 v0, 0 275; GFX9-NEXT: global_load_ushort v0, v0, s[2:3] 276; GFX9-NEXT: s_waitcnt vmcnt(0) 277; GFX9-NEXT: v_readfirstlane_b32 s0, v0 278; GFX9-NEXT: ; return to shader part epilog 279; 280; GFX8-LABEL: extractelement_sgpr_v4i16_idx0: 281; GFX8: ; %bb.0: 282; GFX8-NEXT: v_mov_b32_e32 v0, s2 283; GFX8-NEXT: v_mov_b32_e32 v1, s3 284; GFX8-NEXT: flat_load_ushort v0, v[0:1] 285; GFX8-NEXT: s_waitcnt vmcnt(0) 286; GFX8-NEXT: v_readfirstlane_b32 s0, v0 287; GFX8-NEXT: ; return to shader part epilog 288; 289; GFX7-LABEL: extractelement_sgpr_v4i16_idx0: 290; GFX7: ; %bb.0: 291; GFX7-NEXT: s_mov_b32 s0, s2 292; GFX7-NEXT: s_mov_b32 s1, s3 293; GFX7-NEXT: s_mov_b32 s2, -1 294; GFX7-NEXT: s_mov_b32 s3, 0xf000 295; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 296; GFX7-NEXT: s_waitcnt vmcnt(0) 297; GFX7-NEXT: v_readfirstlane_b32 s0, v0 298; GFX7-NEXT: ; return to shader part epilog 299; 300; GFX10-LABEL: extractelement_sgpr_v4i16_idx0: 301; GFX10: ; %bb.0: 302; GFX10-NEXT: v_mov_b32_e32 v0, 0 303; GFX10-NEXT: global_load_ushort v0, v0, s[2:3] 304; GFX10-NEXT: s_waitcnt vmcnt(0) 305; GFX10-NEXT: v_readfirstlane_b32 s0, v0 306; GFX10-NEXT: ; return to shader part epilog 307; 308; GFX11-LABEL: extractelement_sgpr_v4i16_idx0: 309; GFX11: ; %bb.0: 310; GFX11-NEXT: v_mov_b32_e32 v0, 0 311; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] 312; GFX11-NEXT: s_waitcnt vmcnt(0) 313; GFX11-NEXT: v_readfirstlane_b32 s0, v0 314; GFX11-NEXT: ; return to shader part epilog 315 %vector = load <4 x i16>, ptr addrspace(4) %ptr 316 %element = extractelement <4 x i16> %vector, i32 0 317 ret i16 %element 318} 319 320define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx1(ptr addrspace(4) inreg %ptr) { 321; GFX9-LABEL: extractelement_sgpr_v4i16_idx1: 322; GFX9: ; %bb.0: 323; GFX9-NEXT: v_mov_b32_e32 v0, 0 324; GFX9-NEXT: global_load_ushort v0, v0, s[2:3] offset:2 325; GFX9-NEXT: s_waitcnt vmcnt(0) 326; GFX9-NEXT: v_readfirstlane_b32 s0, v0 327; GFX9-NEXT: ; return to shader part epilog 328; 329; GFX8-LABEL: extractelement_sgpr_v4i16_idx1: 330; GFX8: ; %bb.0: 331; GFX8-NEXT: s_add_u32 s0, s2, 2 332; GFX8-NEXT: s_addc_u32 s1, s3, 0 333; GFX8-NEXT: v_mov_b32_e32 v0, s0 334; GFX8-NEXT: v_mov_b32_e32 v1, s1 335; GFX8-NEXT: flat_load_ushort v0, v[0:1] 336; GFX8-NEXT: s_waitcnt vmcnt(0) 337; GFX8-NEXT: v_readfirstlane_b32 s0, v0 338; GFX8-NEXT: ; return to shader part epilog 339; 340; GFX7-LABEL: extractelement_sgpr_v4i16_idx1: 341; GFX7: ; %bb.0: 342; GFX7-NEXT: s_mov_b32 s0, s2 343; GFX7-NEXT: s_mov_b32 s1, s3 344; GFX7-NEXT: s_mov_b32 s2, -1 345; GFX7-NEXT: s_mov_b32 s3, 0xf000 346; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:2 347; GFX7-NEXT: s_waitcnt vmcnt(0) 348; GFX7-NEXT: v_readfirstlane_b32 s0, v0 349; GFX7-NEXT: ; return to shader part epilog 350; 351; GFX10-LABEL: extractelement_sgpr_v4i16_idx1: 352; GFX10: ; %bb.0: 353; GFX10-NEXT: v_mov_b32_e32 v0, 0 354; GFX10-NEXT: global_load_ushort v0, v0, s[2:3] offset:2 355; GFX10-NEXT: s_waitcnt vmcnt(0) 356; GFX10-NEXT: v_readfirstlane_b32 s0, v0 357; GFX10-NEXT: ; return to shader part epilog 358; 359; GFX11-LABEL: extractelement_sgpr_v4i16_idx1: 360; GFX11: ; %bb.0: 361; GFX11-NEXT: v_mov_b32_e32 v0, 0 362; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] offset:2 363; GFX11-NEXT: s_waitcnt vmcnt(0) 364; GFX11-NEXT: v_readfirstlane_b32 s0, v0 365; GFX11-NEXT: ; return to shader part epilog 366 %vector = load <4 x i16>, ptr addrspace(4) %ptr 367 %element = extractelement <4 x i16> %vector, i32 1 368 ret i16 %element 369} 370 371define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx2(ptr addrspace(4) inreg %ptr) { 372; GFX9-LABEL: extractelement_sgpr_v4i16_idx2: 373; GFX9: ; %bb.0: 374; GFX9-NEXT: v_mov_b32_e32 v0, 0 375; GFX9-NEXT: global_load_ushort v0, v0, s[2:3] offset:4 376; GFX9-NEXT: s_waitcnt vmcnt(0) 377; GFX9-NEXT: v_readfirstlane_b32 s0, v0 378; GFX9-NEXT: ; return to shader part epilog 379; 380; GFX8-LABEL: extractelement_sgpr_v4i16_idx2: 381; GFX8: ; %bb.0: 382; GFX8-NEXT: s_add_u32 s0, s2, 4 383; GFX8-NEXT: s_addc_u32 s1, s3, 0 384; GFX8-NEXT: v_mov_b32_e32 v0, s0 385; GFX8-NEXT: v_mov_b32_e32 v1, s1 386; GFX8-NEXT: flat_load_ushort v0, v[0:1] 387; GFX8-NEXT: s_waitcnt vmcnt(0) 388; GFX8-NEXT: v_readfirstlane_b32 s0, v0 389; GFX8-NEXT: ; return to shader part epilog 390; 391; GFX7-LABEL: extractelement_sgpr_v4i16_idx2: 392; GFX7: ; %bb.0: 393; GFX7-NEXT: s_mov_b32 s0, s2 394; GFX7-NEXT: s_mov_b32 s1, s3 395; GFX7-NEXT: s_mov_b32 s2, -1 396; GFX7-NEXT: s_mov_b32 s3, 0xf000 397; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4 398; GFX7-NEXT: s_waitcnt vmcnt(0) 399; GFX7-NEXT: v_readfirstlane_b32 s0, v0 400; GFX7-NEXT: ; return to shader part epilog 401; 402; GFX10-LABEL: extractelement_sgpr_v4i16_idx2: 403; GFX10: ; %bb.0: 404; GFX10-NEXT: v_mov_b32_e32 v0, 0 405; GFX10-NEXT: global_load_ushort v0, v0, s[2:3] offset:4 406; GFX10-NEXT: s_waitcnt vmcnt(0) 407; GFX10-NEXT: v_readfirstlane_b32 s0, v0 408; GFX10-NEXT: ; return to shader part epilog 409; 410; GFX11-LABEL: extractelement_sgpr_v4i16_idx2: 411; GFX11: ; %bb.0: 412; GFX11-NEXT: v_mov_b32_e32 v0, 0 413; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] offset:4 414; GFX11-NEXT: s_waitcnt vmcnt(0) 415; GFX11-NEXT: v_readfirstlane_b32 s0, v0 416; GFX11-NEXT: ; return to shader part epilog 417 %vector = load <4 x i16>, ptr addrspace(4) %ptr 418 %element = extractelement <4 x i16> %vector, i32 2 419 ret i16 %element 420} 421 422define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx3(ptr addrspace(4) inreg %ptr) { 423; GFX9-LABEL: extractelement_sgpr_v4i16_idx3: 424; GFX9: ; %bb.0: 425; GFX9-NEXT: v_mov_b32_e32 v0, 0 426; GFX9-NEXT: global_load_ushort v0, v0, s[2:3] offset:6 427; GFX9-NEXT: s_waitcnt vmcnt(0) 428; GFX9-NEXT: v_readfirstlane_b32 s0, v0 429; GFX9-NEXT: ; return to shader part epilog 430; 431; GFX8-LABEL: extractelement_sgpr_v4i16_idx3: 432; GFX8: ; %bb.0: 433; GFX8-NEXT: s_add_u32 s0, s2, 6 434; GFX8-NEXT: s_addc_u32 s1, s3, 0 435; GFX8-NEXT: v_mov_b32_e32 v0, s0 436; GFX8-NEXT: v_mov_b32_e32 v1, s1 437; GFX8-NEXT: flat_load_ushort v0, v[0:1] 438; GFX8-NEXT: s_waitcnt vmcnt(0) 439; GFX8-NEXT: v_readfirstlane_b32 s0, v0 440; GFX8-NEXT: ; return to shader part epilog 441; 442; GFX7-LABEL: extractelement_sgpr_v4i16_idx3: 443; GFX7: ; %bb.0: 444; GFX7-NEXT: s_mov_b32 s0, s2 445; GFX7-NEXT: s_mov_b32 s1, s3 446; GFX7-NEXT: s_mov_b32 s2, -1 447; GFX7-NEXT: s_mov_b32 s3, 0xf000 448; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:6 449; GFX7-NEXT: s_waitcnt vmcnt(0) 450; GFX7-NEXT: v_readfirstlane_b32 s0, v0 451; GFX7-NEXT: ; return to shader part epilog 452; 453; GFX10-LABEL: extractelement_sgpr_v4i16_idx3: 454; GFX10: ; %bb.0: 455; GFX10-NEXT: v_mov_b32_e32 v0, 0 456; GFX10-NEXT: global_load_ushort v0, v0, s[2:3] offset:6 457; GFX10-NEXT: s_waitcnt vmcnt(0) 458; GFX10-NEXT: v_readfirstlane_b32 s0, v0 459; GFX10-NEXT: ; return to shader part epilog 460; 461; GFX11-LABEL: extractelement_sgpr_v4i16_idx3: 462; GFX11: ; %bb.0: 463; GFX11-NEXT: v_mov_b32_e32 v0, 0 464; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] offset:6 465; GFX11-NEXT: s_waitcnt vmcnt(0) 466; GFX11-NEXT: v_readfirstlane_b32 s0, v0 467; GFX11-NEXT: ; return to shader part epilog 468 %vector = load <4 x i16>, ptr addrspace(4) %ptr 469 %element = extractelement <4 x i16> %vector, i32 3 470 ret i16 %element 471} 472 473define i16 @extractelement_vgpr_v4i16_idx0(ptr addrspace(1) %ptr) { 474; GFX9-LABEL: extractelement_vgpr_v4i16_idx0: 475; GFX9: ; %bb.0: 476; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 477; GFX9-NEXT: global_load_ushort v0, v[0:1], off 478; GFX9-NEXT: s_waitcnt vmcnt(0) 479; GFX9-NEXT: s_setpc_b64 s[30:31] 480; 481; GFX8-LABEL: extractelement_vgpr_v4i16_idx0: 482; GFX8: ; %bb.0: 483; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 484; GFX8-NEXT: flat_load_ushort v0, v[0:1] 485; GFX8-NEXT: s_waitcnt vmcnt(0) 486; GFX8-NEXT: s_setpc_b64 s[30:31] 487; 488; GFX7-LABEL: extractelement_vgpr_v4i16_idx0: 489; GFX7: ; %bb.0: 490; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 491; GFX7-NEXT: s_mov_b32 s6, 0 492; GFX7-NEXT: s_mov_b32 s7, 0xf000 493; GFX7-NEXT: s_mov_b64 s[4:5], 0 494; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 495; GFX7-NEXT: s_waitcnt vmcnt(0) 496; GFX7-NEXT: s_setpc_b64 s[30:31] 497; 498; GFX10-LABEL: extractelement_vgpr_v4i16_idx0: 499; GFX10: ; %bb.0: 500; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 501; GFX10-NEXT: global_load_ushort v0, v[0:1], off 502; GFX10-NEXT: s_waitcnt vmcnt(0) 503; GFX10-NEXT: s_setpc_b64 s[30:31] 504; 505; GFX11-LABEL: extractelement_vgpr_v4i16_idx0: 506; GFX11: ; %bb.0: 507; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 508; GFX11-NEXT: global_load_u16 v0, v[0:1], off 509; GFX11-NEXT: s_waitcnt vmcnt(0) 510; GFX11-NEXT: s_setpc_b64 s[30:31] 511 %vector = load <4 x i16>, ptr addrspace(1) %ptr 512 %element = extractelement <4 x i16> %vector, i32 0 513 ret i16 %element 514} 515 516define i16 @extractelement_vgpr_v4i16_idx1(ptr addrspace(1) %ptr) { 517; GFX9-LABEL: extractelement_vgpr_v4i16_idx1: 518; GFX9: ; %bb.0: 519; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 520; GFX9-NEXT: global_load_ushort v0, v[0:1], off offset:2 521; GFX9-NEXT: s_waitcnt vmcnt(0) 522; GFX9-NEXT: s_setpc_b64 s[30:31] 523; 524; GFX8-LABEL: extractelement_vgpr_v4i16_idx1: 525; GFX8: ; %bb.0: 526; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 527; GFX8-NEXT: v_add_u32_e32 v0, vcc, 2, v0 528; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 529; GFX8-NEXT: flat_load_ushort v0, v[0:1] 530; GFX8-NEXT: s_waitcnt vmcnt(0) 531; GFX8-NEXT: s_setpc_b64 s[30:31] 532; 533; GFX7-LABEL: extractelement_vgpr_v4i16_idx1: 534; GFX7: ; %bb.0: 535; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 536; GFX7-NEXT: s_mov_b32 s6, 0 537; GFX7-NEXT: s_mov_b32 s7, 0xf000 538; GFX7-NEXT: s_mov_b64 s[4:5], 0 539; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:2 540; GFX7-NEXT: s_waitcnt vmcnt(0) 541; GFX7-NEXT: s_setpc_b64 s[30:31] 542; 543; GFX10-LABEL: extractelement_vgpr_v4i16_idx1: 544; GFX10: ; %bb.0: 545; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 546; GFX10-NEXT: global_load_ushort v0, v[0:1], off offset:2 547; GFX10-NEXT: s_waitcnt vmcnt(0) 548; GFX10-NEXT: s_setpc_b64 s[30:31] 549; 550; GFX11-LABEL: extractelement_vgpr_v4i16_idx1: 551; GFX11: ; %bb.0: 552; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 553; GFX11-NEXT: global_load_u16 v0, v[0:1], off offset:2 554; GFX11-NEXT: s_waitcnt vmcnt(0) 555; GFX11-NEXT: s_setpc_b64 s[30:31] 556 %vector = load <4 x i16>, ptr addrspace(1) %ptr 557 %element = extractelement <4 x i16> %vector, i32 1 558 ret i16 %element 559} 560 561define i16 @extractelement_vgpr_v4i16_idx2(ptr addrspace(1) %ptr) { 562; GFX9-LABEL: extractelement_vgpr_v4i16_idx2: 563; GFX9: ; %bb.0: 564; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 565; GFX9-NEXT: global_load_ushort v0, v[0:1], off offset:4 566; GFX9-NEXT: s_waitcnt vmcnt(0) 567; GFX9-NEXT: s_setpc_b64 s[30:31] 568; 569; GFX8-LABEL: extractelement_vgpr_v4i16_idx2: 570; GFX8: ; %bb.0: 571; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 572; GFX8-NEXT: v_add_u32_e32 v0, vcc, 4, v0 573; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 574; GFX8-NEXT: flat_load_ushort v0, v[0:1] 575; GFX8-NEXT: s_waitcnt vmcnt(0) 576; GFX8-NEXT: s_setpc_b64 s[30:31] 577; 578; GFX7-LABEL: extractelement_vgpr_v4i16_idx2: 579; GFX7: ; %bb.0: 580; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 581; GFX7-NEXT: s_mov_b32 s6, 0 582; GFX7-NEXT: s_mov_b32 s7, 0xf000 583; GFX7-NEXT: s_mov_b64 s[4:5], 0 584; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:4 585; GFX7-NEXT: s_waitcnt vmcnt(0) 586; GFX7-NEXT: s_setpc_b64 s[30:31] 587; 588; GFX10-LABEL: extractelement_vgpr_v4i16_idx2: 589; GFX10: ; %bb.0: 590; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 591; GFX10-NEXT: global_load_ushort v0, v[0:1], off offset:4 592; GFX10-NEXT: s_waitcnt vmcnt(0) 593; GFX10-NEXT: s_setpc_b64 s[30:31] 594; 595; GFX11-LABEL: extractelement_vgpr_v4i16_idx2: 596; GFX11: ; %bb.0: 597; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 598; GFX11-NEXT: global_load_u16 v0, v[0:1], off offset:4 599; GFX11-NEXT: s_waitcnt vmcnt(0) 600; GFX11-NEXT: s_setpc_b64 s[30:31] 601 %vector = load <4 x i16>, ptr addrspace(1) %ptr 602 %element = extractelement <4 x i16> %vector, i32 2 603 ret i16 %element 604} 605 606define i16 @extractelement_vgpr_v4i16_idx3(ptr addrspace(1) %ptr) { 607; GFX9-LABEL: extractelement_vgpr_v4i16_idx3: 608; GFX9: ; %bb.0: 609; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 610; GFX9-NEXT: global_load_ushort v0, v[0:1], off offset:6 611; GFX9-NEXT: s_waitcnt vmcnt(0) 612; GFX9-NEXT: s_setpc_b64 s[30:31] 613; 614; GFX8-LABEL: extractelement_vgpr_v4i16_idx3: 615; GFX8: ; %bb.0: 616; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 617; GFX8-NEXT: v_add_u32_e32 v0, vcc, 6, v0 618; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 619; GFX8-NEXT: flat_load_ushort v0, v[0:1] 620; GFX8-NEXT: s_waitcnt vmcnt(0) 621; GFX8-NEXT: s_setpc_b64 s[30:31] 622; 623; GFX7-LABEL: extractelement_vgpr_v4i16_idx3: 624; GFX7: ; %bb.0: 625; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 626; GFX7-NEXT: s_mov_b32 s6, 0 627; GFX7-NEXT: s_mov_b32 s7, 0xf000 628; GFX7-NEXT: s_mov_b64 s[4:5], 0 629; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:6 630; GFX7-NEXT: s_waitcnt vmcnt(0) 631; GFX7-NEXT: s_setpc_b64 s[30:31] 632; 633; GFX10-LABEL: extractelement_vgpr_v4i16_idx3: 634; GFX10: ; %bb.0: 635; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 636; GFX10-NEXT: global_load_ushort v0, v[0:1], off offset:6 637; GFX10-NEXT: s_waitcnt vmcnt(0) 638; GFX10-NEXT: s_setpc_b64 s[30:31] 639; 640; GFX11-LABEL: extractelement_vgpr_v4i16_idx3: 641; GFX11: ; %bb.0: 642; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 643; GFX11-NEXT: global_load_u16 v0, v[0:1], off offset:6 644; GFX11-NEXT: s_waitcnt vmcnt(0) 645; GFX11-NEXT: s_setpc_b64 s[30:31] 646 %vector = load <4 x i16>, ptr addrspace(1) %ptr 647 %element = extractelement <4 x i16> %vector, i32 3 648 ret i16 %element 649} 650 651define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) { 652; GFX9-LABEL: extractelement_sgpr_v8i16_sgpr_idx: 653; GFX9: ; %bb.0: 654; GFX9-NEXT: s_and_b32 s0, s4, 7 655; GFX9-NEXT: s_lshl_b32 s0, s0, 1 656; GFX9-NEXT: v_mov_b32_e32 v0, s0 657; GFX9-NEXT: global_load_ushort v0, v0, s[2:3] 658; GFX9-NEXT: s_waitcnt vmcnt(0) 659; GFX9-NEXT: v_readfirstlane_b32 s0, v0 660; GFX9-NEXT: ; return to shader part epilog 661; 662; GFX8-LABEL: extractelement_sgpr_v8i16_sgpr_idx: 663; GFX8: ; %bb.0: 664; GFX8-NEXT: s_and_b32 s0, s4, 7 665; GFX8-NEXT: s_lshl_b32 s0, s0, 1 666; GFX8-NEXT: s_add_u32 s0, s2, s0 667; GFX8-NEXT: s_addc_u32 s1, s3, 0 668; GFX8-NEXT: v_mov_b32_e32 v0, s0 669; GFX8-NEXT: v_mov_b32_e32 v1, s1 670; GFX8-NEXT: flat_load_ushort v0, v[0:1] 671; GFX8-NEXT: s_waitcnt vmcnt(0) 672; GFX8-NEXT: v_readfirstlane_b32 s0, v0 673; GFX8-NEXT: ; return to shader part epilog 674; 675; GFX7-LABEL: extractelement_sgpr_v8i16_sgpr_idx: 676; GFX7: ; %bb.0: 677; GFX7-NEXT: s_mov_b32 s0, s2 678; GFX7-NEXT: s_and_b32 s2, s4, 7 679; GFX7-NEXT: s_lshl_b32 s4, s2, 1 680; GFX7-NEXT: s_mov_b32 s5, 0 681; GFX7-NEXT: v_mov_b32_e32 v0, s4 682; GFX7-NEXT: s_mov_b32 s1, s3 683; GFX7-NEXT: s_mov_b32 s3, 0xf000 684; GFX7-NEXT: s_mov_b32 s2, s5 685; GFX7-NEXT: v_mov_b32_e32 v1, s5 686; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64 687; GFX7-NEXT: s_waitcnt vmcnt(0) 688; GFX7-NEXT: v_readfirstlane_b32 s0, v0 689; GFX7-NEXT: ; return to shader part epilog 690; 691; GFX10-LABEL: extractelement_sgpr_v8i16_sgpr_idx: 692; GFX10: ; %bb.0: 693; GFX10-NEXT: s_and_b32 s0, s4, 7 694; GFX10-NEXT: s_lshl_b32 s0, s0, 1 695; GFX10-NEXT: v_mov_b32_e32 v0, s0 696; GFX10-NEXT: global_load_ushort v0, v0, s[2:3] 697; GFX10-NEXT: s_waitcnt vmcnt(0) 698; GFX10-NEXT: v_readfirstlane_b32 s0, v0 699; GFX10-NEXT: ; return to shader part epilog 700; 701; GFX11-LABEL: extractelement_sgpr_v8i16_sgpr_idx: 702; GFX11: ; %bb.0: 703; GFX11-NEXT: s_and_b32 s0, s4, 7 704; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 705; GFX11-NEXT: s_lshl_b32 s0, s0, 1 706; GFX11-NEXT: v_mov_b32_e32 v0, s0 707; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] 708; GFX11-NEXT: s_waitcnt vmcnt(0) 709; GFX11-NEXT: v_readfirstlane_b32 s0, v0 710; GFX11-NEXT: ; return to shader part epilog 711 %vector = load <8 x i16>, ptr addrspace(4) %ptr 712 %element = extractelement <8 x i16> %vector, i32 %idx 713 ret i16 %element 714} 715 716define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) { 717; GFX9-LABEL: extractelement_vgpr_v8i16_sgpr_idx: 718; GFX9: ; %bb.0: 719; GFX9-NEXT: s_and_b32 s0, s2, 7 720; GFX9-NEXT: s_mov_b32 s1, 0 721; GFX9-NEXT: s_lshl_b32 s0, s0, 1 722; GFX9-NEXT: v_mov_b32_e32 v3, s1 723; GFX9-NEXT: v_mov_b32_e32 v2, s0 724; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 725; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc 726; GFX9-NEXT: global_load_ushort v0, v[0:1], off 727; GFX9-NEXT: s_waitcnt vmcnt(0) 728; GFX9-NEXT: v_readfirstlane_b32 s0, v0 729; GFX9-NEXT: ; return to shader part epilog 730; 731; GFX8-LABEL: extractelement_vgpr_v8i16_sgpr_idx: 732; GFX8: ; %bb.0: 733; GFX8-NEXT: s_and_b32 s0, s2, 7 734; GFX8-NEXT: s_mov_b32 s1, 0 735; GFX8-NEXT: s_lshl_b32 s0, s0, 1 736; GFX8-NEXT: v_mov_b32_e32 v3, s1 737; GFX8-NEXT: v_mov_b32_e32 v2, s0 738; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 739; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc 740; GFX8-NEXT: flat_load_ushort v0, v[0:1] 741; GFX8-NEXT: s_waitcnt vmcnt(0) 742; GFX8-NEXT: v_readfirstlane_b32 s0, v0 743; GFX8-NEXT: ; return to shader part epilog 744; 745; GFX7-LABEL: extractelement_vgpr_v8i16_sgpr_idx: 746; GFX7: ; %bb.0: 747; GFX7-NEXT: s_and_b32 s0, s2, 7 748; GFX7-NEXT: s_mov_b32 s1, 0 749; GFX7-NEXT: s_lshl_b32 s0, s0, 1 750; GFX7-NEXT: s_mov_b32 s3, 0xf000 751; GFX7-NEXT: s_mov_b32 s2, s1 752; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64 753; GFX7-NEXT: s_waitcnt vmcnt(0) 754; GFX7-NEXT: v_readfirstlane_b32 s0, v0 755; GFX7-NEXT: ; return to shader part epilog 756; 757; GFX10-LABEL: extractelement_vgpr_v8i16_sgpr_idx: 758; GFX10: ; %bb.0: 759; GFX10-NEXT: s_and_b32 s0, s2, 7 760; GFX10-NEXT: s_mov_b32 s1, 0 761; GFX10-NEXT: s_lshl_b32 s0, s0, 1 762; GFX10-NEXT: v_mov_b32_e32 v3, s1 763; GFX10-NEXT: v_mov_b32_e32 v2, s0 764; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 765; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo 766; GFX10-NEXT: global_load_ushort v0, v[0:1], off 767; GFX10-NEXT: s_waitcnt vmcnt(0) 768; GFX10-NEXT: v_readfirstlane_b32 s0, v0 769; GFX10-NEXT: ; return to shader part epilog 770; 771; GFX11-LABEL: extractelement_vgpr_v8i16_sgpr_idx: 772; GFX11: ; %bb.0: 773; GFX11-NEXT: s_and_b32 s0, s2, 7 774; GFX11-NEXT: s_mov_b32 s1, 0 775; GFX11-NEXT: s_lshl_b32 s0, s0, 1 776; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 777; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 778; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 779; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 780; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo 781; GFX11-NEXT: global_load_u16 v0, v[0:1], off 782; GFX11-NEXT: s_waitcnt vmcnt(0) 783; GFX11-NEXT: v_readfirstlane_b32 s0, v0 784; GFX11-NEXT: ; return to shader part epilog 785 %vector = load <8 x i16>, ptr addrspace(1) %ptr 786 %element = extractelement <8 x i16> %vector, i32 %idx 787 ret i16 %element 788} 789 790define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) { 791; GFX9-LABEL: extractelement_vgpr_v8i16_vgpr_idx: 792; GFX9: ; %bb.0: 793; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 794; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 795; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v2 796; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 797; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 798; GFX9-NEXT: global_load_ushort v0, v[0:1], off 799; GFX9-NEXT: s_waitcnt vmcnt(0) 800; GFX9-NEXT: s_setpc_b64 s[30:31] 801; 802; GFX8-LABEL: extractelement_vgpr_v8i16_vgpr_idx: 803; GFX8: ; %bb.0: 804; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 805; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 806; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v2 807; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 808; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 809; GFX8-NEXT: flat_load_ushort v0, v[0:1] 810; GFX8-NEXT: s_waitcnt vmcnt(0) 811; GFX8-NEXT: s_setpc_b64 s[30:31] 812; 813; GFX7-LABEL: extractelement_vgpr_v8i16_vgpr_idx: 814; GFX7: ; %bb.0: 815; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 816; GFX7-NEXT: v_and_b32_e32 v2, 7, v2 817; GFX7-NEXT: v_lshlrev_b32_e32 v2, 1, v2 818; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 819; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 820; GFX7-NEXT: s_mov_b32 s6, 0 821; GFX7-NEXT: s_mov_b32 s7, 0xf000 822; GFX7-NEXT: s_mov_b64 s[4:5], 0 823; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 824; GFX7-NEXT: s_waitcnt vmcnt(0) 825; GFX7-NEXT: s_setpc_b64 s[30:31] 826; 827; GFX10-LABEL: extractelement_vgpr_v8i16_vgpr_idx: 828; GFX10: ; %bb.0: 829; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 830; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 831; GFX10-NEXT: v_lshlrev_b32_e32 v2, 1, v2 832; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 833; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 834; GFX10-NEXT: global_load_ushort v0, v[0:1], off 835; GFX10-NEXT: s_waitcnt vmcnt(0) 836; GFX10-NEXT: s_setpc_b64 s[30:31] 837; 838; GFX11-LABEL: extractelement_vgpr_v8i16_vgpr_idx: 839; GFX11: ; %bb.0: 840; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 841; GFX11-NEXT: v_and_b32_e32 v2, 7, v2 842; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 843; GFX11-NEXT: v_lshlrev_b32_e32 v2, 1, v2 844; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 845; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 846; GFX11-NEXT: global_load_u16 v0, v[0:1], off 847; GFX11-NEXT: s_waitcnt vmcnt(0) 848; GFX11-NEXT: s_setpc_b64 s[30:31] 849 %vector = load <8 x i16>, ptr addrspace(1) %ptr 850 %element = extractelement <8 x i16> %vector, i32 %idx 851 ret i16 %element 852} 853 854define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) { 855; GFX9-LABEL: extractelement_sgpr_v8i16_vgpr_idx: 856; GFX9: ; %bb.0: 857; GFX9-NEXT: v_and_b32_e32 v0, 7, v0 858; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 859; GFX9-NEXT: global_load_ushort v0, v0, s[2:3] 860; GFX9-NEXT: s_waitcnt vmcnt(0) 861; GFX9-NEXT: v_readfirstlane_b32 s0, v0 862; GFX9-NEXT: ; return to shader part epilog 863; 864; GFX8-LABEL: extractelement_sgpr_v8i16_vgpr_idx: 865; GFX8: ; %bb.0: 866; GFX8-NEXT: v_and_b32_e32 v0, 7, v0 867; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v0 868; GFX8-NEXT: v_mov_b32_e32 v0, s2 869; GFX8-NEXT: v_mov_b32_e32 v1, s3 870; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 871; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 872; GFX8-NEXT: flat_load_ushort v0, v[0:1] 873; GFX8-NEXT: s_waitcnt vmcnt(0) 874; GFX8-NEXT: v_readfirstlane_b32 s0, v0 875; GFX8-NEXT: ; return to shader part epilog 876; 877; GFX7-LABEL: extractelement_sgpr_v8i16_vgpr_idx: 878; GFX7: ; %bb.0: 879; GFX7-NEXT: v_and_b32_e32 v0, 7, v0 880; GFX7-NEXT: s_mov_b32 s0, s2 881; GFX7-NEXT: s_mov_b32 s1, s3 882; GFX7-NEXT: v_lshlrev_b32_e32 v0, 1, v0 883; GFX7-NEXT: v_mov_b32_e32 v1, 0 884; GFX7-NEXT: s_mov_b32 s2, 0 885; GFX7-NEXT: s_mov_b32 s3, 0xf000 886; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64 887; GFX7-NEXT: s_waitcnt vmcnt(0) 888; GFX7-NEXT: v_readfirstlane_b32 s0, v0 889; GFX7-NEXT: ; return to shader part epilog 890; 891; GFX10-LABEL: extractelement_sgpr_v8i16_vgpr_idx: 892; GFX10: ; %bb.0: 893; GFX10-NEXT: v_and_b32_e32 v0, 7, v0 894; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0 895; GFX10-NEXT: global_load_ushort v0, v0, s[2:3] 896; GFX10-NEXT: s_waitcnt vmcnt(0) 897; GFX10-NEXT: v_readfirstlane_b32 s0, v0 898; GFX10-NEXT: ; return to shader part epilog 899; 900; GFX11-LABEL: extractelement_sgpr_v8i16_vgpr_idx: 901; GFX11: ; %bb.0: 902; GFX11-NEXT: v_and_b32_e32 v0, 7, v0 903; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 904; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0 905; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] 906; GFX11-NEXT: s_waitcnt vmcnt(0) 907; GFX11-NEXT: v_readfirstlane_b32 s0, v0 908; GFX11-NEXT: ; return to shader part epilog 909 %vector = load <8 x i16>, ptr addrspace(4) %ptr 910 %element = extractelement <8 x i16> %vector, i32 %idx 911 ret i16 %element 912} 913 914define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx0(ptr addrspace(4) inreg %ptr) { 915; GFX9-LABEL: extractelement_sgpr_v8i16_idx0: 916; GFX9: ; %bb.0: 917; GFX9-NEXT: v_mov_b32_e32 v0, 0 918; GFX9-NEXT: global_load_ushort v0, v0, s[2:3] 919; GFX9-NEXT: s_waitcnt vmcnt(0) 920; GFX9-NEXT: v_readfirstlane_b32 s0, v0 921; GFX9-NEXT: ; return to shader part epilog 922; 923; GFX8-LABEL: extractelement_sgpr_v8i16_idx0: 924; GFX8: ; %bb.0: 925; GFX8-NEXT: v_mov_b32_e32 v0, s2 926; GFX8-NEXT: v_mov_b32_e32 v1, s3 927; GFX8-NEXT: flat_load_ushort v0, v[0:1] 928; GFX8-NEXT: s_waitcnt vmcnt(0) 929; GFX8-NEXT: v_readfirstlane_b32 s0, v0 930; GFX8-NEXT: ; return to shader part epilog 931; 932; GFX7-LABEL: extractelement_sgpr_v8i16_idx0: 933; GFX7: ; %bb.0: 934; GFX7-NEXT: s_mov_b32 s0, s2 935; GFX7-NEXT: s_mov_b32 s1, s3 936; GFX7-NEXT: s_mov_b32 s2, -1 937; GFX7-NEXT: s_mov_b32 s3, 0xf000 938; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 939; GFX7-NEXT: s_waitcnt vmcnt(0) 940; GFX7-NEXT: v_readfirstlane_b32 s0, v0 941; GFX7-NEXT: ; return to shader part epilog 942; 943; GFX10-LABEL: extractelement_sgpr_v8i16_idx0: 944; GFX10: ; %bb.0: 945; GFX10-NEXT: v_mov_b32_e32 v0, 0 946; GFX10-NEXT: global_load_ushort v0, v0, s[2:3] 947; GFX10-NEXT: s_waitcnt vmcnt(0) 948; GFX10-NEXT: v_readfirstlane_b32 s0, v0 949; GFX10-NEXT: ; return to shader part epilog 950; 951; GFX11-LABEL: extractelement_sgpr_v8i16_idx0: 952; GFX11: ; %bb.0: 953; GFX11-NEXT: v_mov_b32_e32 v0, 0 954; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] 955; GFX11-NEXT: s_waitcnt vmcnt(0) 956; GFX11-NEXT: v_readfirstlane_b32 s0, v0 957; GFX11-NEXT: ; return to shader part epilog 958 %vector = load <8 x i16>, ptr addrspace(4) %ptr 959 %element = extractelement <8 x i16> %vector, i32 0 960 ret i16 %element 961} 962 963define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx1(ptr addrspace(4) inreg %ptr) { 964; GFX9-LABEL: extractelement_sgpr_v8i16_idx1: 965; GFX9: ; %bb.0: 966; GFX9-NEXT: v_mov_b32_e32 v0, 0 967; GFX9-NEXT: global_load_ushort v0, v0, s[2:3] offset:2 968; GFX9-NEXT: s_waitcnt vmcnt(0) 969; GFX9-NEXT: v_readfirstlane_b32 s0, v0 970; GFX9-NEXT: ; return to shader part epilog 971; 972; GFX8-LABEL: extractelement_sgpr_v8i16_idx1: 973; GFX8: ; %bb.0: 974; GFX8-NEXT: s_add_u32 s0, s2, 2 975; GFX8-NEXT: s_addc_u32 s1, s3, 0 976; GFX8-NEXT: v_mov_b32_e32 v0, s0 977; GFX8-NEXT: v_mov_b32_e32 v1, s1 978; GFX8-NEXT: flat_load_ushort v0, v[0:1] 979; GFX8-NEXT: s_waitcnt vmcnt(0) 980; GFX8-NEXT: v_readfirstlane_b32 s0, v0 981; GFX8-NEXT: ; return to shader part epilog 982; 983; GFX7-LABEL: extractelement_sgpr_v8i16_idx1: 984; GFX7: ; %bb.0: 985; GFX7-NEXT: s_mov_b32 s0, s2 986; GFX7-NEXT: s_mov_b32 s1, s3 987; GFX7-NEXT: s_mov_b32 s2, -1 988; GFX7-NEXT: s_mov_b32 s3, 0xf000 989; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:2 990; GFX7-NEXT: s_waitcnt vmcnt(0) 991; GFX7-NEXT: v_readfirstlane_b32 s0, v0 992; GFX7-NEXT: ; return to shader part epilog 993; 994; GFX10-LABEL: extractelement_sgpr_v8i16_idx1: 995; GFX10: ; %bb.0: 996; GFX10-NEXT: v_mov_b32_e32 v0, 0 997; GFX10-NEXT: global_load_ushort v0, v0, s[2:3] offset:2 998; GFX10-NEXT: s_waitcnt vmcnt(0) 999; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1000; GFX10-NEXT: ; return to shader part epilog 1001; 1002; GFX11-LABEL: extractelement_sgpr_v8i16_idx1: 1003; GFX11: ; %bb.0: 1004; GFX11-NEXT: v_mov_b32_e32 v0, 0 1005; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] offset:2 1006; GFX11-NEXT: s_waitcnt vmcnt(0) 1007; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1008; GFX11-NEXT: ; return to shader part epilog 1009 %vector = load <8 x i16>, ptr addrspace(4) %ptr 1010 %element = extractelement <8 x i16> %vector, i32 1 1011 ret i16 %element 1012} 1013 1014define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx2(ptr addrspace(4) inreg %ptr) { 1015; GFX9-LABEL: extractelement_sgpr_v8i16_idx2: 1016; GFX9: ; %bb.0: 1017; GFX9-NEXT: v_mov_b32_e32 v0, 0 1018; GFX9-NEXT: global_load_ushort v0, v0, s[2:3] offset:4 1019; GFX9-NEXT: s_waitcnt vmcnt(0) 1020; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1021; GFX9-NEXT: ; return to shader part epilog 1022; 1023; GFX8-LABEL: extractelement_sgpr_v8i16_idx2: 1024; GFX8: ; %bb.0: 1025; GFX8-NEXT: s_add_u32 s0, s2, 4 1026; GFX8-NEXT: s_addc_u32 s1, s3, 0 1027; GFX8-NEXT: v_mov_b32_e32 v0, s0 1028; GFX8-NEXT: v_mov_b32_e32 v1, s1 1029; GFX8-NEXT: flat_load_ushort v0, v[0:1] 1030; GFX8-NEXT: s_waitcnt vmcnt(0) 1031; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1032; GFX8-NEXT: ; return to shader part epilog 1033; 1034; GFX7-LABEL: extractelement_sgpr_v8i16_idx2: 1035; GFX7: ; %bb.0: 1036; GFX7-NEXT: s_mov_b32 s0, s2 1037; GFX7-NEXT: s_mov_b32 s1, s3 1038; GFX7-NEXT: s_mov_b32 s2, -1 1039; GFX7-NEXT: s_mov_b32 s3, 0xf000 1040; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4 1041; GFX7-NEXT: s_waitcnt vmcnt(0) 1042; GFX7-NEXT: v_readfirstlane_b32 s0, v0 1043; GFX7-NEXT: ; return to shader part epilog 1044; 1045; GFX10-LABEL: extractelement_sgpr_v8i16_idx2: 1046; GFX10: ; %bb.0: 1047; GFX10-NEXT: v_mov_b32_e32 v0, 0 1048; GFX10-NEXT: global_load_ushort v0, v0, s[2:3] offset:4 1049; GFX10-NEXT: s_waitcnt vmcnt(0) 1050; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1051; GFX10-NEXT: ; return to shader part epilog 1052; 1053; GFX11-LABEL: extractelement_sgpr_v8i16_idx2: 1054; GFX11: ; %bb.0: 1055; GFX11-NEXT: v_mov_b32_e32 v0, 0 1056; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] offset:4 1057; GFX11-NEXT: s_waitcnt vmcnt(0) 1058; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1059; GFX11-NEXT: ; return to shader part epilog 1060 %vector = load <8 x i16>, ptr addrspace(4) %ptr 1061 %element = extractelement <8 x i16> %vector, i32 2 1062 ret i16 %element 1063} 1064 1065define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx3(ptr addrspace(4) inreg %ptr) { 1066; GFX9-LABEL: extractelement_sgpr_v8i16_idx3: 1067; GFX9: ; %bb.0: 1068; GFX9-NEXT: v_mov_b32_e32 v0, 0 1069; GFX9-NEXT: global_load_ushort v0, v0, s[2:3] offset:6 1070; GFX9-NEXT: s_waitcnt vmcnt(0) 1071; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1072; GFX9-NEXT: ; return to shader part epilog 1073; 1074; GFX8-LABEL: extractelement_sgpr_v8i16_idx3: 1075; GFX8: ; %bb.0: 1076; GFX8-NEXT: s_add_u32 s0, s2, 6 1077; GFX8-NEXT: s_addc_u32 s1, s3, 0 1078; GFX8-NEXT: v_mov_b32_e32 v0, s0 1079; GFX8-NEXT: v_mov_b32_e32 v1, s1 1080; GFX8-NEXT: flat_load_ushort v0, v[0:1] 1081; GFX8-NEXT: s_waitcnt vmcnt(0) 1082; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1083; GFX8-NEXT: ; return to shader part epilog 1084; 1085; GFX7-LABEL: extractelement_sgpr_v8i16_idx3: 1086; GFX7: ; %bb.0: 1087; GFX7-NEXT: s_mov_b32 s0, s2 1088; GFX7-NEXT: s_mov_b32 s1, s3 1089; GFX7-NEXT: s_mov_b32 s2, -1 1090; GFX7-NEXT: s_mov_b32 s3, 0xf000 1091; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:6 1092; GFX7-NEXT: s_waitcnt vmcnt(0) 1093; GFX7-NEXT: v_readfirstlane_b32 s0, v0 1094; GFX7-NEXT: ; return to shader part epilog 1095; 1096; GFX10-LABEL: extractelement_sgpr_v8i16_idx3: 1097; GFX10: ; %bb.0: 1098; GFX10-NEXT: v_mov_b32_e32 v0, 0 1099; GFX10-NEXT: global_load_ushort v0, v0, s[2:3] offset:6 1100; GFX10-NEXT: s_waitcnt vmcnt(0) 1101; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1102; GFX10-NEXT: ; return to shader part epilog 1103; 1104; GFX11-LABEL: extractelement_sgpr_v8i16_idx3: 1105; GFX11: ; %bb.0: 1106; GFX11-NEXT: v_mov_b32_e32 v0, 0 1107; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] offset:6 1108; GFX11-NEXT: s_waitcnt vmcnt(0) 1109; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1110; GFX11-NEXT: ; return to shader part epilog 1111 %vector = load <8 x i16>, ptr addrspace(4) %ptr 1112 %element = extractelement <8 x i16> %vector, i32 3 1113 ret i16 %element 1114} 1115 1116define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx4(ptr addrspace(4) inreg %ptr) { 1117; GFX9-LABEL: extractelement_sgpr_v8i16_idx4: 1118; GFX9: ; %bb.0: 1119; GFX9-NEXT: v_mov_b32_e32 v0, 0 1120; GFX9-NEXT: global_load_ushort v0, v0, s[2:3] offset:8 1121; GFX9-NEXT: s_waitcnt vmcnt(0) 1122; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1123; GFX9-NEXT: ; return to shader part epilog 1124; 1125; GFX8-LABEL: extractelement_sgpr_v8i16_idx4: 1126; GFX8: ; %bb.0: 1127; GFX8-NEXT: s_add_u32 s0, s2, 8 1128; GFX8-NEXT: s_addc_u32 s1, s3, 0 1129; GFX8-NEXT: v_mov_b32_e32 v0, s0 1130; GFX8-NEXT: v_mov_b32_e32 v1, s1 1131; GFX8-NEXT: flat_load_ushort v0, v[0:1] 1132; GFX8-NEXT: s_waitcnt vmcnt(0) 1133; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1134; GFX8-NEXT: ; return to shader part epilog 1135; 1136; GFX7-LABEL: extractelement_sgpr_v8i16_idx4: 1137; GFX7: ; %bb.0: 1138; GFX7-NEXT: s_mov_b32 s0, s2 1139; GFX7-NEXT: s_mov_b32 s1, s3 1140; GFX7-NEXT: s_mov_b32 s2, -1 1141; GFX7-NEXT: s_mov_b32 s3, 0xf000 1142; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:8 1143; GFX7-NEXT: s_waitcnt vmcnt(0) 1144; GFX7-NEXT: v_readfirstlane_b32 s0, v0 1145; GFX7-NEXT: ; return to shader part epilog 1146; 1147; GFX10-LABEL: extractelement_sgpr_v8i16_idx4: 1148; GFX10: ; %bb.0: 1149; GFX10-NEXT: v_mov_b32_e32 v0, 0 1150; GFX10-NEXT: global_load_ushort v0, v0, s[2:3] offset:8 1151; GFX10-NEXT: s_waitcnt vmcnt(0) 1152; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1153; GFX10-NEXT: ; return to shader part epilog 1154; 1155; GFX11-LABEL: extractelement_sgpr_v8i16_idx4: 1156; GFX11: ; %bb.0: 1157; GFX11-NEXT: v_mov_b32_e32 v0, 0 1158; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] offset:8 1159; GFX11-NEXT: s_waitcnt vmcnt(0) 1160; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1161; GFX11-NEXT: ; return to shader part epilog 1162 %vector = load <8 x i16>, ptr addrspace(4) %ptr 1163 %element = extractelement <8 x i16> %vector, i32 4 1164 ret i16 %element 1165} 1166 1167define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx5(ptr addrspace(4) inreg %ptr) { 1168; GFX9-LABEL: extractelement_sgpr_v8i16_idx5: 1169; GFX9: ; %bb.0: 1170; GFX9-NEXT: v_mov_b32_e32 v0, 0 1171; GFX9-NEXT: global_load_ushort v0, v0, s[2:3] offset:10 1172; GFX9-NEXT: s_waitcnt vmcnt(0) 1173; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1174; GFX9-NEXT: ; return to shader part epilog 1175; 1176; GFX8-LABEL: extractelement_sgpr_v8i16_idx5: 1177; GFX8: ; %bb.0: 1178; GFX8-NEXT: s_add_u32 s0, s2, 10 1179; GFX8-NEXT: s_addc_u32 s1, s3, 0 1180; GFX8-NEXT: v_mov_b32_e32 v0, s0 1181; GFX8-NEXT: v_mov_b32_e32 v1, s1 1182; GFX8-NEXT: flat_load_ushort v0, v[0:1] 1183; GFX8-NEXT: s_waitcnt vmcnt(0) 1184; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1185; GFX8-NEXT: ; return to shader part epilog 1186; 1187; GFX7-LABEL: extractelement_sgpr_v8i16_idx5: 1188; GFX7: ; %bb.0: 1189; GFX7-NEXT: s_mov_b32 s0, s2 1190; GFX7-NEXT: s_mov_b32 s1, s3 1191; GFX7-NEXT: s_mov_b32 s2, -1 1192; GFX7-NEXT: s_mov_b32 s3, 0xf000 1193; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:10 1194; GFX7-NEXT: s_waitcnt vmcnt(0) 1195; GFX7-NEXT: v_readfirstlane_b32 s0, v0 1196; GFX7-NEXT: ; return to shader part epilog 1197; 1198; GFX10-LABEL: extractelement_sgpr_v8i16_idx5: 1199; GFX10: ; %bb.0: 1200; GFX10-NEXT: v_mov_b32_e32 v0, 0 1201; GFX10-NEXT: global_load_ushort v0, v0, s[2:3] offset:10 1202; GFX10-NEXT: s_waitcnt vmcnt(0) 1203; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1204; GFX10-NEXT: ; return to shader part epilog 1205; 1206; GFX11-LABEL: extractelement_sgpr_v8i16_idx5: 1207; GFX11: ; %bb.0: 1208; GFX11-NEXT: v_mov_b32_e32 v0, 0 1209; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] offset:10 1210; GFX11-NEXT: s_waitcnt vmcnt(0) 1211; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1212; GFX11-NEXT: ; return to shader part epilog 1213 %vector = load <8 x i16>, ptr addrspace(4) %ptr 1214 %element = extractelement <8 x i16> %vector, i32 5 1215 ret i16 %element 1216} 1217 1218define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx6(ptr addrspace(4) inreg %ptr) { 1219; GFX9-LABEL: extractelement_sgpr_v8i16_idx6: 1220; GFX9: ; %bb.0: 1221; GFX9-NEXT: v_mov_b32_e32 v0, 0 1222; GFX9-NEXT: global_load_ushort v0, v0, s[2:3] offset:12 1223; GFX9-NEXT: s_waitcnt vmcnt(0) 1224; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1225; GFX9-NEXT: ; return to shader part epilog 1226; 1227; GFX8-LABEL: extractelement_sgpr_v8i16_idx6: 1228; GFX8: ; %bb.0: 1229; GFX8-NEXT: s_add_u32 s0, s2, 12 1230; GFX8-NEXT: s_addc_u32 s1, s3, 0 1231; GFX8-NEXT: v_mov_b32_e32 v0, s0 1232; GFX8-NEXT: v_mov_b32_e32 v1, s1 1233; GFX8-NEXT: flat_load_ushort v0, v[0:1] 1234; GFX8-NEXT: s_waitcnt vmcnt(0) 1235; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1236; GFX8-NEXT: ; return to shader part epilog 1237; 1238; GFX7-LABEL: extractelement_sgpr_v8i16_idx6: 1239; GFX7: ; %bb.0: 1240; GFX7-NEXT: s_mov_b32 s0, s2 1241; GFX7-NEXT: s_mov_b32 s1, s3 1242; GFX7-NEXT: s_mov_b32 s2, -1 1243; GFX7-NEXT: s_mov_b32 s3, 0xf000 1244; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:12 1245; GFX7-NEXT: s_waitcnt vmcnt(0) 1246; GFX7-NEXT: v_readfirstlane_b32 s0, v0 1247; GFX7-NEXT: ; return to shader part epilog 1248; 1249; GFX10-LABEL: extractelement_sgpr_v8i16_idx6: 1250; GFX10: ; %bb.0: 1251; GFX10-NEXT: v_mov_b32_e32 v0, 0 1252; GFX10-NEXT: global_load_ushort v0, v0, s[2:3] offset:12 1253; GFX10-NEXT: s_waitcnt vmcnt(0) 1254; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1255; GFX10-NEXT: ; return to shader part epilog 1256; 1257; GFX11-LABEL: extractelement_sgpr_v8i16_idx6: 1258; GFX11: ; %bb.0: 1259; GFX11-NEXT: v_mov_b32_e32 v0, 0 1260; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] offset:12 1261; GFX11-NEXT: s_waitcnt vmcnt(0) 1262; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1263; GFX11-NEXT: ; return to shader part epilog 1264 %vector = load <8 x i16>, ptr addrspace(4) %ptr 1265 %element = extractelement <8 x i16> %vector, i32 6 1266 ret i16 %element 1267} 1268 1269define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx7(ptr addrspace(4) inreg %ptr) { 1270; GFX9-LABEL: extractelement_sgpr_v8i16_idx7: 1271; GFX9: ; %bb.0: 1272; GFX9-NEXT: v_mov_b32_e32 v0, 0 1273; GFX9-NEXT: global_load_ushort v0, v0, s[2:3] offset:14 1274; GFX9-NEXT: s_waitcnt vmcnt(0) 1275; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1276; GFX9-NEXT: ; return to shader part epilog 1277; 1278; GFX8-LABEL: extractelement_sgpr_v8i16_idx7: 1279; GFX8: ; %bb.0: 1280; GFX8-NEXT: s_add_u32 s0, s2, 14 1281; GFX8-NEXT: s_addc_u32 s1, s3, 0 1282; GFX8-NEXT: v_mov_b32_e32 v0, s0 1283; GFX8-NEXT: v_mov_b32_e32 v1, s1 1284; GFX8-NEXT: flat_load_ushort v0, v[0:1] 1285; GFX8-NEXT: s_waitcnt vmcnt(0) 1286; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1287; GFX8-NEXT: ; return to shader part epilog 1288; 1289; GFX7-LABEL: extractelement_sgpr_v8i16_idx7: 1290; GFX7: ; %bb.0: 1291; GFX7-NEXT: s_mov_b32 s0, s2 1292; GFX7-NEXT: s_mov_b32 s1, s3 1293; GFX7-NEXT: s_mov_b32 s2, -1 1294; GFX7-NEXT: s_mov_b32 s3, 0xf000 1295; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:14 1296; GFX7-NEXT: s_waitcnt vmcnt(0) 1297; GFX7-NEXT: v_readfirstlane_b32 s0, v0 1298; GFX7-NEXT: ; return to shader part epilog 1299; 1300; GFX10-LABEL: extractelement_sgpr_v8i16_idx7: 1301; GFX10: ; %bb.0: 1302; GFX10-NEXT: v_mov_b32_e32 v0, 0 1303; GFX10-NEXT: global_load_ushort v0, v0, s[2:3] offset:14 1304; GFX10-NEXT: s_waitcnt vmcnt(0) 1305; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1306; GFX10-NEXT: ; return to shader part epilog 1307; 1308; GFX11-LABEL: extractelement_sgpr_v8i16_idx7: 1309; GFX11: ; %bb.0: 1310; GFX11-NEXT: v_mov_b32_e32 v0, 0 1311; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] offset:14 1312; GFX11-NEXT: s_waitcnt vmcnt(0) 1313; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1314; GFX11-NEXT: ; return to shader part epilog 1315 %vector = load <8 x i16>, ptr addrspace(4) %ptr 1316 %element = extractelement <8 x i16> %vector, i32 7 1317 ret i16 %element 1318} 1319 1320define i16 @extractelement_vgpr_v8i16_idx0(ptr addrspace(1) %ptr) { 1321; GFX9-LABEL: extractelement_vgpr_v8i16_idx0: 1322; GFX9: ; %bb.0: 1323; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1324; GFX9-NEXT: global_load_ushort v0, v[0:1], off 1325; GFX9-NEXT: s_waitcnt vmcnt(0) 1326; GFX9-NEXT: s_setpc_b64 s[30:31] 1327; 1328; GFX8-LABEL: extractelement_vgpr_v8i16_idx0: 1329; GFX8: ; %bb.0: 1330; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1331; GFX8-NEXT: flat_load_ushort v0, v[0:1] 1332; GFX8-NEXT: s_waitcnt vmcnt(0) 1333; GFX8-NEXT: s_setpc_b64 s[30:31] 1334; 1335; GFX7-LABEL: extractelement_vgpr_v8i16_idx0: 1336; GFX7: ; %bb.0: 1337; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1338; GFX7-NEXT: s_mov_b32 s6, 0 1339; GFX7-NEXT: s_mov_b32 s7, 0xf000 1340; GFX7-NEXT: s_mov_b64 s[4:5], 0 1341; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 1342; GFX7-NEXT: s_waitcnt vmcnt(0) 1343; GFX7-NEXT: s_setpc_b64 s[30:31] 1344; 1345; GFX10-LABEL: extractelement_vgpr_v8i16_idx0: 1346; GFX10: ; %bb.0: 1347; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1348; GFX10-NEXT: global_load_ushort v0, v[0:1], off 1349; GFX10-NEXT: s_waitcnt vmcnt(0) 1350; GFX10-NEXT: s_setpc_b64 s[30:31] 1351; 1352; GFX11-LABEL: extractelement_vgpr_v8i16_idx0: 1353; GFX11: ; %bb.0: 1354; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1355; GFX11-NEXT: global_load_u16 v0, v[0:1], off 1356; GFX11-NEXT: s_waitcnt vmcnt(0) 1357; GFX11-NEXT: s_setpc_b64 s[30:31] 1358 %vector = load <8 x i16>, ptr addrspace(1) %ptr 1359 %element = extractelement <8 x i16> %vector, i32 0 1360 ret i16 %element 1361} 1362 1363define i16 @extractelement_vgpr_v8i16_idx1(ptr addrspace(1) %ptr) { 1364; GFX9-LABEL: extractelement_vgpr_v8i16_idx1: 1365; GFX9: ; %bb.0: 1366; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1367; GFX9-NEXT: global_load_ushort v0, v[0:1], off offset:2 1368; GFX9-NEXT: s_waitcnt vmcnt(0) 1369; GFX9-NEXT: s_setpc_b64 s[30:31] 1370; 1371; GFX8-LABEL: extractelement_vgpr_v8i16_idx1: 1372; GFX8: ; %bb.0: 1373; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1374; GFX8-NEXT: v_add_u32_e32 v0, vcc, 2, v0 1375; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1376; GFX8-NEXT: flat_load_ushort v0, v[0:1] 1377; GFX8-NEXT: s_waitcnt vmcnt(0) 1378; GFX8-NEXT: s_setpc_b64 s[30:31] 1379; 1380; GFX7-LABEL: extractelement_vgpr_v8i16_idx1: 1381; GFX7: ; %bb.0: 1382; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1383; GFX7-NEXT: s_mov_b32 s6, 0 1384; GFX7-NEXT: s_mov_b32 s7, 0xf000 1385; GFX7-NEXT: s_mov_b64 s[4:5], 0 1386; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:2 1387; GFX7-NEXT: s_waitcnt vmcnt(0) 1388; GFX7-NEXT: s_setpc_b64 s[30:31] 1389; 1390; GFX10-LABEL: extractelement_vgpr_v8i16_idx1: 1391; GFX10: ; %bb.0: 1392; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1393; GFX10-NEXT: global_load_ushort v0, v[0:1], off offset:2 1394; GFX10-NEXT: s_waitcnt vmcnt(0) 1395; GFX10-NEXT: s_setpc_b64 s[30:31] 1396; 1397; GFX11-LABEL: extractelement_vgpr_v8i16_idx1: 1398; GFX11: ; %bb.0: 1399; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1400; GFX11-NEXT: global_load_u16 v0, v[0:1], off offset:2 1401; GFX11-NEXT: s_waitcnt vmcnt(0) 1402; GFX11-NEXT: s_setpc_b64 s[30:31] 1403 %vector = load <8 x i16>, ptr addrspace(1) %ptr 1404 %element = extractelement <8 x i16> %vector, i32 1 1405 ret i16 %element 1406} 1407 1408define i16 @extractelement_vgpr_v8i16_idx2(ptr addrspace(1) %ptr) { 1409; GFX9-LABEL: extractelement_vgpr_v8i16_idx2: 1410; GFX9: ; %bb.0: 1411; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1412; GFX9-NEXT: global_load_ushort v0, v[0:1], off offset:4 1413; GFX9-NEXT: s_waitcnt vmcnt(0) 1414; GFX9-NEXT: s_setpc_b64 s[30:31] 1415; 1416; GFX8-LABEL: extractelement_vgpr_v8i16_idx2: 1417; GFX8: ; %bb.0: 1418; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1419; GFX8-NEXT: v_add_u32_e32 v0, vcc, 4, v0 1420; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1421; GFX8-NEXT: flat_load_ushort v0, v[0:1] 1422; GFX8-NEXT: s_waitcnt vmcnt(0) 1423; GFX8-NEXT: s_setpc_b64 s[30:31] 1424; 1425; GFX7-LABEL: extractelement_vgpr_v8i16_idx2: 1426; GFX7: ; %bb.0: 1427; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1428; GFX7-NEXT: s_mov_b32 s6, 0 1429; GFX7-NEXT: s_mov_b32 s7, 0xf000 1430; GFX7-NEXT: s_mov_b64 s[4:5], 0 1431; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:4 1432; GFX7-NEXT: s_waitcnt vmcnt(0) 1433; GFX7-NEXT: s_setpc_b64 s[30:31] 1434; 1435; GFX10-LABEL: extractelement_vgpr_v8i16_idx2: 1436; GFX10: ; %bb.0: 1437; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1438; GFX10-NEXT: global_load_ushort v0, v[0:1], off offset:4 1439; GFX10-NEXT: s_waitcnt vmcnt(0) 1440; GFX10-NEXT: s_setpc_b64 s[30:31] 1441; 1442; GFX11-LABEL: extractelement_vgpr_v8i16_idx2: 1443; GFX11: ; %bb.0: 1444; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1445; GFX11-NEXT: global_load_u16 v0, v[0:1], off offset:4 1446; GFX11-NEXT: s_waitcnt vmcnt(0) 1447; GFX11-NEXT: s_setpc_b64 s[30:31] 1448 %vector = load <8 x i16>, ptr addrspace(1) %ptr 1449 %element = extractelement <8 x i16> %vector, i32 2 1450 ret i16 %element 1451} 1452 1453define i16 @extractelement_vgpr_v8i16_idx3(ptr addrspace(1) %ptr) { 1454; GFX9-LABEL: extractelement_vgpr_v8i16_idx3: 1455; GFX9: ; %bb.0: 1456; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1457; GFX9-NEXT: global_load_ushort v0, v[0:1], off offset:6 1458; GFX9-NEXT: s_waitcnt vmcnt(0) 1459; GFX9-NEXT: s_setpc_b64 s[30:31] 1460; 1461; GFX8-LABEL: extractelement_vgpr_v8i16_idx3: 1462; GFX8: ; %bb.0: 1463; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1464; GFX8-NEXT: v_add_u32_e32 v0, vcc, 6, v0 1465; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1466; GFX8-NEXT: flat_load_ushort v0, v[0:1] 1467; GFX8-NEXT: s_waitcnt vmcnt(0) 1468; GFX8-NEXT: s_setpc_b64 s[30:31] 1469; 1470; GFX7-LABEL: extractelement_vgpr_v8i16_idx3: 1471; GFX7: ; %bb.0: 1472; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1473; GFX7-NEXT: s_mov_b32 s6, 0 1474; GFX7-NEXT: s_mov_b32 s7, 0xf000 1475; GFX7-NEXT: s_mov_b64 s[4:5], 0 1476; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:6 1477; GFX7-NEXT: s_waitcnt vmcnt(0) 1478; GFX7-NEXT: s_setpc_b64 s[30:31] 1479; 1480; GFX10-LABEL: extractelement_vgpr_v8i16_idx3: 1481; GFX10: ; %bb.0: 1482; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1483; GFX10-NEXT: global_load_ushort v0, v[0:1], off offset:6 1484; GFX10-NEXT: s_waitcnt vmcnt(0) 1485; GFX10-NEXT: s_setpc_b64 s[30:31] 1486; 1487; GFX11-LABEL: extractelement_vgpr_v8i16_idx3: 1488; GFX11: ; %bb.0: 1489; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1490; GFX11-NEXT: global_load_u16 v0, v[0:1], off offset:6 1491; GFX11-NEXT: s_waitcnt vmcnt(0) 1492; GFX11-NEXT: s_setpc_b64 s[30:31] 1493 %vector = load <8 x i16>, ptr addrspace(1) %ptr 1494 %element = extractelement <8 x i16> %vector, i32 3 1495 ret i16 %element 1496} 1497 1498define i16 @extractelement_vgpr_v8i16_idx4(ptr addrspace(1) %ptr) { 1499; GFX9-LABEL: extractelement_vgpr_v8i16_idx4: 1500; GFX9: ; %bb.0: 1501; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1502; GFX9-NEXT: global_load_ushort v0, v[0:1], off offset:8 1503; GFX9-NEXT: s_waitcnt vmcnt(0) 1504; GFX9-NEXT: s_setpc_b64 s[30:31] 1505; 1506; GFX8-LABEL: extractelement_vgpr_v8i16_idx4: 1507; GFX8: ; %bb.0: 1508; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1509; GFX8-NEXT: v_add_u32_e32 v0, vcc, 8, v0 1510; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1511; GFX8-NEXT: flat_load_ushort v0, v[0:1] 1512; GFX8-NEXT: s_waitcnt vmcnt(0) 1513; GFX8-NEXT: s_setpc_b64 s[30:31] 1514; 1515; GFX7-LABEL: extractelement_vgpr_v8i16_idx4: 1516; GFX7: ; %bb.0: 1517; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1518; GFX7-NEXT: s_mov_b32 s6, 0 1519; GFX7-NEXT: s_mov_b32 s7, 0xf000 1520; GFX7-NEXT: s_mov_b64 s[4:5], 0 1521; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:8 1522; GFX7-NEXT: s_waitcnt vmcnt(0) 1523; GFX7-NEXT: s_setpc_b64 s[30:31] 1524; 1525; GFX10-LABEL: extractelement_vgpr_v8i16_idx4: 1526; GFX10: ; %bb.0: 1527; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1528; GFX10-NEXT: global_load_ushort v0, v[0:1], off offset:8 1529; GFX10-NEXT: s_waitcnt vmcnt(0) 1530; GFX10-NEXT: s_setpc_b64 s[30:31] 1531; 1532; GFX11-LABEL: extractelement_vgpr_v8i16_idx4: 1533; GFX11: ; %bb.0: 1534; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1535; GFX11-NEXT: global_load_u16 v0, v[0:1], off offset:8 1536; GFX11-NEXT: s_waitcnt vmcnt(0) 1537; GFX11-NEXT: s_setpc_b64 s[30:31] 1538 %vector = load <8 x i16>, ptr addrspace(1) %ptr 1539 %element = extractelement <8 x i16> %vector, i32 4 1540 ret i16 %element 1541} 1542 1543define i16 @extractelement_vgpr_v8i16_idx5(ptr addrspace(1) %ptr) { 1544; GFX9-LABEL: extractelement_vgpr_v8i16_idx5: 1545; GFX9: ; %bb.0: 1546; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1547; GFX9-NEXT: global_load_ushort v0, v[0:1], off offset:10 1548; GFX9-NEXT: s_waitcnt vmcnt(0) 1549; GFX9-NEXT: s_setpc_b64 s[30:31] 1550; 1551; GFX8-LABEL: extractelement_vgpr_v8i16_idx5: 1552; GFX8: ; %bb.0: 1553; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1554; GFX8-NEXT: v_add_u32_e32 v0, vcc, 10, v0 1555; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1556; GFX8-NEXT: flat_load_ushort v0, v[0:1] 1557; GFX8-NEXT: s_waitcnt vmcnt(0) 1558; GFX8-NEXT: s_setpc_b64 s[30:31] 1559; 1560; GFX7-LABEL: extractelement_vgpr_v8i16_idx5: 1561; GFX7: ; %bb.0: 1562; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1563; GFX7-NEXT: s_mov_b32 s6, 0 1564; GFX7-NEXT: s_mov_b32 s7, 0xf000 1565; GFX7-NEXT: s_mov_b64 s[4:5], 0 1566; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:10 1567; GFX7-NEXT: s_waitcnt vmcnt(0) 1568; GFX7-NEXT: s_setpc_b64 s[30:31] 1569; 1570; GFX10-LABEL: extractelement_vgpr_v8i16_idx5: 1571; GFX10: ; %bb.0: 1572; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1573; GFX10-NEXT: global_load_ushort v0, v[0:1], off offset:10 1574; GFX10-NEXT: s_waitcnt vmcnt(0) 1575; GFX10-NEXT: s_setpc_b64 s[30:31] 1576; 1577; GFX11-LABEL: extractelement_vgpr_v8i16_idx5: 1578; GFX11: ; %bb.0: 1579; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1580; GFX11-NEXT: global_load_u16 v0, v[0:1], off offset:10 1581; GFX11-NEXT: s_waitcnt vmcnt(0) 1582; GFX11-NEXT: s_setpc_b64 s[30:31] 1583 %vector = load <8 x i16>, ptr addrspace(1) %ptr 1584 %element = extractelement <8 x i16> %vector, i32 5 1585 ret i16 %element 1586} 1587 1588define i16 @extractelement_vgpr_v8i16_idx6(ptr addrspace(1) %ptr) { 1589; GFX9-LABEL: extractelement_vgpr_v8i16_idx6: 1590; GFX9: ; %bb.0: 1591; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1592; GFX9-NEXT: global_load_ushort v0, v[0:1], off offset:12 1593; GFX9-NEXT: s_waitcnt vmcnt(0) 1594; GFX9-NEXT: s_setpc_b64 s[30:31] 1595; 1596; GFX8-LABEL: extractelement_vgpr_v8i16_idx6: 1597; GFX8: ; %bb.0: 1598; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1599; GFX8-NEXT: v_add_u32_e32 v0, vcc, 12, v0 1600; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1601; GFX8-NEXT: flat_load_ushort v0, v[0:1] 1602; GFX8-NEXT: s_waitcnt vmcnt(0) 1603; GFX8-NEXT: s_setpc_b64 s[30:31] 1604; 1605; GFX7-LABEL: extractelement_vgpr_v8i16_idx6: 1606; GFX7: ; %bb.0: 1607; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1608; GFX7-NEXT: s_mov_b32 s6, 0 1609; GFX7-NEXT: s_mov_b32 s7, 0xf000 1610; GFX7-NEXT: s_mov_b64 s[4:5], 0 1611; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:12 1612; GFX7-NEXT: s_waitcnt vmcnt(0) 1613; GFX7-NEXT: s_setpc_b64 s[30:31] 1614; 1615; GFX10-LABEL: extractelement_vgpr_v8i16_idx6: 1616; GFX10: ; %bb.0: 1617; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1618; GFX10-NEXT: global_load_ushort v0, v[0:1], off offset:12 1619; GFX10-NEXT: s_waitcnt vmcnt(0) 1620; GFX10-NEXT: s_setpc_b64 s[30:31] 1621; 1622; GFX11-LABEL: extractelement_vgpr_v8i16_idx6: 1623; GFX11: ; %bb.0: 1624; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1625; GFX11-NEXT: global_load_u16 v0, v[0:1], off offset:12 1626; GFX11-NEXT: s_waitcnt vmcnt(0) 1627; GFX11-NEXT: s_setpc_b64 s[30:31] 1628 %vector = load <8 x i16>, ptr addrspace(1) %ptr 1629 %element = extractelement <8 x i16> %vector, i32 6 1630 ret i16 %element 1631} 1632 1633define i16 @extractelement_vgpr_v8i16_idx7(ptr addrspace(1) %ptr) { 1634; GFX9-LABEL: extractelement_vgpr_v8i16_idx7: 1635; GFX9: ; %bb.0: 1636; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1637; GFX9-NEXT: global_load_ushort v0, v[0:1], off offset:14 1638; GFX9-NEXT: s_waitcnt vmcnt(0) 1639; GFX9-NEXT: s_setpc_b64 s[30:31] 1640; 1641; GFX8-LABEL: extractelement_vgpr_v8i16_idx7: 1642; GFX8: ; %bb.0: 1643; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1644; GFX8-NEXT: v_add_u32_e32 v0, vcc, 14, v0 1645; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1646; GFX8-NEXT: flat_load_ushort v0, v[0:1] 1647; GFX8-NEXT: s_waitcnt vmcnt(0) 1648; GFX8-NEXT: s_setpc_b64 s[30:31] 1649; 1650; GFX7-LABEL: extractelement_vgpr_v8i16_idx7: 1651; GFX7: ; %bb.0: 1652; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1653; GFX7-NEXT: s_mov_b32 s6, 0 1654; GFX7-NEXT: s_mov_b32 s7, 0xf000 1655; GFX7-NEXT: s_mov_b64 s[4:5], 0 1656; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:14 1657; GFX7-NEXT: s_waitcnt vmcnt(0) 1658; GFX7-NEXT: s_setpc_b64 s[30:31] 1659; 1660; GFX10-LABEL: extractelement_vgpr_v8i16_idx7: 1661; GFX10: ; %bb.0: 1662; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1663; GFX10-NEXT: global_load_ushort v0, v[0:1], off offset:14 1664; GFX10-NEXT: s_waitcnt vmcnt(0) 1665; GFX10-NEXT: s_setpc_b64 s[30:31] 1666; 1667; GFX11-LABEL: extractelement_vgpr_v8i16_idx7: 1668; GFX11: ; %bb.0: 1669; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1670; GFX11-NEXT: global_load_u16 v0, v[0:1], off offset:14 1671; GFX11-NEXT: s_waitcnt vmcnt(0) 1672; GFX11-NEXT: s_setpc_b64 s[30:31] 1673 %vector = load <8 x i16>, ptr addrspace(1) %ptr 1674 %element = extractelement <8 x i16> %vector, i32 7 1675 ret i16 %element 1676} 1677;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 1678; GCN: {{.*}} 1679