; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s

; Tests instruction selection (llc -global-isel) of `extractelement` applied to
; a <4 x i128> vector that was just loaded from memory. Each function below
; varies where the pointer and the index come from (inreg vs. regular argument,
; variable vs. constant index); the expected assembly is checked separately for
; gfx900, fiji, hawaii, gfx1010 and gfx1100.
;
; The check lines are autogenerated; regenerate them with the script named in
; the NOTE line instead of editing them by hand.

; Pointer (addrspace(4)) and index are both `inreg` amdgpu_ps arguments.
; The checks expect the index to be masked with 3 and shifted left by 4 with
; s_and_b32/s_lshl_b32, followed by a single 128-bit s_load that uses the
; result as a register offset.
define amdgpu_ps i128 @extractelement_sgpr_v4i128_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s4, 3
; GFX9-NEXT: s_lshl_b32 s0, s0, 4
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0 offset:0x0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s0, s4, 3
; GFX8-NEXT: s_lshl_b32 s0, s0, 4
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_and_b32 s0, s4, 3
; GFX7-NEXT: s_lshl_b32 s0, s0, 4
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s0, s4, 3
; GFX10-NEXT: s_lshl_b32 s0, s0, 4
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0 offset:0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s4, 3
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_lshl_b32 s0, s0, 4
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], s0 offset:0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <4 x i128>, ptr addrspace(4) %ptr
  %element = extractelement <4 x i128> %vector, i32 %idx
  ret i128 %element
}

; Pointer (addrspace(1)) is a regular argument, index is `inreg`.
; The checks expect the same s_and_b32/s_lshl_b32 index scaling, one 128-bit
; global/flat/buffer load (depending on the target) at pointer + offset, and
; four v_readfirstlane_b32 copies of the loaded dwords into s0-s3 before the
; shader returns.
define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s2, 3
; GFX9-NEXT: s_mov_b32 s1, 0
; GFX9-NEXT: s_lshl_b32 s0, s0, 4
; GFX9-NEXT: v_mov_b32_e32 v3, s1
; GFX9-NEXT: v_mov_b32_e32 v2, s0
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: v_readfirstlane_b32 s1, v1
; GFX9-NEXT: v_readfirstlane_b32 s2, v2
; GFX9-NEXT: v_readfirstlane_b32 s3, v3
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s0, s2, 3
; GFX8-NEXT: s_mov_b32 s1, 0
; GFX8-NEXT: s_lshl_b32 s0, s0, 4
; GFX8-NEXT: v_mov_b32_e32 v3, s1
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: v_readfirstlane_b32 s1, v1
; GFX8-NEXT: v_readfirstlane_b32 s2, v2
; GFX8-NEXT: v_readfirstlane_b32 s3, v3
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_and_b32 s0, s2, 3
; GFX7-NEXT: s_mov_b32 s1, 0
; GFX7-NEXT: s_lshl_b32 s0, s0, 4
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, s1
; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: v_readfirstlane_b32 s1, v1
; GFX7-NEXT: v_readfirstlane_b32 s2, v2
; GFX7-NEXT: v_readfirstlane_b32 s3, v3
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s0, s2, 3
; GFX10-NEXT: s_mov_b32 s1, 0
; GFX10-NEXT: s_lshl_b32 s0, s0, 4
; GFX10-NEXT: v_mov_b32_e32 v3, s1
; GFX10-NEXT: v_mov_b32_e32 v2, s0
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: v_readfirstlane_b32 s1, v1
; GFX10-NEXT: v_readfirstlane_b32 s2, v2
; GFX10-NEXT: v_readfirstlane_b32 s3, v3
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s2, 3
; GFX11-NEXT: s_mov_b32 s1, 0
; GFX11-NEXT: s_lshl_b32 s0, s0, 4
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: v_readfirstlane_b32 s1, v1
; GFX11-NEXT: v_readfirstlane_b32 s2, v2
; GFX11-NEXT: v_readfirstlane_b32 s3, v3
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <4 x i128>, ptr addrspace(1) %ptr
  %element = extractelement <4 x i128> %vector, i32 %idx
  ret i128 %element
}

; Default calling convention; pointer (addrspace(1)) and index are regular
; arguments. The checks expect v_and_b32/v_lshlrev_b32 index scaling, an
; add / add-with-carry pair on the pointer in v[0:1], one 128-bit load into
; v[0:3], and a return through s_setpc_b64 s[30:31].
define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_vgpr_v4i128_vgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v2, 3, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 4, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i128_vgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v2, 3, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 4, v2
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i128_vgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v2, 3, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 4, v2
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i128_vgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v2, 3, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 4, v2
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v4i128_vgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v2, 3, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 4, v2
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <4 x i128>, ptr addrspace(1) %ptr
  %element = extractelement <4 x i128> %vector, i32 %idx
  ret i128 %element
}

; Pointer (addrspace(4)) is `inreg`, index is a regular argument.
; The checks expect v_and_b32/v_lshlrev_b32 index scaling on every target.
; The GFX9, GFX10 and GFX11 checks then use the scaled index directly as the
; VGPR offset of a global load based at s[2:3]; the GFX8 checks copy the
; pointer into VGPRs and add the offset before a flat load; the GFX7 checks
; use a buffer load with addr64. The loaded dwords are copied to s0-s3 with
; v_readfirstlane_b32.
define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_and_b32_e32 v0, 3, v0
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: v_readfirstlane_b32 s1, v1
; GFX9-NEXT: v_readfirstlane_b32 s2, v2
; GFX9-NEXT: v_readfirstlane_b32 s3, v3
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_and_b32_e32 v0, 3, v0
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 4, v0
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: v_readfirstlane_b32 s1, v1
; GFX8-NEXT: v_readfirstlane_b32 s2, v2
; GFX8-NEXT: v_readfirstlane_b32 s3, v3
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_and_b32_e32 v0, 3, v0
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX7-NEXT: v_mov_b32_e32 v1, 0
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: v_readfirstlane_b32 s1, v1
; GFX7-NEXT: v_readfirstlane_b32 s2, v2
; GFX7-NEXT: v_readfirstlane_b32 s3, v3
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_and_b32_e32 v0, 3, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: v_readfirstlane_b32 s1, v1
; GFX10-NEXT: v_readfirstlane_b32 s2, v2
; GFX10-NEXT: v_readfirstlane_b32 s3, v3
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_and_b32_e32 v0, 3, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX11-NEXT: global_load_b128 v[0:3], v0, s[2:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: v_readfirstlane_b32 s1, v1
; GFX11-NEXT: v_readfirstlane_b32 s2, v2
; GFX11-NEXT: v_readfirstlane_b32 s3, v3
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <4 x i128>, ptr addrspace(4) %ptr
  %element = extractelement <4 x i128> %vector, i32 %idx
  ret i128 %element
}

; Constant index 0 with an `inreg` addrspace(4) pointer. Every target is
; expected to emit a single 128-bit s_load with immediate offset 0x0; the
; gfx900, fiji and hawaii runs share the common GCN check prefix for this
; function.
define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx0(ptr addrspace(4) inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v4i128_idx0:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i128_idx0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i128_idx0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <4 x i128>, ptr addrspace(4) %ptr
  %element = extractelement <4 x i128> %vector, i32 0
  ret i128 %element
}

; Constant index 1 with an `inreg` addrspace(4) pointer. The expected s_load
; immediate offset is 0x10, except in the GFX7 checks, which show 0x4
; (presumably the same 16-byte offset in a dword-scaled encoding -- confirm
; against the ISA documentation).
define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx1(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v4i128_idx1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x10
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i128_idx1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x10
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i128_idx1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x4
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i128_idx1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x10
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i128_idx1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x10
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <4 x i128>, ptr addrspace(4) %ptr
  %element = extractelement <4 x i128> %vector, i32 1
  ret i128 %element
}

; Constant index 2 with an `inreg` addrspace(4) pointer. The expected s_load
; immediate offset is 0x20 (0x8 in the GFX7 checks).
define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx2(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v4i128_idx2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x20
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i128_idx2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x20
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i128_idx2:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x8
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i128_idx2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x20
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i128_idx2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x20
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <4 x i128>, ptr addrspace(4) %ptr
  %element = extractelement <4 x i128> %vector, i32 2
  ret i128 %element
}

; Constant index 3 with an `inreg` addrspace(4) pointer. The expected s_load
; immediate offset is 0x30 (0xc in the GFX7 checks).
define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx3(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v4i128_idx3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x30
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i128_idx3:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x30
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i128_idx3:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0xc
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i128_idx3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x30
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i128_idx3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x30
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <4 x i128>, ptr addrspace(4) %ptr
  %element = extractelement <4 x i128> %vector, i32 3
  ret i128 %element
}

; Constant index 0 with a regular addrspace(1) pointer and the default calling
; convention. The checks expect a single 128-bit load from the unmodified
; pointer in v[0:1] into v[0:3], then a return through s_setpc_b64.
define i128 @extractelement_vgpr_v4i128_idx0(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i128_idx0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i128_idx0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i128_idx0:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i128_idx0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v4i128_idx0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <4 x i128>, ptr addrspace(1) %ptr
  %element = extractelement <4 x i128> %vector, i32 0
  ret i128 %element
}

; Constant index 1 with a regular addrspace(1) pointer. The GFX9, GFX7, GFX10
; and GFX11 checks carry the 16-byte displacement as an immediate on the load
; itself; the GFX8 checks add 16 to the address in v[0:1] before a flat load.
define i128 @extractelement_vgpr_v4i128_idx1(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i128_idx1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i128_idx1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i128_idx1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 offset:16
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i128_idx1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v4i128_idx1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <4 x i128>, ptr addrspace(1) %ptr
  %element = extractelement <4 x i128> %vector, i32 1
  ret i128 %element
}

; Constant index 2 with a regular addrspace(1) pointer. Same shape as the
; index-1 case with a displacement of 32 (immediate on the load everywhere
; except the GFX8 checks, which add 32 to the address first).
define i128 @extractelement_vgpr_v4i128_idx2(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i128_idx2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:32
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i128_idx2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i128_idx2:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 offset:32
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i128_idx2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:32
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v4i128_idx2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:32
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <4 x i128>, ptr addrspace(1) %ptr
  %element = extractelement <4 x i128> %vector, i32 2
  ret i128 %element
}

; Constant index 3 with a regular addrspace(1) pointer. Same shape as the
; index-1 case with a displacement of 48 (immediate on the load everywhere
; except the GFX8 checks, which add 48 to the address first).
define i128 @extractelement_vgpr_v4i128_idx3(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i128_idx3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:48
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i128_idx3:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 48, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i128_idx3:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 offset:48
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i128_idx3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:48
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v4i128_idx3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:48
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <4 x i128>, ptr addrspace(1) %ptr
  %element = extractelement <4 x i128> %vector, i32 3
  ret i128 %element
}