; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s

; Every function below loads an <N x i8> vector through a pointer and returns
; one i8 element of it. The expected output never loads the whole vector:
;   * a variable index is masked with N-1 (3 or 7), sign-extended to 64 bits
;     and added to the pointer;
;   * a constant index becomes an immediate offset on the load, or an explicit
;     64-bit add on the target whose flat load is shown without an offset;
;   * the element is then fetched with a single unsigned-byte load.
; The function names encode whether the pointer and the index are passed
; "inreg" (sgpr) or as ordinary arguments (vgpr).
; The assertion blocks are generated output; regenerate them with the script
; named on the first line instead of editing them by hand.

; Pointer: inreg addrspace(4). Index: inreg i32.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_and_b32 s0, s4, 3
; GFX9-NEXT:    s_ashr_i32 s1, s0, 31
; GFX9-NEXT:    s_add_u32 s0, s2, s0
; GFX9-NEXT:    s_addc_u32 s1, s3, s1
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_and_b32 s0, s4, 3
; GFX8-NEXT:    s_ashr_i32 s1, s0, 31
; GFX8-NEXT:    s_add_u32 s0, s2, s0
; GFX8-NEXT:    s_addc_u32 s1, s3, s1
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_and_b32 s4, s4, 3
; GFX7-NEXT:    s_ashr_i32 s5, s4, 31
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_and_b32 s0, s4, 3
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_ashr_i32 s1, s0, 31
; GFX10-NEXT:    s_add_u32 s0, s2, s0
; GFX10-NEXT:    s_addc_u32 s1, s3, s1
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1]
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_and_b32 s0, s4, 3
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_ashr_i32 s1, s0, 31
; GFX11-NEXT:    s_add_u32 s0, s2, s0
; GFX11-NEXT:    s_addc_u32 s1, s3, s1
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1]
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <4 x i8> %vector, i32 %idx
  ret i8 %element
}

; Pointer: ordinary addrspace(1) argument. Index: inreg i32.
define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_and_b32 s0, s2, 3
; GFX9-NEXT:    s_ashr_i32 s1, s0, 31
; GFX9-NEXT:    v_mov_b32_e32 v3, s1
; GFX9-NEXT:    v_mov_b32_e32 v2, s0
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_and_b32 s0, s2, 3
; GFX8-NEXT:    s_ashr_i32 s1, s0, 31
; GFX8-NEXT:    v_mov_b32_e32 v3, s1
; GFX8-NEXT:    v_mov_b32_e32 v2, s0
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_and_b32 s0, s2, 3
; GFX7-NEXT:    s_ashr_i32 s1, s0, 31
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_and_b32 s0, s2, 3
; GFX10-NEXT:    s_ashr_i32 s1, s0, 31
; GFX10-NEXT:    v_mov_b32_e32 v3, s1
; GFX10-NEXT:    v_mov_b32_e32 v2, s0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_and_b32 s0, s2, 3
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_ashr_i32 s1, s0, 31
; GFX11-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <4 x i8> %vector, i32 %idx
  ret i8 %element
}

; Pointer and index are both ordinary arguments; default calling convention,
; so the expected code ends in s_setpc_b64 instead of a shader epilog.
define i8 @extractelement_vgpr_v4i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_and_b32_e32 v2, 3, v2
; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_and_b32_e32 v2, 3, v2
; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_and_b32_e32 v2, 3, v2
; GFX7-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_and_b32_e32 v2, 3, v2
; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_and_b32_e32 v2, 3, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <4 x i8> %vector, i32 %idx
  ret i8 %element
}

; Pointer: inreg addrspace(4). Index: ordinary i32 argument.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_and_b32_e32 v2, 3, v0
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT:    v_mov_b32_e32 v1, s3
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_and_b32_e32 v2, 3, v0
; GFX8-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_and_b32_e32 v2, 3, v0
; GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GFX10-NEXT:    v_mov_b32_e32 v1, s3
; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_and_b32_e32 v2, 3, v0
; GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <4 x i8> %vector, i32 %idx
  ret i8 %element
}

; Constant index 0, inreg addrspace(4) pointer: the byte is loaded straight
; from the pointer.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx0(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v4i8_idx0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_load_ubyte v0, v0, s[2:3]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i8_idx0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i8_idx0:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_idx0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    global_load_ubyte v0, v0, s[2:3]
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i8_idx0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    global_load_u8 v0, v0, s[2:3]
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <4 x i8> %vector, i32 0
  ret i8 %element
}

; Constant index 1, inreg addrspace(4) pointer.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v4i8_idx1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:1
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i8_idx1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_add_u32 s0, s2, 1
; GFX8-NEXT:    s_addc_u32 s1, s3, 0
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i8_idx1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 offset:1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_idx1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i8_idx1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    global_load_u8 v0, v0, s[2:3] offset:1
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <4 x i8> %vector, i32 1
  ret i8 %element
}

; Constant index 2, inreg addrspace(4) pointer.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v4i8_idx2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:2
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i8_idx2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_add_u32 s0, s2, 2
; GFX8-NEXT:    s_addc_u32 s1, s3, 0
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i8_idx2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 offset:2
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_idx2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:2
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i8_idx2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    global_load_u8 v0, v0, s[2:3] offset:2
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <4 x i8> %vector, i32 2
  ret i8 %element
}

; Constant index 3, inreg addrspace(4) pointer.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx3(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v4i8_idx3:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:3
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i8_idx3:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_add_u32 s0, s2, 3
; GFX8-NEXT:    s_addc_u32 s1, s3, 0
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i8_idx3:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 offset:3
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_idx3:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:3
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i8_idx3:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    global_load_u8 v0, v0, s[2:3] offset:3
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <4 x i8> %vector, i32 3
  ret i8 %element
}

; Constant index 0, ordinary addrspace(1) pointer argument, default calling
; convention.
define i8 @extractelement_vgpr_v4i8_idx0(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <4 x i8> %vector, i32 0
  ret i8 %element
}

; Constant index 1, ordinary addrspace(1) pointer argument.
define i8 @extractelement_vgpr_v4i8_idx1(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:1
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <4 x i8> %vector, i32 1
  ret i8 %element
}

; Constant index 2, ordinary addrspace(1) pointer argument.
define i8 @extractelement_vgpr_v4i8_idx2(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 2, v0
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:2
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:2
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <4 x i8> %vector, i32 2
  ret i8 %element
}

; Constant index 3, ordinary addrspace(1) pointer argument.
define i8 @extractelement_vgpr_v4i8_idx3(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i8_idx3:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:3
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_idx3:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 3, v0
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_idx3:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:3
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i8_idx3:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:3
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v4i8_idx3:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:3
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <4 x i8> %vector, i32 3
  ret i8 %element
}

; <8 x i8> variant: inreg addrspace(4) pointer, inreg i32 index (mask is 7).
define amdgpu_ps i8 @extractelement_sgpr_v8i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_and_b32 s0, s4, 7
; GFX9-NEXT:    s_ashr_i32 s1, s0, 31
; GFX9-NEXT:    s_add_u32 s0, s2, s0
; GFX9-NEXT:    s_addc_u32 s1, s3, s1
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_and_b32 s0, s4, 7
; GFX8-NEXT:    s_ashr_i32 s1, s0, 31
; GFX8-NEXT:    s_add_u32 s0, s2, s0
; GFX8-NEXT:    s_addc_u32 s1, s3, s1
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_and_b32 s4, s4, 7
; GFX7-NEXT:    s_ashr_i32 s5, s4, 31
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_and_b32 s0, s4, 7
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_ashr_i32 s1, s0, 31
; GFX10-NEXT:    s_add_u32 s0, s2, s0
; GFX10-NEXT:    s_addc_u32 s1, s3, s1
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1]
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_and_b32 s0, s4, 7
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_ashr_i32 s1, s0, 31
; GFX11-NEXT:    s_add_u32 s0, s2, s0
; GFX11-NEXT:    s_addc_u32 s1, s3, s1
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1]
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <8 x i8> %vector, i32 %idx
  ret i8 %element
}

; <8 x i8> variant: ordinary addrspace(1) pointer, inreg i32 index.
define amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_and_b32 s0, s2, 7
; GFX9-NEXT:    s_ashr_i32 s1, s0, 31
; GFX9-NEXT:    v_mov_b32_e32 v3, s1
; GFX9-NEXT:    v_mov_b32_e32 v2, s0
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_and_b32 s0, s2, 7
; GFX8-NEXT:    s_ashr_i32 s1, s0, 31
; GFX8-NEXT:    v_mov_b32_e32 v3, s1
; GFX8-NEXT:    v_mov_b32_e32 v2, s0
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_and_b32 s0, s2, 7
; GFX7-NEXT:    s_ashr_i32 s1, s0, 31
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_and_b32 s0, s2, 7
; GFX10-NEXT:    s_ashr_i32 s1, s0, 31
; GFX10-NEXT:    v_mov_b32_e32 v3, s1
; GFX10-NEXT:    v_mov_b32_e32 v2, s0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_and_b32 s0, s2, 7
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_ashr_i32 s1, s0, 31
; GFX11-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <8 x i8> %vector, i32 %idx
  ret i8 %element
}

; <8 x i8> variant: pointer and index are both ordinary arguments; default
; calling convention.
define i8 @extractelement_vgpr_v8i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_and_b32_e32 v2, 7, v2
; GFX7-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_and_b32_e32 v2, 7, v2
; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_and_b32_e32 v2, 7, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <8 x i8> %vector, i32 %idx
  ret i8 %element
}

define amdgpu_ps i8 @extractelement_sgpr_v8i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) {
864; GFX9-LABEL: extractelement_sgpr_v8i8_vgpr_idx: 865; GFX9: ; %bb.0: 866; GFX9-NEXT: v_and_b32_e32 v2, 7, v0 867; GFX9-NEXT: v_mov_b32_e32 v0, s2 868; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2 869; GFX9-NEXT: v_mov_b32_e32 v1, s3 870; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 871; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc 872; GFX9-NEXT: global_load_ubyte v0, v[0:1], off 873; GFX9-NEXT: s_waitcnt vmcnt(0) 874; GFX9-NEXT: v_readfirstlane_b32 s0, v0 875; GFX9-NEXT: ; return to shader part epilog 876; 877; GFX8-LABEL: extractelement_sgpr_v8i8_vgpr_idx: 878; GFX8: ; %bb.0: 879; GFX8-NEXT: v_and_b32_e32 v2, 7, v0 880; GFX8-NEXT: v_mov_b32_e32 v0, s2 881; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2 882; GFX8-NEXT: v_mov_b32_e32 v1, s3 883; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 884; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc 885; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 886; GFX8-NEXT: s_waitcnt vmcnt(0) 887; GFX8-NEXT: v_readfirstlane_b32 s0, v0 888; GFX8-NEXT: ; return to shader part epilog 889; 890; GFX7-LABEL: extractelement_sgpr_v8i8_vgpr_idx: 891; GFX7: ; %bb.0: 892; GFX7-NEXT: v_and_b32_e32 v0, 7, v0 893; GFX7-NEXT: s_mov_b32 s0, s2 894; GFX7-NEXT: s_mov_b32 s1, s3 895; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0 896; GFX7-NEXT: s_mov_b32 s2, 0 897; GFX7-NEXT: s_mov_b32 s3, 0xf000 898; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64 899; GFX7-NEXT: s_waitcnt vmcnt(0) 900; GFX7-NEXT: v_readfirstlane_b32 s0, v0 901; GFX7-NEXT: ; return to shader part epilog 902; 903; GFX10-LABEL: extractelement_sgpr_v8i8_vgpr_idx: 904; GFX10: ; %bb.0: 905; GFX10-NEXT: v_and_b32_e32 v2, 7, v0 906; GFX10-NEXT: v_mov_b32_e32 v0, s2 907; GFX10-NEXT: v_mov_b32_e32 v1, s3 908; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2 909; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 910; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo 911; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 912; GFX10-NEXT: s_waitcnt vmcnt(0) 913; GFX10-NEXT: v_readfirstlane_b32 s0, v0 914; GFX10-NEXT: 
; return to shader part epilog 915; 916; GFX11-LABEL: extractelement_sgpr_v8i8_vgpr_idx: 917; GFX11: ; %bb.0: 918; GFX11-NEXT: v_and_b32_e32 v2, 7, v0 919; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 920; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 921; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2 922; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 923; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 924; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo 925; GFX11-NEXT: global_load_u8 v0, v[0:1], off 926; GFX11-NEXT: s_waitcnt vmcnt(0) 927; GFX11-NEXT: v_readfirstlane_b32 s0, v0 928; GFX11-NEXT: ; return to shader part epilog 929 %vector = load <8 x i8>, ptr addrspace(4) %ptr 930 %element = extractelement <8 x i8> %vector, i32 %idx 931 ret i8 %element 932} 933 934define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx0(ptr addrspace(4) inreg %ptr) { 935; GFX9-LABEL: extractelement_sgpr_v8i8_idx0: 936; GFX9: ; %bb.0: 937; GFX9-NEXT: v_mov_b32_e32 v0, 0 938; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] 939; GFX9-NEXT: s_waitcnt vmcnt(0) 940; GFX9-NEXT: v_readfirstlane_b32 s0, v0 941; GFX9-NEXT: ; return to shader part epilog 942; 943; GFX8-LABEL: extractelement_sgpr_v8i8_idx0: 944; GFX8: ; %bb.0: 945; GFX8-NEXT: v_mov_b32_e32 v0, s2 946; GFX8-NEXT: v_mov_b32_e32 v1, s3 947; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 948; GFX8-NEXT: s_waitcnt vmcnt(0) 949; GFX8-NEXT: v_readfirstlane_b32 s0, v0 950; GFX8-NEXT: ; return to shader part epilog 951; 952; GFX7-LABEL: extractelement_sgpr_v8i8_idx0: 953; GFX7: ; %bb.0: 954; GFX7-NEXT: s_mov_b32 s0, s2 955; GFX7-NEXT: s_mov_b32 s1, s3 956; GFX7-NEXT: s_mov_b32 s2, -1 957; GFX7-NEXT: s_mov_b32 s3, 0xf000 958; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 959; GFX7-NEXT: s_waitcnt vmcnt(0) 960; GFX7-NEXT: v_readfirstlane_b32 s0, v0 961; GFX7-NEXT: ; return to shader part epilog 962; 963; GFX10-LABEL: extractelement_sgpr_v8i8_idx0: 964; GFX10: ; %bb.0: 965; GFX10-NEXT: v_mov_b32_e32 v0, 
0 966; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] 967; GFX10-NEXT: s_waitcnt vmcnt(0) 968; GFX10-NEXT: v_readfirstlane_b32 s0, v0 969; GFX10-NEXT: ; return to shader part epilog 970; 971; GFX11-LABEL: extractelement_sgpr_v8i8_idx0: 972; GFX11: ; %bb.0: 973; GFX11-NEXT: v_mov_b32_e32 v0, 0 974; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] 975; GFX11-NEXT: s_waitcnt vmcnt(0) 976; GFX11-NEXT: v_readfirstlane_b32 s0, v0 977; GFX11-NEXT: ; return to shader part epilog 978 %vector = load <8 x i8>, ptr addrspace(4) %ptr 979 %element = extractelement <8 x i8> %vector, i32 0 980 ret i8 %element 981} 982 983define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx1(ptr addrspace(4) inreg %ptr) { 984; GFX9-LABEL: extractelement_sgpr_v8i8_idx1: 985; GFX9: ; %bb.0: 986; GFX9-NEXT: v_mov_b32_e32 v0, 0 987; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:1 988; GFX9-NEXT: s_waitcnt vmcnt(0) 989; GFX9-NEXT: v_readfirstlane_b32 s0, v0 990; GFX9-NEXT: ; return to shader part epilog 991; 992; GFX8-LABEL: extractelement_sgpr_v8i8_idx1: 993; GFX8: ; %bb.0: 994; GFX8-NEXT: s_add_u32 s0, s2, 1 995; GFX8-NEXT: s_addc_u32 s1, s3, 0 996; GFX8-NEXT: v_mov_b32_e32 v0, s0 997; GFX8-NEXT: v_mov_b32_e32 v1, s1 998; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 999; GFX8-NEXT: s_waitcnt vmcnt(0) 1000; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1001; GFX8-NEXT: ; return to shader part epilog 1002; 1003; GFX7-LABEL: extractelement_sgpr_v8i8_idx1: 1004; GFX7: ; %bb.0: 1005; GFX7-NEXT: s_mov_b32 s0, s2 1006; GFX7-NEXT: s_mov_b32 s1, s3 1007; GFX7-NEXT: s_mov_b32 s2, -1 1008; GFX7-NEXT: s_mov_b32 s3, 0xf000 1009; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:1 1010; GFX7-NEXT: s_waitcnt vmcnt(0) 1011; GFX7-NEXT: v_readfirstlane_b32 s0, v0 1012; GFX7-NEXT: ; return to shader part epilog 1013; 1014; GFX10-LABEL: extractelement_sgpr_v8i8_idx1: 1015; GFX10: ; %bb.0: 1016; GFX10-NEXT: v_mov_b32_e32 v0, 0 1017; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:1 1018; GFX10-NEXT: s_waitcnt vmcnt(0) 1019; 
GFX10-NEXT: v_readfirstlane_b32 s0, v0 1020; GFX10-NEXT: ; return to shader part epilog 1021; 1022; GFX11-LABEL: extractelement_sgpr_v8i8_idx1: 1023; GFX11: ; %bb.0: 1024; GFX11-NEXT: v_mov_b32_e32 v0, 0 1025; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:1 1026; GFX11-NEXT: s_waitcnt vmcnt(0) 1027; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1028; GFX11-NEXT: ; return to shader part epilog 1029 %vector = load <8 x i8>, ptr addrspace(4) %ptr 1030 %element = extractelement <8 x i8> %vector, i32 1 1031 ret i8 %element 1032} 1033 1034define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx2(ptr addrspace(4) inreg %ptr) { 1035; GFX9-LABEL: extractelement_sgpr_v8i8_idx2: 1036; GFX9: ; %bb.0: 1037; GFX9-NEXT: v_mov_b32_e32 v0, 0 1038; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:2 1039; GFX9-NEXT: s_waitcnt vmcnt(0) 1040; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1041; GFX9-NEXT: ; return to shader part epilog 1042; 1043; GFX8-LABEL: extractelement_sgpr_v8i8_idx2: 1044; GFX8: ; %bb.0: 1045; GFX8-NEXT: s_add_u32 s0, s2, 2 1046; GFX8-NEXT: s_addc_u32 s1, s3, 0 1047; GFX8-NEXT: v_mov_b32_e32 v0, s0 1048; GFX8-NEXT: v_mov_b32_e32 v1, s1 1049; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 1050; GFX8-NEXT: s_waitcnt vmcnt(0) 1051; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1052; GFX8-NEXT: ; return to shader part epilog 1053; 1054; GFX7-LABEL: extractelement_sgpr_v8i8_idx2: 1055; GFX7: ; %bb.0: 1056; GFX7-NEXT: s_mov_b32 s0, s2 1057; GFX7-NEXT: s_mov_b32 s1, s3 1058; GFX7-NEXT: s_mov_b32 s2, -1 1059; GFX7-NEXT: s_mov_b32 s3, 0xf000 1060; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:2 1061; GFX7-NEXT: s_waitcnt vmcnt(0) 1062; GFX7-NEXT: v_readfirstlane_b32 s0, v0 1063; GFX7-NEXT: ; return to shader part epilog 1064; 1065; GFX10-LABEL: extractelement_sgpr_v8i8_idx2: 1066; GFX10: ; %bb.0: 1067; GFX10-NEXT: v_mov_b32_e32 v0, 0 1068; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:2 1069; GFX10-NEXT: s_waitcnt vmcnt(0) 1070; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1071; GFX10-NEXT: ; 
return to shader part epilog 1072; 1073; GFX11-LABEL: extractelement_sgpr_v8i8_idx2: 1074; GFX11: ; %bb.0: 1075; GFX11-NEXT: v_mov_b32_e32 v0, 0 1076; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:2 1077; GFX11-NEXT: s_waitcnt vmcnt(0) 1078; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1079; GFX11-NEXT: ; return to shader part epilog 1080 %vector = load <8 x i8>, ptr addrspace(4) %ptr 1081 %element = extractelement <8 x i8> %vector, i32 2 1082 ret i8 %element 1083} 1084 1085define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx3(ptr addrspace(4) inreg %ptr) { 1086; GFX9-LABEL: extractelement_sgpr_v8i8_idx3: 1087; GFX9: ; %bb.0: 1088; GFX9-NEXT: v_mov_b32_e32 v0, 0 1089; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:3 1090; GFX9-NEXT: s_waitcnt vmcnt(0) 1091; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1092; GFX9-NEXT: ; return to shader part epilog 1093; 1094; GFX8-LABEL: extractelement_sgpr_v8i8_idx3: 1095; GFX8: ; %bb.0: 1096; GFX8-NEXT: s_add_u32 s0, s2, 3 1097; GFX8-NEXT: s_addc_u32 s1, s3, 0 1098; GFX8-NEXT: v_mov_b32_e32 v0, s0 1099; GFX8-NEXT: v_mov_b32_e32 v1, s1 1100; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 1101; GFX8-NEXT: s_waitcnt vmcnt(0) 1102; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1103; GFX8-NEXT: ; return to shader part epilog 1104; 1105; GFX7-LABEL: extractelement_sgpr_v8i8_idx3: 1106; GFX7: ; %bb.0: 1107; GFX7-NEXT: s_mov_b32 s0, s2 1108; GFX7-NEXT: s_mov_b32 s1, s3 1109; GFX7-NEXT: s_mov_b32 s2, -1 1110; GFX7-NEXT: s_mov_b32 s3, 0xf000 1111; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:3 1112; GFX7-NEXT: s_waitcnt vmcnt(0) 1113; GFX7-NEXT: v_readfirstlane_b32 s0, v0 1114; GFX7-NEXT: ; return to shader part epilog 1115; 1116; GFX10-LABEL: extractelement_sgpr_v8i8_idx3: 1117; GFX10: ; %bb.0: 1118; GFX10-NEXT: v_mov_b32_e32 v0, 0 1119; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:3 1120; GFX10-NEXT: s_waitcnt vmcnt(0) 1121; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1122; GFX10-NEXT: ; return to shader part epilog 1123; 1124; GFX11-LABEL: 
extractelement_sgpr_v8i8_idx3: 1125; GFX11: ; %bb.0: 1126; GFX11-NEXT: v_mov_b32_e32 v0, 0 1127; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:3 1128; GFX11-NEXT: s_waitcnt vmcnt(0) 1129; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1130; GFX11-NEXT: ; return to shader part epilog 1131 %vector = load <8 x i8>, ptr addrspace(4) %ptr 1132 %element = extractelement <8 x i8> %vector, i32 3 1133 ret i8 %element 1134} 1135 1136define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx4(ptr addrspace(4) inreg %ptr) { 1137; GFX9-LABEL: extractelement_sgpr_v8i8_idx4: 1138; GFX9: ; %bb.0: 1139; GFX9-NEXT: v_mov_b32_e32 v0, 0 1140; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:4 1141; GFX9-NEXT: s_waitcnt vmcnt(0) 1142; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1143; GFX9-NEXT: ; return to shader part epilog 1144; 1145; GFX8-LABEL: extractelement_sgpr_v8i8_idx4: 1146; GFX8: ; %bb.0: 1147; GFX8-NEXT: s_add_u32 s0, s2, 4 1148; GFX8-NEXT: s_addc_u32 s1, s3, 0 1149; GFX8-NEXT: v_mov_b32_e32 v0, s0 1150; GFX8-NEXT: v_mov_b32_e32 v1, s1 1151; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 1152; GFX8-NEXT: s_waitcnt vmcnt(0) 1153; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1154; GFX8-NEXT: ; return to shader part epilog 1155; 1156; GFX7-LABEL: extractelement_sgpr_v8i8_idx4: 1157; GFX7: ; %bb.0: 1158; GFX7-NEXT: s_mov_b32 s0, s2 1159; GFX7-NEXT: s_mov_b32 s1, s3 1160; GFX7-NEXT: s_mov_b32 s2, -1 1161; GFX7-NEXT: s_mov_b32 s3, 0xf000 1162; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:4 1163; GFX7-NEXT: s_waitcnt vmcnt(0) 1164; GFX7-NEXT: v_readfirstlane_b32 s0, v0 1165; GFX7-NEXT: ; return to shader part epilog 1166; 1167; GFX10-LABEL: extractelement_sgpr_v8i8_idx4: 1168; GFX10: ; %bb.0: 1169; GFX10-NEXT: v_mov_b32_e32 v0, 0 1170; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:4 1171; GFX10-NEXT: s_waitcnt vmcnt(0) 1172; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1173; GFX10-NEXT: ; return to shader part epilog 1174; 1175; GFX11-LABEL: extractelement_sgpr_v8i8_idx4: 1176; GFX11: ; %bb.0: 1177; 
GFX11-NEXT: v_mov_b32_e32 v0, 0 1178; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:4 1179; GFX11-NEXT: s_waitcnt vmcnt(0) 1180; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1181; GFX11-NEXT: ; return to shader part epilog 1182 %vector = load <8 x i8>, ptr addrspace(4) %ptr 1183 %element = extractelement <8 x i8> %vector, i32 4 1184 ret i8 %element 1185} 1186 1187define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx5(ptr addrspace(4) inreg %ptr) { 1188; GFX9-LABEL: extractelement_sgpr_v8i8_idx5: 1189; GFX9: ; %bb.0: 1190; GFX9-NEXT: v_mov_b32_e32 v0, 0 1191; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:5 1192; GFX9-NEXT: s_waitcnt vmcnt(0) 1193; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1194; GFX9-NEXT: ; return to shader part epilog 1195; 1196; GFX8-LABEL: extractelement_sgpr_v8i8_idx5: 1197; GFX8: ; %bb.0: 1198; GFX8-NEXT: s_add_u32 s0, s2, 5 1199; GFX8-NEXT: s_addc_u32 s1, s3, 0 1200; GFX8-NEXT: v_mov_b32_e32 v0, s0 1201; GFX8-NEXT: v_mov_b32_e32 v1, s1 1202; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 1203; GFX8-NEXT: s_waitcnt vmcnt(0) 1204; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1205; GFX8-NEXT: ; return to shader part epilog 1206; 1207; GFX7-LABEL: extractelement_sgpr_v8i8_idx5: 1208; GFX7: ; %bb.0: 1209; GFX7-NEXT: s_mov_b32 s0, s2 1210; GFX7-NEXT: s_mov_b32 s1, s3 1211; GFX7-NEXT: s_mov_b32 s2, -1 1212; GFX7-NEXT: s_mov_b32 s3, 0xf000 1213; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:5 1214; GFX7-NEXT: s_waitcnt vmcnt(0) 1215; GFX7-NEXT: v_readfirstlane_b32 s0, v0 1216; GFX7-NEXT: ; return to shader part epilog 1217; 1218; GFX10-LABEL: extractelement_sgpr_v8i8_idx5: 1219; GFX10: ; %bb.0: 1220; GFX10-NEXT: v_mov_b32_e32 v0, 0 1221; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:5 1222; GFX10-NEXT: s_waitcnt vmcnt(0) 1223; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1224; GFX10-NEXT: ; return to shader part epilog 1225; 1226; GFX11-LABEL: extractelement_sgpr_v8i8_idx5: 1227; GFX11: ; %bb.0: 1228; GFX11-NEXT: v_mov_b32_e32 v0, 0 1229; GFX11-NEXT: 
global_load_u8 v0, v0, s[2:3] offset:5 1230; GFX11-NEXT: s_waitcnt vmcnt(0) 1231; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1232; GFX11-NEXT: ; return to shader part epilog 1233 %vector = load <8 x i8>, ptr addrspace(4) %ptr 1234 %element = extractelement <8 x i8> %vector, i32 5 1235 ret i8 %element 1236} 1237 1238define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx6(ptr addrspace(4) inreg %ptr) { 1239; GFX9-LABEL: extractelement_sgpr_v8i8_idx6: 1240; GFX9: ; %bb.0: 1241; GFX9-NEXT: v_mov_b32_e32 v0, 0 1242; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:6 1243; GFX9-NEXT: s_waitcnt vmcnt(0) 1244; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1245; GFX9-NEXT: ; return to shader part epilog 1246; 1247; GFX8-LABEL: extractelement_sgpr_v8i8_idx6: 1248; GFX8: ; %bb.0: 1249; GFX8-NEXT: s_add_u32 s0, s2, 6 1250; GFX8-NEXT: s_addc_u32 s1, s3, 0 1251; GFX8-NEXT: v_mov_b32_e32 v0, s0 1252; GFX8-NEXT: v_mov_b32_e32 v1, s1 1253; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 1254; GFX8-NEXT: s_waitcnt vmcnt(0) 1255; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1256; GFX8-NEXT: ; return to shader part epilog 1257; 1258; GFX7-LABEL: extractelement_sgpr_v8i8_idx6: 1259; GFX7: ; %bb.0: 1260; GFX7-NEXT: s_mov_b32 s0, s2 1261; GFX7-NEXT: s_mov_b32 s1, s3 1262; GFX7-NEXT: s_mov_b32 s2, -1 1263; GFX7-NEXT: s_mov_b32 s3, 0xf000 1264; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:6 1265; GFX7-NEXT: s_waitcnt vmcnt(0) 1266; GFX7-NEXT: v_readfirstlane_b32 s0, v0 1267; GFX7-NEXT: ; return to shader part epilog 1268; 1269; GFX10-LABEL: extractelement_sgpr_v8i8_idx6: 1270; GFX10: ; %bb.0: 1271; GFX10-NEXT: v_mov_b32_e32 v0, 0 1272; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:6 1273; GFX10-NEXT: s_waitcnt vmcnt(0) 1274; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1275; GFX10-NEXT: ; return to shader part epilog 1276; 1277; GFX11-LABEL: extractelement_sgpr_v8i8_idx6: 1278; GFX11: ; %bb.0: 1279; GFX11-NEXT: v_mov_b32_e32 v0, 0 1280; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:6 1281; GFX11-NEXT: 
s_waitcnt vmcnt(0) 1282; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1283; GFX11-NEXT: ; return to shader part epilog 1284 %vector = load <8 x i8>, ptr addrspace(4) %ptr 1285 %element = extractelement <8 x i8> %vector, i32 6 1286 ret i8 %element 1287} 1288 1289define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx7(ptr addrspace(4) inreg %ptr) { 1290; GFX9-LABEL: extractelement_sgpr_v8i8_idx7: 1291; GFX9: ; %bb.0: 1292; GFX9-NEXT: v_mov_b32_e32 v0, 0 1293; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:7 1294; GFX9-NEXT: s_waitcnt vmcnt(0) 1295; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1296; GFX9-NEXT: ; return to shader part epilog 1297; 1298; GFX8-LABEL: extractelement_sgpr_v8i8_idx7: 1299; GFX8: ; %bb.0: 1300; GFX8-NEXT: s_add_u32 s0, s2, 7 1301; GFX8-NEXT: s_addc_u32 s1, s3, 0 1302; GFX8-NEXT: v_mov_b32_e32 v0, s0 1303; GFX8-NEXT: v_mov_b32_e32 v1, s1 1304; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 1305; GFX8-NEXT: s_waitcnt vmcnt(0) 1306; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1307; GFX8-NEXT: ; return to shader part epilog 1308; 1309; GFX7-LABEL: extractelement_sgpr_v8i8_idx7: 1310; GFX7: ; %bb.0: 1311; GFX7-NEXT: s_mov_b32 s0, s2 1312; GFX7-NEXT: s_mov_b32 s1, s3 1313; GFX7-NEXT: s_mov_b32 s2, -1 1314; GFX7-NEXT: s_mov_b32 s3, 0xf000 1315; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:7 1316; GFX7-NEXT: s_waitcnt vmcnt(0) 1317; GFX7-NEXT: v_readfirstlane_b32 s0, v0 1318; GFX7-NEXT: ; return to shader part epilog 1319; 1320; GFX10-LABEL: extractelement_sgpr_v8i8_idx7: 1321; GFX10: ; %bb.0: 1322; GFX10-NEXT: v_mov_b32_e32 v0, 0 1323; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:7 1324; GFX10-NEXT: s_waitcnt vmcnt(0) 1325; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1326; GFX10-NEXT: ; return to shader part epilog 1327; 1328; GFX11-LABEL: extractelement_sgpr_v8i8_idx7: 1329; GFX11: ; %bb.0: 1330; GFX11-NEXT: v_mov_b32_e32 v0, 0 1331; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:7 1332; GFX11-NEXT: s_waitcnt vmcnt(0) 1333; GFX11-NEXT: v_readfirstlane_b32 s0, v0 
1334; GFX11-NEXT: ; return to shader part epilog 1335 %vector = load <8 x i8>, ptr addrspace(4) %ptr 1336 %element = extractelement <8 x i8> %vector, i32 7 1337 ret i8 %element 1338} 1339 1340define i8 @extractelement_vgpr_v8i8_idx0(ptr addrspace(1) %ptr) { 1341; GFX9-LABEL: extractelement_vgpr_v8i8_idx0: 1342; GFX9: ; %bb.0: 1343; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1344; GFX9-NEXT: global_load_ubyte v0, v[0:1], off 1345; GFX9-NEXT: s_waitcnt vmcnt(0) 1346; GFX9-NEXT: s_setpc_b64 s[30:31] 1347; 1348; GFX8-LABEL: extractelement_vgpr_v8i8_idx0: 1349; GFX8: ; %bb.0: 1350; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1351; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 1352; GFX8-NEXT: s_waitcnt vmcnt(0) 1353; GFX8-NEXT: s_setpc_b64 s[30:31] 1354; 1355; GFX7-LABEL: extractelement_vgpr_v8i8_idx0: 1356; GFX7: ; %bb.0: 1357; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1358; GFX7-NEXT: s_mov_b32 s6, 0 1359; GFX7-NEXT: s_mov_b32 s7, 0xf000 1360; GFX7-NEXT: s_mov_b64 s[4:5], 0 1361; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 1362; GFX7-NEXT: s_waitcnt vmcnt(0) 1363; GFX7-NEXT: s_setpc_b64 s[30:31] 1364; 1365; GFX10-LABEL: extractelement_vgpr_v8i8_idx0: 1366; GFX10: ; %bb.0: 1367; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1368; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 1369; GFX10-NEXT: s_waitcnt vmcnt(0) 1370; GFX10-NEXT: s_setpc_b64 s[30:31] 1371; 1372; GFX11-LABEL: extractelement_vgpr_v8i8_idx0: 1373; GFX11: ; %bb.0: 1374; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1375; GFX11-NEXT: global_load_u8 v0, v[0:1], off 1376; GFX11-NEXT: s_waitcnt vmcnt(0) 1377; GFX11-NEXT: s_setpc_b64 s[30:31] 1378 %vector = load <8 x i8>, ptr addrspace(1) %ptr 1379 %element = extractelement <8 x i8> %vector, i32 0 1380 ret i8 %element 1381} 1382 1383define i8 @extractelement_vgpr_v8i8_idx1(ptr addrspace(1) %ptr) { 1384; GFX9-LABEL: extractelement_vgpr_v8i8_idx1: 1385; GFX9: ; %bb.0: 1386; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 1387; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:1 1388; GFX9-NEXT: s_waitcnt vmcnt(0) 1389; GFX9-NEXT: s_setpc_b64 s[30:31] 1390; 1391; GFX8-LABEL: extractelement_vgpr_v8i8_idx1: 1392; GFX8: ; %bb.0: 1393; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1394; GFX8-NEXT: v_add_u32_e32 v0, vcc, 1, v0 1395; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1396; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 1397; GFX8-NEXT: s_waitcnt vmcnt(0) 1398; GFX8-NEXT: s_setpc_b64 s[30:31] 1399; 1400; GFX7-LABEL: extractelement_vgpr_v8i8_idx1: 1401; GFX7: ; %bb.0: 1402; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1403; GFX7-NEXT: s_mov_b32 s6, 0 1404; GFX7-NEXT: s_mov_b32 s7, 0xf000 1405; GFX7-NEXT: s_mov_b64 s[4:5], 0 1406; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:1 1407; GFX7-NEXT: s_waitcnt vmcnt(0) 1408; GFX7-NEXT: s_setpc_b64 s[30:31] 1409; 1410; GFX10-LABEL: extractelement_vgpr_v8i8_idx1: 1411; GFX10: ; %bb.0: 1412; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1413; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1 1414; GFX10-NEXT: s_waitcnt vmcnt(0) 1415; GFX10-NEXT: s_setpc_b64 s[30:31] 1416; 1417; GFX11-LABEL: extractelement_vgpr_v8i8_idx1: 1418; GFX11: ; %bb.0: 1419; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1420; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:1 1421; GFX11-NEXT: s_waitcnt vmcnt(0) 1422; GFX11-NEXT: s_setpc_b64 s[30:31] 1423 %vector = load <8 x i8>, ptr addrspace(1) %ptr 1424 %element = extractelement <8 x i8> %vector, i32 1 1425 ret i8 %element 1426} 1427 1428define i8 @extractelement_vgpr_v8i8_idx2(ptr addrspace(1) %ptr) { 1429; GFX9-LABEL: extractelement_vgpr_v8i8_idx2: 1430; GFX9: ; %bb.0: 1431; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1432; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2 1433; GFX9-NEXT: s_waitcnt vmcnt(0) 1434; GFX9-NEXT: s_setpc_b64 s[30:31] 1435; 1436; GFX8-LABEL: extractelement_vgpr_v8i8_idx2: 1437; GFX8: ; %bb.0: 1438; GFX8-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 1439; GFX8-NEXT: v_add_u32_e32 v0, vcc, 2, v0 1440; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1441; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 1442; GFX8-NEXT: s_waitcnt vmcnt(0) 1443; GFX8-NEXT: s_setpc_b64 s[30:31] 1444; 1445; GFX7-LABEL: extractelement_vgpr_v8i8_idx2: 1446; GFX7: ; %bb.0: 1447; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1448; GFX7-NEXT: s_mov_b32 s6, 0 1449; GFX7-NEXT: s_mov_b32 s7, 0xf000 1450; GFX7-NEXT: s_mov_b64 s[4:5], 0 1451; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:2 1452; GFX7-NEXT: s_waitcnt vmcnt(0) 1453; GFX7-NEXT: s_setpc_b64 s[30:31] 1454; 1455; GFX10-LABEL: extractelement_vgpr_v8i8_idx2: 1456; GFX10: ; %bb.0: 1457; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1458; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2 1459; GFX10-NEXT: s_waitcnt vmcnt(0) 1460; GFX10-NEXT: s_setpc_b64 s[30:31] 1461; 1462; GFX11-LABEL: extractelement_vgpr_v8i8_idx2: 1463; GFX11: ; %bb.0: 1464; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1465; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:2 1466; GFX11-NEXT: s_waitcnt vmcnt(0) 1467; GFX11-NEXT: s_setpc_b64 s[30:31] 1468 %vector = load <8 x i8>, ptr addrspace(1) %ptr 1469 %element = extractelement <8 x i8> %vector, i32 2 1470 ret i8 %element 1471} 1472 1473define i8 @extractelement_vgpr_v8i8_idx3(ptr addrspace(1) %ptr) { 1474; GFX9-LABEL: extractelement_vgpr_v8i8_idx3: 1475; GFX9: ; %bb.0: 1476; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1477; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:3 1478; GFX9-NEXT: s_waitcnt vmcnt(0) 1479; GFX9-NEXT: s_setpc_b64 s[30:31] 1480; 1481; GFX8-LABEL: extractelement_vgpr_v8i8_idx3: 1482; GFX8: ; %bb.0: 1483; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1484; GFX8-NEXT: v_add_u32_e32 v0, vcc, 3, v0 1485; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1486; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 1487; GFX8-NEXT: s_waitcnt vmcnt(0) 1488; GFX8-NEXT: s_setpc_b64 s[30:31] 
1489; 1490; GFX7-LABEL: extractelement_vgpr_v8i8_idx3: 1491; GFX7: ; %bb.0: 1492; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1493; GFX7-NEXT: s_mov_b32 s6, 0 1494; GFX7-NEXT: s_mov_b32 s7, 0xf000 1495; GFX7-NEXT: s_mov_b64 s[4:5], 0 1496; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:3 1497; GFX7-NEXT: s_waitcnt vmcnt(0) 1498; GFX7-NEXT: s_setpc_b64 s[30:31] 1499; 1500; GFX10-LABEL: extractelement_vgpr_v8i8_idx3: 1501; GFX10: ; %bb.0: 1502; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1503; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:3 1504; GFX10-NEXT: s_waitcnt vmcnt(0) 1505; GFX10-NEXT: s_setpc_b64 s[30:31] 1506; 1507; GFX11-LABEL: extractelement_vgpr_v8i8_idx3: 1508; GFX11: ; %bb.0: 1509; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1510; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:3 1511; GFX11-NEXT: s_waitcnt vmcnt(0) 1512; GFX11-NEXT: s_setpc_b64 s[30:31] 1513 %vector = load <8 x i8>, ptr addrspace(1) %ptr 1514 %element = extractelement <8 x i8> %vector, i32 3 1515 ret i8 %element 1516} 1517 1518define i8 @extractelement_vgpr_v8i8_idx4(ptr addrspace(1) %ptr) { 1519; GFX9-LABEL: extractelement_vgpr_v8i8_idx4: 1520; GFX9: ; %bb.0: 1521; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1522; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4 1523; GFX9-NEXT: s_waitcnt vmcnt(0) 1524; GFX9-NEXT: s_setpc_b64 s[30:31] 1525; 1526; GFX8-LABEL: extractelement_vgpr_v8i8_idx4: 1527; GFX8: ; %bb.0: 1528; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1529; GFX8-NEXT: v_add_u32_e32 v0, vcc, 4, v0 1530; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1531; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 1532; GFX8-NEXT: s_waitcnt vmcnt(0) 1533; GFX8-NEXT: s_setpc_b64 s[30:31] 1534; 1535; GFX7-LABEL: extractelement_vgpr_v8i8_idx4: 1536; GFX7: ; %bb.0: 1537; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1538; GFX7-NEXT: s_mov_b32 s6, 0 1539; GFX7-NEXT: s_mov_b32 s7, 0xf000 1540; GFX7-NEXT: s_mov_b64 s[4:5], 0 1541; 
GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:4 1542; GFX7-NEXT: s_waitcnt vmcnt(0) 1543; GFX7-NEXT: s_setpc_b64 s[30:31] 1544; 1545; GFX10-LABEL: extractelement_vgpr_v8i8_idx4: 1546; GFX10: ; %bb.0: 1547; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1548; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:4 1549; GFX10-NEXT: s_waitcnt vmcnt(0) 1550; GFX10-NEXT: s_setpc_b64 s[30:31] 1551; 1552; GFX11-LABEL: extractelement_vgpr_v8i8_idx4: 1553; GFX11: ; %bb.0: 1554; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1555; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4 1556; GFX11-NEXT: s_waitcnt vmcnt(0) 1557; GFX11-NEXT: s_setpc_b64 s[30:31] 1558 %vector = load <8 x i8>, ptr addrspace(1) %ptr 1559 %element = extractelement <8 x i8> %vector, i32 4 1560 ret i8 %element 1561} 1562 1563define i8 @extractelement_vgpr_v8i8_idx5(ptr addrspace(1) %ptr) { 1564; GFX9-LABEL: extractelement_vgpr_v8i8_idx5: 1565; GFX9: ; %bb.0: 1566; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1567; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:5 1568; GFX9-NEXT: s_waitcnt vmcnt(0) 1569; GFX9-NEXT: s_setpc_b64 s[30:31] 1570; 1571; GFX8-LABEL: extractelement_vgpr_v8i8_idx5: 1572; GFX8: ; %bb.0: 1573; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1574; GFX8-NEXT: v_add_u32_e32 v0, vcc, 5, v0 1575; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1576; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 1577; GFX8-NEXT: s_waitcnt vmcnt(0) 1578; GFX8-NEXT: s_setpc_b64 s[30:31] 1579; 1580; GFX7-LABEL: extractelement_vgpr_v8i8_idx5: 1581; GFX7: ; %bb.0: 1582; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1583; GFX7-NEXT: s_mov_b32 s6, 0 1584; GFX7-NEXT: s_mov_b32 s7, 0xf000 1585; GFX7-NEXT: s_mov_b64 s[4:5], 0 1586; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:5 1587; GFX7-NEXT: s_waitcnt vmcnt(0) 1588; GFX7-NEXT: s_setpc_b64 s[30:31] 1589; 1590; GFX10-LABEL: extractelement_vgpr_v8i8_idx5: 1591; GFX10: ; %bb.0: 1592; GFX10-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 1593; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:5 1594; GFX10-NEXT: s_waitcnt vmcnt(0) 1595; GFX10-NEXT: s_setpc_b64 s[30:31] 1596; 1597; GFX11-LABEL: extractelement_vgpr_v8i8_idx5: 1598; GFX11: ; %bb.0: 1599; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1600; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:5 1601; GFX11-NEXT: s_waitcnt vmcnt(0) 1602; GFX11-NEXT: s_setpc_b64 s[30:31] 1603 %vector = load <8 x i8>, ptr addrspace(1) %ptr 1604 %element = extractelement <8 x i8> %vector, i32 5 1605 ret i8 %element 1606} 1607 1608define i8 @extractelement_vgpr_v8i8_idx6(ptr addrspace(1) %ptr) { 1609; GFX9-LABEL: extractelement_vgpr_v8i8_idx6: 1610; GFX9: ; %bb.0: 1611; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1612; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:6 1613; GFX9-NEXT: s_waitcnt vmcnt(0) 1614; GFX9-NEXT: s_setpc_b64 s[30:31] 1615; 1616; GFX8-LABEL: extractelement_vgpr_v8i8_idx6: 1617; GFX8: ; %bb.0: 1618; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1619; GFX8-NEXT: v_add_u32_e32 v0, vcc, 6, v0 1620; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1621; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 1622; GFX8-NEXT: s_waitcnt vmcnt(0) 1623; GFX8-NEXT: s_setpc_b64 s[30:31] 1624; 1625; GFX7-LABEL: extractelement_vgpr_v8i8_idx6: 1626; GFX7: ; %bb.0: 1627; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1628; GFX7-NEXT: s_mov_b32 s6, 0 1629; GFX7-NEXT: s_mov_b32 s7, 0xf000 1630; GFX7-NEXT: s_mov_b64 s[4:5], 0 1631; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:6 1632; GFX7-NEXT: s_waitcnt vmcnt(0) 1633; GFX7-NEXT: s_setpc_b64 s[30:31] 1634; 1635; GFX10-LABEL: extractelement_vgpr_v8i8_idx6: 1636; GFX10: ; %bb.0: 1637; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1638; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:6 1639; GFX10-NEXT: s_waitcnt vmcnt(0) 1640; GFX10-NEXT: s_setpc_b64 s[30:31] 1641; 1642; GFX11-LABEL: extractelement_vgpr_v8i8_idx6: 1643; GFX11: ; %bb.0: 
1644; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1645; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:6 1646; GFX11-NEXT: s_waitcnt vmcnt(0) 1647; GFX11-NEXT: s_setpc_b64 s[30:31] 1648 %vector = load <8 x i8>, ptr addrspace(1) %ptr 1649 %element = extractelement <8 x i8> %vector, i32 6 1650 ret i8 %element 1651} 1652 1653define i8 @extractelement_vgpr_v8i8_idx7(ptr addrspace(1) %ptr) { 1654; GFX9-LABEL: extractelement_vgpr_v8i8_idx7: 1655; GFX9: ; %bb.0: 1656; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1657; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:7 1658; GFX9-NEXT: s_waitcnt vmcnt(0) 1659; GFX9-NEXT: s_setpc_b64 s[30:31] 1660; 1661; GFX8-LABEL: extractelement_vgpr_v8i8_idx7: 1662; GFX8: ; %bb.0: 1663; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1664; GFX8-NEXT: v_add_u32_e32 v0, vcc, 7, v0 1665; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1666; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 1667; GFX8-NEXT: s_waitcnt vmcnt(0) 1668; GFX8-NEXT: s_setpc_b64 s[30:31] 1669; 1670; GFX7-LABEL: extractelement_vgpr_v8i8_idx7: 1671; GFX7: ; %bb.0: 1672; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1673; GFX7-NEXT: s_mov_b32 s6, 0 1674; GFX7-NEXT: s_mov_b32 s7, 0xf000 1675; GFX7-NEXT: s_mov_b64 s[4:5], 0 1676; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:7 1677; GFX7-NEXT: s_waitcnt vmcnt(0) 1678; GFX7-NEXT: s_setpc_b64 s[30:31] 1679; 1680; GFX10-LABEL: extractelement_vgpr_v8i8_idx7: 1681; GFX10: ; %bb.0: 1682; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1683; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:7 1684; GFX10-NEXT: s_waitcnt vmcnt(0) 1685; GFX10-NEXT: s_setpc_b64 s[30:31] 1686; 1687; GFX11-LABEL: extractelement_vgpr_v8i8_idx7: 1688; GFX11: ; %bb.0: 1689; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1690; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:7 1691; GFX11-NEXT: s_waitcnt vmcnt(0) 1692; GFX11-NEXT: s_setpc_b64 s[30:31] 1693 %vector = load <8 x i8>, ptr addrspace(1) %ptr 1694 
%element = extractelement <8 x i8> %vector, i32 7
  ret i8 %element
}

; ---- <16 x i8>, dynamic index ------------------------------------------------
; Each function loads a <16 x i8> and extracts one i8 at a run-time index.
; In every set of checks below the index is masked with 15 and a single byte
; load is issued; no full-vector load appears in the expected output.

; Pointer and index are both `inreg`; the checks show them in s[2:3] and s4.
; The result is moved to s0 with v_readfirstlane_b32.
define amdgpu_ps i8 @extractelement_sgpr_v16i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s4, 15
; GFX9-NEXT: s_ashr_i32 s1, s0, 31
; GFX9-NEXT: s_add_u32 s0, s2, s0
; GFX9-NEXT: s_addc_u32 s1, s3, s1
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s0, s4, 15
; GFX8-NEXT: s_ashr_i32 s1, s0, 31
; GFX8-NEXT: s_add_u32 s0, s2, s0
; GFX8-NEXT: s_addc_u32 s1, s3, s1
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_and_b32 s4, s4, 15
; GFX7-NEXT: s_ashr_i32 s5, s4, 31
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s0, s4, 15
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_ashr_i32 s1, s0, 31
; GFX10-NEXT: s_add_u32 s0, s2, s0
; GFX10-NEXT: s_addc_u32 s1, s3, s1
; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s4, 15
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_ashr_i32 s1, s0, 31
; GFX11-NEXT: s_add_u32 s0, s2, s0
; GFX11-NEXT: s_addc_u32 s1, s3, s1
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <16 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <16 x i8> %vector, i32 %idx
  ret i8 %element
}

; Non-inreg pointer (v[0:1] in the checks) with an `inreg` index (s2).
; The masked index is widened to 64 bits with s_ashr_i32 ..., 31. Most runs add
; it to the pointer with a VALU add/add-with-carry pair; the hawaii (GFX7) run
; instead leaves it in s[0:1] of the s[0:3] operand of buffer_load_ubyte.
define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s2, 15
; GFX9-NEXT: s_ashr_i32 s1, s0, 31
; GFX9-NEXT: v_mov_b32_e32 v3, s1
; GFX9-NEXT: v_mov_b32_e32 v2, s0
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s0, s2, 15
; GFX8-NEXT: s_ashr_i32 s1, s0, 31
; GFX8-NEXT: v_mov_b32_e32 v3, s1
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_and_b32 s0, s2, 15
; GFX7-NEXT: s_ashr_i32 s1, s0, 31
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s0, s2, 15
; GFX10-NEXT: s_ashr_i32 s1, s0, 31
; GFX10-NEXT: v_mov_b32_e32 v3, s1
; GFX10-NEXT: v_mov_b32_e32 v2, s0
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s2, 15
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_ashr_i32 s1, s0, 31
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 %idx
  ret i8 %element
}

; Plain (non-amdgpu_ps) function: pointer in v[0:1] and index in v2 per the
; checks. The expected code returns with s_setpc_b64 s[30:31] and leaves the
; loaded byte in v0 (no v_readfirstlane_b32).
define i8 @extractelement_vgpr_v16i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v2, 15, v2
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v2, 15, v2
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v2, 15, v2
; GFX7-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v2, 15, v2
; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v2, 15, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 %idx
  ret i8 %element
}

; `inreg` pointer (s[2:3] in the checks) with a non-inreg index (v0).
; The pointer is copied into VGPRs (or, for the hawaii/GFX7 run, into s[0:1] of
; the buffer_load_ubyte s[0:3] operand) so the index can be applied per lane.
define amdgpu_ps i8 @extractelement_sgpr_v16i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_and_b32_e32 v2, 15, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_and_b32_e32 v2, 15, v0
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_and_b32_e32 v0, 15, v0
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_and_b32_e32 v2, 15, v0
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s3
; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_and_b32_e32 v2, 15, v0
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <16 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <16 x i8> %vector, i32 %idx
  ret i8 %element
}

; ---- <16 x i8>, constant index 0..15 -----------------------------------------
; Pointer is in v[0:1]. In the checks a non-zero index N appears as an
; immediate `offset:N` on the byte load for the GFX9, GFX10, GFX11 and GFX7
; runs, while the GFX8 run adds N to the 64-bit pointer (v_add_u32_e32 /
; v_addc_u32_e32) before flat_load_ubyte.

; Constant index 0: the byte load uses the pointer unmodified.
define i8 @extractelement_vgpr_v16i8_idx0(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx0:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 0
  ret i8 %element
}

; Constant index 1: the expected load is the byte at ptr + 1.
define i8 @extractelement_vgpr_v16i8_idx1(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 1, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 1
  ret i8 %element
}

; Constant index 2: the expected load is the byte at ptr + 2.
define i8 @extractelement_vgpr_v16i8_idx2(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 2, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx2:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:2
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:2
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 2
  ret i8 %element
}

; Constant index 3: the expected load is the byte at ptr + 3.
define i8 @extractelement_vgpr_v16i8_idx3(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:3
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx3:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 3, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx3:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:3
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:3
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 3
  ret i8 %element
}

; Constant index 4: the expected load is the byte at ptr + 4.
define i8 @extractelement_vgpr_v16i8_idx4(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 4, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx4:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:4
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 4
  ret i8 %element
}

; Constant index 5: the expected load is the byte at ptr + 5.
define i8 @extractelement_vgpr_v16i8_idx5(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:5
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx5:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 5, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx5:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:5
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:5
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx5:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:5
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 5
  ret i8 %element
}

; Constant index 6: the expected load is the byte at ptr + 6.
define i8 @extractelement_vgpr_v16i8_idx6(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx6:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:6
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx6:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 6, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx6:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:6
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx6:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx6:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:6
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 6
  ret i8 %element
}

; Constant index 7: the expected load is the byte at ptr + 7.
define i8 @extractelement_vgpr_v16i8_idx7(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx7:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:7
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx7:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 7, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx7:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:7
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx7:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:7
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx7:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:7
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 7
  ret i8 %element
}

; Constant index 8: the expected load is the byte at ptr + 8.
define i8 @extractelement_vgpr_v16i8_idx8(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:8
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx8:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 8, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx8:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:8
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:8
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:8
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 8
  ret i8 %element
}

; Constant index 9: the expected load is the byte at ptr + 9.
define i8 @extractelement_vgpr_v16i8_idx9(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx9:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:9
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx9:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 9, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx9:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:9
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx9:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:9
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx9:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:9
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 9
  ret i8 %element
}

; Constant index 10: the expected load is the byte at ptr + 10.
define i8 @extractelement_vgpr_v16i8_idx10(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx10:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:10
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx10:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 10, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx10:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:10
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx10:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:10
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx10:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:10
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 10
  ret i8 %element
}

; Constant index 11: the expected load is the byte at ptr + 11.
define i8 @extractelement_vgpr_v16i8_idx11(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx11:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:11
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx11:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 11, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx11:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:11
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx11:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:11
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx11:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:11
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 11
  ret i8 %element
}

; Constant index 12: the expected load is the byte at ptr + 12.
define i8 @extractelement_vgpr_v16i8_idx12(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx12:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:12
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx12:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 12, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx12:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:12
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx12:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:12
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx12:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:12
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 12
  ret i8 %element
}

; Constant index 13: the expected load is the byte at ptr + 13.
define i8 @extractelement_vgpr_v16i8_idx13(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx13:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:13
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx13:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 13, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx13:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:13
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx13:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:13
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx13:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:13
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 13
  ret i8 %element
}

; Constant index 14: the expected load is the byte at ptr + 14.
define i8 @extractelement_vgpr_v16i8_idx14(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx14:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:14
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx14:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 14, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx14:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:14
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx14:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:14
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx14:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:14
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 14
  ret i8 %element
}

; Constant index 15 (last element): the expected load is the byte at ptr + 15.
define i8 @extractelement_vgpr_v16i8_idx15(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx15:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:15
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx15:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 15, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx15:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:15
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx15:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:15
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx15:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:15
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 15
  ret i8 %element
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}