; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s

; Divergent case. Each workitem volatile-loads a flat pointer from the global
; array %ptr.ptr (indexed by workitem id x), queries llvm.amdgcn.is.shared on
; it, and stores the zero-extended i1 result. The expected code compares the
; high dword of the loaded pointer (v1) with a scalar: src_shared_base on
; gfx9 and newer, or a dword loaded from kernarg-segment offset 0x33 on the
; older targets (presumably the shared aperture base; confirm against the ABI).
define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
; SI-LABEL: is_local_vgpr:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; SI-NEXT:    s_load_dword s4, s[8:9], 0x33
; SI-NEXT:    s_mov_b32 s2, 0
; SI-NEXT:    s_mov_b32 s3, 0x100f000
; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[0:3], 0 addr64 glc
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; CI-SDAG-LABEL: is_local_vgpr:
; CI-SDAG:       ; %bb.0:
; CI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-SDAG-NEXT:    s_load_dword s2, s[8:9], 0x33
; CI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; CI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; CI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; CI-SDAG-NEXT:    v_add_i32_e32 v0, vcc, s0, v0
; CI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; CI-SDAG-NEXT:    flat_load_dwordx2 v[0:1], v[0:1] glc
; CI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; CI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, s2, v1
; CI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; CI-SDAG-NEXT:    flat_store_dword v[0:1], v0
; CI-SDAG-NEXT:    s_endpgm
;
; GFX9-LABEL: is_local_vgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_mov_b64 s[0:1], src_shared_base
; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, s1, v1
; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; CI-GISEL-LABEL: is_local_vgpr:
; CI-GISEL:       ; %bb.0:
; CI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-GISEL-NEXT:    s_load_dword s2, s[8:9], 0x33
; CI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
; CI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; CI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; CI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; CI-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; CI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; CI-GISEL-NEXT:    flat_load_dwordx2 v[0:1], v[0:1] glc
; CI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; CI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s2, v1
; CI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; CI-GISEL-NEXT:    flat_store_dword v[0:1], v0
; CI-GISEL-NEXT:    s_endpgm
;
; GFX10-LABEL: is_local_vgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b64 s[0:1], src_shared_base
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, s1, v1
; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10-NEXT:    global_store_dword v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: is_local_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_b64 v[0:1], v0, s[0:1] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_mov_b64 s[0:1], src_shared_base
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, s1, v1
; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
; GFX11-NEXT:    s_endpgm
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds ptr, ptr addrspace(1) %ptr.ptr, i32 %id
  %ptr = load volatile ptr, ptr addrspace(1) %gep
  %val = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
  %ext = zext i1 %val to i32
  store i32 %ext, ptr addrspace(1) undef
  ret void
}

; Uniform case. The flat pointer is a kernel argument, and the result of
; llvm.amdgcn.is.shared feeds a branch that guards a volatile store.
; FIXME: (setcc (zero_extend (setcc)), 1) not folded out, resulting in
; select and vcc branch.
define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
; SI-LABEL: is_local_sgpr:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s0, s[8:9], 0x1
; SI-NEXT:    s_load_dword s1, s[8:9], 0x33
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_eq_u32 s0, s1
; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
; SI-NEXT:    s_andn2_b64 vcc, exec, s[0:1]
; SI-NEXT:    s_cbranch_vccnz .LBB1_2
; SI-NEXT:  ; %bb.1: ; %bb0
; SI-NEXT:    s_mov_b32 s3, 0x100f000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:  .LBB1_2: ; %bb1
; SI-NEXT:    s_endpgm
;
; CI-SDAG-LABEL: is_local_sgpr:
; CI-SDAG:       ; %bb.0:
; CI-SDAG-NEXT:    s_load_dword s0, s[8:9], 0x1
; CI-SDAG-NEXT:    s_load_dword s1, s[8:9], 0x33
; CI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; CI-SDAG-NEXT:    s_cmp_eq_u32 s0, s1
; CI-SDAG-NEXT:    s_cselect_b64 s[0:1], -1, 0
; CI-SDAG-NEXT:    s_andn2_b64 vcc, exec, s[0:1]
; CI-SDAG-NEXT:    s_cbranch_vccnz .LBB1_2
; CI-SDAG-NEXT:  ; %bb.1: ; %bb0
; CI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
; CI-SDAG-NEXT:    flat_store_dword v[0:1], v0
; CI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; CI-SDAG-NEXT:  .LBB1_2: ; %bb1
; CI-SDAG-NEXT:    s_endpgm
;
; GFX9-SDAG-LABEL: is_local_sgpr:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_load_dword s2, s[8:9], 0x4
; GFX9-SDAG-NEXT:    s_mov_b64 s[0:1], src_shared_base
; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT:    s_cmp_eq_u32 s2, s1
; GFX9-SDAG-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX9-SDAG-NEXT:    s_andn2_b64 vcc, exec, s[0:1]
; GFX9-SDAG-NEXT:    s_cbranch_vccnz .LBB1_2
; GFX9-SDAG-NEXT:  ; %bb.1: ; %bb0
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-SDAG-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT:  .LBB1_2: ; %bb1
; GFX9-SDAG-NEXT:    s_endpgm
;
; CI-GISEL-LABEL: is_local_sgpr:
; CI-GISEL:       ; %bb.0:
; CI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; CI-GISEL-NEXT:    s_load_dword s0, s[8:9], 0x33
; CI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; CI-GISEL-NEXT:    s_cmp_lg_u32 s1, s0
; CI-GISEL-NEXT:    s_cbranch_scc1 .LBB1_2
; CI-GISEL-NEXT:  ; %bb.1: ; %bb0
; CI-GISEL-NEXT:    v_mov_b32_e32 v0, 0
; CI-GISEL-NEXT:    flat_store_dword v[0:1], v0
; CI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; CI-GISEL-NEXT:  .LBB1_2: ; %bb1
; CI-GISEL-NEXT:    s_endpgm
;
; GFX9-GISEL-LABEL: is_local_sgpr:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9-GISEL-NEXT:    s_mov_b64 s[2:3], src_shared_base
; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT:    s_cmp_lg_u32 s1, s3
; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB1_2
; GFX9-GISEL-NEXT:  ; %bb.1: ; %bb0
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-GISEL-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT:  .LBB1_2: ; %bb1
; GFX9-GISEL-NEXT:    s_endpgm
;
; GFX10-LABEL: is_local_sgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX10-NEXT:    s_mov_b64 s[2:3], src_shared_base
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_cmp_lg_u32 s1, s3
; GFX10-NEXT:    s_cbranch_scc1 .LBB1_2
; GFX10-NEXT:  ; %bb.1: ; %bb0
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    global_store_dword v[0:1], v0, off
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:  .LBB1_2: ; %bb1
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: is_local_sgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-NEXT:    s_mov_b64 s[2:3], src_shared_base
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_cmp_lg_u32 s1, s3
; GFX11-NEXT:    s_cbranch_scc1 .LBB1_2
; GFX11-NEXT:  ; %bb.1: ; %bb0
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    global_store_b32 v[0:1], v0, off dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:  .LBB1_2: ; %bb1
; GFX11-NEXT:    s_endpgm
  %val = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
  br i1 %val, label %bb0, label %bb1

bb0:
  store volatile i32 0, ptr addrspace(1) undef
  br label %bb1

bb1:
  ret void
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CI: {{.*}}
; GFX10-GISEL: {{.*}}
; GFX11-GISEL: {{.*}}
; SI-SDAG: {{.*}}