; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s

; Each function stores %val into an addrspace(5) alloca at a variable index
; plus a constant 16-byte displacement. The functions differ only in the
; no-wrap flags placed on the variable-index getelementptr.
;
; gfx8 required knowing no overflow happened to fold the addressing mode.
; In the assertions below, the gfx803 output keeps the +16 as a separate
; v_add_u32 and stores with a plain "offen" operand in every function, while
; the gfx900 output folds the 16 into the store's immediate offset field.

; Baseline: the variable-index GEP carries no flags.
define void @gep_noflags_alloca(i32 %idx, i32 %val) #0 {
; GFX8-LABEL: gep_noflags_alloca:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: gep_noflags_alloca:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 2, v2
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %frame = alloca [32 x i32], addrspace(5)
  %elt = getelementptr [32 x i32], ptr addrspace(5) %frame, i32 0, i32 %idx
  %addr = getelementptr i32, ptr addrspace(5) %elt, i32 4
  store volatile i32 %val, ptr addrspace(5) %addr
  ret void
}

; Variable-index GEP is marked inbounds; expected output matches the baseline.
define void @gep_inbounds_alloca(i32 %idx, i32 %val) #0 {
; GFX8-LABEL: gep_inbounds_alloca:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: gep_inbounds_alloca:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 2, v2
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %frame = alloca [32 x i32], addrspace(5)
  %elt = getelementptr inbounds [32 x i32], ptr addrspace(5) %frame, i32 0, i32 %idx
  %addr = getelementptr i32, ptr addrspace(5) %elt, i32 4
  store volatile i32 %val, ptr addrspace(5) %addr
  ret void
}

; Variable-index GEP is marked nuw; expected output matches the baseline.
define void @gep_nuw_alloca(i32 %idx, i32 %val) #0 {
; GFX8-LABEL: gep_nuw_alloca:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: gep_nuw_alloca:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 2, v2
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %frame = alloca [32 x i32], addrspace(5)
  %elt = getelementptr nuw [32 x i32], ptr addrspace(5) %frame, i32 0, i32 %idx
  %addr = getelementptr i32, ptr addrspace(5) %elt, i32 4
  store volatile i32 %val, ptr addrspace(5) %addr
  ret void
}

; Variable-index GEP is marked nusw; expected output matches the baseline.
define void @gep_nusw_alloca(i32 %idx, i32 %val) #0 {
; GFX8-LABEL: gep_nusw_alloca:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: gep_nusw_alloca:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 2, v2
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %frame = alloca [32 x i32], addrspace(5)
  %elt = getelementptr nusw [32 x i32], ptr addrspace(5) %frame, i32 0, i32 %idx
  %addr = getelementptr i32, ptr addrspace(5) %elt, i32 4
  store volatile i32 %val, ptr addrspace(5) %addr
  ret void
}

; Variable-index GEP is marked inbounds nuw; expected output matches the
; baseline.
define void @gep_inbounds_nuw_alloca(i32 %idx, i32 %val) #0 {
; GFX8-LABEL: gep_inbounds_nuw_alloca:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: gep_inbounds_nuw_alloca:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 2, v2
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %frame = alloca [32 x i32], addrspace(5)
  %elt = getelementptr inbounds nuw [32 x i32], ptr addrspace(5) %frame, i32 0, i32 %idx
  %addr = getelementptr i32, ptr addrspace(5) %elt, i32 4
  store volatile i32 %val, ptr addrspace(5) %addr
  ret void
}

; Variable-index GEP is marked nusw nuw; expected output matches the baseline.
define void @gep_nusw_nuw_alloca(i32 %idx, i32 %val) #0 {
; GFX8-LABEL: gep_nusw_nuw_alloca:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: gep_nusw_nuw_alloca:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 2, v2
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %frame = alloca [32 x i32], addrspace(5)
  %elt = getelementptr nusw nuw [32 x i32], ptr addrspace(5) %frame, i32 0, i32 %idx
  %addr = getelementptr i32, ptr addrspace(5) %elt, i32 4
  store volatile i32 %val, ptr addrspace(5) %addr
  ret void
}

; Single inbounds nuw GEP into [5 x [33 x i32]]: the variable index is scaled
; by 33 * 4 = 132 (0x84) bytes, which is not a power of two, so the expected
; code multiplies instead of shifting. The trailing constant index 4 supplies
; the same 16-byte displacement as in the other functions.
define void @gep_inbounds_nuw_alloca_nonpow2_scale(i32 %idx, i32 %val) #0 {
; GFX8-LABEL: gep_inbounds_nuw_alloca_nonpow2_scale:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_movk_i32 s4, 0x84
; GFX8-NEXT:    v_mul_lo_u32 v0, v0, s4
; GFX8-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: gep_inbounds_nuw_alloca_nonpow2_scale:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_movk_i32 s4, 0x84
; GFX9-NEXT:    v_mul_lo_u32 v0, v0, s4
; GFX9-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
; GFX9-NEXT:    v_add_u32_e32 v0, v0, v2
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %frame = alloca [5 x [33 x i32]], align 4, addrspace(5)
  %addr = getelementptr inbounds nuw [5 x [33 x i32]], ptr addrspace(5) %frame, i32 0, i32 %idx, i32 4
  store volatile i32 %val, ptr addrspace(5) %addr, align 4
  ret void
}

attributes #0 = { nounwind }