; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck %s

; Checks the LDS offset that GlobalISel code generation assigns to zero-length
; external addrspace(3) arrays (dynamic shared memory) on gfx900. Every kernel
; below touches some statically sized LDS globals plus one or more dynamic
; arrays. The immediate expected in the address computation equals the size of
; the static LDS the kernel references, rounded up to the alignment required
; by the dynamic arrays it references.

; Statically sized LDS arrays.
@lds0 = addrspace(3) global [512 x float] undef   ; 2048 bytes (0x800)
@lds1 = addrspace(3) global [256 x float] undef   ; 1024 bytes (0x400)
@lds2 = addrspace(3) global [4096 x float] undef  ; 16384 bytes (0x4000)
@lds3 = addrspace(3) global [67 x i8] undef       ; 67 bytes, deliberately not a multiple of 4

; Zero-length external LDS arrays standing in for dynamically sized shared
; memory. They differ only in element type and explicit alignment.
@dynamic_shared0 = external addrspace(3) global [0 x float]
@dynamic_shared1 = external addrspace(3) global [0 x double]
@dynamic_shared2 = external addrspace(3) global [0 x double], align 4
@dynamic_shared3 = external addrspace(3) global [0 x double], align 16

; Only @lds0 (0x800 bytes) is referenced, so the dynamic array is expected to
; start at 0x800.
; CHECK-LABEL: {{^}}dynamic_shared_array_0:
; CHECK: v_add_u32_e32 v{{[0-9]+}}, 0x800, v{{[0-9]+}}
define amdgpu_kernel void @dynamic_shared_array_0(ptr addrspace(1) %out) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %tid.x
  %val0 = load float, ptr addrspace(3) %arrayidx0, align 4
  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x
  store float %val0, ptr addrspace(3) %arrayidx1, align 4
  ret void
}

; @lds0 and @lds1 are each referenced on one side of a branch; both count
; toward the static size, so the expected offset is 0x800 + 0x400 = 0xc00.
; CHECK-LABEL: {{^}}dynamic_shared_array_1:
; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0xc00, [[IDX]]
define amdgpu_kernel void @dynamic_shared_array_1(ptr addrspace(1) %out, i32 %cond) {
entry:
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %idx.0 = add nsw i32 %tid.x, 64
  %tmp = icmp eq i32 %cond, 0
  br i1 %tmp, label %if, label %else

if:                                               ; preds = %entry
  %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
  %val0 = load float, ptr addrspace(3) %arrayidx0, align 4
  br label %endif

else:                                             ; preds = %entry
  %arrayidx1 = getelementptr inbounds [256 x float], ptr addrspace(3) @lds1, i32 0, i32 %idx.0
  %val1 = load float, ptr addrspace(3) %arrayidx1, align 4
  br label %endif

endif:                                            ; preds = %else, %if
  %val = phi float [ %val0, %if ], [ %val1, %else ]
  %arrayidx = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x
  store float %val, ptr addrspace(3) %arrayidx, align 4
  ret void
}

; Only @lds2 (0x4000 bytes) is referenced, so the expected offset is 0x4000.
; CHECK-LABEL: {{^}}dynamic_shared_array_2:
; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0x4000, [[IDX]]
define amdgpu_kernel void @dynamic_shared_array_2(i32 %idx) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %vidx = add i32 %tid.x, %idx
  %arrayidx0 = getelementptr inbounds [4096 x float], ptr addrspace(3) @lds2, i32 0, i32 %vidx
  %val0 = load float, ptr addrspace(3) %arrayidx0, align 4
  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x
  store float %val0, ptr addrspace(3) %arrayidx1, align 4
  ret void
}

; The offset of the dynamic shared memory array should be aligned to the
; alignment of its element type: 67 bytes of @lds3 round up to 0x44 for float.
; CHECK-LABEL: {{^}}dynamic_shared_array_3:
; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0x44, [[IDX]]
define amdgpu_kernel void @dynamic_shared_array_3(i32 %idx) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %vidx = add i32 %tid.x, %idx
  %arrayidx0 = getelementptr inbounds [67 x i8], ptr addrspace(3) @lds3, i32 0, i32 %vidx
  ; %vidx is an arbitrary byte index, so only byte alignment may be claimed.
  %val0 = load i8, ptr addrspace(3) %arrayidx0, align 1
  %val1 = uitofp i8 %val0 to float
  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x
  store float %val1, ptr addrspace(3) %arrayidx1, align 4
  ret void
}

; When several dynamic shared memory arrays are referenced, the offset should
; be aligned to the largest alignment among them: float and double arrays are
; both used here, so 67 bytes round up to 0x48.
; CHECK-LABEL: {{^}}dynamic_shared_array_4:
; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0x48, [[IDX]]
define amdgpu_kernel void @dynamic_shared_array_4(i32 %idx) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %vidx = add i32 %tid.x, %idx
  %arrayidx0 = getelementptr inbounds [67 x i8], ptr addrspace(3) @lds3, i32 0, i32 %vidx
  ; %vidx is an arbitrary byte index, so only byte alignment may be claimed.
  %val0 = load i8, ptr addrspace(3) %arrayidx0, align 1
  %val1 = uitofp i8 %val0 to float
  %val2 = uitofp i8 %val0 to double
  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x
  store float %val1, ptr addrspace(3) %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds [0 x double], ptr addrspace(3) @dynamic_shared1, i32 0, i32 %tid.x
  store double %val2, ptr addrspace(3) %arrayidx2, align 4
  ret void
}

; Honor an explicit alignment that is smaller than the element type's:
; @dynamic_shared2 is a double array declared with align 4, so the offset
; stays at 0x44 instead of being rounded up to 0x48.
; CHECK-LABEL: {{^}}dynamic_shared_array_5:
; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0x44, [[IDX]]
define amdgpu_kernel void @dynamic_shared_array_5(i32 %idx) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %vidx = add i32 %tid.x, %idx
  %arrayidx0 = getelementptr inbounds [67 x i8], ptr addrspace(3) @lds3, i32 0, i32 %vidx
  ; %vidx is an arbitrary byte index, so only byte alignment may be claimed.
  %val0 = load i8, ptr addrspace(3) %arrayidx0, align 1
  %val1 = uitofp i8 %val0 to float
  %val2 = uitofp i8 %val0 to double
  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x
  store float %val1, ptr addrspace(3) %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds [0 x double], ptr addrspace(3) @dynamic_shared2, i32 0, i32 %tid.x
  store double %val2, ptr addrspace(3) %arrayidx2, align 4
  ret void
}

; Honor an explicit alignment that is larger than the element type's:
; @dynamic_shared3 is declared with align 16, so 67 bytes round up to 0x50.
; CHECK-LABEL: {{^}}dynamic_shared_array_6:
; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0x50, [[IDX]]
define amdgpu_kernel void @dynamic_shared_array_6(i32 %idx) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %vidx = add i32 %tid.x, %idx
  %arrayidx0 = getelementptr inbounds [67 x i8], ptr addrspace(3) @lds3, i32 0, i32 %vidx
  ; %vidx is an arbitrary byte index, so only byte alignment may be claimed.
  %val0 = load i8, ptr addrspace(3) %arrayidx0, align 1
  %val1 = uitofp i8 %val0 to float
  %val2 = uitofp i8 %val0 to double
  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x
  store float %val1, ptr addrspace(3) %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds [0 x double], ptr addrspace(3) @dynamic_shared3, i32 0, i32 %tid.x
  store double %val2, ptr addrspace(3) %arrayidx2, align 4
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x()