; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa < %s -amdgpu-promote-kernel-arguments -infer-address-spaces | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa < %s -passes=amdgpu-promote-kernel-arguments,infer-address-spaces | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa < %s -amdgpu-promote-kernel-arguments -infer-address-spaces | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefix=GCN %s

; GCN-LABEL: ptr_nest_3:
; GCN-COUNT-2: global_load_dwordx2
; GCN: global_store_dword
define amdgpu_kernel void @ptr_nest_3(ptr addrspace(1) nocapture readonly %Arg) {
; CHECK-LABEL: @ptr_nest_3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG:%.*]], i32 [[I]]
; CHECK-NEXT:    [[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8, !amdgpu.noclobber [[META0:![0-9]+]]
; CHECK-NEXT:    [[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1)
; CHECK-NEXT:    [[P3:%.*]] = load ptr, ptr addrspace(1) [[P2_GLOBAL]], align 8, !amdgpu.noclobber [[META0]]
; CHECK-NEXT:    [[P3_GLOBAL:%.*]] = addrspacecast ptr [[P3]] to ptr addrspace(1)
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(1) [[P3_GLOBAL]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i = tail call i32 @llvm.amdgcn.workitem.id.x()
  %p1 = getelementptr inbounds ptr, ptr addrspace(1) %Arg, i32 %i
  %p2 = load ptr, ptr addrspace(1) %p1, align 8
  %p3 = load ptr, ptr %p2, align 8
  store float 0.000000e+00, ptr %p3, align 4
  ret void
}

; GCN-LABEL: ptr_bitcast:
; GCN: global_load_dwordx2
; GCN: global_store_dword
define amdgpu_kernel void @ptr_bitcast(ptr nocapture readonly %Arg) {
; CHECK-LABEL: @ptr_bitcast(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARG_GLOBAL:%.*]] = addrspacecast ptr [[ARG:%.*]] to ptr addrspace(1)
; CHECK-NEXT:    [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG_GLOBAL]], i32 [[I]]
; CHECK-NEXT:    [[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8, !amdgpu.noclobber [[META0]]
; CHECK-NEXT:    [[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1)
; CHECK-NEXT:    store i32 0, ptr addrspace(1) [[P2_GLOBAL]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i = tail call i32 @llvm.amdgcn.workitem.id.x()
  %p1 = getelementptr inbounds ptr, ptr %Arg, i32 %i
  %p2 = load ptr, ptr %p1, align 8
  store i32 0, ptr %p2, align 4
  ret void
}

%struct.S = type { ptr }

; GCN-LABEL: ptr_in_struct:
; GCN: s_load_dwordx2
; GCN: global_store_dword
define amdgpu_kernel void @ptr_in_struct(ptr addrspace(1) nocapture readonly %Arg) {
; CHECK-LABEL: @ptr_in_struct(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = load ptr, ptr addrspace(1) [[ARG:%.*]], align 8, !amdgpu.noclobber [[META0]]
; CHECK-NEXT:    [[P1_GLOBAL:%.*]] = addrspacecast ptr [[P1]] to ptr addrspace(1)
; CHECK-NEXT:    [[ID:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[P1_GLOBAL]], i32 [[ID]]
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(1) [[ARRAYIDX]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %p1 = load ptr, ptr addrspace(1) %Arg, align 8
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %arrayidx = getelementptr inbounds float, ptr %p1, i32 %id
  store float 0.000000e+00, ptr %arrayidx, align 4
  ret void
}

@LDS = internal unnamed_addr addrspace(3) global [4 x float] undef, align 16
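
; flat_ptr_arg takes two flat pointer arguments. Per the IR checks below, both
; are expected to be rewritten to addrspace(1), and the pointer loaded through
; %Arg is promoted as well, so everything except the LDS accesses becomes a
; global access.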
; GCN-LABEL: flat_ptr_arg:
; GCN-COUNT-2: global_load_dwordx2
; GCN: global_load_dwordx4
; GCN: global_store_dword
define amdgpu_kernel void @flat_ptr_arg(ptr nocapture readonly noalias %Arg, ptr nocapture noalias %Out, i32 %X) {
; CHECK-LABEL: @flat_ptr_arg(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[OUT_GLOBAL:%.*]] = addrspacecast ptr [[OUT:%.*]] to ptr addrspace(1)
; CHECK-NEXT:    [[ARG_GLOBAL:%.*]] = addrspacecast ptr [[ARG:%.*]] to ptr addrspace(1)
; CHECK-NEXT:    [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[I]] to i64
; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG_GLOBAL]], i64 [[IDXPROM]]
; CHECK-NEXT:    [[I1:%.*]] = load ptr, ptr addrspace(1) [[ARRAYIDX10]], align 8, !amdgpu.noclobber [[META0]]
; CHECK-NEXT:    [[I1_GLOBAL:%.*]] = addrspacecast ptr [[I1]] to ptr addrspace(1)
; CHECK-NEXT:    [[I2:%.*]] = load float, ptr addrspace(1) [[I1_GLOBAL]], align 4, !amdgpu.noclobber [[META0]]
; CHECK-NEXT:    [[ARRAYIDX512:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[X:%.*]]
; CHECK-NEXT:    store float [[I2]], ptr addrspace(3) [[ARRAYIDX512]], align 4
; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I1_GLOBAL]], i64 1
; CHECK-NEXT:    [[I3:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX3_1]], align 4
; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[X]], 1
; CHECK-NEXT:    [[ARRAYIDX512_1:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[ADD_1]]
; CHECK-NEXT:    store float [[I3]], ptr addrspace(3) [[ARRAYIDX512_1]], align 4
; CHECK-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I1_GLOBAL]], i64 2
; CHECK-NEXT:    [[I4:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX3_2]], align 4
; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 [[X]], 2
; CHECK-NEXT:    [[ARRAYIDX512_2:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[ADD_2]]
; CHECK-NEXT:    store float [[I4]], ptr addrspace(3) [[ARRAYIDX512_2]], align 4
; CHECK-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I1_GLOBAL]], i64 3
; CHECK-NEXT:    [[I5:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX3_3]], align 4
; CHECK-NEXT:    [[ADD_3:%.*]] = add nsw i32 [[X]], 3
; CHECK-NEXT:    [[ARRAYIDX512_3:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[ADD_3]]
; CHECK-NEXT:    store float [[I5]], ptr addrspace(3) [[ARRAYIDX512_3]], align 4
; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[X]], -1
; CHECK-NEXT:    [[ARRAYIDX711:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[SUB]]
; CHECK-NEXT:    [[I6:%.*]] = load float, ptr addrspace(3) [[ARRAYIDX711]], align 4
; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[OUT_GLOBAL]], i64 [[IDXPROM]]
; CHECK-NEXT:    [[I7:%.*]] = load ptr, ptr addrspace(1) [[ARRAYIDX11]], align 8, !amdgpu.noclobber [[META0]]
; CHECK-NEXT:    [[I7_GLOBAL:%.*]] = addrspacecast ptr [[I7]] to ptr addrspace(1)
; CHECK-NEXT:    [[IDXPROM8:%.*]] = sext i32 [[X]] to i64
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I7_GLOBAL]], i64 [[IDXPROM8]]
; CHECK-NEXT:    store float [[I6]], ptr addrspace(1) [[ARRAYIDX9]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i = tail call i32 @llvm.amdgcn.workitem.id.x()
  %idxprom = zext i32 %i to i64
  %arrayidx10 = getelementptr inbounds ptr, ptr %Arg, i64 %idxprom
  %i1 = load ptr, ptr %arrayidx10, align 8
  %i2 = load float, ptr %i1, align 4
  %arrayidx512 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %X
  store float %i2, ptr addrspace(3) %arrayidx512, align 4
  %arrayidx3.1 = getelementptr inbounds float, ptr %i1, i64 1
  %i3 = load float, ptr %arrayidx3.1, align 4
  %add.1 = add nsw i32 %X, 1
  %arrayidx512.1 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %add.1
  store float %i3, ptr addrspace(3) %arrayidx512.1, align 4
  %arrayidx3.2 = getelementptr inbounds float, ptr %i1, i64 2
  %i4 = load float, ptr %arrayidx3.2, align 4
  %add.2 = add nsw i32 %X, 2
  %arrayidx512.2 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %add.2
  store float %i4, ptr addrspace(3) %arrayidx512.2, align 4
  %arrayidx3.3 = getelementptr inbounds float, ptr %i1, i64 3
  %i5 = load float, ptr %arrayidx3.3, align 4
  %add.3 = add nsw i32 %X, 3
  %arrayidx512.3 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %add.3
  store float %i5, ptr addrspace(3) %arrayidx512.3, align 4
  %sub = add nsw i32 %X, -1
  %arrayidx711 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %sub
  %i6 = load float, ptr addrspace(3) %arrayidx711, align 4
  %arrayidx11 = getelementptr inbounds ptr, ptr %Out, i64 %idxprom
  %i7 = load ptr, ptr %arrayidx11, align 8
  %idxprom8 = sext i32 %X to i64
  %arrayidx9 = getelementptr inbounds float, ptr %i7, i64 %idxprom8
  store float %i6, ptr %arrayidx9, align 4
  ret void
}
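
; Same access pattern as flat_ptr_arg, but %Arg already lives in addrspace(1);
; only the pointer loaded from it still needs the cast to global.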
; GCN-LABEL: global_ptr_arg:
; GCN: global_load_dwordx2
; GCN: global_load_dwordx4
; GCN: global_store_dword
define amdgpu_kernel void @global_ptr_arg(ptr addrspace(1) nocapture readonly %Arg, i32 %X) {
; CHECK-LABEL: @global_ptr_arg(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[I]] to i64
; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG:%.*]], i64 [[IDXPROM]]
; CHECK-NEXT:    [[I1:%.*]] = load ptr, ptr addrspace(1) [[ARRAYIDX10]], align 8, !amdgpu.noclobber [[META0]]
; CHECK-NEXT:    [[I1_GLOBAL:%.*]] = addrspacecast ptr [[I1]] to ptr addrspace(1)
; CHECK-NEXT:    [[I2:%.*]] = load float, ptr addrspace(1) [[I1_GLOBAL]], align 4, !amdgpu.noclobber [[META0]]
; CHECK-NEXT:    [[ARRAYIDX512:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[X:%.*]]
; CHECK-NEXT:    store float [[I2]], ptr addrspace(3) [[ARRAYIDX512]], align 4
; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I1_GLOBAL]], i64 1
; CHECK-NEXT:    [[I3:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX3_1]], align 4
; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[X]], 1
; CHECK-NEXT:    [[ARRAYIDX512_1:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[ADD_1]]
; CHECK-NEXT:    store float [[I3]], ptr addrspace(3) [[ARRAYIDX512_1]], align 4
; CHECK-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I1_GLOBAL]], i64 2
; CHECK-NEXT:    [[I4:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX3_2]], align 4
; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 [[X]], 2
; CHECK-NEXT:    [[ARRAYIDX512_2:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[ADD_2]]
; CHECK-NEXT:    store float [[I4]], ptr addrspace(3) [[ARRAYIDX512_2]], align 4
; CHECK-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I1_GLOBAL]], i64 3
; CHECK-NEXT:    [[I5:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX3_3]], align 4
; CHECK-NEXT:    [[ADD_3:%.*]] = add nsw i32 [[X]], 3
; CHECK-NEXT:    [[ARRAYIDX512_3:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[ADD_3]]
; CHECK-NEXT:    store float [[I5]], ptr addrspace(3) [[ARRAYIDX512_3]], align 4
; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[X]], -1
; CHECK-NEXT:    [[ARRAYIDX711:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[SUB]]
; CHECK-NEXT:    [[I6:%.*]] = load float, ptr addrspace(3) [[ARRAYIDX711]], align 4
; CHECK-NEXT:    [[IDXPROM8:%.*]] = sext i32 [[X]] to i64
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I1_GLOBAL]], i64 [[IDXPROM8]]
; CHECK-NEXT:    store float [[I6]], ptr addrspace(1) [[ARRAYIDX9]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i = tail call i32 @llvm.amdgcn.workitem.id.x()
  %idxprom = zext i32 %i to i64
  %arrayidx10 = getelementptr inbounds ptr, ptr addrspace(1) %Arg, i64 %idxprom
  %i1 = load ptr, ptr addrspace(1) %arrayidx10, align 8
  %i2 = load float, ptr %i1, align 4
  %arrayidx512 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %X
  store float %i2, ptr addrspace(3) %arrayidx512, align 4
  %arrayidx3.1 = getelementptr inbounds float, ptr %i1, i64 1
  %i3 = load float, ptr %arrayidx3.1, align 4
  %add.1 = add nsw i32 %X, 1
  %arrayidx512.1 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %add.1
  store float %i3, ptr addrspace(3) %arrayidx512.1, align 4
  %arrayidx3.2 = getelementptr inbounds float, ptr %i1, i64 2
  %i4 = load float, ptr %arrayidx3.2, align 4
  %add.2 = add nsw i32 %X, 2
  %arrayidx512.2 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %add.2
  store float %i4, ptr addrspace(3) %arrayidx512.2, align 4
  %arrayidx3.3 = getelementptr inbounds float, ptr %i1, i64 3
  %i5 = load float, ptr %arrayidx3.3, align 4
  %add.3 = add nsw i32 %X, 3
  %arrayidx512.3 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %add.3
  store float %i5, ptr addrspace(3) %arrayidx512.3, align 4
  %sub = add nsw i32 %X, -1
  %arrayidx711 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %sub
  %i6 = load float, ptr addrspace(3) %arrayidx711, align 4
  %idxprom8 = sext i32 %X to i64
  %arrayidx9 = getelementptr inbounds float, ptr %i1, i64 %idxprom8
  store float %i6, ptr %arrayidx9, align 4
  ret void
}
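
; Negative test: the store through %arrayidx11 may alias %arrayidx10 before
; %i1 is loaded, so the load gets no !amdgpu.noclobber annotation, %i1 is not
; promoted, and the accesses through it stay flat (note the flat_load and
; flat_store in the ISA checks).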
; GCN-LABEL: global_ptr_arg_clobbered:
; GCN: global_store_dwordx2
; GCN: global_load_dwordx2
; GCN: flat_load_dword
; GCN: flat_store_dword
define amdgpu_kernel void @global_ptr_arg_clobbered(ptr addrspace(1) nocapture readonly %Arg, i32 %X) {
; CHECK-LABEL: @global_ptr_arg_clobbered(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[I]] to i64
; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG:%.*]], i64 [[IDXPROM]]
; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARRAYIDX10]], i32 [[X:%.*]]
; CHECK-NEXT:    store ptr null, ptr addrspace(1) [[ARRAYIDX11]], align 4
; CHECK-NEXT:    [[I1:%.*]] = load ptr, ptr addrspace(1) [[ARRAYIDX10]], align 8
; CHECK-NEXT:    [[I2:%.*]] = load float, ptr [[I1]], align 4
; CHECK-NEXT:    [[ARRAYIDX512:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[X]]
; CHECK-NEXT:    store float [[I2]], ptr addrspace(3) [[ARRAYIDX512]], align 4
; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[X]], -1
; CHECK-NEXT:    [[ARRAYIDX711:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[SUB]]
; CHECK-NEXT:    [[I6:%.*]] = load float, ptr addrspace(3) [[ARRAYIDX711]], align 4
; CHECK-NEXT:    [[IDXPROM8:%.*]] = sext i32 [[X]] to i64
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[I1]], i64 [[IDXPROM8]]
; CHECK-NEXT:    store float [[I6]], ptr [[ARRAYIDX9]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i = tail call i32 @llvm.amdgcn.workitem.id.x()
  %idxprom = zext i32 %i to i64
  %arrayidx10 = getelementptr inbounds ptr, ptr addrspace(1) %Arg, i64 %idxprom
  %arrayidx11 = getelementptr inbounds ptr, ptr addrspace(1) %arrayidx10, i32 %X
  store ptr null, ptr addrspace(1) %arrayidx11, align 4
  %i1 = load ptr, ptr addrspace(1) %arrayidx10, align 8
  %i2 = load float, ptr %i1, align 4
  %arrayidx512 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %X
  store float %i2, ptr addrspace(3) %arrayidx512, align 4
  %sub = add nsw i32 %X, -1
  %arrayidx711 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %sub
  %i6 = load float, ptr addrspace(3) %arrayidx711, align 4
  %idxprom8 = sext i32 %X to i64
  %arrayidx9 = getelementptr inbounds float, ptr %i1, i64 %idxprom8
  store float %i6, ptr %arrayidx9, align 4
  ret void
}
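
; Here the clobbering store happens only after %i1 has been loaded, so the
; load is still marked !amdgpu.noclobber and %i1 is promoted to addrspace(1).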
; GCN-LABEL: global_ptr_arg_clobbered_after_load:
; GCN: global_load_dwordx2
; GCN: global_store_dwordx2
; GCN: global_load_dword
; GCN: global_store_dword
define amdgpu_kernel void @global_ptr_arg_clobbered_after_load(ptr addrspace(1) nocapture readonly %Arg, i32 %X) {
; CHECK-LABEL: @global_ptr_arg_clobbered_after_load(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[I]] to i64
; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG:%.*]], i64 [[IDXPROM]]
; CHECK-NEXT:    [[I1:%.*]] = load ptr, ptr addrspace(1) [[ARRAYIDX10]], align 8, !amdgpu.noclobber [[META0]]
; CHECK-NEXT:    [[I1_GLOBAL:%.*]] = addrspacecast ptr [[I1]] to ptr addrspace(1)
; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARRAYIDX10]], i32 [[X:%.*]]
; CHECK-NEXT:    store ptr null, ptr addrspace(1) [[ARRAYIDX11]], align 4
; CHECK-NEXT:    [[I2:%.*]] = load float, ptr addrspace(1) [[I1_GLOBAL]], align 4
; CHECK-NEXT:    [[ARRAYIDX512:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[X]]
; CHECK-NEXT:    store float [[I2]], ptr addrspace(3) [[ARRAYIDX512]], align 4
; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[X]], -1
; CHECK-NEXT:    [[ARRAYIDX711:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[SUB]]
; CHECK-NEXT:    [[I6:%.*]] = load float, ptr addrspace(3) [[ARRAYIDX711]], align 4
; CHECK-NEXT:    [[IDXPROM8:%.*]] = sext i32 [[X]] to i64
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I1_GLOBAL]], i64 [[IDXPROM8]]
; CHECK-NEXT:    store float [[I6]], ptr addrspace(1) [[ARRAYIDX9]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i = tail call i32 @llvm.amdgcn.workitem.id.x()
  %idxprom = zext i32 %i to i64
  %arrayidx10 = getelementptr inbounds ptr, ptr addrspace(1) %Arg, i64 %idxprom
  %i1 = load ptr, ptr addrspace(1) %arrayidx10, align 8
  %arrayidx11 = getelementptr inbounds ptr, ptr addrspace(1) %arrayidx10, i32 %X
  store ptr null, ptr addrspace(1) %arrayidx11, align 4
  %i2 = load float, ptr %i1, align 4
  %arrayidx512 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %X
  store float %i2, ptr addrspace(3) %arrayidx512, align 4
  %sub = add nsw i32 %X, -1
  %arrayidx711 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %sub
  %i6 = load float, ptr addrspace(3) %arrayidx711, align 4
  %idxprom8 = sext i32 %X to i64
  %arrayidx9 = getelementptr inbounds float, ptr %i1, i64 %idxprom8
  store float %i6, ptr %arrayidx9, align 4
  ret void
}
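
; A workgroup barrier between the address computation and the loads does not
; write memory, so it must not block the promotion of either loaded pointer.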
; GCN-LABEL: ptr_nest_3_barrier:
; GCN-COUNT-2: global_load_dwordx2
; GCN: global_store_dword
define amdgpu_kernel void @ptr_nest_3_barrier(ptr addrspace(1) nocapture readonly %Arg) {
; CHECK-LABEL: @ptr_nest_3_barrier(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG:%.*]], i32 [[I]]
; CHECK-NEXT:    tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT:    [[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8, !amdgpu.noclobber [[META0]]
; CHECK-NEXT:    [[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1)
; CHECK-NEXT:    [[P3:%.*]] = load ptr, ptr addrspace(1) [[P2_GLOBAL]], align 8, !amdgpu.noclobber [[META0]]
; CHECK-NEXT:    [[P3_GLOBAL:%.*]] = addrspacecast ptr [[P3]] to ptr addrspace(1)
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(1) [[P3_GLOBAL]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i = tail call i32 @llvm.amdgcn.workitem.id.x()
  %p1 = getelementptr inbounds ptr, ptr addrspace(1) %Arg, i32 %i
  tail call void @llvm.amdgcn.s.barrier()
  %p2 = load ptr, ptr addrspace(1) %p1, align 8
  %p3 = load ptr, ptr %p2, align 8
  store float 0.000000e+00, ptr %p3, align 4
  ret void
}

; GCN-LABEL: flat_ptr_nest_2:
; GCN: s_lshl_b64
; GCN: s_load_dwordx2
; GCN: global_store_dword
define amdgpu_kernel void @flat_ptr_nest_2(ptr nocapture readonly %Arg, i32 %i) {
; CHECK-LABEL: @flat_ptr_nest_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARG_GLOBAL:%.*]] = addrspacecast ptr [[ARG:%.*]] to ptr addrspace(1)
; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG_GLOBAL]], i32 [[I:%.*]]
; CHECK-NEXT:    [[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8, !amdgpu.noclobber [[META0]]
; CHECK-NEXT:    [[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1)
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(1) [[P2_GLOBAL]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %p1 = getelementptr inbounds ptr, ptr %Arg, i32 %i
  %p2 = load ptr, ptr %p1, align 8
  store float 0.000000e+00, ptr %p2, align 4
  ret void
}

; GCN-LABEL: const_ptr_nest_3:
; GCN: s_lshl_b64
; GCN: s_load_dwordx2
; GCN: s_load_dwordx2
; GCN: global_store_dword
define amdgpu_kernel void @const_ptr_nest_3(ptr addrspace(4) nocapture readonly %Arg, i32 %i) {
; CHECK-LABEL: @const_ptr_nest_3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[ARG:%.*]], i32 [[I:%.*]]
; CHECK-NEXT:    [[P2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[P1]], align 8, !amdgpu.noclobber [[META0]]
; CHECK-NEXT:    [[P3:%.*]] = load ptr, ptr addrspace(4) [[P2]], align 8, !amdgpu.noclobber [[META0]]
; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[P3]] to ptr addrspace(1)
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(1) [[TMP0]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %p1 = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) %Arg, i32 %i
  %p2 = load ptr addrspace(4), ptr addrspace(4) %p1, align 8
  %p3 = load ptr, ptr addrspace(4) %p2, align 8
  store float 0.000000e+00, ptr %p3, align 4
  ret void
}
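
; Same nesting as const_ptr_nest_3, except the addrspace(4) pointers are first
; cast to flat before being loaded through; the casts are expected to fold
; away so the loads still operate on the constant pointers directly.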
; GCN-LABEL: cast_from_const_const_ptr_nest_3:
; GCN: s_lshl_b64
; GCN: s_load_dwordx2
; GCN: s_load_dwordx2
; GCN: global_store_dword
define amdgpu_kernel void @cast_from_const_const_ptr_nest_3(ptr addrspace(4) nocapture readonly %Arg, i32 %i) {
; CHECK-LABEL: @cast_from_const_const_ptr_nest_3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[ARG:%.*]], i32 [[I:%.*]]
; CHECK-NEXT:    [[P2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[P1]], align 8, !amdgpu.noclobber [[META0]]
; CHECK-NEXT:    [[P3:%.*]] = load ptr, ptr addrspace(4) [[P2]], align 8, !amdgpu.noclobber [[META0]]
; CHECK-NEXT:    [[P3_GLOBAL:%.*]] = addrspacecast ptr [[P3]] to ptr addrspace(1)
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(1) [[P3_GLOBAL]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %p1 = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) %Arg, i32 %i
  %a1 = addrspacecast ptr addrspace(4) %p1 to ptr
  %p2 = load ptr addrspace(4), ptr %a1, align 8
  %a2 = addrspacecast ptr addrspace(4) %p2 to ptr
  %p3 = load ptr, ptr %a2, align 8
  store float 0.000000e+00, ptr %p3, align 4
  ret void
}

; GCN-LABEL: flat_ptr_volatile_load:
; GCN: s_lshl_b64
; GCN: flat_load_dwordx2
; GCN: global_store_dword
define amdgpu_kernel void @flat_ptr_volatile_load(ptr nocapture readonly %Arg, i32 %i) {
; CHECK-LABEL: @flat_ptr_volatile_load(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARG_GLOBAL:%.*]] = addrspacecast ptr [[ARG:%.*]] to ptr addrspace(1)
; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG_GLOBAL]], i32 [[I:%.*]]
; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[P1]] to ptr
; CHECK-NEXT:    [[P2:%.*]] = load volatile ptr, ptr [[TMP0]], align 8
; CHECK-NEXT:    [[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1)
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(1) [[P2_GLOBAL]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %p1 = getelementptr inbounds ptr, ptr %Arg, i32 %i
  %p2 = load volatile ptr, ptr %p1, align 8
  store float 0.000000e+00, ptr %p2, align 4
  ret void
}

; GCN-LABEL: flat_ptr_atomic_load:
; GCN: s_lshl_b64
; GCN: global_load_dwordx2
; GCN: global_store_dword
define amdgpu_kernel void @flat_ptr_atomic_load(ptr nocapture readonly %Arg, i32 %i) {
; CHECK-LABEL: @flat_ptr_atomic_load(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARG_GLOBAL:%.*]] = addrspacecast ptr [[ARG:%.*]] to ptr addrspace(1)
; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG_GLOBAL]], i32 [[I:%.*]]
; CHECK-NEXT:    [[P2:%.*]] = load atomic ptr, ptr addrspace(1) [[P1]] monotonic, align 8
; CHECK-NEXT:    [[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1)
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(1) [[P2_GLOBAL]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %p1 = getelementptr inbounds ptr, ptr %Arg, i32 %i
  %p2 = load atomic ptr, ptr %p1 monotonic, align 8
  store float 0.000000e+00, ptr %p2, align 4
  ret void
}
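
; Like cast_from_const_const_ptr_nest_3, but with casts from addrspace(1)
; instead of addrspace(4); both loads are still promoted and the final store
; goes to global memory.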
; GCN-LABEL: cast_changing_pointee_type:
; GCN: s_lshl_b64
; GCN: s_load_dwordx2
; GCN: s_load_dwordx2
; GCN: global_store_dword
define amdgpu_kernel void @cast_changing_pointee_type(ptr addrspace(1) nocapture readonly %Arg, i32 %i) {
; CHECK-LABEL: @cast_changing_pointee_type(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[ARG:%.*]], i32 [[I:%.*]]
; CHECK-NEXT:    [[P2:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[P1]], align 8, !amdgpu.noclobber [[META0]]
; CHECK-NEXT:    [[P3:%.*]] = load ptr, ptr addrspace(1) [[P2]], align 8, !amdgpu.noclobber [[META0]]
; CHECK-NEXT:    [[P3_GLOBAL:%.*]] = addrspacecast ptr [[P3]] to ptr addrspace(1)
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(1) [[P3_GLOBAL]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %p1 = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) %Arg, i32 %i
  %a1 = addrspacecast ptr addrspace(1) %p1 to ptr
  %p2 = load ptr addrspace(1), ptr %a1, align 8
  %a2 = addrspacecast ptr addrspace(1) %p2 to ptr
  %p3 = load ptr, ptr %a2, align 8
  store float 0.000000e+00, ptr %p3, align 4
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x()
declare void @llvm.amdgcn.s.barrier()