; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s

; Check that invalid IR is not produced on a vector typed
; getelementptr with a scalar alloca pointer base.

define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset() {
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset() {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !invariant.load [[META0:![0-9]+]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 2
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG1:![0-9]+]], !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
; CHECK-NEXT:    [[TMP6:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT:    [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT:    [[TMP9:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP4]]
; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP8]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1024 x [4 x i32]], ptr addrspace(3) @scalar_alloca_ptr_with_vector_gep_offset.alloca, i32 0, i32 [[TMP13]]
; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP14]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT:    [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(3)> [[GETELEMENTPTR]], i64 0
; CHECK-NEXT:    store i32 0, ptr addrspace(3) [[EXTRACTELEMENT]], align 4
; CHECK-NEXT:    ret void
;
bb:
  %alloca = alloca [4 x i32], align 4, addrspace(5)
  %getelementptr = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
  %extractelement = extractelement <4 x ptr addrspace(5)> %getelementptr, i64 0
  store i32 0, ptr addrspace(5) %extractelement
  ret void
}

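; Same as above, but the vector GEP is only reachable through a select
; between two vector GEPs on the same alloca.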
define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select(i1 %cond) {
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select(
; CHECK-SAME: i1 [[COND:%.*]]) {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 2
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG1]], !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
; CHECK-NEXT:    [[TMP6:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT:    [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT:    [[TMP9:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP4]]
; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP8]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1024 x [4 x i32]], ptr addrspace(3) @scalar_alloca_ptr_with_vector_gep_offset_select.alloca, i32 0, i32 [[TMP13]]
; CHECK-NEXT:    [[GETELEMENTPTR0:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP14]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT:    [[GETELEMENTPTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP14]], <4 x i64> <i64 3, i64 2, i64 1, i64 0>
; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND]], <4 x ptr addrspace(3)> [[GETELEMENTPTR0]], <4 x ptr addrspace(3)> [[GETELEMENTPTR1]]
; CHECK-NEXT:    [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(3)> [[SELECT]], i64 1
; CHECK-NEXT:    store i32 0, ptr addrspace(3) [[EXTRACTELEMENT]], align 4
; CHECK-NEXT:    ret void
;
bb:
  %alloca = alloca [4 x i32], align 4, addrspace(5)
  %getelementptr0 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
  %getelementptr1 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 3, i64 2, i64 1, i64 0>
  %select = select i1 %cond, <4 x ptr addrspace(5)> %getelementptr0, <4 x ptr addrspace(5)> %getelementptr1
  %extractelement = extractelement <4 x ptr addrspace(5)> %select, i64 1
  store i32 0, ptr addrspace(5) %extractelement
  ret void
}

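; The next two tests select between a vector GEP and a null vector of
; pointers, with the null vector in the true and false operand respectively.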
define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select_nullptr0(i1 %cond) {
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select_nullptr0(
; CHECK-SAME: i1 [[COND:%.*]]) {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 2
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG1]], !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
; CHECK-NEXT:    [[TMP6:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT:    [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT:    [[TMP9:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP4]]
; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP8]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1024 x [4 x i32]], ptr addrspace(3) @scalar_alloca_ptr_with_vector_gep_offset_select_nullptr0.alloca, i32 0, i32 [[TMP13]]
; CHECK-NEXT:    [[GETELEMENTPTR0:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP14]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND]], <4 x ptr addrspace(3)> zeroinitializer, <4 x ptr addrspace(3)> [[GETELEMENTPTR0]]
; CHECK-NEXT:    [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(3)> [[SELECT]], i64 1
; CHECK-NEXT:    store i32 0, ptr addrspace(3) [[EXTRACTELEMENT]], align 4
; CHECK-NEXT:    ret void
;
bb:
  %alloca = alloca [4 x i32], align 4, addrspace(5)
  %getelementptr0 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
  %select = select i1 %cond, <4 x ptr addrspace(5)> zeroinitializer, <4 x ptr addrspace(5)> %getelementptr0
  %extractelement = extractelement <4 x ptr addrspace(5)> %select, i64 1
  store i32 0, ptr addrspace(5) %extractelement
  ret void
}

define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select_nullptr1(i1 %cond) {
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select_nullptr1(
; CHECK-SAME: i1 [[COND:%.*]]) {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 2
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG1]], !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
; CHECK-NEXT:    [[TMP6:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT:    [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT:    [[TMP9:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP4]]
; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP8]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1024 x [4 x i32]], ptr addrspace(3) @scalar_alloca_ptr_with_vector_gep_offset_select_nullptr1.alloca, i32 0, i32 [[TMP13]]
; CHECK-NEXT:    [[GETELEMENTPTR0:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP14]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND]], <4 x ptr addrspace(3)> [[GETELEMENTPTR0]], <4 x ptr addrspace(3)> zeroinitializer
; CHECK-NEXT:    [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(3)> [[SELECT]], i64 1
; CHECK-NEXT:    store i32 0, ptr addrspace(3) [[EXTRACTELEMENT]], align 4
; CHECK-NEXT:    ret void
;
bb:
  %alloca = alloca [4 x i32], align 4, addrspace(5)
  %getelementptr0 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
  %select = select i1 %cond, <4 x ptr addrspace(5)> %getelementptr0, <4 x ptr addrspace(5)> zeroinitializer
  %extractelement = extractelement <4 x ptr addrspace(5)> %select, i64 1
  store i32 0, ptr addrspace(5) %extractelement
  ret void
}

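; The next two tests compare the selected vector of pointers against a null
; vector instead of storing through it, with the null vector on the RHS and
; LHS of the icmp respectively.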
define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_icmp_nullptr0(i1 %cond, ptr addrspace(1) %out) {
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_icmp_nullptr0(
; CHECK-SAME: i1 [[COND:%.*]], ptr addrspace(1) [[OUT:%.*]]) {
; CHECK-NEXT:  [[BB0:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 2
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG1]], !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
; CHECK-NEXT:    [[TMP6:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT:    [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT:    [[TMP9:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP4]]
; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP8]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1024 x [4 x i32]], ptr addrspace(3) @scalar_alloca_ptr_with_vector_gep_offset_icmp_nullptr0.alloca, i32 0, i32 [[TMP13]]
; CHECK-NEXT:    [[GETELEMENTPTR0:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP14]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND]], <4 x ptr addrspace(3)> zeroinitializer, <4 x ptr addrspace(3)> [[GETELEMENTPTR0]]
; CHECK-NEXT:    [[ICMP:%.*]] = icmp eq <4 x ptr addrspace(3)> [[SELECT]], zeroinitializer
; CHECK-NEXT:    store <4 x i1> [[ICMP]], ptr addrspace(1) [[OUT]], align 1
; CHECK-NEXT:    ret void
;
bb0:
  %alloca = alloca [4 x i32], align 4, addrspace(5)
  %getelementptr0 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
  %select = select i1 %cond, <4 x ptr addrspace(5)> zeroinitializer, <4 x ptr addrspace(5)> %getelementptr0
  %icmp = icmp eq <4 x ptr addrspace(5)> %select, zeroinitializer
  store <4 x i1> %icmp, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_icmp_nullptr1(i1 %cond, ptr addrspace(1) %out) {
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_icmp_nullptr1(
; CHECK-SAME: i1 [[COND:%.*]], ptr addrspace(1) [[OUT:%.*]]) {
; CHECK-NEXT:  [[BB0:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 2
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG1]], !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
; CHECK-NEXT:    [[TMP6:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT:    [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT:    [[TMP9:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP4]]
; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP8]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1024 x [4 x i32]], ptr addrspace(3) @scalar_alloca_ptr_with_vector_gep_offset_icmp_nullptr1.alloca, i32 0, i32 [[TMP13]]
; CHECK-NEXT:    [[GETELEMENTPTR0:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP14]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND]], <4 x ptr addrspace(3)> zeroinitializer, <4 x ptr addrspace(3)> [[GETELEMENTPTR0]]
; CHECK-NEXT:    [[ICMP:%.*]] = icmp eq <4 x ptr addrspace(3)> zeroinitializer, [[SELECT]]
; CHECK-NEXT:    store <4 x i1> [[ICMP]], ptr addrspace(1) [[OUT]], align 1
; CHECK-NEXT:    ret void
;
bb0:
  %alloca = alloca [4 x i32], align 4, addrspace(5)
  %getelementptr0 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
  %select = select i1 %cond, <4 x ptr addrspace(5)> zeroinitializer, <4 x ptr addrspace(5)> %getelementptr0
  %icmp = icmp eq <4 x ptr addrspace(5)> zeroinitializer, %select
  store <4 x i1> %icmp, ptr addrspace(1) %out
  ret void
}

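; The vector GEP reaches its use through a phi with a null vector incoming
; from the entry block.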
define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_phi_nullptr(i1 %cond, ptr addrspace(1) %out) {
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_phi_nullptr(
; CHECK-SAME: i1 [[COND:%.*]], ptr addrspace(1) [[OUT:%.*]]) {
; CHECK-NEXT:  [[BB0:.*]]:
; CHECK-NEXT:    [[TMP0:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 2
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG1]], !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
; CHECK-NEXT:    [[TMP6:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT:    [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT:    [[TMP9:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP4]]
; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP8]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1024 x [4 x i32]], ptr addrspace(3) @scalar_alloca_ptr_with_vector_gep_offset_phi_nullptr.alloca, i32 0, i32 [[TMP13]]
; CHECK-NEXT:    br i1 [[COND]], label %[[BB1:.*]], label %[[BB2:.*]]
; CHECK:       [[BB1]]:
; CHECK-NEXT:    [[GETELEMENTPTR0:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP14]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT:    br label %[[BB2]]
; CHECK:       [[BB2]]:
; CHECK-NEXT:    [[PHI:%.*]] = phi <4 x ptr addrspace(3)> [ [[GETELEMENTPTR0]], %[[BB1]] ], [ zeroinitializer, %[[BB0]] ]
; CHECK-NEXT:    [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(3)> [[PHI]], i64 2
; CHECK-NEXT:    store i32 0, ptr addrspace(3) [[EXTRACTELEMENT]], align 4
; CHECK-NEXT:    ret void
;
bb0:
  %alloca = alloca [4 x i32], align 4, addrspace(5)
  br i1 %cond, label %bb1, label %bb2

bb1:
  %getelementptr0 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
  br label %bb2

bb2:
  %phi = phi <4 x ptr addrspace(5)> [ %getelementptr0, %bb1 ], [ zeroinitializer, %bb0 ]
  %extractelement = extractelement <4 x ptr addrspace(5)> %phi, i64 2
  store i32 0, ptr addrspace(5) %extractelement
  ret void
}
;.
; CHECK: [[META0]] = !{}
; CHECK: [[RNG1]] = !{i32 0, i32 1025}
;.