; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s

; Check that invalid IR is not produced on a vector-typed
; getelementptr with a scalar alloca pointer base.
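; The allocas are promoted to LDS arrays and the vector-typed GEPs, together
; with their select/icmp/phi users, are rewritten to operate on
; <4 x ptr addrspace(3)> values, as reflected in the CHECK lines below.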

define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset() {
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset() {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !invariant.load [[META0:![0-9]+]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 2
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG1:![0-9]+]], !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
; CHECK-NEXT:    [[TMP6:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT:    [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT:    [[TMP9:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP4]]
; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP8]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1024 x [4 x i32]], ptr addrspace(3) @scalar_alloca_ptr_with_vector_gep_offset.alloca, i32 0, i32 [[TMP13]]
; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP14]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT:    [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(3)> [[GETELEMENTPTR]], i64 0
; CHECK-NEXT:    store i32 0, ptr addrspace(3) [[EXTRACTELEMENT]], align 4
; CHECK-NEXT:    ret void
;
bb:
  %alloca = alloca [4 x i32], align 4, addrspace(5)
  %getelementptr = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
  %extractelement = extractelement <4 x ptr addrspace(5)> %getelementptr, i64 0
  store i32 0, ptr addrspace(5) %extractelement
  ret void
}

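; Two vector GEPs on the same alloca feed a select; the select over the
; resulting pointer vectors is rewritten to <4 x ptr addrspace(3)> as well.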
define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select(i1 %cond) {
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select(
; CHECK-SAME: i1 [[COND:%.*]]) {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 2
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG1]], !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
; CHECK-NEXT:    [[TMP6:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT:    [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT:    [[TMP9:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP4]]
; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP8]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1024 x [4 x i32]], ptr addrspace(3) @scalar_alloca_ptr_with_vector_gep_offset_select.alloca, i32 0, i32 [[TMP13]]
; CHECK-NEXT:    [[GETELEMENTPTR0:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP14]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT:    [[GETELEMENTPTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP14]], <4 x i64> <i64 3, i64 2, i64 1, i64 0>
; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND]], <4 x ptr addrspace(3)> [[GETELEMENTPTR0]], <4 x ptr addrspace(3)> [[GETELEMENTPTR1]]
; CHECK-NEXT:    [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(3)> [[SELECT]], i64 1
; CHECK-NEXT:    store i32 0, ptr addrspace(3) [[EXTRACTELEMENT]], align 4
; CHECK-NEXT:    ret void
;
bb:
  %alloca = alloca [4 x i32], align 4, addrspace(5)
  %getelementptr0 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
  %getelementptr1 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 3, i64 2, i64 1, i64 0>
  %select = select i1 %cond, <4 x ptr addrspace(5)> %getelementptr0, <4 x ptr addrspace(5)> %getelementptr1
  %extractelement = extractelement <4 x ptr addrspace(5)> %select, i64 1
  store i32 0, ptr addrspace(5) %extractelement
  ret void
}

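; As above, but the true operand of the select is a null pointer vector, which
; stays zeroinitializer in the rewritten select.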
define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select_nullptr0(i1 %cond) {
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select_nullptr0(
; CHECK-SAME: i1 [[COND:%.*]]) {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 2
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG1]], !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
; CHECK-NEXT:    [[TMP6:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT:    [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT:    [[TMP9:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP4]]
; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP8]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1024 x [4 x i32]], ptr addrspace(3) @scalar_alloca_ptr_with_vector_gep_offset_select_nullptr0.alloca, i32 0, i32 [[TMP13]]
; CHECK-NEXT:    [[GETELEMENTPTR0:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP14]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND]], <4 x ptr addrspace(3)> zeroinitializer, <4 x ptr addrspace(3)> [[GETELEMENTPTR0]]
; CHECK-NEXT:    [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(3)> [[SELECT]], i64 1
; CHECK-NEXT:    store i32 0, ptr addrspace(3) [[EXTRACTELEMENT]], align 4
; CHECK-NEXT:    ret void
;
bb:
  %alloca = alloca [4 x i32], align 4, addrspace(5)
  %getelementptr0 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
  %select = select i1 %cond, <4 x ptr addrspace(5)> zeroinitializer, <4 x ptr addrspace(5)> %getelementptr0
  %extractelement = extractelement <4 x ptr addrspace(5)> %select, i64 1
  store i32 0, ptr addrspace(5) %extractelement
  ret void
}

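; Same as the previous test, with the null pointer vector as the false operand
; of the select.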
define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select_nullptr1(i1 %cond) {
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select_nullptr1(
; CHECK-SAME: i1 [[COND:%.*]]) {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 2
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG1]], !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
; CHECK-NEXT:    [[TMP6:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT:    [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT:    [[TMP9:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP4]]
; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP8]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1024 x [4 x i32]], ptr addrspace(3) @scalar_alloca_ptr_with_vector_gep_offset_select_nullptr1.alloca, i32 0, i32 [[TMP13]]
; CHECK-NEXT:    [[GETELEMENTPTR0:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP14]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND]], <4 x ptr addrspace(3)> [[GETELEMENTPTR0]], <4 x ptr addrspace(3)> zeroinitializer
; CHECK-NEXT:    [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(3)> [[SELECT]], i64 1
; CHECK-NEXT:    store i32 0, ptr addrspace(3) [[EXTRACTELEMENT]], align 4
; CHECK-NEXT:    ret void
;
bb:
  %alloca = alloca [4 x i32], align 4, addrspace(5)
  %getelementptr0 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
  %select = select i1 %cond, <4 x ptr addrspace(5)> %getelementptr0, <4 x ptr addrspace(5)> zeroinitializer
  %extractelement = extractelement <4 x ptr addrspace(5)> %select, i64 1
  store i32 0, ptr addrspace(5) %extractelement
  ret void
}

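; The selected pointer vector is compared against null; the icmp is rewritten
; to compare <4 x ptr addrspace(3)> values.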
define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_icmp_nullptr0(i1 %cond, ptr addrspace(1) %out) {
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_icmp_nullptr0(
; CHECK-SAME: i1 [[COND:%.*]], ptr addrspace(1) [[OUT:%.*]]) {
; CHECK-NEXT:  [[BB0:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 2
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG1]], !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
; CHECK-NEXT:    [[TMP6:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT:    [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT:    [[TMP9:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP4]]
; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP8]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1024 x [4 x i32]], ptr addrspace(3) @scalar_alloca_ptr_with_vector_gep_offset_icmp_nullptr0.alloca, i32 0, i32 [[TMP13]]
; CHECK-NEXT:    [[GETELEMENTPTR0:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP14]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND]], <4 x ptr addrspace(3)> zeroinitializer, <4 x ptr addrspace(3)> [[GETELEMENTPTR0]]
; CHECK-NEXT:    [[ICMP:%.*]] = icmp eq <4 x ptr addrspace(3)> [[SELECT]], zeroinitializer
; CHECK-NEXT:    store <4 x i1> [[ICMP]], ptr addrspace(1) [[OUT]], align 1
; CHECK-NEXT:    ret void
;
bb0:
  %alloca = alloca [4 x i32], align 4, addrspace(5)
  %getelementptr0 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
  %select = select i1 %cond, <4 x ptr addrspace(5)> zeroinitializer, <4 x ptr addrspace(5)> %getelementptr0
  %icmp = icmp eq <4 x ptr addrspace(5)> %select, zeroinitializer
  store <4 x i1> %icmp, ptr addrspace(1) %out
  ret void
}

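; Same as the previous test, with the null vector as the left-hand operand of
; the icmp.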
define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_icmp_nullptr1(i1 %cond, ptr addrspace(1) %out) {
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_icmp_nullptr1(
; CHECK-SAME: i1 [[COND:%.*]], ptr addrspace(1) [[OUT:%.*]]) {
; CHECK-NEXT:  [[BB0:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 2
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG1]], !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
; CHECK-NEXT:    [[TMP6:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT:    [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT:    [[TMP9:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP4]]
; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP8]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1024 x [4 x i32]], ptr addrspace(3) @scalar_alloca_ptr_with_vector_gep_offset_icmp_nullptr1.alloca, i32 0, i32 [[TMP13]]
; CHECK-NEXT:    [[GETELEMENTPTR0:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP14]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND]], <4 x ptr addrspace(3)> zeroinitializer, <4 x ptr addrspace(3)> [[GETELEMENTPTR0]]
; CHECK-NEXT:    [[ICMP:%.*]] = icmp eq <4 x ptr addrspace(3)> zeroinitializer, [[SELECT]]
; CHECK-NEXT:    store <4 x i1> [[ICMP]], ptr addrspace(1) [[OUT]], align 1
; CHECK-NEXT:    ret void
;
bb0:
  %alloca = alloca [4 x i32], align 4, addrspace(5)
  %getelementptr0 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
  %select = select i1 %cond, <4 x ptr addrspace(5)> zeroinitializer, <4 x ptr addrspace(5)> %getelementptr0
  %icmp = icmp eq <4 x ptr addrspace(5)> zeroinitializer, %select
  store <4 x i1> %icmp, ptr addrspace(1) %out
  ret void
}

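; The vector GEP reaches the store through a phi whose other incoming value is
; a null pointer vector; the phi is rewritten to <4 x ptr addrspace(3)>.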
define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_phi_nullptr(i1 %cond, ptr addrspace(1) %out) {
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_phi_nullptr(
; CHECK-SAME: i1 [[COND:%.*]], ptr addrspace(1) [[OUT:%.*]]) {
; CHECK-NEXT:  [[BB0:.*]]:
; CHECK-NEXT:    [[TMP0:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 2
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG1]], !invariant.load [[META0]]
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
; CHECK-NEXT:    [[TMP6:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT:    [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT:    [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT:    [[TMP9:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP4]]
; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP8]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1024 x [4 x i32]], ptr addrspace(3) @scalar_alloca_ptr_with_vector_gep_offset_phi_nullptr.alloca, i32 0, i32 [[TMP13]]
; CHECK-NEXT:    br i1 [[COND]], label %[[BB1:.*]], label %[[BB2:.*]]
; CHECK:       [[BB1]]:
; CHECK-NEXT:    [[GETELEMENTPTR0:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP14]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT:    br label %[[BB2]]
; CHECK:       [[BB2]]:
; CHECK-NEXT:    [[PHI:%.*]] = phi <4 x ptr addrspace(3)> [ [[GETELEMENTPTR0]], %[[BB1]] ], [ zeroinitializer, %[[BB0]] ]
; CHECK-NEXT:    [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(3)> [[PHI]], i64 2
; CHECK-NEXT:    store i32 0, ptr addrspace(3) [[EXTRACTELEMENT]], align 4
; CHECK-NEXT:    ret void
;
bb0:
  %alloca = alloca [4 x i32], align 4, addrspace(5)
  br i1 %cond, label %bb1, label %bb2

bb1:
  %getelementptr0 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
  br label %bb2

bb2:
  %phi = phi <4 x ptr addrspace(5)> [ %getelementptr0, %bb1 ], [ zeroinitializer, %bb0]
  %extractelement = extractelement <4 x ptr addrspace(5)> %phi, i64 2
  store i32 0, ptr addrspace(5) %extractelement
  ret void
}
;.
; CHECK: [[META0]] = !{}
; CHECK: [[RNG1]] = !{i32 0, i32 1025}
;.