xref: /llvm-project/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=sroa,amdgpu-promote-alloca < %s | FileCheck %s
3
4; Make sure that array allocas loaded and stored as multi-element aggregates are handled correctly.
5; Strictly, the promote-alloca pass shouldn't have to deal with this case as it is non-canonical, but
6; the pass should handle it gracefully if it does encounter it.
7; The checks look for lines that previously caused issues in PromoteAlloca (non-canonical). Opt
8; should now leave these unchanged.
9
; Shared types and globals.
;   %Block       - a one-element float array followed by an i32.
;   %gl_PerVertex - the type of @pv, the global most tests store their result to.
;   %struct      - a two-i32 struct, used as the array element type in @alloca_struct.
; Both globals are external and live in addrspace(1).
10%Block = type { [1 x float], i32 }
11%gl_PerVertex = type { <4 x float>, float, [1 x float], [1 x float] }
12%struct = type { i32, i32 }
13
14@block = external addrspace(1) global %Block
15@pv = external addrspace(1) global %gl_PerVertex
16
; promote_1d_aggr: a [1 x float] alloca is written with a whole-aggregate store
; and then read through a GEP whose index is a runtime i32 loaded from @block.
; Expected output: the %f1 alloca is still present, the aggregate store is
; split into an extractvalue plus a scalar float store, and the %i and %foo7
; allocas are gone (the load of the never-written %foo7 shows up as undef).
17define amdgpu_vs void @promote_1d_aggr() #0 {
18; CHECK-LABEL: @promote_1d_aggr(
19; CHECK-NEXT:    [[F1:%.*]] = alloca [1 x float], align 4, addrspace(5)
20; CHECK-NEXT:    [[FOO:%.*]] = getelementptr [[BLOCK:%.*]], ptr addrspace(1) @block, i32 0, i32 1
21; CHECK-NEXT:    [[FOO1:%.*]] = load i32, ptr addrspace(1) [[FOO]], align 4
22; CHECK-NEXT:    [[FOO3:%.*]] = load [1 x float], ptr addrspace(1) @block, align 4
23; CHECK-NEXT:    [[FOO3_FCA_0_EXTRACT:%.*]] = extractvalue [1 x float] [[FOO3]], 0
24; CHECK-NEXT:    [[FOO3_FCA_0_GEP:%.*]] = getelementptr inbounds [1 x float], ptr addrspace(5) [[F1]], i32 0, i32 0
25; CHECK-NEXT:    store float [[FOO3_FCA_0_EXTRACT]], ptr addrspace(5) [[FOO3_FCA_0_GEP]], align 4
26; CHECK-NEXT:    [[FOO5:%.*]] = getelementptr [1 x float], ptr addrspace(5) [[F1]], i32 0, i32 [[FOO1]]
27; CHECK-NEXT:    [[FOO6:%.*]] = load float, ptr addrspace(5) [[FOO5]], align 4
28; CHECK-NEXT:    [[FOO9:%.*]] = insertelement <4 x float> undef, float [[FOO6]], i32 0
29; CHECK-NEXT:    [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[FOO6]], i32 1
30; CHECK-NEXT:    [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[FOO6]], i32 2
31; CHECK-NEXT:    [[FOO12:%.*]] = insertelement <4 x float> [[FOO11]], float [[FOO6]], i32 3
32; CHECK-NEXT:    store <4 x float> [[FOO12]], ptr addrspace(1) @pv, align 16
33; CHECK-NEXT:    ret void
34;
  ; %i round-trips the runtime index read from field 1 of @block.
35  %i = alloca i32, addrspace(5)
36  %f1 = alloca [1 x float], addrspace(5)
37  %foo = getelementptr %Block, ptr addrspace(1) @block, i32 0, i32 1
38  %foo1 = load i32, ptr addrspace(1) %foo
39  store i32 %foo1, ptr addrspace(5) %i
  ; Whole-aggregate load/store: the non-canonical access this test targets.
40  %foo3 = load [1 x float], ptr addrspace(1) @block
41  store [1 x float] %foo3, ptr addrspace(5) %f1
  ; Dynamically indexed scalar read of the array alloca.
42  %foo4 = load i32, ptr addrspace(5) %i
43  %foo5 = getelementptr [1 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
44  %foo6 = load float, ptr addrspace(5) %foo5
  ; %foo7 is loaded without ever being stored to; the loaded float is then
  ; written into all four lanes of the vector stored to @pv.
45  %foo7 = alloca <4 x float>, addrspace(5)
46  %foo8 = load <4 x float>, ptr addrspace(5) %foo7
47  %foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0
48  %foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1
49  %foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2
50  %foo12 = insertelement <4 x float> %foo11, float %foo6, i32 3
51  store <4 x float> %foo12, ptr addrspace(1) @pv
52  ret void
53}
54
; %Block2 is an i32 followed by a two-element float array; @block2 is an
; external addrspace(1) global of that type, used by @promote_store_aggr.
55%Block2 = type { i32, [2 x float] }
56@block2 = external addrspace(1) global %Block2
57
; promote_store_aggr: a [2 x float] alloca is filled with two scalar float
; stores and then read back with a single whole-aggregate load, which is
; stored to field 1 of @block2.
; Expected output: no alloca remains; the aggregate is rebuilt with an
; insertvalue chain starting from poison.
58define amdgpu_vs void @promote_store_aggr() #0 {
59; CHECK-LABEL: @promote_store_aggr(
60; CHECK-NEXT:    [[FOO1:%.*]] = load i32, ptr addrspace(1) @block2, align 4
61; CHECK-NEXT:    [[FOO3:%.*]] = sitofp i32 [[FOO1]] to float
62; CHECK-NEXT:    [[FOO6_FCA_0_INSERT:%.*]] = insertvalue [2 x float] poison, float [[FOO3]], 0
63; CHECK-NEXT:    [[FOO6_FCA_1_INSERT:%.*]] = insertvalue [2 x float] [[FOO6_FCA_0_INSERT]], float 2.000000e+00, 1
64; CHECK-NEXT:    [[FOO7:%.*]] = getelementptr [[BLOCK2:%.*]], ptr addrspace(1) @block2, i32 0, i32 1
65; CHECK-NEXT:    store [2 x float] [[FOO6_FCA_1_INSERT]], ptr addrspace(1) [[FOO7]], align 4
66; CHECK-NEXT:    store <4 x float> splat (float 1.000000e+00), ptr addrspace(1) @pv, align 16
67; CHECK-NEXT:    ret void
68;
69  %i = alloca i32, addrspace(5)
70  %f1 = alloca [2 x float], addrspace(5)
71  %foo1 = load i32, ptr addrspace(1) @block2
72  store i32 %foo1, ptr addrspace(5) %i
73  %foo2 = load i32, ptr addrspace(5) %i
74  %foo3 = sitofp i32 %foo2 to float
  ; Element 0 is written through the alloca pointer itself, element 1 via a GEP.
75  store float %foo3, ptr addrspace(5) %f1
76  %foo5 = getelementptr [2 x float], ptr addrspace(5) %f1, i32 0, i32 1
77  store float 2.000000e+00, ptr addrspace(5) %foo5
  ; Whole-aggregate load of the alloca: the non-canonical access under test.
78  %foo6 = load [2 x float], ptr addrspace(5) %f1
79  %foo7 = getelementptr %Block2, ptr addrspace(1) @block2, i32 0, i32 1
80  store [2 x float] %foo6, ptr addrspace(1) %foo7
81  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, ptr addrspace(1) @pv
82  ret void
83}
84
; %Block3 is a two-element float array followed by an i32; @block3 is an
; external addrspace(1) global of that type, read by several tests below.
85%Block3 = type { [2 x float], i32 }
86@block3 = external addrspace(1) global %Block3
87
; promote_load_from_store_aggr: a [2 x float] alloca is written with a
; whole-aggregate store and then read through a GEP with a runtime index.
; Expected output: no alloca remains; the two elements are extracted from the
; loaded aggregate, inserted into a <2 x float>, and the dynamic read becomes
; an extractelement using the loaded index.
88define amdgpu_vs void @promote_load_from_store_aggr() #0 {
89; CHECK-LABEL: @promote_load_from_store_aggr(
90; CHECK-NEXT:    [[FOO:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 1
91; CHECK-NEXT:    [[FOO1:%.*]] = load i32, ptr addrspace(1) [[FOO]], align 4
92; CHECK-NEXT:    [[FOO3:%.*]] = load [2 x float], ptr addrspace(1) @block3, align 4
93; CHECK-NEXT:    [[FOO3_FCA_0_EXTRACT:%.*]] = extractvalue [2 x float] [[FOO3]], 0
94; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> undef, float [[FOO3_FCA_0_EXTRACT]], i32 0
95; CHECK-NEXT:    [[FOO3_FCA_1_EXTRACT:%.*]] = extractvalue [2 x float] [[FOO3]], 1
96; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[FOO3_FCA_1_EXTRACT]], i32 1
97; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 [[FOO1]]
98; CHECK-NEXT:    [[FOO9:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
99; CHECK-NEXT:    [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[TMP3]], i32 1
100; CHECK-NEXT:    [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[TMP3]], i32 2
101; CHECK-NEXT:    [[FOO12:%.*]] = insertelement <4 x float> [[FOO11]], float [[TMP3]], i32 3
102; CHECK-NEXT:    store <4 x float> [[FOO12]], ptr addrspace(1) @pv, align 16
103; CHECK-NEXT:    ret void
104;
  ; %i round-trips the runtime index read from field 1 of @block3.
105  %i = alloca i32, addrspace(5)
106  %f1 = alloca [2 x float], addrspace(5)
107  %foo = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 1
108  %foo1 = load i32, ptr addrspace(1) %foo
109  store i32 %foo1, ptr addrspace(5) %i
  ; Whole-aggregate store into the alloca: the non-canonical access under test.
110  %foo3 = load [2 x float], ptr addrspace(1) @block3
111  store [2 x float] %foo3, ptr addrspace(5) %f1
  ; Dynamically indexed scalar read of the array alloca.
112  %foo4 = load i32, ptr addrspace(5) %i
113  %foo5 = getelementptr [2 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
114  %foo6 = load float, ptr addrspace(5) %foo5
  ; %foo7 is loaded without ever being stored to; the loaded float is then
  ; written into all four lanes of the vector stored to @pv.
115  %foo7 = alloca <4 x float>, addrspace(5)
116  %foo8 = load <4 x float>, ptr addrspace(5) %foo7
117  %foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0
118  %foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1
119  %foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2
120  %foo12 = insertelement <4 x float> %foo11, float %foo6, i32 3
121  store <4 x float> %foo12, ptr addrspace(1) @pv
122  ret void
123}
124
; promote_memmove_aggr: a [5 x float] alloca is zero-initialised with an
; aggregate store, elements 1 and 3 are overwritten, and a 16-byte memmove
; copies from element 1 onto the start of the same alloca (overlapping
; ranges). Element 0 is then read back.
; Expected output: everything folds to a single store of 1.0 to @pv.
125define amdgpu_vs void @promote_memmove_aggr() #0 {
126; CHECK-LABEL: @promote_memmove_aggr(
127; CHECK-NEXT:    store float 1.000000e+00, ptr addrspace(1) @pv, align 4
128; CHECK-NEXT:    ret void
129;
130  %f1 = alloca [5 x float], addrspace(5)
131  store [5 x float] zeroinitializer, ptr addrspace(5) %f1
132  %foo1 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 1
133  store float 1.0, ptr addrspace(5) %foo1
134  %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3
135  store float 2.0, ptr addrspace(5) %foo2
  ; Destination is %f1 (element 0), source is %foo1 (element 1), length 16 bytes.
136  call void @llvm.memmove.p5.p5.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %foo1, i32 16, i1 false)
137  %foo3 = load float, ptr addrspace(5) %f1
138  store float %foo3, ptr addrspace(1) @pv
139  ret void
140}
141
; promote_memcpy_aggr: a [5 x float] alloca is zero-initialised, element 3 is
; set to 2.0, a runtime-indexed element is set to 3.0, and an 8-byte memcpy
; copies from element 3 onto the start of the same alloca. Element 0 is then
; read back.
; Expected output: no alloca remains; the dynamic store becomes an
; insertelement into a constant <5 x float>, the memcpy becomes a
; shufflevector with mask <3, 4, 2, 3, 4>, and the read is an extractelement
; of lane 0.
142define amdgpu_vs void @promote_memcpy_aggr() #0 {
143; CHECK-LABEL: @promote_memcpy_aggr(
144; CHECK-NEXT:    [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0
145; CHECK-NEXT:    [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4
146; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <5 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 2.000000e+00, float 0.000000e+00>, float 3.000000e+00, i32 [[FOO4]]
147; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <5 x float> [[TMP1]], <5 x float> poison, <5 x i32> <i32 3, i32 4, i32 2, i32 3, i32 4>
148; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <5 x float> [[TMP2]], i32 0
149; CHECK-NEXT:    store float [[TMP3]], ptr addrspace(1) @pv, align 4
150; CHECK-NEXT:    ret void
151;
152  %f1 = alloca [5 x float], addrspace(5)
153  store [5 x float] zeroinitializer, ptr addrspace(5) %f1
154
155  %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3
156  store float 2.0, ptr addrspace(5) %foo2
157
  ; The store index is an i32 loaded from the start of @block3 (field 0).
158  %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0
159  %foo4 = load i32, ptr addrspace(1) %foo3
160  %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
161  store float 3.0, ptr addrspace(5) %foo5
162
  ; Destination is %f1 (element 0), source is %foo2 (element 3), length 8 bytes.
163  call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %foo2, i32 8, i1 false)
164  %foo6 = load float, ptr addrspace(5) %f1
165  store float %foo6, ptr addrspace(1) @pv
166  ret void
167}
168
; promote_memcpy_identity_aggr: a 20-byte memcpy whose source and destination
; are both the start of the same [5 x float] alloca. Element 0, which was
; only ever written by the zeroinitializer store, is then read back.
; Expected output: everything folds to a single store of 0.0 to @pv.
169define amdgpu_vs void @promote_memcpy_identity_aggr() #0 {
170; CHECK-LABEL: @promote_memcpy_identity_aggr(
171; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(1) @pv, align 4
172; CHECK-NEXT:    ret void
173;
174  %f1 = alloca [5 x float], addrspace(5)
175  store [5 x float] zeroinitializer, ptr addrspace(5) %f1
176  %foo1 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 1
177  store float 1.0, ptr addrspace(5) %foo1
178  %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3
179  store float 2.0, ptr addrspace(5) %foo2
  ; Source and destination are the same pointer (%f1); length is 20 bytes.
180  call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %f1, i32 20, i1 false)
181  %foo3 = load float, ptr addrspace(5) %f1
182  store float %foo3, ptr addrspace(1) @pv
183  ret void
184}
185
186; TODO: promote alloca even if there is a memcpy between different allocas
; promote_memcpy_two_aggrs: two separate [5 x float] allocas, both
; zero-initialised; a runtime-indexed element of %f1 is set to 3.0, 8 bytes
; are copied from %f1 to %f2 with memcpy, and %f2 is read at the same runtime
; index.
; Expected output: both allocas and the memcpy call are still present; the
; zeroinitializer stores are split into per-element float stores.
187define amdgpu_vs void @promote_memcpy_two_aggrs() #0 {
188; CHECK-LABEL: @promote_memcpy_two_aggrs(
189; CHECK-NEXT:    [[F1:%.*]] = alloca [5 x float], align 4, addrspace(5)
190; CHECK-NEXT:    [[F2:%.*]] = alloca [5 x float], align 4, addrspace(5)
191; CHECK-NEXT:    [[DOTFCA_0_GEP1:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 0
192; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_0_GEP1]], align 4
193; CHECK-NEXT:    [[DOTFCA_1_GEP2:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 1
194; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_1_GEP2]], align 4
195; CHECK-NEXT:    [[DOTFCA_2_GEP3:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 2
196; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_2_GEP3]], align 4
197; CHECK-NEXT:    [[DOTFCA_3_GEP4:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 3
198; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_3_GEP4]], align 4
199; CHECK-NEXT:    [[DOTFCA_4_GEP5:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 4
200; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_4_GEP5]], align 4
201; CHECK-NEXT:    [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 0
202; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_0_GEP]], align 4
203; CHECK-NEXT:    [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 1
204; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_1_GEP]], align 4
205; CHECK-NEXT:    [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 2
206; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_2_GEP]], align 4
207; CHECK-NEXT:    [[DOTFCA_3_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 3
208; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_3_GEP]], align 4
209; CHECK-NEXT:    [[DOTFCA_4_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 4
210; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_4_GEP]], align 4
211; CHECK-NEXT:    [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0
212; CHECK-NEXT:    [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4
213; CHECK-NEXT:    [[FOO5:%.*]] = getelementptr [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 [[FOO4]]
214; CHECK-NEXT:    store float 3.000000e+00, ptr addrspace(5) [[FOO5]], align 4
215; CHECK-NEXT:    call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 [[F2]], ptr addrspace(5) align 4 [[F1]], i32 8, i1 false)
216; CHECK-NEXT:    [[FOO6:%.*]] = getelementptr [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 [[FOO4]]
217; CHECK-NEXT:    [[FOO7:%.*]] = load float, ptr addrspace(5) [[FOO6]], align 4
218; CHECK-NEXT:    store float [[FOO7]], ptr addrspace(1) @pv, align 4
219; CHECK-NEXT:    ret void
220;
221  %f1 = alloca [5 x float], addrspace(5)
222  %f2 = alloca [5 x float], addrspace(5)
223
224  store [5 x float] zeroinitializer, ptr addrspace(5) %f1
225  store [5 x float] zeroinitializer, ptr addrspace(5) %f2
226
  ; The store index is an i32 loaded from the start of @block3 (field 0).
227  %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0
228  %foo4 = load i32, ptr addrspace(1) %foo3
229  %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
230  store float 3.0, ptr addrspace(5) %foo5
231
  ; Copy between two distinct allocas: destination %f2, source %f1, 8 bytes.
232  call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 %f2, ptr addrspace(5) align 4 %f1, i32 8, i1 false)
233
234  %foo6 = getelementptr [5 x float], ptr addrspace(5) %f2, i32 0, i32 %foo4
235  %foo7 = load float, ptr addrspace(5) %foo6
236  store float %foo7, ptr addrspace(1) @pv
237  ret void
238}
239
240; TODO: promote alloca even if there is a memcpy between the alloca and another address space.
; promote_memcpy_p1p5_aggr: a [5 x float] alloca is zero-initialised, a
; runtime-indexed element is set to 3.0, and 8 bytes are copied with memcpy
; from the alloca (addrspace(5)) to @pv (addrspace(1)). The %src argument is
; not referenced in the body.
; Expected output: the alloca and the memcpy call are still present; the
; zeroinitializer store is split into per-element float stores.
241define amdgpu_vs void @promote_memcpy_p1p5_aggr(ptr addrspace(1) inreg %src) #0 {
242; CHECK-LABEL: @promote_memcpy_p1p5_aggr(
243; CHECK-NEXT:    [[F1:%.*]] = alloca [5 x float], align 4, addrspace(5)
244; CHECK-NEXT:    [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 0
245; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_0_GEP]], align 4
246; CHECK-NEXT:    [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 1
247; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_1_GEP]], align 4
248; CHECK-NEXT:    [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 2
249; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_2_GEP]], align 4
250; CHECK-NEXT:    [[DOTFCA_3_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 3
251; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_3_GEP]], align 4
252; CHECK-NEXT:    [[DOTFCA_4_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 4
253; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_4_GEP]], align 4
254; CHECK-NEXT:    [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0
255; CHECK-NEXT:    [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4
256; CHECK-NEXT:    [[FOO5:%.*]] = getelementptr [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 [[FOO4]]
257; CHECK-NEXT:    store float 3.000000e+00, ptr addrspace(5) [[FOO5]], align 4
258; CHECK-NEXT:    call void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) align 4 @pv, ptr addrspace(5) align 4 [[F1]], i32 8, i1 false)
259; CHECK-NEXT:    ret void
260;
261  %f1 = alloca [5 x float], addrspace(5)
262  store [5 x float] zeroinitializer, ptr addrspace(5) %f1
263
  ; The store index is an i32 loaded from the start of @block3 (field 0).
264  %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0
265  %foo4 = load i32, ptr addrspace(1) %foo3
266  %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
267  store float 3.0, ptr addrspace(5) %foo5
268
  ; Cross-address-space copy: destination @pv (addrspace(1)), source %f1, 8 bytes.
269  call void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) align 4 @pv, ptr addrspace(5) align 4 %f1, i32 8, i1 false)
270  ret void
271}
272
; promote_memcpy_inline_aggr: a [5 x float] alloca is zero-initialised, a
; runtime-indexed element is set to 3.0, and 8 bytes are copied from element 3
; onto the start of the same alloca using the llvm.memcpy.inline intrinsic.
; Element 0 is then read back.
; Expected output: no alloca remains; the dynamic store becomes an
; insertelement into a zeroinitializer <5 x float>, the copy becomes a
; shufflevector with mask <3, 4, 2, 3, 4>, and the read is an extractelement
; of lane 0.
273define amdgpu_vs void @promote_memcpy_inline_aggr() #0 {
274; CHECK-LABEL: @promote_memcpy_inline_aggr(
275; CHECK-NEXT:    [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0
276; CHECK-NEXT:    [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4
277; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <5 x float> zeroinitializer, float 3.000000e+00, i32 [[FOO4]]
278; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <5 x float> [[TMP1]], <5 x float> poison, <5 x i32> <i32 3, i32 4, i32 2, i32 3, i32 4>
279; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <5 x float> [[TMP2]], i32 0
280; CHECK-NEXT:    store float [[TMP3]], ptr addrspace(1) @pv, align 4
281; CHECK-NEXT:    ret void
282;
283  %f1 = alloca [5 x float], addrspace(5)
284  store [5 x float] zeroinitializer, ptr addrspace(5) %f1
285
  ; %foo2 (element 3) is only used as the source pointer of the copy below.
286  %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3
287  %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0
288  %foo4 = load i32, ptr addrspace(1) %foo3
289  %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
290  store float 3.0, ptr addrspace(5) %foo5
291
  ; Destination is %f1 (element 0), source is %foo2 (element 3), length 8 bytes.
292  call void @llvm.memcpy.inline.p5.p5.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %foo2, i32 8, i1 false)
293  %foo6 = load float, ptr addrspace(5) %f1
294  store float %foo6, ptr addrspace(1) @pv
295  ret void
296}
297
; Declarations of the memory intrinsics called by the tests above:
; memcpy within addrspace(5), memcpy from addrspace(5) to addrspace(1),
; memcpy.inline within addrspace(5), and memmove within addrspace(5).
298declare void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
299declare void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
300declare void @llvm.memcpy.inline.p5.p5.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
301declare void @llvm.memmove.p5.p5.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
302
; External addrspace(1) globals used by @promote_double_aggr: @tmp_g supplies
; the input doubles (from its leading [4 x double] field) and @frag_color
; receives the <4 x float> result.
303@tmp_g = external addrspace(1) global { [4 x double], <2 x double>, <3 x double>, <4 x double> }
304@frag_color = external addrspace(1) global <4 x float>
305
; promote_double_aggr: a [2 x double] alloca is written with a whole-aggregate
; store of a value built by insertvalue, then accessed element-wise with
; scalar double loads and a scalar store to element 0.
; Expected output: no alloca remains; the elements are read with extractvalue
; from the insertvalue result and the arithmetic is done directly on them.
306define amdgpu_ps void @promote_double_aggr() #0 {
307; CHECK-LABEL: @promote_double_aggr(
308; CHECK-NEXT:    [[FOO:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 0
309; CHECK-NEXT:    [[FOO1:%.*]] = load double, ptr addrspace(1) [[FOO]], align 8
310; CHECK-NEXT:    [[FOO2:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 1
311; CHECK-NEXT:    [[FOO3:%.*]] = load double, ptr addrspace(1) [[FOO2]], align 8
312; CHECK-NEXT:    [[FOO4:%.*]] = insertvalue [2 x double] undef, double [[FOO1]], 0
313; CHECK-NEXT:    [[FOO5:%.*]] = insertvalue [2 x double] [[FOO4]], double [[FOO3]], 1
314; CHECK-NEXT:    [[FOO5_FCA_0_EXTRACT:%.*]] = extractvalue [2 x double] [[FOO5]], 0
315; CHECK-NEXT:    [[FOO5_FCA_1_EXTRACT:%.*]] = extractvalue [2 x double] [[FOO5]], 1
316; CHECK-NEXT:    [[FOO10:%.*]] = fadd double [[FOO5_FCA_1_EXTRACT]], [[FOO5_FCA_1_EXTRACT]]
317; CHECK-NEXT:    [[FOO16:%.*]] = fadd double [[FOO10]], [[FOO5_FCA_1_EXTRACT]]
318; CHECK-NEXT:    [[FOO17:%.*]] = fptrunc double [[FOO16]] to float
319; CHECK-NEXT:    [[FOO18:%.*]] = insertelement <4 x float> undef, float [[FOO17]], i32 0
320; CHECK-NEXT:    [[FOO19:%.*]] = insertelement <4 x float> [[FOO18]], float [[FOO17]], i32 1
321; CHECK-NEXT:    [[FOO20:%.*]] = insertelement <4 x float> [[FOO19]], float [[FOO17]], i32 2
322; CHECK-NEXT:    [[FOO21:%.*]] = insertelement <4 x float> [[FOO20]], float [[FOO17]], i32 3
323; CHECK-NEXT:    store <4 x float> [[FOO21]], ptr addrspace(1) @frag_color, align 16
324; CHECK-NEXT:    ret void
325;
326  %s = alloca [2 x double], addrspace(5)
  ; Build a [2 x double] from the first two doubles of @tmp_g's array field.
327  %foo = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 0
328  %foo1 = load double, ptr addrspace(1) %foo
329  %foo2 = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 1
330  %foo3 = load double, ptr addrspace(1) %foo2
331  %foo4 = insertvalue [2 x double] undef, double %foo1, 0
332  %foo5 = insertvalue [2 x double] %foo4, double %foo3, 1
  ; Whole-aggregate store into the alloca: the non-canonical access under test.
333  store [2 x double] %foo5, ptr addrspace(5) %s
  ; Element 1 is loaded twice and summed; the sum overwrites element 0.
334  %foo6 = getelementptr [2 x double], ptr addrspace(5) %s, i32 0, i32 1
335  %foo7 = load double, ptr addrspace(5) %foo6
336  %foo8 = getelementptr [2 x double], ptr addrspace(5) %s, i32 0, i32 1
337  %foo9 = load double, ptr addrspace(5) %foo8
338  %foo10 = fadd double %foo7, %foo9
339  store double %foo10, ptr addrspace(5) %s
  ; Read back element 0 (the sum) and element 1, and add them.
340  %foo13 = load double, ptr addrspace(5) %s
341  %foo14 = getelementptr [2 x double], ptr addrspace(5) %s, i32 0, i32 1
342  %foo15 = load double, ptr addrspace(5) %foo14
343  %foo16 = fadd double %foo13, %foo15
  ; Truncate to float and write it into all four lanes of the output vector.
344  %foo17 = fptrunc double %foo16 to float
345  %foo18 = insertelement <4 x float> undef, float %foo17, i32 0
346  %foo19 = insertelement <4 x float> %foo18, float %foo17, i32 1
347  %foo20 = insertelement <4 x float> %foo19, float %foo17, i32 2
348  %foo21 = insertelement <4 x float> %foo20, float %foo17, i32 3
349  store <4 x float> %foo21, ptr addrspace(1) @frag_color
350  ret void
351}
352
353; Don't crash on a type that isn't a valid vector element.
; alloca_struct: the only instruction besides the return is an unused alloca
; of an array whose element type is the struct type %struct.
; Expected output: the entry block contains nothing but `ret void`.
354define amdgpu_kernel void @alloca_struct() #0 {
355; CHECK-LABEL: @alloca_struct(
356; CHECK-NEXT:  entry:
357; CHECK-NEXT:    ret void
358;
359entry:
360  %alloca = alloca [2 x %struct], align 4, addrspace(5)
361  ret void
362}
363