xref: /llvm-project/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll (revision f104eb6e15503b770734e3a59937c9df865b2814)
1; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-promote-alloca,sroa,instcombine' < %s | FileCheck -check-prefix=OPT %s
2; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-promote-alloca,sroa,instcombine' -amdgpu-promote-alloca-to-vector-limit=32 < %s | FileCheck -check-prefix=LIMIT32 %s
3
4target datalayout = "A5"
5
; Kernel with flat work-group size 1..1024 (#0) and a dynamically indexed
; [8 x i64] addrspace(5) array (8 x 64 bits).
;   default pipeline:  the alloca must disappear and an <8 x i64> value appear.
;   limit=32 pipeline: the alloca must remain and no <8 x i64> may appear.
; Both check prefixes are anchored to this function with a -LABEL line so a
; match cannot be satisfied by a neighbouring function's output.
; OPT-LABEL: @alloca_8xi64_max1024(
; OPT-NOT: alloca
; OPT: <8 x i64>
; LIMIT32-LABEL: @alloca_8xi64_max1024(
; LIMIT32: alloca
; LIMIT32-NOT: <8 x i64>
define amdgpu_kernel void @alloca_8xi64_max1024(ptr addrspace(1) %out, i32 %index) #0 {
entry:
  %tmp = alloca [8 x i64], addrspace(5)
  store i64 0, ptr addrspace(5) %tmp
  ; Element selected by the runtime value %index.
  %tmp1 = getelementptr [8 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i64, ptr addrspace(5) %tmp1
  store i64 %tmp2, ptr addrspace(1) %out
  ret void
}
20
; Kernel with flat work-group size 1..1024 (#0) and a dynamically indexed
; [9 x i64] addrspace(5) array (9 x 64 bits).
;   default pipeline:  the alloca [9 x i64] must remain; no <9 x i64> value.
;   limit=32 pipeline: the alloca must remain; no <9 x i64> value.
; Both check prefixes are anchored to this function with a -LABEL line so a
; match cannot be satisfied by a neighbouring function's output.
; OPT-LABEL: @alloca_9xi64_max1024(
; OPT: alloca [9 x i64]
; OPT-NOT: <9 x i64>
; LIMIT32-LABEL: @alloca_9xi64_max1024(
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i64>
define amdgpu_kernel void @alloca_9xi64_max1024(ptr addrspace(1) %out, i32 %index) #0 {
entry:
  %tmp = alloca [9 x i64], addrspace(5)
  store i64 0, ptr addrspace(5) %tmp
  ; Element selected by the runtime value %index.
  %tmp1 = getelementptr [9 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i64, ptr addrspace(5) %tmp1
  store i64 %tmp2, ptr addrspace(1) %out
  ret void
}
35
; Kernel with flat work-group size 1..512 (#1) and a dynamically indexed
; [16 x i64] addrspace(5) array (16 x 64 bits).
;   default pipeline:  the alloca must disappear and a <16 x i64> value appear.
;   limit=32 pipeline: the alloca must remain and no <16 x i64> may appear.
; Both check prefixes are anchored to this function with a -LABEL line so a
; match cannot be satisfied by a neighbouring function's output.
; OPT-LABEL: @alloca_16xi64_max512(
; OPT-NOT: alloca
; OPT: <16 x i64>
; LIMIT32-LABEL: @alloca_16xi64_max512(
; LIMIT32: alloca
; LIMIT32-NOT: <16 x i64>
define amdgpu_kernel void @alloca_16xi64_max512(ptr addrspace(1) %out, i32 %index) #1 {
entry:
  %tmp = alloca [16 x i64], addrspace(5)
  store i64 0, ptr addrspace(5) %tmp
  ; Element selected by the runtime value %index.
  %tmp1 = getelementptr [16 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i64, ptr addrspace(5) %tmp1
  store i64 %tmp2, ptr addrspace(1) %out
  ret void
}
50
; Kernel with flat work-group size 1..512 (#1) and a dynamically indexed
; [17 x i64] addrspace(5) array (17 x 64 bits).
;   default pipeline:  the alloca [17 x i64] must remain; no <17 x i64> value.
;   limit=32 pipeline: the alloca must remain; no <17 x i64> value.
; Both check prefixes are anchored to this function with a -LABEL line so a
; match cannot be satisfied by a neighbouring function's output.
; OPT-LABEL: @alloca_17xi64_max512(
; OPT: alloca [17 x i64]
; OPT-NOT: <17 x i64>
; LIMIT32-LABEL: @alloca_17xi64_max512(
; LIMIT32: alloca
; LIMIT32-NOT: <17 x i64>
define amdgpu_kernel void @alloca_17xi64_max512(ptr addrspace(1) %out, i32 %index) #1 {
entry:
  %tmp = alloca [17 x i64], addrspace(5)
  store i64 0, ptr addrspace(5) %tmp
  ; Element selected by the runtime value %index.
  %tmp1 = getelementptr [17 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i64, ptr addrspace(5) %tmp1
  store i64 %tmp2, ptr addrspace(1) %out
  ret void
}
65
; Kernel with flat work-group size 1..512 (#1) and a dynamically indexed
; [9 x i128] addrspace(5) array (9 x 128 bits).
;   default pipeline:  the alloca [9 x i128] must remain; no <9 x i128> value.
;   limit=32 pipeline: the alloca must remain; no <9 x i128> value.
; Both check prefixes are anchored to this function with a -LABEL line so a
; match cannot be satisfied by a neighbouring function's output.
; OPT-LABEL: @alloca_9xi128_max512(
; OPT: alloca [9 x i128]
; OPT-NOT: <9 x i128>
; LIMIT32-LABEL: @alloca_9xi128_max512(
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i128>
define amdgpu_kernel void @alloca_9xi128_max512(ptr addrspace(1) %out, i32 %index) #1 {
entry:
  %tmp = alloca [9 x i128], addrspace(5)
  store i128 0, ptr addrspace(5) %tmp
  ; Element selected by the runtime value %index.
  %tmp1 = getelementptr [9 x i128], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i128, ptr addrspace(5) %tmp1
  store i128 %tmp2, ptr addrspace(1) %out
  ret void
}
80
; Kernel with flat work-group size 1..256 (#2) and a dynamically indexed
; [9 x i128] addrspace(5) array (9 x 128 bits).
;   default pipeline:  the alloca must disappear and a <9 x i128> value appear.
;   limit=32 pipeline: the alloca must remain and no <9 x i128> may appear.
; Both check prefixes are anchored to this function with a -LABEL line so a
; match cannot be satisfied by a neighbouring function's output.
; OPT-LABEL: @alloca_9xi128_max256(
; OPT-NOT: alloca
; OPT: <9 x i128>
; LIMIT32-LABEL: @alloca_9xi128_max256(
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i128>
define amdgpu_kernel void @alloca_9xi128_max256(ptr addrspace(1) %out, i32 %index) #2 {
entry:
  %tmp = alloca [9 x i128], addrspace(5)
  store i128 0, ptr addrspace(5) %tmp
  ; Element selected by the runtime value %index.
  %tmp1 = getelementptr [9 x i128], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i128, ptr addrspace(5) %tmp1
  store i128 %tmp2, ptr addrspace(1) %out
  ret void
}
95
; Kernel with flat work-group size 1..256 (#2) and a dynamically indexed
; [16 x i128] addrspace(5) array (16 x 128 bits).
;   default pipeline:  the alloca must disappear and a <16 x i128> value appear.
;   limit=32 pipeline: the alloca must remain and no <16 x i128> may appear.
; Both check prefixes are anchored to this function with a -LABEL line so a
; match cannot be satisfied by a neighbouring function's output.
; OPT-LABEL: @alloca_16xi128_max256(
; OPT-NOT: alloca
; OPT: <16 x i128>
; LIMIT32-LABEL: @alloca_16xi128_max256(
; LIMIT32: alloca
; LIMIT32-NOT: <16 x i128>
define amdgpu_kernel void @alloca_16xi128_max256(ptr addrspace(1) %out, i32 %index) #2 {
entry:
  %tmp = alloca [16 x i128], addrspace(5)
  store i128 0, ptr addrspace(5) %tmp
  ; Element selected by the runtime value %index.
  %tmp1 = getelementptr [16 x i128], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i128, ptr addrspace(5) %tmp1
  store i128 %tmp2, ptr addrspace(1) %out
  ret void
}
110
; Kernel with flat work-group size 1..256 (#2) and a dynamically indexed
; [9 x i256] addrspace(5) array (9 x 256 bits).
;   default pipeline:  the alloca [9 x i256] must remain; no <9 x i256> value.
;   limit=32 pipeline: the alloca must remain; no <9 x i256> value.
; Both check prefixes are anchored to this function with a -LABEL line so a
; match cannot be satisfied by a neighbouring function's output.
; OPT-LABEL: @alloca_9xi256_max256(
; OPT: alloca [9 x i256]
; OPT-NOT: <9 x i256>
; LIMIT32-LABEL: @alloca_9xi256_max256(
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i256>
define amdgpu_kernel void @alloca_9xi256_max256(ptr addrspace(1) %out, i32 %index) #2 {
entry:
  %tmp = alloca [9 x i256], addrspace(5)
  store i256 0, ptr addrspace(5) %tmp
  ; Element selected by the runtime value %index.
  %tmp1 = getelementptr [9 x i256], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i256, ptr addrspace(5) %tmp1
  store i256 %tmp2, ptr addrspace(1) %out
  ret void
}
125
; Kernel with flat work-group size 1..256 (#2) and a dynamically indexed
; [9 x i64] addrspace(5) array (9 x 64 bits).
;   default pipeline:  the alloca must disappear and a <9 x i64> value appear.
;   limit=32 pipeline: the alloca must remain and no <9 x i64> may appear.
; Both check prefixes are anchored to this function with a -LABEL line so a
; match cannot be satisfied by a neighbouring function's output.
; OPT-LABEL: @alloca_9xi64_max256(
; OPT-NOT: alloca
; OPT: <9 x i64>
; LIMIT32-LABEL: @alloca_9xi64_max256(
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i64>
define amdgpu_kernel void @alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #2 {
entry:
  %tmp = alloca [9 x i64], addrspace(5)
  store i64 0, ptr addrspace(5) %tmp
  ; Element selected by the runtime value %index.
  %tmp1 = getelementptr [9 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i64, ptr addrspace(5) %tmp1
  store i64 %tmp2, ptr addrspace(1) %out
  ret void
}
140
; Ordinary (non-kernel) function with flat work-group size 1..256 (#2) and a
; dynamically indexed [9 x i64] addrspace(5) array (9 x 64 bits).
;   default pipeline:  the alloca must remain; no <9 x i64> value.
;   limit=32 pipeline: the alloca must remain; no <9 x i64> value.
; The same array in a kernel with the same attribute group
; (@alloca_9xi64_max256) is expected to become a vector, so this case pins the
; different outcome for a callable function.
; Both check prefixes are anchored to this function with a -LABEL line so a
; match cannot be satisfied by a neighbouring function's output.
; OPT-LABEL: @func_alloca_9xi64_max256(
; OPT: alloca
; OPT-NOT: <9 x i64>
; LIMIT32-LABEL: @func_alloca_9xi64_max256(
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i64>
define void @func_alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #2 {
entry:
  %tmp = alloca [9 x i64], addrspace(5)
  store i64 0, ptr addrspace(5) %tmp
  ; Element selected by the runtime value %index.
  %tmp1 = getelementptr [9 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i64, ptr addrspace(5) %tmp1
  store i64 %tmp2, ptr addrspace(1) %out
  ret void
}
155
; Non-kernel function marked alwaysinline, with flat work-group size 1..256
; (#3) and a dynamically indexed [9 x i64] addrspace(5) array (9 x 64 bits).
;   default pipeline:  the alloca must disappear and a <9 x i64> value appear.
;   limit=32 pipeline: the alloca must remain and no <9 x i64> may appear.
; Unlike @func_alloca_9xi64_max256, which lacks alwaysinline, the default
; pipeline is expected to vectorize the array here.
; Both check prefixes are anchored to this function with a -LABEL line so a
; match cannot be satisfied by a neighbouring function's output.
; OPT-LABEL: @alwaysinlined_func_alloca_9xi64_max256(
; OPT-NOT: alloca
; OPT: <9 x i64>
; LIMIT32-LABEL: @alwaysinlined_func_alloca_9xi64_max256(
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i64>
define void @alwaysinlined_func_alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #3 {
entry:
  %tmp = alloca [9 x i64], addrspace(5)
  store i64 0, ptr addrspace(5) %tmp
  ; Element selected by the runtime value %index.
  %tmp1 = getelementptr [9 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i64, ptr addrspace(5) %tmp1
  store i64 %tmp2, ptr addrspace(1) %out
  ret void
}
170
; Attribute groups referenced by the functions in this file. Every group sets
; "amdgpu-flat-work-group-size" (written as "min,max"); the function names'
; _maxNNN suffix mirrors the upper bound. Group #3 additionally carries
; alwaysinline.
attributes #0 = { "amdgpu-flat-work-group-size"="1,1024" }
attributes #1 = { "amdgpu-flat-work-group-size"="1,512" }
attributes #2 = { "amdgpu-flat-work-group-size"="1,256" }
attributes #3 = { "amdgpu-flat-work-group-size"="1,256" alwaysinline }
175