xref: /llvm-project/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll (revision ba1759c498367c09d0dd7bcccad2ef0c138ca06e)
1; RUN: opt -mtriple=nvptx64-nvidia-cuda -passes=load-store-vectorizer -S -o - %s | FileCheck %s
2
; Check that the load/store vectorizer moves loads/stores across intervening
; instructions only when it is safe to do so.
;
;  - Loads can only be moved across instructions that don't write memory or
;    throw.
;  - Stores can only be moved across instructions that don't read memory, write
;    memory, or throw.
9
; External callees used as the "intervening instruction" in each test. They
; differ only in the attribute group attached to the declaration.

; No attributes at all.
declare void @fn()

; Variants whose attribute group includes nounwind and willreturn (#0, #1, #2).
declare void @fn_nounwind() #0
declare void @fn_nounwind_writeonly() #1
declare void @fn_nounwind_readonly() #2

; Variants restricted in memory effects only (#3, #4); neither group lists
; nounwind or willreturn.
declare void @fn_writeonly() #3
declare void @fn_readonly() #4

; readnone together with nounwind and willreturn (#5).
declare void @fn_readnone() #5
17
; @fn is declared without attributes, so the expected output keeps one scalar
; load on each side of the call.
; CHECK-LABEL: @load_fn
; CHECK: load
; CHECK: call void @fn()
; CHECK: load
define void @load_fn(ptr %p) #0 {
  ; Address of the i32 element immediately after %p.
  %p.next = getelementptr i32, ptr %p, i32 1
  %lo = load i32, ptr %p, align 8
  call void @fn()
  %hi = load i32, ptr %p.next, align 4
  ret void
}
30
; The callee is nounwind and willreturn (#0) but has no memory restriction, so
; the expected output still has a scalar load before and after the call.
; CHECK-LABEL: @load_fn_nounwind
; CHECK: load
; CHECK: call void @fn_nounwind()
; CHECK: load
define void @load_fn_nounwind(ptr %p) #0 {
  ; Address of the i32 element immediately after %p.
  %p.next = getelementptr i32, ptr %p, i32 1
  %lo = load i32, ptr %p, align 8
  call void @fn_nounwind() #0
  %hi = load i32, ptr %p.next, align 4
  ret void
}
43
; The callee is writeonly (#1); per the rule in the file header, loads may not
; cross an instruction that writes, so both scalar loads are expected to remain.
; CHECK-LABEL: @load_fn_nounwind_writeonly
; CHECK: load
; CHECK: call void @fn_nounwind_writeonly()
; CHECK: load
define void @load_fn_nounwind_writeonly(ptr %p) #0 {
  ; Address of the i32 element immediately after %p.
  %p.next = getelementptr i32, ptr %p, i32 1
  %lo = load i32, ptr %p, align 8
  call void @fn_nounwind_writeonly() #1
  %hi = load i32, ptr %p.next, align 4
  ret void
}
56
; Positive case: the callee is readonly, nounwind and willreturn (#2), so the
; two i32 loads are expected to merge into a single <2 x i32> load. The DAG
; directives accept the vector load on either side of the call.
; CHECK-LABEL: @load_fn_nounwind_readonly
; CHECK-DAG: load <2 x i32>
; CHECK-DAG: call void @fn_nounwind_readonly()
define void @load_fn_nounwind_readonly(ptr %p) #0 {
  ; Address of the i32 element immediately after %p.
  %p.next = getelementptr i32, ptr %p, i32 1
  %lo = load i32, ptr %p, align 8
  call void @fn_nounwind_readonly() #2
  %hi = load i32, ptr %p.next, align 4
  ret void
}
68
; The callee is readonly (#4) but, unlike #2, its attribute group lists neither
; nounwind nor willreturn. The expected output keeps a scalar load on each side
; of the call. The call directive spells out "()" like the sibling tests so it
; cannot prefix-match a longer callee name.
; CHECK-LABEL: @load_fn_readonly
; CHECK: load
; CHECK: call void @fn_readonly()
; CHECK: load
define void @load_fn_readonly(ptr %p) #0 {
  ; Address of the i32 element immediately after %p.
  %p.1 = getelementptr i32, ptr %p, i32 1

  %v0 = load i32, ptr %p, align 8
  call void @fn_readonly() #4
  %v1 = load i32, ptr %p.1, align 4
  ret void
}
81
; The callee is writeonly (#3) with no nounwind or willreturn in its attribute
; group; both scalar loads are expected to remain around the call.
; CHECK-LABEL: @load_fn_writeonly
; CHECK: load
; CHECK: call void @fn_writeonly()
; CHECK: load
define void @load_fn_writeonly(ptr %p) #0 {
  ; Address of the i32 element immediately after %p.
  %p.next = getelementptr i32, ptr %p, i32 1
  %lo = load i32, ptr %p, align 8
  call void @fn_writeonly() #3
  %hi = load i32, ptr %p.next, align 4
  ret void
}
94
; Positive case: the callee is readnone, nounwind and willreturn (#5), so the
; two i32 loads are expected to merge into a single <2 x i32> load. The DAG
; directives accept the vector load on either side of the call.
; CHECK-LABEL: @load_fn_readnone
; CHECK-DAG: load <2 x i32>
; CHECK-DAG: call void @fn_readnone()
define void @load_fn_readnone(ptr %p) #0 {
  ; Address of the i32 element immediately after %p.
  %p.next = getelementptr i32, ptr %p, i32 1
  %lo = load i32, ptr %p, align 8
  call void @fn_readnone() #5
  %hi = load i32, ptr %p.next, align 4
  ret void
}
106
107; ------------------------------------------------
108; Same tests, but now for stores instead of loads.
109; ------------------------------------------------
110
; @fn is declared without attributes, so the expected output keeps one scalar
; store on each side of the call.
; CHECK-LABEL: @store_fn
; CHECK: store
; CHECK: call void @fn()
; CHECK: store
define void @store_fn(ptr %p) #0 {
  ; Address of the i32 element immediately after %p.
  %p.1 = getelementptr i32, ptr %p, i32 1

  ; Explicit alignments mirror the load tests and the first store of
  ; @store_fn_readnone, so that the intervening call (rather than an
  ; under-aligned first store) is what keeps these stores scalar.
  store i32 0, ptr %p, align 8
  call void @fn()
  store i32 0, ptr %p.1, align 4
  ret void
}
123
; The callee is nounwind and willreturn (#0) but has no memory restriction, so
; the expected output still has a scalar store before and after the call.
; CHECK-LABEL: @store_fn_nounwind
; CHECK: store
; CHECK: call void @fn_nounwind()
; CHECK: store
define void @store_fn_nounwind(ptr %p) #0 {
  ; Address of the i32 element immediately after %p.
  %p.1 = getelementptr i32, ptr %p, i32 1

  ; Explicit alignments mirror the load tests and the first store of
  ; @store_fn_readnone, so that the intervening call (rather than an
  ; under-aligned first store) is what keeps these stores scalar.
  store i32 0, ptr %p, align 8
  call void @fn_nounwind() #0
  store i32 0, ptr %p.1, align 4
  ret void
}
136
; The callee is writeonly (#1); per the rule in the file header, stores may not
; cross an instruction that writes, so both scalar stores are expected to
; remain.
; CHECK-LABEL: @store_fn_nounwind_writeonly
; CHECK: store
; CHECK: call void @fn_nounwind_writeonly()
; CHECK: store
define void @store_fn_nounwind_writeonly(ptr %p) #0 {
  ; Address of the i32 element immediately after %p.
  %p.1 = getelementptr i32, ptr %p, i32 1

  ; Explicit alignments mirror the load tests and the first store of
  ; @store_fn_readnone, so that the intervening call (rather than an
  ; under-aligned first store) is what keeps these stores scalar.
  store i32 0, ptr %p, align 8
  call void @fn_nounwind_writeonly() #1
  store i32 0, ptr %p.1, align 4
  ret void
}
149
; The callee is readonly (#2). Unlike the load case, this is expected to block
; merging: per the file header, stores may not cross an instruction that reads.
; CHECK-LABEL: @store_fn_nounwind_readonly
; CHECK: store
; CHECK: call void @fn_nounwind_readonly()
; CHECK: store
define void @store_fn_nounwind_readonly(ptr %p) #0 {
  ; Address of the i32 element immediately after %p.
  %p.1 = getelementptr i32, ptr %p, i32 1

  ; Explicit alignments mirror the load tests and the first store of
  ; @store_fn_readnone, so that the intervening call (rather than an
  ; under-aligned first store) is what keeps these stores scalar.
  store i32 0, ptr %p, align 8
  call void @fn_nounwind_readonly() #2
  store i32 0, ptr %p.1, align 4
  ret void
}
162
; The callee is readonly (#4) with neither nounwind nor willreturn in its
; attribute group; both scalar stores are expected to remain. The call
; directive spells out "()" like the sibling tests so it cannot prefix-match a
; longer callee name.
; CHECK-LABEL: @store_fn_readonly
; CHECK: store
; CHECK: call void @fn_readonly()
; CHECK: store
define void @store_fn_readonly(ptr %p) #0 {
  ; Address of the i32 element immediately after %p.
  %p.1 = getelementptr i32, ptr %p, i32 1

  ; Explicit alignments mirror the load tests and the first store of
  ; @store_fn_readnone, so that the intervening call (rather than an
  ; under-aligned first store) is what keeps these stores scalar.
  store i32 0, ptr %p, align 8
  call void @fn_readonly() #4
  store i32 0, ptr %p.1, align 4
  ret void
}
175
; The callee is writeonly (#3) with neither nounwind nor willreturn in its
; attribute group; both scalar stores are expected to remain around the call.
; CHECK-LABEL: @store_fn_writeonly
; CHECK: store
; CHECK: call void @fn_writeonly()
; CHECK: store
define void @store_fn_writeonly(ptr %p) #0 {
  ; Address of the i32 element immediately after %p.
  %p.1 = getelementptr i32, ptr %p, i32 1

  ; Explicit alignments mirror the load tests and the first store of
  ; @store_fn_readnone, so that the intervening call (rather than an
  ; under-aligned first store) is what keeps these stores scalar.
  store i32 0, ptr %p, align 8
  call void @fn_writeonly() #3
  store i32 0, ptr %p.1, align 4
  ret void
}
188
; This is the only store idiom we can vectorize: the callee is readnone,
; nounwind and willreturn (#5), so it neither reads, writes, nor throws. The DAG
; directives accept the vector store on either side of the call.
; CHECK-LABEL: @store_fn_readnone
; CHECK-DAG: store <2 x i32>
; CHECK-DAG: call void @fn_readnone()
define void @store_fn_readnone(ptr %p) #0 {
  ; Address of the i32 element immediately after %p.
  %p.1 = getelementptr i32, ptr %p, i32 1

  ; %p is 8-byte aligned, so %p.1 (= %p + 4 bytes) can only be 4-byte aligned;
  ; claiming align 8 on both stores would be self-contradictory.
  store i32 0, ptr %p, align 8
  call void @fn_readnone() #5
  store i32 0, ptr %p.1, align 4
  ret void
}
201
202
; Attribute groups referenced by the declarations, definitions and call sites
; in this file.

; Groups that include nounwind and willreturn, with increasing memory
; restrictions.
attributes #0 = { willreturn nounwind }
attributes #1 = { writeonly willreturn nounwind }
attributes #2 = { readonly willreturn nounwind }

; Memory restriction only; neither group lists nounwind or willreturn.
attributes #3 = { writeonly }
attributes #4 = { readonly }

; readnone is only tested together with nounwind and willreturn; there is no
; separate readnone variant without them.
; NOTE(review): the original rationale here was "readnone implies nounwind" --
; confirm that against the current LangRef.
attributes #5 = { readnone willreturn nounwind }
210