// RUN: mlir-opt %s --pass-pipeline="builtin.module(llvm.func(sroa))" --split-input-file | FileCheck %s

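// This file checks SROA (scalar replacement of aggregates) handling of the
// LLVM dialect memory intrinsics: memsets and memcpys/memmoves that hit a
// split-up alloca are rewritten per surviving slot, with their lengths shrunk
// accordingly, while accesses that cannot be split (partial element
// overwrites, volatile accesses, or out-of-bounds lengths) are left alone.
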
// CHECK-LABEL: llvm.func @memset
llvm.func @memset() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // After SROA, only one i32 will actually be used, so only 4 bytes will be set.
  // CHECK-DAG: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 16 bytes means it will span over the first 4 i32 entries.
  %memset_len = llvm.mlir.constant(16 : i32) : i32
  // CHECK: "llvm.intr.memset"(%[[ALLOCA]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
  "llvm.intr.memset"(%1, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

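// Unlike plain memset, the inline variants carry their length as a `len`
// attribute instead of an SSA operand, so the shrunken size appears directly
// in the rewritten op rather than as a new constant.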
// CHECK-LABEL: llvm.func @memset_inline
llvm.func @memset_inline() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // After SROA, only one i32 will actually be used, so only 4 bytes will be set.
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 16 bytes means it will span over the first 4 i32 entries.
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 16}> : (!llvm.ptr, i8) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_partial
llvm.func @memset_partial() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // After SROA, only the second i32 will actually be used. As the memset writes
  // up to half of it, only 2 bytes will be set.
  // CHECK-DAG: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(2 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 6 bytes means it will span over the first i32 and half of the second i32.
  %memset_len = llvm.mlir.constant(6 : i32) : i32
  // CHECK: "llvm.intr.memset"(%[[ALLOCA]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
  "llvm.intr.memset"(%1, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_inline_partial
llvm.func @memset_inline_partial() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // After SROA, only the second i32 will actually be used. As the memset writes
  // up to half of it, only 2 bytes will be set.
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 6 bytes means it will span over the first i32 and half of the second i32.
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 2 : i64}>
  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 6}> : (!llvm.ptr, i8) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_full
llvm.func @memset_full() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // After SROA, only one i32 will actually be used, so only 4 bytes will be set.
  // CHECK-DAG: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 40 bytes means it will span over the entire array.
  %memset_len = llvm.mlir.constant(40 : i32) : i32
  // CHECK: "llvm.intr.memset"(%[[ALLOCA]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
  "llvm.intr.memset"(%1, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_inline_full
llvm.func @memset_inline_full() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // After SROA, only one i32 will actually be used, so only 4 bytes will be set.
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 40 bytes means it will span over the entire array.
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 40}> : (!llvm.ptr, i8) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

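// When the memset is longer than the alloca itself, the write is out of
// bounds, so SROA must not split the slot: the alloca keeps its array type
// and the memset is left untouched.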
// CHECK-LABEL: llvm.func @memset_too_much
llvm.func @memset_too_much() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32>
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // CHECK-DAG: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(41 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 41 bytes means it will span over the entire array, and then some.
  %memset_len = llvm.mlir.constant(41 : i32) : i32
  // CHECK: "llvm.intr.memset"(%[[ALLOCA]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
  "llvm.intr.memset"(%1, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_inline_too_much
llvm.func @memset_inline_too_much() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32>
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 41 bytes means it will span over the entire array, and then some.
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 41 : i64}>
  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 41}> : (!llvm.ptr, i8) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

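// Volatile memory intrinsics are not handled by SROA: the alloca and the
// memset below must be preserved exactly as they are.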
// CHECK-LABEL: llvm.func @memset_no_volatile
llvm.func @memset_no_volatile() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32>
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // CHECK-DAG: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(16 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  %memset_len = llvm.mlir.constant(16 : i32) : i32
  // CHECK: "llvm.intr.memset"(%[[ALLOCA]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = true}>
  "llvm.intr.memset"(%1, %memset_value, %memset_len) <{isVolatile = true}> : (!llvm.ptr, i8, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_inline_no_volatile
llvm.func @memset_inline_no_volatile() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32>
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = true, len = 16 : i64}>
  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = true, len = 16}> : (!llvm.ptr, i8) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

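// A memset through a GEP only covers the field the GEP selects, so it maps
// cleanly onto the corresponding slot after splitting.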
// CHECK-LABEL: llvm.func @indirect_memset
llvm.func @indirect_memset() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // CHECK-DAG: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // This memset will only cover the selected element.
  %memset_len = llvm.mlir.constant(4 : i32) : i32
  %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32)>
  // CHECK: "llvm.intr.memset"(%[[ALLOCA]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
  "llvm.intr.memset"(%2, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @indirect_memset_inline
llvm.func @indirect_memset_inline() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // This memset will only cover the selected element.
  %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32)>
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
  "llvm.intr.memset.inline"(%2, %memset_value) <{isVolatile = false, len = 4}> : (!llvm.ptr, i8) -> ()
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

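// Here the memset overruns the field selected by the GEP (6 bytes into a
// 4-byte i32), so the struct cannot be split and everything is preserved.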
// CHECK-LABEL: llvm.func @invalid_indirect_memset
llvm.func @invalid_indirect_memset() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.struct<"foo", (i32, i32)>
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // CHECK-DAG: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(6 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // This memset will go slightly beyond one of the elements.
  %memset_len = llvm.mlir.constant(6 : i32) : i32
  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 0]
  %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32)>
  // CHECK: "llvm.intr.memset"(%[[GEP]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
  "llvm.intr.memset"(%2, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @invalid_indirect_memset_inline
llvm.func @invalid_indirect_memset_inline() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.struct<"foo", (i32, i32)>
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // This memset will go slightly beyond one of the elements.
  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 0]
  %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32)>
  // CHECK: "llvm.intr.memset.inline"(%[[GEP]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 6 : i64}>
  "llvm.intr.memset.inline"(%2, %memset_value) <{isVolatile = false, len = 6}> : (!llvm.ptr, i8) -> ()
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_double_use
llvm.func @memset_double_use() -> i32 {
  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
  // CHECK: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 8 bytes means it will span over both the i32 and the f32 entry.
  %memset_len = llvm.mlir.constant(8 : i32) : i32
  // We expect two generated memsets, one for each field.
  // CHECK-NOT: "llvm.intr.memset"
  // After SROA, each slot holds a single 32-bit value, so each memset sets only 4 bytes.
  // CHECK: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memset"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
  // CHECK: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memset"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
  // CHECK-NOT: "llvm.intr.memset"
  "llvm.intr.memset"(%1, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
  %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f32)>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  %4 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f32)>
  %5 = llvm.load %4 : !llvm.ptr -> f32
  // This exotic bitcast is only here to consume the f32 conveniently; its semantics do not matter.
  %6 = llvm.bitcast %5 : f32 to i32
  %7 = llvm.add %3, %6 : i32
  llvm.return %7 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_inline_double_use
llvm.func @memset_inline_double_use() -> i32 {
  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
  // CHECK: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // We expect two generated memsets, one for each field.
  // CHECK-NOT: "llvm.intr.memset.inline"
  // After SROA, each slot holds a single 32-bit value, so each memset sets only 4 bytes.
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
  // CHECK-NOT: "llvm.intr.memset.inline"
  // 8 bytes means it will span over both the i32 and the f32 entry.
  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 8}> : (!llvm.ptr, i8) -> ()
  %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f32)>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  %4 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f32)>
  %5 = llvm.load %4 : !llvm.ptr -> f32
  // This exotic bitcast is only here to consume the f32 conveniently; its semantics do not matter.
  %6 = llvm.bitcast %5 : f32 to i32
  %7 = llvm.add %3, %6 : i32
  llvm.return %7 : i32
}

// -----

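// With natural (non-packed) layout, struct<"foo", (i8, i32, f32)> occupies:
// the i8 at byte 0, padding at bytes 1-3, the i32 at bytes 4-8, and the f32
// at bytes 8-12. An 8-byte memset therefore ends exactly at the i32/f32
// boundary and never touches the f32.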
// CHECK-LABEL: llvm.func @memset_considers_alignment
llvm.func @memset_considers_alignment() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // After SROA, only 32-bit values will actually be used, so only 4 bytes will be set.
  // CHECK-DAG: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i8, i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 8 bytes means it will span over the i8 and the i32 entry.
  // Because of padding, the f32 entry will not be touched.
  %memset_len = llvm.mlir.constant(8 : i32) : i32
  // Even though both the i32 and the f32 are used, only one memset should be
  // generated, as the f32 is not touched by the initial memset.
  // CHECK-NOT: "llvm.intr.memset"
  // CHECK: "llvm.intr.memset"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
  // CHECK-NOT: "llvm.intr.memset"
  "llvm.intr.memset"(%1, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i8, i32, f32)>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  %4 = llvm.getelementptr %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i8, i32, f32)>
  %5 = llvm.load %4 : !llvm.ptr -> f32
  // This exotic bitcast is only here to consume the f32 conveniently; its semantics do not matter.
  %6 = llvm.bitcast %5 : f32 to i32
  %7 = llvm.add %3, %6 : i32
  llvm.return %7 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_inline_considers_alignment
llvm.func @memset_inline_considers_alignment() -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  // After SROA, only 32-bit values will actually be used, so only 4 bytes will be set.
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i8, i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 8 bytes means it will span over the i8 and the i32 entry.
  // Because of padding, the f32 entry will not be touched.
  // Even though both the i32 and the f32 are used, only one memset should be
  // generated, as the f32 is not touched by the initial memset.
  // CHECK-NOT: "llvm.intr.memset.inline"
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
  // CHECK-NOT: "llvm.intr.memset.inline"
  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 8}> : (!llvm.ptr, i8) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i8, i32, f32)>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  %4 = llvm.getelementptr %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i8, i32, f32)>
  %5 = llvm.load %4 : !llvm.ptr -> f32
  // This exotic bitcast is only here to consume the f32 conveniently; its semantics do not matter.
  %6 = llvm.bitcast %5 : f32 to i32
  %7 = llvm.add %3, %6 : i32
  llvm.return %7 : i32
}

// -----

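// In the packed variant there is no padding: the i8 is at byte 0, the i32 at
// bytes 1-5, and the f32 at bytes 5-9. The same 8-byte memset now fully
// covers the i32 and reaches 3 bytes into the f32, hence the partial memset.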
// CHECK-LABEL: llvm.func @memset_considers_packing
llvm.func @memset_considers_packing() -> i32 {
  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
  // CHECK: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.struct<"foo", packed (i8, i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // 8 bytes means it will span over all the fields, because there is no padding as the struct is packed.
  %memset_len = llvm.mlir.constant(8 : i32) : i32
  // Now all fields are touched by the memset.
  // CHECK-NOT: "llvm.intr.memset"
  // After SROA, the fully covered i32 slot gets a 4-byte memset, while the
  // partially covered f32 slot gets a 3-byte one.
  // CHECK: %[[MEMSET_LEN_WHOLE:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memset"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN_WHOLE]]) <{isVolatile = false}>
  // CHECK: %[[MEMSET_LEN_PARTIAL:.*]] = llvm.mlir.constant(3 : i32) : i32
  // CHECK: "llvm.intr.memset"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN_PARTIAL]]) <{isVolatile = false}>
  // CHECK-NOT: "llvm.intr.memset"
  "llvm.intr.memset"(%1, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", packed (i8, i32, f32)>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  %4 = llvm.getelementptr %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", packed (i8, i32, f32)>
  %5 = llvm.load %4 : !llvm.ptr -> f32
  // This exotic bitcast is only here to consume the f32 conveniently; its semantics do not matter.
  %6 = llvm.bitcast %5 : f32 to i32
  %7 = llvm.add %3, %6 : i32
  llvm.return %7 : i32
}

// -----

// CHECK-LABEL: llvm.func @memset_inline_considers_packing
llvm.func @memset_inline_considers_packing() -> i32 {
  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
  // CHECK: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // CHECK: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.struct<"foo", packed (i8, i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
  %memset_value = llvm.mlir.constant(42 : i8) : i8
  // Now all fields are touched by the memset.
  // CHECK-NOT: "llvm.intr.memset.inline"
  // After SROA, the fully covered i32 slot gets a 4-byte memset, while the
  // partially covered f32 slot gets a 3-byte one.
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 3 : i64}>
  // CHECK-NOT: "llvm.intr.memset.inline"
  // 8 bytes means it will span over all the fields, because there is no padding as the struct is packed.
  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 8}> : (!llvm.ptr, i8) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", packed (i8, i32, f32)>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  %4 = llvm.getelementptr %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", packed (i8, i32, f32)>
  %5 = llvm.load %4 : !llvm.ptr -> f32
  // This exotic bitcast is only here to consume the f32 conveniently; its semantics do not matter.
  %6 = llvm.bitcast %5 : f32 to i32
  %7 = llvm.add %3, %6 : i32
  llvm.return %7 : i32
}

// -----

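// For memcpys, splitting works per element: each surviving slot gets its own
// copy of the element size, addressed through a GEP into the other buffer.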
// CHECK-LABEL: llvm.func @memcpy_dest
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memcpy_dest(%other_array: !llvm.ptr) -> i32 {
  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> : (i32) -> !llvm.ptr
  %memcpy_len = llvm.mlir.constant(40 : i32) : i32
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  // After SROA, only one i32 will actually be used, so only 4 bytes will be copied.
  // CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[SLOT_IN_OTHER]], %[[MEMCPY_LEN]]) <{isVolatile = false}>
  "llvm.intr.memcpy"(%1, %other_array, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memcpy_src
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memcpy_src(%other_array: !llvm.ptr) -> i32 {
  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // Even though only the second i32 is loaded directly, the memcpy reads all
  // four elements, so all four slots are kept and each gets its own 4-byte copy.
  // CHECK-COUNT-4: = llvm.alloca %[[ALLOCA_LEN]] x i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<4 x i32> : (i32) -> !llvm.ptr
  %memcpy_len = llvm.mlir.constant(16 : i32) : i32
  // Unfortunately, because of FileCheck limitations, it is not possible to check
  // which slot gets read from. We can only check that the number of operations
  // and allocated slots is correct, which should be sufficient as unused slots
  // are not generated.
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
  "llvm.intr.memcpy"(%other_array, %1, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memcpy_double
llvm.func @memcpy_double() -> i32 {
  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  // CHECK: = llvm.alloca %[[ALLOCA_LEN]] x i32
  // TODO: This should also disappear as a GEP with all zero indices should be
  // ignored.
  // CHECK: = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<1 x i32>
  %1 = llvm.alloca %0 x !llvm.array<1 x i32> : (i32) -> !llvm.ptr
  %2 = llvm.alloca %0 x !llvm.array<1 x i32> : (i32) -> !llvm.ptr
  // Match the dead constant to avoid colliding with the newly created one.
  // CHECK: llvm.mlir.constant
  %memcpy_len = llvm.mlir.constant(4 : i32) : i32
  // CHECK-NOT: "llvm.intr.memcpy"
  // CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
  // CHECK-NOT: "llvm.intr.memcpy"
  "llvm.intr.memcpy"(%1, %2, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
  %3 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<1 x i32>
  %4 = llvm.load %3 : !llvm.ptr -> i32
  llvm.return %4 : i32
}

// -----

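// A memcpy that stops partway through an element (21 bytes into an array of
// 4-byte i32s) cannot be split, so the alloca and the memcpy are preserved.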
// CHECK-LABEL: llvm.func @memcpy_no_partial
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memcpy_no_partial(%other_array: !llvm.ptr) -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32>
  // CHECK-DAG: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(21 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> : (i32) -> !llvm.ptr
  %memcpy_len = llvm.mlir.constant(21 : i32) : i32
  // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[OTHER_ARRAY]], %[[MEMCPY_LEN]]) <{isVolatile = false}>
  "llvm.intr.memcpy"(%1, %other_array, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

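// Volatile memcpys are likewise not handled and must be preserved.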
// CHECK-LABEL: llvm.func @memcpy_no_volatile
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memcpy_no_volatile(%other_array: !llvm.ptr) -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32>
  // CHECK-DAG: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(40 : i32) : i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> : (i32) -> !llvm.ptr
  %memcpy_len = llvm.mlir.constant(40 : i32) : i32
  // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[OTHER_ARRAY]], %[[MEMCPY_LEN]]) <{isVolatile = true}>
  "llvm.intr.memcpy"(%1, %other_array, %memcpy_len) <{isVolatile = true}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

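// memmove is split the same way as memcpy, both when the alloca is the
// destination and when it is the source.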
// CHECK-LABEL: llvm.func @memmove_dest
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memmove_dest(%other_array: !llvm.ptr) -> i32 {
  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> : (i32) -> !llvm.ptr
  %memmove_len = llvm.mlir.constant(40 : i32) : i32
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  // After SROA, only one i32 will actually be used, so only 4 bytes will be copied.
  // CHECK: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memmove"(%[[ALLOCA]], %[[SLOT_IN_OTHER]], %[[MEMMOVE_LEN]]) <{isVolatile = false}>
  "llvm.intr.memmove"(%1, %other_array, %memmove_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memmove_src
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memmove_src(%other_array: !llvm.ptr) -> i32 {
  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-COUNT-4: = llvm.alloca %[[ALLOCA_LEN]] x i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<4 x i32> : (i32) -> !llvm.ptr
  %memmove_len = llvm.mlir.constant(16 : i32) : i32
  // Unfortunately, because of FileCheck limitations, it is not possible to check
  // which slot gets read from. We can only check that the number of operations
  // and allocated slots is correct, which should be sufficient as unused slots
  // are not generated.
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
  // CHECK: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
  "llvm.intr.memmove"(%other_array, %1, %memmove_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

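// Like memset.inline, memcpy.inline carries its length as a `len` attribute,
// so the per-slot copies below have their shrunken length inline.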
// CHECK-LABEL: llvm.func @memcpy_inline_dest
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memcpy_inline_dest(%other_array: !llvm.ptr) -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
  // After SROA, only one i32 will actually be used, so only 4 bytes will be copied.
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<10 x i32> : (i32) -> !llvm.ptr
  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  // CHECK: "llvm.intr.memcpy.inline"(%[[ALLOCA]], %[[SLOT_IN_OTHER]]) <{isVolatile = false, len = 4 : i32}>
  "llvm.intr.memcpy.inline"(%1, %other_array) <{isVolatile = false, len = 40 : i32}> : (!llvm.ptr, !llvm.ptr) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}

// -----

// CHECK-LABEL: llvm.func @memcpy_inline_src
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memcpy_inline_src(%other_array: !llvm.ptr) -> i32 {
  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
  // Even though only the second i32 is loaded directly, the memcpy reads all
  // four elements, so all four slots are kept and each gets its own 4-byte copy.
  // CHECK-COUNT-4: = llvm.alloca %[[ALLOCA_LEN]] x i32
  %0 = llvm.mlir.constant(1 : i32) : i32
  %1 = llvm.alloca %0 x !llvm.array<4 x i32> : (i32) -> !llvm.ptr
  // Unfortunately, because of FileCheck limitations, it is not possible to check
  // which slot gets read from. We can only check that the number of operations
  // and allocated slots is correct, which should be sufficient as unused slots
  // are not generated.
  // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK-DAG: "llvm.intr.memcpy.inline"(%[[SLOT_IN_OTHER]], %{{.*}}) <{isVolatile = false, len = 4 : i32}>
  // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK-DAG: "llvm.intr.memcpy.inline"(%[[SLOT_IN_OTHER]], %{{.*}}) <{isVolatile = false, len = 4 : i32}>
  // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK-DAG: "llvm.intr.memcpy.inline"(%[[SLOT_IN_OTHER]], %{{.*}}) <{isVolatile = false, len = 4 : i32}>
  // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  // CHECK-DAG: "llvm.intr.memcpy.inline"(%[[SLOT_IN_OTHER]], %{{.*}}) <{isVolatile = false, len = 4 : i32}>
  "llvm.intr.memcpy.inline"(%other_array, %1) <{isVolatile = false, len = 16 : i32}> : (!llvm.ptr, !llvm.ptr) -> ()
  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
  %3 = llvm.load %2 : !llvm.ptr -> i32
  llvm.return %3 : i32
}