// RUN: mlir-opt %s -inline -split-input-file | FileCheck %s

#file = #llvm.di_file<"foo.mlir" in "/foo/">
#variable = #llvm.di_local_variable<scope = #file>
#variableAddr = #llvm.di_local_variable<scope = #file>
#label = #llvm.di_label<scope = #file>

func.func @inner_func_inlinable(%ptr : !llvm.ptr) -> i32 {
  %0 = llvm.mlir.constant(42 : i32) : i32
  %stack = llvm.intr.stacksave : !llvm.ptr
  llvm.store %0, %ptr { alignment = 8 } : i32, !llvm.ptr
  %1 = llvm.load %ptr { alignment = 8 } : !llvm.ptr -> i32
  llvm.intr.dbg.value #variable = %0 : i32
  llvm.intr.dbg.declare #variableAddr = %ptr : !llvm.ptr
  llvm.intr.dbg.label #label
  %byte = llvm.mlir.constant(43 : i8) : i8
  %true = llvm.mlir.constant(1 : i1) : i1
  "llvm.intr.memset"(%ptr, %byte, %0) <{isVolatile = true}> : (!llvm.ptr, i8, i32) -> ()
  "llvm.intr.memmove"(%ptr, %ptr, %0) <{isVolatile = true}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
  "llvm.intr.memcpy"(%ptr, %ptr, %0) <{isVolatile = true}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
  llvm.intr.assume %true : i1
  llvm.fence release
  %2 = llvm.atomicrmw add %ptr, %0 monotonic : !llvm.ptr, i32
  %3 = llvm.cmpxchg %ptr, %0, %1 acq_rel monotonic : !llvm.ptr, i32
  llvm.inline_asm has_side_effects "foo", "bar" : () -> ()
  llvm.cond_br %true, ^bb1, ^bb2
^bb1:
  llvm.unreachable
^bb2:
  llvm.intr.stackrestore %stack : !llvm.ptr
  llvm.call_intrinsic "llvm.x86.sse41.round.ss"() : () -> (vector<8xf32>)
  return %1 : i32
}

// CHECK-LABEL: func.func @test_inline(
// CHECK-SAME: %[[PTR:[a-zA-Z0-9_]+]]
// CHECK: %[[CST:.*]] = llvm.mlir.constant(42
// CHECK: %[[STACK:.+]] = llvm.intr.stacksave
// CHECK: llvm.store %[[CST]], %[[PTR]]
// CHECK: %[[RES:.+]] = llvm.load %[[PTR]]
// CHECK: llvm.intr.dbg.value #{{.+}} = %[[CST]]
// CHECK: llvm.intr.dbg.declare #{{.+}} = %[[PTR]]
// CHECK: llvm.intr.dbg.label #{{.+}}
// CHECK: "llvm.intr.memset"(%[[PTR]]
// CHECK: "llvm.intr.memmove"(%[[PTR]], %[[PTR]]
// CHECK: "llvm.intr.memcpy"(%[[PTR]], %[[PTR]]
// CHECK: llvm.intr.assume
// CHECK: llvm.fence release
// CHECK: llvm.atomicrmw add %[[PTR]], %[[CST]] monotonic
// CHECK: llvm.cmpxchg %[[PTR]], %[[CST]], %[[RES]] acq_rel monotonic
// CHECK: llvm.inline_asm has_side_effects "foo", "bar"
// CHECK: llvm.unreachable
// CHECK: llvm.intr.stackrestore %[[STACK]]
// CHECK: llvm.call_intrinsic "llvm.x86.sse41.round.ss"(
func.func @test_inline(%ptr : !llvm.ptr) -> i32 {
  %0 = call @inner_func_inlinable(%ptr) : (!llvm.ptr) -> i32
  return %0 : i32
}

// -----
// Check that llvm.return is correctly handled

func.func @func(%arg0 : i32) -> i32  {
  llvm.return %arg0 : i32
}
// CHECK-LABEL: @llvm_ret
// CHECK-NOT: call
// CHECK:  return %arg0
func.func @llvm_ret(%arg0 : i32) -> i32 {
  %res = call @func(%arg0) : (i32) -> (i32)
  return %res : i32
}

// -----

// Include all function attributes that don't prevent inlining
llvm.func internal fastcc @callee() -> (i32) attributes { function_entry_count = 42 : i64, dso_local } {
  %0 = llvm.mlir.constant(42 : i32) : i32
  llvm.return %0 : i32
}

// CHECK-LABEL: llvm.func @caller
// CHECK-NEXT: %[[CST:.+]] = llvm.mlir.constant
// CHECK-NEXT: llvm.return %[[CST]]
llvm.func @caller() -> (i32) {
  // Include all call attributes that don't prevent inlining.
  %0 = llvm.call fastcc @callee() { fastmathFlags = #llvm.fastmath<nnan, ninf>, branch_weights = dense<42> : vector<1xi32> } : () -> (i32)
  llvm.return %0 : i32
}

// -----
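// Check that a callee consisting of multiple blocks is inlined, with its
// blocks spliced into the caller.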

llvm.func @foo() -> (i32) attributes { no_inline } {
  %0 = llvm.mlir.constant(0 : i32) : i32
  llvm.return %0 : i32
}

llvm.func @bar() -> (i32) attributes { no_inline } {
  %0 = llvm.mlir.constant(1 : i32) : i32
  llvm.return %0 : i32
}

llvm.func @callee_with_multiple_blocks(%cond: i1) -> (i32) {
  llvm.cond_br %cond, ^bb1, ^bb2
^bb1:
  %0 = llvm.call @foo() : () -> (i32)
  llvm.br ^bb3(%0: i32)
^bb2:
  %1 = llvm.call @bar() : () -> (i32)
  llvm.br ^bb3(%1: i32)
^bb3(%arg: i32):
  llvm.return %arg : i32
}

// CHECK-LABEL: llvm.func @caller
// CHECK-NEXT: llvm.cond_br {{.+}}, ^[[BB1:.+]], ^[[BB2:.+]]
// CHECK-NEXT: ^[[BB1]]:
// CHECK-NEXT: llvm.call @foo
// CHECK-NEXT: llvm.br ^[[BB3:[a-zA-Z0-9_]+]]
// CHECK-NEXT: ^[[BB2]]:
// CHECK-NEXT: llvm.call @bar
// CHECK-NEXT: llvm.br ^[[BB3]]
// CHECK-NEXT: ^[[BB3]]
// CHECK-NEXT: llvm.br ^[[BB4:[a-zA-Z0-9_]+]]
// CHECK-NEXT: ^[[BB4]]
// CHECK-NEXT: llvm.return
llvm.func @caller(%cond: i1) -> (i32) {
  %0 = llvm.call @callee_with_multiple_blocks(%cond) : (i1) -> (i32)
  llvm.return %0 : i32
}

// -----
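// Check that a callee carrying a personality attribute is not inlined.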

llvm.func @personality() -> i32

llvm.func @callee() -> (i32) attributes { personality = @personality } {
  %0 = llvm.mlir.constant(42 : i32) : i32
  llvm.return %0 : i32
}

// CHECK-LABEL: llvm.func @caller
// CHECK-NEXT: llvm.call @callee
// CHECK-NEXT: return
llvm.func @caller() -> (i32) {
  %0 = llvm.call @callee() : () -> (i32)
  llvm.return %0 : i32
}

// -----
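// Check that unknown passthrough attributes on the callee do not prevent
// inlining.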

llvm.func @callee() attributes { passthrough = ["foo", "bar"] } {
  llvm.return
}

// CHECK-LABEL: llvm.func @caller
// CHECK-NEXT: llvm.return
llvm.func @caller() {
  llvm.call @callee() : () -> ()
  llvm.return
}

// -----
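// Check that calls to callees carrying attributes that prevent inlining
// remain untouched.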

llvm.func @callee_noinline() attributes { no_inline } {
  llvm.return
}

llvm.func @callee_noduplicate() attributes { passthrough = ["noduplicate"] } {
  llvm.return
}

llvm.func @callee_presplitcoroutine() attributes { passthrough = ["presplitcoroutine"] } {
  llvm.return
}

llvm.func @callee_returns_twice() attributes { passthrough = ["returns_twice"] } {
  llvm.return
}

llvm.func @callee_strictfp() attributes { passthrough = ["strictfp"] } {
  llvm.return
}

// CHECK-LABEL: llvm.func @caller
// CHECK-NEXT: llvm.call @callee_noinline
// CHECK-NEXT: llvm.call @callee_noduplicate
// CHECK-NEXT: llvm.call @callee_presplitcoroutine
// CHECK-NEXT: llvm.call @callee_returns_twice
// CHECK-NEXT: llvm.call @callee_strictfp
// CHECK-NEXT: llvm.return
llvm.func @caller() {
  llvm.call @callee_noinline() : () -> ()
  llvm.call @callee_noduplicate() : () -> ()
  llvm.call @callee_presplitcoroutine() : () -> ()
  llvm.call @callee_returns_twice() : () -> ()
  llvm.call @callee_strictfp() : () -> ()
  llvm.return
}

// -----

llvm.func @static_alloca() -> f32 {
  %0 = llvm.mlir.constant(4 : i32) : i32
  %1 = llvm.alloca %0 x f32 : (i32) -> !llvm.ptr
  %2 = llvm.load %1 : !llvm.ptr -> f32
  llvm.return %2 : f32
}

llvm.func @dynamic_alloca(%size : i32) -> f32 {
  %0 = llvm.add %size, %size : i32
  %1 = llvm.alloca %0 x f32 : (i32) -> !llvm.ptr
  %2 = llvm.load %1 : !llvm.ptr -> f32
  llvm.return %2 : f32
}

// CHECK-LABEL: llvm.func @test_inline
llvm.func @test_inline(%cond : i1, %size : i32) -> f32 {
  // Check that the static alloca was moved to the entry block after inlining
  // with its size defined by a constant.
  // CHECK-NOT: ^{{.+}}:
  // CHECK-NEXT: llvm.mlir.constant
  // CHECK-NEXT: llvm.alloca
  // CHECK: llvm.cond_br
  llvm.cond_br %cond, ^bb1, ^bb2
  // CHECK: ^{{.+}}:
^bb1:
  // CHECK-NOT: llvm.call @static_alloca
  // CHECK: llvm.intr.lifetime.start
  %0 = llvm.call @static_alloca() : () -> f32
  // CHECK: llvm.intr.lifetime.end
  // CHECK: llvm.br ^[[BB3:[a-zA-Z0-9_]+]]
  llvm.br ^bb3(%0: f32)
  // CHECK: ^{{.+}}:
^bb2:
  // Check that the dynamic alloca was inlined, but that it was not moved to the
  // entry block.
  // CHECK: %[[STACK:[a-zA-Z0-9_]+]] = llvm.intr.stacksave
  // CHECK: llvm.add
  // CHECK: llvm.alloca
  // CHECK: llvm.intr.stackrestore %[[STACK]]
  // CHECK-NOT: llvm.call @dynamic_alloca
  %1 = llvm.call @dynamic_alloca(%size) : (i32) -> f32
  // CHECK: llvm.br ^[[BB3]]
  llvm.br ^bb3(%1: f32)
  // CHECK: ^[[BB3]]
^bb3(%arg : f32):
  // CHECK-NEXT: return
  llvm.return %arg : f32
}

// -----

llvm.func @static_alloca_not_in_entry(%cond : i1) -> f32 {
  llvm.cond_br %cond, ^bb1, ^bb2
^bb1:
  %0 = llvm.mlir.constant(4 : i32) : i32
  %1 = llvm.alloca %0 x f32 : (i32) -> !llvm.ptr
  llvm.br ^bb3(%1: !llvm.ptr)
^bb2:
  %2 = llvm.mlir.constant(8 : i32) : i32
  %3 = llvm.alloca %2 x f32 : (i32) -> !llvm.ptr
  llvm.br ^bb3(%3: !llvm.ptr)
^bb3(%ptr : !llvm.ptr):
  %4 = llvm.load %ptr : !llvm.ptr -> f32
  llvm.return %4 : f32
}

// CHECK-LABEL: llvm.func @test_inline
llvm.func @test_inline(%cond : i1) -> f32 {
  // Make sure the alloca was not moved to the entry block.
  // CHECK-NOT: llvm.alloca
  // CHECK: llvm.cond_br
  // CHECK: llvm.alloca
  %0 = llvm.call @static_alloca_not_in_entry(%cond) : (i1) -> f32
  llvm.return %0 : f32
}

// -----

llvm.func @static_alloca(%cond: i1) -> f32 {
  %0 = llvm.mlir.constant(4 : i32) : i32
  %1 = llvm.alloca %0 x f32 : (i32) -> !llvm.ptr
  llvm.cond_br %cond, ^bb1, ^bb2
^bb1:
  %2 = llvm.load %1 : !llvm.ptr -> f32
  llvm.return %2 : f32
^bb2:
  %3 = llvm.mlir.constant(3.14192 : f32) : f32
  llvm.return %3 : f32
}

// CHECK-LABEL: llvm.func @test_inline
llvm.func @test_inline(%cond0 : i1, %cond1 : i1, %funcArg : f32) -> f32 {
  // CHECK-NOT: llvm.cond_br
  // CHECK: %[[PTR:.+]] = llvm.alloca
  // CHECK: llvm.cond_br %{{.+}}, ^[[BB1:.+]], ^{{.+}}
  llvm.cond_br %cond0, ^bb1, ^bb2
  // CHECK: ^[[BB1]]
^bb1:
  // Make sure the lifetime begin intrinsic has been inserted where the call
  // used to be, even though the alloca has been moved to the entry block.
  // CHECK-NEXT: llvm.intr.lifetime.start 4, %[[PTR]]
  %0 = llvm.call @static_alloca(%cond1) : (i1) -> f32
  // CHECK: llvm.cond_br %{{.+}}, ^[[BB2:.+]], ^[[BB3:.+]]
  llvm.br ^bb3(%0: f32)
  // Make sure the lifetime end intrinsic has been inserted at both former
  // return sites of the callee.
  // CHECK: ^[[BB2]]:
  // CHECK-NEXT: llvm.load
  // CHECK-NEXT: llvm.intr.lifetime.end 4, %[[PTR]]
  // CHECK: ^[[BB3]]:
  // CHECK-NEXT: llvm.intr.lifetime.end 4, %[[PTR]]
^bb2:
  llvm.br ^bb3(%funcArg: f32)
^bb3(%blockArg: f32):
  llvm.return %blockArg : f32
}

// -----

llvm.func @static_alloca() -> f32 {
  %0 = llvm.mlir.constant(4 : i32) : i32
  %1 = llvm.alloca %0 x f32 : (i32) -> !llvm.ptr
  %2 = llvm.load %1 : !llvm.ptr -> f32
  llvm.return %2 : f32
}

// CHECK-LABEL: llvm.func @test_inline
llvm.func @test_inline(%cond0 : i1) {
  // Verify the alloca is relocated to the entry block of the parent function
  // if the region operation is neither marked as isolated from above nor as
  // an automatic allocation scope.
  // CHECK: %[[ALLOCA:.+]] = llvm.alloca
  // CHECK: "test.one_region_op"() ({
  "test.one_region_op"() ({
    %0 = llvm.call @static_alloca() : () -> f32
    // CHECK-NEXT: llvm.intr.lifetime.start 4, %[[ALLOCA]]
    // CHECK-NEXT: %[[RES:.+]] = llvm.load %[[ALLOCA]]
    // CHECK-NEXT: llvm.intr.lifetime.end 4, %[[ALLOCA]]
    // CHECK-NEXT: test.region_yield %[[RES]]
    test.region_yield %0 : f32
  }) : () -> ()
  // Verify the alloca is not relocated out of operations that are marked as
  // isolated from above.
  // CHECK-NOT: llvm.alloca
  // CHECK: test.isolated_regions
  test.isolated_regions {
    // CHECK: %[[ALLOCA:.+]] = llvm.alloca
    %0 = llvm.call @static_alloca() : () -> f32
    // CHECK: test.region_yield
    test.region_yield %0 : f32
  }
  // Verify the alloca is not relocated out of operations that are marked as
  // automatic allocation scope.
  // CHECK-NOT: llvm.alloca
  // CHECK: test.alloca_scope_region
  test.alloca_scope_region {
    // CHECK: %[[ALLOCA:.+]] = llvm.alloca
    %0 = llvm.call @static_alloca() : () -> f32
    // CHECK: test.region_yield
    test.region_yield %0 : f32
  }
  llvm.return
}

// -----

llvm.func @alloca_with_lifetime(%cond: i1) -> f32 {
  %0 = llvm.mlir.constant(4 : i32) : i32
  %1 = llvm.alloca %0 x f32 : (i32) -> !llvm.ptr
  llvm.intr.lifetime.start 4, %1 : !llvm.ptr
  %2 = llvm.load %1 : !llvm.ptr -> f32
  llvm.intr.lifetime.end 4, %1 : !llvm.ptr
  %3 = llvm.fadd %2, %2 : f32
  llvm.return %3 : f32
}

// CHECK-LABEL: llvm.func @test_inline
llvm.func @test_inline(%cond0 : i1, %cond1 : i1, %funcArg : f32) -> f32 {
  // CHECK-NOT: llvm.cond_br
  // CHECK: %[[PTR:.+]] = llvm.alloca
  // CHECK: llvm.cond_br %{{.+}}, ^[[BB1:.+]], ^{{.+}}
  llvm.cond_br %cond0, ^bb1, ^bb2
  // CHECK: ^[[BB1]]
^bb1:
  // Make sure the original lifetime intrinsic has been preserved, rather than
  // inserting a new one with a larger scope.
  // CHECK: llvm.intr.lifetime.start 4, %[[PTR]]
  // CHECK-NEXT: llvm.load %[[PTR]]
  // CHECK-NEXT: llvm.intr.lifetime.end 4, %[[PTR]]
  // CHECK: llvm.fadd
  // CHECK-NOT: llvm.intr.lifetime.end
  %0 = llvm.call @alloca_with_lifetime(%cond1) : (i1) -> f32
  llvm.br ^bb3(%0: f32)
^bb2:
  llvm.br ^bb3(%funcArg: f32)
^bb3(%blockArg: f32):
  llvm.return %blockArg : f32
}

// -----
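// Check that the byval argument is copied into a newly created alloca.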

llvm.func @with_byval_arg(%ptr : !llvm.ptr { llvm.byval = f64 }) {
  llvm.return
}

// CHECK-LABEL: llvm.func @test_byval
// CHECK-SAME: %[[PTR:[a-zA-Z0-9_]+]]: !llvm.ptr
llvm.func @test_byval(%ptr : !llvm.ptr) {
  // Make sure the new static alloca goes to the entry block.
  // CHECK: %[[ALLOCA:.+]] = llvm.alloca %{{.+}} x f64
  // CHECK: llvm.br ^[[BB1:[a-zA-Z0-9_]+]]
  llvm.br ^bb1
  // CHECK: ^[[BB1]]
^bb1:
  // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[PTR]]
  llvm.call @with_byval_arg(%ptr) : (!llvm.ptr) -> ()
  llvm.br ^bb2
^bb2:
  llvm.return
}

// -----
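// Check that the byval copy is elided when the callee only reads the
// argument memory.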

llvm.func @with_byval_arg(%ptr : !llvm.ptr { llvm.byval = f64 }) attributes {memory_effects = #llvm.memory_effects<other = readwrite, argMem = read, inaccessibleMem = readwrite>} {
  llvm.return
}

// CHECK-LABEL: llvm.func @test_byval_read_only
// CHECK-NOT: llvm.call
// CHECK-NEXT: llvm.return
llvm.func @test_byval_read_only(%ptr : !llvm.ptr) {
  llvm.call @with_byval_arg(%ptr) : (!llvm.ptr) -> ()
  llvm.return
}

// -----
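// Check that the byval copy is still introduced when the callee writes to the
// argument memory.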

llvm.func @with_byval_arg(%ptr : !llvm.ptr { llvm.byval = f64 }) attributes {memory_effects = #llvm.memory_effects<other = readwrite, argMem = write, inaccessibleMem = readwrite>} {
  llvm.return
}

// CHECK-LABEL: llvm.func @test_byval_write_only
// CHECK-SAME: %[[PTR:[a-zA-Z0-9_]+]]: !llvm.ptr
// CHECK: %[[ALLOCA:.+]] = llvm.alloca %{{.+}} x f64
// CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[PTR]]
llvm.func @test_byval_write_only(%ptr : !llvm.ptr) {
  llvm.call @with_byval_arg(%ptr) : (!llvm.ptr) -> ()
  llvm.return
}

// -----

llvm.func @aligned_byval_arg(%ptr : !llvm.ptr { llvm.byval = i16, llvm.align = 16 }) attributes {memory_effects = #llvm.memory_effects<other = read, argMem = read, inaccessibleMem = read>} {
  llvm.return
}

// CHECK-LABEL: llvm.func @test_byval_input_aligned
// CHECK-SAME: %[[UNALIGNED:[a-zA-Z0-9_]+]]: !llvm.ptr
// CHECK-SAME: %[[ALIGNED:[a-zA-Z0-9_]+]]: !llvm.ptr
llvm.func @test_byval_input_aligned(%unaligned : !llvm.ptr, %aligned : !llvm.ptr { llvm.align = 16 }) {
  // Make sure only the unaligned input triggers a memcpy.
  // CHECK: %[[ALLOCA:.+]] = llvm.alloca %{{.+}} x i16 {alignment = 16
  // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[UNALIGNED]]
  llvm.call @aligned_byval_arg(%unaligned) : (!llvm.ptr) -> ()
  // CHECK-NOT: memcpy
  llvm.call @aligned_byval_arg(%aligned) : (!llvm.ptr) -> ()
  llvm.return
}

// -----
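// Check that an existing alloca passed as an underaligned byval argument is
// realigned instead of copied.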

llvm.func @func_that_uses_ptr(%ptr : !llvm.ptr)

llvm.func @aligned_byval_arg(%ptr : !llvm.ptr { llvm.byval = i16, llvm.align = 16 }) attributes {memory_effects = #llvm.memory_effects<other = read, argMem = read, inaccessibleMem = read>} {
  llvm.call @func_that_uses_ptr(%ptr) : (!llvm.ptr) -> ()
  llvm.return
}

// CHECK-LABEL: llvm.func @test_byval_realign_alloca
llvm.func @test_byval_realign_alloca() {
  %size = llvm.mlir.constant(4 : i64) : i64
  // CHECK-NOT: llvm.alloca{{.+}}alignment = 1
  // CHECK: llvm.alloca {{.+}}alignment = 16 : i64
  // CHECK-NOT: llvm.intr.memcpy
  %unaligned = llvm.alloca %size x i16 { alignment = 1 } : (i64) -> !llvm.ptr
  llvm.call @aligned_byval_arg(%unaligned) : (!llvm.ptr) -> ()
  llvm.return
}

// -----

module attributes {
  dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.stack_alignment", 32 : i32>>
} {

llvm.func @func_that_uses_ptr(%ptr : !llvm.ptr)

llvm.func @aligned_byval_arg(%ptr : !llvm.ptr { llvm.byval = i16, llvm.align = 16 }) attributes {memory_effects = #llvm.memory_effects<other = read, argMem = read, inaccessibleMem = read>} {
  llvm.call @func_that_uses_ptr(%ptr) : (!llvm.ptr) -> ()
  llvm.return
}

// CHECK-LABEL: llvm.func @test_exceeds_natural_stack_alignment
llvm.func @test_exceeds_natural_stack_alignment() {
  %size = llvm.mlir.constant(4 : i64) : i64
  // Natural stack alignment is exceeded, so prefer a copy instead of
  // triggering a dynamic stack realignment.
  // CHECK-DAG: %[[SRC:[a-zA-Z0-9_]+]] = llvm.alloca{{.+}}alignment = 2
  // CHECK-DAG: %[[DST:[a-zA-Z0-9_]+]] = llvm.alloca{{.+}}alignment = 16
  // CHECK: "llvm.intr.memcpy"(%[[DST]], %[[SRC]]
  %unaligned = llvm.alloca %size x i16 { alignment = 2 } : (i64) -> !llvm.ptr
  llvm.call @aligned_byval_arg(%unaligned) : (!llvm.ptr) -> ()
  llvm.return
}

}

// -----

module attributes {
  dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.stack_alignment", 32 : i32>>
} {

llvm.func @func_that_uses_ptr(%ptr : !llvm.ptr)

llvm.func @aligned_byval_arg(%ptr : !llvm.ptr { llvm.byval = i16, llvm.align = 16 }) attributes {memory_effects = #llvm.memory_effects<other = read, argMem = read, inaccessibleMem = read>} {
  llvm.call @func_that_uses_ptr(%ptr) : (!llvm.ptr) -> ()
  llvm.return
}

// CHECK-LABEL: llvm.func @test_alignment_exceeded_anyway
llvm.func @test_alignment_exceeded_anyway() {
  %size = llvm.mlir.constant(4 : i64) : i64
  // Natural stack alignment is lower than the target alignment, but the
  // alloca's existing alignment already exceeds it, so we might as well avoid
  // the copy.
  // CHECK-NOT: llvm.alloca{{.+}}alignment = 8
  // CHECK: llvm.alloca {{.+}}alignment = 16 : i64
  // CHECK-NOT: llvm.intr.memcpy
  %unaligned = llvm.alloca %size x i16 { alignment = 8 } : (i64) -> !llvm.ptr
  llvm.call @aligned_byval_arg(%unaligned) : (!llvm.ptr) -> ()
  llvm.return
}

}

// -----

llvm.mlir.global private @unaligned_global(42 : i64) : i64
llvm.mlir.global private @aligned_global(42 : i64) { alignment = 64 } : i64

llvm.func @aligned_byval_arg(%ptr : !llvm.ptr { llvm.byval = i16, llvm.align = 16 }) attributes {memory_effects = #llvm.memory_effects<other = read, argMem = read, inaccessibleMem = read>} {
  llvm.return
}

// CHECK-LABEL: llvm.func @test_byval_global
llvm.func @test_byval_global() {
  // Make sure only the unaligned global triggers a memcpy.
  // CHECK-DAG: %[[UNALIGNED:.+]] = llvm.mlir.addressof @unaligned_global
  // CHECK-DAG: %[[ALLOCA:.+]] = llvm.alloca
  // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[UNALIGNED]]
  // CHECK-NOT: llvm.alloca
  %unaligned = llvm.mlir.addressof @unaligned_global : !llvm.ptr
  llvm.call @aligned_byval_arg(%unaligned) : (!llvm.ptr) -> ()
  %aligned = llvm.mlir.addressof @aligned_global : !llvm.ptr
  llvm.call @aligned_byval_arg(%aligned) : (!llvm.ptr) -> ()
  llvm.return
}

// -----
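// Check that argument and result attributes that require no special handling
// do not prevent inlining.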

llvm.func @ignored_attrs(%ptr : !llvm.ptr { llvm.inreg, llvm.nocapture, llvm.nofree, llvm.preallocated = i32, llvm.returned, llvm.alignstack = 32 : i64, llvm.writeonly, llvm.noundef, llvm.nonnull }, %x : i32 { llvm.zeroext }) -> (!llvm.ptr { llvm.noundef, llvm.inreg, llvm.nonnull }) {
  llvm.return %ptr : !llvm.ptr
}

// CHECK-LABEL: @test_ignored_attrs
// CHECK-NOT: llvm.call
// CHECK-NEXT: llvm.return
llvm.func @test_ignored_attrs(%ptr : !llvm.ptr, %x : i32) {
  llvm.call @ignored_attrs(%ptr, %x) : (!llvm.ptr, i32) -> (!llvm.ptr)
  llvm.return
}

// -----
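// Check that a disallowed argument attribute such as inalloca prevents
// inlining.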

llvm.func @disallowed_arg_attr(%ptr : !llvm.ptr { llvm.inalloca = i64 }) {
  llvm.return
}

// CHECK-LABEL: @test_disallow_arg_attr
// CHECK-NEXT: llvm.call
llvm.func @test_disallow_arg_attr(%ptr : !llvm.ptr) {
  llvm.call @disallowed_arg_attr(%ptr) : (!llvm.ptr) -> ()
  llvm.return
}

// -----
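// Check that the access groups of the call site are appended to the access
// groups of the inlined operations.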

#callee = #llvm.access_group<id = distinct[0]<>>
#caller = #llvm.access_group<id = distinct[1]<>>

llvm.func @inlinee(%ptr : !llvm.ptr) -> i32 {
  %0 = llvm.load %ptr { access_groups = [#callee] } : !llvm.ptr -> i32
  llvm.return %0 : i32
}

// CHECK-DAG: #[[$CALLEE:.*]] = #llvm.access_group<id = {{.*}}>
// CHECK-DAG: #[[$CALLER:.*]] = #llvm.access_group<id = {{.*}}>

// CHECK-LABEL: func @caller
// CHECK: llvm.load
// CHECK-SAME: access_groups = [#[[$CALLEE]], #[[$CALLER]]]
llvm.func @caller(%ptr : !llvm.ptr) -> i32 {
  %0 = llvm.call @inlinee(%ptr) { access_groups = [#caller] } : (!llvm.ptr) -> (i32)
  llvm.return %0 : i32
}

// -----
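// Check that the access groups of the call site are attached to inlined
// operations that have no access groups of their own.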

#caller = #llvm.access_group<id = distinct[1]<>>

llvm.func @inlinee(%ptr : !llvm.ptr) -> i32 {
  %0 = llvm.load %ptr : !llvm.ptr -> i32
  llvm.return %0 : i32
}

// CHECK-DAG: #[[$CALLER:.*]] = #llvm.access_group<id = {{.*}}>

// CHECK-LABEL: func @caller
// CHECK: llvm.load
// CHECK-SAME: access_groups = [#[[$CALLER]]]
// CHECK: llvm.store
// CHECK-SAME: access_groups = [#[[$CALLER]]]
llvm.func @caller(%ptr : !llvm.ptr) -> i32 {
  %c5 = llvm.mlir.constant(5 : i32) : i32
  %0 = llvm.call @inlinee(%ptr) { access_groups = [#caller] } : (!llvm.ptr) -> (i32)
  llvm.store %c5, %ptr { access_groups = [#caller] } : i32, !llvm.ptr
  llvm.return %0 : i32
}

// -----
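// Check that variadic functions and functions containing vararg intrinsics
// are not inlined.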

llvm.func @vararg_func(...) {
  llvm.return
}

llvm.func @vararg_intrinrics() {
  %0 = llvm.mlir.constant(1 : i32) : i32
  %list = llvm.alloca %0 x !llvm.struct<"struct.va_list_opaque", (ptr)> : (i32) -> !llvm.ptr
  // The vararg intrinsics should normally be part of a variadic function.
  // However, this test uses a non-variadic function to ensure the presence of
  // the intrinsic alone suffices to prevent inlining.
  llvm.intr.vastart %list : !llvm.ptr
  llvm.return
}

// CHECK-LABEL: func @caller
llvm.func @caller() {
  // CHECK-NEXT: llvm.call @vararg_func()
  llvm.call @vararg_func() vararg(!llvm.func<void (...)>) : () -> ()
  // CHECK-NEXT: llvm.call @vararg_intrinrics()
  llvm.call @vararg_intrinrics() : () -> ()
  llvm.return
}

// -----
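// Check that a callee with private visibility is inlined.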

llvm.func @private_func(%a : i32) -> i32 attributes {sym_visibility = "private"} {
  llvm.return %a : i32
}

// CHECK-LABEL: func @caller
llvm.func @caller(%x : i32) -> i32 {
  // CHECK-NOT: llvm.call @private_func
  %z = llvm.call @private_func(%x) : (i32) -> (i32)
  llvm.return %z : i32
}

// -----
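// Check that a callee ending in a trap followed by unreachable is inlined.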

llvm.func @unreachable_func(%a : i32) -> i32 {
  "llvm.intr.trap"() : () -> ()
  llvm.unreachable
}

// CHECK-LABEL: func @caller
llvm.func @caller(%x : i32) -> i32 {
  // CHECK-NOT: llvm.call @unreachable_func
  // CHECK: llvm.intr.trap
  // CHECK: llvm.unreachable
  %z = llvm.call @unreachable_func(%x) : (i32) -> (i32)
  llvm.return %z : i32
}
695