xref: /llvm-project/llvm/test/CodeGen/AMDGPU/lower-module-lds.ll (revision 1f52060000f1fead314f7173f4c62bc58b9ca7c3)
1; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
2; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
3
4; Padding to meet alignment, so references to @var1 replaced with gep ptr, 0, 2
5; No i64 as addrspace(3) types with initializers are ignored. Likewise no addrspace(4).
6; CHECK: %llvm.amdgcn.module.lds.t = type { float, [4 x i8], i32 }
7
8; Variable removed by pass
9; CHECK-NOT: @var0
10
; Two addrspace(3) globals with poison initialisers, both requesting align 8.
; A float followed by an 8-byte-aligned i32 is what produces the
; { float, [4 x i8], i32 } struct layout expected at the top of this file.
11@var0 = addrspace(3) global float poison, align 8
12@var1 = addrspace(3) global i32 poison, align 8
13
14; The invalid use by the global is left unchanged
15; CHECK: @var1 = addrspace(3) global i32 poison, align 8
16; CHECK: @ptr = addrspace(1) global ptr addrspace(3) @var1, align 4
; addrspace(1) global whose constant initialiser is the address of @var1; this
; is the use-from-a-global that the expectations above require to stay as-is.
17@ptr = addrspace(1) global ptr addrspace(3) @var1, align 4
18
19; A variable that is unchanged by pass
20; CHECK: @with_init = addrspace(3) global i64 0
; Unlike @var0 and @var1, this addrspace(3) global has a real initialiser
; (i64 0) rather than poison.
21@with_init = addrspace(3) global i64 0
22
23; Instance of new type, aligned to max of element alignment
24; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t poison, align 8
25
26; Use in func rewritten to access struct at address zero
27; CHECK-LABEL: @func()
28; CHECK: %dec = atomicrmw fsub ptr addrspace(3) @llvm.amdgcn.module.lds, float 1.0
29; CHECK: %val0 = load i32, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 2), align 8
30; CHECK: %val1 = add i32 %val0, 4
31; CHECK: store i32 %val1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 2), align 8
32; CHECK: %unused0 = atomicrmw add ptr addrspace(3) @with_init, i64 1 monotonic
; Test input: a non-kernel function that touches all three addrspace(3)
; globals. It does an atomic fsub on @var0, a load/add/store round trip on
; @var1, and an atomic add on @with_init.
; Value names and instruction order here are matched by the FileCheck
; expectations directly above, so they must not be renamed or reordered.
33define void @func() {
34  %dec = atomicrmw fsub ptr addrspace(3) @var0, float 1.0 monotonic
; The @var1 accesses are written with align 4 here, while the expected output
; for the rewritten accesses specifies align 8.
35  %val0 = load i32, ptr addrspace(3) @var1, align 4
36  %val1 = add i32 %val0, 4
37  store i32 %val1, ptr addrspace(3) @var1, align 4
; @with_init is expected to appear unchanged in the output.
38  %unused0 = atomicrmw add ptr addrspace(3) @with_init, i64 1 monotonic
39  ret void
40}
41
42; This kernel calls a function that uses LDS so needs the block
43; CHECK-LABEL: @kern_call() #0
44; CHECK: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
45; CHECK: call void @func()
46; CHECK: %dec = atomicrmw fsub ptr addrspace(3) @llvm.amdgcn.module.lds, float 2.000000e+00 monotonic, align 8
; Test input: an amdgpu_kernel that calls @func and then performs its own
; atomic fsub (by 2.0) directly on @var0.
; The call and the %dec instruction are matched, in this order, by the
; FileCheck expectations directly above.
47define amdgpu_kernel void @kern_call() {
48  call void @func()
49  %dec = atomicrmw fsub ptr addrspace(3) @var0, float 2.0 monotonic
50  ret void
51}
52
53; This kernel does not need to alloc the LDS block as it makes no calls
54; CHECK-LABEL: @kern_empty()
55; CHECK-NOT: call void @llvm.donothing()
; Test input: a spir_kernel with an empty body; it makes no calls and
; references no globals.
56define spir_kernel void @kern_empty() {
57  ret void
58}
59
60; Make sure we don't crash trying to insert code into a kernel
61; declaration.
; Declaration only (no body), using the amdgpu_kernel calling convention.
62declare amdgpu_kernel void @kernel_declaration()
63
64; CHECK: attributes #0 = { "amdgpu-lds-size"="12" }
65