xref: /llvm-project/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll (revision e39f6c1844fab59c638d8059a6cf139adb42279a)
1; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=true --amdgpu-lower-module-lds-strategy=module < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_ON %s
2; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=true --amdgpu-lower-module-lds-strategy=module < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_ON %s
3; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=false --amdgpu-lower-module-lds-strategy=module < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_OFF %s
4; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=false --amdgpu-lower-module-lds-strategy=module < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_OFF %s
5
6; CHECK: %llvm.amdgcn.kernel.k1.lds.t = type { [32 x i8] }
7; CHECK: %llvm.amdgcn.kernel.k2.lds.t = type { i16, [2 x i8], i16 }
8; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [32 x i64], [32 x i32] }
9; CHECK: %llvm.amdgcn.kernel.k4.lds.t = type { [2 x ptr addrspace(3)] }
10
11; SUPER-ALIGN_ON: @lds.unused = addrspace(3) global i32 poison, align 4
12; SUPER-ALIGN_OFF: @lds.unused = addrspace(3) global i32 poison, align 2
13@lds.unused = addrspace(3) global i32 poison, align 2
14
15@llvm.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @lds.unused to ptr)], section "llvm.metadata"
16
17; CHECK-NOT: @lds.1
18@lds.1 = internal unnamed_addr addrspace(3) global [32 x i8] poison, align 1
19
20; SUPER-ALIGN_ON: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t poison, align 16
21; SUPER-ALIGN_OFF: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t poison, align 1
22
23; CHECK: @llvm.amdgcn.kernel.k2.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k2.lds.t poison, align 4
24; SUPER-ALIGN_ON:  @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t poison, align 16
25; SUPER-ALIGN_OFF: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t poison, align 8
26
27; SUPER-ALIGN_ON:  @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t poison, align 8
28; SUPER-ALIGN_OFF: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t poison, align 4
29
30; CHECK-LABEL: @k1
31; CHECK:  %1 = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds to ptr
32; CHECK:  %ptr = getelementptr inbounds i8, ptr %1, i64 %x
33; CHECK:  store i8 1, ptr %ptr, align 1
; @k1 reaches @lds.1 through a constant-expression addrspacecast to the flat
; address space; the pass must still find and rewrite this indirect use to
; reference the kernel's packed LDS struct (checked above: the cast becomes
; an instruction on @llvm.amdgcn.kernel.k1.lds).
define amdgpu_kernel void @k1(i64 %x) {
  %ptr = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(3) @lds.1 to ptr), i64 %x
  store i8 1, ptr addrspace(0) %ptr, align 1
  ret void
}
39
40@lds.2 = internal unnamed_addr addrspace(3) global i16 poison, align 4
41@lds.3 = internal unnamed_addr addrspace(3) global i16 poison, align 4
42
43; Check that alignment is propagated to uses for scalar variables.
44
45; CHECK-LABEL: @k2
46; CHECK: store i16 1, ptr addrspace(3) @llvm.amdgcn.kernel.k2.lds, align 4
47; CHECK: store i16 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k2.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k2.lds, i32 0, i32 2), align 4
; @k2 stores to two i16 LDS variables (@lds.2/@lds.3, each declared align 4)
; with only align 2 on the stores; the CHECK lines above expect the pass to
; raise the store alignment to 4 once the variables are placed in the
; kernel struct.
define amdgpu_kernel void @k2() {
  store i16 1, ptr addrspace(3) @lds.2, align 2
  store i16 2, ptr addrspace(3) @lds.3, align 2
  ret void
}
53
54@lds.4 = internal unnamed_addr addrspace(3) global [32 x i64] poison, align 8
55@lds.5 = internal unnamed_addr addrspace(3) global [32 x i32] poison, align 4
56
57; Check that alignment is propagated to uses for arrays.
58
59; CHECK-LABEL: @k3
60; CHECK:  store i32 1, ptr addrspace(3) %ptr1, align 8
61; CHECK:  store i32 2, ptr addrspace(3) %ptr2, align 4
62; SUPER-ALIGN_ON:  store i32 3, ptr addrspace(3) %ptr3, align 16
63; SUPER-ALIGN_OFF: store i32 3, ptr addrspace(3) %ptr3, align 8
64; CHECK:  store i32 4, ptr addrspace(3) %ptr4, align 4
65; CHECK:  store i32 5, ptr addrspace(3) %ptr5, align 4
66; CHECK:  %load1 = load i32, ptr addrspace(3) %ptr1, align 8
67; CHECK:  %load2 = load i32, ptr addrspace(3) %ptr2, align 4
68; SUPER-ALIGN_ON:   %load3 = load i32, ptr addrspace(3) %ptr3, align 16
69; SUPER-ALIGN_OFF:  %load3 = load i32, ptr addrspace(3) %ptr3, align 8
70; CHECK:  %load4 = load i32, ptr addrspace(3) %ptr4, align 4
71; CHECK:  %load5 = load i32, ptr addrspace(3) %ptr5, align 4
72; CHECK:  %val1 = atomicrmw volatile add ptr addrspace(3) %ptr1, i32 1 monotonic, align 8
73; CHECK:  %val2 = cmpxchg volatile ptr addrspace(3) %ptr1, i32 1, i32 2 monotonic monotonic, align 8
74; CHECK:  store i16 11, ptr addrspace(3) %ptr1, align 8
75; CHECK:  store i16 12, ptr addrspace(3) %ptr2, align 4
76; SUPER-ALIGN_ON:   store i16 13, ptr addrspace(3) %ptr3, align 16
77; SUPER-ALIGN_OFF:  store i16 13, ptr addrspace(3) %ptr3, align 8
78; CHECK:  store i16 14, ptr addrspace(3) %ptr4, align 4
79; CHECK:  %ptr1.ac = addrspacecast ptr addrspace(3) %ptr1 to ptr
80; CHECK:  %ptr2.ac = addrspacecast ptr addrspace(3) %ptr2 to ptr
81; CHECK:  %ptr3.ac = addrspacecast ptr addrspace(3) %ptr3 to ptr
82; CHECK:  %ptr4.ac = addrspacecast ptr addrspace(3) %ptr4 to ptr
83; CHECK:  store i32 21, ptr %ptr1.ac, align 8
84; CHECK:  store i32 22, ptr %ptr2.ac, align 4
85; SUPER-ALIGN_ON:   store i32 23, ptr %ptr3.ac, align 16
86; SUPER-ALIGN_OFF:  store i32 23, ptr %ptr3.ac, align 8
87; CHECK:  store i32 24, ptr %ptr4.ac, align 4
; @k3 accesses [32 x i32] @lds.5 through GEPs at constant offsets 2..5 and
; one variable offset (%x). The CHECK lines above expect the pass to infer
; a larger alignment for each constant-offset pointer from the struct
; layout (e.g. offset 4 lands on a 16-byte boundary only when super-align
; is on) and to propagate it to stores, loads, atomicrmw, cmpxchg, and to
; accesses made through addrspacecast copies of those pointers. The
; variable-offset pointer %ptr5 must keep the conservative align 4.
define amdgpu_kernel void @k3(i64 %x) {
  store i64 0, ptr addrspace(3) @lds.4, align 8

  %ptr1 = getelementptr inbounds i32, ptr addrspace(3) @lds.5, i64 2
  %ptr2 = getelementptr inbounds i32, ptr addrspace(3) @lds.5, i64 3
  %ptr3 = getelementptr inbounds i32, ptr addrspace(3) @lds.5, i64 4
  %ptr4 = getelementptr inbounds i32, ptr addrspace(3) @lds.5, i64 5
  %ptr5 = getelementptr inbounds i32, ptr addrspace(3) @lds.5, i64 %x

  store i32 1, ptr addrspace(3) %ptr1, align 4
  store i32 2, ptr addrspace(3) %ptr2, align 4
  store i32 3, ptr addrspace(3) %ptr3, align 4
  store i32 4, ptr addrspace(3) %ptr4, align 4
  store i32 5, ptr addrspace(3) %ptr5, align 4

  %load1 = load i32, ptr addrspace(3) %ptr1, align 4
  %load2 = load i32, ptr addrspace(3) %ptr2, align 4
  %load3 = load i32, ptr addrspace(3) %ptr3, align 4
  %load4 = load i32, ptr addrspace(3) %ptr4, align 4
  %load5 = load i32, ptr addrspace(3) %ptr5, align 4

  %val1 = atomicrmw volatile add ptr addrspace(3) %ptr1, i32 1 monotonic, align 4
  %val2 = cmpxchg volatile ptr addrspace(3) %ptr1, i32 1, i32 2 monotonic monotonic, align 4


  store i16 11, ptr addrspace(3) %ptr1, align 2
  store i16 12, ptr addrspace(3) %ptr2, align 2
  store i16 13, ptr addrspace(3) %ptr3, align 2
  store i16 14, ptr addrspace(3) %ptr4, align 2

  %ptr1.ac = addrspacecast ptr addrspace(3) %ptr1 to ptr
  %ptr2.ac = addrspacecast ptr addrspace(3) %ptr2 to ptr
  %ptr3.ac = addrspacecast ptr addrspace(3) %ptr3 to ptr
  %ptr4.ac = addrspacecast ptr addrspace(3) %ptr4 to ptr

  store i32 21, ptr %ptr1.ac, align 4
  store i32 22, ptr %ptr2.ac, align 4
  store i32 23, ptr %ptr3.ac, align 4
  store i32 24, ptr %ptr4.ac, align 4

  ret void
}
130
131@lds.6 = internal unnamed_addr addrspace(3) global [2 x ptr addrspace(3)] poison, align 4
132
133; Check that alignment is not propagated if use is not a pointer operand.
134
135; CHECK-LABEL: @k4
136; CHECK:           store i32 poison, ptr addrspace(3) %gep, align 4
137; CHECK:           store ptr addrspace(3) %gep, ptr poison, align 4
138; CHECK:           %val1 = cmpxchg volatile ptr addrspace(3) %gep, i32 1, i32 2 monotonic monotonic, align 4
139; CHECK:           %val2 = cmpxchg volatile ptr poison, ptr addrspace(3) %gep, ptr addrspace(3) poison monotonic monotonic, align 4
; @k4 uses %gep both as a pointer operand (first store, %val1's pointer) and
; as a stored value / cmpxchg value operand. The CHECK lines above expect the
; pass to leave the alignment of all four instructions at 4: alignment is
; only meaningful (and only updated) when the LDS-derived pointer is the
; pointer operand of the access, not when it is merely a data operand.
define amdgpu_kernel void @k4() {
  %gep = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) @lds.6, i64 1
  store i32 poison, ptr addrspace(3) %gep, align 4
  store ptr addrspace(3) %gep, ptr poison, align 4
  %val1 = cmpxchg volatile ptr addrspace(3) %gep, i32 1, i32 2 monotonic monotonic, align 4
  %val2 = cmpxchg volatile ptr poison, ptr addrspace(3) %gep, ptr addrspace(3) poison monotonic monotonic, align 4
  ret void
}
148