; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s

; This test runs amdgpu-lower-module-lds through both opt flag spellings above
; and verifies the per-kernel struct type that is expected for each kernel's
; LDS variables. Every expected type lists its fields as i8 arrays, so the
; field sizes in bytes can be read directly from the array lengths.

; Properly aligned, same size as alignment.
; CHECK: %llvm.amdgcn.kernel.k0.lds.t = type { [16 x i8], [8 x i8], [4 x i8], [2 x i8], [1 x i8] }

; Different properly aligned values, but same size of 1.
; CHECK: %llvm.amdgcn.kernel.k1.lds.t = type { [1 x i8], [1 x i8], [1 x i8], [1 x i8], [1 x i8], [3 x i8], [1 x i8] }

; All variables are under-aligned; each one needs to be fixed up to a different
; alignment boundary.
; CHECK: %llvm.amdgcn.kernel.k2.lds.t = type { [9 x i8], [1 x i8], [2 x i8], [3 x i8], [1 x i8], [5 x i8] }

; All LDS variables are under-aligned; they need to be allocated on an 8-byte
; boundary.
; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [7 x i8], [1 x i8], [7 x i8], [1 x i8], [6 x i8], [2 x i8], [5 x i8] }

; All LDS variables are under-aligned; they need to be allocated on a 16-byte
; boundary.
; CHECK: %llvm.amdgcn.kernel.k4.lds.t = type { [12 x i8], [4 x i8], [11 x i8], [5 x i8], [10 x i8], [6 x i8], [9 x i8] }

; All LDS variables are properly aligned on a 16-byte boundary, but they are of
; different sizes.
; CHECK: %llvm.amdgcn.kernel.k5.lds.t = type { [20 x i8], [12 x i8], [19 x i8], [13 x i8], [18 x i8], [14 x i8], [17 x i8] }

; For every kernel, a single internal addrspace(3) global of the kernel's
; struct type is expected, initialised to poison and carrying the alignment
; shown (8 for k3, 16 for all other kernels).
; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t poison, align 16
; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t poison, align 16
; CHECK: @llvm.amdgcn.kernel.k2.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k2.lds.t poison, align 16
; CHECK: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t poison, align 8
; CHECK: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t poison, align 16
; CHECK: @llvm.amdgcn.kernel.k5.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k5.lds.t poison, align 16


; Properly aligned, same size as alignment.
; Negative checks: the original @k0.lds.* names are expected to be absent from
; the rest of the output.
; CHECK-NOT: @k0.lds.size.1.align.1
; CHECK-NOT: @k0.lds.size.2.align.2
; CHECK-NOT: @k0.lds.size.4.align.4
; CHECK-NOT: @k0.lds.size.8.align.8
; CHECK-NOT: @k0.lds.size.16.align.16
; Input variables for k0: five addrspace(3) globals whose size in bytes equals
; their declared alignment (1, 2, 4, 8 and 16).
@k0.lds.size.1.align.1 = internal unnamed_addr addrspace(3) global [1 x i8] poison, align 1
@k0.lds.size.2.align.2 = internal unnamed_addr addrspace(3) global [2 x i8] poison, align 2
@k0.lds.size.4.align.4 = internal unnamed_addr addrspace(3) global [4 x i8] poison, align 4
@k0.lds.size.8.align.8 = internal unnamed_addr addrspace(3) global [8 x i8] poison, align 8
@k0.lds.size.16.align.16 = internal unnamed_addr addrspace(3) global [16 x i8] poison, align 16

; Kernel k0 stores one byte to each of its five variables.
; NOTE(review): the stored values look incidental; the stores presumably exist
; only to give each variable a use inside this kernel - confirm.
define amdgpu_kernel void @k0() {
  store i8 1, ptr addrspace(3) @k0.lds.size.1.align.1, align 1

  store i8 2, ptr addrspace(3) @k0.lds.size.2.align.2, align 2

  store i8 3, ptr addrspace(3) @k0.lds.size.4.align.4, align 4

  store i8 4, ptr addrspace(3) @k0.lds.size.8.align.8, align 8

  store i8 5, ptr addrspace(3) @k0.lds.size.16.align.16, align 16

  ret void
}

; Different properly aligned values, but same size of 1.
; CHECK-NOT: @k1.lds.size.1.align.1
; CHECK-NOT: @k1.lds.size.1.align.2
; CHECK-NOT: @k1.lds.size.1.align.4
; CHECK-NOT: @k1.lds.size.1.align.8
; CHECK-NOT: @k1.lds.size.1.align.16
; Input variables for k1: five one-byte addrspace(3) globals that differ only
; in their declared alignment (1, 2, 4, 8 and 16). The negative checks above
; expect these names to be absent from the rest of the output.
@k1.lds.size.1.align.1 = internal unnamed_addr addrspace(3) global [1 x i8] poison, align 1
@k1.lds.size.1.align.2 = internal unnamed_addr addrspace(3) global [1 x i8] poison, align 2
@k1.lds.size.1.align.4 = internal unnamed_addr addrspace(3) global [1 x i8] poison, align 4
@k1.lds.size.1.align.8 = internal unnamed_addr addrspace(3) global [1 x i8] poison, align 8
@k1.lds.size.1.align.16 = internal unnamed_addr addrspace(3) global [1 x i8] poison, align 16

; Kernel k1 stores one byte to each of its five variables.
; NOTE(review): the stored values look incidental; the stores presumably exist
; only to give each variable a use inside this kernel - confirm.
define amdgpu_kernel void @k1() {
  store i8 1, ptr addrspace(3) @k1.lds.size.1.align.1, align 1

  store i8 2, ptr addrspace(3) @k1.lds.size.1.align.2, align 2

  store i8 3, ptr addrspace(3) @k1.lds.size.1.align.4, align 4

  store i8 4, ptr addrspace(3) @k1.lds.size.1.align.8, align 8

  store i8 5, ptr addrspace(3) @k1.lds.size.1.align.16, align 16

  ret void
}

; All variables are under-aligned; each one needs to be fixed up to a different
; alignment boundary.
; CHECK-NOT: @k2.lds.size.2.align.1
; CHECK-NOT: @k2.lds.size.3.align.2
; CHECK-NOT: @k2.lds.size.5.align.4
; CHECK-NOT: @k2.lds.size.9.align.8
; Input variables for k2: each global is one byte larger than a power of two
; (sizes 2, 3, 5, 9) and is declared with the next lower power-of-two
; alignment (1, 2, 4, 8).
@k2.lds.size.2.align.1 = internal unnamed_addr addrspace(3) global [2 x i8] poison, align 1
@k2.lds.size.3.align.2 = internal unnamed_addr addrspace(3) global [3 x i8] poison, align 2
@k2.lds.size.5.align.4 = internal unnamed_addr addrspace(3) global [5 x i8] poison, align 4
@k2.lds.size.9.align.8 = internal unnamed_addr addrspace(3) global [9 x i8] poison, align 8

; Kernel k2 stores one byte to each of its four variables.
define amdgpu_kernel void @k2() {
  store i8 1, ptr addrspace(3) @k2.lds.size.2.align.1, align 1

  store i8 2, ptr addrspace(3) @k2.lds.size.3.align.2, align 2

  store i8 3, ptr addrspace(3) @k2.lds.size.5.align.4, align 4

  store i8 4, ptr addrspace(3) @k2.lds.size.9.align.8, align 8

  ret void
}

; All LDS variables are under-aligned; they need to be allocated on an 8-byte
; boundary.
; CHECK-NOT: @k3.lds.size.5.align.2
; CHECK-NOT: @k3.lds.size.6.align.2
; CHECK-NOT: @k3.lds.size.7.align.2
; CHECK-NOT: @k3.lds.size.7.align.4
; Input variables for k3: sizes 5, 6 and 7 declared with alignment 2, plus a
; second size-7 variable declared with alignment 4.
@k3.lds.size.5.align.2 = internal unnamed_addr addrspace(3) global [5 x i8] poison, align 2
@k3.lds.size.6.align.2 = internal unnamed_addr addrspace(3) global [6 x i8] poison, align 2
@k3.lds.size.7.align.2 = internal unnamed_addr addrspace(3) global [7 x i8] poison, align 2
@k3.lds.size.7.align.4 = internal unnamed_addr addrspace(3) global [7 x i8] poison, align 4

; Kernel k3 stores one byte to each of its four variables.
define amdgpu_kernel void @k3() {
  store i8 1, ptr addrspace(3) @k3.lds.size.5.align.2, align 2

  store i8 2, ptr addrspace(3) @k3.lds.size.6.align.2, align 2

  store i8 3, ptr addrspace(3) @k3.lds.size.7.align.2, align 2

  store i8 4, ptr addrspace(3) @k3.lds.size.7.align.4, align 4

  ret void
}

; All LDS variables are under-aligned; they need to be allocated on a 16-byte
; boundary.
; CHECK-NOT: @k4.lds.size.9.align.1
; CHECK-NOT: @k4.lds.size.10.align.2
; CHECK-NOT: @k4.lds.size.11.align.4
; CHECK-NOT: @k4.lds.size.12.align.8
; Input variables for k4: sizes 9 through 12 declared with alignments 1, 2, 4
; and 8 respectively.
@k4.lds.size.9.align.1 = internal unnamed_addr addrspace(3) global [9 x i8] poison, align 1
@k4.lds.size.10.align.2 = internal unnamed_addr addrspace(3) global [10 x i8] poison, align 2
@k4.lds.size.11.align.4 = internal unnamed_addr addrspace(3) global [11 x i8] poison, align 4
@k4.lds.size.12.align.8 = internal unnamed_addr addrspace(3) global [12 x i8] poison, align 8

; Kernel k4 stores one byte to each of its four variables.
define amdgpu_kernel void @k4() {
  store i8 1, ptr addrspace(3) @k4.lds.size.9.align.1, align 1

  store i8 2, ptr addrspace(3) @k4.lds.size.10.align.2, align 2

  store i8 3, ptr addrspace(3) @k4.lds.size.11.align.4, align 4

  store i8 4, ptr addrspace(3) @k4.lds.size.12.align.8, align 8

  ret void
}

; All LDS variables are properly aligned on a 16-byte boundary, but they are of
; different sizes.
; CHECK-NOT: @k5.lds.size.17.align.16
; CHECK-NOT: @k5.lds.size.18.align.16
; CHECK-NOT: @k5.lds.size.19.align.16
; CHECK-NOT: @k5.lds.size.20.align.16
; Input variables for k5: sizes 17 through 20, all declared with alignment 16.
@k5.lds.size.17.align.16 = internal unnamed_addr addrspace(3) global [17 x i8] poison, align 16
@k5.lds.size.18.align.16 = internal unnamed_addr addrspace(3) global [18 x i8] poison, align 16
@k5.lds.size.19.align.16 = internal unnamed_addr addrspace(3) global [19 x i8] poison, align 16
@k5.lds.size.20.align.16 = internal unnamed_addr addrspace(3) global [20 x i8] poison, align 16

; Kernel k5 stores one byte to each of its four variables.
define amdgpu_kernel void @k5() {
  store i8 1, ptr addrspace(3) @k5.lds.size.17.align.16, align 16

  store i8 2, ptr addrspace(3) @k5.lds.size.18.align.16, align 16

  store i8 3, ptr addrspace(3) @k5.lds.size.19.align.16, align 16

  store i8 4, ptr addrspace(3) @k5.lds.size.20.align.16, align 16

  ret void
}