; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=true --amdgpu-lower-module-lds-strategy=module < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_ON %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=true --amdgpu-lower-module-lds-strategy=module < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_ON %s
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=false --amdgpu-lower-module-lds-strategy=module < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_OFF %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=false --amdgpu-lower-module-lds-strategy=module < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_OFF %s

; Runs the amdgpu-lower-module-lds pass (module strategy) through both opt
; spellings of the pass name, once with --amdgpu-super-align-lds-globals=true
; and once with it set to false. Expectations shared by both settings use the
; common prefix; expectations that depend on the flag use the per-setting
; prefixes.

; Each kernel's LDS variables are expected to be packed into one struct type.
; CHECK: %llvm.amdgcn.kernel.k1.lds.t = type { [32 x i8] }
; CHECK: %llvm.amdgcn.kernel.k2.lds.t = type { i16, [2 x i8], i16 }
; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [32 x i64], [32 x i32] }
; CHECK: %llvm.amdgcn.kernel.k4.lds.t = type { [2 x ptr addrspace(3)] }

; @lds.unused is referenced only from @llvm.used; its expected alignment
; depends on the super-align flag.
; SUPER-ALIGN_ON: @lds.unused = addrspace(3) global i32 poison, align 4
; SUPER-ALIGN_OFF: @lds.unused = addrspace(3) global i32 poison, align 2
@lds.unused = addrspace(3) global i32 poison, align 2

@llvm.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @lds.unused to ptr)], section "llvm.metadata"

; The original variable must not survive; it is replaced by the k1 struct.
; CHECK-NOT: @lds.1
@lds.1 = internal unnamed_addr addrspace(3) global [32 x i8] poison, align 1

; SUPER-ALIGN_ON: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t poison, align 16
; SUPER-ALIGN_OFF: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t poison, align 1

; CHECK: @llvm.amdgcn.kernel.k2.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k2.lds.t poison, align 4
; SUPER-ALIGN_ON: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t poison, align 16
; SUPER-ALIGN_OFF: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t poison, align 8

; SUPER-ALIGN_ON: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t poison, align 8
; SUPER-ALIGN_OFF: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t poison, align 4

; The byte store goes through a flat pointer at a runtime offset; it is
; expected to keep align 1 under both settings.
; CHECK-LABEL: @k1
; CHECK: %1 = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds to ptr
; CHECK: %ptr = getelementptr inbounds i8, ptr %1, i64 %x
; CHECK: store i8 1, ptr %ptr, align 1
define amdgpu_kernel void @k1(i64 %x) {
  %ptr = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(3) @lds.1 to ptr), i64 %x
  store i8 1, ptr addrspace(0) %ptr, align 1
  ret void
}

@lds.2 = internal unnamed_addr addrspace(3) global i16 poison, align 4
@lds.3 = internal unnamed_addr addrspace(3) global i16 poison, align 4

; Check that alignment is propagated to uses for scalar variables.

; The stores are written with align 2 but the variables are declared align 4,
; so both stores are expected to come out as align 4.
; CHECK-LABEL: @k2
; CHECK: store i16 1, ptr addrspace(3) @llvm.amdgcn.kernel.k2.lds, align 4
; CHECK: store i16 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k2.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k2.lds, i32 0, i32 2), align 4
define amdgpu_kernel void @k2() {
  store i16 1, ptr addrspace(3) @lds.2, align 2
  store i16 2, ptr addrspace(3) @lds.3, align 2
  ret void
}

@lds.4 = internal unnamed_addr addrspace(3) global [32 x i64] poison, align 8
@lds.5 = internal unnamed_addr addrspace(3) global [32 x i32] poison, align 4

; Check that alignment is propagated to uses for arrays.
; CHECK-LABEL: @k3
; CHECK: store i32 1, ptr addrspace(3) %ptr1, align 8
; CHECK: store i32 2, ptr addrspace(3) %ptr2, align 4
; SUPER-ALIGN_ON: store i32 3, ptr addrspace(3) %ptr3, align 16
; SUPER-ALIGN_OFF: store i32 3, ptr addrspace(3) %ptr3, align 8
; CHECK: store i32 4, ptr addrspace(3) %ptr4, align 4
; CHECK: store i32 5, ptr addrspace(3) %ptr5, align 4
; CHECK: %load1 = load i32, ptr addrspace(3) %ptr1, align 8
; CHECK: %load2 = load i32, ptr addrspace(3) %ptr2, align 4
; SUPER-ALIGN_ON: %load3 = load i32, ptr addrspace(3) %ptr3, align 16
; SUPER-ALIGN_OFF: %load3 = load i32, ptr addrspace(3) %ptr3, align 8
; CHECK: %load4 = load i32, ptr addrspace(3) %ptr4, align 4
; CHECK: %load5 = load i32, ptr addrspace(3) %ptr5, align 4
; CHECK: %val1 = atomicrmw volatile add ptr addrspace(3) %ptr1, i32 1 monotonic, align 8
; CHECK: %val2 = cmpxchg volatile ptr addrspace(3) %ptr1, i32 1, i32 2 monotonic monotonic, align 8
; CHECK: store i16 11, ptr addrspace(3) %ptr1, align 8
; CHECK: store i16 12, ptr addrspace(3) %ptr2, align 4
; SUPER-ALIGN_ON: store i16 13, ptr addrspace(3) %ptr3, align 16
; SUPER-ALIGN_OFF: store i16 13, ptr addrspace(3) %ptr3, align 8
; CHECK: store i16 14, ptr addrspace(3) %ptr4, align 4
; CHECK: %ptr1.ac = addrspacecast ptr addrspace(3) %ptr1 to ptr
; CHECK: %ptr2.ac = addrspacecast ptr addrspace(3) %ptr2 to ptr
; CHECK: %ptr3.ac = addrspacecast ptr addrspace(3) %ptr3 to ptr
; CHECK: %ptr4.ac = addrspacecast ptr addrspace(3) %ptr4 to ptr
; CHECK: store i32 21, ptr %ptr1.ac, align 8
; CHECK: store i32 22, ptr %ptr2.ac, align 4
; SUPER-ALIGN_ON: store i32 23, ptr %ptr3.ac, align 16
; SUPER-ALIGN_OFF: store i32 23, ptr %ptr3.ac, align 8
; CHECK: store i32 24, ptr %ptr4.ac, align 4
define amdgpu_kernel void @k3(i64 %x) {
  ; Uses @lds.4 so that it is part of this kernel's LDS as well as @lds.5.
  store i64 0, ptr addrspace(3) @lds.4, align 8

  ; %ptr1..%ptr4 are constant-index elements of @lds.5; %ptr5 is indexed by
  ; the runtime value %x.
  %ptr1 = getelementptr inbounds i32, ptr addrspace(3) @lds.5, i64 2
  %ptr2 = getelementptr inbounds i32, ptr addrspace(3) @lds.5, i64 3
  %ptr3 = getelementptr inbounds i32, ptr addrspace(3) @lds.5, i64 4
  %ptr4 = getelementptr inbounds i32, ptr addrspace(3) @lds.5, i64 5
  %ptr5 = getelementptr inbounds i32, ptr addrspace(3) @lds.5, i64 %x

  ; Every i32 access below is written with align 4. The expectations above
  ; raise %ptr1 to 8 and %ptr3 to 16 or 8 depending on the super-align flag,
  ; and leave %ptr2, %ptr4 and %ptr5 at 4.
  store i32 1, ptr addrspace(3) %ptr1, align 4
  store i32 2, ptr addrspace(3) %ptr2, align 4
  store i32 3, ptr addrspace(3) %ptr3, align 4
  store i32 4, ptr addrspace(3) %ptr4, align 4
  store i32 5, ptr addrspace(3) %ptr5, align 4

  %load1 = load i32, ptr addrspace(3) %ptr1, align 4
  %load2 = load i32, ptr addrspace(3) %ptr2, align 4
  %load3 = load i32, ptr addrspace(3) %ptr3, align 4
  %load4 = load i32, ptr addrspace(3) %ptr4, align 4
  %load5 = load i32, ptr addrspace(3) %ptr5, align 4

  ; Atomic operations on %ptr1 are expected to get the same raised alignment.
  %val1 = atomicrmw volatile add ptr addrspace(3) %ptr1, i32 1 monotonic, align 4
  %val2 = cmpxchg volatile ptr addrspace(3) %ptr1, i32 1, i32 2 monotonic monotonic, align 4


  ; Narrower i16 stores, written with align 2, through the same pointers.
  store i16 11, ptr addrspace(3) %ptr1, align 2
  store i16 12, ptr addrspace(3) %ptr2, align 2
  store i16 13, ptr addrspace(3) %ptr3, align 2
  store i16 14, ptr addrspace(3) %ptr4, align 2

  ; The same pointers after an addrspacecast to a flat pointer.
  %ptr1.ac = addrspacecast ptr addrspace(3) %ptr1 to ptr
  %ptr2.ac = addrspacecast ptr addrspace(3) %ptr2 to ptr
  %ptr3.ac = addrspacecast ptr addrspace(3) %ptr3 to ptr
  %ptr4.ac = addrspacecast ptr addrspace(3) %ptr4 to ptr

  store i32 21, ptr %ptr1.ac, align 4
  store i32 22, ptr %ptr2.ac, align 4
  store i32 23, ptr %ptr3.ac, align 4
  store i32 24, ptr %ptr4.ac, align 4

  ret void
}

@lds.6 = internal unnamed_addr addrspace(3) global [2 x ptr addrspace(3)] poison, align 4

; Check that alignment is not propagated if use is not a pointer operand.
; All four instructions are expected to keep the align 4 they are written with.
; CHECK-LABEL: @k4
; CHECK: store i32 poison, ptr addrspace(3) %gep, align 4
; CHECK: store ptr addrspace(3) %gep, ptr poison, align 4
; CHECK: %val1 = cmpxchg volatile ptr addrspace(3) %gep, i32 1, i32 2 monotonic monotonic, align 4
; CHECK: %val2 = cmpxchg volatile ptr poison, ptr addrspace(3) %gep, ptr addrspace(3) poison monotonic monotonic, align 4
define amdgpu_kernel void @k4() {
  %gep = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) @lds.6, i64 1
  ; Here %gep is the address being stored to.
  store i32 poison, ptr addrspace(3) %gep, align 4
  ; Here %gep is the value being stored; the address is poison.
  store ptr addrspace(3) %gep, ptr poison, align 4
  ; %gep as the cmpxchg address operand.
  %val1 = cmpxchg volatile ptr addrspace(3) %gep, i32 1, i32 2 monotonic monotonic, align 4
  ; %gep as the cmpxchg compare value; the address is poison.
  %val2 = cmpxchg volatile ptr poison, ptr addrspace(3) %gep, ptr addrspace(3) poison monotonic monotonic, align 4
  ret void
}