; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s

; Trivial optimization of generic addressing

; CHECK-LABEL: @load_global_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast ptr %generic_scalar to ptr addrspace(1)
; CHECK-NEXT: %tmp1 = load float, ptr addrspace(1) %tmp0
; CHECK-NEXT: ret float %tmp1
define float @load_global_from_flat(ptr %generic_scalar) #0 {
  %tmp0 = addrspacecast ptr %generic_scalar to ptr addrspace(1)
  %tmp1 = load float, ptr addrspace(1) %tmp0
  ret float %tmp1
}

; CHECK-LABEL: @load_constant_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast ptr %generic_scalar to ptr addrspace(4)
; CHECK-NEXT: %tmp1 = load float, ptr addrspace(4) %tmp0
; CHECK-NEXT: ret float %tmp1
define float @load_constant_from_flat(ptr %generic_scalar) #0 {
  %tmp0 = addrspacecast ptr %generic_scalar to ptr addrspace(4)
  %tmp1 = load float, ptr addrspace(4) %tmp0
  ret float %tmp1
}

; CHECK-LABEL: @load_group_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast ptr %generic_scalar to ptr addrspace(3)
; CHECK-NEXT: %tmp1 = load float, ptr addrspace(3) %tmp0
; CHECK-NEXT: ret float %tmp1
define float @load_group_from_flat(ptr %generic_scalar) #0 {
  %tmp0 = addrspacecast ptr %generic_scalar to ptr addrspace(3)
  %tmp1 = load float, ptr addrspace(3) %tmp0
  ret float %tmp1
}

; CHECK-LABEL: @load_private_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast ptr %generic_scalar to ptr addrspace(5)
; CHECK-NEXT: %tmp1 = load float, ptr addrspace(5) %tmp0
; CHECK-NEXT: ret float %tmp1
define float @load_private_from_flat(ptr %generic_scalar) #0 {
  %tmp0 = addrspacecast ptr %generic_scalar to ptr addrspace(5)
  %tmp1 = load float, ptr addrspace(5) %tmp0
  ret float %tmp1
}

; CHECK-LABEL: @store_global_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast ptr %generic_scalar to ptr addrspace(1)
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) %tmp0
define amdgpu_kernel void @store_global_from_flat(ptr %generic_scalar) #0 {
  %tmp0 = addrspacecast ptr %generic_scalar to ptr addrspace(1)
  store float 0.0, ptr addrspace(1) %tmp0
  ret void
}

; CHECK-LABEL: @store_group_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast ptr %generic_scalar to ptr addrspace(3)
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(3) %tmp0
define amdgpu_kernel void @store_group_from_flat(ptr %generic_scalar) #0 {
  %tmp0 = addrspacecast ptr %generic_scalar to ptr addrspace(3)
  store float 0.0, ptr addrspace(3) %tmp0
  ret void
}

; CHECK-LABEL: @store_private_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast ptr %generic_scalar to ptr addrspace(5)
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) %tmp0
define amdgpu_kernel void @store_private_from_flat(ptr %generic_scalar) #0 {
  %tmp0 = addrspacecast ptr %generic_scalar to ptr addrspace(5)
  store float 0.0, ptr addrspace(5) %tmp0
  ret void
}

; Optimized to global load/store.
; CHECK-LABEL: @load_store_global(
; CHECK-NEXT: %val = load i32, ptr addrspace(1) %input, align 4
; CHECK-NEXT: store i32 %val, ptr addrspace(1) %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_global(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 {
  %tmp0 = addrspacecast ptr addrspace(1) %input to ptr
  %tmp1 = addrspacecast ptr addrspace(1) %output to ptr
  %val = load i32, ptr %tmp0, align 4
  store i32 %val, ptr %tmp1, align 4
  ret void
}
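; Illustrative example added for exposition (not part of the upstream test):
; the pass also infers address spaces through getelementptr chains rooted at
; an addrspacecast, so both the GEP and the load below are expected to end up
; in addrspace(1). The function name and the intentionally loose CHECK lines
; are assumptions about the rewritten output, not upstream-verified checks.
; CHECK-LABEL: @load_global_from_flat_gep(
; CHECK: getelementptr float, ptr addrspace(1) %global.ptr, i64 16
; CHECK: load float, ptr addrspace(1)
define float @load_global_from_flat_gep(ptr addrspace(1) %global.ptr) #0 {
  %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
  %gep = getelementptr float, ptr %cast, i64 16
  %v = load float, ptr %gep
  ret float %v
}
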
; Optimized to group load/store.
; CHECK-LABEL: @load_store_group(
; CHECK-NEXT: %val = load i32, ptr addrspace(3) %input, align 4
; CHECK-NEXT: store i32 %val, ptr addrspace(3) %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_group(ptr addrspace(3) nocapture %input, ptr addrspace(3) nocapture %output) #0 {
  %tmp0 = addrspacecast ptr addrspace(3) %input to ptr
  %tmp1 = addrspacecast ptr addrspace(3) %output to ptr
  %val = load i32, ptr %tmp0, align 4
  store i32 %val, ptr %tmp1, align 4
  ret void
}

; Optimized to private load/store.
; CHECK-LABEL: @load_store_private(
; CHECK-NEXT: %val = load i32, ptr addrspace(5) %input, align 4
; CHECK-NEXT: store i32 %val, ptr addrspace(5) %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_private(ptr addrspace(5) nocapture %input, ptr addrspace(5) nocapture %output) #0 {
  %tmp0 = addrspacecast ptr addrspace(5) %input to ptr
  %tmp1 = addrspacecast ptr addrspace(5) %output to ptr
  %val = load i32, ptr %tmp0, align 4
  store i32 %val, ptr %tmp1, align 4
  ret void
}

; No optimization. Flat load/store.
; CHECK-LABEL: @load_store_flat(
; CHECK-NEXT: %val = load i32, ptr %input, align 4
; CHECK-NEXT: store i32 %val, ptr %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_flat(ptr nocapture %input, ptr nocapture %output) #0 {
  %val = load i32, ptr %input, align 4
  store i32 %val, ptr %output, align 4
  ret void
}

; CHECK-LABEL: @store_addrspacecast_ptr_value(
; CHECK: %cast = addrspacecast ptr addrspace(1) %input to ptr
; CHECK-NEXT: store ptr %cast, ptr addrspace(1) %output, align 4
define amdgpu_kernel void @store_addrspacecast_ptr_value(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 {
  %cast = addrspacecast ptr addrspace(1) %input to ptr
  store ptr %cast, ptr addrspace(1) %output, align 4
  ret void
}

; CHECK-LABEL: @atomicrmw_add_global_to_flat(
; CHECK-NEXT: %ret = atomicrmw add ptr addrspace(1) %global.ptr, i32 %y seq_cst
define i32 @atomicrmw_add_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 {
  %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
  %ret = atomicrmw add ptr %cast, i32 %y seq_cst
  ret i32 %ret
}

; CHECK-LABEL: @atomicrmw_add_group_to_flat(
; CHECK-NEXT: %ret = atomicrmw add ptr addrspace(3) %group.ptr, i32 %y seq_cst
define i32 @atomicrmw_add_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 {
  %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
  %ret = atomicrmw add ptr %cast, i32 %y seq_cst
  ret i32 %ret
}

; CHECK-LABEL: @cmpxchg_global_to_flat(
; CHECK: %ret = cmpxchg ptr addrspace(1) %global.ptr, i32 %cmp, i32 %val seq_cst monotonic
define { i32, i1 } @cmpxchg_global_to_flat(ptr addrspace(1) %global.ptr, i32 %cmp, i32 %val) #0 {
  %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
  %ret = cmpxchg ptr %cast, i32 %cmp, i32 %val seq_cst monotonic
  ret { i32, i1 } %ret
}

; CHECK-LABEL: @cmpxchg_group_to_flat(
; CHECK: %ret = cmpxchg ptr addrspace(3) %group.ptr, i32 %cmp, i32 %val seq_cst monotonic
define { i32, i1 } @cmpxchg_group_to_flat(ptr addrspace(3) %group.ptr, i32 %cmp, i32 %val) #0 {
  %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
  %ret = cmpxchg ptr %cast, i32 %cmp, i32 %val seq_cst monotonic
  ret { i32, i1 } %ret
}

; Not a pointer operand.
; CHECK-LABEL: @cmpxchg_group_to_flat_wrong_operand(
; CHECK: %cast.cmp = addrspacecast ptr addrspace(3) %cmp.ptr to ptr
; CHECK: %ret = cmpxchg ptr addrspace(3) %cas.ptr, ptr %cast.cmp, ptr %val seq_cst monotonic
define { ptr, i1 } @cmpxchg_group_to_flat_wrong_operand(ptr addrspace(3) %cas.ptr, ptr addrspace(3) %cmp.ptr, ptr %val) #0 {
  %cast.cmp = addrspacecast ptr addrspace(3) %cmp.ptr to ptr
  %ret = cmpxchg ptr addrspace(3) %cas.ptr, ptr %cast.cmp, ptr %val seq_cst monotonic
  ret { ptr, i1 } %ret
}

; Null pointer in the local address space.
; CHECK-LABEL: @local_nullptr
; CHECK: icmp ne ptr addrspace(3) %a, addrspacecast (ptr addrspace(5) null to ptr addrspace(3))
; CHECK-NOT: ptr addrspace(3) null
define void @local_nullptr(ptr addrspace(1) nocapture %results, ptr addrspace(3) %a) {
entry:
  %tobool = icmp ne ptr addrspace(3) %a, addrspacecast (ptr addrspace(5) null to ptr addrspace(3))
  %conv = zext i1 %tobool to i32
  store i32 %conv, ptr addrspace(1) %results, align 4
  ret void
}

; CHECK-LABEL: @atomicrmw_add_global_to_flat_preserve_amdgpu_md(
; CHECK-NEXT: %ret = atomicrmw add ptr addrspace(1) %global.ptr, i32 %y seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
define i32 @atomicrmw_add_global_to_flat_preserve_amdgpu_md(ptr addrspace(1) %global.ptr, i32 %y) #0 {
  %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
  %ret = atomicrmw add ptr %cast, i32 %y seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
  ret i32 %ret
}

; Make sure there's no assert.
; CHECK-LABEL: @try_infer_getelementptr_constant_null(
; CHECK-NEXT: %ce = getelementptr i8, ptr getelementptr inbounds (i8, ptr null, i64 8), i64 0
define ptr @try_infer_getelementptr_constant_null() {
  %ce = getelementptr i8, ptr getelementptr inbounds (i8, ptr null, i64 8), i64 0
  ret ptr %ce
}

attributes #0 = { nounwind }

!0 = !{}