; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s

; Check that volatile users of addrspacecast are not replaced.

; CHECK-LABEL: @volatile_load_flat_from_global(
; CHECK: load volatile i32, ptr
; CHECK: store i32 %val, ptr addrspace(1)
define amdgpu_kernel void @volatile_load_flat_from_global(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 {
  %tmp0 = addrspacecast ptr addrspace(1) %input to ptr
  %tmp1 = addrspacecast ptr addrspace(1) %output to ptr
  %val = load volatile i32, ptr %tmp0, align 4
  store i32 %val, ptr %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_load_flat_from_constant(
; CHECK: load volatile i32, ptr
; CHECK: store i32 %val, ptr addrspace(1)
define amdgpu_kernel void @volatile_load_flat_from_constant(ptr addrspace(4) nocapture %input, ptr addrspace(1) nocapture %output) #0 {
  %tmp0 = addrspacecast ptr addrspace(4) %input to ptr
  %tmp1 = addrspacecast ptr addrspace(1) %output to ptr
  %val = load volatile i32, ptr %tmp0, align 4
  store i32 %val, ptr %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_load_flat_from_group(
; CHECK: load volatile i32, ptr
; CHECK: store i32 %val, ptr addrspace(3)
define amdgpu_kernel void @volatile_load_flat_from_group(ptr addrspace(3) nocapture %input, ptr addrspace(3) nocapture %output) #0 {
  %tmp0 = addrspacecast ptr addrspace(3) %input to ptr
  %tmp1 = addrspacecast ptr addrspace(3) %output to ptr
  %val = load volatile i32, ptr %tmp0, align 4
  store i32 %val, ptr %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_load_flat_from_private(
; CHECK: load volatile i32, ptr
; CHECK: store i32 %val, ptr addrspace(5)
define amdgpu_kernel void @volatile_load_flat_from_private(ptr addrspace(5) nocapture %input, ptr addrspace(5) nocapture %output) #0 {
  %tmp0 = addrspacecast ptr addrspace(5) %input to ptr
  %tmp1 = addrspacecast ptr addrspace(5) %output to ptr
  %val = load volatile i32, ptr %tmp0, align 4
  store i32 %val, ptr %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_store_flat_to_global(
; CHECK: load i32, ptr addrspace(1)
; CHECK: store volatile i32 %val, ptr
define amdgpu_kernel void @volatile_store_flat_to_global(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 {
  %tmp0 = addrspacecast ptr addrspace(1) %input to ptr
  %tmp1 = addrspacecast ptr addrspace(1) %output to ptr
  %val = load i32, ptr %tmp0, align 4
  store volatile i32 %val, ptr %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_store_flat_to_group(
; CHECK: load i32, ptr addrspace(3)
; CHECK: store volatile i32 %val, ptr
define amdgpu_kernel void @volatile_store_flat_to_group(ptr addrspace(3) nocapture %input, ptr addrspace(3) nocapture %output) #0 {
  %tmp0 = addrspacecast ptr addrspace(3) %input to ptr
  %tmp1 = addrspacecast ptr addrspace(3) %output to ptr
  %val = load i32, ptr %tmp0, align 4
  store volatile i32 %val, ptr %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_store_flat_to_private(
; CHECK: load i32, ptr addrspace(5)
; CHECK: store volatile i32 %val, ptr
define amdgpu_kernel void @volatile_store_flat_to_private(ptr addrspace(5) nocapture %input, ptr addrspace(5) nocapture %output) #0 {
  %tmp0 = addrspacecast ptr addrspace(5) %input to ptr
  %tmp1 = addrspacecast ptr addrspace(5) %output to ptr
  %val = load i32, ptr %tmp0, align 4
  store volatile i32 %val, ptr %tmp1, align 4
  ret void
}

; CHECK-LABEL: @volatile_atomicrmw_add_group_to_flat(
; CHECK: addrspacecast ptr addrspace(3) %group.ptr to ptr
; CHECK: atomicrmw volatile add ptr
define i32 @volatile_atomicrmw_add_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 {
  %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
  %ret = atomicrmw volatile add ptr %cast, i32 %y seq_cst
  ret i32 %ret
}

; CHECK-LABEL: @volatile_atomicrmw_add_global_to_flat(
; CHECK: addrspacecast ptr addrspace(1) %global.ptr to ptr
; CHECK: %ret = atomicrmw volatile add ptr
define i32 @volatile_atomicrmw_add_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 {
  %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
  %ret = atomicrmw volatile add ptr %cast, i32 %y seq_cst
  ret i32 %ret
}

; CHECK-LABEL: @volatile_cmpxchg_global_to_flat(
; CHECK: addrspacecast ptr addrspace(1) %global.ptr to ptr
; CHECK: cmpxchg volatile ptr
define { i32, i1 } @volatile_cmpxchg_global_to_flat(ptr addrspace(1) %global.ptr, i32 %cmp, i32 %val) #0 {
  %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
  %ret = cmpxchg volatile ptr %cast, i32 %cmp, i32 %val seq_cst monotonic
  ret { i32, i1 } %ret
}

; CHECK-LABEL: @volatile_cmpxchg_group_to_flat(
; CHECK: addrspacecast ptr addrspace(3) %group.ptr to ptr
; CHECK: cmpxchg volatile ptr
define { i32, i1 } @volatile_cmpxchg_group_to_flat(ptr addrspace(3) %group.ptr, i32 %cmp, i32 %val) #0 {
  %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
  %ret = cmpxchg volatile ptr %cast, i32 %cmp, i32 %val seq_cst monotonic
  ret { i32, i1 } %ret
}

; CHECK-LABEL: @volatile_memset_group_to_flat(
; CHECK: %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
; CHECK: call void @llvm.memset.p0.i64(ptr align 4 %cast, i8 4, i64 32, i1 true)
define amdgpu_kernel void @volatile_memset_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 {
  %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
  call void @llvm.memset.p0.i64(ptr align 4 %cast, i8 4, i64 32, i1 true)
  ret void
}

; CHECK-LABEL: @volatile_memset_global_to_flat(
; CHECK: %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
; CHECK: call void @llvm.memset.p0.i64(ptr align 4 %cast, i8 4, i64 32, i1 true)
define amdgpu_kernel void @volatile_memset_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 {
  %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
  call void @llvm.memset.p0.i64(ptr align 4 %cast, i8 4, i64 32, i1 true)
  ret void
}

declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1) #1

attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }
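
; For contrast, a minimal sketch of the pass's ordinary behavior: without
; volatile, infer-address-spaces is expected to rewrite a flat access through
; an addrspacecast to address the original global pointer directly. The
; function @nonvolatile_load_flat_from_global below is an illustrative
; addition and not part of the original volatile coverage.
; CHECK-LABEL: @nonvolatile_load_flat_from_global(
; CHECK: load i32, ptr addrspace(1)
define amdgpu_kernel void @nonvolatile_load_flat_from_global(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 {
  %tmp0 = addrspacecast ptr addrspace(1) %input to ptr
  %val = load i32, ptr %tmp0, align 4
  store i32 %val, ptr addrspace(1) %output, align 4
  ret void
}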