; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s
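
; Check that calls to memory intrinsics through flat pointers produced by an
; addrspacecast from group (LDS) or global pointers are rewritten to address
; the original address space directly, and that call-site metadata is kept.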

; CHECK-LABEL: @memset_group_to_flat(
; CHECK: call void @llvm.memset.p3.i64(ptr addrspace(3) align 4 %group.ptr, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
define amdgpu_kernel void @memset_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 {
  %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
  call void @llvm.memset.p0.i64(ptr align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
  ret void
}

; CHECK-LABEL: @memset_global_to_flat(
; CHECK: call void @llvm.memset.p1.i64(ptr addrspace(1) align 4 %global.ptr, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
define amdgpu_kernel void @memset_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 {
  %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
  call void @llvm.memset.p0.i64(ptr align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
  ret void
}

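; The {{$}} at the end of the following CHECK lines verifies that no metadata
; is appended when the original call carries none.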
; CHECK-LABEL: @memset_group_to_flat_no_md(
; CHECK: call void @llvm.memset.p3.i64(ptr addrspace(3) align 4 %group.ptr, i8 4, i64 %size, i1 false){{$}}
define amdgpu_kernel void @memset_group_to_flat_no_md(ptr addrspace(3) %group.ptr, i64 %size) #0 {
  %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
  call void @llvm.memset.p0.i64(ptr align 4 %cast, i8 4, i64 %size, i1 false)
  ret void
}

; CHECK-LABEL: @memset_global_to_flat_no_md(
; CHECK: call void @llvm.memset.p1.i64(ptr addrspace(1) align 4 %global.ptr, i8 4, i64 %size, i1 false){{$}}
define amdgpu_kernel void @memset_global_to_flat_no_md(ptr addrspace(1) %global.ptr, i64 %size) #0 {
  %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
  call void @llvm.memset.p0.i64(ptr align 4 %cast, i8 4, i64 %size, i1 false)
  ret void
}

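; memcpy through flat pointers should be rewritten to the intrinsic variant
; whose source and/or destination pointer types match the inferred address spaces.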
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group(
; CHECK: call void @llvm.memcpy.p0.p3.i64(ptr align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
  %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
  ret void
}

; CHECK-LABEL: @memcpy_inline_flat_to_flat_replace_src_with_group(
; CHECK: call void @llvm.memcpy.inline.p0.p3.i64(ptr align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 42, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(ptr %dest, ptr addrspace(3) %src.group.ptr) #0 {
  %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
  call void @llvm.memcpy.inline.p0.p0.i64(ptr align 4 %dest, ptr align 4 %cast.src, i64 42, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
  ret void
}

; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_with_group(
; CHECK: call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) align 4 %dest.group.ptr, ptr align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(ptr addrspace(3) %dest.group.ptr, ptr %src.ptr, i64 %size) #0 {
  %cast.dest = addrspacecast ptr addrspace(3) %dest.group.ptr to ptr
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %cast.dest, ptr align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
  ret void
}

; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_src_with_group(
; CHECK: call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) align 4 %src.group.ptr, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_src_with_group(ptr addrspace(3) %dest.group.ptr, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
  %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
  %cast.dest = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %cast.dest, ptr align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
  ret void
}

; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_group_src_global(
; CHECK: call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) align 4 %dest.group.ptr, ptr addrspace(1) align 4 %src.global.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_group_src_global(ptr addrspace(3) %dest.group.ptr, ptr addrspace(1) %src.global.ptr, i64 %size) #0 {
  %cast.src = addrspacecast ptr addrspace(1) %src.global.ptr to ptr
  %cast.dest = addrspacecast ptr addrspace(3) %dest.group.ptr to ptr
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %cast.dest, ptr align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
  ret void
}

; CHECK-LABEL: @memcpy_group_to_flat_replace_dest_global(
; CHECK: call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %dest.global.ptr, ptr addrspace(3) align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
define amdgpu_kernel void @memcpy_group_to_flat_replace_dest_global(ptr addrspace(1) %dest.global.ptr, ptr addrspace(3) %src.group.ptr, i32 %size) #0 {
  %cast.dest = addrspacecast ptr addrspace(1) %dest.global.ptr to ptr
  call void @llvm.memcpy.p0.p3.i32(ptr align 4 %cast.dest, ptr addrspace(3) align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
  ret void
}

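; !tbaa.struct metadata must also be carried over to the rewritten call.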
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(
; CHECK: call void @llvm.memcpy.p0.p3.i64(ptr align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false), !tbaa.struct !8
define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
  %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %cast.src, i64 %size, i1 false), !tbaa.struct !8
  ret void
}

; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_no_md(
; CHECK: call void @llvm.memcpy.p0.p3.i64(ptr align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false){{$}}
define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
  %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %cast.src, i64 %size, i1 false)
  ret void
}

; CHECK-LABEL: @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(
; CHECK: call void @llvm.memcpy.p0.p3.i64(ptr align 4 %dest0, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false){{$}}
; CHECK: call void @llvm.memcpy.p0.p3.i64(ptr align 4 %dest1, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false){{$}}
define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(ptr %dest0, ptr %dest1, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
  %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest0, ptr align 4 %cast.src, i64 %size, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest1, ptr align 4 %cast.src, i64 %size, i1 false)
  ret void
}

; Check for iterator problems if the pointer has 2 uses in the same call
; CHECK-LABEL: @memcpy_group_flat_to_flat_self(
; CHECK: call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) align 4 %group.ptr, ptr addrspace(3) align 4 %group.ptr, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
define amdgpu_kernel void @memcpy_group_flat_to_flat_self(ptr addrspace(3) %group.ptr) #0 {
  %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %cast, ptr align 4 %cast, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
  ret void
}

; CHECK-LABEL: @memmove_flat_to_flat_replace_src_with_group(
; CHECK: call void @llvm.memmove.p0.p3.i64(ptr align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
  %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
  call void @llvm.memmove.p0.p0.i64(ptr align 4 %dest, ptr align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
  ret void
}

declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1) #1
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) #1
declare void @llvm.memcpy.inline.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) #1
declare void @llvm.memcpy.p0.p3.i32(ptr nocapture writeonly, ptr addrspace(3) nocapture readonly, i32, i1) #1
declare void @llvm.memmove.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) #1

attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }

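; !0-!2 form a TBAA access tag, !3 and !6 are alias.scope/noalias scope lists
; over the domain !5, and !8 is a tbaa.struct field descriptor.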
!0 = !{!1, !1, i64 0}
!1 = !{!"A", !2}
!2 = !{!"tbaa root"}
!3 = !{!4}
!4 = distinct !{!4, !5, !"some scope 1"}
!5 = distinct !{!5, !"some domain"}
!6 = !{!7}
!7 = distinct !{!7, !5, !"some scope 2"}
!8 = !{i64 0, i64 8, null}