xref: /llvm-project/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir (revision ff9af4c43ad71eeba2cabe99609cfaa0fd54c1d0)
1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=none -o - %s | FileCheck %s
3--- |
  ; Embedded IR module (taken from test/CodeGen/AMDGPU/memcpy-scoped-aa.ll).
  ; It supplies the IR-level scoped-alias metadata !0-!4 ("bax" domain) that
  ; the machine memory operands in the MIR bodies below reference, alongside
  ; the machine-level nodes listed under machineMetadataNodes.
4  ; ModuleID = 'test/CodeGen/AMDGPU/memcpy-scoped-aa.ll'
5  source_filename = "test/CodeGen/AMDGPU/memcpy-scoped-aa.ll"
6  target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
7  target triple = "amdgcn-amd-amdhsa"
8
  ; memcpy of 16 bytes with alias.scope/noalias annotations; the MIR below is
  ; this function after the memcpy has been lowered to load/store pairs.
9  define i32 @test_memcpy(ptr addrspace(1) nocapture %p, ptr addrspace(1) nocapture readonly %q) #0 {
10    %p0 = bitcast ptr addrspace(1) %p to ptr addrspace(1)
11    %add.ptr = getelementptr inbounds i32, ptr addrspace(1) %p, i64 4
12    %p1 = bitcast ptr addrspace(1) %add.ptr to ptr addrspace(1)
13    tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %p0, ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !0, !noalias !3
14    %1 = bitcast ptr addrspace(1) %q to ptr addrspace(1)
15    %2 = load <2 x i32>, ptr addrspace(1) %1, align 4, !alias.scope !3, !noalias !0
16    %v01 = extractelement <2 x i32> %2, i32 0
17    %v12 = extractelement <2 x i32> %2, i32 1
18    %add = add i32 %v01, %v12
19    ret i32 %add
20  }
21
  ; Same pattern as @test_memcpy but using the memcpy.inline intrinsic.
22  define i32 @test_memcpy_inline(ptr addrspace(1) nocapture %p, ptr addrspace(1) nocapture readonly %q) #0 {
23    %p0 = bitcast ptr addrspace(1) %p to ptr addrspace(1)
24    %add.ptr = getelementptr inbounds i32, ptr addrspace(1) %p, i64 4
25    %p1 = bitcast ptr addrspace(1) %add.ptr to ptr addrspace(1)
26    tail call void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %p0, ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !0, !noalias !3
27    %1 = bitcast ptr addrspace(1) %q to ptr addrspace(1)
28    %2 = load <2 x i32>, ptr addrspace(1) %1, align 4, !alias.scope !3, !noalias !0
29    %v01 = extractelement <2 x i32> %2, i32 0
30    %v12 = extractelement <2 x i32> %2, i32 1
31    %add = add i32 %v01, %v12
32    ret i32 %add
33  }
34
35  ; Function Attrs: argmemonly nofree nounwind willreturn
36  declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #1
37
38  ; Function Attrs: argmemonly nofree nounwind willreturn
39  declare void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64 immarg, i1 immarg) #1
40
41  ; Function Attrs: convergent nounwind willreturn
42  declare { i1, i32 } @llvm.amdgcn.if.i32(i1) #2
43
44  ; Function Attrs: convergent nounwind willreturn
45  declare { i1, i32 } @llvm.amdgcn.else.i32.i32(i32) #2
46
47  ; Function Attrs: convergent nounwind readnone willreturn
48  declare i32 @llvm.amdgcn.if.break.i32(i1, i32) #3
49
50  ; Function Attrs: convergent nounwind willreturn
51  declare i1 @llvm.amdgcn.loop.i32(i32) #2
52
53  ; Function Attrs: convergent nounwind willreturn
54  declare void @llvm.amdgcn.end.cf.i32(i32) #2
55
56  attributes #0 = { "target-cpu"="gfx1010" }
57  attributes #1 = { argmemonly nofree nounwind willreturn "target-cpu"="gfx1010" }
58  attributes #2 = { convergent nounwind willreturn }
59  attributes #3 = { convergent nounwind readnone willreturn }
60
  ; IR-level alias scopes: !1 = scope for %p, !4 = scope for %q, both in
  ; domain !2 ("bax"). Referenced from the MIR bodies as !0/!1/!3/!4.
61  !0 = !{!1}
62  !1 = distinct !{!1, !2, !"bax: %p"}
63  !2 = distinct !{!2, !"bax"}
64  !3 = !{!4}
65  !4 = distinct !{!4, !2, !"bax: %q"}
66
67...
68---
69name:            test_memcpy
# Machine-level metadata nodes (alias scopes "Src"/"Dst" in a
# "MemcpyLoweringDomain", presumably produced when the memcpy was lowered).
# With -run-pass=none the test only parses and re-prints this MIR, so the
# CHECK lines verify that these nodes and the !alias.scope/!noalias references
# on the memory operands survive a serialization round trip. Note the nodes
# are deliberately listed out of numerical order; the printer re-numbers them.
70machineMetadataNodes:
71  - '!9 = distinct !{!9, !7, !"Dst"}'
72  - '!6 = distinct !{!6, !7, !"Src"}'
73  - '!11 = !{!4, !6}'
74  - '!5 = !{!1, !6}'
75  - '!8 = !{!4, !9}'
76  - '!10 = !{!1, !9}'
77  - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}'
78body:             |
79  bb.0 (%ir-block.0):
80    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3

82    ; CHECK-LABEL: name: test_memcpy
83    ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
84    ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
85    ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
86    ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
87    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
88    ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
89    ; CHECK: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
90    ; CHECK: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY5]], 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1)
91    ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
92    ; CHECK: GLOBAL_STORE_DWORDX4 [[COPY6]], killed [[GLOBAL_LOAD_DWORDX4_]], 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1)
93    ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
94    ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[COPY7]], 0, 0, implicit $exec :: (load (s64) from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1)
95    ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
96    ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
97    ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec
98    ; CHECK: $vgpr0 = COPY [[V_ADD_U32_e64_]]
99    ; CHECK: SI_RETURN implicit $vgpr0
100    %3:vgpr_32 = COPY $vgpr3
101    %2:vgpr_32 = COPY $vgpr2
102    %1:vgpr_32 = COPY $vgpr1
103    %0:vgpr_32 = COPY $vgpr0
104    %17:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
105    %18:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
106    %9:vreg_64 = COPY %18
107    %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1)
108    %10:vreg_64 = COPY %18
109    GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1)
110    %12:vreg_64 = COPY %17
111    %11:vreg_64 = GLOBAL_LOAD_DWORDX2 killed %12, 0, 0, implicit $exec :: (load (s64) from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1)
112    %13:vgpr_32 = COPY %11.sub0
113    %14:vgpr_32 = COPY %11.sub1
114    %15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec
115    $vgpr0 = COPY %15
116    SI_RETURN implicit $vgpr0

118...
119---
120name:            test_memcpy_inline
# Same round-trip check as test_memcpy, for the memcpy.inline-lowered body.
# The machine metadata nodes here are listed in yet another order to confirm
# that declaration order in machineMetadataNodes does not affect parsing or
# the re-printed (renumbered) output.
121machineMetadataNodes:
122  - '!6 = distinct !{!6, !7, !"Src"}'
123  - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}'
124  - '!9 = distinct !{!9, !7, !"Dst"}'
125  - '!11 = !{!4, !6}'
126  - '!5 = !{!1, !6}'
127  - '!8 = !{!4, !9}'
128  - '!10 = !{!1, !9}'
129body:             |
130  bb.0 (%ir-block.0):
131    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3

133    ; CHECK-LABEL: name: test_memcpy_inline
134    ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
135    ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
136    ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
137    ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
138    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
139    ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
140    ; CHECK: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
141    ; CHECK: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY5]], 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1)
142    ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
143    ; CHECK: GLOBAL_STORE_DWORDX4 [[COPY6]], killed [[GLOBAL_LOAD_DWORDX4_]], 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1)
144    ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
145    ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[COPY7]], 0, 0, implicit $exec :: (load (s64) from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1)
146    ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
147    ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
148    ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec
149    ; CHECK: $vgpr0 = COPY [[V_ADD_U32_e64_]]
150    ; CHECK: SI_RETURN implicit $vgpr0
151    %3:vgpr_32 = COPY $vgpr3
152    %2:vgpr_32 = COPY $vgpr2
153    %1:vgpr_32 = COPY $vgpr1
154    %0:vgpr_32 = COPY $vgpr0
155    %17:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
156    %18:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
157    %9:vreg_64 = COPY %18
158    %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1)
159    %10:vreg_64 = COPY %18
160    GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1)
161    %12:vreg_64 = COPY %17
162    %11:vreg_64 = GLOBAL_LOAD_DWORDX2 killed %12, 0, 0, implicit $exec :: (load (s64) from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1)
163    %13:vgpr_32 = COPY %11.sub0
164    %14:vgpr_32 = COPY %11.sub1
165    %15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec
166    $vgpr0 = COPY %15
167    SI_RETURN implicit $vgpr0

169...
170