# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=none -o - %s | FileCheck %s
#
# NOTE(review): -run-pass=none makes this a MIR serialization round-trip test:
# it checks that the scoped-AA metadata (!alias.scope / !noalias) attached to
# the memory operands of the lowered memcpy survives printing and re-parsing.
# The MIR-only scope nodes live in each function's machineMetadataNodes list.
--- |
  ; ModuleID = 'test/CodeGen/AMDGPU/memcpy-scoped-aa.ll'
  source_filename = "test/CodeGen/AMDGPU/memcpy-scoped-aa.ll"
  target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
  target triple = "amdgcn-amd-amdhsa"

  ; Copies 16 bytes from %p+16 (%p1) down to %p (%p0) under scope !0, then
  ; loads <2 x i32> from %q in the complementary scope !3 and returns the sum
  ; of the two lanes. The no-op "bitcast ptr -> ptr" lines are typed-pointer
  ; leftovers kept so the %ir.p0/%ir.p1/%ir.1 names referenced by the MIR
  ; memory operands below still resolve.
  define i32 @test_memcpy(ptr addrspace(1) nocapture %p, ptr addrspace(1) nocapture readonly %q) #0 {
    %p0 = bitcast ptr addrspace(1) %p to ptr addrspace(1)
    %add.ptr = getelementptr inbounds i32, ptr addrspace(1) %p, i64 4
    %p1 = bitcast ptr addrspace(1) %add.ptr to ptr addrspace(1)
    tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %p0, ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !0, !noalias !3
    %1 = bitcast ptr addrspace(1) %q to ptr addrspace(1)
    %2 = load <2 x i32>, ptr addrspace(1) %1, align 4, !alias.scope !3, !noalias !0
    %v01 = extractelement <2 x i32> %2, i32 0
    %v12 = extractelement <2 x i32> %2, i32 1
    %add = add i32 %v01, %v12
    ret i32 %add
  }

  ; Identical to @test_memcpy except the copy uses the always-inlined
  ; llvm.memcpy.inline intrinsic.
  define i32 @test_memcpy_inline(ptr addrspace(1) nocapture %p, ptr addrspace(1) nocapture readonly %q) #0 {
    %p0 = bitcast ptr addrspace(1) %p to ptr addrspace(1)
    %add.ptr = getelementptr inbounds i32, ptr addrspace(1) %p, i64 4
    %p1 = bitcast ptr addrspace(1) %add.ptr to ptr addrspace(1)
    tail call void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %p0, ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !0, !noalias !3
    %1 = bitcast ptr addrspace(1) %q to ptr addrspace(1)
    %2 = load <2 x i32>, ptr addrspace(1) %1, align 4, !alias.scope !3, !noalias !0
    %v01 = extractelement <2 x i32> %2, i32 0
    %v12 = extractelement <2 x i32> %2, i32 1
    %add = add i32 %v01, %v12
    ret i32 %add
  }

  ; Function Attrs: argmemonly nofree nounwind willreturn
  declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #1

  ; Function Attrs: argmemonly nofree nounwind willreturn
  declare void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64 immarg, i1 immarg) #1

  ; Function Attrs: convergent nounwind willreturn
  declare { i1, i32 } @llvm.amdgcn.if.i32(i1) #2

  ; Function Attrs: convergent nounwind willreturn
  declare { i1, i32 } @llvm.amdgcn.else.i32.i32(i32) #2

  ; Function Attrs: convergent nounwind readnone willreturn
  declare i32 @llvm.amdgcn.if.break.i32(i1, i32) #3

  ; Function Attrs: convergent nounwind willreturn
  declare i1 @llvm.amdgcn.loop.i32(i32) #2

  ; Function Attrs: convergent nounwind willreturn
  declare void @llvm.amdgcn.end.cf.i32(i32) #2

  attributes #0 = { "target-cpu"="gfx1010" }
  attributes #1 = { argmemonly nofree nounwind willreturn "target-cpu"="gfx1010" }
  attributes #2 = { convergent nounwind willreturn }
  attributes #3 = { convergent nounwind readnone willreturn }

  ; IR-level alias scopes: !0/!1 name the "%p" scope and !3/!4 the "%q" scope,
  ; both under the shared domain !2 ("bax").
  !0 = !{!1}
  !1 = distinct !{!1, !2, !"bax: %p"}
  !2 = distinct !{!2, !"bax"}
  !3 = !{!4}
  !4 = distinct !{!4, !2, !"bax: %q"}

...
---
name: test_memcpy
# MIR-only metadata created when the memcpy was lowered: distinct "Src"/"Dst"
# scopes (!6/!9) in the "MemcpyLoweringDomain" (!7), plus the combined scope
# lists (!5/!8/!10/!11) referenced by the memory operands in the body below.
# These nodes are not in the embedded IR module, which is exactly what this
# round-trip test exercises.
machineMetadataNodes:
  - '!9 = distinct !{!9, !7, !"Dst"}'
  - '!6 = distinct !{!6, !7, !"Src"}'
  - '!11 = !{!4, !6}'
  - '!5 = !{!1, !6}'
  - '!8 = !{!4, !9}'
  - '!10 = !{!1, !9}'
  - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}'
body: |
  bb.0 (%ir-block.0):
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3

    ; CHECK-LABEL: name: test_memcpy
    ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
    ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
    ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
    ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
    ; CHECK: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
    ; CHECK: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY5]], 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1)
    ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
    ; CHECK: GLOBAL_STORE_DWORDX4 [[COPY6]], killed [[GLOBAL_LOAD_DWORDX4_]], 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1)
    ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
    ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[COPY7]], 0, 0, implicit $exec :: (load (s64) from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1)
    ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
    ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
    ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec
    ; CHECK: $vgpr0 = COPY [[V_ADD_U32_e64_]]
    ; CHECK: SI_RETURN implicit $vgpr0
    %3:vgpr_32 = COPY $vgpr3
    %2:vgpr_32 = COPY $vgpr2
    %1:vgpr_32 = COPY $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %17:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %18:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
    %9:vreg_64 = COPY %18
    %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1)
    %10:vreg_64 = COPY %18
    GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1)
    %12:vreg_64 = COPY %17
    %11:vreg_64 = GLOBAL_LOAD_DWORDX2 killed %12, 0, 0, implicit $exec :: (load (s64) from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1)
    %13:vgpr_32 = COPY %11.sub0
    %14:vgpr_32 = COPY %11.sub1
    %15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec
    $vgpr0 = COPY %15
    SI_RETURN implicit $vgpr0

...
---
name: test_memcpy_inline
# Same machine-level scope nodes as test_memcpy, listed in a different order
# (node order within machineMetadataNodes is not significant; references are
# by number).
machineMetadataNodes:
  - '!6 = distinct !{!6, !7, !"Src"}'
  - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}'
  - '!9 = distinct !{!9, !7, !"Dst"}'
  - '!11 = !{!4, !6}'
  - '!5 = !{!1, !6}'
  - '!8 = !{!4, !9}'
  - '!10 = !{!1, !9}'
body: |
  bb.0 (%ir-block.0):
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3

    ; CHECK-LABEL: name: test_memcpy_inline
    ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
    ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
    ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
    ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
    ; CHECK: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
    ; CHECK: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY5]], 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1)
    ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
    ; CHECK: GLOBAL_STORE_DWORDX4 [[COPY6]], killed [[GLOBAL_LOAD_DWORDX4_]], 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1)
    ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
    ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[COPY7]], 0, 0, implicit $exec :: (load (s64) from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1)
    ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
    ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
    ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec
    ; CHECK: $vgpr0 = COPY [[V_ADD_U32_e64_]]
    ; CHECK: SI_RETURN implicit $vgpr0
    %3:vgpr_32 = COPY $vgpr3
    %2:vgpr_32 = COPY $vgpr2
    %1:vgpr_32 = COPY $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %17:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %18:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
    %9:vreg_64 = COPY %18
    %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1)
    %10:vreg_64 = COPY %18
    GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1)
    %12:vreg_64 = COPY %17
    %11:vreg_64 = GLOBAL_LOAD_DWORDX2 killed %12, 0, 0, implicit $exec :: (load (s64) from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1)
    %13:vgpr_32 = COPY %11.sub0
    %14:vgpr_32 = COPY %11.sub1
    %15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec
    $vgpr0 = COPY %15
    SI_RETURN implicit $vgpr0

...