xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.make.buffer.rsrc.ll (revision 7652a59407018c057cdc1163c9f64b5b6f0954eb)
1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck %s
3; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck %s
4
5define amdgpu_ps ptr addrspace(8) @basic_raw_buffer(ptr inreg %p) {
6  ; CHECK-LABEL: name: basic_raw_buffer
7  ; CHECK: bb.0 (%ir-block.0):
8  ; CHECK-NEXT:   liveins: $sgpr0, $sgpr1
9  ; CHECK-NEXT: {{  $}}
10  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
11  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
12  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
13  ; CHECK-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], killed [[S_MOV_B32_]], implicit-def dead $scc
14  ; CHECK-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1234
15  ; CHECK-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 5678
16  ; CHECK-NEXT:   $sgpr0 = COPY [[COPY1]]
17  ; CHECK-NEXT:   $sgpr1 = COPY [[S_AND_B32_]]
18  ; CHECK-NEXT:   $sgpr2 = COPY [[S_MOV_B32_1]]
19  ; CHECK-NEXT:   $sgpr3 = COPY [[S_MOV_B32_2]]
20  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3
21  %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr %p, i16 0, i32 1234, i32 5678)
22  ret ptr addrspace(8) %rsrc
23}
24
25define amdgpu_ps float @read_raw_buffer(ptr addrspace(1) inreg %p) {
26  ; CHECK-LABEL: name: read_raw_buffer
27  ; CHECK: bb.0 (%ir-block.0):
28  ; CHECK-NEXT:   liveins: $sgpr0, $sgpr1
29  ; CHECK-NEXT: {{  $}}
30  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
31  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
32  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
33  ; CHECK-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], killed [[S_MOV_B32_]], implicit-def dead $scc
34  ; CHECK-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
35  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, killed [[S_AND_B32_]], %subreg.sub1, [[S_MOV_B32_1]], %subreg.sub2, [[S_MOV_B32_1]], %subreg.sub3
36  ; CHECK-NEXT:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET killed [[REG_SEQUENCE]], [[S_MOV_B32_1]], 4, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8)
37  ; CHECK-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
38  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
39  %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) %p, i16 0, i32 0, i32 0)
40  %loaded = call float @llvm.amdgcn.raw.ptr.buffer.load(ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0)
41  ret float %loaded
42}
43
44define amdgpu_ps ptr addrspace(8) @basic_struct_buffer(ptr inreg %p) {
45  ; CHECK-LABEL: name: basic_struct_buffer
46  ; CHECK: bb.0 (%ir-block.0):
47  ; CHECK-NEXT:   liveins: $sgpr0, $sgpr1
48  ; CHECK-NEXT: {{  $}}
49  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
50  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
51  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
52  ; CHECK-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], killed [[S_MOV_B32_]], implicit-def dead $scc
53  ; CHECK-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 262144
54  ; CHECK-NEXT:   [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 killed [[S_AND_B32_]], killed [[S_MOV_B32_1]], implicit-def dead $scc
55  ; CHECK-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 1234
56  ; CHECK-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 5678
57  ; CHECK-NEXT:   $sgpr0 = COPY [[COPY1]]
58  ; CHECK-NEXT:   $sgpr1 = COPY [[S_OR_B32_]]
59  ; CHECK-NEXT:   $sgpr2 = COPY [[S_MOV_B32_2]]
60  ; CHECK-NEXT:   $sgpr3 = COPY [[S_MOV_B32_3]]
61  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3
62  %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr %p, i16 4, i32 1234, i32 5678)
63  ret ptr addrspace(8) %rsrc
64}
65
66define amdgpu_ps ptr addrspace(8) @variable_top_half(ptr inreg %p, i32 inreg %numVals, i32 inreg %flags) {
67  ; CHECK-LABEL: name: variable_top_half
68  ; CHECK: bb.0 (%ir-block.0):
69  ; CHECK-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
70  ; CHECK-NEXT: {{  $}}
71  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr3
72  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr2
73  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
74  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
75  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
76  ; CHECK-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], killed [[S_MOV_B32_]], implicit-def dead $scc
77  ; CHECK-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 262144
78  ; CHECK-NEXT:   [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 killed [[S_AND_B32_]], killed [[S_MOV_B32_1]], implicit-def dead $scc
79  ; CHECK-NEXT:   $sgpr0 = COPY [[COPY3]]
80  ; CHECK-NEXT:   $sgpr1 = COPY [[S_OR_B32_]]
81  ; CHECK-NEXT:   $sgpr2 = COPY [[COPY1]]
82  ; CHECK-NEXT:   $sgpr3 = COPY [[COPY]]
83  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3
84  %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr %p, i16 4, i32 %numVals, i32 %flags)
85  ret ptr addrspace(8) %rsrc
86}
87
88define amdgpu_ps ptr addrspace(8) @general_case(ptr inreg %p, i16 inreg %stride, i32 inreg %numVals, i32 inreg %flags) {
89  ; CHECK-LABEL: name: general_case
90  ; CHECK: bb.0 (%ir-block.0):
91  ; CHECK-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
92  ; CHECK-NEXT: {{  $}}
93  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
94  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
95  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
96  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
97  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
98  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
99  ; CHECK-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], killed [[S_MOV_B32_]], implicit-def dead $scc
100  ; CHECK-NEXT:   [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 16, implicit-def dead $scc
101  ; CHECK-NEXT:   [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 killed [[S_AND_B32_]], killed [[S_LSHL_B32_]], implicit-def dead $scc
102  ; CHECK-NEXT:   $sgpr0 = COPY [[COPY4]]
103  ; CHECK-NEXT:   $sgpr1 = COPY [[S_OR_B32_]]
104  ; CHECK-NEXT:   $sgpr2 = COPY [[COPY1]]
105  ; CHECK-NEXT:   $sgpr3 = COPY [[COPY]]
106  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3
107  %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr %p, i16 %stride, i32 %numVals, i32 %flags)
108  ret ptr addrspace(8) %rsrc
109}
110
111define amdgpu_ps float @general_case_load(ptr inreg %p, i16 inreg %stride, i32 inreg %numVals, i32 inreg %flags) {
112  ; CHECK-LABEL: name: general_case_load
113  ; CHECK: bb.0 (%ir-block.0):
114  ; CHECK-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
115  ; CHECK-NEXT: {{  $}}
116  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
117  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
118  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
119  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
120  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
121  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
122  ; CHECK-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], killed [[S_MOV_B32_]], implicit-def dead $scc
123  ; CHECK-NEXT:   [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 16, implicit-def dead $scc
124  ; CHECK-NEXT:   [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 killed [[S_AND_B32_]], killed [[S_LSHL_B32_]], implicit-def dead $scc
125  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, killed [[S_OR_B32_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[COPY]], %subreg.sub3
126  ; CHECK-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
127  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
128  ; CHECK-NEXT:   [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[COPY5]], killed [[REG_SEQUENCE]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8)
129  ; CHECK-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
130  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
131  %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr %p, i16 %stride, i32 %numVals, i32 %flags)
132  %value = call float @llvm.amdgcn.struct.ptr.buffer.load(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
133  ret float %value
134}
135
136; None of the components are uniform due to the lack of an inreg
137define amdgpu_ps float @general_case_load_with_waterfall(ptr %p, i16 %stride, i32 %numVals, i32 %flags) {
138  ; CHECK-LABEL: name: general_case_load_with_waterfall
139  ; CHECK: bb.0 (%ir-block.0):
140  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
141  ; CHECK-NEXT: {{  $}}
142  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4
143  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
144  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
145  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
146  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
147  ; CHECK-NEXT:   [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY2]], implicit $exec
148  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
149  ; CHECK-NEXT:   [[V_AND_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 [[COPY3]], killed [[S_MOV_B32_]], killed [[V_LSHLREV_B32_e64_]], implicit $exec
150  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, killed [[V_AND_OR_B32_e64_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[COPY]], %subreg.sub3
151  ; CHECK-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
152  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
153  ; CHECK-NEXT:   [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[COPY5]], killed [[REG_SEQUENCE]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8)
154  ; CHECK-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
155  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG $vgpr0
156  %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr %p, i16 %stride, i32 %numVals, i32 %flags)
157  %value = call float @llvm.amdgcn.struct.ptr.buffer.load(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
158  ret float %value
159}
160
161declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr nocapture readnone, i16, i32, i32)
162declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) nocapture readnone, i16, i32, i32)
163declare float @llvm.amdgcn.raw.ptr.buffer.load(ptr addrspace(8) nocapture readonly, i32, i32, i32 immarg)
164declare float @llvm.amdgcn.struct.ptr.buffer.load(ptr addrspace(8) nocapture readonly, i32, i32, i32, i32 immarg)
165