; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10_GFX11 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10_GFX11 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX12 %s

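; Checks GlobalISel instruction selection of llvm.amdgcn.raw.tbuffer.store for the
; various SGPR/VGPR operand assignments on gfx10, gfx11 and gfx12.
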
; Natural mapping
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
  ret void
}

; Natural mapping
define amdgpu_ps void @raw_tbuffer_store_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<2 x float> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX10_GFX11-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8)
  ; GFX12-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
  ret void
}

; Natural mapping
define amdgpu_ps void @raw_tbuffer_store_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<3 x float> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX10_GFX11-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_XYZ_VBUFFER_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 8)
  ; GFX12-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
  ret void
}

; Natural mapping
define amdgpu_ps void @raw_tbuffer_store_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x float> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX10_GFX11-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX10_GFX11-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_XYZW_VBUFFER_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8)
  ; GFX12-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
  ret void
}

; Copies for VGPR arguments
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 inreg %voffset, i32 inreg %soffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffset
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr7
  ; GFX10_GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr7
  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
  ret void
}

; Waterfall for rsrc
define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, <4 x i32> %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   successors: %bb.2(0x80000000)
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT: bb.2:
  ; GFX10_GFX11-NEXT:   successors: %bb.3(0x80000000)
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX10_GFX11-NEXT:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX10_GFX11-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; GFX10_GFX11-NEXT:   [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; GFX10_GFX11-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX10_GFX11-NEXT:   [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT: bb.3:
  ; GFX10_GFX11-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
  ; GFX10_GFX11-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT: bb.4:
  ; GFX10_GFX11-NEXT:   successors: %bb.5(0x80000000)
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   $exec_lo = S_MOV_B32_term [[S_MOV_B32_]]
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT: bb.5:
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   successors: %bb.2(0x80000000)
  ; GFX12-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT: bb.2:
  ; GFX12-NEXT:   successors: %bb.3(0x80000000)
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; GFX12-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
  ; GFX12-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
  ; GFX12-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX12-NEXT:   [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT: bb.3:
  ; GFX12-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
  ; GFX12-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT: bb.4:
  ; GFX12-NEXT:   successors: %bb.5(0x80000000)
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   $exec_lo = S_MOV_B32_term [[S_MOV_B32_]]
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT: bb.5:
  ; GFX12-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 1)
  ret void
}

; Waterfall for rsrc and soffset
define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffset(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffset
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   successors: %bb.2(0x80000000)
  ; GFX10_GFX11-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
  ; GFX10_GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT: bb.2:
  ; GFX10_GFX11-NEXT:   successors: %bb.3(0x80000000)
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX10_GFX11-NEXT:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX10_GFX11-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; GFX10_GFX11-NEXT:   [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; GFX10_GFX11-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; GFX10_GFX11-NEXT:   [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT: bb.3:
  ; GFX10_GFX11-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
  ; GFX10_GFX11-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT: bb.4:
  ; GFX10_GFX11-NEXT:   successors: %bb.5(0x80000000)
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   $exec_lo = S_MOV_B32_term [[S_MOV_B32_]]
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT: bb.5:
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   successors: %bb.2(0x80000000)
  ; GFX12-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT: bb.2:
  ; GFX12-NEXT:   successors: %bb.3(0x80000000)
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; GFX12-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
  ; GFX12-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
  ; GFX12-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; GFX12-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
  ; GFX12-NEXT:   [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; GFX12-NEXT:   [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT: bb.3:
  ; GFX12-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
  ; GFX12-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT: bb.4:
  ; GFX12-NEXT:   successors: %bb.5(0x80000000)
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   $exec_lo = S_MOV_B32_term [[S_MOV_B32_]]
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT: bb.5:
  ; GFX12-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
  ret void
}

; Waterfall for rsrc and soffset, copy for voffset
define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__sgpr_voffset__vgpr_soffset(float %val, <4 x i32> %rsrc, i32 inreg %voffset, i32 %soffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__vgpr_rsrc__sgpr_voffset__vgpr_soffset
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   successors: %bb.2(0x80000000)
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; GFX10_GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX10_GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT: bb.2:
  ; GFX10_GFX11-NEXT:   successors: %bb.3(0x80000000)
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX10_GFX11-NEXT:   [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX10_GFX11-NEXT:   [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; GFX10_GFX11-NEXT:   [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; GFX10_GFX11-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; GFX10_GFX11-NEXT:   [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT: bb.3:
  ; GFX10_GFX11-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
  ; GFX10_GFX11-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT: bb.4:
  ; GFX10_GFX11-NEXT:   successors: %bb.5(0x80000000)
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   $exec_lo = S_MOV_B32_term [[S_MOV_B32_]]
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT: bb.5:
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__vgpr_rsrc__sgpr_voffset__vgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   successors: %bb.2(0x80000000)
  ; GFX12-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT: bb.2:
  ; GFX12-NEXT:   successors: %bb.3(0x80000000)
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; GFX12-NEXT:   [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; GFX12-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
  ; GFX12-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
  ; GFX12-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; GFX12-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
  ; GFX12-NEXT:   [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; GFX12-NEXT:   [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT: bb.3:
  ; GFX12-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
  ; GFX12-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT: bb.4:
  ; GFX12-NEXT:   successors: %bb.5(0x80000000)
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   $exec_lo = S_MOV_B32_term [[S_MOV_B32_]]
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT: bb.5:
  ; GFX12-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
  ret void
}

; Natural mapping + glc
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1)
  ret void
}

; Natural mapping + slc
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2)
  ret void
}

; Natural mapping + glc + slc
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3)
  ret void
}

; Natural mapping + dlc
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)
  ret void
}

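; Constant zero voffset: no VGPR offset operand is needed, so the OFFSET form is selected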
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffset__voffset0(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffset__voffset0
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffset__voffset0
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 94, i32 0)
  ret void
}

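; A constant voffset of 4095 folds into the immediate offset of the OFFSET form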
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4095(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4095
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4095
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 94, i32 0)
  ret void
}

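; A constant voffset of 4096 does not fit the gfx10/gfx11 immediate offset, so it is
; materialized in a VGPR and the OFFEN form is used; gfx12 still folds it into the immediate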
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4096(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4096
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX10_GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4096
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4096, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 94, i32 0)
  ret void
}

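; A small voffset add (16) folds into the immediate offset on all targets.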
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add16(float %val, <4 x i32> inreg %rsrc, i32 %voffset.base, i32 inreg %soffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add16
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add16
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   S_ENDPGM 0
  %voffset = add i32 %voffset.base, 16
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
  ret void
}

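; A voffset add of 4095 still folds into the immediate offset on all targets.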
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095(float %val, <4 x i32> inreg %rsrc, i32 %voffset.base, i32 inreg %soffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   S_ENDPGM 0
  %voffset = add i32 %voffset.base, 4095
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
  ret void
}

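; A voffset add of 4096 needs a V_ADD_U32 on gfx10/gfx11 but folds into the gfx12 immediate offset.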
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4096(float %val, <4 x i32> inreg %rsrc, i32 %voffset.base, i32 inreg %soffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4096
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX10_GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX10_GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GFX10_GFX11-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4096
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4096, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   S_ENDPGM 0
  %voffset = add i32 %voffset.base, 4096
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
  ret void
}

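; A constant 4095 soffset is materialized with S_MOV_B32.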
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4095(float %val, <4 x i32> inreg %rsrc, i32 %voffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4095
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4095
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 94, i32 0)
  ret void
}

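; A constant 4096 soffset is materialized with S_MOV_B32.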
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4096(float %val, <4 x i32> inreg %rsrc, i32 %voffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4096
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4096
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 94, i32 0)
  ret void
}

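; The soffset add of 16 is done with a scalar add before the store.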
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add16(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset.base) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add16
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX10_GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
  ; GFX10_GFX11-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def dead $scc
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add16
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
  ; GFX12-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def dead $scc
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   S_ENDPGM 0
  %soffset = add i32 %soffset.base, 16
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
  ret void
}

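; The soffset add of 4095 is done with a scalar add before the store.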
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4095(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset.base) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4095
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX10_GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
  ; GFX10_GFX11-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def dead $scc
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4095
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
  ; GFX12-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def dead $scc
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   S_ENDPGM 0
  %soffset = add i32 %soffset.base, 4095
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
  ret void
}

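; The soffset add of 4096 is done with a scalar add before the store.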
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4096(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset.base) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4096
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX10_GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX10_GFX11-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def dead $scc
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4096
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX12-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def dead $scc
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   S_ENDPGM 0
  %soffset = add i32 %soffset.base, 4096
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
  ret void
}

; An add of the soffset is necessary, and the VGPR rsrc requires a waterfall loop. Make sure the add is done outside of the waterfall loop.
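; The loop readfirstlanes the descriptor, compares it with each lane's value, and runs the store with exec restricted to the matching lanes until all lanes are handled.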
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add5000(float %val, <4 x i32> %rsrc, i32 %voffset, i32 inreg %soffset.base) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add5000
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   successors: %bb.2(0x80000000)
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
  ; GFX10_GFX11-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def dead $scc
  ; GFX10_GFX11-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT: bb.2:
  ; GFX10_GFX11-NEXT:   successors: %bb.3(0x80000000)
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX10_GFX11-NEXT:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX10_GFX11-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; GFX10_GFX11-NEXT:   [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; GFX10_GFX11-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX10_GFX11-NEXT:   [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT: bb.3:
  ; GFX10_GFX11-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
  ; GFX10_GFX11-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT: bb.4:
  ; GFX10_GFX11-NEXT:   successors: %bb.5(0x80000000)
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   $exec_lo = S_MOV_B32_term [[S_MOV_B32_1]]
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT: bb.5:
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add5000
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   successors: %bb.2(0x80000000)
  ; GFX12-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
  ; GFX12-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def dead $scc
  ; GFX12-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT: bb.2:
  ; GFX12-NEXT:   successors: %bb.3(0x80000000)
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; GFX12-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
  ; GFX12-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
  ; GFX12-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX12-NEXT:   [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT: bb.3:
  ; GFX12-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
  ; GFX12-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT: bb.4:
  ; GFX12-NEXT:   successors: %bb.5(0x80000000)
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   $exec_lo = S_MOV_B32_term [[S_MOV_B32_1]]
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT: bb.5:
  ; GFX12-NEXT:   S_ENDPGM 0
  %soffset = add i32 %soffset.base, 5000
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
  ret void
}

; An add of the voffset is necessary, and the VGPR rsrc requires a waterfall loop. Make sure the add is done outside of the waterfall loop.
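; As above, the waterfall loop is only needed to uniformize the VGPR rsrc; the voffset add is handled outside the loop or folded into the immediate offset.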
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add5000(float %val, <4 x i32> %rsrc, i32 %voffset.base, i32 inreg %soffset) {
  ; GFX10_GFX11-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add5000
  ; GFX10_GFX11: bb.1 (%ir-block.0):
  ; GFX10_GFX11-NEXT:   successors: %bb.2(0x80000000)
  ; GFX10_GFX11-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX10_GFX11-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX10_GFX11-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX10_GFX11-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX10_GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; GFX10_GFX11-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX10_GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX10_GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GFX10_GFX11-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
  ; GFX10_GFX11-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT: bb.2:
  ; GFX10_GFX11-NEXT:   successors: %bb.3(0x80000000)
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX10_GFX11-NEXT:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX10_GFX11-NEXT:   [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX10_GFX11-NEXT:   [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; GFX10_GFX11-NEXT:   [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; GFX10_GFX11-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
  ; GFX10_GFX11-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX10_GFX11-NEXT:   [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT: bb.3:
  ; GFX10_GFX11-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE1]], [[COPY6]], 904, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX10_GFX11-NEXT:   $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
  ; GFX10_GFX11-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT: bb.4:
  ; GFX10_GFX11-NEXT:   successors: %bb.5(0x80000000)
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT:   $exec_lo = S_MOV_B32_term [[S_MOV_B32_1]]
  ; GFX10_GFX11-NEXT: {{  $}}
  ; GFX10_GFX11-NEXT: bb.5:
  ; GFX10_GFX11-NEXT:   S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add5000
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT:   successors: %bb.2(0x80000000)
  ; GFX12-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT: bb.2:
  ; GFX12-NEXT:   successors: %bb.3(0x80000000)
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; GFX12-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; GFX12-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
  ; GFX12-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
  ; GFX12-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX12-NEXT:   [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT: bb.3:
  ; GFX12-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   TBUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 5000, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT:   $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
  ; GFX12-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT: bb.4:
  ; GFX12-NEXT:   successors: %bb.5(0x80000000)
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   $exec_lo = S_MOV_B32_term [[S_MOV_B32_]]
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT: bb.5:
  ; GFX12-NEXT:   S_ENDPGM 0
  %voffset = add i32 %voffset.base, 5000
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
  ret void
}

declare void @llvm.amdgcn.raw.tbuffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
declare void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
declare void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
declare void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
