xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.tfe.ll (revision 1e6a82b8ef19abdc45fc72c309b0bef6cb6eda72)
1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2; RUN: llc -global-isel -mcpu=tahiti -mtriple=amdgcn-- -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX67,GFX6
3; RUN: llc -global-isel -mcpu=hawaii -mtriple=amdgcn-- -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX67,GFX7
4; RUN: llc -global-isel -mcpu=fiji -mtriple=amdgcn-- -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX8
5; RUN: llc -global-isel -mcpu=gfx900 -mtriple=amdgcn-- -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX910
6; RUN: llc -global-isel -mcpu=gfx1010 -mtriple=amdgcn-- -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX910
7; RUN: llc -global-isel -mcpu=gfx1100 -mtriple=amdgcn-- -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX11
8; RUN: llc -global-isel -mcpu=gfx1200 -mtriple=amdgcn-- -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX12
9
10define amdgpu_ps void @raw_buffer_load_i8_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
11  ; GFX67-LABEL: name: raw_buffer_load_i8_tfe
12  ; GFX67: bb.1 (%ir-block.0):
13  ; GFX67-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
14  ; GFX67-NEXT: {{  $}}
15  ; GFX67-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
16  ; GFX67-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
17  ; GFX67-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
18  ; GFX67-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
19  ; GFX67-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
20  ; GFX67-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
21  ; GFX67-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
22  ; GFX67-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
23  ; GFX67-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
24  ; GFX67-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
25  ; GFX67-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
26  ; GFX67-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
27  ; GFX67-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
28  ; GFX67-NEXT:   [[BUFFER_LOAD_UBYTE_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8)
29  ; GFX67-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub0
30  ; GFX67-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub1
31  ; GFX67-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
32  ; GFX67-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
33  ; GFX67-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
34  ; GFX67-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
35  ; GFX67-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3
36  ; GFX67-NEXT:   BUFFER_STORE_BYTE_ADDR64 [[COPY9]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.data_addr, addrspace 1)
37  ; GFX67-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
38  ; GFX67-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
39  ; GFX67-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
40  ; GFX67-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
41  ; GFX67-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3
42  ; GFX67-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
43  ; GFX67-NEXT:   S_ENDPGM 0
44  ;
45  ; GFX8-LABEL: name: raw_buffer_load_i8_tfe
46  ; GFX8: bb.1 (%ir-block.0):
47  ; GFX8-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
48  ; GFX8-NEXT: {{  $}}
49  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
50  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
51  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
52  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
53  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
54  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
55  ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
56  ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
57  ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
58  ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
59  ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
60  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
61  ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
62  ; GFX8-NEXT:   [[BUFFER_LOAD_UBYTE_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8)
63  ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub0
64  ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub1
65  ; GFX8-NEXT:   FLAT_STORE_BYTE [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8) into %ir.data_addr, addrspace 1)
66  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1)
67  ; GFX8-NEXT:   S_ENDPGM 0
68  ;
69  ; GFX910-LABEL: name: raw_buffer_load_i8_tfe
70  ; GFX910: bb.1 (%ir-block.0):
71  ; GFX910-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
72  ; GFX910-NEXT: {{  $}}
73  ; GFX910-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
74  ; GFX910-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
75  ; GFX910-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
76  ; GFX910-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
77  ; GFX910-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
78  ; GFX910-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
79  ; GFX910-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
80  ; GFX910-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
81  ; GFX910-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
82  ; GFX910-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
83  ; GFX910-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
84  ; GFX910-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
85  ; GFX910-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
86  ; GFX910-NEXT:   [[BUFFER_LOAD_UBYTE_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8)
87  ; GFX910-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub0
88  ; GFX910-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub1
89  ; GFX910-NEXT:   GLOBAL_STORE_BYTE [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s8) into %ir.data_addr, addrspace 1)
90  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
91  ; GFX910-NEXT:   S_ENDPGM 0
92  ;
93  ; GFX11-LABEL: name: raw_buffer_load_i8_tfe
94  ; GFX11: bb.1 (%ir-block.0):
95  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
96  ; GFX11-NEXT: {{  $}}
97  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
98  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
99  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
100  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
101  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
102  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
103  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
104  ; GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
105  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
106  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
107  ; GFX11-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
108  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
109  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
110  ; GFX11-NEXT:   [[BUFFER_LOAD_UBYTE_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8)
111  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub0
112  ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_IDXEN]].sub1
113  ; GFX11-NEXT:   GLOBAL_STORE_BYTE [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s8) into %ir.data_addr, addrspace 1)
114  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
115  ; GFX11-NEXT:   S_ENDPGM 0
116  ;
117  ; GFX12-LABEL: name: raw_buffer_load_i8_tfe
118  ; GFX12: bb.1 (%ir-block.0):
119  ; GFX12-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
120  ; GFX12-NEXT: {{  $}}
121  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
122  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
123  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
124  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
125  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
126  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
127  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
128  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
129  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
130  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
131  ; GFX12-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
132  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
133  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
134  ; GFX12-NEXT:   [[BUFFER_LOAD_UBYTE_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8)
135  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_VBUFFER_IDXEN]].sub0
136  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_VBUFFER_IDXEN]].sub1
137  ; GFX12-NEXT:   GLOBAL_STORE_BYTE [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s8) into %ir.data_addr, addrspace 1)
138  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
139  ; GFX12-NEXT:   S_ENDPGM 0
140  %res = call { i8, i32 } @llvm.amdgcn.struct.buffer.load.sl_i8i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
141  %data = extractvalue { i8, i32 } %res, 0
142  store i8 %data, ptr addrspace(1) %data_addr
143  %tfe = extractvalue { i8, i32 } %res, 1
144  store i32 %tfe, ptr addrspace(1) %tfe_addr
145  ret void
146}
147
148define amdgpu_ps void @raw_buffer_load_i16_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
149  ; GFX67-LABEL: name: raw_buffer_load_i16_tfe
150  ; GFX67: bb.1 (%ir-block.0):
151  ; GFX67-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
152  ; GFX67-NEXT: {{  $}}
153  ; GFX67-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
154  ; GFX67-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
155  ; GFX67-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
156  ; GFX67-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
157  ; GFX67-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
158  ; GFX67-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
159  ; GFX67-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
160  ; GFX67-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
161  ; GFX67-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
162  ; GFX67-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
163  ; GFX67-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
164  ; GFX67-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
165  ; GFX67-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
166  ; GFX67-NEXT:   [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
167  ; GFX67-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub0
168  ; GFX67-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub1
169  ; GFX67-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
170  ; GFX67-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
171  ; GFX67-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
172  ; GFX67-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
173  ; GFX67-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3
174  ; GFX67-NEXT:   BUFFER_STORE_SHORT_ADDR64 [[COPY9]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1)
175  ; GFX67-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
176  ; GFX67-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
177  ; GFX67-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
178  ; GFX67-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
179  ; GFX67-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3
180  ; GFX67-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
181  ; GFX67-NEXT:   S_ENDPGM 0
182  ;
183  ; GFX8-LABEL: name: raw_buffer_load_i16_tfe
184  ; GFX8: bb.1 (%ir-block.0):
185  ; GFX8-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
186  ; GFX8-NEXT: {{  $}}
187  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
188  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
189  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
190  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
191  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
192  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
193  ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
194  ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
195  ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
196  ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
197  ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
198  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
199  ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
200  ; GFX8-NEXT:   [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
201  ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub0
202  ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub1
203  ; GFX8-NEXT:   FLAT_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %ir.data_addr, addrspace 1)
204  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1)
205  ; GFX8-NEXT:   S_ENDPGM 0
206  ;
207  ; GFX910-LABEL: name: raw_buffer_load_i16_tfe
208  ; GFX910: bb.1 (%ir-block.0):
209  ; GFX910-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
210  ; GFX910-NEXT: {{  $}}
211  ; GFX910-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
212  ; GFX910-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
213  ; GFX910-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
214  ; GFX910-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
215  ; GFX910-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
216  ; GFX910-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
217  ; GFX910-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
218  ; GFX910-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
219  ; GFX910-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
220  ; GFX910-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
221  ; GFX910-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
222  ; GFX910-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
223  ; GFX910-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
224  ; GFX910-NEXT:   [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
225  ; GFX910-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub0
226  ; GFX910-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub1
227  ; GFX910-NEXT:   GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1)
228  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
229  ; GFX910-NEXT:   S_ENDPGM 0
230  ;
231  ; GFX11-LABEL: name: raw_buffer_load_i16_tfe
232  ; GFX11: bb.1 (%ir-block.0):
233  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
234  ; GFX11-NEXT: {{  $}}
235  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
236  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
237  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
238  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
239  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
240  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
241  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
242  ; GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
243  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
244  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
245  ; GFX11-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
246  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
247  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
248  ; GFX11-NEXT:   [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
249  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub0
250  ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub1
251  ; GFX11-NEXT:   GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1)
252  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
253  ; GFX11-NEXT:   S_ENDPGM 0
254  ;
255  ; GFX12-LABEL: name: raw_buffer_load_i16_tfe
256  ; GFX12: bb.1 (%ir-block.0):
257  ; GFX12-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
258  ; GFX12-NEXT: {{  $}}
259  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
260  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
261  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
262  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
263  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
264  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
265  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
266  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
267  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
268  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
269  ; GFX12-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
270  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
271  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
272  ; GFX12-NEXT:   [[BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
273  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN]].sub0
274  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN]].sub1
275  ; GFX12-NEXT:   GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1)
276  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
277  ; GFX12-NEXT:   S_ENDPGM 0
278  %res = call { i16, i32 } @llvm.amdgcn.struct.buffer.load.sl_i16i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
279  %data = extractvalue { i16, i32 } %res, 0
280  store i16 %data, ptr addrspace(1) %data_addr
281  %tfe = extractvalue { i16, i32 } %res, 1
282  store i32 %tfe, ptr addrspace(1) %tfe_addr
283  ret void
284}
285
286define amdgpu_ps void @raw_buffer_load_f16_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
287  ; GFX67-LABEL: name: raw_buffer_load_f16_tfe
288  ; GFX67: bb.1 (%ir-block.0):
289  ; GFX67-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
290  ; GFX67-NEXT: {{  $}}
291  ; GFX67-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
292  ; GFX67-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
293  ; GFX67-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
294  ; GFX67-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
295  ; GFX67-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
296  ; GFX67-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
297  ; GFX67-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
298  ; GFX67-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
299  ; GFX67-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
300  ; GFX67-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
301  ; GFX67-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
302  ; GFX67-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
303  ; GFX67-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
304  ; GFX67-NEXT:   [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
305  ; GFX67-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub0
306  ; GFX67-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub1
307  ; GFX67-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
308  ; GFX67-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
309  ; GFX67-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
310  ; GFX67-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
311  ; GFX67-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3
312  ; GFX67-NEXT:   BUFFER_STORE_SHORT_ADDR64 [[COPY9]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1)
313  ; GFX67-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
314  ; GFX67-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
315  ; GFX67-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
316  ; GFX67-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
317  ; GFX67-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3
318  ; GFX67-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
319  ; GFX67-NEXT:   S_ENDPGM 0
320  ;
321  ; GFX8-LABEL: name: raw_buffer_load_f16_tfe
322  ; GFX8: bb.1 (%ir-block.0):
323  ; GFX8-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
324  ; GFX8-NEXT: {{  $}}
325  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
326  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
327  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
328  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
329  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
330  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
331  ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
332  ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
333  ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
334  ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
335  ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
336  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
337  ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
338  ; GFX8-NEXT:   [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
339  ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub0
340  ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub1
341  ; GFX8-NEXT:   FLAT_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %ir.data_addr, addrspace 1)
342  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1)
343  ; GFX8-NEXT:   S_ENDPGM 0
344  ;
345  ; GFX910-LABEL: name: raw_buffer_load_f16_tfe
346  ; GFX910: bb.1 (%ir-block.0):
347  ; GFX910-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
348  ; GFX910-NEXT: {{  $}}
349  ; GFX910-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
350  ; GFX910-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
351  ; GFX910-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
352  ; GFX910-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
353  ; GFX910-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
354  ; GFX910-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
355  ; GFX910-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
356  ; GFX910-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
357  ; GFX910-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
358  ; GFX910-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
359  ; GFX910-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
360  ; GFX910-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
361  ; GFX910-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
362  ; GFX910-NEXT:   [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
363  ; GFX910-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub0
364  ; GFX910-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub1
365  ; GFX910-NEXT:   GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1)
366  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
367  ; GFX910-NEXT:   S_ENDPGM 0
368  ;
369  ; GFX11-LABEL: name: raw_buffer_load_f16_tfe
370  ; GFX11: bb.1 (%ir-block.0):
371  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
372  ; GFX11-NEXT: {{  $}}
373  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
374  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
375  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
376  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
377  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
378  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
379  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
380  ; GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
381  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
382  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
383  ; GFX11-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
384  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
385  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
386  ; GFX11-NEXT:   [[BUFFER_LOAD_USHORT_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
387  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub0
388  ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_IDXEN]].sub1
389  ; GFX11-NEXT:   GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1)
390  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
391  ; GFX11-NEXT:   S_ENDPGM 0
392  ;
393  ; GFX12-LABEL: name: raw_buffer_load_f16_tfe
394  ; GFX12: bb.1 (%ir-block.0):
395  ; GFX12-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
396  ; GFX12-NEXT: {{  $}}
397  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
398  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
399  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
400  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
401  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
402  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
403  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
404  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
405  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
406  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
407  ; GFX12-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
408  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
409  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
410  ; GFX12-NEXT:   [[BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
411  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN]].sub0
412  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_VBUFFER_IDXEN]].sub1
413  ; GFX12-NEXT:   GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1)
414  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
415  ; GFX12-NEXT:   S_ENDPGM 0
416  %res = call { half, i32 } @llvm.amdgcn.struct.buffer.load.sl_f16i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
417  %data = extractvalue { half, i32 } %res, 0
418  store half %data, ptr addrspace(1) %data_addr
419  %tfe = extractvalue { half, i32 } %res, 1
420  store i32 %tfe, ptr addrspace(1) %tfe_addr
421  ret void
422}
423
424define amdgpu_ps void @raw_buffer_load_i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
425  ; GFX67-LABEL: name: raw_buffer_load_i32_tfe
426  ; GFX67: bb.1 (%ir-block.0):
427  ; GFX67-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
428  ; GFX67-NEXT: {{  $}}
429  ; GFX67-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
430  ; GFX67-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
431  ; GFX67-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
432  ; GFX67-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
433  ; GFX67-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
434  ; GFX67-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
435  ; GFX67-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
436  ; GFX67-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
437  ; GFX67-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
438  ; GFX67-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
439  ; GFX67-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
440  ; GFX67-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
441  ; GFX67-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
442  ; GFX67-NEXT:   [[BUFFER_LOAD_DWORD_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
443  ; GFX67-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub0
444  ; GFX67-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub1
445  ; GFX67-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
446  ; GFX67-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
447  ; GFX67-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
448  ; GFX67-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
449  ; GFX67-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3
450  ; GFX67-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY9]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.data_addr, addrspace 1)
451  ; GFX67-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
452  ; GFX67-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
453  ; GFX67-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
454  ; GFX67-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
455  ; GFX67-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3
456  ; GFX67-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
457  ; GFX67-NEXT:   S_ENDPGM 0
458  ;
459  ; GFX8-LABEL: name: raw_buffer_load_i32_tfe
460  ; GFX8: bb.1 (%ir-block.0):
461  ; GFX8-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
462  ; GFX8-NEXT: {{  $}}
463  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
464  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
465  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
466  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
467  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
468  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
469  ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
470  ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
471  ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
472  ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
473  ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
474  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
475  ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
476  ; GFX8-NEXT:   [[BUFFER_LOAD_DWORD_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
477  ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub0
478  ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub1
479  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.data_addr, addrspace 1)
480  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1)
481  ; GFX8-NEXT:   S_ENDPGM 0
482  ;
483  ; GFX910-LABEL: name: raw_buffer_load_i32_tfe
484  ; GFX910: bb.1 (%ir-block.0):
485  ; GFX910-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
486  ; GFX910-NEXT: {{  $}}
487  ; GFX910-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
488  ; GFX910-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
489  ; GFX910-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
490  ; GFX910-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
491  ; GFX910-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
492  ; GFX910-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
493  ; GFX910-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
494  ; GFX910-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
495  ; GFX910-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
496  ; GFX910-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
497  ; GFX910-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
498  ; GFX910-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
499  ; GFX910-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
500  ; GFX910-NEXT:   [[BUFFER_LOAD_DWORD_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
501  ; GFX910-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub0
502  ; GFX910-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub1
503  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.data_addr, addrspace 1)
504  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
505  ; GFX910-NEXT:   S_ENDPGM 0
506  ;
507  ; GFX11-LABEL: name: raw_buffer_load_i32_tfe
508  ; GFX11: bb.1 (%ir-block.0):
509  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
510  ; GFX11-NEXT: {{  $}}
511  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
512  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
513  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
514  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
515  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
516  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
517  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
518  ; GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
519  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
520  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
521  ; GFX11-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
522  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
523  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
524  ; GFX11-NEXT:   [[BUFFER_LOAD_DWORD_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
525  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub0
526  ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_IDXEN]].sub1
527  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.data_addr, addrspace 1)
528  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
529  ; GFX11-NEXT:   S_ENDPGM 0
530  ;
531  ; GFX12-LABEL: name: raw_buffer_load_i32_tfe
532  ; GFX12: bb.1 (%ir-block.0):
533  ; GFX12-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
534  ; GFX12-NEXT: {{  $}}
535  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
536  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
537  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
538  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
539  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
540  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
541  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
542  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
543  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
544  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
545  ; GFX12-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
546  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
547  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
548  ; GFX12-NEXT:   [[BUFFER_LOAD_DWORD_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
549  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_VBUFFER_IDXEN]].sub0
550  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_VBUFFER_IDXEN]].sub1
551  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.data_addr, addrspace 1)
552  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
553  ; GFX12-NEXT:   S_ENDPGM 0
554  %res = call { i32, i32 } @llvm.amdgcn.struct.buffer.load.sl_i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
555  %data = extractvalue { i32, i32 } %res, 0
556  store i32 %data, ptr addrspace(1) %data_addr
557  %tfe = extractvalue { i32, i32 } %res, 1
558  store i32 %tfe, ptr addrspace(1) %tfe_addr
559  ret void
560}
561
562define amdgpu_ps void @raw_buffer_load_v2i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
563  ; GFX67-LABEL: name: raw_buffer_load_v2i32_tfe
564  ; GFX67: bb.1 (%ir-block.0):
565  ; GFX67-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
566  ; GFX67-NEXT: {{  $}}
567  ; GFX67-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
568  ; GFX67-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
569  ; GFX67-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
570  ; GFX67-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
571  ; GFX67-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
572  ; GFX67-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
573  ; GFX67-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
574  ; GFX67-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
575  ; GFX67-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
576  ; GFX67-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
577  ; GFX67-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
578  ; GFX67-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
579  ; GFX67-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
580  ; GFX67-NEXT:   [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
581  ; GFX67-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0
582  ; GFX67-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1
583  ; GFX67-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2
584  ; GFX67-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
585  ; GFX67-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
586  ; GFX67-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
587  ; GFX67-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
588  ; GFX67-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
589  ; GFX67-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3
590  ; GFX67-NEXT:   BUFFER_STORE_DWORDX2_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1)
591  ; GFX67-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
592  ; GFX67-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
593  ; GFX67-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
594  ; GFX67-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
595  ; GFX67-NEXT:   [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3
596  ; GFX67-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
597  ; GFX67-NEXT:   S_ENDPGM 0
598  ;
599  ; GFX8-LABEL: name: raw_buffer_load_v2i32_tfe
600  ; GFX8: bb.1 (%ir-block.0):
601  ; GFX8-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
602  ; GFX8-NEXT: {{  $}}
603  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
604  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
605  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
606  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
607  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
608  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
609  ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
610  ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
611  ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
612  ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
613  ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
614  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
615  ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
616  ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
617  ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0
618  ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1
619  ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2
620  ; GFX8-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
621  ; GFX8-NEXT:   FLAT_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>) into %ir.data_addr, addrspace 1)
622  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1)
623  ; GFX8-NEXT:   S_ENDPGM 0
624  ;
625  ; GFX910-LABEL: name: raw_buffer_load_v2i32_tfe
626  ; GFX910: bb.1 (%ir-block.0):
627  ; GFX910-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
628  ; GFX910-NEXT: {{  $}}
629  ; GFX910-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
630  ; GFX910-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
631  ; GFX910-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
632  ; GFX910-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
633  ; GFX910-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
634  ; GFX910-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
635  ; GFX910-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
636  ; GFX910-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
637  ; GFX910-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
638  ; GFX910-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
639  ; GFX910-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
640  ; GFX910-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
641  ; GFX910-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
642  ; GFX910-NEXT:   [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
643  ; GFX910-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0
644  ; GFX910-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1
645  ; GFX910-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2
646  ; GFX910-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
647  ; GFX910-NEXT:   GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1)
648  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
649  ; GFX910-NEXT:   S_ENDPGM 0
650  ;
651  ; GFX11-LABEL: name: raw_buffer_load_v2i32_tfe
652  ; GFX11: bb.1 (%ir-block.0):
653  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
654  ; GFX11-NEXT: {{  $}}
655  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
656  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
657  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
658  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
659  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
660  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
661  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
662  ; GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
663  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
664  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
665  ; GFX11-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
666  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
667  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
668  ; GFX11-NEXT:   [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
669  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0
670  ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1
671  ; GFX11-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2
672  ; GFX11-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
673  ; GFX11-NEXT:   GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1)
674  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
675  ; GFX11-NEXT:   S_ENDPGM 0
676  ;
677  ; GFX12-LABEL: name: raw_buffer_load_v2i32_tfe
678  ; GFX12: bb.1 (%ir-block.0):
679  ; GFX12-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
680  ; GFX12-NEXT: {{  $}}
681  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
682  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
683  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
684  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
685  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
686  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
687  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
688  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
689  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
690  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
691  ; GFX12-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
692  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
693  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
694  ; GFX12-NEXT:   [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
695  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN]].sub0
696  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN]].sub1
697  ; GFX12-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN]].sub2
698  ; GFX12-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
699  ; GFX12-NEXT:   GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1)
700  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
701  ; GFX12-NEXT:   S_ENDPGM 0
702  %res = call { <2 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v2i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
703  %data = extractvalue { <2 x i32>, i32 } %res, 0
704  store <2 x i32> %data, ptr addrspace(1) %data_addr
705  %tfe = extractvalue { <2 x i32>, i32 } %res, 1
706  store i32 %tfe, ptr addrspace(1) %tfe_addr
707  ret void
708}
709
710define amdgpu_ps void @raw_buffer_load_v2f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
711  ; GFX67-LABEL: name: raw_buffer_load_v2f32_tfe
712  ; GFX67: bb.1 (%ir-block.0):
713  ; GFX67-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
714  ; GFX67-NEXT: {{  $}}
715  ; GFX67-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
716  ; GFX67-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
717  ; GFX67-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
718  ; GFX67-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
719  ; GFX67-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
720  ; GFX67-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
721  ; GFX67-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
722  ; GFX67-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
723  ; GFX67-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
724  ; GFX67-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
725  ; GFX67-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
726  ; GFX67-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
727  ; GFX67-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
728  ; GFX67-NEXT:   [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
729  ; GFX67-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0
730  ; GFX67-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1
731  ; GFX67-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2
732  ; GFX67-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
733  ; GFX67-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
734  ; GFX67-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
735  ; GFX67-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
736  ; GFX67-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
737  ; GFX67-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3
738  ; GFX67-NEXT:   BUFFER_STORE_DWORDX2_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1)
739  ; GFX67-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
740  ; GFX67-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
741  ; GFX67-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
742  ; GFX67-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
743  ; GFX67-NEXT:   [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3
744  ; GFX67-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
745  ; GFX67-NEXT:   S_ENDPGM 0
746  ;
747  ; GFX8-LABEL: name: raw_buffer_load_v2f32_tfe
748  ; GFX8: bb.1 (%ir-block.0):
749  ; GFX8-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
750  ; GFX8-NEXT: {{  $}}
751  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
752  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
753  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
754  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
755  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
756  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
757  ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
758  ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
759  ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
760  ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
761  ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
762  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
763  ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
764  ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
765  ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0
766  ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1
767  ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2
768  ; GFX8-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
769  ; GFX8-NEXT:   FLAT_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>) into %ir.data_addr, addrspace 1)
770  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1)
771  ; GFX8-NEXT:   S_ENDPGM 0
772  ;
773  ; GFX910-LABEL: name: raw_buffer_load_v2f32_tfe
774  ; GFX910: bb.1 (%ir-block.0):
775  ; GFX910-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
776  ; GFX910-NEXT: {{  $}}
777  ; GFX910-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
778  ; GFX910-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
779  ; GFX910-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
780  ; GFX910-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
781  ; GFX910-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
782  ; GFX910-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
783  ; GFX910-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
784  ; GFX910-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
785  ; GFX910-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
786  ; GFX910-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
787  ; GFX910-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
788  ; GFX910-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
789  ; GFX910-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
790  ; GFX910-NEXT:   [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
791  ; GFX910-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0
792  ; GFX910-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1
793  ; GFX910-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2
794  ; GFX910-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
795  ; GFX910-NEXT:   GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1)
796  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
797  ; GFX910-NEXT:   S_ENDPGM 0
798  ;
799  ; GFX11-LABEL: name: raw_buffer_load_v2f32_tfe
800  ; GFX11: bb.1 (%ir-block.0):
801  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
802  ; GFX11-NEXT: {{  $}}
803  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
804  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
805  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
806  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
807  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
808  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
809  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
810  ; GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
811  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
812  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
813  ; GFX11-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
814  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
815  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
816  ; GFX11-NEXT:   [[BUFFER_LOAD_DWORDX2_TFE_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
817  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub0
818  ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub1
819  ; GFX11-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_IDXEN]].sub2
820  ; GFX11-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
821  ; GFX11-NEXT:   GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1)
822  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
823  ; GFX11-NEXT:   S_ENDPGM 0
824  ;
825  ; GFX12-LABEL: name: raw_buffer_load_v2f32_tfe
826  ; GFX12: bb.1 (%ir-block.0):
827  ; GFX12-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
828  ; GFX12-NEXT: {{  $}}
829  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
830  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
831  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
832  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
833  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
834  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
835  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
836  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
837  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
838  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
839  ; GFX12-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
840  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
841  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
842  ; GFX12-NEXT:   [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
843  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN]].sub0
844  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN]].sub1
845  ; GFX12-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_IDXEN]].sub2
846  ; GFX12-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
847  ; GFX12-NEXT:   GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1)
848  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
849  ; GFX12-NEXT:   S_ENDPGM 0
850  %res = call { <2 x float>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v2f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
851  %data = extractvalue { <2 x float>, i32 } %res, 0
852  store <2 x float> %data, ptr addrspace(1) %data_addr
853  %tfe = extractvalue { <2 x float>, i32 } %res, 1
854  store i32 %tfe, ptr addrspace(1) %tfe_addr
855  ret void
856}
857
858define amdgpu_ps void @raw_buffer_load_v3i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
859  ; GFX6-LABEL: name: raw_buffer_load_v3i32_tfe
860  ; GFX6: bb.1 (%ir-block.0):
861  ; GFX6-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
862  ; GFX6-NEXT: {{  $}}
863  ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
864  ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
865  ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
866  ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
867  ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
868  ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
869  ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
870  ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
871  ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
872  ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
873  ; GFX6-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
874  ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
875  ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
876  ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
877  ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0
878  ; GFX6-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1
879  ; GFX6-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2
880  ; GFX6-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3
881  ; GFX6-NEXT:   [[COPY13:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0_sub1
882  ; GFX6-NEXT:   [[COPY14:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2_sub3
883  ; GFX6-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
884  ; GFX6-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
885  ; GFX6-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
886  ; GFX6-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
887  ; GFX6-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3
888  ; GFX6-NEXT:   BUFFER_STORE_DWORDX2_ADDR64 [[COPY13]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, align 16, addrspace 1)
889  ; GFX6-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
890  ; GFX6-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
891  ; GFX6-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
892  ; GFX6-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
893  ; GFX6-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3
894  ; GFX6-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE1]], [[REG_SEQUENCE6]], 0, 8, 0, 0, implicit $exec :: (store (s32) into %ir.data_addr + 8, align 8, basealign 16, addrspace 1)
895  ; GFX6-NEXT:   [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 0
896  ; GFX6-NEXT:   [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
897  ; GFX6-NEXT:   [[REG_SEQUENCE7:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_5]], %subreg.sub0, [[S_MOV_B32_6]], %subreg.sub1
898  ; GFX6-NEXT:   [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 0
899  ; GFX6-NEXT:   [[REG_SEQUENCE8:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_2]], %subreg.sub0_sub1, [[REG_SEQUENCE7]], %subreg.sub2_sub3
900  ; GFX6-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY12]], [[REG_SEQUENCE2]], [[REG_SEQUENCE8]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
901  ; GFX6-NEXT:   S_ENDPGM 0
902  ;
903  ; GFX7-LABEL: name: raw_buffer_load_v3i32_tfe
904  ; GFX7: bb.1 (%ir-block.0):
905  ; GFX7-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
906  ; GFX7-NEXT: {{  $}}
907  ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
908  ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
909  ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
910  ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
911  ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
912  ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
913  ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
914  ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
915  ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
916  ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
917  ; GFX7-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
918  ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
919  ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
920  ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
921  ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0
922  ; GFX7-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1
923  ; GFX7-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2
924  ; GFX7-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3
925  ; GFX7-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2
926  ; GFX7-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
927  ; GFX7-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
928  ; GFX7-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
929  ; GFX7-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
930  ; GFX7-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3
931  ; GFX7-NEXT:   BUFFER_STORE_DWORDX3_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1)
932  ; GFX7-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
933  ; GFX7-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
934  ; GFX7-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
935  ; GFX7-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
936  ; GFX7-NEXT:   [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3
937  ; GFX7-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY12]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
938  ; GFX7-NEXT:   S_ENDPGM 0
939  ;
940  ; GFX8-LABEL: name: raw_buffer_load_v3i32_tfe
941  ; GFX8: bb.1 (%ir-block.0):
942  ; GFX8-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
943  ; GFX8-NEXT: {{  $}}
944  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
945  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
946  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
947  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
948  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
949  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
950  ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
951  ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
952  ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
953  ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
954  ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
955  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
956  ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
957  ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
958  ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0
959  ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1
960  ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2
961  ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3
962  ; GFX8-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2
963  ; GFX8-NEXT:   FLAT_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1)
964  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1)
965  ; GFX8-NEXT:   S_ENDPGM 0
966  ;
967  ; GFX910-LABEL: name: raw_buffer_load_v3i32_tfe
968  ; GFX910: bb.1 (%ir-block.0):
969  ; GFX910-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
970  ; GFX910-NEXT: {{  $}}
971  ; GFX910-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
972  ; GFX910-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
973  ; GFX910-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
974  ; GFX910-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
975  ; GFX910-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
976  ; GFX910-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
977  ; GFX910-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
978  ; GFX910-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
979  ; GFX910-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
980  ; GFX910-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
981  ; GFX910-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
982  ; GFX910-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
983  ; GFX910-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
984  ; GFX910-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
985  ; GFX910-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0
986  ; GFX910-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1
987  ; GFX910-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2
988  ; GFX910-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3
989  ; GFX910-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2
990  ; GFX910-NEXT:   GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1)
991  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
992  ; GFX910-NEXT:   S_ENDPGM 0
993  ;
994  ; GFX11-LABEL: name: raw_buffer_load_v3i32_tfe
995  ; GFX11: bb.1 (%ir-block.0):
996  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
997  ; GFX11-NEXT: {{  $}}
998  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
999  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1000  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1001  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1002  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1003  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1004  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1005  ; GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1006  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1007  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1008  ; GFX11-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1009  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1010  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
1011  ; GFX11-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
1012  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0
1013  ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1
1014  ; GFX11-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2
1015  ; GFX11-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3
1016  ; GFX11-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2
1017  ; GFX11-NEXT:   GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1)
1018  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1019  ; GFX11-NEXT:   S_ENDPGM 0
1020  ;
1021  ; GFX12-LABEL: name: raw_buffer_load_v3i32_tfe
1022  ; GFX12: bb.1 (%ir-block.0):
1023  ; GFX12-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1024  ; GFX12-NEXT: {{  $}}
1025  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1026  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1027  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1028  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1029  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1030  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1031  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1032  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1033  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1034  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1035  ; GFX12-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1036  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1037  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
1038  ; GFX12-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
1039  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub0
1040  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub1
1041  ; GFX12-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub2
1042  ; GFX12-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub3
1043  ; GFX12-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2
1044  ; GFX12-NEXT:   GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1)
1045  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1046  ; GFX12-NEXT:   S_ENDPGM 0
1047  %res = call { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v3i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
1048  %data = extractvalue { <3 x i32>, i32 } %res, 0
1049  store <3 x i32> %data, ptr addrspace(1) %data_addr
1050  %tfe = extractvalue { <3 x i32>, i32 } %res, 1
1051  store i32 %tfe, ptr addrspace(1) %tfe_addr
1052  ret void
1053}
1054
1055define amdgpu_ps void @raw_buffer_load_v3f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
1056  ; GFX6-LABEL: name: raw_buffer_load_v3f32_tfe
1057  ; GFX6: bb.1 (%ir-block.0):
1058  ; GFX6-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1059  ; GFX6-NEXT: {{  $}}
1060  ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1061  ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1062  ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1063  ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1064  ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1065  ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1066  ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1067  ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1068  ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1069  ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1070  ; GFX6-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1071  ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1072  ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
1073  ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
1074  ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0
1075  ; GFX6-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1
1076  ; GFX6-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2
1077  ; GFX6-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3
1078  ; GFX6-NEXT:   [[COPY13:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0_sub1
1079  ; GFX6-NEXT:   [[COPY14:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2_sub3
1080  ; GFX6-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1081  ; GFX6-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1082  ; GFX6-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
1083  ; GFX6-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1084  ; GFX6-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3
1085  ; GFX6-NEXT:   BUFFER_STORE_DWORDX2_ADDR64 [[COPY13]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, align 16, addrspace 1)
1086  ; GFX6-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1087  ; GFX6-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1088  ; GFX6-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
1089  ; GFX6-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1090  ; GFX6-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3
1091  ; GFX6-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE1]], [[REG_SEQUENCE6]], 0, 8, 0, 0, implicit $exec :: (store (s32) into %ir.data_addr + 8, align 8, basealign 16, addrspace 1)
1092  ; GFX6-NEXT:   [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1093  ; GFX6-NEXT:   [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1094  ; GFX6-NEXT:   [[REG_SEQUENCE7:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_5]], %subreg.sub0, [[S_MOV_B32_6]], %subreg.sub1
1095  ; GFX6-NEXT:   [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1096  ; GFX6-NEXT:   [[REG_SEQUENCE8:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_2]], %subreg.sub0_sub1, [[REG_SEQUENCE7]], %subreg.sub2_sub3
1097  ; GFX6-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY12]], [[REG_SEQUENCE2]], [[REG_SEQUENCE8]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1098  ; GFX6-NEXT:   S_ENDPGM 0
1099  ;
1100  ; GFX7-LABEL: name: raw_buffer_load_v3f32_tfe
1101  ; GFX7: bb.1 (%ir-block.0):
1102  ; GFX7-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1103  ; GFX7-NEXT: {{  $}}
1104  ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1105  ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1106  ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1107  ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1108  ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1109  ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1110  ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1111  ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1112  ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1113  ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1114  ; GFX7-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1115  ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1116  ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
1117  ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
1118  ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0
1119  ; GFX7-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1
1120  ; GFX7-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2
1121  ; GFX7-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3
1122  ; GFX7-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2
1123  ; GFX7-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1124  ; GFX7-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1125  ; GFX7-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
1126  ; GFX7-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1127  ; GFX7-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3
1128  ; GFX7-NEXT:   BUFFER_STORE_DWORDX3_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1)
1129  ; GFX7-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1130  ; GFX7-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1131  ; GFX7-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
1132  ; GFX7-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1133  ; GFX7-NEXT:   [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3
1134  ; GFX7-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY12]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1135  ; GFX7-NEXT:   S_ENDPGM 0
1136  ;
1137  ; GFX8-LABEL: name: raw_buffer_load_v3f32_tfe
1138  ; GFX8: bb.1 (%ir-block.0):
1139  ; GFX8-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1140  ; GFX8-NEXT: {{  $}}
1141  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1142  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1143  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1144  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1145  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1146  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1147  ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1148  ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1149  ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1150  ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1151  ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1152  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1153  ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
1154  ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
1155  ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0
1156  ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1
1157  ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2
1158  ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3
1159  ; GFX8-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2
1160  ; GFX8-NEXT:   FLAT_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1)
1161  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1)
1162  ; GFX8-NEXT:   S_ENDPGM 0
1163  ;
1164  ; GFX910-LABEL: name: raw_buffer_load_v3f32_tfe
1165  ; GFX910: bb.1 (%ir-block.0):
1166  ; GFX910-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1167  ; GFX910-NEXT: {{  $}}
1168  ; GFX910-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1169  ; GFX910-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1170  ; GFX910-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1171  ; GFX910-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1172  ; GFX910-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1173  ; GFX910-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1174  ; GFX910-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1175  ; GFX910-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1176  ; GFX910-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1177  ; GFX910-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1178  ; GFX910-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1179  ; GFX910-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1180  ; GFX910-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
1181  ; GFX910-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
1182  ; GFX910-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0
1183  ; GFX910-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1
1184  ; GFX910-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2
1185  ; GFX910-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3
1186  ; GFX910-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2
1187  ; GFX910-NEXT:   GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1)
1188  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1189  ; GFX910-NEXT:   S_ENDPGM 0
1190  ;
1191  ; GFX11-LABEL: name: raw_buffer_load_v3f32_tfe
1192  ; GFX11: bb.1 (%ir-block.0):
1193  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1194  ; GFX11-NEXT: {{  $}}
1195  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1196  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1197  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1198  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1199  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1200  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1201  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1202  ; GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1203  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1204  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1205  ; GFX11-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1206  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1207  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
1208  ; GFX11-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
1209  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub0
1210  ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub1
1211  ; GFX11-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub2
1212  ; GFX11-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_IDXEN]].sub3
1213  ; GFX11-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2
1214  ; GFX11-NEXT:   GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1)
1215  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1216  ; GFX11-NEXT:   S_ENDPGM 0
1217  ;
1218  ; GFX12-LABEL: name: raw_buffer_load_v3f32_tfe
1219  ; GFX12: bb.1 (%ir-block.0):
1220  ; GFX12-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1221  ; GFX12-NEXT: {{  $}}
1222  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1223  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1224  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1225  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1226  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1227  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1228  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1229  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1230  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1231  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1232  ; GFX12-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1233  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1234  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
1235  ; GFX12-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
1236  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub0
1237  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub1
1238  ; GFX12-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub2
1239  ; GFX12-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_IDXEN]].sub3
1240  ; GFX12-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2
1241  ; GFX12-NEXT:   GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1)
1242  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1243  ; GFX12-NEXT:   S_ENDPGM 0
1244  %res = call { <3 x float>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v3f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
1245  %data = extractvalue { <3 x float>, i32 } %res, 0
1246  store <3 x float> %data, ptr addrspace(1) %data_addr
1247  %tfe = extractvalue { <3 x float>, i32 } %res, 1
1248  store i32 %tfe, ptr addrspace(1) %tfe_addr
1249  ret void
1250}
1251
1252define amdgpu_ps void @raw_buffer_load_v4i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
1253  ; GFX67-LABEL: name: raw_buffer_load_v4i32_tfe
1254  ; GFX67: bb.1 (%ir-block.0):
1255  ; GFX67-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1256  ; GFX67-NEXT: {{  $}}
1257  ; GFX67-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1258  ; GFX67-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1259  ; GFX67-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1260  ; GFX67-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1261  ; GFX67-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1262  ; GFX67-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1263  ; GFX67-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1264  ; GFX67-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1265  ; GFX67-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1266  ; GFX67-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1267  ; GFX67-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1268  ; GFX67-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1269  ; GFX67-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
1270  ; GFX67-NEXT:   [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
1271  ; GFX67-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0
1272  ; GFX67-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1
1273  ; GFX67-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2
1274  ; GFX67-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3
1275  ; GFX67-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4
1276  ; GFX67-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3
1277  ; GFX67-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1278  ; GFX67-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1279  ; GFX67-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
1280  ; GFX67-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1281  ; GFX67-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3
1282  ; GFX67-NEXT:   BUFFER_STORE_DWORDX4_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1)
1283  ; GFX67-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1284  ; GFX67-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1285  ; GFX67-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
1286  ; GFX67-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1287  ; GFX67-NEXT:   [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3
1288  ; GFX67-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY13]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1289  ; GFX67-NEXT:   S_ENDPGM 0
1290  ;
1291  ; GFX8-LABEL: name: raw_buffer_load_v4i32_tfe
1292  ; GFX8: bb.1 (%ir-block.0):
1293  ; GFX8-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1294  ; GFX8-NEXT: {{  $}}
1295  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1296  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1297  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1298  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1299  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1300  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1301  ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1302  ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1303  ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1304  ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1305  ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1306  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1307  ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
1308  ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
1309  ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0
1310  ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1
1311  ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2
1312  ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3
1313  ; GFX8-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4
1314  ; GFX8-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3
1315  ; GFX8-NEXT:   FLAT_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>) into %ir.data_addr, addrspace 1)
1316  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1)
1317  ; GFX8-NEXT:   S_ENDPGM 0
1318  ;
1319  ; GFX910-LABEL: name: raw_buffer_load_v4i32_tfe
1320  ; GFX910: bb.1 (%ir-block.0):
1321  ; GFX910-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1322  ; GFX910-NEXT: {{  $}}
1323  ; GFX910-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1324  ; GFX910-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1325  ; GFX910-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1326  ; GFX910-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1327  ; GFX910-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1328  ; GFX910-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1329  ; GFX910-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1330  ; GFX910-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1331  ; GFX910-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1332  ; GFX910-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1333  ; GFX910-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1334  ; GFX910-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1335  ; GFX910-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
1336  ; GFX910-NEXT:   [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
1337  ; GFX910-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0
1338  ; GFX910-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1
1339  ; GFX910-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2
1340  ; GFX910-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3
1341  ; GFX910-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4
1342  ; GFX910-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3
1343  ; GFX910-NEXT:   GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1)
1344  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1345  ; GFX910-NEXT:   S_ENDPGM 0
1346  ;
1347  ; GFX11-LABEL: name: raw_buffer_load_v4i32_tfe
1348  ; GFX11: bb.1 (%ir-block.0):
1349  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1350  ; GFX11-NEXT: {{  $}}
1351  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1352  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1353  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1354  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1355  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1356  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1357  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1358  ; GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1359  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1360  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1361  ; GFX11-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1362  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1363  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
1364  ; GFX11-NEXT:   [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
1365  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0
1366  ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1
1367  ; GFX11-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2
1368  ; GFX11-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3
1369  ; GFX11-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4
1370  ; GFX11-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3
1371  ; GFX11-NEXT:   GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1)
1372  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1373  ; GFX11-NEXT:   S_ENDPGM 0
1374  ;
1375  ; GFX12-LABEL: name: raw_buffer_load_v4i32_tfe
1376  ; GFX12: bb.1 (%ir-block.0):
1377  ; GFX12-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1378  ; GFX12-NEXT: {{  $}}
1379  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1380  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1381  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1382  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1383  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1384  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1385  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1386  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1387  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1388  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1389  ; GFX12-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1390  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1391  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
1392  ; GFX12-NEXT:   [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
1393  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub0
1394  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub1
1395  ; GFX12-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub2
1396  ; GFX12-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub3
1397  ; GFX12-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub4
1398  ; GFX12-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3
1399  ; GFX12-NEXT:   GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1)
1400  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1401  ; GFX12-NEXT:   S_ENDPGM 0
1402  %res = call { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v4i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
1403  %data = extractvalue { <4 x i32>, i32 } %res, 0
1404  store <4 x i32> %data, ptr addrspace(1) %data_addr
1405  %tfe = extractvalue { <4 x i32>, i32 } %res, 1
1406  store i32 %tfe, ptr addrspace(1) %tfe_addr
1407  ret void
1408}
1409
1410define amdgpu_ps void @raw_buffer_load_v4f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
1411  ; GFX67-LABEL: name: raw_buffer_load_v4f32_tfe
1412  ; GFX67: bb.1 (%ir-block.0):
1413  ; GFX67-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1414  ; GFX67-NEXT: {{  $}}
1415  ; GFX67-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1416  ; GFX67-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1417  ; GFX67-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1418  ; GFX67-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1419  ; GFX67-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1420  ; GFX67-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1421  ; GFX67-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1422  ; GFX67-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1423  ; GFX67-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1424  ; GFX67-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1425  ; GFX67-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1426  ; GFX67-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1427  ; GFX67-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
1428  ; GFX67-NEXT:   [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
1429  ; GFX67-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0
1430  ; GFX67-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1
1431  ; GFX67-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2
1432  ; GFX67-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3
1433  ; GFX67-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4
1434  ; GFX67-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3
1435  ; GFX67-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1436  ; GFX67-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1437  ; GFX67-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
1438  ; GFX67-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1439  ; GFX67-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3
1440  ; GFX67-NEXT:   BUFFER_STORE_DWORDX4_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1)
1441  ; GFX67-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1442  ; GFX67-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1443  ; GFX67-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
1444  ; GFX67-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1445  ; GFX67-NEXT:   [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3
1446  ; GFX67-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY13]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1447  ; GFX67-NEXT:   S_ENDPGM 0
1448  ;
1449  ; GFX8-LABEL: name: raw_buffer_load_v4f32_tfe
1450  ; GFX8: bb.1 (%ir-block.0):
1451  ; GFX8-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1452  ; GFX8-NEXT: {{  $}}
1453  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1454  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1455  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1456  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1457  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1458  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1459  ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1460  ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1461  ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1462  ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1463  ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1464  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1465  ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
1466  ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
1467  ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0
1468  ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1
1469  ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2
1470  ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3
1471  ; GFX8-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4
1472  ; GFX8-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3
1473  ; GFX8-NEXT:   FLAT_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>) into %ir.data_addr, addrspace 1)
1474  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1)
1475  ; GFX8-NEXT:   S_ENDPGM 0
1476  ;
1477  ; GFX910-LABEL: name: raw_buffer_load_v4f32_tfe
1478  ; GFX910: bb.1 (%ir-block.0):
1479  ; GFX910-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1480  ; GFX910-NEXT: {{  $}}
1481  ; GFX910-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1482  ; GFX910-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1483  ; GFX910-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1484  ; GFX910-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1485  ; GFX910-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1486  ; GFX910-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1487  ; GFX910-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1488  ; GFX910-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1489  ; GFX910-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1490  ; GFX910-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1491  ; GFX910-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1492  ; GFX910-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1493  ; GFX910-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
1494  ; GFX910-NEXT:   [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
1495  ; GFX910-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0
1496  ; GFX910-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1
1497  ; GFX910-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2
1498  ; GFX910-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3
1499  ; GFX910-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4
1500  ; GFX910-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3
1501  ; GFX910-NEXT:   GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1)
1502  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1503  ; GFX910-NEXT:   S_ENDPGM 0
1504  ;
1505  ; GFX11-LABEL: name: raw_buffer_load_v4f32_tfe
1506  ; GFX11: bb.1 (%ir-block.0):
1507  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1508  ; GFX11-NEXT: {{  $}}
1509  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1510  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1511  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1512  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1513  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1514  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1515  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1516  ; GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1517  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1518  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1519  ; GFX11-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1520  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1521  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
1522  ; GFX11-NEXT:   [[BUFFER_LOAD_DWORDX4_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
1523  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub0
1524  ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub1
1525  ; GFX11-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub2
1526  ; GFX11-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub3
1527  ; GFX11-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_IDXEN]].sub4
1528  ; GFX11-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3
1529  ; GFX11-NEXT:   GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1)
1530  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1531  ; GFX11-NEXT:   S_ENDPGM 0
1532  ;
1533  ; GFX12-LABEL: name: raw_buffer_load_v4f32_tfe
1534  ; GFX12: bb.1 (%ir-block.0):
1535  ; GFX12-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1536  ; GFX12-NEXT: {{  $}}
1537  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1538  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1539  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1540  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1541  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1542  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1543  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1544  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1545  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1546  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1547  ; GFX12-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1548  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1549  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
1550  ; GFX12-NEXT:   [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN [[COPY8]], [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
1551  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub0
1552  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub1
1553  ; GFX12-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub2
1554  ; GFX12-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub3
1555  ; GFX12-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_IDXEN]].sub4
1556  ; GFX12-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3
1557  ; GFX12-NEXT:   GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1)
1558  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1559  ; GFX12-NEXT:   S_ENDPGM 0
1560  %res = call { <4 x float>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v4f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
1561  %data = extractvalue { <4 x float>, i32 } %res, 0
1562  store <4 x float> %data, ptr addrspace(1) %data_addr
1563  %tfe = extractvalue { <4 x float>, i32 } %res, 1
1564  store i32 %tfe, ptr addrspace(1) %tfe_addr
1565  ret void
1566}
1567
1568declare { i8, i32 } @llvm.amdgcn.struct.buffer.load.sl_i8i32s(<4 x i32>, i32, i32, i32, i32)
1569declare { i16, i32 } @llvm.amdgcn.struct.buffer.load.sl_i16i32s(<4 x i32>, i32, i32, i32, i32)
1570declare { half, i32 } @llvm.amdgcn.struct.buffer.load.sl_f16i32s(<4 x i32>, i32, i32, i32, i32)
1571declare { i32, i32 } @llvm.amdgcn.struct.buffer.load.sl_i32i32s(<4 x i32>, i32, i32, i32, i32)
1572declare { <2 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v2i32i32s(<4 x i32>, i32, i32, i32, i32)
1573declare { <2 x float>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v2f32i32s(<4 x i32>, i32, i32, i32, i32)
1574declare { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v3i32i32s(<4 x i32>, i32, i32, i32, i32)
1575declare { <3 x float>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v3f32i32s(<4 x i32>, i32, i32, i32, i32)
1576declare { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v4i32i32s(<4 x i32>, i32, i32, i32, i32)
1577declare { <4 x float>, i32 } @llvm.amdgcn.struct.buffer.load.sl_v4f32i32s(<4 x i32>, i32, i32, i32, i32)
1578