xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.tfe.ll (revision 1e6a82b8ef19abdc45fc72c309b0bef6cb6eda72)
1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2; RUN: llc -global-isel -mcpu=tahiti -mtriple=amdgcn-- -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX67,GFX6
3; RUN: llc -global-isel -mcpu=hawaii -mtriple=amdgcn-- -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX67,GFX7
4; RUN: llc -global-isel -mcpu=fiji -mtriple=amdgcn-- -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX8
5; RUN: llc -global-isel -mcpu=gfx900 -mtriple=amdgcn-- -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX910
6; RUN: llc -global-isel -mcpu=gfx1010 -mtriple=amdgcn-- -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX910
7; RUN: llc -global-isel -mcpu=gfx1100 -mtriple=amdgcn-- -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX11
8; RUN: llc -global-isel -mcpu=gfx1200 -mtriple=amdgcn-- -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX12
9
10define amdgpu_ps void @raw_buffer_load_i8_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
11  ; GFX67-LABEL: name: raw_buffer_load_i8_tfe
12  ; GFX67: bb.1 (%ir-block.0):
13  ; GFX67-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
14  ; GFX67-NEXT: {{  $}}
15  ; GFX67-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
16  ; GFX67-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
17  ; GFX67-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
18  ; GFX67-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
19  ; GFX67-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
20  ; GFX67-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
21  ; GFX67-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
22  ; GFX67-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
23  ; GFX67-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
24  ; GFX67-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
25  ; GFX67-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
26  ; GFX67-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
27  ; GFX67-NEXT:   [[BUFFER_LOAD_UBYTE_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8)
28  ; GFX67-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_OFFSET]].sub0
29  ; GFX67-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_OFFSET]].sub1
30  ; GFX67-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
31  ; GFX67-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
32  ; GFX67-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
33  ; GFX67-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
34  ; GFX67-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3
35  ; GFX67-NEXT:   BUFFER_STORE_BYTE_ADDR64 [[COPY8]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.data_addr, addrspace 1)
36  ; GFX67-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
37  ; GFX67-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
38  ; GFX67-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
39  ; GFX67-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
40  ; GFX67-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3
41  ; GFX67-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY9]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
42  ; GFX67-NEXT:   S_ENDPGM 0
43  ;
44  ; GFX8-LABEL: name: raw_buffer_load_i8_tfe
45  ; GFX8: bb.1 (%ir-block.0):
46  ; GFX8-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
47  ; GFX8-NEXT: {{  $}}
48  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
49  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
50  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
51  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
52  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
53  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
54  ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
55  ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
56  ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
57  ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
58  ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
59  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
60  ; GFX8-NEXT:   [[BUFFER_LOAD_UBYTE_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8)
61  ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_OFFSET]].sub0
62  ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_OFFSET]].sub1
63  ; GFX8-NEXT:   FLAT_STORE_BYTE [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8) into %ir.data_addr, addrspace 1)
64  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1)
65  ; GFX8-NEXT:   S_ENDPGM 0
66  ;
67  ; GFX910-LABEL: name: raw_buffer_load_i8_tfe
68  ; GFX910: bb.1 (%ir-block.0):
69  ; GFX910-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
70  ; GFX910-NEXT: {{  $}}
71  ; GFX910-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
72  ; GFX910-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
73  ; GFX910-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
74  ; GFX910-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
75  ; GFX910-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
76  ; GFX910-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
77  ; GFX910-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
78  ; GFX910-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
79  ; GFX910-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
80  ; GFX910-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
81  ; GFX910-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
82  ; GFX910-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
83  ; GFX910-NEXT:   [[BUFFER_LOAD_UBYTE_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8)
84  ; GFX910-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_OFFSET]].sub0
85  ; GFX910-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_OFFSET]].sub1
86  ; GFX910-NEXT:   GLOBAL_STORE_BYTE [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s8) into %ir.data_addr, addrspace 1)
87  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
88  ; GFX910-NEXT:   S_ENDPGM 0
89  ;
90  ; GFX11-LABEL: name: raw_buffer_load_i8_tfe
91  ; GFX11: bb.1 (%ir-block.0):
92  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
93  ; GFX11-NEXT: {{  $}}
94  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
95  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
96  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
97  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
98  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
99  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
100  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
101  ; GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
102  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
103  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
104  ; GFX11-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
105  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
106  ; GFX11-NEXT:   [[BUFFER_LOAD_UBYTE_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8)
107  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_OFFSET]].sub0
108  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_OFFSET]].sub1
109  ; GFX11-NEXT:   GLOBAL_STORE_BYTE [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s8) into %ir.data_addr, addrspace 1)
110  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
111  ; GFX11-NEXT:   S_ENDPGM 0
112  ;
113  ; GFX12-LABEL: name: raw_buffer_load_i8_tfe
114  ; GFX12: bb.1 (%ir-block.0):
115  ; GFX12-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
116  ; GFX12-NEXT: {{  $}}
117  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
118  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
119  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
120  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
121  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
122  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
123  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
124  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
125  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
126  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
127  ; GFX12-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
128  ; GFX12-NEXT:   [[BUFFER_LOAD_UBYTE_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_UBYTE_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8)
129  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_VBUFFER_OFFSET]].sub0
130  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_UBYTE_TFE_VBUFFER_OFFSET]].sub1
131  ; GFX12-NEXT:   GLOBAL_STORE_BYTE [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s8) into %ir.data_addr, addrspace 1)
132  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
133  ; GFX12-NEXT:   S_ENDPGM 0
134  %res = call { i8, i32 } @llvm.amdgcn.raw.buffer.load.sl_i8i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
135  %data = extractvalue { i8, i32 } %res, 0
136  store i8 %data, ptr addrspace(1) %data_addr
137  %tfe = extractvalue { i8, i32 } %res, 1
138  store i32 %tfe, ptr addrspace(1) %tfe_addr
139  ret void
140}
141
142define amdgpu_ps void @raw_buffer_load_i16_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
143  ; GFX67-LABEL: name: raw_buffer_load_i16_tfe
144  ; GFX67: bb.1 (%ir-block.0):
145  ; GFX67-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
146  ; GFX67-NEXT: {{  $}}
147  ; GFX67-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
148  ; GFX67-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
149  ; GFX67-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
150  ; GFX67-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
151  ; GFX67-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
152  ; GFX67-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
153  ; GFX67-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
154  ; GFX67-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
155  ; GFX67-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
156  ; GFX67-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
157  ; GFX67-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
158  ; GFX67-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
159  ; GFX67-NEXT:   [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
160  ; GFX67-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub0
161  ; GFX67-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub1
162  ; GFX67-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
163  ; GFX67-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
164  ; GFX67-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
165  ; GFX67-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
166  ; GFX67-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3
167  ; GFX67-NEXT:   BUFFER_STORE_SHORT_ADDR64 [[COPY8]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1)
168  ; GFX67-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
169  ; GFX67-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
170  ; GFX67-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
171  ; GFX67-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
172  ; GFX67-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3
173  ; GFX67-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY9]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
174  ; GFX67-NEXT:   S_ENDPGM 0
175  ;
176  ; GFX8-LABEL: name: raw_buffer_load_i16_tfe
177  ; GFX8: bb.1 (%ir-block.0):
178  ; GFX8-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
179  ; GFX8-NEXT: {{  $}}
180  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
181  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
182  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
183  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
184  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
185  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
186  ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
187  ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
188  ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
189  ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
190  ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
191  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
192  ; GFX8-NEXT:   [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
193  ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub0
194  ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub1
195  ; GFX8-NEXT:   FLAT_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %ir.data_addr, addrspace 1)
196  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1)
197  ; GFX8-NEXT:   S_ENDPGM 0
198  ;
199  ; GFX910-LABEL: name: raw_buffer_load_i16_tfe
200  ; GFX910: bb.1 (%ir-block.0):
201  ; GFX910-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
202  ; GFX910-NEXT: {{  $}}
203  ; GFX910-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
204  ; GFX910-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
205  ; GFX910-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
206  ; GFX910-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
207  ; GFX910-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
208  ; GFX910-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
209  ; GFX910-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
210  ; GFX910-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
211  ; GFX910-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
212  ; GFX910-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
213  ; GFX910-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
214  ; GFX910-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
215  ; GFX910-NEXT:   [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
216  ; GFX910-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub0
217  ; GFX910-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub1
218  ; GFX910-NEXT:   GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1)
219  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
220  ; GFX910-NEXT:   S_ENDPGM 0
221  ;
222  ; GFX11-LABEL: name: raw_buffer_load_i16_tfe
223  ; GFX11: bb.1 (%ir-block.0):
224  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
225  ; GFX11-NEXT: {{  $}}
226  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
227  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
228  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
229  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
230  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
231  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
232  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
233  ; GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
234  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
235  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
236  ; GFX11-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
237  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
238  ; GFX11-NEXT:   [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
239  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub0
240  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub1
241  ; GFX11-NEXT:   GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1)
242  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
243  ; GFX11-NEXT:   S_ENDPGM 0
244  ;
245  ; GFX12-LABEL: name: raw_buffer_load_i16_tfe
246  ; GFX12: bb.1 (%ir-block.0):
247  ; GFX12-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
248  ; GFX12-NEXT: {{  $}}
249  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
250  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
251  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
252  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
253  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
254  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
255  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
256  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
257  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
258  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
259  ; GFX12-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
260  ; GFX12-NEXT:   [[BUFFER_LOAD_USHORT_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
261  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_VBUFFER_OFFSET]].sub0
262  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_VBUFFER_OFFSET]].sub1
263  ; GFX12-NEXT:   GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1)
264  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
265  ; GFX12-NEXT:   S_ENDPGM 0
266  %res = call { i16, i32 } @llvm.amdgcn.raw.buffer.load.sl_i16i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
267  %data = extractvalue { i16, i32 } %res, 0
268  store i16 %data, ptr addrspace(1) %data_addr
269  %tfe = extractvalue { i16, i32 } %res, 1
270  store i32 %tfe, ptr addrspace(1) %tfe_addr
271  ret void
272}
273
274define amdgpu_ps void @raw_buffer_load_f16_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
275  ; GFX67-LABEL: name: raw_buffer_load_f16_tfe
276  ; GFX67: bb.1 (%ir-block.0):
277  ; GFX67-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
278  ; GFX67-NEXT: {{  $}}
279  ; GFX67-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
280  ; GFX67-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
281  ; GFX67-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
282  ; GFX67-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
283  ; GFX67-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
284  ; GFX67-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
285  ; GFX67-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
286  ; GFX67-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
287  ; GFX67-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
288  ; GFX67-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
289  ; GFX67-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
290  ; GFX67-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
291  ; GFX67-NEXT:   [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
292  ; GFX67-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub0
293  ; GFX67-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub1
294  ; GFX67-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
295  ; GFX67-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
296  ; GFX67-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
297  ; GFX67-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
298  ; GFX67-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3
299  ; GFX67-NEXT:   BUFFER_STORE_SHORT_ADDR64 [[COPY8]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1)
300  ; GFX67-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
301  ; GFX67-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
302  ; GFX67-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
303  ; GFX67-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
304  ; GFX67-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3
305  ; GFX67-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY9]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
306  ; GFX67-NEXT:   S_ENDPGM 0
307  ;
308  ; GFX8-LABEL: name: raw_buffer_load_f16_tfe
309  ; GFX8: bb.1 (%ir-block.0):
310  ; GFX8-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
311  ; GFX8-NEXT: {{  $}}
312  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
313  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
314  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
315  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
316  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
317  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
318  ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
319  ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
320  ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
321  ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
322  ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
323  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
324  ; GFX8-NEXT:   [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
325  ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub0
326  ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub1
327  ; GFX8-NEXT:   FLAT_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %ir.data_addr, addrspace 1)
328  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1)
329  ; GFX8-NEXT:   S_ENDPGM 0
330  ;
331  ; GFX910-LABEL: name: raw_buffer_load_f16_tfe
332  ; GFX910: bb.1 (%ir-block.0):
333  ; GFX910-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
334  ; GFX910-NEXT: {{  $}}
335  ; GFX910-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
336  ; GFX910-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
337  ; GFX910-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
338  ; GFX910-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
339  ; GFX910-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
340  ; GFX910-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
341  ; GFX910-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
342  ; GFX910-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
343  ; GFX910-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
344  ; GFX910-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
345  ; GFX910-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
346  ; GFX910-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
347  ; GFX910-NEXT:   [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
348  ; GFX910-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub0
349  ; GFX910-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub1
350  ; GFX910-NEXT:   GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1)
351  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
352  ; GFX910-NEXT:   S_ENDPGM 0
353  ;
354  ; GFX11-LABEL: name: raw_buffer_load_f16_tfe
355  ; GFX11: bb.1 (%ir-block.0):
356  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
357  ; GFX11-NEXT: {{  $}}
358  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
359  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
360  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
361  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
362  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
363  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
364  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
365  ; GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
366  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
367  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
368  ; GFX11-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
369  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
370  ; GFX11-NEXT:   [[BUFFER_LOAD_USHORT_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
371  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub0
372  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_OFFSET]].sub1
373  ; GFX11-NEXT:   GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1)
374  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
375  ; GFX11-NEXT:   S_ENDPGM 0
376  ;
377  ; GFX12-LABEL: name: raw_buffer_load_f16_tfe
378  ; GFX12: bb.1 (%ir-block.0):
379  ; GFX12-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
380  ; GFX12-NEXT: {{  $}}
381  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
382  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
383  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
384  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
385  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
386  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
387  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
388  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
389  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
390  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
391  ; GFX12-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
392  ; GFX12-NEXT:   [[BUFFER_LOAD_USHORT_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_USHORT_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
393  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_VBUFFER_OFFSET]].sub0
394  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_USHORT_TFE_VBUFFER_OFFSET]].sub1
395  ; GFX12-NEXT:   GLOBAL_STORE_SHORT [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s16) into %ir.data_addr, addrspace 1)
396  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
397  ; GFX12-NEXT:   S_ENDPGM 0
398  %res = call { half, i32 } @llvm.amdgcn.raw.buffer.load.sl_f16i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
399  %data = extractvalue { half, i32 } %res, 0
400  store half %data, ptr addrspace(1) %data_addr
401  %tfe = extractvalue { half, i32 } %res, 1
402  store i32 %tfe, ptr addrspace(1) %tfe_addr
403  ret void
404}
405
406define amdgpu_ps void @raw_buffer_load_i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
407  ; GFX67-LABEL: name: raw_buffer_load_i32_tfe
408  ; GFX67: bb.1 (%ir-block.0):
409  ; GFX67-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
410  ; GFX67-NEXT: {{  $}}
411  ; GFX67-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
412  ; GFX67-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
413  ; GFX67-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
414  ; GFX67-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
415  ; GFX67-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
416  ; GFX67-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
417  ; GFX67-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
418  ; GFX67-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
419  ; GFX67-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
420  ; GFX67-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
421  ; GFX67-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
422  ; GFX67-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
423  ; GFX67-NEXT:   [[BUFFER_LOAD_DWORD_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
424  ; GFX67-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_OFFSET]].sub0
425  ; GFX67-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_OFFSET]].sub1
426  ; GFX67-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
427  ; GFX67-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
428  ; GFX67-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
429  ; GFX67-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
430  ; GFX67-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3
431  ; GFX67-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY8]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.data_addr, addrspace 1)
432  ; GFX67-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
433  ; GFX67-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
434  ; GFX67-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
435  ; GFX67-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
436  ; GFX67-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3
437  ; GFX67-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY9]], [[REG_SEQUENCE2]], [[REG_SEQUENCE6]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
438  ; GFX67-NEXT:   S_ENDPGM 0
439  ;
440  ; GFX8-LABEL: name: raw_buffer_load_i32_tfe
441  ; GFX8: bb.1 (%ir-block.0):
442  ; GFX8-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
443  ; GFX8-NEXT: {{  $}}
444  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
445  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
446  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
447  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
448  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
449  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
450  ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
451  ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
452  ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
453  ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
454  ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
455  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
456  ; GFX8-NEXT:   [[BUFFER_LOAD_DWORD_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
457  ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_OFFSET]].sub0
458  ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_OFFSET]].sub1
459  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.data_addr, addrspace 1)
460  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1)
461  ; GFX8-NEXT:   S_ENDPGM 0
462  ;
463  ; GFX910-LABEL: name: raw_buffer_load_i32_tfe
464  ; GFX910: bb.1 (%ir-block.0):
465  ; GFX910-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
466  ; GFX910-NEXT: {{  $}}
467  ; GFX910-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
468  ; GFX910-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
469  ; GFX910-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
470  ; GFX910-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
471  ; GFX910-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
472  ; GFX910-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
473  ; GFX910-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
474  ; GFX910-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
475  ; GFX910-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
476  ; GFX910-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
477  ; GFX910-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
478  ; GFX910-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
479  ; GFX910-NEXT:   [[BUFFER_LOAD_DWORD_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
480  ; GFX910-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_OFFSET]].sub0
481  ; GFX910-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_OFFSET]].sub1
482  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s32) into %ir.data_addr, addrspace 1)
483  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
484  ; GFX910-NEXT:   S_ENDPGM 0
485  ;
486  ; GFX11-LABEL: name: raw_buffer_load_i32_tfe
487  ; GFX11: bb.1 (%ir-block.0):
488  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
489  ; GFX11-NEXT: {{  $}}
490  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
491  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
492  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
493  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
494  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
495  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
496  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
497  ; GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
498  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
499  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
500  ; GFX11-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
501  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
502  ; GFX11-NEXT:   [[BUFFER_LOAD_DWORD_TFE_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
503  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_OFFSET]].sub0
504  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_OFFSET]].sub1
505  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s32) into %ir.data_addr, addrspace 1)
506  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
507  ; GFX11-NEXT:   S_ENDPGM 0
508  ;
509  ; GFX12-LABEL: name: raw_buffer_load_i32_tfe
510  ; GFX12: bb.1 (%ir-block.0):
511  ; GFX12-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
512  ; GFX12-NEXT: {{  $}}
513  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
514  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
515  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
516  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
517  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
518  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
519  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
520  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
521  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
522  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
523  ; GFX12-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
524  ; GFX12-NEXT:   [[BUFFER_LOAD_DWORD_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORD_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
525  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_VBUFFER_OFFSET]].sub0
526  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORD_TFE_VBUFFER_OFFSET]].sub1
527  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (store (s32) into %ir.data_addr, addrspace 1)
528  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
529  ; GFX12-NEXT:   S_ENDPGM 0
530  %res = call { i32, i32 } @llvm.amdgcn.raw.buffer.load.sl_i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
531  %data = extractvalue { i32, i32 } %res, 0
532  store i32 %data, ptr addrspace(1) %data_addr
533  %tfe = extractvalue { i32, i32 } %res, 1
534  store i32 %tfe, ptr addrspace(1) %tfe_addr
535  ret void
536}
537
538define amdgpu_ps void @raw_buffer_load_v2i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
539  ; GFX67-LABEL: name: raw_buffer_load_v2i32_tfe
540  ; GFX67: bb.1 (%ir-block.0):
541  ; GFX67-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
542  ; GFX67-NEXT: {{  $}}
543  ; GFX67-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
544  ; GFX67-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
545  ; GFX67-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
546  ; GFX67-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
547  ; GFX67-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
548  ; GFX67-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
549  ; GFX67-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
550  ; GFX67-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
551  ; GFX67-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
552  ; GFX67-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
553  ; GFX67-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
554  ; GFX67-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
555  ; GFX67-NEXT:   [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
556  ; GFX67-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub0
557  ; GFX67-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub1
558  ; GFX67-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub2
559  ; GFX67-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
560  ; GFX67-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
561  ; GFX67-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
562  ; GFX67-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
563  ; GFX67-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
564  ; GFX67-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3
565  ; GFX67-NEXT:   BUFFER_STORE_DWORDX2_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1)
566  ; GFX67-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
567  ; GFX67-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
568  ; GFX67-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
569  ; GFX67-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
570  ; GFX67-NEXT:   [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3
571  ; GFX67-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
572  ; GFX67-NEXT:   S_ENDPGM 0
573  ;
574  ; GFX8-LABEL: name: raw_buffer_load_v2i32_tfe
575  ; GFX8: bb.1 (%ir-block.0):
576  ; GFX8-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
577  ; GFX8-NEXT: {{  $}}
578  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
579  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
580  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
581  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
582  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
583  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
584  ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
585  ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
586  ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
587  ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
588  ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
589  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
590  ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
591  ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub0
592  ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub1
593  ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub2
594  ; GFX8-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
595  ; GFX8-NEXT:   FLAT_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>) into %ir.data_addr, addrspace 1)
596  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1)
597  ; GFX8-NEXT:   S_ENDPGM 0
598  ;
599  ; GFX910-LABEL: name: raw_buffer_load_v2i32_tfe
600  ; GFX910: bb.1 (%ir-block.0):
601  ; GFX910-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
602  ; GFX910-NEXT: {{  $}}
603  ; GFX910-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
604  ; GFX910-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
605  ; GFX910-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
606  ; GFX910-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
607  ; GFX910-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
608  ; GFX910-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
609  ; GFX910-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
610  ; GFX910-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
611  ; GFX910-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
612  ; GFX910-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
613  ; GFX910-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
614  ; GFX910-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
615  ; GFX910-NEXT:   [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
616  ; GFX910-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub0
617  ; GFX910-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub1
618  ; GFX910-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub2
619  ; GFX910-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
620  ; GFX910-NEXT:   GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1)
621  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
622  ; GFX910-NEXT:   S_ENDPGM 0
623  ;
624  ; GFX11-LABEL: name: raw_buffer_load_v2i32_tfe
625  ; GFX11: bb.1 (%ir-block.0):
626  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
627  ; GFX11-NEXT: {{  $}}
628  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
629  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
630  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
631  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
632  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
633  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
634  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
635  ; GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
636  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
637  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
638  ; GFX11-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
639  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
640  ; GFX11-NEXT:   [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
641  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub0
642  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub1
643  ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub2
644  ; GFX11-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
645  ; GFX11-NEXT:   GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1)
646  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
647  ; GFX11-NEXT:   S_ENDPGM 0
648  ;
649  ; GFX12-LABEL: name: raw_buffer_load_v2i32_tfe
650  ; GFX12: bb.1 (%ir-block.0):
651  ; GFX12-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
652  ; GFX12-NEXT: {{  $}}
653  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
654  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
655  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
656  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
657  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
658  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
659  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
660  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
661  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
662  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
663  ; GFX12-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
664  ; GFX12-NEXT:   [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
665  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET]].sub0
666  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET]].sub1
667  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET]].sub2
668  ; GFX12-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
669  ; GFX12-NEXT:   GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1)
670  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
671  ; GFX12-NEXT:   S_ENDPGM 0
672  %res = call { <2 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v2i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
673  %data = extractvalue { <2 x i32>, i32 } %res, 0
674  store <2 x i32> %data, ptr addrspace(1) %data_addr
675  %tfe = extractvalue { <2 x i32>, i32 } %res, 1
676  store i32 %tfe, ptr addrspace(1) %tfe_addr
677  ret void
678}
679
680define amdgpu_ps void @raw_buffer_load_v2f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
681  ; GFX67-LABEL: name: raw_buffer_load_v2f32_tfe
682  ; GFX67: bb.1 (%ir-block.0):
683  ; GFX67-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
684  ; GFX67-NEXT: {{  $}}
685  ; GFX67-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
686  ; GFX67-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
687  ; GFX67-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
688  ; GFX67-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
689  ; GFX67-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
690  ; GFX67-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
691  ; GFX67-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
692  ; GFX67-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
693  ; GFX67-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
694  ; GFX67-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
695  ; GFX67-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
696  ; GFX67-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
697  ; GFX67-NEXT:   [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
698  ; GFX67-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub0
699  ; GFX67-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub1
700  ; GFX67-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub2
701  ; GFX67-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
702  ; GFX67-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
703  ; GFX67-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
704  ; GFX67-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
705  ; GFX67-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
706  ; GFX67-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3
707  ; GFX67-NEXT:   BUFFER_STORE_DWORDX2_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1)
708  ; GFX67-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
709  ; GFX67-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
710  ; GFX67-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
711  ; GFX67-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
712  ; GFX67-NEXT:   [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3
713  ; GFX67-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
714  ; GFX67-NEXT:   S_ENDPGM 0
715  ;
716  ; GFX8-LABEL: name: raw_buffer_load_v2f32_tfe
717  ; GFX8: bb.1 (%ir-block.0):
718  ; GFX8-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
719  ; GFX8-NEXT: {{  $}}
720  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
721  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
722  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
723  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
724  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
725  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
726  ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
727  ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
728  ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
729  ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
730  ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
731  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
732  ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
733  ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub0
734  ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub1
735  ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub2
736  ; GFX8-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
737  ; GFX8-NEXT:   FLAT_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>) into %ir.data_addr, addrspace 1)
738  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1)
739  ; GFX8-NEXT:   S_ENDPGM 0
740  ;
741  ; GFX910-LABEL: name: raw_buffer_load_v2f32_tfe
742  ; GFX910: bb.1 (%ir-block.0):
743  ; GFX910-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
744  ; GFX910-NEXT: {{  $}}
745  ; GFX910-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
746  ; GFX910-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
747  ; GFX910-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
748  ; GFX910-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
749  ; GFX910-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
750  ; GFX910-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
751  ; GFX910-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
752  ; GFX910-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
753  ; GFX910-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
754  ; GFX910-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
755  ; GFX910-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
756  ; GFX910-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
757  ; GFX910-NEXT:   [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
758  ; GFX910-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub0
759  ; GFX910-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub1
760  ; GFX910-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub2
761  ; GFX910-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
762  ; GFX910-NEXT:   GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1)
763  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
764  ; GFX910-NEXT:   S_ENDPGM 0
765  ;
766  ; GFX11-LABEL: name: raw_buffer_load_v2f32_tfe
767  ; GFX11: bb.1 (%ir-block.0):
768  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
769  ; GFX11-NEXT: {{  $}}
770  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
771  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
772  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
773  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
774  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
775  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
776  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
777  ; GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
778  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
779  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
780  ; GFX11-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
781  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
782  ; GFX11-NEXT:   [[BUFFER_LOAD_DWORDX2_TFE_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
783  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub0
784  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub1
785  ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_OFFSET]].sub2
786  ; GFX11-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
787  ; GFX11-NEXT:   GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1)
788  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
789  ; GFX11-NEXT:   S_ENDPGM 0
790  ;
791  ; GFX12-LABEL: name: raw_buffer_load_v2f32_tfe
792  ; GFX12: bb.1 (%ir-block.0):
793  ; GFX12-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
794  ; GFX12-NEXT: {{  $}}
795  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
796  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
797  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
798  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
799  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
800  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
801  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
802  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
803  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
804  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
805  ; GFX12-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
806  ; GFX12-NEXT:   [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
807  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET]].sub0
808  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET]].sub1
809  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_TFE_VBUFFER_OFFSET]].sub2
810  ; GFX12-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
811  ; GFX12-NEXT:   GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, addrspace 1)
812  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
813  ; GFX12-NEXT:   S_ENDPGM 0
814  %res = call { <2 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v2f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
815  %data = extractvalue { <2 x float>, i32 } %res, 0
816  store <2 x float> %data, ptr addrspace(1) %data_addr
817  %tfe = extractvalue { <2 x float>, i32 } %res, 1
818  store i32 %tfe, ptr addrspace(1) %tfe_addr
819  ret void
820}
821
822define amdgpu_ps void @raw_buffer_load_v3i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
823  ; GFX6-LABEL: name: raw_buffer_load_v3i32_tfe
824  ; GFX6: bb.1 (%ir-block.0):
825  ; GFX6-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
826  ; GFX6-NEXT: {{  $}}
827  ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
828  ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
829  ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
830  ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
831  ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
832  ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
833  ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
834  ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
835  ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
836  ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
837  ; GFX6-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
838  ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
839  ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
840  ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0
841  ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub1
842  ; GFX6-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2
843  ; GFX6-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub3
844  ; GFX6-NEXT:   [[COPY12:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0_sub1
845  ; GFX6-NEXT:   [[COPY13:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2_sub3
846  ; GFX6-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
847  ; GFX6-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
848  ; GFX6-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
849  ; GFX6-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
850  ; GFX6-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3
851  ; GFX6-NEXT:   BUFFER_STORE_DWORDX2_ADDR64 [[COPY12]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, align 16, addrspace 1)
852  ; GFX6-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
853  ; GFX6-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
854  ; GFX6-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
855  ; GFX6-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
856  ; GFX6-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3
857  ; GFX6-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE1]], [[REG_SEQUENCE6]], 0, 8, 0, 0, implicit $exec :: (store (s32) into %ir.data_addr + 8, align 8, basealign 16, addrspace 1)
858  ; GFX6-NEXT:   [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 0
859  ; GFX6-NEXT:   [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
860  ; GFX6-NEXT:   [[REG_SEQUENCE7:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_5]], %subreg.sub0, [[S_MOV_B32_6]], %subreg.sub1
861  ; GFX6-NEXT:   [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 0
862  ; GFX6-NEXT:   [[REG_SEQUENCE8:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_2]], %subreg.sub0_sub1, [[REG_SEQUENCE7]], %subreg.sub2_sub3
863  ; GFX6-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE2]], [[REG_SEQUENCE8]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
864  ; GFX6-NEXT:   S_ENDPGM 0
865  ;
866  ; GFX7-LABEL: name: raw_buffer_load_v3i32_tfe
867  ; GFX7: bb.1 (%ir-block.0):
868  ; GFX7-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
869  ; GFX7-NEXT: {{  $}}
870  ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
871  ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
872  ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
873  ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
874  ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
875  ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
876  ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
877  ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
878  ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
879  ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
880  ; GFX7-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
881  ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
882  ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
883  ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0
884  ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub1
885  ; GFX7-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2
886  ; GFX7-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub3
887  ; GFX7-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2
888  ; GFX7-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
889  ; GFX7-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
890  ; GFX7-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
891  ; GFX7-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
892  ; GFX7-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3
893  ; GFX7-NEXT:   BUFFER_STORE_DWORDX3_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1)
894  ; GFX7-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
895  ; GFX7-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
896  ; GFX7-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
897  ; GFX7-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
898  ; GFX7-NEXT:   [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3
899  ; GFX7-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
900  ; GFX7-NEXT:   S_ENDPGM 0
901  ;
902  ; GFX8-LABEL: name: raw_buffer_load_v3i32_tfe
903  ; GFX8: bb.1 (%ir-block.0):
904  ; GFX8-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
905  ; GFX8-NEXT: {{  $}}
906  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
907  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
908  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
909  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
910  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
911  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
912  ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
913  ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
914  ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
915  ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
916  ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
917  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
918  ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
919  ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0
920  ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub1
921  ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2
922  ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub3
923  ; GFX8-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2
924  ; GFX8-NEXT:   FLAT_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1)
925  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1)
926  ; GFX8-NEXT:   S_ENDPGM 0
927  ;
928  ; GFX910-LABEL: name: raw_buffer_load_v3i32_tfe
929  ; GFX910: bb.1 (%ir-block.0):
930  ; GFX910-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
931  ; GFX910-NEXT: {{  $}}
932  ; GFX910-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
933  ; GFX910-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
934  ; GFX910-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
935  ; GFX910-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
936  ; GFX910-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
937  ; GFX910-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
938  ; GFX910-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
939  ; GFX910-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
940  ; GFX910-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
941  ; GFX910-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
942  ; GFX910-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
943  ; GFX910-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
944  ; GFX910-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
945  ; GFX910-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0
946  ; GFX910-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub1
947  ; GFX910-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2
948  ; GFX910-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub3
949  ; GFX910-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2
950  ; GFX910-NEXT:   GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1)
951  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
952  ; GFX910-NEXT:   S_ENDPGM 0
953  ;
954  ; GFX11-LABEL: name: raw_buffer_load_v3i32_tfe
955  ; GFX11: bb.1 (%ir-block.0):
956  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
957  ; GFX11-NEXT: {{  $}}
958  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
959  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
960  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
961  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
962  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
963  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
964  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
965  ; GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
966  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
967  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
968  ; GFX11-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
969  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
970  ; GFX11-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
971  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0
972  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub1
973  ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2
974  ; GFX11-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub3
975  ; GFX11-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2
976  ; GFX11-NEXT:   GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1)
977  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
978  ; GFX11-NEXT:   S_ENDPGM 0
979  ;
980  ; GFX12-LABEL: name: raw_buffer_load_v3i32_tfe
981  ; GFX12: bb.1 (%ir-block.0):
982  ; GFX12-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
983  ; GFX12-NEXT: {{  $}}
984  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
985  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
986  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
987  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
988  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
989  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
990  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
991  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
992  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
993  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
994  ; GFX12-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
995  ; GFX12-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
996  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET]].sub0
997  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET]].sub1
998  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET]].sub2
999  ; GFX12-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET]].sub3
1000  ; GFX12-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2
1001  ; GFX12-NEXT:   GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1)
1002  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1003  ; GFX12-NEXT:   S_ENDPGM 0
1004  %res = call { <3 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v3i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
1005  %data = extractvalue { <3 x i32>, i32 } %res, 0
1006  store <3 x i32> %data, ptr addrspace(1) %data_addr
1007  %tfe = extractvalue { <3 x i32>, i32 } %res, 1
1008  store i32 %tfe, ptr addrspace(1) %tfe_addr
1009  ret void
1010}
1011
1012define amdgpu_ps void @raw_buffer_load_v3f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
1013  ; GFX6-LABEL: name: raw_buffer_load_v3f32_tfe
1014  ; GFX6: bb.1 (%ir-block.0):
1015  ; GFX6-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1016  ; GFX6-NEXT: {{  $}}
1017  ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1018  ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1019  ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1020  ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1021  ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1022  ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1023  ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1024  ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1025  ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1026  ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1027  ; GFX6-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1028  ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1029  ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
1030  ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0
1031  ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub1
1032  ; GFX6-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2
1033  ; GFX6-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub3
1034  ; GFX6-NEXT:   [[COPY12:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0_sub1
1035  ; GFX6-NEXT:   [[COPY13:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2_sub3
1036  ; GFX6-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1037  ; GFX6-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1038  ; GFX6-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
1039  ; GFX6-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1040  ; GFX6-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3
1041  ; GFX6-NEXT:   BUFFER_STORE_DWORDX2_ADDR64 [[COPY12]], [[REG_SEQUENCE1]], [[REG_SEQUENCE4]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>) into %ir.data_addr, align 16, addrspace 1)
1042  ; GFX6-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1043  ; GFX6-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1044  ; GFX6-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
1045  ; GFX6-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1046  ; GFX6-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE5]], %subreg.sub2_sub3
1047  ; GFX6-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY10]], [[REG_SEQUENCE1]], [[REG_SEQUENCE6]], 0, 8, 0, 0, implicit $exec :: (store (s32) into %ir.data_addr + 8, align 8, basealign 16, addrspace 1)
1048  ; GFX6-NEXT:   [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1049  ; GFX6-NEXT:   [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1050  ; GFX6-NEXT:   [[REG_SEQUENCE7:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_5]], %subreg.sub0, [[S_MOV_B32_6]], %subreg.sub1
1051  ; GFX6-NEXT:   [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1052  ; GFX6-NEXT:   [[REG_SEQUENCE8:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_2]], %subreg.sub0_sub1, [[REG_SEQUENCE7]], %subreg.sub2_sub3
1053  ; GFX6-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE2]], [[REG_SEQUENCE8]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1054  ; GFX6-NEXT:   S_ENDPGM 0
1055  ;
1056  ; GFX7-LABEL: name: raw_buffer_load_v3f32_tfe
1057  ; GFX7: bb.1 (%ir-block.0):
1058  ; GFX7-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1059  ; GFX7-NEXT: {{  $}}
1060  ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1061  ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1062  ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1063  ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1064  ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1065  ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1066  ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1067  ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1068  ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1069  ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1070  ; GFX7-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1071  ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1072  ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
1073  ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0
1074  ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub1
1075  ; GFX7-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2
1076  ; GFX7-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub3
1077  ; GFX7-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2
1078  ; GFX7-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1079  ; GFX7-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1080  ; GFX7-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
1081  ; GFX7-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1082  ; GFX7-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3
1083  ; GFX7-NEXT:   BUFFER_STORE_DWORDX3_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1)
1084  ; GFX7-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1085  ; GFX7-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1086  ; GFX7-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
1087  ; GFX7-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1088  ; GFX7-NEXT:   [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3
1089  ; GFX7-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY11]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1090  ; GFX7-NEXT:   S_ENDPGM 0
1091  ;
1092  ; GFX8-LABEL: name: raw_buffer_load_v3f32_tfe
1093  ; GFX8: bb.1 (%ir-block.0):
1094  ; GFX8-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1095  ; GFX8-NEXT: {{  $}}
1096  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1097  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1098  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1099  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1100  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1101  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1102  ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1103  ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1104  ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1105  ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1106  ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1107  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1108  ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
1109  ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0
1110  ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub1
1111  ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2
1112  ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub3
1113  ; GFX8-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2
1114  ; GFX8-NEXT:   FLAT_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1)
1115  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1)
1116  ; GFX8-NEXT:   S_ENDPGM 0
1117  ;
1118  ; GFX910-LABEL: name: raw_buffer_load_v3f32_tfe
1119  ; GFX910: bb.1 (%ir-block.0):
1120  ; GFX910-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1121  ; GFX910-NEXT: {{  $}}
1122  ; GFX910-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1123  ; GFX910-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1124  ; GFX910-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1125  ; GFX910-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1126  ; GFX910-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1127  ; GFX910-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1128  ; GFX910-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1129  ; GFX910-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1130  ; GFX910-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1131  ; GFX910-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1132  ; GFX910-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1133  ; GFX910-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1134  ; GFX910-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
1135  ; GFX910-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0
1136  ; GFX910-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub1
1137  ; GFX910-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2
1138  ; GFX910-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub3
1139  ; GFX910-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2
1140  ; GFX910-NEXT:   GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1)
1141  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1142  ; GFX910-NEXT:   S_ENDPGM 0
1143  ;
1144  ; GFX11-LABEL: name: raw_buffer_load_v3f32_tfe
1145  ; GFX11: bb.1 (%ir-block.0):
1146  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1147  ; GFX11-NEXT: {{  $}}
1148  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1149  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1150  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1151  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1152  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1153  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1154  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1155  ; GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1156  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1157  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1158  ; GFX11-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1159  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1160  ; GFX11-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
1161  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub0
1162  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub1
1163  ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub2
1164  ; GFX11-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_OFFSET]].sub3
1165  ; GFX11-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2
1166  ; GFX11-NEXT:   GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1)
1167  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1168  ; GFX11-NEXT:   S_ENDPGM 0
1169  ;
1170  ; GFX12-LABEL: name: raw_buffer_load_v3f32_tfe
1171  ; GFX12: bb.1 (%ir-block.0):
1172  ; GFX12-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1173  ; GFX12-NEXT: {{  $}}
1174  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1175  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1176  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1177  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1178  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1179  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1180  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1181  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1182  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1183  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1184  ; GFX12-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1185  ; GFX12-NEXT:   [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
1186  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET]].sub0
1187  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET]].sub1
1188  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET]].sub2
1189  ; GFX12-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_TFE_VBUFFER_OFFSET]].sub3
1190  ; GFX12-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2
1191  ; GFX12-NEXT:   GLOBAL_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<3 x s32>) into %ir.data_addr, align 16, addrspace 1)
1192  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY11]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1193  ; GFX12-NEXT:   S_ENDPGM 0
1194  %res = call { <3 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v3f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
1195  %data = extractvalue { <3 x float>, i32 } %res, 0
1196  store <3 x float> %data, ptr addrspace(1) %data_addr
1197  %tfe = extractvalue { <3 x float>, i32 } %res, 1
1198  store i32 %tfe, ptr addrspace(1) %tfe_addr
1199  ret void
1200}
1201
1202define amdgpu_ps void @raw_buffer_load_v4i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
1203  ; GFX67-LABEL: name: raw_buffer_load_v4i32_tfe
1204  ; GFX67: bb.1 (%ir-block.0):
1205  ; GFX67-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1206  ; GFX67-NEXT: {{  $}}
1207  ; GFX67-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1208  ; GFX67-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1209  ; GFX67-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1210  ; GFX67-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1211  ; GFX67-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1212  ; GFX67-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1213  ; GFX67-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1214  ; GFX67-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1215  ; GFX67-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1216  ; GFX67-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1217  ; GFX67-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1218  ; GFX67-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1219  ; GFX67-NEXT:   [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
1220  ; GFX67-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub0
1221  ; GFX67-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub1
1222  ; GFX67-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub2
1223  ; GFX67-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub3
1224  ; GFX67-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub4
1225  ; GFX67-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY11]], %subreg.sub3
1226  ; GFX67-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1227  ; GFX67-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1228  ; GFX67-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
1229  ; GFX67-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1230  ; GFX67-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3
1231  ; GFX67-NEXT:   BUFFER_STORE_DWORDX4_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1)
1232  ; GFX67-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1233  ; GFX67-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1234  ; GFX67-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
1235  ; GFX67-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1236  ; GFX67-NEXT:   [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3
1237  ; GFX67-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY12]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1238  ; GFX67-NEXT:   S_ENDPGM 0
1239  ;
1240  ; GFX8-LABEL: name: raw_buffer_load_v4i32_tfe
1241  ; GFX8: bb.1 (%ir-block.0):
1242  ; GFX8-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1243  ; GFX8-NEXT: {{  $}}
1244  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1245  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1246  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1247  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1248  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1249  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1250  ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1251  ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1252  ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1253  ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1254  ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1255  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1256  ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
1257  ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub0
1258  ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub1
1259  ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub2
1260  ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub3
1261  ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub4
1262  ; GFX8-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY11]], %subreg.sub3
1263  ; GFX8-NEXT:   FLAT_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>) into %ir.data_addr, addrspace 1)
1264  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1)
1265  ; GFX8-NEXT:   S_ENDPGM 0
1266  ;
1267  ; GFX910-LABEL: name: raw_buffer_load_v4i32_tfe
1268  ; GFX910: bb.1 (%ir-block.0):
1269  ; GFX910-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1270  ; GFX910-NEXT: {{  $}}
1271  ; GFX910-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1272  ; GFX910-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1273  ; GFX910-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1274  ; GFX910-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1275  ; GFX910-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1276  ; GFX910-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1277  ; GFX910-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1278  ; GFX910-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1279  ; GFX910-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1280  ; GFX910-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1281  ; GFX910-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1282  ; GFX910-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1283  ; GFX910-NEXT:   [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
1284  ; GFX910-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub0
1285  ; GFX910-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub1
1286  ; GFX910-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub2
1287  ; GFX910-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub3
1288  ; GFX910-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub4
1289  ; GFX910-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY11]], %subreg.sub3
1290  ; GFX910-NEXT:   GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1)
1291  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1292  ; GFX910-NEXT:   S_ENDPGM 0
1293  ;
1294  ; GFX11-LABEL: name: raw_buffer_load_v4i32_tfe
1295  ; GFX11: bb.1 (%ir-block.0):
1296  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1297  ; GFX11-NEXT: {{  $}}
1298  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1299  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1300  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1301  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1302  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1303  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1304  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1305  ; GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1306  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1307  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1308  ; GFX11-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1309  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1310  ; GFX11-NEXT:   [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
1311  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub0
1312  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub1
1313  ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub2
1314  ; GFX11-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub3
1315  ; GFX11-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub4
1316  ; GFX11-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY11]], %subreg.sub3
1317  ; GFX11-NEXT:   GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1)
1318  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1319  ; GFX11-NEXT:   S_ENDPGM 0
1320  ;
1321  ; GFX12-LABEL: name: raw_buffer_load_v4i32_tfe
1322  ; GFX12: bb.1 (%ir-block.0):
1323  ; GFX12-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1324  ; GFX12-NEXT: {{  $}}
1325  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1326  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1327  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1328  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1329  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1330  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1331  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1332  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1333  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1334  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1335  ; GFX12-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1336  ; GFX12-NEXT:   [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
1337  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET]].sub0
1338  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET]].sub1
1339  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET]].sub2
1340  ; GFX12-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET]].sub3
1341  ; GFX12-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET]].sub4
1342  ; GFX12-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY11]], %subreg.sub3
1343  ; GFX12-NEXT:   GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1)
1344  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1345  ; GFX12-NEXT:   S_ENDPGM 0
1346  %res = call { <4 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v4i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
1347  %data = extractvalue { <4 x i32>, i32 } %res, 0
1348  store <4 x i32> %data, ptr addrspace(1) %data_addr
1349  %tfe = extractvalue { <4 x i32>, i32 } %res, 1
1350  store i32 %tfe, ptr addrspace(1) %tfe_addr
1351  ret void
1352}
1353
1354define amdgpu_ps void @raw_buffer_load_v4f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
1355  ; GFX67-LABEL: name: raw_buffer_load_v4f32_tfe
1356  ; GFX67: bb.1 (%ir-block.0):
1357  ; GFX67-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1358  ; GFX67-NEXT: {{  $}}
1359  ; GFX67-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1360  ; GFX67-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1361  ; GFX67-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1362  ; GFX67-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1363  ; GFX67-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1364  ; GFX67-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1365  ; GFX67-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1366  ; GFX67-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1367  ; GFX67-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1368  ; GFX67-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1369  ; GFX67-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1370  ; GFX67-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1371  ; GFX67-NEXT:   [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
1372  ; GFX67-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub0
1373  ; GFX67-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub1
1374  ; GFX67-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub2
1375  ; GFX67-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub3
1376  ; GFX67-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub4
1377  ; GFX67-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY11]], %subreg.sub3
1378  ; GFX67-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1379  ; GFX67-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1380  ; GFX67-NEXT:   [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_2]], %subreg.sub1
1381  ; GFX67-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1382  ; GFX67-NEXT:   [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE4]], %subreg.sub2_sub3
1383  ; GFX67-NEXT:   BUFFER_STORE_DWORDX4_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]], [[REG_SEQUENCE5]], 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1)
1384  ; GFX67-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1385  ; GFX67-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1386  ; GFX67-NEXT:   [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_3]], %subreg.sub0, [[S_MOV_B32_4]], %subreg.sub1
1387  ; GFX67-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1388  ; GFX67-NEXT:   [[REG_SEQUENCE7:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_1]], %subreg.sub0_sub1, [[REG_SEQUENCE6]], %subreg.sub2_sub3
1389  ; GFX67-NEXT:   BUFFER_STORE_DWORD_ADDR64 [[COPY12]], [[REG_SEQUENCE2]], [[REG_SEQUENCE7]], 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1390  ; GFX67-NEXT:   S_ENDPGM 0
1391  ;
1392  ; GFX8-LABEL: name: raw_buffer_load_v4f32_tfe
1393  ; GFX8: bb.1 (%ir-block.0):
1394  ; GFX8-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1395  ; GFX8-NEXT: {{  $}}
1396  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1397  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1398  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1399  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1400  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1401  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1402  ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1403  ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1404  ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1405  ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1406  ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1407  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1408  ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
1409  ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub0
1410  ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub1
1411  ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub2
1412  ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub3
1413  ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub4
1414  ; GFX8-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY11]], %subreg.sub3
1415  ; GFX8-NEXT:   FLAT_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>) into %ir.data_addr, addrspace 1)
1416  ; GFX8-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tfe_addr, addrspace 1)
1417  ; GFX8-NEXT:   S_ENDPGM 0
1418  ;
1419  ; GFX910-LABEL: name: raw_buffer_load_v4f32_tfe
1420  ; GFX910: bb.1 (%ir-block.0):
1421  ; GFX910-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1422  ; GFX910-NEXT: {{  $}}
1423  ; GFX910-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1424  ; GFX910-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1425  ; GFX910-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1426  ; GFX910-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1427  ; GFX910-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1428  ; GFX910-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1429  ; GFX910-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1430  ; GFX910-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1431  ; GFX910-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1432  ; GFX910-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1433  ; GFX910-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1434  ; GFX910-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1435  ; GFX910-NEXT:   [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
1436  ; GFX910-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub0
1437  ; GFX910-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub1
1438  ; GFX910-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub2
1439  ; GFX910-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub3
1440  ; GFX910-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub4
1441  ; GFX910-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY11]], %subreg.sub3
1442  ; GFX910-NEXT:   GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1)
1443  ; GFX910-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1444  ; GFX910-NEXT:   S_ENDPGM 0
1445  ;
1446  ; GFX11-LABEL: name: raw_buffer_load_v4f32_tfe
1447  ; GFX11: bb.1 (%ir-block.0):
1448  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1449  ; GFX11-NEXT: {{  $}}
1450  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1451  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1452  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1453  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1454  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1455  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1456  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1457  ; GFX11-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1458  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1459  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1460  ; GFX11-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1461  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1462  ; GFX11-NEXT:   [[BUFFER_LOAD_DWORDX4_TFE_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
1463  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub0
1464  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub1
1465  ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub2
1466  ; GFX11-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub3
1467  ; GFX11-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_OFFSET]].sub4
1468  ; GFX11-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY11]], %subreg.sub3
1469  ; GFX11-NEXT:   GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1)
1470  ; GFX11-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1471  ; GFX11-NEXT:   S_ENDPGM 0
1472  ;
1473  ; GFX12-LABEL: name: raw_buffer_load_v4f32_tfe
1474  ; GFX12: bb.1 (%ir-block.0):
1475  ; GFX12-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
1476  ; GFX12-NEXT: {{  $}}
1477  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1478  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
1479  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
1480  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
1481  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1482  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1483  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1484  ; GFX12-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
1485  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1486  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1487  ; GFX12-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
1488  ; GFX12-NEXT:   [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET:%[0-9]+]]:vreg_160 = BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
1489  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET]].sub0
1490  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET]].sub1
1491  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET]].sub2
1492  ; GFX12-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET]].sub3
1493  ; GFX12-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_TFE_VBUFFER_OFFSET]].sub4
1494  ; GFX12-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY11]], %subreg.sub3
1495  ; GFX12-NEXT:   GLOBAL_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (<4 x s32>) into %ir.data_addr, addrspace 1)
1496  ; GFX12-NEXT:   GLOBAL_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec :: (store (s32) into %ir.tfe_addr, addrspace 1)
1497  ; GFX12-NEXT:   S_ENDPGM 0
1498  %res = call { <4 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v4f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
1499  %data = extractvalue { <4 x float>, i32 } %res, 0
1500  store <4 x float> %data, ptr addrspace(1) %data_addr
1501  %tfe = extractvalue { <4 x float>, i32 } %res, 1
1502  store i32 %tfe, ptr addrspace(1) %tfe_addr
1503  ret void
1504}
1505
1506declare { i8, i32 } @llvm.amdgcn.raw.buffer.load.sl_i8i32s(<4 x i32>, i32, i32, i32)
1507declare { i16, i32 } @llvm.amdgcn.raw.buffer.load.sl_i16i32s(<4 x i32>, i32, i32, i32)
1508declare { half, i32 } @llvm.amdgcn.raw.buffer.load.sl_f16i32s(<4 x i32>, i32, i32, i32)
1509declare { i32, i32 } @llvm.amdgcn.raw.buffer.load.sl_i32i32s(<4 x i32>, i32, i32, i32)
1510declare { <2 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v2i32i32s(<4 x i32>, i32, i32, i32)
1511declare { <2 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v2f32i32s(<4 x i32>, i32, i32, i32)
1512declare { <3 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v3i32i32s(<4 x i32>, i32, i32, i32)
1513declare { <3 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v3f32i32s(<4 x i32>, i32, i32, i32)
1514declare { <4 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v4i32i32s(<4 x i32>, i32, i32, i32)
1515declare { <4 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v4f32i32s(<4 x i32>, i32, i32, i32)
1516