xref: /llvm-project/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
3; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
4; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
5; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
6; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
7; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
8; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
9; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
10
11; Test splitting flat instruction offsets into the low and high bits
12; when the offset doesn't fit in the offset field.
13
; Byte load from %p + 1.
; Expected output: gfx900, gfx1100 and gfx1200 place the offset in the flat
; load's immediate offset field; gfx1010 instead adds it to the 64-bit address
; (v_add_co_u32 / v_add_co_ci_u32) and loads with no immediate offset.
14define i8 @flat_inst_valu_offset_1(ptr %p) {
15; GFX9-LABEL: flat_inst_valu_offset_1:
16; GFX9:       ; %bb.0:
17; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:1
19; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
20; GFX9-NEXT:    s_setpc_b64 s[30:31]
21;
22; GFX10-LABEL: flat_inst_valu_offset_1:
23; GFX10:       ; %bb.0:
24; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 1
26; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
27; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
28; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
29; GFX10-NEXT:    s_setpc_b64 s[30:31]
30;
31; GFX11-LABEL: flat_inst_valu_offset_1:
32; GFX11:       ; %bb.0:
33; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:1
35; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
36; GFX11-NEXT:    s_setpc_b64 s[30:31]
37;
38; GFX12-LABEL: flat_inst_valu_offset_1:
39; GFX12:       ; %bb.0:
40; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
41; GFX12-NEXT:    s_wait_expcnt 0x0
42; GFX12-NEXT:    s_wait_samplecnt 0x0
43; GFX12-NEXT:    s_wait_bvhcnt 0x0
44; GFX12-NEXT:    s_wait_kmcnt 0x0
45; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:1
46; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
47; GFX12-NEXT:    s_setpc_b64 s[30:31]
48  %gep = getelementptr i8, ptr %p, i64 1
49  %load = load i8, ptr %gep, align 4
50  ret i8 %load
51}
52
; Byte load from %p + 2047 (0x7ff).
; Expected output: gfx900, gfx1100 and gfx1200 encode 2047 as the flat load's
; immediate offset; gfx1010 adds 0x7ff to the 64-bit address and loads with no
; immediate offset.
53define i8 @flat_inst_valu_offset_11bit_max(ptr %p) {
54; GFX9-LABEL: flat_inst_valu_offset_11bit_max:
55; GFX9:       ; %bb.0:
56; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
57; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:2047
58; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
59; GFX9-NEXT:    s_setpc_b64 s[30:31]
60;
61; GFX10-LABEL: flat_inst_valu_offset_11bit_max:
62; GFX10:       ; %bb.0:
63; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff, v0
65; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
66; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
67; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
68; GFX10-NEXT:    s_setpc_b64 s[30:31]
69;
70; GFX11-LABEL: flat_inst_valu_offset_11bit_max:
71; GFX11:       ; %bb.0:
72; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:2047
74; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
75; GFX11-NEXT:    s_setpc_b64 s[30:31]
76;
77; GFX12-LABEL: flat_inst_valu_offset_11bit_max:
78; GFX12:       ; %bb.0:
79; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
80; GFX12-NEXT:    s_wait_expcnt 0x0
81; GFX12-NEXT:    s_wait_samplecnt 0x0
82; GFX12-NEXT:    s_wait_bvhcnt 0x0
83; GFX12-NEXT:    s_wait_kmcnt 0x0
84; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:2047
85; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
86; GFX12-NEXT:    s_setpc_b64 s[30:31]
87  %gep = getelementptr i8, ptr %p, i64 2047
88  %load = load i8, ptr %gep, align 4
89  ret i8 %load
90}
91
; Byte load from %p + 4095 (0xfff).
; Expected output: gfx900, gfx1100 and gfx1200 encode 4095 as the flat load's
; immediate offset; gfx1010 adds 0xfff to the 64-bit address and loads with no
; immediate offset.
92define i8 @flat_inst_valu_offset_12bit_max(ptr %p) {
93; GFX9-LABEL: flat_inst_valu_offset_12bit_max:
94; GFX9:       ; %bb.0:
95; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
97; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
98; GFX9-NEXT:    s_setpc_b64 s[30:31]
99;
100; GFX10-LABEL: flat_inst_valu_offset_12bit_max:
101; GFX10:       ; %bb.0:
102; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
104; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
105; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
106; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
107; GFX10-NEXT:    s_setpc_b64 s[30:31]
108;
109; GFX11-LABEL: flat_inst_valu_offset_12bit_max:
110; GFX11:       ; %bb.0:
111; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:4095
113; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
114; GFX11-NEXT:    s_setpc_b64 s[30:31]
115;
116; GFX12-LABEL: flat_inst_valu_offset_12bit_max:
117; GFX12:       ; %bb.0:
118; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
119; GFX12-NEXT:    s_wait_expcnt 0x0
120; GFX12-NEXT:    s_wait_samplecnt 0x0
121; GFX12-NEXT:    s_wait_bvhcnt 0x0
122; GFX12-NEXT:    s_wait_kmcnt 0x0
123; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:4095
124; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
125; GFX12-NEXT:    s_setpc_b64 s[30:31]
126  %gep = getelementptr i8, ptr %p, i64 4095
127  %load = load i8, ptr %gep, align 4
128  ret i8 %load
129}
130
; Byte load from %p + 8191 (0x1fff).
; Expected output:
;  - gfx1200 encodes the whole 8191 as the immediate offset.
;  - gfx900 and gfx1100 with -global-isel=0 split it: 0x1000 is added to the
;    address and the remaining 4095 stays in the immediate offset.
;  - gfx1010, and gfx900/gfx1100 with -global-isel=1, add the full 0x1fff to the
;    address and load with no immediate offset.
131define i8 @flat_inst_valu_offset_13bit_max(ptr %p) {
132; GFX9-SDAG-LABEL: flat_inst_valu_offset_13bit_max:
133; GFX9-SDAG:       ; %bb.0:
134; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
135; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
136; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
137; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
138; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
139; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
140;
141; GFX10-LABEL: flat_inst_valu_offset_13bit_max:
142; GFX10:       ; %bb.0:
143; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
144; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
145; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
146; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
147; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
148; GFX10-NEXT:    s_setpc_b64 s[30:31]
149;
150; GFX11-SDAG-LABEL: flat_inst_valu_offset_13bit_max:
151; GFX11-SDAG:       ; %bb.0:
152; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
154; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
155; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:4095
156; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
157; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
158;
159; GFX12-LABEL: flat_inst_valu_offset_13bit_max:
160; GFX12:       ; %bb.0:
161; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
162; GFX12-NEXT:    s_wait_expcnt 0x0
163; GFX12-NEXT:    s_wait_samplecnt 0x0
164; GFX12-NEXT:    s_wait_bvhcnt 0x0
165; GFX12-NEXT:    s_wait_kmcnt 0x0
166; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:8191
167; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
168; GFX12-NEXT:    s_setpc_b64 s[30:31]
169;
170; GFX9-GISEL-LABEL: flat_inst_valu_offset_13bit_max:
171; GFX9-GISEL:       ; %bb.0:
172; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1fff, v0
174; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
175; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1]
176; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
177; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
178;
179; GFX11-GISEL-LABEL: flat_inst_valu_offset_13bit_max:
180; GFX11-GISEL:       ; %bb.0:
181; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
183; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
184; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
185; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
186; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
187  %gep = getelementptr i8, ptr %p, i64 8191
188  %load = load i8, ptr %gep, align 4
189  ret i8 %load
190}
191
; Byte load from %p + 8388607 (0x7fffff).
; Expected output:
;  - gfx1200 encodes the whole 8388607 as the immediate offset.
;  - gfx900 and gfx1100 with -global-isel=0 split it: 0x7ff000 is added to the
;    address and the remaining 4095 stays in the immediate offset.
;  - gfx1010, and gfx900/gfx1100 with -global-isel=1, add the full 0x7fffff to
;    the address and load with no immediate offset.
192define i8 @flat_inst_valu_offset_24bit_max(ptr %p) {
193; GFX9-SDAG-LABEL: flat_inst_valu_offset_24bit_max:
194; GFX9-SDAG:       ; %bb.0:
195; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
197; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
198; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
199; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
200; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
201;
202; GFX10-LABEL: flat_inst_valu_offset_24bit_max:
203; GFX10:       ; %bb.0:
204; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
205; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
206; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
207; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
208; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
209; GFX10-NEXT:    s_setpc_b64 s[30:31]
210;
211; GFX11-SDAG-LABEL: flat_inst_valu_offset_24bit_max:
212; GFX11-SDAG:       ; %bb.0:
213; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
214; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff000, v0
215; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
216; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:4095
217; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
218; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
219;
220; GFX12-LABEL: flat_inst_valu_offset_24bit_max:
221; GFX12:       ; %bb.0:
222; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
223; GFX12-NEXT:    s_wait_expcnt 0x0
224; GFX12-NEXT:    s_wait_samplecnt 0x0
225; GFX12-NEXT:    s_wait_bvhcnt 0x0
226; GFX12-NEXT:    s_wait_kmcnt 0x0
227; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:8388607
228; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
229; GFX12-NEXT:    s_setpc_b64 s[30:31]
230;
231; GFX9-GISEL-LABEL: flat_inst_valu_offset_24bit_max:
232; GFX9-GISEL:       ; %bb.0:
233; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
234; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
235; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
236; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1]
237; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
238; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
239;
240; GFX11-GISEL-LABEL: flat_inst_valu_offset_24bit_max:
241; GFX11-GISEL:       ; %bb.0:
242; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
243; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
244; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
245; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
246; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
247; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
248  %gep = getelementptr i8, ptr %p, i64 8388607
249  %load = load i8, ptr %gep, align 4
250  ret i8 %load
251}
252
; Byte load from %p - 2048.
; Expected output: gfx1200 encodes -2048 as a negative immediate offset on the
; flat load; gfx900, gfx1010 and gfx1100 add 0xfffff800 to the low address half
; and -1 (with carry) to the high half, then load with no immediate offset.
253define i8 @flat_inst_valu_offset_neg_11bit_max(ptr %p) {
254; GFX9-LABEL: flat_inst_valu_offset_neg_11bit_max:
255; GFX9:       ; %bb.0:
256; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
257; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
258; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
259; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
260; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
261; GFX9-NEXT:    s_setpc_b64 s[30:31]
262;
263; GFX10-LABEL: flat_inst_valu_offset_neg_11bit_max:
264; GFX10:       ; %bb.0:
265; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
267; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
268; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
269; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
270; GFX10-NEXT:    s_setpc_b64 s[30:31]
271;
272; GFX11-LABEL: flat_inst_valu_offset_neg_11bit_max:
273; GFX11:       ; %bb.0:
274; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
275; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
276; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
277; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
278; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
279; GFX11-NEXT:    s_setpc_b64 s[30:31]
280;
281; GFX12-LABEL: flat_inst_valu_offset_neg_11bit_max:
282; GFX12:       ; %bb.0:
283; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
284; GFX12-NEXT:    s_wait_expcnt 0x0
285; GFX12-NEXT:    s_wait_samplecnt 0x0
286; GFX12-NEXT:    s_wait_bvhcnt 0x0
287; GFX12-NEXT:    s_wait_kmcnt 0x0
288; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:-2048
289; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
290; GFX12-NEXT:    s_setpc_b64 s[30:31]
291  %gep = getelementptr i8, ptr %p, i64 -2048
292  %load = load i8, ptr %gep, align 4
293  ret i8 %load
294}
295
; Byte load from %p - 4096.
; Expected output: gfx1200 encodes -4096 as a negative immediate offset on the
; flat load; gfx900, gfx1010 and gfx1100 add 0xfffff000 to the low address half
; and -1 (with carry) to the high half, then load with no immediate offset.
296define i8 @flat_inst_valu_offset_neg_12bit_max(ptr %p) {
297; GFX9-LABEL: flat_inst_valu_offset_neg_12bit_max:
298; GFX9:       ; %bb.0:
299; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
300; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
301; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
302; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
303; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
304; GFX9-NEXT:    s_setpc_b64 s[30:31]
305;
306; GFX10-LABEL: flat_inst_valu_offset_neg_12bit_max:
307; GFX10:       ; %bb.0:
308; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
309; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
310; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
311; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
312; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
313; GFX10-NEXT:    s_setpc_b64 s[30:31]
314;
315; GFX11-LABEL: flat_inst_valu_offset_neg_12bit_max:
316; GFX11:       ; %bb.0:
317; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
318; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
319; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
320; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
321; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
322; GFX11-NEXT:    s_setpc_b64 s[30:31]
323;
324; GFX12-LABEL: flat_inst_valu_offset_neg_12bit_max:
325; GFX12:       ; %bb.0:
326; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
327; GFX12-NEXT:    s_wait_expcnt 0x0
328; GFX12-NEXT:    s_wait_samplecnt 0x0
329; GFX12-NEXT:    s_wait_bvhcnt 0x0
330; GFX12-NEXT:    s_wait_kmcnt 0x0
331; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:-4096
332; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
333; GFX12-NEXT:    s_setpc_b64 s[30:31]
334  %gep = getelementptr i8, ptr %p, i64 -4096
335  %load = load i8, ptr %gep, align 4
336  ret i8 %load
337}
338
; Byte load from %p - 8192.
; Expected output: gfx1200 encodes -8192 as a negative immediate offset on the
; flat load; gfx900, gfx1010 and gfx1100 add 0xffffe000 to the low address half
; and -1 (with carry) to the high half, then load with no immediate offset.
339define i8 @flat_inst_valu_offset_neg_13bit_max(ptr %p) {
340; GFX9-LABEL: flat_inst_valu_offset_neg_13bit_max:
341; GFX9:       ; %bb.0:
342; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
343; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
344; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
345; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
346; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
347; GFX9-NEXT:    s_setpc_b64 s[30:31]
348;
349; GFX10-LABEL: flat_inst_valu_offset_neg_13bit_max:
350; GFX10:       ; %bb.0:
351; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
352; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
353; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
354; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
355; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
356; GFX10-NEXT:    s_setpc_b64 s[30:31]
357;
358; GFX11-LABEL: flat_inst_valu_offset_neg_13bit_max:
359; GFX11:       ; %bb.0:
360; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
361; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
362; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
363; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
364; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
365; GFX11-NEXT:    s_setpc_b64 s[30:31]
366;
367; GFX12-LABEL: flat_inst_valu_offset_neg_13bit_max:
368; GFX12:       ; %bb.0:
369; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
370; GFX12-NEXT:    s_wait_expcnt 0x0
371; GFX12-NEXT:    s_wait_samplecnt 0x0
372; GFX12-NEXT:    s_wait_bvhcnt 0x0
373; GFX12-NEXT:    s_wait_kmcnt 0x0
374; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:-8192
375; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
376; GFX12-NEXT:    s_setpc_b64 s[30:31]
377  %gep = getelementptr i8, ptr %p, i64 -8192
378  %load = load i8, ptr %gep, align 4
379  ret i8 %load
380}
381
; Byte load from %p - 8388608.
; Expected output: gfx1200 encodes -8388608 as a negative immediate offset on
; the flat load; gfx900, gfx1010 and gfx1100 add 0xff800000 to the low address
; half and -1 (with carry) to the high half, then load with no immediate offset.
382define i8 @flat_inst_valu_offset_neg_24bit_max(ptr %p) {
383; GFX9-LABEL: flat_inst_valu_offset_neg_24bit_max:
384; GFX9:       ; %bb.0:
385; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
386; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
387; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
388; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
389; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
390; GFX9-NEXT:    s_setpc_b64 s[30:31]
391;
392; GFX10-LABEL: flat_inst_valu_offset_neg_24bit_max:
393; GFX10:       ; %bb.0:
394; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
396; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
397; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
398; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
399; GFX10-NEXT:    s_setpc_b64 s[30:31]
400;
401; GFX11-LABEL: flat_inst_valu_offset_neg_24bit_max:
402; GFX11:       ; %bb.0:
403; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
404; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
405; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
406; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
407; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
408; GFX11-NEXT:    s_setpc_b64 s[30:31]
409;
410; GFX12-LABEL: flat_inst_valu_offset_neg_24bit_max:
411; GFX12:       ; %bb.0:
412; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
413; GFX12-NEXT:    s_wait_expcnt 0x0
414; GFX12-NEXT:    s_wait_samplecnt 0x0
415; GFX12-NEXT:    s_wait_bvhcnt 0x0
416; GFX12-NEXT:    s_wait_kmcnt 0x0
417; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:-8388608
418; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
419; GFX12-NEXT:    s_setpc_b64 s[30:31]
420  %gep = getelementptr i8, ptr %p, i64 -8388608
421  %load = load i8, ptr %gep, align 4
422  ret i8 %load
423}
424
425
; Byte load from %p + 4095 (0xfff).
; Expected output: gfx900, gfx1100 and gfx1200 encode 4095 as the flat load's
; immediate offset; gfx1010 adds 0xfff to the 64-bit address and loads with no
; immediate offset.
; NOTE(review): this uses the same byte offset (4095) as
; flat_inst_valu_offset_12bit_max, so the expected code is identical apart from
; the label.
426define i8 @flat_inst_valu_offset_2x_11bit_max(ptr %p) {
427; GFX9-LABEL: flat_inst_valu_offset_2x_11bit_max:
428; GFX9:       ; %bb.0:
429; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
430; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
431; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
432; GFX9-NEXT:    s_setpc_b64 s[30:31]
433;
434; GFX10-LABEL: flat_inst_valu_offset_2x_11bit_max:
435; GFX10:       ; %bb.0:
436; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
437; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
438; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
439; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
440; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
441; GFX10-NEXT:    s_setpc_b64 s[30:31]
442;
443; GFX11-LABEL: flat_inst_valu_offset_2x_11bit_max:
444; GFX11:       ; %bb.0:
445; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
446; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:4095
447; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
448; GFX11-NEXT:    s_setpc_b64 s[30:31]
449;
450; GFX12-LABEL: flat_inst_valu_offset_2x_11bit_max:
451; GFX12:       ; %bb.0:
452; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
453; GFX12-NEXT:    s_wait_expcnt 0x0
454; GFX12-NEXT:    s_wait_samplecnt 0x0
455; GFX12-NEXT:    s_wait_bvhcnt 0x0
456; GFX12-NEXT:    s_wait_kmcnt 0x0
457; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:4095
458; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
459; GFX12-NEXT:    s_setpc_b64 s[30:31]
460  %gep = getelementptr i8, ptr %p, i64 4095
461  %load = load i8, ptr %gep, align 4
462  ret i8 %load
463}
464
; Byte load from %p + 8191 (0x1fff).
; Expected output:
;  - gfx1200 encodes the whole 8191 as the immediate offset.
;  - gfx900 and gfx1100 with -global-isel=0 split it: 0x1000 is added to the
;    address and the remaining 4095 stays in the immediate offset.
;  - gfx1010, and gfx900/gfx1100 with -global-isel=1, add the full 0x1fff to the
;    address and load with no immediate offset.
; NOTE(review): this uses the same byte offset (8191) as
; flat_inst_valu_offset_13bit_max.
465define i8 @flat_inst_valu_offset_2x_12bit_max(ptr %p) {
466; GFX9-SDAG-LABEL: flat_inst_valu_offset_2x_12bit_max:
467; GFX9-SDAG:       ; %bb.0:
468; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
469; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
470; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
471; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
472; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
473; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
474;
475; GFX10-LABEL: flat_inst_valu_offset_2x_12bit_max:
476; GFX10:       ; %bb.0:
477; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
478; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
479; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
480; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
481; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
482; GFX10-NEXT:    s_setpc_b64 s[30:31]
483;
484; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_12bit_max:
485; GFX11-SDAG:       ; %bb.0:
486; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
487; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
488; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
489; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:4095
490; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
491; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
492;
493; GFX12-LABEL: flat_inst_valu_offset_2x_12bit_max:
494; GFX12:       ; %bb.0:
495; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
496; GFX12-NEXT:    s_wait_expcnt 0x0
497; GFX12-NEXT:    s_wait_samplecnt 0x0
498; GFX12-NEXT:    s_wait_bvhcnt 0x0
499; GFX12-NEXT:    s_wait_kmcnt 0x0
500; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:8191
501; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
502; GFX12-NEXT:    s_setpc_b64 s[30:31]
503;
504; GFX9-GISEL-LABEL: flat_inst_valu_offset_2x_12bit_max:
505; GFX9-GISEL:       ; %bb.0:
506; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
507; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1fff, v0
508; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
509; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1]
510; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
511; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
512;
513; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_12bit_max:
514; GFX11-GISEL:       ; %bb.0:
515; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
516; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
517; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
518; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
519; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
520; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
521  %gep = getelementptr i8, ptr %p, i64 8191
522  %load = load i8, ptr %gep, align 4
523  ret i8 %load
524}
525
; Byte load from %p + 16383 (0x3fff).
; Expected output:
;  - gfx1200 encodes the whole 16383 as the immediate offset.
;  - gfx900 and gfx1100 with -global-isel=0 split it: 0x3000 is added to the
;    address and the remaining 4095 stays in the immediate offset.
;  - gfx1010, and gfx900/gfx1100 with -global-isel=1, add the full 0x3fff to the
;    address and load with no immediate offset.
526define i8 @flat_inst_valu_offset_2x_13bit_max(ptr %p) {
527; GFX9-SDAG-LABEL: flat_inst_valu_offset_2x_13bit_max:
528; GFX9-SDAG:       ; %bb.0:
529; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
530; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3000, v0
531; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
532; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
533; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
534; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
535;
536; GFX10-LABEL: flat_inst_valu_offset_2x_13bit_max:
537; GFX10:       ; %bb.0:
538; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
539; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3fff, v0
540; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
541; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
542; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
543; GFX10-NEXT:    s_setpc_b64 s[30:31]
544;
545; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_13bit_max:
546; GFX11-SDAG:       ; %bb.0:
547; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
548; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3000, v0
549; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
550; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:4095
551; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
552; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
553;
554; GFX12-LABEL: flat_inst_valu_offset_2x_13bit_max:
555; GFX12:       ; %bb.0:
556; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
557; GFX12-NEXT:    s_wait_expcnt 0x0
558; GFX12-NEXT:    s_wait_samplecnt 0x0
559; GFX12-NEXT:    s_wait_bvhcnt 0x0
560; GFX12-NEXT:    s_wait_kmcnt 0x0
561; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:16383
562; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
563; GFX12-NEXT:    s_setpc_b64 s[30:31]
564;
565; GFX9-GISEL-LABEL: flat_inst_valu_offset_2x_13bit_max:
566; GFX9-GISEL:       ; %bb.0:
567; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
568; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3fff, v0
569; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
570; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1]
571; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
572; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
573;
574; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_13bit_max:
575; GFX11-GISEL:       ; %bb.0:
576; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
577; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3fff, v0
578; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
579; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
580; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
581; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
582  %gep = getelementptr i8, ptr %p, i64 16383
583  %load = load i8, ptr %gep, align 4
584  ret i8 %load
585}
586
; Byte load from %p + 16777214 (0xfffffe).
; This is the only offset in this group that gfx1200 does not encode entirely
; in the immediate offset, so it has per-selector expectations there too.
; Expected output:
;  - gfx1200 with -global-isel=0 splits it: 0x800000 is added to the address
;    and 8388606 stays in the immediate offset.
;  - gfx900 and gfx1100 with -global-isel=0 split it: 0xfff000 is added to the
;    address and 4094 stays in the immediate offset.
;  - gfx1010 (both selectors) and gfx900/gfx1100/gfx1200 with -global-isel=1
;    add the full 0xfffffe to the address and load with no immediate offset.
587define i8 @flat_inst_valu_offset_2x_24bit_max(ptr %p) {
588; GFX9-SDAG-LABEL: flat_inst_valu_offset_2x_24bit_max:
589; GFX9-SDAG:       ; %bb.0:
590; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
591; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfff000, v0
592; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
593; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] offset:4094
594; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
595; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
596;
597; GFX10-LABEL: flat_inst_valu_offset_2x_24bit_max:
598; GFX10:       ; %bb.0:
599; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
600; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffffe, v0
601; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
602; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
603; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
604; GFX10-NEXT:    s_setpc_b64 s[30:31]
605;
606; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_24bit_max:
607; GFX11-SDAG:       ; %bb.0:
608; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
609; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff000, v0
610; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
611; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:4094
612; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
613; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
614;
615; GFX12-SDAG-LABEL: flat_inst_valu_offset_2x_24bit_max:
616; GFX12-SDAG:       ; %bb.0:
617; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
618; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
619; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
620; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
621; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
622; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, v0
623; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
624; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:8388606
625; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
626; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
627;
628; GFX9-GISEL-LABEL: flat_inst_valu_offset_2x_24bit_max:
629; GFX9-GISEL:       ; %bb.0:
630; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
631; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffffe, v0
632; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
633; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1]
634; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
635; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
636;
637; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_24bit_max:
638; GFX11-GISEL:       ; %bb.0:
639; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
640; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffffe, v0
641; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
642; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
643; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
644; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
645;
646; GFX12-GISEL-LABEL: flat_inst_valu_offset_2x_24bit_max:
647; GFX12-GISEL:       ; %bb.0:
648; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
649; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
650; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
651; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
652; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
653; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffffe, v0
654; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
655; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
656; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
657; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
658  %gep = getelementptr i8, ptr %p, i64 16777214
659  %load = load i8, ptr %gep, align 4
660  ret i8 %load
661}
662
; Byte load from %p - 4096.
; Expected output: gfx1200 encodes -4096 as a negative immediate offset on the
; flat load; gfx900, gfx1010 and gfx1100 add 0xfffff000 to the low address half
; and -1 (with carry) to the high half, then load with no immediate offset.
; NOTE(review): this uses the same byte offset (-4096) as
; flat_inst_valu_offset_neg_12bit_max.
663define i8 @flat_inst_valu_offset_2x_neg_11bit_max(ptr %p) {
664; GFX9-LABEL: flat_inst_valu_offset_2x_neg_11bit_max:
665; GFX9:       ; %bb.0:
666; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
667; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
668; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
669; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
670; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
671; GFX9-NEXT:    s_setpc_b64 s[30:31]
672;
673; GFX10-LABEL: flat_inst_valu_offset_2x_neg_11bit_max:
674; GFX10:       ; %bb.0:
675; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
676; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
677; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
678; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
679; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
680; GFX10-NEXT:    s_setpc_b64 s[30:31]
681;
682; GFX11-LABEL: flat_inst_valu_offset_2x_neg_11bit_max:
683; GFX11:       ; %bb.0:
684; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
685; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
686; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
687; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
688; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
689; GFX11-NEXT:    s_setpc_b64 s[30:31]
690;
691; GFX12-LABEL: flat_inst_valu_offset_2x_neg_11bit_max:
692; GFX12:       ; %bb.0:
693; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
694; GFX12-NEXT:    s_wait_expcnt 0x0
695; GFX12-NEXT:    s_wait_samplecnt 0x0
696; GFX12-NEXT:    s_wait_bvhcnt 0x0
697; GFX12-NEXT:    s_wait_kmcnt 0x0
698; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:-4096
699; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
700; GFX12-NEXT:    s_setpc_b64 s[30:31]
701  %gep = getelementptr i8, ptr %p, i64 -4096
702  %load = load i8, ptr %gep, align 4
703  ret i8 %load
704}
705
; Byte load from %p - 8192.
; Expected output: gfx1200 encodes -8192 as a negative immediate offset on the
; flat load; gfx900, gfx1010 and gfx1100 add 0xffffe000 to the low address half
; and -1 (with carry) to the high half, then load with no immediate offset.
; NOTE(review): this uses the same byte offset (-8192) as
; flat_inst_valu_offset_neg_13bit_max.
706define i8 @flat_inst_valu_offset_2x_neg_12bit_max(ptr %p) {
707; GFX9-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
708; GFX9:       ; %bb.0:
709; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
710; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
711; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
712; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
713; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
714; GFX9-NEXT:    s_setpc_b64 s[30:31]
715;
716; GFX10-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
717; GFX10:       ; %bb.0:
718; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
719; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
720; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
721; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
722; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
723; GFX10-NEXT:    s_setpc_b64 s[30:31]
724;
725; GFX11-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
726; GFX11:       ; %bb.0:
727; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
728; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
729; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
730; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
731; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
732; GFX11-NEXT:    s_setpc_b64 s[30:31]
733;
734; GFX12-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
735; GFX12:       ; %bb.0:
736; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
737; GFX12-NEXT:    s_wait_expcnt 0x0
738; GFX12-NEXT:    s_wait_samplecnt 0x0
739; GFX12-NEXT:    s_wait_bvhcnt 0x0
740; GFX12-NEXT:    s_wait_kmcnt 0x0
741; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:-8192
742; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
743; GFX12-NEXT:    s_setpc_b64 s[30:31]
744  %gep = getelementptr i8, ptr %p, i64 -8192
745  %load = load i8, ptr %gep, align 4
746  ret i8 %load
747}
748
; Byte load at %p - 16384 (2 * -8192). The gfx900, gfx1010 and gfx1100 checks
; expect the whole offset to be added to the pointer with a 64-bit VALU
; add-with-carry (low word 0xffffc000, high word -1) followed by a load with
; no immediate offset. The gfx1200 checks expect no address arithmetic: the
; offset appears directly on the load as offset:-16384.
749define i8 @flat_inst_valu_offset_2x_neg_13bit_max(ptr %p) {
750; GFX9-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
751; GFX9:       ; %bb.0:
752; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
753; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
754; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
755; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
756; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
757; GFX9-NEXT:    s_setpc_b64 s[30:31]
758;
759; GFX10-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
760; GFX10:       ; %bb.0:
761; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
762; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
763; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
764; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
765; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
766; GFX10-NEXT:    s_setpc_b64 s[30:31]
767;
768; GFX11-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
769; GFX11:       ; %bb.0:
770; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
771; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
772; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
773; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
774; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
775; GFX11-NEXT:    s_setpc_b64 s[30:31]
776;
777; GFX12-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
778; GFX12:       ; %bb.0:
779; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
780; GFX12-NEXT:    s_wait_expcnt 0x0
781; GFX12-NEXT:    s_wait_samplecnt 0x0
782; GFX12-NEXT:    s_wait_bvhcnt 0x0
783; GFX12-NEXT:    s_wait_kmcnt 0x0
784; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:-16384
785; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
786; GFX12-NEXT:    s_setpc_b64 s[30:31]
  ; -16384 = -(1 << 14); as a 32-bit pattern this is the 0xffffc000 seen above.
787  %gep = getelementptr i8, ptr %p, i64 -16384
788  %load = load i8, ptr %gep, align 4
789  ret i8 %load
790}
791
; Byte load at %p - 16777215. The gfx900, gfx1010 and gfx1100 checks, and the
; GlobalISel checks for gfx1200, expect the whole offset to be added to the
; pointer (low word 0xff000001, high word -1) followed by a load with no
; immediate offset. The SelectionDAG checks for gfx1200 expect a split:
; 0xff800000 (-8388608) is added to the pointer and the remaining -8388607 is
; carried on the load as an immediate offset.
; NOTE(review): the function name suggests 2 * -8388608 = -16777216, but the
; constant below is -16777215; the checks match the constant as written.
792define i8 @flat_inst_valu_offset_2x_neg_24bit_max(ptr %p) {
793; GFX9-LABEL: flat_inst_valu_offset_2x_neg_24bit_max:
794; GFX9:       ; %bb.0:
795; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
796; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff000001, v0
797; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
798; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
799; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
800; GFX9-NEXT:    s_setpc_b64 s[30:31]
801;
802; GFX10-LABEL: flat_inst_valu_offset_2x_neg_24bit_max:
803; GFX10:       ; %bb.0:
804; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
805; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff000001, v0
806; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
807; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
808; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
809; GFX10-NEXT:    s_setpc_b64 s[30:31]
810;
811; GFX11-LABEL: flat_inst_valu_offset_2x_neg_24bit_max:
812; GFX11:       ; %bb.0:
813; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
814; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff000001, v0
815; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
816; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
817; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
818; GFX11-NEXT:    s_setpc_b64 s[30:31]
819;
820; GFX12-SDAG-LABEL: flat_inst_valu_offset_2x_neg_24bit_max:
821; GFX12-SDAG:       ; %bb.0:
822; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
823; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
824; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
825; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
826; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
827; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
828; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
829; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:-8388607
830; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
831; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
832;
833; GFX12-GISEL-LABEL: flat_inst_valu_offset_2x_neg_24bit_max:
834; GFX12-GISEL:       ; %bb.0:
835; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
836; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
837; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
838; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
839; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
840; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff000001, v0
841; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
842; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
843; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
844; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; -16777215 = -(1 << 24) + 1 = -8388608 + -8388607.
845  %gep = getelementptr i8, ptr %p, i64 -16777215
846  %load = load i8, ptr %gep, align 4
847  ret i8 %load
848}
849
850; Fill 11-bit low-bits (1ull << 33) | 2047
; Byte load at %p + 8589936639: the high 32 bits of the offset are 2 and the
; low 32 bits are 0x7ff. The SelectionDAG checks for gfx900, gfx1100 and
; gfx1200 expect a split: the pointer gets 0 added to its low word and 2 to
; its high word, and 2047 stays on the load as an immediate offset. The
; gfx1010 checks (both selectors) and the GlobalISel checks for the other
; targets expect the full offset to be added to the pointer (0x7ff low,
; 2 high) and a load with no immediate offset.
851define i8 @flat_inst_valu_offset_64bit_11bit_split0(ptr %p) {
852; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
853; GFX9-SDAG:       ; %bb.0:
854; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
855; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
856; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
857; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] offset:2047
858; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
859; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
860;
861; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
862; GFX10:       ; %bb.0:
863; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
864; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff, v0
865; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
866; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
867; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
868; GFX10-NEXT:    s_setpc_b64 s[30:31]
869;
870; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
871; GFX11-SDAG:       ; %bb.0:
872; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
873; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
874; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
875; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:2047
876; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
877; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
878;
879; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
880; GFX12-SDAG:       ; %bb.0:
881; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
882; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
883; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
884; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
885; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
886; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
887; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
888; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:2047
889; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
890; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
891;
892; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
893; GFX9-GISEL:       ; %bb.0:
894; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
895; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff, v0
896; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
897; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1]
898; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
899; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
900;
901; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
902; GFX11-GISEL:       ; %bb.0:
903; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
904; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff, v0
905; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
906; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
907; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
908; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
909;
910; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
911; GFX12-GISEL:       ; %bb.0:
912; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
913; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
914; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
915; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
916; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
917; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff, v0
918; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
919; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
920; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
921; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; 8589936639 = 8589934592 (1 << 33) + 2047.
922  %gep = getelementptr i8, ptr %p, i64 8589936639
923  %load = load i8, ptr %gep, align 4
924  ret i8 %load
925}
926
927; Fill 11-bit low-bits (1ull << 33) | 2048
; Byte load at %p + 8589936640: the high 32 bits of the offset are 2 and the
; low 32 bits are 0x800. The SelectionDAG checks for gfx900, gfx1100 and
; gfx1200 expect a split: the pointer gets 0 added to its low word and 2 to
; its high word, and 2048 stays on the load as an immediate offset. The
; gfx1010 checks (both selectors) and the GlobalISel checks for the other
; targets expect the full offset to be added to the pointer (0x800 low,
; 2 high) and a load with no immediate offset.
928define i8 @flat_inst_valu_offset_64bit_11bit_split1(ptr %p) {
929; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
930; GFX9-SDAG:       ; %bb.0:
931; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
932; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
933; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
934; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] offset:2048
935; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
936; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
937;
938; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
939; GFX10:       ; %bb.0:
940; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
941; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
942; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
943; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
944; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
945; GFX10-NEXT:    s_setpc_b64 s[30:31]
946;
947; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
948; GFX11-SDAG:       ; %bb.0:
949; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
950; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
951; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
952; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:2048
953; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
954; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
955;
956; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
957; GFX12-SDAG:       ; %bb.0:
958; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
959; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
960; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
961; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
962; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
963; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
964; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
965; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:2048
966; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
967; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
968;
969; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
970; GFX9-GISEL:       ; %bb.0:
971; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
972; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x800, v0
973; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
974; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1]
975; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
976; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
977;
978; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
979; GFX11-GISEL:       ; %bb.0:
980; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
981; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
982; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
983; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
984; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
985; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
986;
987; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
988; GFX12-GISEL:       ; %bb.0:
989; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
990; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
991; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
992; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
993; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
994; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
995; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
996; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
997; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
998; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; 8589936640 = 8589934592 (1 << 33) + 2048.
999  %gep = getelementptr i8, ptr %p, i64 8589936640
1000  %load = load i8, ptr %gep, align 4
1001  ret i8 %load
1002}
1003
1004; Fill 12-bit low-bits (1ull << 33) | 4095
; Byte load at %p + 8589938687: the high 32 bits of the offset are 2 and the
; low 32 bits are 0xfff. The SelectionDAG checks for gfx900, gfx1100 and
; gfx1200 expect a split: the pointer gets 0 added to its low word and 2 to
; its high word, and 4095 stays on the load as an immediate offset. The
; gfx1010 checks (both selectors) and the GlobalISel checks for the other
; targets expect the full offset to be added to the pointer (0xfff low,
; 2 high) and a load with no immediate offset.
1005define i8 @flat_inst_valu_offset_64bit_12bit_split0(ptr %p) {
1006; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
1007; GFX9-SDAG:       ; %bb.0:
1008; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1009; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
1010; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1011; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
1012; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1013; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1014;
1015; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
1016; GFX10:       ; %bb.0:
1017; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1018; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
1019; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1020; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1021; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1022; GFX10-NEXT:    s_setpc_b64 s[30:31]
1023;
1024; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
1025; GFX11-SDAG:       ; %bb.0:
1026; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1027; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
1028; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1029; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:4095
1030; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1031; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
1032;
1033; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
1034; GFX12-SDAG:       ; %bb.0:
1035; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1036; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
1037; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
1038; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
1039; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
1040; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
1041; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1042; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:4095
1043; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1044; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
1045;
1046; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
1047; GFX9-GISEL:       ; %bb.0:
1048; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1049; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfff, v0
1050; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1051; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1]
1052; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1053; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1054;
1055; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
1056; GFX11-GISEL:       ; %bb.0:
1057; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1058; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
1059; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1060; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
1061; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1062; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
1063;
1064; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
1065; GFX12-GISEL:       ; %bb.0:
1066; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1067; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
1068; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
1069; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
1070; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
1071; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
1072; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1073; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
1074; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1075; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; 8589938687 = 8589934592 (1 << 33) + 4095.
1076  %gep = getelementptr i8, ptr %p, i64 8589938687
1077  %load = load i8, ptr %gep, align 4
1078  ret i8 %load
1079}
1080
1081; Fill 12-bit low-bits (1ull << 33) | 4096
; Byte load at %p + 8589938688: the high 32 bits of the offset are 2 and the
; low 32 bits are 0x1000. The gfx900, gfx1010 and gfx1100 checks (both
; selectors) and the GlobalISel checks for gfx1200 expect the full offset to
; be added to the pointer (0x1000 low, 2 high) and a load with no immediate
; offset. Only the SelectionDAG checks for gfx1200 expect a split: 0 is added
; to the low word, 2 to the high word, and 4096 stays on the load as an
; immediate offset.
1082define i8 @flat_inst_valu_offset_64bit_12bit_split1(ptr %p) {
1083; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
1084; GFX9:       ; %bb.0:
1085; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1086; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
1087; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1088; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
1089; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1090; GFX9-NEXT:    s_setpc_b64 s[30:31]
1091;
1092; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
1093; GFX10:       ; %bb.0:
1094; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1095; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
1096; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1097; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1098; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1099; GFX10-NEXT:    s_setpc_b64 s[30:31]
1100;
1101; GFX11-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
1102; GFX11:       ; %bb.0:
1103; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1104; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
1105; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1106; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
1107; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1108; GFX11-NEXT:    s_setpc_b64 s[30:31]
1109;
1110; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
1111; GFX12-SDAG:       ; %bb.0:
1112; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1113; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
1114; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
1115; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
1116; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
1117; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
1118; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1119; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:4096
1120; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1121; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
1122;
1123; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
1124; GFX12-GISEL:       ; %bb.0:
1125; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1126; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
1127; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
1128; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
1129; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
1130; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
1131; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1132; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
1133; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1134; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; 8589938688 = 8589934592 (1 << 33) + 4096.
1135  %gep = getelementptr i8, ptr %p, i64 8589938688
1136  %load = load i8, ptr %gep, align 4
1137  ret i8 %load
1138}
1139
1140; Fill 13-bit low-bits (1ull << 33) | 8191
; Byte load at %p + 8589942783: the high 32 bits of the offset are 2 and the
; low 32 bits are 0x1fff. Three different shapes are expected:
;  * SelectionDAG on gfx900 and gfx1100: 0x1000 is added to the low word and
;    2 to the high word, and the remaining 4095 is an immediate offset on the
;    load (0x1000 + 4095 = 8191).
;  * SelectionDAG on gfx1200: 0 is added to the low word and 2 to the high
;    word, and all of 8191 is an immediate offset on the load.
;  * gfx1010 (both selectors) and GlobalISel on the other targets: the full
;    offset is added to the pointer (0x1fff low, 2 high) and the load has no
;    immediate offset.
1141define i8 @flat_inst_valu_offset_64bit_13bit_split0(ptr %p) {
1142; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
1143; GFX9-SDAG:       ; %bb.0:
1144; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1145; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
1146; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1147; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
1148; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1149; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1150;
1151; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
1152; GFX10:       ; %bb.0:
1153; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1154; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
1155; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1156; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1157; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1158; GFX10-NEXT:    s_setpc_b64 s[30:31]
1159;
1160; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
1161; GFX11-SDAG:       ; %bb.0:
1162; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1163; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
1164; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1165; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:4095
1166; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1167; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
1168;
1169; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
1170; GFX12-SDAG:       ; %bb.0:
1171; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1172; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
1173; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
1174; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
1175; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
1176; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
1177; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1178; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:8191
1179; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1180; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
1181;
1182; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
1183; GFX9-GISEL:       ; %bb.0:
1184; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1185; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1fff, v0
1186; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1187; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1]
1188; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1189; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1190;
1191; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
1192; GFX11-GISEL:       ; %bb.0:
1193; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1194; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
1195; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1196; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
1197; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1198; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
1199;
1200; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
1201; GFX12-GISEL:       ; %bb.0:
1202; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1203; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
1204; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
1205; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
1206; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
1207; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
1208; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1209; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
1210; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1211; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; 8589942783 = 8589934592 (1 << 33) + 8191.
1212  %gep = getelementptr i8, ptr %p, i64 8589942783
1213  %load = load i8, ptr %gep, align 4
1214  ret i8 %load
1215}
1216
1217; Fill 13-bit low-bits (1ull << 33) | 8192
; Byte load at %p + 8589942784: the high 32 bits of the offset are 2 and the
; low 32 bits are 0x2000. The gfx900, gfx1010 and gfx1100 checks (both
; selectors) and the GlobalISel checks for gfx1200 expect the full offset to
; be added to the pointer (0x2000 low, 2 high) and a load with no immediate
; offset. Only the SelectionDAG checks for gfx1200 expect a split: 0 is added
; to the low word, 2 to the high word, and 8192 stays on the load as an
; immediate offset.
1218define i8 @flat_inst_valu_offset_64bit_13bit_split1(ptr %p) {
1219; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
1220; GFX9:       ; %bb.0:
1221; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1222; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
1223; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1224; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
1225; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1226; GFX9-NEXT:    s_setpc_b64 s[30:31]
1227;
1228; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
1229; GFX10:       ; %bb.0:
1230; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1231; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
1232; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1233; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1234; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1235; GFX10-NEXT:    s_setpc_b64 s[30:31]
1236;
1237; GFX11-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
1238; GFX11:       ; %bb.0:
1239; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1240; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
1241; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1242; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
1243; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1244; GFX11-NEXT:    s_setpc_b64 s[30:31]
1245;
1246; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
1247; GFX12-SDAG:       ; %bb.0:
1248; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1249; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
1250; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
1251; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
1252; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
1253; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
1254; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1255; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:8192
1256; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1257; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
1258;
1259; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
1260; GFX12-GISEL:       ; %bb.0:
1261; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1262; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
1263; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
1264; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
1265; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
1266; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
1267; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1268; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
1269; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1270; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; 8589942784 = 8589934592 (1 << 33) + 8192.
1271  %gep = getelementptr i8, ptr %p, i64 8589942784
1272  %load = load i8, ptr %gep, align 4
1273  ret i8 %load
1274}
1275
1276; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
; Byte load at %p - 9223372036854773761, i.e. an offset whose high 32 bits
; are 0x80000000 and whose low 32 bits are 0x7ff. Expected shapes:
;  * gfx900 (both selectors): 0x7ff is added to the low word and the
;    high-word addend is taken from v2, which is written by v_bfrev_b32 with
;    operand 1; the load has no immediate offset. The two selectors differ
;    only in instruction order and in the operand order of the carry add.
;  * gfx1010 and gfx1100 (both selectors) and GlobalISel on gfx1200: 0x7ff is
;    added to the low word and the literal 0x80000000 to the high word; the
;    load has no immediate offset.
;  * SelectionDAG on gfx1200: 0x800000 is added to the low word and
;    0x80000000 to the high word, and the load carries offset:-8386561
;    (0x800000 - 8386561 = 2047).
1277define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split0(ptr %p) {
1278; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
1279; GFX9-SDAG:       ; %bb.0:
1280; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1281; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff, v0
1282; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v2, 1
1283; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
1284; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1]
1285; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1286; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1287;
1288; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
1289; GFX10:       ; %bb.0:
1290; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1291; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff, v0
1292; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1293; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1294; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1295; GFX10-NEXT:    s_setpc_b64 s[30:31]
1296;
1297; GFX11-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
1298; GFX11:       ; %bb.0:
1299; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1300; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff, v0
1301; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1302; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
1303; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1304; GFX11-NEXT:    s_setpc_b64 s[30:31]
1305;
1306; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
1307; GFX12-SDAG:       ; %bb.0:
1308; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1309; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
1310; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
1311; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
1312; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
1313; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, v0
1314; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1315; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:-8386561
1316; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1317; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
1318;
1319; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
1320; GFX9-GISEL:       ; %bb.0:
1321; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1322; GFX9-GISEL-NEXT:    v_bfrev_b32_e32 v2, 1
1323; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff, v0
1324; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1325; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1]
1326; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1327; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1328;
1329; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
1330; GFX12-GISEL:       ; %bb.0:
1331; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1332; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
1333; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
1334; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
1335; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
1336; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff, v0
1337; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1338; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
1339; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1340; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; -9223372036854773761 = -9223372036854775808 (i64 minimum) + 2047.
1341  %gep = getelementptr i8, ptr %p, i64 -9223372036854773761
1342  %load = load i8, ptr %gep, align 4
1343  ret i8 %load
1344}
1345
1346; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
; Byte load at %p - 9223372036854773760, i.e. an offset whose high 32 bits
; are 0x80000000 and whose low 32 bits are 0x800. Expected shapes:
;  * gfx900 (both selectors): 0x800 is added to the low word and the
;    high-word addend is taken from v2, which is written by v_bfrev_b32 with
;    operand 1; the load has no immediate offset. The two selectors differ
;    only in instruction order and in the operand order of the carry add.
;  * gfx1010 and gfx1100 (both selectors) and GlobalISel on gfx1200: 0x800 is
;    added to the low word and the literal 0x80000000 to the high word; the
;    load has no immediate offset.
;  * SelectionDAG on gfx1200: 0x800000 is added to the low word and
;    0x80000000 to the high word, and the load carries offset:-8386560
;    (0x800000 - 8386560 = 2048).
1347define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split1(ptr %p) {
1348; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
1349; GFX9-SDAG:       ; %bb.0:
1350; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1351; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x800, v0
1352; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v2, 1
1353; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
1354; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1]
1355; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1356; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1357;
1358; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
1359; GFX10:       ; %bb.0:
1360; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1361; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
1362; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1363; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1364; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1365; GFX10-NEXT:    s_setpc_b64 s[30:31]
1366;
1367; GFX11-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
1368; GFX11:       ; %bb.0:
1369; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1370; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
1371; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1372; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
1373; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1374; GFX11-NEXT:    s_setpc_b64 s[30:31]
1375;
1376; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
1377; GFX12-SDAG:       ; %bb.0:
1378; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1379; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
1380; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
1381; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
1382; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
1383; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, v0
1384; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1385; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:-8386560
1386; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1387; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
1388;
1389; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
1390; GFX9-GISEL:       ; %bb.0:
1391; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1392; GFX9-GISEL-NEXT:    v_bfrev_b32_e32 v2, 1
1393; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x800, v0
1394; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1395; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1]
1396; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1397; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1398;
1399; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
1400; GFX12-GISEL:       ; %bb.0:
1401; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1402; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
1403; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
1404; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
1405; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
1406; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
1407; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1408; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
1409; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1410; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; -9223372036854773760 = -9223372036854775808 (i64 minimum) + 2048.
1411  %gep = getelementptr i8, ptr %p, i64 -9223372036854773760
1412  %load = load i8, ptr %gep, align 4
1413  ret i8 %load
1414}
1415
1416; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
1417define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split0(ptr %p) {
1418; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
1419; GFX9-SDAG:       ; %bb.0:
1420; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1421; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfff, v0
1422; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v2, 1
1423; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
1424; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1]
1425; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1426; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1427;
1428; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
1429; GFX10:       ; %bb.0:
1430; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1431; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
1432; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1433; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1434; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1435; GFX10-NEXT:    s_setpc_b64 s[30:31]
1436;
1437; GFX11-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
1438; GFX11:       ; %bb.0:
1439; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1440; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
1441; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1442; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
1443; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1444; GFX11-NEXT:    s_setpc_b64 s[30:31]
1445;
1446; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
1447; GFX12-SDAG:       ; %bb.0:
1448; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1449; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
1450; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
1451; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
1452; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
1453; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, v0
1454; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1455; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:-8384513
1456; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1457; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
1458;
1459; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
1460; GFX9-GISEL:       ; %bb.0:
1461; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1462; GFX9-GISEL-NEXT:    v_bfrev_b32_e32 v2, 1
1463; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfff, v0
1464; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1465; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1]
1466; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1467; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1468;
1469; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
1470; GFX12-GISEL:       ; %bb.0:
1471; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1472; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
1473; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
1474; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
1475; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
1476; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
1477; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1478; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
1479; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1480; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; Loads one byte from %p plus a constant byte offset and returns it.
  ; The offset is -(2^63) + 4095, i.e. 0x8000000000000FFF as an i64: bit 63 set
  ; and the low 12 bits all ones.
  ; In the expected output above, only the GFX12 SelectionDAG run keeps part of
  ; the constant in the load's immediate (0x800000 is added to the low half and
  ; the load uses offset:-8384513, a net +4095). Every other run adds the whole
  ; 64-bit constant to the pointer and loads with no immediate offset.
1481  %gep = getelementptr i8, ptr %p, i64 -9223372036854771713
1482  %load = load i8, ptr %gep, align 4
1483  ret i8 %load
1484}
1485
1486; One past the 12-bit low-bits maximum, negative high bits (1ull << 63) | 4096
1487define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split1(ptr %p) {
1488; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
1489; GFX9-SDAG:       ; %bb.0:
1490; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1491; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
1492; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v2, 1
1493; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
1494; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1]
1495; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1496; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1497;
1498; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
1499; GFX10:       ; %bb.0:
1500; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1501; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
1502; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1503; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1504; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1505; GFX10-NEXT:    s_setpc_b64 s[30:31]
1506;
1507; GFX11-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
1508; GFX11:       ; %bb.0:
1509; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1510; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
1511; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1512; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
1513; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1514; GFX11-NEXT:    s_setpc_b64 s[30:31]
1515;
1516; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
1517; GFX12-SDAG:       ; %bb.0:
1518; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1519; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
1520; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
1521; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
1522; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
1523; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, v0
1524; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1525; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:-8384512
1526; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1527; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
1528;
1529; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
1530; GFX9-GISEL:       ; %bb.0:
1531; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1532; GFX9-GISEL-NEXT:    v_bfrev_b32_e32 v2, 1
1533; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
1534; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1535; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1]
1536; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1537; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1538;
1539; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
1540; GFX12-GISEL:       ; %bb.0:
1541; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1542; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
1543; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
1544; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
1545; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
1546; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
1547; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1548; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
1549; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1550; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; Loads one byte from %p plus a constant byte offset and returns it.
  ; The offset is -(2^63) + 4096, i.e. 0x8000000000001000 as an i64: bit 63 and
  ; bit 12 set, low 12 bits all zero.
  ; In the expected output above, only the GFX12 SelectionDAG run keeps part of
  ; the constant in the load's immediate (0x800000 is added to the low half and
  ; the load uses offset:-8384512, a net +4096). Every other run adds the whole
  ; 64-bit constant to the pointer and loads with no immediate offset.
1551  %gep = getelementptr i8, ptr %p, i64 -9223372036854771712
1552  %load = load i8, ptr %gep, align 4
1553  ret i8 %load
1554}
1555
1556; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
1557define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split0(ptr %p) {
1558; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
1559; GFX9-SDAG:       ; %bb.0:
1560; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1561; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1fff, v0
1562; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v2, 1
1563; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
1564; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1]
1565; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1566; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1567;
1568; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
1569; GFX10:       ; %bb.0:
1570; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1571; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
1572; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1573; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1574; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1575; GFX10-NEXT:    s_setpc_b64 s[30:31]
1576;
1577; GFX11-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
1578; GFX11:       ; %bb.0:
1579; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1580; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
1581; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1582; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
1583; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1584; GFX11-NEXT:    s_setpc_b64 s[30:31]
1585;
1586; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
1587; GFX12-SDAG:       ; %bb.0:
1588; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1589; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
1590; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
1591; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
1592; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
1593; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, v0
1594; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1595; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:-8380417
1596; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1597; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
1598;
1599; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
1600; GFX9-GISEL:       ; %bb.0:
1601; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1602; GFX9-GISEL-NEXT:    v_bfrev_b32_e32 v2, 1
1603; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1fff, v0
1604; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1605; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1]
1606; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1607; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1608;
1609; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
1610; GFX12-GISEL:       ; %bb.0:
1611; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1612; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
1613; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
1614; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
1615; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
1616; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
1617; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1618; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
1619; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1620; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; Loads one byte from %p plus a constant byte offset and returns it.
  ; The offset is -(2^63) + 8191, i.e. 0x8000000000001FFF as an i64: bit 63 set
  ; and the low 13 bits all ones.
  ; In the expected output above, only the GFX12 SelectionDAG run keeps part of
  ; the constant in the load's immediate (0x800000 is added to the low half and
  ; the load uses offset:-8380417, a net +8191). Every other run adds the whole
  ; 64-bit constant to the pointer and loads with no immediate offset.
1621  %gep = getelementptr i8, ptr %p, i64 -9223372036854767617
1622  %load = load i8, ptr %gep, align 4
1623  ret i8 %load
1624}
1625
1626; One past the 13-bit low-bits maximum, negative high bits (1ull << 63) | 8192
1627define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split1(ptr %p) {
1628; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
1629; GFX9-SDAG:       ; %bb.0:
1630; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1631; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
1632; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v2, 1
1633; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
1634; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1]
1635; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1636; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1637;
1638; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
1639; GFX10:       ; %bb.0:
1640; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1641; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
1642; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1643; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1644; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1645; GFX10-NEXT:    s_setpc_b64 s[30:31]
1646;
1647; GFX11-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
1648; GFX11:       ; %bb.0:
1649; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1650; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
1651; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1652; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
1653; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1654; GFX11-NEXT:    s_setpc_b64 s[30:31]
1655;
1656; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
1657; GFX12-SDAG:       ; %bb.0:
1658; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1659; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
1660; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
1661; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
1662; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
1663; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, v0
1664; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1665; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:-8380416
1666; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1667; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
1668;
1669; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
1670; GFX9-GISEL:       ; %bb.0:
1671; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1672; GFX9-GISEL-NEXT:    v_bfrev_b32_e32 v2, 1
1673; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
1674; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1675; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1]
1676; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1677; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1678;
1679; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
1680; GFX12-GISEL:       ; %bb.0:
1681; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1682; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
1683; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
1684; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
1685; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
1686; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
1687; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1688; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1]
1689; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1690; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; Loads one byte from %p plus a constant byte offset and returns it.
  ; The offset is -(2^63) + 8192, i.e. 0x8000000000002000 as an i64: bit 63 and
  ; bit 13 set, low 13 bits all zero.
  ; In the expected output above, only the GFX12 SelectionDAG run keeps part of
  ; the constant in the load's immediate (0x800000 is added to the low half and
  ; the load uses offset:-8380416, a net +8192). Every other run adds the whole
  ; 64-bit constant to the pointer and loads with no immediate offset.
1691  %gep = getelementptr i8, ptr %p, i64 -9223372036854767616
1692  %load = load i8, ptr %gep, align 4
1693  ret i8 %load
1694}
1695
1696define amdgpu_kernel void @flat_inst_salu_offset_1(ptr %p) {
1697; GFX9-LABEL: flat_inst_salu_offset_1:
1698; GFX9:       ; %bb.0:
1699; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1700; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1701; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1702; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1703; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:1 glc
1704; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1705; GFX9-NEXT:    flat_store_byte v[0:1], v0
1706; GFX9-NEXT:    s_endpgm
1707;
1708; GFX10-LABEL: flat_inst_salu_offset_1:
1709; GFX10:       ; %bb.0:
1710; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1711; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1712; GFX10-NEXT:    s_add_u32 s0, s0, 1
1713; GFX10-NEXT:    s_addc_u32 s1, s1, 0
1714; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1715; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1716; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1717; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1718; GFX10-NEXT:    flat_store_byte v[0:1], v0
1719; GFX10-NEXT:    s_endpgm
1720;
1721; GFX11-LABEL: flat_inst_salu_offset_1:
1722; GFX11:       ; %bb.0:
1723; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
1724; GFX11-NEXT:    s_wait_loadcnt lgkmcnt(0)
1725; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1726; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:1 glc dlc
1727; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1728; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1729; GFX11-NEXT:    s_endpgm
1730;
1731; GFX12-LABEL: flat_inst_salu_offset_1:
1732; GFX12:       ; %bb.0:
1733; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
1734; GFX12-NEXT:    s_wait_kmcnt 0x0
1735; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1736; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:1 scope:SCOPE_SYS
1737; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1738; GFX12-NEXT:    flat_store_b8 v[0:1], v0
1739; GFX12-NEXT:    s_endpgm
  ; Kernel variant: %p is a kernel argument, which the expected output fetches
  ; with a scalar load before the flat access. The body does a volatile byte
  ; load from %p + 1 and stores the loaded value through an undef pointer.
  ; In the expected output above, GFX9, GFX11 and GFX12 encode the 1 as the
  ; load's immediate offset, while GFX10 adds it with s_add_u32/s_addc_u32 and
  ; loads with no immediate offset.
1740  %gep = getelementptr i8, ptr %p, i64 1
1741  %load = load volatile i8, ptr %gep, align 1
1742  store i8 %load, ptr undef
1743  ret void
1744}
1745
1746define amdgpu_kernel void @flat_inst_salu_offset_11bit_max(ptr %p) {
1747; GFX9-LABEL: flat_inst_salu_offset_11bit_max:
1748; GFX9:       ; %bb.0:
1749; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1750; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1751; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1752; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1753; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:2047 glc
1754; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1755; GFX9-NEXT:    flat_store_byte v[0:1], v0
1756; GFX9-NEXT:    s_endpgm
1757;
1758; GFX10-LABEL: flat_inst_salu_offset_11bit_max:
1759; GFX10:       ; %bb.0:
1760; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1761; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1762; GFX10-NEXT:    s_add_u32 s0, s0, 0x7ff
1763; GFX10-NEXT:    s_addc_u32 s1, s1, 0
1764; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1765; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1766; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1767; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1768; GFX10-NEXT:    flat_store_byte v[0:1], v0
1769; GFX10-NEXT:    s_endpgm
1770;
1771; GFX11-LABEL: flat_inst_salu_offset_11bit_max:
1772; GFX11:       ; %bb.0:
1773; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
1774; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1775; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1776; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:2047 glc dlc
1777; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1778; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1779; GFX11-NEXT:    s_endpgm
1780;
1781; GFX12-LABEL: flat_inst_salu_offset_11bit_max:
1782; GFX12:       ; %bb.0:
1783; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
1784; GFX12-NEXT:    s_wait_kmcnt 0x0
1785; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1786; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:2047 scope:SCOPE_SYS
1787; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1788; GFX12-NEXT:    flat_store_b8 v[0:1], v0
1789; GFX12-NEXT:    s_endpgm
  ; Kernel variant: volatile byte load from %p + 2047 (2^11 - 1), with the
  ; loaded value stored through an undef pointer.
  ; In the expected output above, GFX9, GFX11 and GFX12 encode 2047 as the
  ; load's immediate offset, while GFX10 adds 0x7ff with s_add_u32/s_addc_u32
  ; and loads with no immediate offset.
1790  %gep = getelementptr i8, ptr %p, i64 2047
1791  %load = load volatile i8, ptr %gep, align 1
1792  store i8 %load, ptr undef
1793  ret void
1794}
1795
1796define amdgpu_kernel void @flat_inst_salu_offset_12bit_max(ptr %p) {
1797; GFX9-LABEL: flat_inst_salu_offset_12bit_max:
1798; GFX9:       ; %bb.0:
1799; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1800; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1801; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1802; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1803; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095 glc
1804; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1805; GFX9-NEXT:    flat_store_byte v[0:1], v0
1806; GFX9-NEXT:    s_endpgm
1807;
1808; GFX10-LABEL: flat_inst_salu_offset_12bit_max:
1809; GFX10:       ; %bb.0:
1810; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1811; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1812; GFX10-NEXT:    s_add_u32 s0, s0, 0xfff
1813; GFX10-NEXT:    s_addc_u32 s1, s1, 0
1814; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1815; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1816; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1817; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1818; GFX10-NEXT:    flat_store_byte v[0:1], v0
1819; GFX10-NEXT:    s_endpgm
1820;
1821; GFX11-LABEL: flat_inst_salu_offset_12bit_max:
1822; GFX11:       ; %bb.0:
1823; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
1824; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1825; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1826; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:4095 glc dlc
1827; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1828; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1829; GFX11-NEXT:    s_endpgm
1830;
1831; GFX12-LABEL: flat_inst_salu_offset_12bit_max:
1832; GFX12:       ; %bb.0:
1833; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
1834; GFX12-NEXT:    s_wait_kmcnt 0x0
1835; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1836; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:4095 scope:SCOPE_SYS
1837; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1838; GFX12-NEXT:    flat_store_b8 v[0:1], v0
1839; GFX12-NEXT:    s_endpgm
  ; Kernel variant: volatile byte load from %p + 4095 (2^12 - 1), with the
  ; loaded value stored through an undef pointer.
  ; In the expected output above, GFX9, GFX11 and GFX12 encode 4095 as the
  ; load's immediate offset, while GFX10 adds 0xfff with s_add_u32/s_addc_u32
  ; and loads with no immediate offset.
1840  %gep = getelementptr i8, ptr %p, i64 4095
1841  %load = load volatile i8, ptr %gep, align 1
1842  store i8 %load, ptr undef
1843  ret void
1844}
1845
1846define amdgpu_kernel void @flat_inst_salu_offset_13bit_max(ptr %p) {
1847; GFX9-SDAG-LABEL: flat_inst_salu_offset_13bit_max:
1848; GFX9-SDAG:       ; %bb.0:
1849; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1850; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1851; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
1852; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
1853; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
1854; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
1855; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095 glc
1856; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1857; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
1858; GFX9-SDAG-NEXT:    s_endpgm
1859;
1860; GFX10-LABEL: flat_inst_salu_offset_13bit_max:
1861; GFX10:       ; %bb.0:
1862; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1863; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1864; GFX10-NEXT:    s_add_u32 s0, s0, 0x1fff
1865; GFX10-NEXT:    s_addc_u32 s1, s1, 0
1866; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1867; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1868; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1869; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1870; GFX10-NEXT:    flat_store_byte v[0:1], v0
1871; GFX10-NEXT:    s_endpgm
1872;
1873; GFX11-SDAG-LABEL: flat_inst_salu_offset_13bit_max:
1874; GFX11-SDAG:       ; %bb.0:
1875; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
1876; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1877; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0x1000, s0
1878; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1879; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, s1, s0
1880; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:4095 glc dlc
1881; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1882; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
1883; GFX11-SDAG-NEXT:    s_endpgm
1884;
1885; GFX12-LABEL: flat_inst_salu_offset_13bit_max:
1886; GFX12:       ; %bb.0:
1887; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
1888; GFX12-NEXT:    s_wait_kmcnt 0x0
1889; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1890; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:8191 scope:SCOPE_SYS
1891; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1892; GFX12-NEXT:    flat_store_b8 v[0:1], v0
1893; GFX12-NEXT:    s_endpgm
1894;
1895; GFX9-GISEL-LABEL: flat_inst_salu_offset_13bit_max:
1896; GFX9-GISEL:       ; %bb.0:
1897; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1898; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1899; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0x1fff
1900; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0
1901; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1902; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1903; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
1904; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1905; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
1906; GFX9-GISEL-NEXT:    s_endpgm
1907;
1908; GFX11-GISEL-LABEL: flat_inst_salu_offset_13bit_max:
1909; GFX11-GISEL:       ; %bb.0:
1910; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
1911; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1912; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x1fff
1913; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 0
1914; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1915; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1916; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
1917; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1918; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
1919; GFX11-GISEL-NEXT:    s_endpgm
  ; Kernel variant: volatile byte load from %p + 8191 (2^13 - 1), with the
  ; loaded value stored through an undef pointer.
  ; The expected output above differs by target and instruction selector.
  ; GFX12 encodes all of 8191 as the load's immediate offset. The GFX9 and
  ; GFX11 SelectionDAG runs split it: 0x1000 is added to the pointer and the
  ; load carries offset:4095. GFX10 and the GFX9/GFX11 GlobalISel runs add
  ; 0x1fff with s_add_u32/s_addc_u32 and load with no immediate offset.
1920  %gep = getelementptr i8, ptr %p, i64 8191
1921  %load = load volatile i8, ptr %gep, align 1
1922  store i8 %load, ptr undef
1923  ret void
1924}
1925
1926define amdgpu_kernel void @flat_inst_salu_offset_neg_11bit_max(ptr %p) {
1927; GFX9-SDAG-LABEL: flat_inst_salu_offset_neg_11bit_max:
1928; GFX9-SDAG:       ; %bb.0:
1929; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1930; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1931; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
1932; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
1933; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
1934; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
1935; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
1936; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1937; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
1938; GFX9-SDAG-NEXT:    s_endpgm
1939;
1940; GFX10-LABEL: flat_inst_salu_offset_neg_11bit_max:
1941; GFX10:       ; %bb.0:
1942; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1943; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1944; GFX10-NEXT:    s_add_u32 s0, s0, 0xfffff800
1945; GFX10-NEXT:    s_addc_u32 s1, s1, -1
1946; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1947; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1948; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1949; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1950; GFX10-NEXT:    flat_store_byte v[0:1], v0
1951; GFX10-NEXT:    s_endpgm
1952;
1953; GFX11-SDAG-LABEL: flat_inst_salu_offset_neg_11bit_max:
1954; GFX11-SDAG:       ; %bb.0:
1955; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
1956; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1957; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0xfffff800, s0
1958; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1959; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
1960; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
1961; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1962; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
1963; GFX11-SDAG-NEXT:    s_endpgm
1964;
1965; GFX12-LABEL: flat_inst_salu_offset_neg_11bit_max:
1966; GFX12:       ; %bb.0:
1967; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
1968; GFX12-NEXT:    s_wait_kmcnt 0x0
1969; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1970; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:-2048 scope:SCOPE_SYS
1971; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1972; GFX12-NEXT:    flat_store_b8 v[0:1], v0
1973; GFX12-NEXT:    s_endpgm
1974;
1975; GFX9-GISEL-LABEL: flat_inst_salu_offset_neg_11bit_max:
1976; GFX9-GISEL:       ; %bb.0:
1977; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1978; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1979; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0xfffff800
1980; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, -1
1981; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1982; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1983; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
1984; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1985; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
1986; GFX9-GISEL-NEXT:    s_endpgm
1987;
1988; GFX11-GISEL-LABEL: flat_inst_salu_offset_neg_11bit_max:
1989; GFX11-GISEL:       ; %bb.0:
1990; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
1991; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1992; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0xfffff800
1993; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, -1
1994; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1995; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1996; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
1997; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1998; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
1999; GFX11-GISEL-NEXT:    s_endpgm
  ; Kernel variant: volatile byte load from %p - 2048 (-(2^11)), with the
  ; loaded value stored through an undef pointer.
  ; In the expected output above, only GFX12 encodes the negative value as the
  ; load's immediate (offset:-2048). Every other run adds the 64-bit constant
  ; (low half 0xfffff800, high half -1) to the pointer and loads with no
  ; immediate offset: the GFX9 and GFX11 SelectionDAG runs use vector adds,
  ; GFX10 and the GFX9/GFX11 GlobalISel runs use s_add_u32/s_addc_u32.
2000  %gep = getelementptr i8, ptr %p, i64 -2048
2001  %load = load volatile i8, ptr %gep, align 1
2002  store i8 %load, ptr undef
2003  ret void
2004}
2005
2006define amdgpu_kernel void @flat_inst_salu_offset_neg_12bit_max(ptr %p) {
2007; GFX9-SDAG-LABEL: flat_inst_salu_offset_neg_12bit_max:
2008; GFX9-SDAG:       ; %bb.0:
2009; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2010; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2011; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
2012; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
2013; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
2014; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2015; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
2016; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2017; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
2018; GFX9-SDAG-NEXT:    s_endpgm
2019;
2020; GFX10-LABEL: flat_inst_salu_offset_neg_12bit_max:
2021; GFX10:       ; %bb.0:
2022; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2023; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2024; GFX10-NEXT:    s_add_u32 s0, s0, 0xfffff000
2025; GFX10-NEXT:    s_addc_u32 s1, s1, -1
2026; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2027; GFX10-NEXT:    v_mov_b32_e32 v1, s1
2028; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
2029; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2030; GFX10-NEXT:    flat_store_byte v[0:1], v0
2031; GFX10-NEXT:    s_endpgm
2032;
2033; GFX11-SDAG-LABEL: flat_inst_salu_offset_neg_12bit_max:
2034; GFX11-SDAG:       ; %bb.0:
2035; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2036; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2037; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0xfffff000, s0
2038; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2039; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
2040; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
2041; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2042; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
2043; GFX11-SDAG-NEXT:    s_endpgm
2044;
2045; GFX12-LABEL: flat_inst_salu_offset_neg_12bit_max:
2046; GFX12:       ; %bb.0:
2047; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2048; GFX12-NEXT:    s_wait_kmcnt 0x0
2049; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2050; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:-4096 scope:SCOPE_SYS
2051; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2052; GFX12-NEXT:    flat_store_b8 v[0:1], v0
2053; GFX12-NEXT:    s_endpgm
2054;
2055; GFX9-GISEL-LABEL: flat_inst_salu_offset_neg_12bit_max:
2056; GFX9-GISEL:       ; %bb.0:
2057; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2058; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2059; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0xfffff000
2060; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, -1
2061; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2062; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2063; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
2064; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2065; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
2066; GFX9-GISEL-NEXT:    s_endpgm
2067;
2068; GFX11-GISEL-LABEL: flat_inst_salu_offset_neg_12bit_max:
2069; GFX11-GISEL:       ; %bb.0:
2070; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2071; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2072; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0xfffff000
2073; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, -1
2074; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2075; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2076; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
2077; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2078; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
2079; GFX11-GISEL-NEXT:    s_endpgm
  ; Kernel variant: volatile byte load from %p - 4096 (-(2^12)), with the
  ; loaded value stored through an undef pointer.
  ; In the expected output above, only GFX12 encodes the negative value as the
  ; load's immediate (offset:-4096). Every other run adds the 64-bit constant
  ; (low half 0xfffff000, high half -1) to the pointer and loads with no
  ; immediate offset: the GFX9 and GFX11 SelectionDAG runs use vector adds,
  ; GFX10 and the GFX9/GFX11 GlobalISel runs use s_add_u32/s_addc_u32.
2080  %gep = getelementptr i8, ptr %p, i64 -4096
2081  %load = load volatile i8, ptr %gep, align 1
2082  store i8 %load, ptr undef
2083  ret void
2084}
2085
; amdgpu_kernel variant (pointer is fetched into SGPRs with s_load) using a
; byte offset of -8192 (0xffffe000 in the low dword). Per the checks below,
; gfx900/gfx1010/gfx1100 add the offset to the base address before the load,
; while gfx1200 encodes it directly in the load as offset:-8192.
2086define amdgpu_kernel void @flat_inst_salu_offset_neg_13bit_max(ptr %p) {
2087; GFX9-SDAG-LABEL: flat_inst_salu_offset_neg_13bit_max:
2088; GFX9-SDAG:       ; %bb.0:
2089; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2090; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2091; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
2092; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
2093; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
2094; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2095; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
2096; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2097; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
2098; GFX9-SDAG-NEXT:    s_endpgm
2099;
2100; GFX10-LABEL: flat_inst_salu_offset_neg_13bit_max:
2101; GFX10:       ; %bb.0:
2102; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2103; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2104; GFX10-NEXT:    s_add_u32 s0, s0, 0xffffe000
2105; GFX10-NEXT:    s_addc_u32 s1, s1, -1
2106; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2107; GFX10-NEXT:    v_mov_b32_e32 v1, s1
2108; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
2109; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2110; GFX10-NEXT:    flat_store_byte v[0:1], v0
2111; GFX10-NEXT:    s_endpgm
2112;
2113; GFX11-SDAG-LABEL: flat_inst_salu_offset_neg_13bit_max:
2114; GFX11-SDAG:       ; %bb.0:
2115; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2116; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2117; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0xffffe000, s0
2118; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2119; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
2120; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
2121; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2122; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
2123; GFX11-SDAG-NEXT:    s_endpgm
2124;
2125; GFX12-LABEL: flat_inst_salu_offset_neg_13bit_max:
2126; GFX12:       ; %bb.0:
2127; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2128; GFX12-NEXT:    s_wait_kmcnt 0x0
2129; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2130; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:-8192 scope:SCOPE_SYS
2131; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2132; GFX12-NEXT:    flat_store_b8 v[0:1], v0
2133; GFX12-NEXT:    s_endpgm
2134;
2135; GFX9-GISEL-LABEL: flat_inst_salu_offset_neg_13bit_max:
2136; GFX9-GISEL:       ; %bb.0:
2137; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2138; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2139; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0xffffe000
2140; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, -1
2141; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2142; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2143; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
2144; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2145; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
2146; GFX9-GISEL-NEXT:    s_endpgm
2147;
2148; GFX11-GISEL-LABEL: flat_inst_salu_offset_neg_13bit_max:
2149; GFX11-GISEL:       ; %bb.0:
2150; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2151; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2152; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0xffffe000
2153; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, -1
2154; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2155; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2156; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
2157; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2158; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
2159; GFX11-GISEL-NEXT:    s_endpgm
2160  %gep = getelementptr i8, ptr %p, i64 -8192
2161  %load = load volatile i8, ptr %gep, align 1
2162  store i8 %load, ptr undef
2163  ret void
2164}
2165
; amdgpu_kernel variant using a byte offset of 4095 (0xfff). Per the checks
; below, gfx900/gfx1100/gfx1200 encode the whole offset in the load
; (offset:4095), while gfx1010 adds 0xfff to the base address with
; s_add_u32/s_addc_u32 and loads with no immediate offset.
2166define amdgpu_kernel void @flat_inst_salu_offset_2x_11bit_max(ptr %p) {
2167; GFX9-LABEL: flat_inst_salu_offset_2x_11bit_max:
2168; GFX9:       ; %bb.0:
2169; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2170; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2171; GFX9-NEXT:    v_mov_b32_e32 v0, s0
2172; GFX9-NEXT:    v_mov_b32_e32 v1, s1
2173; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095 glc
2174; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2175; GFX9-NEXT:    flat_store_byte v[0:1], v0
2176; GFX9-NEXT:    s_endpgm
2177;
2178; GFX10-LABEL: flat_inst_salu_offset_2x_11bit_max:
2179; GFX10:       ; %bb.0:
2180; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2181; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2182; GFX10-NEXT:    s_add_u32 s0, s0, 0xfff
2183; GFX10-NEXT:    s_addc_u32 s1, s1, 0
2184; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2185; GFX10-NEXT:    v_mov_b32_e32 v1, s1
2186; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
2187; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2188; GFX10-NEXT:    flat_store_byte v[0:1], v0
2189; GFX10-NEXT:    s_endpgm
2190;
2191; GFX11-LABEL: flat_inst_salu_offset_2x_11bit_max:
2192; GFX11:       ; %bb.0:
2193; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2194; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2195; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2196; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:4095 glc dlc
2197; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2198; GFX11-NEXT:    flat_store_b8 v[0:1], v0
2199; GFX11-NEXT:    s_endpgm
2200;
2201; GFX12-LABEL: flat_inst_salu_offset_2x_11bit_max:
2202; GFX12:       ; %bb.0:
2203; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2204; GFX12-NEXT:    s_wait_kmcnt 0x0
2205; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2206; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:4095 scope:SCOPE_SYS
2207; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2208; GFX12-NEXT:    flat_store_b8 v[0:1], v0
2209; GFX12-NEXT:    s_endpgm
2210  %gep = getelementptr i8, ptr %p, i64 4095
2211  %load = load volatile i8, ptr %gep, align 1
2212  store i8 %load, ptr undef
2213  ret void
2214}
2215
; amdgpu_kernel variant using a byte offset of 8191 (0x1fff). Per the checks
; below, the SelectionDAG output for gfx900/gfx1100 splits it into an add of
; 0x1000 plus offset:4095 on the load; gfx1010 and the GlobalISel output for
; gfx900/gfx1100 add the full 0x1fff to the base address; gfx1200 encodes the
; whole value as offset:8191.
2216define amdgpu_kernel void @flat_inst_salu_offset_2x_12bit_max(ptr %p) {
2217; GFX9-SDAG-LABEL: flat_inst_salu_offset_2x_12bit_max:
2218; GFX9-SDAG:       ; %bb.0:
2219; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2220; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2221; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
2222; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
2223; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
2224; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
2225; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095 glc
2226; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2227; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
2228; GFX9-SDAG-NEXT:    s_endpgm
2229;
2230; GFX10-LABEL: flat_inst_salu_offset_2x_12bit_max:
2231; GFX10:       ; %bb.0:
2232; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2233; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2234; GFX10-NEXT:    s_add_u32 s0, s0, 0x1fff
2235; GFX10-NEXT:    s_addc_u32 s1, s1, 0
2236; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2237; GFX10-NEXT:    v_mov_b32_e32 v1, s1
2238; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
2239; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2240; GFX10-NEXT:    flat_store_byte v[0:1], v0
2241; GFX10-NEXT:    s_endpgm
2242;
2243; GFX11-SDAG-LABEL: flat_inst_salu_offset_2x_12bit_max:
2244; GFX11-SDAG:       ; %bb.0:
2245; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2246; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2247; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0x1000, s0
2248; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2249; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, s1, s0
2250; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:4095 glc dlc
2251; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2252; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
2253; GFX11-SDAG-NEXT:    s_endpgm
2254;
2255; GFX12-LABEL: flat_inst_salu_offset_2x_12bit_max:
2256; GFX12:       ; %bb.0:
2257; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2258; GFX12-NEXT:    s_wait_kmcnt 0x0
2259; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2260; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:8191 scope:SCOPE_SYS
2261; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2262; GFX12-NEXT:    flat_store_b8 v[0:1], v0
2263; GFX12-NEXT:    s_endpgm
2264;
2265; GFX9-GISEL-LABEL: flat_inst_salu_offset_2x_12bit_max:
2266; GFX9-GISEL:       ; %bb.0:
2267; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2268; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2269; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0x1fff
2270; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0
2271; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2272; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2273; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
2274; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2275; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
2276; GFX9-GISEL-NEXT:    s_endpgm
2277;
2278; GFX11-GISEL-LABEL: flat_inst_salu_offset_2x_12bit_max:
2279; GFX11-GISEL:       ; %bb.0:
2280; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2281; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2282; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x1fff
2283; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 0
2284; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2285; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2286; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
2287; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2288; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
2289; GFX11-GISEL-NEXT:    s_endpgm
2290  %gep = getelementptr i8, ptr %p, i64 8191
2291  %load = load volatile i8, ptr %gep, align 1
2292  store i8 %load, ptr undef
2293  ret void
2294}
2295
; amdgpu_kernel variant using a byte offset of 16383 (0x3fff). Per the checks
; below, the SelectionDAG output for gfx900/gfx1100 splits it into an add of
; 0x3000 plus offset:4095 on the load; gfx1010 and the GlobalISel output for
; gfx900/gfx1100 add the full 0x3fff to the base address; gfx1200 encodes the
; whole value as offset:16383.
2296define amdgpu_kernel void @flat_inst_salu_offset_2x_13bit_max(ptr %p) {
2297; GFX9-SDAG-LABEL: flat_inst_salu_offset_2x_13bit_max:
2298; GFX9-SDAG:       ; %bb.0:
2299; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2300; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2301; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
2302; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
2303; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3000, v0
2304; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
2305; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095 glc
2306; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2307; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
2308; GFX9-SDAG-NEXT:    s_endpgm
2309;
2310; GFX10-LABEL: flat_inst_salu_offset_2x_13bit_max:
2311; GFX10:       ; %bb.0:
2312; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2313; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2314; GFX10-NEXT:    s_add_u32 s0, s0, 0x3fff
2315; GFX10-NEXT:    s_addc_u32 s1, s1, 0
2316; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2317; GFX10-NEXT:    v_mov_b32_e32 v1, s1
2318; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
2319; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2320; GFX10-NEXT:    flat_store_byte v[0:1], v0
2321; GFX10-NEXT:    s_endpgm
2322;
2323; GFX11-SDAG-LABEL: flat_inst_salu_offset_2x_13bit_max:
2324; GFX11-SDAG:       ; %bb.0:
2325; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2326; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2327; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0x3000, s0
2328; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2329; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, s1, s0
2330; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:4095 glc dlc
2331; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2332; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
2333; GFX11-SDAG-NEXT:    s_endpgm
2334;
2335; GFX12-LABEL: flat_inst_salu_offset_2x_13bit_max:
2336; GFX12:       ; %bb.0:
2337; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2338; GFX12-NEXT:    s_wait_kmcnt 0x0
2339; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2340; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:16383 scope:SCOPE_SYS
2341; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2342; GFX12-NEXT:    flat_store_b8 v[0:1], v0
2343; GFX12-NEXT:    s_endpgm
2344;
2345; GFX9-GISEL-LABEL: flat_inst_salu_offset_2x_13bit_max:
2346; GFX9-GISEL:       ; %bb.0:
2347; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2348; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2349; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0x3fff
2350; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0
2351; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2352; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2353; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
2354; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2355; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
2356; GFX9-GISEL-NEXT:    s_endpgm
2357;
2358; GFX11-GISEL-LABEL: flat_inst_salu_offset_2x_13bit_max:
2359; GFX11-GISEL:       ; %bb.0:
2360; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2361; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2362; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x3fff
2363; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 0
2364; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2365; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2366; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
2367; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2368; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
2369; GFX11-GISEL-NEXT:    s_endpgm
2370  %gep = getelementptr i8, ptr %p, i64 16383
2371  %load = load volatile i8, ptr %gep, align 1
2372  store i8 %load, ptr undef
2373  ret void
2374}
2375
; amdgpu_kernel variant using a byte offset of -4096 (0xfffff000 in the low
; dword). Per the checks below, gfx900/gfx1010/gfx1100 add the offset to the
; base address before the load, while gfx1200 encodes it directly in the load
; as offset:-4096.
2376define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_11bit_max(ptr %p) {
2377; GFX9-SDAG-LABEL: flat_inst_salu_offset_2x_neg_11bit_max:
2378; GFX9-SDAG:       ; %bb.0:
2379; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2380; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2381; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
2382; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
2383; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
2384; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2385; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
2386; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2387; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
2388; GFX9-SDAG-NEXT:    s_endpgm
2389;
2390; GFX10-LABEL: flat_inst_salu_offset_2x_neg_11bit_max:
2391; GFX10:       ; %bb.0:
2392; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2393; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2394; GFX10-NEXT:    s_add_u32 s0, s0, 0xfffff000
2395; GFX10-NEXT:    s_addc_u32 s1, s1, -1
2396; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2397; GFX10-NEXT:    v_mov_b32_e32 v1, s1
2398; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
2399; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2400; GFX10-NEXT:    flat_store_byte v[0:1], v0
2401; GFX10-NEXT:    s_endpgm
2402;
2403; GFX11-SDAG-LABEL: flat_inst_salu_offset_2x_neg_11bit_max:
2404; GFX11-SDAG:       ; %bb.0:
2405; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2406; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2407; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0xfffff000, s0
2408; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2409; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
2410; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
2411; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2412; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
2413; GFX11-SDAG-NEXT:    s_endpgm
2414;
2415; GFX12-LABEL: flat_inst_salu_offset_2x_neg_11bit_max:
2416; GFX12:       ; %bb.0:
2417; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2418; GFX12-NEXT:    s_wait_kmcnt 0x0
2419; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2420; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:-4096 scope:SCOPE_SYS
2421; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2422; GFX12-NEXT:    flat_store_b8 v[0:1], v0
2423; GFX12-NEXT:    s_endpgm
2424;
2425; GFX9-GISEL-LABEL: flat_inst_salu_offset_2x_neg_11bit_max:
2426; GFX9-GISEL:       ; %bb.0:
2427; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2428; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2429; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0xfffff000
2430; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, -1
2431; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2432; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2433; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
2434; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2435; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
2436; GFX9-GISEL-NEXT:    s_endpgm
2437;
2438; GFX11-GISEL-LABEL: flat_inst_salu_offset_2x_neg_11bit_max:
2439; GFX11-GISEL:       ; %bb.0:
2440; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2441; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2442; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0xfffff000
2443; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, -1
2444; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2445; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2446; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
2447; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2448; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
2449; GFX11-GISEL-NEXT:    s_endpgm
2450  %gep = getelementptr i8, ptr %p, i64 -4096
2451  %load = load volatile i8, ptr %gep, align 1
2452  store i8 %load, ptr undef
2453  ret void
2454}
2455
; amdgpu_kernel variant using a byte offset of -8192 (0xffffe000 in the low
; dword). Per the checks below, gfx900/gfx1010/gfx1100 add the offset to the
; base address before the load, while gfx1200 encodes it directly in the load
; as offset:-8192.
2456define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_12bit_max(ptr %p) {
2457; GFX9-SDAG-LABEL: flat_inst_salu_offset_2x_neg_12bit_max:
2458; GFX9-SDAG:       ; %bb.0:
2459; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2460; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2461; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
2462; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
2463; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
2464; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2465; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
2466; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2467; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
2468; GFX9-SDAG-NEXT:    s_endpgm
2469;
2470; GFX10-LABEL: flat_inst_salu_offset_2x_neg_12bit_max:
2471; GFX10:       ; %bb.0:
2472; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2473; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2474; GFX10-NEXT:    s_add_u32 s0, s0, 0xffffe000
2475; GFX10-NEXT:    s_addc_u32 s1, s1, -1
2476; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2477; GFX10-NEXT:    v_mov_b32_e32 v1, s1
2478; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
2479; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2480; GFX10-NEXT:    flat_store_byte v[0:1], v0
2481; GFX10-NEXT:    s_endpgm
2482;
2483; GFX11-SDAG-LABEL: flat_inst_salu_offset_2x_neg_12bit_max:
2484; GFX11-SDAG:       ; %bb.0:
2485; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2486; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2487; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0xffffe000, s0
2488; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2489; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
2490; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
2491; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2492; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
2493; GFX11-SDAG-NEXT:    s_endpgm
2494;
2495; GFX12-LABEL: flat_inst_salu_offset_2x_neg_12bit_max:
2496; GFX12:       ; %bb.0:
2497; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2498; GFX12-NEXT:    s_wait_kmcnt 0x0
2499; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2500; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:-8192 scope:SCOPE_SYS
2501; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2502; GFX12-NEXT:    flat_store_b8 v[0:1], v0
2503; GFX12-NEXT:    s_endpgm
2504;
2505; GFX9-GISEL-LABEL: flat_inst_salu_offset_2x_neg_12bit_max:
2506; GFX9-GISEL:       ; %bb.0:
2507; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2508; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2509; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0xffffe000
2510; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, -1
2511; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2512; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2513; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
2514; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2515; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
2516; GFX9-GISEL-NEXT:    s_endpgm
2517;
2518; GFX11-GISEL-LABEL: flat_inst_salu_offset_2x_neg_12bit_max:
2519; GFX11-GISEL:       ; %bb.0:
2520; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2521; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2522; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0xffffe000
2523; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, -1
2524; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2525; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2526; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
2527; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2528; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
2529; GFX11-GISEL-NEXT:    s_endpgm
2530  %gep = getelementptr i8, ptr %p, i64 -8192
2531  %load = load volatile i8, ptr %gep, align 1
2532  store i8 %load, ptr undef
2533  ret void
2534}
2535
; amdgpu_kernel variant using a byte offset of -16384 (0xffffc000 in the low
; dword). Per the checks below, gfx900/gfx1010/gfx1100 add the offset to the
; base address before the load, while gfx1200 encodes it directly in the load
; as offset:-16384.
2536define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_13bit_max(ptr %p) {
2537; GFX9-SDAG-LABEL: flat_inst_salu_offset_2x_neg_13bit_max:
2538; GFX9-SDAG:       ; %bb.0:
2539; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2540; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2541; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
2542; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
2543; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
2544; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2545; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
2546; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2547; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
2548; GFX9-SDAG-NEXT:    s_endpgm
2549;
2550; GFX10-LABEL: flat_inst_salu_offset_2x_neg_13bit_max:
2551; GFX10:       ; %bb.0:
2552; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2553; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2554; GFX10-NEXT:    s_add_u32 s0, s0, 0xffffc000
2555; GFX10-NEXT:    s_addc_u32 s1, s1, -1
2556; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2557; GFX10-NEXT:    v_mov_b32_e32 v1, s1
2558; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
2559; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2560; GFX10-NEXT:    flat_store_byte v[0:1], v0
2561; GFX10-NEXT:    s_endpgm
2562;
2563; GFX11-SDAG-LABEL: flat_inst_salu_offset_2x_neg_13bit_max:
2564; GFX11-SDAG:       ; %bb.0:
2565; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2566; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2567; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0xffffc000, s0
2568; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2569; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
2570; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
2571; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2572; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
2573; GFX11-SDAG-NEXT:    s_endpgm
2574;
2575; GFX12-LABEL: flat_inst_salu_offset_2x_neg_13bit_max:
2576; GFX12:       ; %bb.0:
2577; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2578; GFX12-NEXT:    s_wait_kmcnt 0x0
2579; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2580; GFX12-NEXT:    flat_load_u8 v0, v[0:1] offset:-16384 scope:SCOPE_SYS
2581; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2582; GFX12-NEXT:    flat_store_b8 v[0:1], v0
2583; GFX12-NEXT:    s_endpgm
2584;
2585; GFX9-GISEL-LABEL: flat_inst_salu_offset_2x_neg_13bit_max:
2586; GFX9-GISEL:       ; %bb.0:
2587; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2588; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2589; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0xffffc000
2590; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, -1
2591; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2592; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2593; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
2594; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2595; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
2596; GFX9-GISEL-NEXT:    s_endpgm
2597;
2598; GFX11-GISEL-LABEL: flat_inst_salu_offset_2x_neg_13bit_max:
2599; GFX11-GISEL:       ; %bb.0:
2600; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2601; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2602; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0xffffc000
2603; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, -1
2604; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2605; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2606; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
2607; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2608; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
2609; GFX11-GISEL-NEXT:    s_endpgm
2610  %gep = getelementptr i8, ptr %p, i64 -16384
2611  %load = load volatile i8, ptr %gep, align 1
2612  store i8 %load, ptr undef
2613  ret void
2614}
2615
2616; 64-bit offset with all 11 low bits set: (1ull << 33) | 2047 = 8589936639.
; Per the checks below, the SelectionDAG output for gfx900/gfx1100/gfx1200
; adds 2 to the high dword of the address and keeps the low part as
; offset:2047 on the load; gfx1010 and the GlobalISel output add 0x7ff (low)
; and 2 (high) to the base address and load with no immediate offset.
2617define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split0(ptr %p) {
2618; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_split0:
2619; GFX9-SDAG:       ; %bb.0:
2620; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2621; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2622; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
2623; GFX9-SDAG-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
2624; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
2625; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] offset:2047 glc
2626; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2627; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
2628; GFX9-SDAG-NEXT:    s_endpgm
2629;
2630; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_split0:
2631; GFX10:       ; %bb.0:
2632; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2633; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2634; GFX10-NEXT:    s_add_u32 s0, s0, 0x7ff
2635; GFX10-NEXT:    s_addc_u32 s1, s1, 2
2636; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2637; GFX10-NEXT:    v_mov_b32_e32 v1, s1
2638; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
2639; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2640; GFX10-NEXT:    flat_store_byte v[0:1], v0
2641; GFX10-NEXT:    s_endpgm
2642;
2643; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_split0:
2644; GFX11-SDAG:       ; %bb.0:
2645; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2646; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2647; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
2648; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2649; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2650; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:2047 glc dlc
2651; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2652; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
2653; GFX11-SDAG-NEXT:    s_endpgm
2654;
2655; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_split0:
2656; GFX12-SDAG:       ; %bb.0:
2657; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2658; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
2659; GFX12-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
2660; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2661; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2662; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:2047 scope:SCOPE_SYS
2663; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
2664; GFX12-SDAG-NEXT:    flat_store_b8 v[0:1], v0
2665; GFX12-SDAG-NEXT:    s_endpgm
2666;
2667; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_split0:
2668; GFX9-GISEL:       ; %bb.0:
2669; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2670; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2671; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0x7ff
2672; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 2
2673; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2674; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2675; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
2676; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2677; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
2678; GFX9-GISEL-NEXT:    s_endpgm
2679;
2680; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_split0:
2681; GFX11-GISEL:       ; %bb.0:
2682; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2683; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2684; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x7ff
2685; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 2
2686; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2687; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2688; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
2689; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2690; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
2691; GFX11-GISEL-NEXT:    s_endpgm
2692;
2693; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_split0:
2694; GFX12-GISEL:       ; %bb.0:
2695; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2696; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
2697; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x7ff
2698; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 2
2699; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2700; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2701; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1] scope:SCOPE_SYS
2702; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
2703; GFX12-GISEL-NEXT:    flat_store_b8 v[0:1], v0
2704; GFX12-GISEL-NEXT:    s_endpgm
2705  %gep = getelementptr i8, ptr %p, i64 8589936639
2706  %load = load volatile i8, ptr %gep, align 1
2707  store i8 %load, ptr undef
2708  ret void
2709}
2710
2711; 64-bit offset one past the 11-bit low-bits value: (1ull << 33) | 2048 = 8589936640.
; Per the checks below, the SelectionDAG output for gfx900/gfx1100/gfx1200
; adds 2 to the high dword of the address and keeps the low part as
; offset:2048 on the load; gfx1010 and the GlobalISel output add 0x800 (low)
; and 2 (high) to the base address and load with no immediate offset.
2712define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split1(ptr %p) {
2713; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_split1:
2714; GFX9-SDAG:       ; %bb.0:
2715; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2716; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2717; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
2718; GFX9-SDAG-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
2719; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
2720; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] offset:2048 glc
2721; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2722; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
2723; GFX9-SDAG-NEXT:    s_endpgm
2724;
2725; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_split1:
2726; GFX10:       ; %bb.0:
2727; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2728; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2729; GFX10-NEXT:    s_add_u32 s0, s0, 0x800
2730; GFX10-NEXT:    s_addc_u32 s1, s1, 2
2731; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2732; GFX10-NEXT:    v_mov_b32_e32 v1, s1
2733; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
2734; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2735; GFX10-NEXT:    flat_store_byte v[0:1], v0
2736; GFX10-NEXT:    s_endpgm
2737;
2738; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_split1:
2739; GFX11-SDAG:       ; %bb.0:
2740; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2741; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2742; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
2743; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2744; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2745; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:2048 glc dlc
2746; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2747; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
2748; GFX11-SDAG-NEXT:    s_endpgm
2749;
2750; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_split1:
2751; GFX12-SDAG:       ; %bb.0:
2752; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2753; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
2754; GFX12-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
2755; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2756; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2757; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:2048 scope:SCOPE_SYS
2758; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
2759; GFX12-SDAG-NEXT:    flat_store_b8 v[0:1], v0
2760; GFX12-SDAG-NEXT:    s_endpgm
2761;
2762; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_split1:
2763; GFX9-GISEL:       ; %bb.0:
2764; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2765; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2766; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0x800
2767; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 2
2768; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2769; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2770; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
2771; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2772; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
2773; GFX9-GISEL-NEXT:    s_endpgm
2774;
2775; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_split1:
2776; GFX11-GISEL:       ; %bb.0:
2777; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2778; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2779; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x800
2780; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 2
2781; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2782; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2783; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
2784; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2785; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
2786; GFX11-GISEL-NEXT:    s_endpgm
2787;
2788; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_split1:
2789; GFX12-GISEL:       ; %bb.0:
2790; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2791; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
2792; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x800
2793; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 2
2794; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2795; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2796; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1] scope:SCOPE_SYS
2797; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
2798; GFX12-GISEL-NEXT:    flat_store_b8 v[0:1], v0
2799; GFX12-GISEL-NEXT:    s_endpgm
2800  %gep = getelementptr i8, ptr %p, i64 8589936640
2801  %load = load volatile i8, ptr %gep, align 1
2802  store i8 %load, ptr undef
2803  ret void
2804}
2805
2806; Fill 12-bit low-bits (1ull << 33) | 4095
; The GEP constant 8589938687 is 2^33 + 4095, i.e. low dword 0xfff and high
; dword 2. Per the checks below, SelectionDAG on gfx900, gfx1100 and gfx1200
; adds 0 to the low dword and 2 to the high dword of the pointer and keeps 4095
; as the load's immediate offset. gfx1010 and every GlobalISel run instead add
; the whole constant with scalar adds and load with no immediate offset.
2807define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split0(ptr %p) {
2808; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_split0:
2809; GFX9-SDAG:       ; %bb.0:
2810; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2811; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2812; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
2813; GFX9-SDAG-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
2814; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
2815; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095 glc
2816; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2817; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
2818; GFX9-SDAG-NEXT:    s_endpgm
2819;
2820; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_split0:
2821; GFX10:       ; %bb.0:
2822; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2823; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2824; GFX10-NEXT:    s_add_u32 s0, s0, 0xfff
2825; GFX10-NEXT:    s_addc_u32 s1, s1, 2
2826; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2827; GFX10-NEXT:    v_mov_b32_e32 v1, s1
2828; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
2829; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2830; GFX10-NEXT:    flat_store_byte v[0:1], v0
2831; GFX10-NEXT:    s_endpgm
2832;
2833; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_split0:
2834; GFX11-SDAG:       ; %bb.0:
2835; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2836; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2837; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
2838; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2839; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2840; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:4095 glc dlc
2841; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2842; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
2843; GFX11-SDAG-NEXT:    s_endpgm
2844;
2845; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_split0:
2846; GFX12-SDAG:       ; %bb.0:
2847; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2848; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
2849; GFX12-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
2850; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2851; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2852; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:4095 scope:SCOPE_SYS
2853; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
2854; GFX12-SDAG-NEXT:    flat_store_b8 v[0:1], v0
2855; GFX12-SDAG-NEXT:    s_endpgm
2856;
2857; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_split0:
2858; GFX9-GISEL:       ; %bb.0:
2859; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2860; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2861; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0xfff
2862; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 2
2863; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2864; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2865; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
2866; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2867; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
2868; GFX9-GISEL-NEXT:    s_endpgm
2869;
2870; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_split0:
2871; GFX11-GISEL:       ; %bb.0:
2872; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2873; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2874; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0xfff
2875; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 2
2876; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2877; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2878; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
2879; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2880; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
2881; GFX11-GISEL-NEXT:    s_endpgm
2882;
2883; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_split0:
2884; GFX12-GISEL:       ; %bb.0:
2885; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2886; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
2887; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0xfff
2888; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 2
2889; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2890; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2891; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1] scope:SCOPE_SYS
2892; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
2893; GFX12-GISEL-NEXT:    flat_store_b8 v[0:1], v0
2894; GFX12-GISEL-NEXT:    s_endpgm
2895  %gep = getelementptr i8, ptr %p, i64 8589938687
2896  %load = load volatile i8, ptr %gep, align 1
2897  store i8 %load, ptr undef
2898  ret void
2899}
2900
2901; Low part is 4096, one past the largest 12-bit value: (1ull << 33) | 4096
; The GEP constant 8589938688 is 2^33 + 4096, i.e. low dword 0x1000 and high
; dword 2. Per the checks below, SelectionDAG on gfx900 and gfx1100 adds 0x1000
; and 2 to the pointer with vector adds and loads with no immediate offset,
; while SelectionDAG on gfx1200 adds only the high dword and keeps 4096 as the
; load's immediate offset. gfx1010 and every GlobalISel run add the whole
; constant with scalar adds and load with no immediate offset.
2902define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split1(ptr %p) {
2903; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_split1:
2904; GFX9-SDAG:       ; %bb.0:
2905; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2906; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2907; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
2908; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
2909; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
2910; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
2911; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
2912; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2913; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
2914; GFX9-SDAG-NEXT:    s_endpgm
2915;
2916; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_split1:
2917; GFX10:       ; %bb.0:
2918; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2919; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2920; GFX10-NEXT:    s_add_u32 s0, s0, 0x1000
2921; GFX10-NEXT:    s_addc_u32 s1, s1, 2
2922; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2923; GFX10-NEXT:    v_mov_b32_e32 v1, s1
2924; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
2925; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2926; GFX10-NEXT:    flat_store_byte v[0:1], v0
2927; GFX10-NEXT:    s_endpgm
2928;
2929; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_split1:
2930; GFX11-SDAG:       ; %bb.0:
2931; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2932; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2933; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0x1000, s0
2934; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2935; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2936; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
2937; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2938; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
2939; GFX11-SDAG-NEXT:    s_endpgm
2940;
2941; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_split1:
2942; GFX12-SDAG:       ; %bb.0:
2943; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2944; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
2945; GFX12-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
2946; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2947; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2948; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:4096 scope:SCOPE_SYS
2949; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
2950; GFX12-SDAG-NEXT:    flat_store_b8 v[0:1], v0
2951; GFX12-SDAG-NEXT:    s_endpgm
2952;
2953; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_split1:
2954; GFX9-GISEL:       ; %bb.0:
2955; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2956; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2957; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0x1000
2958; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 2
2959; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2960; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2961; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
2962; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2963; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
2964; GFX9-GISEL-NEXT:    s_endpgm
2965;
2966; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_split1:
2967; GFX11-GISEL:       ; %bb.0:
2968; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2969; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2970; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x1000
2971; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 2
2972; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2973; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2974; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
2975; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2976; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
2977; GFX11-GISEL-NEXT:    s_endpgm
2978;
2979; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_split1:
2980; GFX12-GISEL:       ; %bb.0:
2981; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2982; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
2983; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x1000
2984; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 2
2985; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2986; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2987; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1] scope:SCOPE_SYS
2988; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
2989; GFX12-GISEL-NEXT:    flat_store_b8 v[0:1], v0
2990; GFX12-GISEL-NEXT:    s_endpgm
2991  %gep = getelementptr i8, ptr %p, i64 8589938688
2992  %load = load volatile i8, ptr %gep, align 1
2993  store i8 %load, ptr undef
2994  ret void
2995}
2996
2997; Fill 13-bit low-bits (1ull << 33) | 8191
; The GEP constant 8589942783 is 2^33 + 8191, i.e. low dword 0x1fff and high
; dword 2. Per the checks below, SelectionDAG on gfx900 and gfx1100 splits the
; low part: 0x1000 is added to the pointer (together with 2 in the high dword)
; and the remaining 4095 becomes the load's immediate offset. SelectionDAG on
; gfx1200 adds only the high dword and keeps all of 8191 as the immediate
; offset. gfx1010 and every GlobalISel run add the whole constant with scalar
; adds and load with no immediate offset.
2998define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split0(ptr %p) {
2999; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
3000; GFX9-SDAG:       ; %bb.0:
3001; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3002; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3003; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
3004; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
3005; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
3006; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
3007; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095 glc
3008; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3009; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
3010; GFX9-SDAG-NEXT:    s_endpgm
3011;
3012; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
3013; GFX10:       ; %bb.0:
3014; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3015; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
3016; GFX10-NEXT:    s_add_u32 s0, s0, 0x1fff
3017; GFX10-NEXT:    s_addc_u32 s1, s1, 2
3018; GFX10-NEXT:    v_mov_b32_e32 v0, s0
3019; GFX10-NEXT:    v_mov_b32_e32 v1, s1
3020; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
3021; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3022; GFX10-NEXT:    flat_store_byte v[0:1], v0
3023; GFX10-NEXT:    s_endpgm
3024;
3025; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
3026; GFX11-SDAG:       ; %bb.0:
3027; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3028; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3029; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0x1000, s0
3030; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3031; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
3032; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:4095 glc dlc
3033; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3034; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
3035; GFX11-SDAG-NEXT:    s_endpgm
3036;
3037; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
3038; GFX12-SDAG:       ; %bb.0:
3039; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3040; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
3041; GFX12-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
3042; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3043; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
3044; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:8191 scope:SCOPE_SYS
3045; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
3046; GFX12-SDAG-NEXT:    flat_store_b8 v[0:1], v0
3047; GFX12-SDAG-NEXT:    s_endpgm
3048;
3049; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
3050; GFX9-GISEL:       ; %bb.0:
3051; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3052; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3053; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0x1fff
3054; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 2
3055; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
3056; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
3057; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
3058; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3059; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
3060; GFX9-GISEL-NEXT:    s_endpgm
3061;
3062; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
3063; GFX11-GISEL:       ; %bb.0:
3064; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3065; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3066; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x1fff
3067; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 2
3068; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3069; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3070; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
3071; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3072; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
3073; GFX11-GISEL-NEXT:    s_endpgm
3074;
3075; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
3076; GFX12-GISEL:       ; %bb.0:
3077; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3078; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
3079; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x1fff
3080; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 2
3081; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3082; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3083; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1] scope:SCOPE_SYS
3084; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
3085; GFX12-GISEL-NEXT:    flat_store_b8 v[0:1], v0
3086; GFX12-GISEL-NEXT:    s_endpgm
3087  %gep = getelementptr i8, ptr %p, i64 8589942783
3088  %load = load volatile i8, ptr %gep, align 1
3089  store i8 %load, ptr undef
3090  ret void
3091}
3092
3093; Low part is 8192, one past the largest 13-bit value: (1ull << 33) | 8192
; The GEP constant 8589942784 is 2^33 + 8192, i.e. low dword 0x2000 and high
; dword 2. Per the checks below, SelectionDAG on gfx900 and gfx1100 adds 0x2000
; and 2 to the pointer with vector adds and loads with no immediate offset,
; while SelectionDAG on gfx1200 adds only the high dword and keeps 8192 as the
; load's immediate offset. gfx1010 and every GlobalISel run add the whole
; constant with scalar adds and load with no immediate offset.
3094define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split1(ptr %p) {
3095; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
3096; GFX9-SDAG:       ; %bb.0:
3097; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3098; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3099; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
3100; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
3101; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
3102; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
3103; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
3104; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3105; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
3106; GFX9-SDAG-NEXT:    s_endpgm
3107;
3108; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
3109; GFX10:       ; %bb.0:
3110; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3111; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
3112; GFX10-NEXT:    s_add_u32 s0, s0, 0x2000
3113; GFX10-NEXT:    s_addc_u32 s1, s1, 2
3114; GFX10-NEXT:    v_mov_b32_e32 v0, s0
3115; GFX10-NEXT:    v_mov_b32_e32 v1, s1
3116; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
3117; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3118; GFX10-NEXT:    flat_store_byte v[0:1], v0
3119; GFX10-NEXT:    s_endpgm
3120;
3121; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
3122; GFX11-SDAG:       ; %bb.0:
3123; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3124; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3125; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0x2000, s0
3126; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3127; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
3128; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
3129; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3130; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
3131; GFX11-SDAG-NEXT:    s_endpgm
3132;
3133; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
3134; GFX12-SDAG:       ; %bb.0:
3135; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3136; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
3137; GFX12-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
3138; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3139; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
3140; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:8192 scope:SCOPE_SYS
3141; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
3142; GFX12-SDAG-NEXT:    flat_store_b8 v[0:1], v0
3143; GFX12-SDAG-NEXT:    s_endpgm
3144;
3145; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
3146; GFX9-GISEL:       ; %bb.0:
3147; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3148; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3149; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0x2000
3150; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 2
3151; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
3152; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
3153; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
3154; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3155; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
3156; GFX9-GISEL-NEXT:    s_endpgm
3157;
3158; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
3159; GFX11-GISEL:       ; %bb.0:
3160; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3161; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3162; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x2000
3163; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 2
3164; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3165; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3166; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
3167; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3168; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
3169; GFX11-GISEL-NEXT:    s_endpgm
3170;
3171; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
3172; GFX12-GISEL:       ; %bb.0:
3173; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3174; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
3175; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x2000
3176; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 2
3177; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3178; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3179; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1] scope:SCOPE_SYS
3180; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
3181; GFX12-GISEL-NEXT:    flat_store_b8 v[0:1], v0
3182; GFX12-GISEL-NEXT:    s_endpgm
3183  %gep = getelementptr i8, ptr %p, i64 8589942784
3184  %load = load volatile i8, ptr %gep, align 1
3185  store i8 %load, ptr undef
3186  ret void
3187}
3188
3189; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
; The GEP constant -9223372036854773761 has only the sign bit set in its high
; dword (0x80000000) and 0x7ff in its low dword. Per the checks below, every
; run except SelectionDAG on gfx1200 adds 0x7ff to the low dword and the
; sign-bit value to the high dword, then loads with no immediate offset.
; SelectionDAG on gfx1200 instead adds 0x800000 to the low dword and uses the
; negative immediate offset -8386561; 0x800000 - 8386561 = 2047.
3190define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split0(ptr %p) {
3191; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
3192; GFX9-SDAG:       ; %bb.0:
3193; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3194; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v1, 1
3195; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3196; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
3197; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s1
3198; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff, v0
3199; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
3200; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
3201; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3202; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
3203; GFX9-SDAG-NEXT:    s_endpgm
3204;
3205; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
3206; GFX10:       ; %bb.0:
3207; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3208; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
3209; GFX10-NEXT:    s_add_u32 s0, s0, 0x7ff
3210; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
3211; GFX10-NEXT:    v_mov_b32_e32 v0, s0
3212; GFX10-NEXT:    v_mov_b32_e32 v1, s1
3213; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
3214; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3215; GFX10-NEXT:    flat_store_byte v[0:1], v0
3216; GFX10-NEXT:    s_endpgm
3217;
3218; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
3219; GFX11-SDAG:       ; %bb.0:
3220; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3221; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3222; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, s1
3223; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff, s0
3224; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
3225; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
3226; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
3227; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3228; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
3229; GFX11-SDAG-NEXT:    s_endpgm
3230;
3231; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
3232; GFX12-SDAG:       ; %bb.0:
3233; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3234; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
3235; GFX12-SDAG-NEXT:    v_mov_b32_e32 v1, s1
3236; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, s0
3237; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
3238; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
3239; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:-8386561 scope:SCOPE_SYS
3240; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
3241; GFX12-SDAG-NEXT:    flat_store_b8 v[0:1], v0
3242; GFX12-SDAG-NEXT:    s_endpgm
3243;
3244; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
3245; GFX9-GISEL:       ; %bb.0:
3246; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3247; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3248; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0x7ff
3249; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
3250; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
3251; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
3252; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
3253; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3254; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
3255; GFX9-GISEL-NEXT:    s_endpgm
3256;
3257; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
3258; GFX11-GISEL:       ; %bb.0:
3259; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3260; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3261; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x7ff
3262; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
3263; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3264; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3265; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
3266; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3267; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
3268; GFX11-GISEL-NEXT:    s_endpgm
3269;
3270; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
3271; GFX12-GISEL:       ; %bb.0:
3272; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3273; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
3274; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x7ff
3275; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0x80000000
3276; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3277; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3278; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1] scope:SCOPE_SYS
3279; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
3280; GFX12-GISEL-NEXT:    flat_store_b8 v[0:1], v0
3281; GFX12-GISEL-NEXT:    s_endpgm
3282  %gep = getelementptr i8, ptr %p, i64 -9223372036854773761
3283  %load = load volatile i8, ptr %gep, align 1
3284  store i8 %load, ptr undef
3285  ret void
3286}
3287
3288; Low part is 2048, one past the largest 11-bit value, negative high bits (1ull << 63) | 2048
; The GEP constant -9223372036854773760 has only the sign bit set in its high
; dword (0x80000000) and 0x800 in its low dword. Per the checks below, every
; run except SelectionDAG on gfx1200 adds 0x800 to the low dword and the
; sign-bit value to the high dword, then loads with no immediate offset.
; SelectionDAG on gfx1200 instead adds 0x800000 to the low dword and uses the
; negative immediate offset -8386560; 0x800000 - 8386560 = 2048.
3289define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split1(ptr %p) {
3290; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
3291; GFX9-SDAG:       ; %bb.0:
3292; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3293; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v1, 1
3294; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3295; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
3296; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s1
3297; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x800, v0
3298; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
3299; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
3300; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3301; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
3302; GFX9-SDAG-NEXT:    s_endpgm
3303;
3304; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
3305; GFX10:       ; %bb.0:
3306; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3307; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
3308; GFX10-NEXT:    s_add_u32 s0, s0, 0x800
3309; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
3310; GFX10-NEXT:    v_mov_b32_e32 v0, s0
3311; GFX10-NEXT:    v_mov_b32_e32 v1, s1
3312; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
3313; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3314; GFX10-NEXT:    flat_store_byte v[0:1], v0
3315; GFX10-NEXT:    s_endpgm
3316;
3317; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
3318; GFX11-SDAG:       ; %bb.0:
3319; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3320; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3321; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, s1
3322; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, s0
3323; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
3324; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
3325; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
3326; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3327; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
3328; GFX11-SDAG-NEXT:    s_endpgm
3329;
3330; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
3331; GFX12-SDAG:       ; %bb.0:
3332; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3333; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
3334; GFX12-SDAG-NEXT:    v_mov_b32_e32 v1, s1
3335; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, s0
3336; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
3337; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
3338; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:-8386560 scope:SCOPE_SYS
3339; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
3340; GFX12-SDAG-NEXT:    flat_store_b8 v[0:1], v0
3341; GFX12-SDAG-NEXT:    s_endpgm
3342;
3343; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
3344; GFX9-GISEL:       ; %bb.0:
3345; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3346; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3347; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0x800
3348; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
3349; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
3350; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
3351; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
3352; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3353; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
3354; GFX9-GISEL-NEXT:    s_endpgm
3355;
3356; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
3357; GFX11-GISEL:       ; %bb.0:
3358; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3359; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3360; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x800
3361; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
3362; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3363; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3364; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
3365; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3366; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
3367; GFX11-GISEL-NEXT:    s_endpgm
3368;
3369; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
3370; GFX12-GISEL:       ; %bb.0:
3371; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3372; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
3373; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x800
3374; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0x80000000
3375; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3376; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3377; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1] scope:SCOPE_SYS
3378; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
3379; GFX12-GISEL-NEXT:    flat_store_b8 v[0:1], v0
3380; GFX12-GISEL-NEXT:    s_endpgm
3381  %gep = getelementptr i8, ptr %p, i64 -9223372036854773760
3382  %load = load volatile i8, ptr %gep, align 1
3383  store i8 %load, ptr undef
3384  ret void
3385}
3386
3387; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
3388define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split0(ptr %p) {
3389; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
3390; GFX9-SDAG:       ; %bb.0:
3391; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3392; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v1, 1
3393; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3394; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
3395; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s1
3396; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfff, v0
3397; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
3398; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
3399; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3400; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
3401; GFX9-SDAG-NEXT:    s_endpgm
3402;
3403; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
3404; GFX10:       ; %bb.0:
3405; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3406; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
3407; GFX10-NEXT:    s_add_u32 s0, s0, 0xfff
3408; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
3409; GFX10-NEXT:    v_mov_b32_e32 v0, s0
3410; GFX10-NEXT:    v_mov_b32_e32 v1, s1
3411; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
3412; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3413; GFX10-NEXT:    flat_store_byte v[0:1], v0
3414; GFX10-NEXT:    s_endpgm
3415;
3416; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
3417; GFX11-SDAG:       ; %bb.0:
3418; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3419; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3420; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, s1
3421; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, s0
3422; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
3423; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
3424; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
3425; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3426; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
3427; GFX11-SDAG-NEXT:    s_endpgm
3428;
3429; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
3430; GFX12-SDAG:       ; %bb.0:
3431; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3432; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
3433; GFX12-SDAG-NEXT:    v_mov_b32_e32 v1, s1
3434; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, s0
3435; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
3436; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
3437; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:-8384513 scope:SCOPE_SYS
3438; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
3439; GFX12-SDAG-NEXT:    flat_store_b8 v[0:1], v0
3440; GFX12-SDAG-NEXT:    s_endpgm
3441;
3442; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
3443; GFX9-GISEL:       ; %bb.0:
3444; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3445; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3446; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0xfff
3447; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
3448; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
3449; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
3450; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
3451; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3452; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
3453; GFX9-GISEL-NEXT:    s_endpgm
3454;
3455; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
3456; GFX11-GISEL:       ; %bb.0:
3457; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3458; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3459; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0xfff
3460; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
3461; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3462; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3463; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
3464; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3465; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
3466; GFX11-GISEL-NEXT:    s_endpgm
3467;
3468; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
3469; GFX12-GISEL:       ; %bb.0:
3470; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3471; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
3472; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0xfff
3473; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0x80000000
3474; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3475; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3476; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1] scope:SCOPE_SYS
3477; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
3478; GFX12-GISEL-NEXT:    flat_store_b8 v[0:1], v0
3479; GFX12-GISEL-NEXT:    s_endpgm
3480  %gep = getelementptr i8, ptr %p, i64 -9223372036854771713
3481  %load = load volatile i8, ptr %gep, align 1
3482  store i8 %load, ptr undef
3483  ret void
3484}
3485
3486; One past the 12-bit low-bit maximum (4096 = 0x1000), negative high bits: (1ull << 63) | 4096
; The GEP constant below is 0x8000000000001000 when read as an unsigned 64-bit value.
; In the gfx1200 SelectionDAG checks the low part is split into an add of 0x800000
; plus an instruction offset of -8384512 (0x800000 - 8384512 = 4096); every other
; check block adds the whole 64-bit constant to the pointer and uses no offset.
3487define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split1(ptr %p) {
3488; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
3489; GFX9-SDAG:       ; %bb.0:
3490; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3491; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v1, 1
3492; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3493; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
3494; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s1
3495; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
3496; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
3497; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
3498; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3499; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
3500; GFX9-SDAG-NEXT:    s_endpgm
3501;
3502; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
3503; GFX10:       ; %bb.0:
3504; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3505; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
3506; GFX10-NEXT:    s_add_u32 s0, s0, 0x1000
3507; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
3508; GFX10-NEXT:    v_mov_b32_e32 v0, s0
3509; GFX10-NEXT:    v_mov_b32_e32 v1, s1
3510; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
3511; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3512; GFX10-NEXT:    flat_store_byte v[0:1], v0
3513; GFX10-NEXT:    s_endpgm
3514;
3515; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
3516; GFX11-SDAG:       ; %bb.0:
3517; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3518; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3519; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, s1
3520; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, s0
3521; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
3522; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
3523; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
3524; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3525; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
3526; GFX11-SDAG-NEXT:    s_endpgm
3527;
3528; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
3529; GFX12-SDAG:       ; %bb.0:
3530; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3531; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
3532; GFX12-SDAG-NEXT:    v_mov_b32_e32 v1, s1
3533; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, s0
3534; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
3535; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
3536; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:-8384512 scope:SCOPE_SYS
3537; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
3538; GFX12-SDAG-NEXT:    flat_store_b8 v[0:1], v0
3539; GFX12-SDAG-NEXT:    s_endpgm
3540;
3541; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
3542; GFX9-GISEL:       ; %bb.0:
3543; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3544; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3545; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0x1000
3546; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
3547; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
3548; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
3549; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
3550; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3551; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
3552; GFX9-GISEL-NEXT:    s_endpgm
3553;
3554; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
3555; GFX11-GISEL:       ; %bb.0:
3556; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3557; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3558; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x1000
3559; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
3560; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3561; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3562; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
3563; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3564; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
3565; GFX11-GISEL-NEXT:    s_endpgm
3566;
3567; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
3568; GFX12-GISEL:       ; %bb.0:
3569; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3570; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
3571; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x1000
3572; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0x80000000
3573; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3574; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3575; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1] scope:SCOPE_SYS
3576; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
3577; GFX12-GISEL-NEXT:    flat_store_b8 v[0:1], v0
3578; GFX12-GISEL-NEXT:    s_endpgm
3579  %gep = getelementptr i8, ptr %p, i64 -9223372036854771712
3580  %load = load volatile i8, ptr %gep, align 1
3581  store i8 %load, ptr undef
3582  ret void
3583}
3584
3585; Fill 13-bit low-bits (8191 = 0x1fff), negative high bits: (1ull << 63) | 8191
; The GEP constant below is 0x8000000000001fff when read as an unsigned 64-bit value.
; In the gfx1200 SelectionDAG checks the low part is split into an add of 0x800000
; plus an instruction offset of -8380417 (0x800000 - 8380417 = 8191); every other
; check block adds the whole 64-bit constant to the pointer and uses no offset.
3586define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split0(ptr %p) {
3587; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
3588; GFX9-SDAG:       ; %bb.0:
3589; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3590; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v1, 1
3591; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3592; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
3593; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s1
3594; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1fff, v0
3595; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
3596; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
3597; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3598; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
3599; GFX9-SDAG-NEXT:    s_endpgm
3600;
3601; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
3602; GFX10:       ; %bb.0:
3603; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3604; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
3605; GFX10-NEXT:    s_add_u32 s0, s0, 0x1fff
3606; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
3607; GFX10-NEXT:    v_mov_b32_e32 v0, s0
3608; GFX10-NEXT:    v_mov_b32_e32 v1, s1
3609; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
3610; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3611; GFX10-NEXT:    flat_store_byte v[0:1], v0
3612; GFX10-NEXT:    s_endpgm
3613;
3614; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
3615; GFX11-SDAG:       ; %bb.0:
3616; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3617; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3618; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, s1
3619; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, s0
3620; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
3621; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
3622; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
3623; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3624; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
3625; GFX11-SDAG-NEXT:    s_endpgm
3626;
3627; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
3628; GFX12-SDAG:       ; %bb.0:
3629; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3630; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
3631; GFX12-SDAG-NEXT:    v_mov_b32_e32 v1, s1
3632; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, s0
3633; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
3634; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
3635; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:-8380417 scope:SCOPE_SYS
3636; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
3637; GFX12-SDAG-NEXT:    flat_store_b8 v[0:1], v0
3638; GFX12-SDAG-NEXT:    s_endpgm
3639;
3640; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
3641; GFX9-GISEL:       ; %bb.0:
3642; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3643; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3644; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0x1fff
3645; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
3646; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
3647; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
3648; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
3649; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3650; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
3651; GFX9-GISEL-NEXT:    s_endpgm
3652;
3653; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
3654; GFX11-GISEL:       ; %bb.0:
3655; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3656; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3657; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x1fff
3658; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
3659; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3660; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3661; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
3662; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3663; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
3664; GFX11-GISEL-NEXT:    s_endpgm
3665;
3666; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
3667; GFX12-GISEL:       ; %bb.0:
3668; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3669; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
3670; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x1fff
3671; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0x80000000
3672; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3673; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3674; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1] scope:SCOPE_SYS
3675; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
3676; GFX12-GISEL-NEXT:    flat_store_b8 v[0:1], v0
3677; GFX12-GISEL-NEXT:    s_endpgm
3678  %gep = getelementptr i8, ptr %p, i64 -9223372036854767617
3679  %load = load volatile i8, ptr %gep, align 1
3680  store i8 %load, ptr undef
3681  ret void
3682}
3683
3684; One past the 13-bit low-bit maximum (8192 = 0x2000), negative high bits: (1ull << 63) | 8192
; The GEP constant below is 0x8000000000002000 when read as an unsigned 64-bit value.
; In the gfx1200 SelectionDAG checks the low part is split into an add of 0x800000
; plus an instruction offset of -8380416 (0x800000 - 8380416 = 8192); every other
; check block adds the whole 64-bit constant to the pointer and uses no offset.
3685define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split1(ptr %p) {
3686; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
3687; GFX9-SDAG:       ; %bb.0:
3688; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3689; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v1, 1
3690; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3691; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
3692; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s1
3693; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
3694; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
3695; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
3696; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3697; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
3698; GFX9-SDAG-NEXT:    s_endpgm
3699;
3700; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
3701; GFX10:       ; %bb.0:
3702; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3703; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
3704; GFX10-NEXT:    s_add_u32 s0, s0, 0x2000
3705; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
3706; GFX10-NEXT:    v_mov_b32_e32 v0, s0
3707; GFX10-NEXT:    v_mov_b32_e32 v1, s1
3708; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
3709; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3710; GFX10-NEXT:    flat_store_byte v[0:1], v0
3711; GFX10-NEXT:    s_endpgm
3712;
3713; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
3714; GFX11-SDAG:       ; %bb.0:
3715; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3716; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3717; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, s1
3718; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, s0
3719; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
3720; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
3721; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
3722; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3723; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
3724; GFX11-SDAG-NEXT:    s_endpgm
3725;
3726; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
3727; GFX12-SDAG:       ; %bb.0:
3728; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3729; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
3730; GFX12-SDAG-NEXT:    v_mov_b32_e32 v1, s1
3731; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, s0
3732; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
3733; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
3734; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:-8380416 scope:SCOPE_SYS
3735; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
3736; GFX12-SDAG-NEXT:    flat_store_b8 v[0:1], v0
3737; GFX12-SDAG-NEXT:    s_endpgm
3738;
3739; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
3740; GFX9-GISEL:       ; %bb.0:
3741; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3742; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3743; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0x2000
3744; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
3745; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
3746; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
3747; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
3748; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3749; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
3750; GFX9-GISEL-NEXT:    s_endpgm
3751;
3752; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
3753; GFX11-GISEL:       ; %bb.0:
3754; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3755; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3756; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x2000
3757; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
3758; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3759; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3760; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
3761; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3762; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
3763; GFX11-GISEL-NEXT:    s_endpgm
3764;
3765; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
3766; GFX12-GISEL:       ; %bb.0:
3767; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3768; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
3769; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x2000
3770; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0x80000000
3771; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3772; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3773; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1] scope:SCOPE_SYS
3774; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
3775; GFX12-GISEL-NEXT:    flat_store_b8 v[0:1], v0
3776; GFX12-GISEL-NEXT:    s_endpgm
3777  %gep = getelementptr i8, ptr %p, i64 -9223372036854767616
3778  %load = load volatile i8, ptr %gep, align 1
3779  store i8 %load, ptr undef
3780  ret void
3781}
3782;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
3783; GFX10-GISEL: {{.*}}
3784; GFX10-SDAG: {{.*}}
3785