xref: /llvm-project/llvm/test/CodeGen/AMDGPU/offset-split-global.ll (revision 89cb0eefcbb6303ba6813238d5ad37b103495d11)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
3; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
4; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
5; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
6; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
7; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
8; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
9; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
10
11; Test splitting global instruction offsets into the low and high bits
12; when the offset doesn't fit in the instruction's immediate offset field.
13
; Loads one i8 from the addrspace(1) pointer %p advanced by 1 byte.
; Expected output on gfx900, gfx1010, gfx1100 and gfx1200 (same for both
; instruction selectors, since the shared per-target prefixes are used):
; the constant stays in the load's immediate field as offset:1 and no
; separate address arithmetic is emitted.
14define i8 @global_inst_valu_offset_1(ptr addrspace(1) %p) {
15; GFX9-LABEL: global_inst_valu_offset_1:
16; GFX9:       ; %bb.0:
17; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
19; GFX9-NEXT:    s_waitcnt vmcnt(0)
20; GFX9-NEXT:    s_setpc_b64 s[30:31]
21;
22; GFX10-LABEL: global_inst_valu_offset_1:
23; GFX10:       ; %bb.0:
24; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
26; GFX10-NEXT:    s_waitcnt vmcnt(0)
27; GFX10-NEXT:    s_setpc_b64 s[30:31]
28;
29; GFX11-LABEL: global_inst_valu_offset_1:
30; GFX11:       ; %bb.0:
31; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
32; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:1
33; GFX11-NEXT:    s_waitcnt vmcnt(0)
34; GFX11-NEXT:    s_setpc_b64 s[30:31]
35;
36; GFX12-LABEL: global_inst_valu_offset_1:
37; GFX12:       ; %bb.0:
38; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
39; GFX12-NEXT:    s_wait_expcnt 0x0
40; GFX12-NEXT:    s_wait_samplecnt 0x0
41; GFX12-NEXT:    s_wait_bvhcnt 0x0
42; GFX12-NEXT:    s_wait_kmcnt 0x0
43; GFX12-NEXT:    global_load_u8 v0, v[0:1], off offset:1
44; GFX12-NEXT:    s_wait_loadcnt 0x0
45; GFX12-NEXT:    s_setpc_b64 s[30:31]
46  %gep = getelementptr i8, ptr addrspace(1) %p, i64 1
47  %load = load i8, ptr addrspace(1) %gep, align 4
48  ret i8 %load
49}
50
; Loads one i8 from %p + 2047 (0x7ff).
; Expected output on all four targets, for both selectors: the whole
; constant is carried in the load's immediate field (offset:2047) with no
; add on the address registers.
51define i8 @global_inst_valu_offset_11bit_max(ptr addrspace(1) %p) {
52; GFX9-LABEL: global_inst_valu_offset_11bit_max:
53; GFX9:       ; %bb.0:
54; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
56; GFX9-NEXT:    s_waitcnt vmcnt(0)
57; GFX9-NEXT:    s_setpc_b64 s[30:31]
58;
59; GFX10-LABEL: global_inst_valu_offset_11bit_max:
60; GFX10:       ; %bb.0:
61; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
62; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
63; GFX10-NEXT:    s_waitcnt vmcnt(0)
64; GFX10-NEXT:    s_setpc_b64 s[30:31]
65;
66; GFX11-LABEL: global_inst_valu_offset_11bit_max:
67; GFX11:       ; %bb.0:
68; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:2047
70; GFX11-NEXT:    s_waitcnt vmcnt(0)
71; GFX11-NEXT:    s_setpc_b64 s[30:31]
72;
73; GFX12-LABEL: global_inst_valu_offset_11bit_max:
74; GFX12:       ; %bb.0:
75; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
76; GFX12-NEXT:    s_wait_expcnt 0x0
77; GFX12-NEXT:    s_wait_samplecnt 0x0
78; GFX12-NEXT:    s_wait_bvhcnt 0x0
79; GFX12-NEXT:    s_wait_kmcnt 0x0
80; GFX12-NEXT:    global_load_u8 v0, v[0:1], off offset:2047
81; GFX12-NEXT:    s_wait_loadcnt 0x0
82; GFX12-NEXT:    s_setpc_b64 s[30:31]
83  %gep = getelementptr i8, ptr addrspace(1) %p, i64 2047
84  %load = load i8, ptr addrspace(1) %gep, align 4
85  ret i8 %load
86}
87
; Loads one i8 from %p + 4095 (0xfff).
; Expected output:
;  - gfx900, gfx1100, gfx1200 (both selectors): constant kept entirely in
;    the immediate field as offset:4095.
;  - gfx1010 with GlobalISel: the full 0xfff is added to the 64-bit address
;    (add + add-with-carry) and the load carries no immediate offset.
;  - gfx1010 with SelectionDAG: the constant is split; 0x800 is added to the
;    address and the remaining 2047 stays in the immediate field.
88define i8 @global_inst_valu_offset_12bit_max(ptr addrspace(1) %p) {
89; GFX9-LABEL: global_inst_valu_offset_12bit_max:
90; GFX9:       ; %bb.0:
91; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
92; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
93; GFX9-NEXT:    s_waitcnt vmcnt(0)
94; GFX9-NEXT:    s_setpc_b64 s[30:31]
95;
96; GFX10-GISEL-LABEL: global_inst_valu_offset_12bit_max:
97; GFX10-GISEL:       ; %bb.0:
98; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99; GFX10-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
100; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
101; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
102; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
103; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
104;
105; GFX11-LABEL: global_inst_valu_offset_12bit_max:
106; GFX11:       ; %bb.0:
107; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:4095
109; GFX11-NEXT:    s_waitcnt vmcnt(0)
110; GFX11-NEXT:    s_setpc_b64 s[30:31]
111;
112; GFX12-LABEL: global_inst_valu_offset_12bit_max:
113; GFX12:       ; %bb.0:
114; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
115; GFX12-NEXT:    s_wait_expcnt 0x0
116; GFX12-NEXT:    s_wait_samplecnt 0x0
117; GFX12-NEXT:    s_wait_bvhcnt 0x0
118; GFX12-NEXT:    s_wait_kmcnt 0x0
119; GFX12-NEXT:    global_load_u8 v0, v[0:1], off offset:4095
120; GFX12-NEXT:    s_wait_loadcnt 0x0
121; GFX12-NEXT:    s_setpc_b64 s[30:31]
122;
123; GFX10-SDAG-LABEL: global_inst_valu_offset_12bit_max:
124; GFX10-SDAG:       ; %bb.0:
125; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126; GFX10-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
127; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
128; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
129; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
130; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
131  %gep = getelementptr i8, ptr addrspace(1) %p, i64 4095
132  %load = load i8, ptr addrspace(1) %gep, align 4
133  ret i8 %load
134}
135
; Loads one i8 from %p + 8191 (0x1fff).
; Expected output:
;  - gfx1200 (both selectors): constant kept entirely in the immediate
;    field as offset:8191.
;  - GlobalISel on gfx900, gfx1010, gfx1100: the full 0x1fff is added to the
;    64-bit address and the load carries no immediate offset.
;  - SelectionDAG on gfx900 and gfx1100: split into an add of 0x1000 plus
;    offset:4095.
;  - SelectionDAG on gfx1010: split into an add of 0x1800 plus offset:2047.
136define i8 @global_inst_valu_offset_13bit_max(ptr addrspace(1) %p) {
137; GFX9-GISEL-LABEL: global_inst_valu_offset_13bit_max:
138; GFX9-GISEL:       ; %bb.0:
139; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1fff, v0
141; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
142; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
143; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
144; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
145;
146; GFX10-GISEL-LABEL: global_inst_valu_offset_13bit_max:
147; GFX10-GISEL:       ; %bb.0:
148; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
149; GFX10-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
150; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
151; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
152; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
153; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
154;
155; GFX11-GISEL-LABEL: global_inst_valu_offset_13bit_max:
156; GFX11-GISEL:       ; %bb.0:
157; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
158; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
159; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
160; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
161; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
162; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
163;
164; GFX12-LABEL: global_inst_valu_offset_13bit_max:
165; GFX12:       ; %bb.0:
166; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
167; GFX12-NEXT:    s_wait_expcnt 0x0
168; GFX12-NEXT:    s_wait_samplecnt 0x0
169; GFX12-NEXT:    s_wait_bvhcnt 0x0
170; GFX12-NEXT:    s_wait_kmcnt 0x0
171; GFX12-NEXT:    global_load_u8 v0, v[0:1], off offset:8191
172; GFX12-NEXT:    s_wait_loadcnt 0x0
173; GFX12-NEXT:    s_setpc_b64 s[30:31]
174;
175; GFX9-SDAG-LABEL: global_inst_valu_offset_13bit_max:
176; GFX9-SDAG:       ; %bb.0:
177; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
179; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
180; GFX9-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
181; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
182; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
183;
184; GFX10-SDAG-LABEL: global_inst_valu_offset_13bit_max:
185; GFX10-SDAG:       ; %bb.0:
186; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187; GFX10-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1800, v0
188; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
189; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
190; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
191; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
192;
193; GFX11-SDAG-LABEL: global_inst_valu_offset_13bit_max:
194; GFX11-SDAG:       ; %bb.0:
195; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
197; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
198; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:4095
199; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
200; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
201  %gep = getelementptr i8, ptr addrspace(1) %p, i64 8191
202  %load = load i8, ptr addrspace(1) %gep, align 4
203  ret i8 %load
204}
205
; Loads one i8 from %p + 8388607 (0x7fffff).
; Expected output:
;  - gfx1200 (both selectors): constant kept entirely in the immediate
;    field as offset:8388607.
;  - GlobalISel on gfx900, gfx1010, gfx1100: the full 0x7fffff is added to
;    the 64-bit address and the load carries no immediate offset.
;  - SelectionDAG on gfx900 and gfx1100: split into an add of 0x7ff000 plus
;    offset:4095.
;  - SelectionDAG on gfx1010: split into an add of 0x7ff800 plus
;    offset:2047.
206define i8 @global_inst_valu_offset_24bit_max(ptr addrspace(1) %p) {
207; GFX9-GISEL-LABEL: global_inst_valu_offset_24bit_max:
208; GFX9-GISEL:       ; %bb.0:
209; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
211; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
212; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
213; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
214; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
215;
216; GFX10-GISEL-LABEL: global_inst_valu_offset_24bit_max:
217; GFX10-GISEL:       ; %bb.0:
218; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
219; GFX10-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
220; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
221; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
222; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
223; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
224;
225; GFX11-GISEL-LABEL: global_inst_valu_offset_24bit_max:
226; GFX11-GISEL:       ; %bb.0:
227; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
229; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
230; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
231; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
232; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
233;
234; GFX12-LABEL: global_inst_valu_offset_24bit_max:
235; GFX12:       ; %bb.0:
236; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
237; GFX12-NEXT:    s_wait_expcnt 0x0
238; GFX12-NEXT:    s_wait_samplecnt 0x0
239; GFX12-NEXT:    s_wait_bvhcnt 0x0
240; GFX12-NEXT:    s_wait_kmcnt 0x0
241; GFX12-NEXT:    global_load_u8 v0, v[0:1], off offset:8388607
242; GFX12-NEXT:    s_wait_loadcnt 0x0
243; GFX12-NEXT:    s_setpc_b64 s[30:31]
244;
245; GFX9-SDAG-LABEL: global_inst_valu_offset_24bit_max:
246; GFX9-SDAG:       ; %bb.0:
247; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
248; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
249; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
250; GFX9-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
251; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
252; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
253;
254; GFX10-SDAG-LABEL: global_inst_valu_offset_24bit_max:
255; GFX10-SDAG:       ; %bb.0:
256; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
257; GFX10-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
258; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
259; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
260; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
261; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
262;
263; GFX11-SDAG-LABEL: global_inst_valu_offset_24bit_max:
264; GFX11-SDAG:       ; %bb.0:
265; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff000, v0
267; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
268; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:4095
269; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
270; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
271  %gep = getelementptr i8, ptr addrspace(1) %p, i64 8388607
272  %load = load i8, ptr addrspace(1) %gep, align 4
273  ret i8 %load
274}
275
; Loads one i8 from %p - 2048.
; Expected output on all four targets, for both selectors: the negative
; constant is carried in the load's immediate field (offset:-2048) with no
; add on the address registers.
276define i8 @global_inst_valu_offset_neg_11bit_max(ptr addrspace(1) %p) {
277; GFX9-LABEL: global_inst_valu_offset_neg_11bit_max:
278; GFX9:       ; %bb.0:
279; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
280; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
281; GFX9-NEXT:    s_waitcnt vmcnt(0)
282; GFX9-NEXT:    s_setpc_b64 s[30:31]
283;
284; GFX10-LABEL: global_inst_valu_offset_neg_11bit_max:
285; GFX10:       ; %bb.0:
286; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
287; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
288; GFX10-NEXT:    s_waitcnt vmcnt(0)
289; GFX10-NEXT:    s_setpc_b64 s[30:31]
290;
291; GFX11-LABEL: global_inst_valu_offset_neg_11bit_max:
292; GFX11:       ; %bb.0:
293; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
294; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:-2048
295; GFX11-NEXT:    s_waitcnt vmcnt(0)
296; GFX11-NEXT:    s_setpc_b64 s[30:31]
297;
298; GFX12-LABEL: global_inst_valu_offset_neg_11bit_max:
299; GFX12:       ; %bb.0:
300; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
301; GFX12-NEXT:    s_wait_expcnt 0x0
302; GFX12-NEXT:    s_wait_samplecnt 0x0
303; GFX12-NEXT:    s_wait_bvhcnt 0x0
304; GFX12-NEXT:    s_wait_kmcnt 0x0
305; GFX12-NEXT:    global_load_u8 v0, v[0:1], off offset:-2048
306; GFX12-NEXT:    s_wait_loadcnt 0x0
307; GFX12-NEXT:    s_setpc_b64 s[30:31]
308  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -2048
309  %load = load i8, ptr addrspace(1) %gep, align 4
310  ret i8 %load
311}
312
; Loads one i8 from %p - 4096.
; Expected output:
;  - gfx900, gfx1100, gfx1200 (both selectors): constant kept in the
;    immediate field as offset:-4096.
;  - gfx1010 (both selectors): the constant is applied with a 64-bit add
;    (low half 0xfffff000, high half -1 with carry) and the load carries no
;    immediate offset.
313define i8 @global_inst_valu_offset_neg_12bit_max(ptr addrspace(1) %p) {
314; GFX9-LABEL: global_inst_valu_offset_neg_12bit_max:
315; GFX9:       ; %bb.0:
316; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
317; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-4096
318; GFX9-NEXT:    s_waitcnt vmcnt(0)
319; GFX9-NEXT:    s_setpc_b64 s[30:31]
320;
321; GFX10-LABEL: global_inst_valu_offset_neg_12bit_max:
322; GFX10:       ; %bb.0:
323; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
324; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
325; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
326; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
327; GFX10-NEXT:    s_waitcnt vmcnt(0)
328; GFX10-NEXT:    s_setpc_b64 s[30:31]
329;
330; GFX11-LABEL: global_inst_valu_offset_neg_12bit_max:
331; GFX11:       ; %bb.0:
332; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
333; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:-4096
334; GFX11-NEXT:    s_waitcnt vmcnt(0)
335; GFX11-NEXT:    s_setpc_b64 s[30:31]
336;
337; GFX12-LABEL: global_inst_valu_offset_neg_12bit_max:
338; GFX12:       ; %bb.0:
339; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
340; GFX12-NEXT:    s_wait_expcnt 0x0
341; GFX12-NEXT:    s_wait_samplecnt 0x0
342; GFX12-NEXT:    s_wait_bvhcnt 0x0
343; GFX12-NEXT:    s_wait_kmcnt 0x0
344; GFX12-NEXT:    global_load_u8 v0, v[0:1], off offset:-4096
345; GFX12-NEXT:    s_wait_loadcnt 0x0
346; GFX12-NEXT:    s_setpc_b64 s[30:31]
347  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -4096
348  %load = load i8, ptr addrspace(1) %gep, align 4
349  ret i8 %load
350}
351
; Loads one i8 from %p - 8192.
; Expected output:
;  - gfx1200 (both selectors): constant kept in the immediate field as
;    offset:-8192.
;  - gfx900, gfx1010, gfx1100 (both selectors): the constant is applied with
;    a 64-bit add (low half 0xffffe000, high half -1 with carry) and the
;    load carries no immediate offset.
352define i8 @global_inst_valu_offset_neg_13bit_max(ptr addrspace(1) %p) {
353; GFX9-LABEL: global_inst_valu_offset_neg_13bit_max:
354; GFX9:       ; %bb.0:
355; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
356; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
357; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
358; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
359; GFX9-NEXT:    s_waitcnt vmcnt(0)
360; GFX9-NEXT:    s_setpc_b64 s[30:31]
361;
362; GFX10-LABEL: global_inst_valu_offset_neg_13bit_max:
363; GFX10:       ; %bb.0:
364; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
365; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
366; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
367; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
368; GFX10-NEXT:    s_waitcnt vmcnt(0)
369; GFX10-NEXT:    s_setpc_b64 s[30:31]
370;
371; GFX11-LABEL: global_inst_valu_offset_neg_13bit_max:
372; GFX11:       ; %bb.0:
373; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
374; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
375; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
376; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
377; GFX11-NEXT:    s_waitcnt vmcnt(0)
378; GFX11-NEXT:    s_setpc_b64 s[30:31]
379;
380; GFX12-LABEL: global_inst_valu_offset_neg_13bit_max:
381; GFX12:       ; %bb.0:
382; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
383; GFX12-NEXT:    s_wait_expcnt 0x0
384; GFX12-NEXT:    s_wait_samplecnt 0x0
385; GFX12-NEXT:    s_wait_bvhcnt 0x0
386; GFX12-NEXT:    s_wait_kmcnt 0x0
387; GFX12-NEXT:    global_load_u8 v0, v[0:1], off offset:-8192
388; GFX12-NEXT:    s_wait_loadcnt 0x0
389; GFX12-NEXT:    s_setpc_b64 s[30:31]
390  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -8192
391  %load = load i8, ptr addrspace(1) %gep, align 4
392  ret i8 %load
393}
394
; Loads one i8 from %p - 8388608.
; Expected output:
;  - gfx1200 (both selectors): constant kept in the immediate field as
;    offset:-8388608.
;  - gfx900, gfx1010, gfx1100 (both selectors): the constant is applied with
;    a 64-bit add (low half 0xff800000, high half -1 with carry) and the
;    load carries no immediate offset.
395define i8 @global_inst_valu_offset_neg_24bit_max(ptr addrspace(1) %p) {
396; GFX9-LABEL: global_inst_valu_offset_neg_24bit_max:
397; GFX9:       ; %bb.0:
398; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
399; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff800000, v0
400; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
401; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
402; GFX9-NEXT:    s_waitcnt vmcnt(0)
403; GFX9-NEXT:    s_setpc_b64 s[30:31]
404;
405; GFX10-LABEL: global_inst_valu_offset_neg_24bit_max:
406; GFX10:       ; %bb.0:
407; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
408; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
409; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
410; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
411; GFX10-NEXT:    s_waitcnt vmcnt(0)
412; GFX10-NEXT:    s_setpc_b64 s[30:31]
413;
414; GFX11-LABEL: global_inst_valu_offset_neg_24bit_max:
415; GFX11:       ; %bb.0:
416; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
417; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
418; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
419; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
420; GFX11-NEXT:    s_waitcnt vmcnt(0)
421; GFX11-NEXT:    s_setpc_b64 s[30:31]
422;
423; GFX12-LABEL: global_inst_valu_offset_neg_24bit_max:
424; GFX12:       ; %bb.0:
425; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
426; GFX12-NEXT:    s_wait_expcnt 0x0
427; GFX12-NEXT:    s_wait_samplecnt 0x0
428; GFX12-NEXT:    s_wait_bvhcnt 0x0
429; GFX12-NEXT:    s_wait_kmcnt 0x0
430; GFX12-NEXT:    global_load_u8 v0, v[0:1], off offset:-8388608
431; GFX12-NEXT:    s_wait_loadcnt 0x0
432; GFX12-NEXT:    s_setpc_b64 s[30:31]
433  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -8388608
434  %load = load i8, ptr addrspace(1) %gep, align 4
435  ret i8 %load
436}
437
; Loads one i8 from %p + 4095 (0xfff). The byte offset is the same constant
; used by global_inst_valu_offset_12bit_max.
; Expected output:
;  - gfx900, gfx1100, gfx1200 (both selectors): constant kept entirely in
;    the immediate field as offset:4095.
;  - gfx1010 with GlobalISel: the full 0xfff is added to the 64-bit address
;    and the load carries no immediate offset.
;  - gfx1010 with SelectionDAG: split into an add of 0x800 plus offset:2047.
438define i8 @global_inst_valu_offset_2x_11bit_max(ptr addrspace(1) %p) {
439; GFX9-LABEL: global_inst_valu_offset_2x_11bit_max:
440; GFX9:       ; %bb.0:
441; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
442; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
443; GFX9-NEXT:    s_waitcnt vmcnt(0)
444; GFX9-NEXT:    s_setpc_b64 s[30:31]
445;
446; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_11bit_max:
447; GFX10-GISEL:       ; %bb.0:
448; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
449; GFX10-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
450; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
451; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
452; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
453; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
454;
455; GFX11-LABEL: global_inst_valu_offset_2x_11bit_max:
456; GFX11:       ; %bb.0:
457; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
458; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:4095
459; GFX11-NEXT:    s_waitcnt vmcnt(0)
460; GFX11-NEXT:    s_setpc_b64 s[30:31]
461;
462; GFX12-LABEL: global_inst_valu_offset_2x_11bit_max:
463; GFX12:       ; %bb.0:
464; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
465; GFX12-NEXT:    s_wait_expcnt 0x0
466; GFX12-NEXT:    s_wait_samplecnt 0x0
467; GFX12-NEXT:    s_wait_bvhcnt 0x0
468; GFX12-NEXT:    s_wait_kmcnt 0x0
469; GFX12-NEXT:    global_load_u8 v0, v[0:1], off offset:4095
470; GFX12-NEXT:    s_wait_loadcnt 0x0
471; GFX12-NEXT:    s_setpc_b64 s[30:31]
472;
473; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_11bit_max:
474; GFX10-SDAG:       ; %bb.0:
475; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
476; GFX10-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
477; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
478; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
479; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
480; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
481  %gep = getelementptr i8, ptr addrspace(1) %p, i64 4095
482  %load = load i8, ptr addrspace(1) %gep, align 4
483  ret i8 %load
484}
485
; Loads one i8 from %p + 8191 (0x1fff). The byte offset is the same constant
; used by global_inst_valu_offset_13bit_max.
; Expected output:
;  - gfx1200 (both selectors): constant kept entirely in the immediate
;    field as offset:8191.
;  - GlobalISel on gfx900, gfx1010, gfx1100: the full 0x1fff is added to the
;    64-bit address and the load carries no immediate offset.
;  - SelectionDAG on gfx900 and gfx1100: split into an add of 0x1000 plus
;    offset:4095.
;  - SelectionDAG on gfx1010: split into an add of 0x1800 plus offset:2047.
486define i8 @global_inst_valu_offset_2x_12bit_max(ptr addrspace(1) %p) {
487; GFX9-GISEL-LABEL: global_inst_valu_offset_2x_12bit_max:
488; GFX9-GISEL:       ; %bb.0:
489; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
490; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1fff, v0
491; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
492; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
493; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
494; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
495;
496; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_12bit_max:
497; GFX10-GISEL:       ; %bb.0:
498; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
499; GFX10-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
500; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
501; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
502; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
503; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
504;
505; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_12bit_max:
506; GFX11-GISEL:       ; %bb.0:
507; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
508; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
509; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
510; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
511; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
512; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
513;
514; GFX12-LABEL: global_inst_valu_offset_2x_12bit_max:
515; GFX12:       ; %bb.0:
516; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
517; GFX12-NEXT:    s_wait_expcnt 0x0
518; GFX12-NEXT:    s_wait_samplecnt 0x0
519; GFX12-NEXT:    s_wait_bvhcnt 0x0
520; GFX12-NEXT:    s_wait_kmcnt 0x0
521; GFX12-NEXT:    global_load_u8 v0, v[0:1], off offset:8191
522; GFX12-NEXT:    s_wait_loadcnt 0x0
523; GFX12-NEXT:    s_setpc_b64 s[30:31]
524;
525; GFX9-SDAG-LABEL: global_inst_valu_offset_2x_12bit_max:
526; GFX9-SDAG:       ; %bb.0:
527; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
528; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
529; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
530; GFX9-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
531; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
532; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
533;
534; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_12bit_max:
535; GFX10-SDAG:       ; %bb.0:
536; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
537; GFX10-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1800, v0
538; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
539; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
540; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
541; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
542;
543; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_12bit_max:
544; GFX11-SDAG:       ; %bb.0:
545; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
546; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
547; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
548; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:4095
549; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
550; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
551  %gep = getelementptr i8, ptr addrspace(1) %p, i64 8191
552  %load = load i8, ptr addrspace(1) %gep, align 4
553  ret i8 %load
554}
555
; Loads one i8 from %p + 16383 (0x3fff).
; Expected output:
;  - gfx1200 (both selectors): constant kept entirely in the immediate
;    field as offset:16383.
;  - GlobalISel on gfx900, gfx1010, gfx1100: the full 0x3fff is added to the
;    64-bit address and the load carries no immediate offset.
;  - SelectionDAG on gfx900 and gfx1100: split into an add of 0x3000 plus
;    offset:4095.
;  - SelectionDAG on gfx1010: split into an add of 0x3800 plus offset:2047.
556define i8 @global_inst_valu_offset_2x_13bit_max(ptr addrspace(1) %p) {
557; GFX9-GISEL-LABEL: global_inst_valu_offset_2x_13bit_max:
558; GFX9-GISEL:       ; %bb.0:
559; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
560; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3fff, v0
561; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
562; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
563; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
564; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
565;
566; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_13bit_max:
567; GFX10-GISEL:       ; %bb.0:
568; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
569; GFX10-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3fff, v0
570; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
571; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
572; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
573; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
574;
575; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_13bit_max:
576; GFX11-GISEL:       ; %bb.0:
577; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
578; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3fff, v0
579; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
580; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
581; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
582; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
583;
584; GFX12-LABEL: global_inst_valu_offset_2x_13bit_max:
585; GFX12:       ; %bb.0:
586; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
587; GFX12-NEXT:    s_wait_expcnt 0x0
588; GFX12-NEXT:    s_wait_samplecnt 0x0
589; GFX12-NEXT:    s_wait_bvhcnt 0x0
590; GFX12-NEXT:    s_wait_kmcnt 0x0
591; GFX12-NEXT:    global_load_u8 v0, v[0:1], off offset:16383
592; GFX12-NEXT:    s_wait_loadcnt 0x0
593; GFX12-NEXT:    s_setpc_b64 s[30:31]
594;
595; GFX9-SDAG-LABEL: global_inst_valu_offset_2x_13bit_max:
596; GFX9-SDAG:       ; %bb.0:
597; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3000, v0
599; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
600; GFX9-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
601; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
602; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
603;
604; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_13bit_max:
605; GFX10-SDAG:       ; %bb.0:
606; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
607; GFX10-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3800, v0
608; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
609; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
610; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
611; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
612;
613; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_13bit_max:
614; GFX11-SDAG:       ; %bb.0:
615; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
616; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3000, v0
617; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
618; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:4095
619; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
620; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
621  %gep = getelementptr i8, ptr addrspace(1) %p, i64 16383
622  %load = load i8, ptr addrspace(1) %gep, align 4
623  ret i8 %load
624}
625
; Loads one i8 from %p + 16777214 (0xfffffe).
; No target is expected to carry the whole constant in the immediate field:
;  - GlobalISel on all four targets: the full 0xfffffe is added to the
;    64-bit address and the load carries no immediate offset.
;  - SelectionDAG on gfx900 and gfx1100: split into an add of 0xfff000 plus
;    offset:4094.
;  - SelectionDAG on gfx1010: split into an add of 0xfff800 plus
;    offset:2046.
;  - SelectionDAG on gfx1200: split into an add of 0x800000 plus
;    offset:8388606.
626define i8 @global_inst_valu_offset_2x_24bit_max(ptr addrspace(1) %p) {
627; GFX9-GISEL-LABEL: global_inst_valu_offset_2x_24bit_max:
628; GFX9-GISEL:       ; %bb.0:
629; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
630; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffffe, v0
631; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
632; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
633; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
634; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
635;
636; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_24bit_max:
637; GFX10-GISEL:       ; %bb.0:
638; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
639; GFX10-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffffe, v0
640; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
641; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
642; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
643; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
644;
645; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_24bit_max:
646; GFX11-GISEL:       ; %bb.0:
647; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
648; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffffe, v0
649; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
650; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
651; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
652; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
653;
654; GFX12-GISEL-LABEL: global_inst_valu_offset_2x_24bit_max:
655; GFX12-GISEL:       ; %bb.0:
656; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
657; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
658; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
659; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
660; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
661; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffffe, v0
662; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
663; GFX12-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
664; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
665; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
666;
667; GFX9-SDAG-LABEL: global_inst_valu_offset_2x_24bit_max:
668; GFX9-SDAG:       ; %bb.0:
669; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
670; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfff000, v0
671; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
672; GFX9-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:4094
673; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
674; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
675;
676; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_24bit_max:
677; GFX10-SDAG:       ; %bb.0:
678; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
679; GFX10-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff800, v0
680; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
681; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:2046
682; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
683; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
684;
685; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_24bit_max:
686; GFX11-SDAG:       ; %bb.0:
687; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
688; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff000, v0
689; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
690; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:4094
691; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
692; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
693;
694; GFX12-SDAG-LABEL: global_inst_valu_offset_2x_24bit_max:
695; GFX12-SDAG:       ; %bb.0:
696; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
697; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
698; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
699; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
700; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
701; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, v0
702; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
703; GFX12-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:8388606
704; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
705; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
706  %gep = getelementptr i8, ptr addrspace(1) %p, i64 16777214
707  %load = load i8, ptr addrspace(1) %gep, align 4
708  ret i8 %load
709}
710
; Load an i8 at byte offset -4096, i.e. -(2 * 2^11), from a global pointer.
; Per the checks below, GFX9, GFX11 and GFX12 fold the whole offset into the
; load's immediate (offset:-4096), while GFX10 materializes it with a 64-bit
; add (0xfffff000 low, -1 high) and loads with no immediate offset.
711define i8 @global_inst_valu_offset_2x_neg_11bit_max(ptr addrspace(1) %p) {
712; GFX9-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
713; GFX9:       ; %bb.0:
714; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
715; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-4096
716; GFX9-NEXT:    s_waitcnt vmcnt(0)
717; GFX9-NEXT:    s_setpc_b64 s[30:31]
718;
719; GFX10-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
720; GFX10:       ; %bb.0:
721; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
722; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
723; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
724; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
725; GFX10-NEXT:    s_waitcnt vmcnt(0)
726; GFX10-NEXT:    s_setpc_b64 s[30:31]
727;
728; GFX11-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
729; GFX11:       ; %bb.0:
730; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
731; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:-4096
732; GFX11-NEXT:    s_waitcnt vmcnt(0)
733; GFX11-NEXT:    s_setpc_b64 s[30:31]
734;
735; GFX12-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
736; GFX12:       ; %bb.0:
737; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
738; GFX12-NEXT:    s_wait_expcnt 0x0
739; GFX12-NEXT:    s_wait_samplecnt 0x0
740; GFX12-NEXT:    s_wait_bvhcnt 0x0
741; GFX12-NEXT:    s_wait_kmcnt 0x0
742; GFX12-NEXT:    global_load_u8 v0, v[0:1], off offset:-4096
743; GFX12-NEXT:    s_wait_loadcnt 0x0
744; GFX12-NEXT:    s_setpc_b64 s[30:31]
745  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -4096
746  %load = load i8, ptr addrspace(1) %gep, align 4
747  ret i8 %load
748}
749
; Load an i8 at byte offset -8192, i.e. -(2 * 2^12), from a global pointer.
; Per the checks below, only GFX12 folds the offset into the load's immediate
; (offset:-8192); GFX9, GFX10 and GFX11 add 0xffffe000 (low) / -1 (high) to
; the pointer and load with no immediate offset.
750define i8 @global_inst_valu_offset_2x_neg_12bit_max(ptr addrspace(1) %p) {
751; GFX9-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
752; GFX9:       ; %bb.0:
753; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
754; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
755; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
756; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
757; GFX9-NEXT:    s_waitcnt vmcnt(0)
758; GFX9-NEXT:    s_setpc_b64 s[30:31]
759;
760; GFX10-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
761; GFX10:       ; %bb.0:
762; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
763; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
764; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
765; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
766; GFX10-NEXT:    s_waitcnt vmcnt(0)
767; GFX10-NEXT:    s_setpc_b64 s[30:31]
768;
769; GFX11-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
770; GFX11:       ; %bb.0:
771; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
772; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
773; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
774; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
775; GFX11-NEXT:    s_waitcnt vmcnt(0)
776; GFX11-NEXT:    s_setpc_b64 s[30:31]
777;
778; GFX12-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
779; GFX12:       ; %bb.0:
780; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
781; GFX12-NEXT:    s_wait_expcnt 0x0
782; GFX12-NEXT:    s_wait_samplecnt 0x0
783; GFX12-NEXT:    s_wait_bvhcnt 0x0
784; GFX12-NEXT:    s_wait_kmcnt 0x0
785; GFX12-NEXT:    global_load_u8 v0, v[0:1], off offset:-8192
786; GFX12-NEXT:    s_wait_loadcnt 0x0
787; GFX12-NEXT:    s_setpc_b64 s[30:31]
788  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -8192
789  %load = load i8, ptr addrspace(1) %gep, align 4
790  ret i8 %load
791}
792
; Load an i8 at byte offset -16384, i.e. -(2 * 2^13), from a global pointer.
; Per the checks below, only GFX12 folds the offset into the load's immediate
; (offset:-16384); GFX9, GFX10 and GFX11 add 0xffffc000 (low) / -1 (high) to
; the pointer and load with no immediate offset.
793define i8 @global_inst_valu_offset_2x_neg_13bit_max(ptr addrspace(1) %p) {
794; GFX9-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
795; GFX9:       ; %bb.0:
796; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
797; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
798; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
799; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
800; GFX9-NEXT:    s_waitcnt vmcnt(0)
801; GFX9-NEXT:    s_setpc_b64 s[30:31]
802;
803; GFX10-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
804; GFX10:       ; %bb.0:
805; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
806; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
807; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
808; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
809; GFX10-NEXT:    s_waitcnt vmcnt(0)
810; GFX10-NEXT:    s_setpc_b64 s[30:31]
811;
812; GFX11-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
813; GFX11:       ; %bb.0:
814; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
815; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
816; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
817; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
818; GFX11-NEXT:    s_waitcnt vmcnt(0)
819; GFX11-NEXT:    s_setpc_b64 s[30:31]
820;
821; GFX12-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
822; GFX12:       ; %bb.0:
823; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
824; GFX12-NEXT:    s_wait_expcnt 0x0
825; GFX12-NEXT:    s_wait_samplecnt 0x0
826; GFX12-NEXT:    s_wait_bvhcnt 0x0
827; GFX12-NEXT:    s_wait_kmcnt 0x0
828; GFX12-NEXT:    global_load_u8 v0, v[0:1], off offset:-16384
829; GFX12-NEXT:    s_wait_loadcnt 0x0
830; GFX12-NEXT:    s_setpc_b64 s[30:31]
831  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -16384
832  %load = load i8, ptr addrspace(1) %gep, align 4
833  ret i8 %load
834}
835
; Load an i8 at byte offset -16777215, i.e. -(2^24 - 1), from a global pointer.
; Per the checks below, GlobalISel adds the whole constant to the pointer on
; every target (0xff000001 low, -1 high) and loads with no immediate offset.
; SelectionDAG splits the constant between the add and the load's immediate:
;   GFX9 / GFX11: add 0xff001000, offset:-4095
;   GFX10:        add 0xff000800, offset:-2047
;   GFX12:        add 0xff800000, offset:-8388607
836define i8 @global_inst_valu_offset_2x_neg_24bit_max(ptr addrspace(1) %p) {
837; GFX9-GISEL-LABEL: global_inst_valu_offset_2x_neg_24bit_max:
838; GFX9-GISEL:       ; %bb.0:
839; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
840; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff000001, v0
841; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
842; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
843; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
844; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
845;
846; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_neg_24bit_max:
847; GFX10-GISEL:       ; %bb.0:
848; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
849; GFX10-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff000001, v0
850; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
851; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
852; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
853; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
854;
855; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_neg_24bit_max:
856; GFX11-GISEL:       ; %bb.0:
857; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
858; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff000001, v0
859; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
860; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
861; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
862; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
863;
864; GFX12-GISEL-LABEL: global_inst_valu_offset_2x_neg_24bit_max:
865; GFX12-GISEL:       ; %bb.0:
866; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
867; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
868; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
869; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
870; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
871; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff000001, v0
872; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
873; GFX12-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
874; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
875; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
876;
877; GFX9-SDAG-LABEL: global_inst_valu_offset_2x_neg_24bit_max:
878; GFX9-SDAG:       ; %bb.0:
879; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
880; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xff001000, v0
881; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
882; GFX9-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:-4095
883; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
884; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
885;
886; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_neg_24bit_max:
887; GFX10-SDAG:       ; %bb.0:
888; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
889; GFX10-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff000800, v0
890; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
891; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2047
892; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
893; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
894;
895; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_neg_24bit_max:
896; GFX11-SDAG:       ; %bb.0:
897; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
898; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff001000, v0
899; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
900; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:-4095
901; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
902; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
903;
904; GFX12-SDAG-LABEL: global_inst_valu_offset_2x_neg_24bit_max:
905; GFX12-SDAG:       ; %bb.0:
906; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
907; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
908; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
909; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
910; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
911; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xff800000, v0
912; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
913; GFX12-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:-8388607
914; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
915; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
916  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -16777215
917  %load = load i8, ptr addrspace(1) %gep, align 4
918  ret i8 %load
919}
920
921
922; Offset (1ull << 33) | 2047: the low part is the largest 11-bit value.
; Per the checks below, SelectionDAG adds only the high part to the pointer
; (0 low, 2 high) and keeps 2047 in the load's immediate on every target;
; GlobalISel adds 0x7ff (low) / 2 (high) and loads with no immediate offset.
923define i8 @global_inst_valu_offset_64bit_11bit_split0(ptr addrspace(1) %p) {
924; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0:
925; GFX9-GISEL:       ; %bb.0:
926; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
927; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff, v0
928; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
929; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
930; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
931; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
932;
933; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0:
934; GFX10-GISEL:       ; %bb.0:
935; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
936; GFX10-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff, v0
937; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
938; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
939; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
940; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
941;
942; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0:
943; GFX11-GISEL:       ; %bb.0:
944; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
945; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff, v0
946; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
947; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
948; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
949; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
950;
951; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0:
952; GFX12-GISEL:       ; %bb.0:
953; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
954; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
955; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
956; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
957; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
958; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff, v0
959; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
960; GFX12-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
961; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
962; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
963;
964; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split0:
965; GFX9-SDAG:       ; %bb.0:
966; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
967; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
968; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
969; GFX9-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
970; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
971; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
972;
973; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split0:
974; GFX10-SDAG:       ; %bb.0:
975; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
976; GFX10-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
977; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
978; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
979; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
980; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
981;
982; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split0:
983; GFX11-SDAG:       ; %bb.0:
984; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
985; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
986; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
987; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:2047
988; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
989; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
990;
991; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split0:
992; GFX12-SDAG:       ; %bb.0:
993; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
994; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
995; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
996; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
997; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
998; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
999; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1000; GFX12-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:2047
1001; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
1002; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
1003  %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589936639
1004  %load = load i8, ptr addrspace(1) %gep, align 4
1005  ret i8 %load
1006}
1007
1008; Offset (1ull << 33) | 2048: the low part is one past the largest 11-bit value (2047).
; Per the checks below, SelectionDAG on GFX9, GFX11 and GFX12 adds only the
; high part (0 low, 2 high) and keeps 2048 in the load's immediate. GFX10
; (both selectors) and GlobalISel on the other targets add 0x800 (low) /
; 2 (high) and load with no immediate offset.
1009define i8 @global_inst_valu_offset_64bit_11bit_split1(ptr addrspace(1) %p) {
1010; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1:
1011; GFX9-GISEL:       ; %bb.0:
1012; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1013; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x800, v0
1014; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1015; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
1016; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1017; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1018;
1019; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split1:
1020; GFX10:       ; %bb.0:
1021; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1022; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
1023; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1024; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
1025; GFX10-NEXT:    s_waitcnt vmcnt(0)
1026; GFX10-NEXT:    s_setpc_b64 s[30:31]
1027;
1028; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1:
1029; GFX11-GISEL:       ; %bb.0:
1030; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1031; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
1032; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1033; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
1034; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
1035; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
1036;
1037; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1:
1038; GFX12-GISEL:       ; %bb.0:
1039; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1040; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
1041; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
1042; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
1043; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
1044; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
1045; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1046; GFX12-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
1047; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
1048; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
1049;
1050; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split1:
1051; GFX9-SDAG:       ; %bb.0:
1052; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1053; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
1054; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1055; GFX9-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:2048
1056; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1057; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1058;
1059; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split1:
1060; GFX11-SDAG:       ; %bb.0:
1061; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1062; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
1063; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1064; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:2048
1065; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1066; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
1067;
1068; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split1:
1069; GFX12-SDAG:       ; %bb.0:
1070; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1071; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
1072; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
1073; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
1074; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
1075; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
1076; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1077; GFX12-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:2048
1078; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
1079; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
1080  %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589936640
1081  %load = load i8, ptr addrspace(1) %gep, align 4
1082  ret i8 %load
1083}
1084
1085; Offset (1ull << 33) | 4095: the low part is the largest 12-bit value.
; Per the checks below, GlobalISel adds 0xfff (low) / 2 (high) on every target
; and loads with no immediate offset. SelectionDAG on GFX9, GFX11 and GFX12
; adds only the high part (0 low, 2 high) and keeps 4095 in the load's
; immediate; on GFX10 it adds 0x800 / 2 and uses offset:2047.
1086define i8 @global_inst_valu_offset_64bit_12bit_split0(ptr addrspace(1) %p) {
1087; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0:
1088; GFX9-GISEL:       ; %bb.0:
1089; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1090; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfff, v0
1091; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1092; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
1093; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1094; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1095;
1096; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0:
1097; GFX10-GISEL:       ; %bb.0:
1098; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1099; GFX10-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
1100; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1101; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
1102; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
1103; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
1104;
1105; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0:
1106; GFX11-GISEL:       ; %bb.0:
1107; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1108; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
1109; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1110; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
1111; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
1112; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
1113;
1114; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0:
1115; GFX12-GISEL:       ; %bb.0:
1116; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1117; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
1118; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
1119; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
1120; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
1121; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
1122; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1123; GFX12-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
1124; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
1125; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
1126;
1127; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split0:
1128; GFX9-SDAG:       ; %bb.0:
1129; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1130; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
1131; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1132; GFX9-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
1133; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1134; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1135;
1136; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split0:
1137; GFX10-SDAG:       ; %bb.0:
1138; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1139; GFX10-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
1140; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1141; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
1142; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
1143; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
1144;
1145; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split0:
1146; GFX11-SDAG:       ; %bb.0:
1147; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1148; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
1149; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1150; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:4095
1151; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1152; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
1153;
1154; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split0:
1155; GFX12-SDAG:       ; %bb.0:
1156; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1157; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
1158; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
1159; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
1160; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
1161; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
1162; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1163; GFX12-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:4095
1164; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
1165; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
1166  %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589938687
1167  %load = load i8, ptr addrspace(1) %gep, align 4
1168  ret i8 %load
1169}
1170
1171; Offset (1ull << 33) | 4096: the low part is one past the largest 12-bit value (4095).
; Per the checks below, only GFX12 SelectionDAG keeps the low part in the
; load's immediate (add 0 low / 2 high, offset:4096). GFX9, GFX10, GFX11 and
; GFX12 GlobalISel add 0x1000 (low) / 2 (high) and load with no immediate
; offset.
1172define i8 @global_inst_valu_offset_64bit_12bit_split1(ptr addrspace(1) %p) {
1173; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split1:
1174; GFX9:       ; %bb.0:
1175; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1176; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
1177; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1178; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
1179; GFX9-NEXT:    s_waitcnt vmcnt(0)
1180; GFX9-NEXT:    s_setpc_b64 s[30:31]
1181;
1182; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split1:
1183; GFX10:       ; %bb.0:
1184; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1185; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
1186; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1187; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
1188; GFX10-NEXT:    s_waitcnt vmcnt(0)
1189; GFX10-NEXT:    s_setpc_b64 s[30:31]
1190;
1191; GFX11-LABEL: global_inst_valu_offset_64bit_12bit_split1:
1192; GFX11:       ; %bb.0:
1193; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1194; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
1195; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1196; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
1197; GFX11-NEXT:    s_waitcnt vmcnt(0)
1198; GFX11-NEXT:    s_setpc_b64 s[30:31]
1199;
1200; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1:
1201; GFX12-GISEL:       ; %bb.0:
1202; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1203; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
1204; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
1205; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
1206; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
1207; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
1208; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1209; GFX12-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
1210; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
1211; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
1212;
1213; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1:
1214; GFX12-SDAG:       ; %bb.0:
1215; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1216; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
1217; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
1218; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
1219; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
1220; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
1221; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1222; GFX12-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:4096
1223; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
1224; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
1225  %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589938688
1226  %load = load i8, ptr addrspace(1) %gep, align 4
1227  ret i8 %load
1228}
1229
1230; Offset (1ull << 33) | 8191: the low part is the largest 13-bit value.
; Per the checks below, GlobalISel adds 0x1fff (low) / 2 (high) on every
; target and loads with no immediate offset. SelectionDAG splits the low part:
;   GFX9 / GFX11: add 0x1000 / 2, offset:4095
;   GFX10:        add 0x1800 / 2, offset:2047
;   GFX12:        add 0 / 2,      offset:8191
1231define i8 @global_inst_valu_offset_64bit_13bit_split0(ptr addrspace(1) %p) {
1232; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0:
1233; GFX9-GISEL:       ; %bb.0:
1234; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1235; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1fff, v0
1236; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1237; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
1238; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1239; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1240;
1241; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0:
1242; GFX10-GISEL:       ; %bb.0:
1243; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1244; GFX10-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
1245; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1246; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
1247; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
1248; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
1249;
1250; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0:
1251; GFX11-GISEL:       ; %bb.0:
1252; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1253; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
1254; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1255; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
1256; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
1257; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
1258;
1259; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0:
1260; GFX12-GISEL:       ; %bb.0:
1261; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1262; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
1263; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
1264; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
1265; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
1266; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
1267; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1268; GFX12-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
1269; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
1270; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
1271;
1272; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split0:
1273; GFX9-SDAG:       ; %bb.0:
1274; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1275; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
1276; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1277; GFX9-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
1278; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1279; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1280;
1281; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split0:
1282; GFX10-SDAG:       ; %bb.0:
1283; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1284; GFX10-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1800, v0
1285; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1286; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
1287; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
1288; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
1289;
1290; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split0:
1291; GFX11-SDAG:       ; %bb.0:
1292; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1293; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
1294; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1295; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:4095
1296; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1297; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
1298;
1299; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split0:
1300; GFX12-SDAG:       ; %bb.0:
1301; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1302; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
1303; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
1304; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
1305; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
1306; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
1307; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1308; GFX12-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:8191
1309; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
1310; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
1311  %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589942783
1312  %load = load i8, ptr addrspace(1) %gep, align 4
1313  ret i8 %load
1314}
1315
1316; Offset (1ull << 33) | 8192: the low part is one past the largest 13-bit value (8191).
; Per the checks below, only GFX12 SelectionDAG keeps the low part in the
; load's immediate (add 0 low / 2 high, offset:8192). GFX9, GFX10, GFX11 and
; GFX12 GlobalISel add 0x2000 (low) / 2 (high) and load with no immediate
; offset.
1317define i8 @global_inst_valu_offset_64bit_13bit_split1(ptr addrspace(1) %p) {
1318; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split1:
1319; GFX9:       ; %bb.0:
1320; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1321; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
1322; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1323; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
1324; GFX9-NEXT:    s_waitcnt vmcnt(0)
1325; GFX9-NEXT:    s_setpc_b64 s[30:31]
1326;
1327; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split1:
1328; GFX10:       ; %bb.0:
1329; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1330; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
1331; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1332; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
1333; GFX10-NEXT:    s_waitcnt vmcnt(0)
1334; GFX10-NEXT:    s_setpc_b64 s[30:31]
1335;
1336; GFX11-LABEL: global_inst_valu_offset_64bit_13bit_split1:
1337; GFX11:       ; %bb.0:
1338; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1339; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
1340; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1341; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
1342; GFX11-NEXT:    s_waitcnt vmcnt(0)
1343; GFX11-NEXT:    s_setpc_b64 s[30:31]
1344;
1345; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1:
1346; GFX12-GISEL:       ; %bb.0:
1347; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1348; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
1349; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
1350; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
1351; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
1352; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
1353; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1354; GFX12-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
1355; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
1356; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
1357;
1358; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1:
1359; GFX12-SDAG:       ; %bb.0:
1360; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1361; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
1362; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
1363; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
1364; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
1365; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
1366; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
1367; GFX12-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:8192
1368; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
1369; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
1370  %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589942784
1371  %load = load i8, ptr addrspace(1) %gep, align 4
1372  ret i8 %load
1373}
1374
; Offset is (1ull << 63) | 2047, i.e. -9223372036854773761 as a signed i64:
; the low 11 bits are all ones and only the sign bit is set in the high half.
; Per the checks below, GlobalISel adds the whole constant to the pointer and
; loads with no immediate, while SelectionDAG rounds the low part up (0x1000
; on gfx900/gfx1100, 0x800 on gfx1010, 0x800000 on gfx1200) and compensates
; with a negative immediate offset on the load.
define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff, v0
; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff, v0
; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff, v0
; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff, v0
; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2049
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:-2049
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, v0
; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:-8386561
; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854773761
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}
1462
; Offset is (1ull << 63) | 2048, i.e. -9223372036854773760 as a signed i64:
; one past the all-ones 11-bit low part, with only the sign bit set in the
; high half. Per the checks below, gfx1010 produces the same code for both
; selectors (add 0x800, no immediate). On the other targets GlobalISel also
; adds 0x800 with no immediate, while SelectionDAG adds 0x1000
; (gfx900/gfx1100) or 0x800000 (gfx1200) and uses a negative immediate offset.
define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x800, v0
; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:-2048
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, v0
; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:-8386560
; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854773760
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}
1541
; Offset is (1ull << 63) | 4095, i.e. -9223372036854771713 as a signed i64:
; the low 12 bits are all ones and only the sign bit is set in the high half.
; Per the checks below, GlobalISel adds 0xfff to the low half and loads with
; no immediate, while SelectionDAG adds 0x1000 with an immediate of -1 on
; gfx900/gfx1010/gfx1100, and 0x800000 with a large negative immediate on
; gfx1200.
define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfff, v0
; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:-1
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, v0
; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:-8384513
; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854771713
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}
1629
; Offset is (1ull << 63) | 4096, i.e. -9223372036854771712 as a signed i64:
; one past the all-ones 12-bit low part, with only the sign bit set in the
; high half. Per the checks below, every configuration except SelectionDAG on
; gfx1200 adds 0x1000 to the low half and loads with no immediate; SelectionDAG
; on gfx1200 adds 0x800000 and uses a negative immediate offset instead.
define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, v0
; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:-8384512
; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854771712
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}
1699
; Offset is (1ull << 63) | 8191, i.e. -9223372036854767617 as a signed i64:
; the low 13 bits are all ones and only the sign bit is set in the high half.
; Per the checks below, GlobalISel adds 0x1fff to the low half and loads with
; no immediate, while SelectionDAG adds 0x2000 with an immediate of -1 on
; gfx900/gfx1010/gfx1100, and 0x800000 with a large negative immediate on
; gfx1200.
define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1fff, v0
; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:-1
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, v0
; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:-8380417
; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854767617
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}
1787
; Offset is (1ull << 63) | 8192, i.e. -9223372036854767616 as a signed i64:
; one past the all-ones 13-bit low part, with only the sign bit set in the
; high half. Per the checks below, every configuration except SelectionDAG on
; gfx1200 adds 0x2000 to the low half and loads with no immediate; SelectionDAG
; on gfx1200 adds 0x800000 and uses a negative immediate offset instead.
define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT:    global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, v0
; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:-8380416
; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854767616
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}
1857
; Kernel variant with a scalar base: %p is a kernel argument, so the checks
; show it being fetched with a scalar load and used as the saddr operand of
; the global load. An offset of 1 is encoded directly as the load's immediate
; on all four targets. The load is volatile, and the loaded byte is stored
; through a poison pointer (the destination address does not matter here).
define amdgpu_kernel void @global_inst_salu_offset_1(ptr addrspace(1) %p) {
; GFX9-LABEL: global_inst_salu_offset_1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:1 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:1 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] offset:1 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: global_inst_salu_offset_1:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    global_load_u8 v0, v0, s[0:1] offset:1 scope:SCOPE_SYS
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b8 v[0:1], v0, off
; GFX12-NEXT:    s_endpgm
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 1
  %load = load volatile i8, ptr addrspace(1) %gep, align 1
  store i8 %load, ptr addrspace(1) poison
  ret void
}
1903
; Kernel variant with a scalar base and offset 2047 (largest positive value
; of a 12-bit signed field). Per the checks below, all four targets encode it
; directly as the load's immediate with a zero VGPR offset. The load is
; volatile, and the loaded byte is stored through a poison pointer (the
; destination address does not matter here).
define amdgpu_kernel void @global_inst_salu_offset_11bit_max(ptr addrspace(1) %p) {
; GFX9-LABEL: global_inst_salu_offset_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_11bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] offset:2047 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: global_inst_salu_offset_11bit_max:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    global_load_u8 v0, v0, s[0:1] offset:2047 scope:SCOPE_SYS
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b8 v[0:1], v0, off
; GFX12-NEXT:    s_endpgm
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 2047
  %load = load volatile i8, ptr addrspace(1) %gep, align 1
  store i8 %load, ptr addrspace(1) poison
  ret void
}
1949
; Kernel variant with a scalar base and offset 4095. Per the checks below,
; gfx900, gfx1100 and gfx1200 encode it directly as the load's immediate,
; while gfx1010 splits it into a VGPR offset of 0x800 plus an immediate of
; 2047. The load is volatile, and the loaded byte is stored through a poison
; pointer (the destination address does not matter here).
define amdgpu_kernel void @global_inst_salu_offset_12bit_max(ptr addrspace(1) %p) {
; GFX9-LABEL: global_inst_salu_offset_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:4095 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x800
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_12bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: global_inst_salu_offset_12bit_max:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    global_load_u8 v0, v0, s[0:1] offset:4095 scope:SCOPE_SYS
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b8 v[0:1], v0, off
; GFX12-NEXT:    s_endpgm
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 4095
  %load = load volatile i8, ptr addrspace(1) %gep, align 1
  store i8 %load, ptr addrspace(1) poison
  ret void
}
1995
; Kernel variant with a scalar base and offset 8191. Per the checks below,
; only gfx1200 encodes it directly as the load's immediate. gfx900 and gfx1100
; split it into a VGPR offset of 0x1000 plus an immediate of 4095, and gfx1010
; into a VGPR offset of 0x1800 plus an immediate of 2047. The load is
; volatile, and the loaded byte is stored through a poison pointer (the
; destination address does not matter here).
define amdgpu_kernel void @global_inst_salu_offset_13bit_max(ptr addrspace(1) %p) {
; GFX9-LABEL: global_inst_salu_offset_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1000
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:4095 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x1800
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_13bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0x1000
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: global_inst_salu_offset_13bit_max:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    global_load_u8 v0, v0, s[0:1] offset:8191 scope:SCOPE_SYS
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b8 v[0:1], v0, off
; GFX12-NEXT:    s_endpgm
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 8191
  %load = load volatile i8, ptr addrspace(1) %gep, align 1
  store i8 %load, ptr addrspace(1) poison
  ret void
}
2041
; Kernel: volatile i8 load from %p - 2048 (-(2^11)); the loaded byte is
; stored to an undef pointer. Per the checks, all four targets encode
; -2048 directly in the instruction offset, with no extra address math.
2042define amdgpu_kernel void @global_inst_salu_offset_neg_11bit_max(ptr addrspace(1) %p) {
2043; GFX9-LABEL: global_inst_salu_offset_neg_11bit_max:
2044; GFX9:       ; %bb.0:
2045; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2046; GFX9-NEXT:    v_mov_b32_e32 v0, 0
2047; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2048; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:-2048 glc
2049; GFX9-NEXT:    s_waitcnt vmcnt(0)
2050; GFX9-NEXT:    global_store_byte v[0:1], v0, off
2051; GFX9-NEXT:    s_endpgm
2052;
2053; GFX10-LABEL: global_inst_salu_offset_neg_11bit_max:
2054; GFX10:       ; %bb.0:
2055; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2056; GFX10-NEXT:    v_mov_b32_e32 v0, 0
2057; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2058; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:-2048 glc dlc
2059; GFX10-NEXT:    s_waitcnt vmcnt(0)
2060; GFX10-NEXT:    global_store_byte v[0:1], v0, off
2061; GFX10-NEXT:    s_endpgm
2062;
2063; GFX11-LABEL: global_inst_salu_offset_neg_11bit_max:
2064; GFX11:       ; %bb.0:
2065; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2066; GFX11-NEXT:    v_mov_b32_e32 v0, 0
2067; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2068; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] offset:-2048 glc dlc
2069; GFX11-NEXT:    s_waitcnt vmcnt(0)
2070; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
2071; GFX11-NEXT:    s_endpgm
2072;
2073; GFX12-LABEL: global_inst_salu_offset_neg_11bit_max:
2074; GFX12:       ; %bb.0:
2075; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2076; GFX12-NEXT:    v_mov_b32_e32 v0, 0
2077; GFX12-NEXT:    s_wait_kmcnt 0x0
2078; GFX12-NEXT:    global_load_u8 v0, v0, s[0:1] offset:-2048 scope:SCOPE_SYS
2079; GFX12-NEXT:    s_wait_loadcnt 0x0
2080; GFX12-NEXT:    global_store_b8 v[0:1], v0, off
2081; GFX12-NEXT:    s_endpgm
2082  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -2048
2083  %load = load volatile i8, ptr addrspace(1) %gep, align 1
2084  store i8 %load, ptr addrspace(1) undef
2085  ret void
2086}
2087
; Kernel: volatile i8 load from %p - 4096 (-(2^12)); the loaded byte is
; stored to an undef pointer. Per the checks, GFX9, GFX11 and GFX12 encode
; -4096 in the instruction offset. GFX10 instead adds 0xfffff000 / -1 to
; the 64-bit base (scalar adds under GlobalISel, VALU adds under
; SelectionDAG) and loads with no offset.
2088define amdgpu_kernel void @global_inst_salu_offset_neg_12bit_max(ptr addrspace(1) %p) {
2089; GFX9-LABEL: global_inst_salu_offset_neg_12bit_max:
2090; GFX9:       ; %bb.0:
2091; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2092; GFX9-NEXT:    v_mov_b32_e32 v0, 0
2093; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2094; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:-4096 glc
2095; GFX9-NEXT:    s_waitcnt vmcnt(0)
2096; GFX9-NEXT:    global_store_byte v[0:1], v0, off
2097; GFX9-NEXT:    s_endpgm
2098;
2099; GFX10-GISEL-LABEL: global_inst_salu_offset_neg_12bit_max:
2100; GFX10-GISEL:       ; %bb.0:
2101; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2102; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2103; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 0xfffff000
2104; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, -1
2105; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2106; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2107; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
2108; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
2109; GFX10-GISEL-NEXT:    global_store_byte v[0:1], v0, off
2110; GFX10-GISEL-NEXT:    s_endpgm
2111;
2112; GFX11-LABEL: global_inst_salu_offset_neg_12bit_max:
2113; GFX11:       ; %bb.0:
2114; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2115; GFX11-NEXT:    v_mov_b32_e32 v0, 0
2116; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2117; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] offset:-4096 glc dlc
2118; GFX11-NEXT:    s_waitcnt vmcnt(0)
2119; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
2120; GFX11-NEXT:    s_endpgm
2121;
2122; GFX12-LABEL: global_inst_salu_offset_neg_12bit_max:
2123; GFX12:       ; %bb.0:
2124; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2125; GFX12-NEXT:    v_mov_b32_e32 v0, 0
2126; GFX12-NEXT:    s_wait_kmcnt 0x0
2127; GFX12-NEXT:    global_load_u8 v0, v0, s[0:1] offset:-4096 scope:SCOPE_SYS
2128; GFX12-NEXT:    s_wait_loadcnt 0x0
2129; GFX12-NEXT:    global_store_b8 v[0:1], v0, off
2130; GFX12-NEXT:    s_endpgm
2131;
2132; GFX10-SDAG-LABEL: global_inst_salu_offset_neg_12bit_max:
2133; GFX10-SDAG:       ; %bb.0:
2134; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2135; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2136; GFX10-SDAG-NEXT:    v_add_co_u32 v0, s0, 0xfffff000, s0
2137; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
2138; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
2139; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
2140; GFX10-SDAG-NEXT:    global_store_byte v[0:1], v0, off
2141; GFX10-SDAG-NEXT:    s_endpgm
2142  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -4096
2143  %load = load volatile i8, ptr addrspace(1) %gep, align 1
2144  store i8 %load, ptr addrspace(1) undef
2145  ret void
2146}
2147
; Kernel: volatile i8 load from %p - 8192 (-(2^13)); the loaded byte is
; stored to an undef pointer. Per the checks, only GFX12 encodes -8192 in
; the instruction offset. GFX9 adds 0xffffe000 / -1 to the scalar base and
; loads with no offset; GFX10 and GFX11 compute the full address first
; (scalar adds under GlobalISel, VALU adds under SelectionDAG).
2148define amdgpu_kernel void @global_inst_salu_offset_neg_13bit_max(ptr addrspace(1) %p) {
2149; GFX9-LABEL: global_inst_salu_offset_neg_13bit_max:
2150; GFX9:       ; %bb.0:
2151; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2152; GFX9-NEXT:    v_mov_b32_e32 v0, 0
2153; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2154; GFX9-NEXT:    s_add_u32 s0, s0, 0xffffe000
2155; GFX9-NEXT:    s_addc_u32 s1, s1, -1
2156; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
2157; GFX9-NEXT:    s_waitcnt vmcnt(0)
2158; GFX9-NEXT:    global_store_byte v[0:1], v0, off
2159; GFX9-NEXT:    s_endpgm
2160;
2161; GFX10-GISEL-LABEL: global_inst_salu_offset_neg_13bit_max:
2162; GFX10-GISEL:       ; %bb.0:
2163; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2164; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2165; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 0xffffe000
2166; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, -1
2167; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2168; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2169; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
2170; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
2171; GFX10-GISEL-NEXT:    global_store_byte v[0:1], v0, off
2172; GFX10-GISEL-NEXT:    s_endpgm
2173;
2174; GFX11-GISEL-LABEL: global_inst_salu_offset_neg_13bit_max:
2175; GFX11-GISEL:       ; %bb.0:
2176; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2177; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2178; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0xffffe000
2179; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, -1
2180; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2181; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2182; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
2183; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
2184; GFX11-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
2185; GFX11-GISEL-NEXT:    s_endpgm
2186;
2187; GFX12-LABEL: global_inst_salu_offset_neg_13bit_max:
2188; GFX12:       ; %bb.0:
2189; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2190; GFX12-NEXT:    v_mov_b32_e32 v0, 0
2191; GFX12-NEXT:    s_wait_kmcnt 0x0
2192; GFX12-NEXT:    global_load_u8 v0, v0, s[0:1] offset:-8192 scope:SCOPE_SYS
2193; GFX12-NEXT:    s_wait_loadcnt 0x0
2194; GFX12-NEXT:    global_store_b8 v[0:1], v0, off
2195; GFX12-NEXT:    s_endpgm
2196;
2197; GFX10-SDAG-LABEL: global_inst_salu_offset_neg_13bit_max:
2198; GFX10-SDAG:       ; %bb.0:
2199; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2200; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2201; GFX10-SDAG-NEXT:    v_add_co_u32 v0, s0, 0xffffe000, s0
2202; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
2203; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
2204; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
2205; GFX10-SDAG-NEXT:    global_store_byte v[0:1], v0, off
2206; GFX10-SDAG-NEXT:    s_endpgm
2207;
2208; GFX11-SDAG-LABEL: global_inst_salu_offset_neg_13bit_max:
2209; GFX11-SDAG:       ; %bb.0:
2210; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2211; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2212; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0xffffe000, s0
2213; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2214; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
2215; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
2216; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
2217; GFX11-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
2218; GFX11-SDAG-NEXT:    s_endpgm
2219  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -8192
2220  %load = load volatile i8, ptr addrspace(1) %gep, align 1
2221  store i8 %load, ptr addrspace(1) undef
2222  ret void
2223}
2224
; Kernel: volatile i8 load from %p + 4095 (2 * 2^11 - 1); the loaded byte
; is stored to an undef pointer. Per the checks, GFX9, GFX11 and GFX12
; encode 4095 directly in the instruction offset, while GFX10 splits it
; as 0x800 + 2047.
2225define amdgpu_kernel void @global_inst_salu_offset_2x_11bit_max(ptr addrspace(1) %p) {
2226; GFX9-LABEL: global_inst_salu_offset_2x_11bit_max:
2227; GFX9:       ; %bb.0:
2228; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2229; GFX9-NEXT:    v_mov_b32_e32 v0, 0
2230; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2231; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:4095 glc
2232; GFX9-NEXT:    s_waitcnt vmcnt(0)
2233; GFX9-NEXT:    global_store_byte v[0:1], v0, off
2234; GFX9-NEXT:    s_endpgm
2235;
2236; GFX10-LABEL: global_inst_salu_offset_2x_11bit_max:
2237; GFX10:       ; %bb.0:
2238; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2239; GFX10-NEXT:    v_mov_b32_e32 v0, 0x800
2240; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2241; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
2242; GFX10-NEXT:    s_waitcnt vmcnt(0)
2243; GFX10-NEXT:    global_store_byte v[0:1], v0, off
2244; GFX10-NEXT:    s_endpgm
2245;
2246; GFX11-LABEL: global_inst_salu_offset_2x_11bit_max:
2247; GFX11:       ; %bb.0:
2248; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2249; GFX11-NEXT:    v_mov_b32_e32 v0, 0
2250; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2251; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc
2252; GFX11-NEXT:    s_waitcnt vmcnt(0)
2253; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
2254; GFX11-NEXT:    s_endpgm
2255;
2256; GFX12-LABEL: global_inst_salu_offset_2x_11bit_max:
2257; GFX12:       ; %bb.0:
2258; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2259; GFX12-NEXT:    v_mov_b32_e32 v0, 0
2260; GFX12-NEXT:    s_wait_kmcnt 0x0
2261; GFX12-NEXT:    global_load_u8 v0, v0, s[0:1] offset:4095 scope:SCOPE_SYS
2262; GFX12-NEXT:    s_wait_loadcnt 0x0
2263; GFX12-NEXT:    global_store_b8 v[0:1], v0, off
2264; GFX12-NEXT:    s_endpgm
2265  %gep = getelementptr i8, ptr addrspace(1) %p, i64 4095
2266  %load = load volatile i8, ptr addrspace(1) %gep, align 1
2267  store i8 %load, ptr addrspace(1) undef
2268  ret void
2269}
2270
; Kernel: volatile i8 load from %p + 8191 (2 * 2^12 - 1); the loaded byte
; is stored to an undef pointer. Per the checks, GFX9 and GFX11 split the
; offset as 0x1000 + 4095, GFX10 as 0x1800 + 2047, and GFX12 encodes 8191
; directly in the instruction offset.
2271define amdgpu_kernel void @global_inst_salu_offset_2x_12bit_max(ptr addrspace(1) %p) {
2272; GFX9-LABEL: global_inst_salu_offset_2x_12bit_max:
2273; GFX9:       ; %bb.0:
2274; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2275; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1000
2276; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2277; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:4095 glc
2278; GFX9-NEXT:    s_waitcnt vmcnt(0)
2279; GFX9-NEXT:    global_store_byte v[0:1], v0, off
2280; GFX9-NEXT:    s_endpgm
2281;
2282; GFX10-LABEL: global_inst_salu_offset_2x_12bit_max:
2283; GFX10:       ; %bb.0:
2284; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2285; GFX10-NEXT:    v_mov_b32_e32 v0, 0x1800
2286; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2287; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
2288; GFX10-NEXT:    s_waitcnt vmcnt(0)
2289; GFX10-NEXT:    global_store_byte v[0:1], v0, off
2290; GFX10-NEXT:    s_endpgm
2291;
2292; GFX11-LABEL: global_inst_salu_offset_2x_12bit_max:
2293; GFX11:       ; %bb.0:
2294; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2295; GFX11-NEXT:    v_mov_b32_e32 v0, 0x1000
2296; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2297; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc
2298; GFX11-NEXT:    s_waitcnt vmcnt(0)
2299; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
2300; GFX11-NEXT:    s_endpgm
2301;
2302; GFX12-LABEL: global_inst_salu_offset_2x_12bit_max:
2303; GFX12:       ; %bb.0:
2304; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2305; GFX12-NEXT:    v_mov_b32_e32 v0, 0
2306; GFX12-NEXT:    s_wait_kmcnt 0x0
2307; GFX12-NEXT:    global_load_u8 v0, v0, s[0:1] offset:8191 scope:SCOPE_SYS
2308; GFX12-NEXT:    s_wait_loadcnt 0x0
2309; GFX12-NEXT:    global_store_b8 v[0:1], v0, off
2310; GFX12-NEXT:    s_endpgm
2311  %gep = getelementptr i8, ptr addrspace(1) %p, i64 8191
2312  %load = load volatile i8, ptr addrspace(1) %gep, align 1
2313  store i8 %load, ptr addrspace(1) undef
2314  ret void
2315}
2316
; Kernel: volatile i8 load from %p + 16383 (2 * 2^13 - 1); the loaded byte
; is stored to an undef pointer. Per the checks, GFX9 and GFX11 split the
; offset as 0x3000 + 4095, GFX10 as 0x3800 + 2047, and GFX12 encodes 16383
; directly in the instruction offset.
2317define amdgpu_kernel void @global_inst_salu_offset_2x_13bit_max(ptr addrspace(1) %p) {
2318; GFX9-LABEL: global_inst_salu_offset_2x_13bit_max:
2319; GFX9:       ; %bb.0:
2320; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2321; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3000
2322; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2323; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:4095 glc
2324; GFX9-NEXT:    s_waitcnt vmcnt(0)
2325; GFX9-NEXT:    global_store_byte v[0:1], v0, off
2326; GFX9-NEXT:    s_endpgm
2327;
2328; GFX10-LABEL: global_inst_salu_offset_2x_13bit_max:
2329; GFX10:       ; %bb.0:
2330; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2331; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3800
2332; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2333; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
2334; GFX10-NEXT:    s_waitcnt vmcnt(0)
2335; GFX10-NEXT:    global_store_byte v[0:1], v0, off
2336; GFX10-NEXT:    s_endpgm
2337;
2338; GFX11-LABEL: global_inst_salu_offset_2x_13bit_max:
2339; GFX11:       ; %bb.0:
2340; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2341; GFX11-NEXT:    v_mov_b32_e32 v0, 0x3000
2342; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2343; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc
2344; GFX11-NEXT:    s_waitcnt vmcnt(0)
2345; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
2346; GFX11-NEXT:    s_endpgm
2347;
2348; GFX12-LABEL: global_inst_salu_offset_2x_13bit_max:
2349; GFX12:       ; %bb.0:
2350; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2351; GFX12-NEXT:    v_mov_b32_e32 v0, 0
2352; GFX12-NEXT:    s_wait_kmcnt 0x0
2353; GFX12-NEXT:    global_load_u8 v0, v0, s[0:1] offset:16383 scope:SCOPE_SYS
2354; GFX12-NEXT:    s_wait_loadcnt 0x0
2355; GFX12-NEXT:    global_store_b8 v[0:1], v0, off
2356; GFX12-NEXT:    s_endpgm
2357  %gep = getelementptr i8, ptr addrspace(1) %p, i64 16383
2358  %load = load volatile i8, ptr addrspace(1) %gep, align 1
2359  store i8 %load, ptr addrspace(1) undef
2360  ret void
2361}
2362
; Kernel: volatile i8 load from %p - 4096 (-(2 * 2^11)); the loaded byte
; is stored to an undef pointer. Per the checks, GFX9, GFX11 and GFX12
; encode -4096 in the instruction offset. GFX10 instead adds
; 0xfffff000 / -1 to the 64-bit base (scalar adds under GlobalISel, VALU
; adds under SelectionDAG) and loads with no offset.
2363define amdgpu_kernel void @global_inst_salu_offset_2x_neg_11bit_max(ptr addrspace(1) %p) {
2364; GFX9-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
2365; GFX9:       ; %bb.0:
2366; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2367; GFX9-NEXT:    v_mov_b32_e32 v0, 0
2368; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2369; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:-4096 glc
2370; GFX9-NEXT:    s_waitcnt vmcnt(0)
2371; GFX9-NEXT:    global_store_byte v[0:1], v0, off
2372; GFX9-NEXT:    s_endpgm
2373;
2374; GFX10-GISEL-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
2375; GFX10-GISEL:       ; %bb.0:
2376; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2377; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2378; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 0xfffff000
2379; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, -1
2380; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2381; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2382; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
2383; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
2384; GFX10-GISEL-NEXT:    global_store_byte v[0:1], v0, off
2385; GFX10-GISEL-NEXT:    s_endpgm
2386;
2387; GFX11-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
2388; GFX11:       ; %bb.0:
2389; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2390; GFX11-NEXT:    v_mov_b32_e32 v0, 0
2391; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2392; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] offset:-4096 glc dlc
2393; GFX11-NEXT:    s_waitcnt vmcnt(0)
2394; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
2395; GFX11-NEXT:    s_endpgm
2396;
2397; GFX12-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
2398; GFX12:       ; %bb.0:
2399; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2400; GFX12-NEXT:    v_mov_b32_e32 v0, 0
2401; GFX12-NEXT:    s_wait_kmcnt 0x0
2402; GFX12-NEXT:    global_load_u8 v0, v0, s[0:1] offset:-4096 scope:SCOPE_SYS
2403; GFX12-NEXT:    s_wait_loadcnt 0x0
2404; GFX12-NEXT:    global_store_b8 v[0:1], v0, off
2405; GFX12-NEXT:    s_endpgm
2406;
2407; GFX10-SDAG-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
2408; GFX10-SDAG:       ; %bb.0:
2409; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2410; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2411; GFX10-SDAG-NEXT:    v_add_co_u32 v0, s0, 0xfffff000, s0
2412; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
2413; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
2414; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
2415; GFX10-SDAG-NEXT:    global_store_byte v[0:1], v0, off
2416; GFX10-SDAG-NEXT:    s_endpgm
2417  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -4096
2418  %load = load volatile i8, ptr addrspace(1) %gep, align 1
2419  store i8 %load, ptr addrspace(1) undef
2420  ret void
2421}
2422
; Kernel: volatile i8 load from %p - 8192 (-(2 * 2^12)); the loaded byte
; is stored to an undef pointer. Per the checks, only GFX12 encodes -8192
; in the instruction offset. GFX9 adds 0xffffe000 / -1 to the scalar base
; and loads with no offset; GFX10 and GFX11 compute the full address first
; (scalar adds under GlobalISel, VALU adds under SelectionDAG).
2423define amdgpu_kernel void @global_inst_salu_offset_2x_neg_12bit_max(ptr addrspace(1) %p) {
2424; GFX9-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
2425; GFX9:       ; %bb.0:
2426; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2427; GFX9-NEXT:    v_mov_b32_e32 v0, 0
2428; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2429; GFX9-NEXT:    s_add_u32 s0, s0, 0xffffe000
2430; GFX9-NEXT:    s_addc_u32 s1, s1, -1
2431; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
2432; GFX9-NEXT:    s_waitcnt vmcnt(0)
2433; GFX9-NEXT:    global_store_byte v[0:1], v0, off
2434; GFX9-NEXT:    s_endpgm
2435;
2436; GFX10-GISEL-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
2437; GFX10-GISEL:       ; %bb.0:
2438; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2439; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2440; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 0xffffe000
2441; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, -1
2442; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2443; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2444; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
2445; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
2446; GFX10-GISEL-NEXT:    global_store_byte v[0:1], v0, off
2447; GFX10-GISEL-NEXT:    s_endpgm
2448;
2449; GFX11-GISEL-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
2450; GFX11-GISEL:       ; %bb.0:
2451; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2452; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2453; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0xffffe000
2454; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, -1
2455; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2456; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2457; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
2458; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
2459; GFX11-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
2460; GFX11-GISEL-NEXT:    s_endpgm
2461;
2462; GFX12-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
2463; GFX12:       ; %bb.0:
2464; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2465; GFX12-NEXT:    v_mov_b32_e32 v0, 0
2466; GFX12-NEXT:    s_wait_kmcnt 0x0
2467; GFX12-NEXT:    global_load_u8 v0, v0, s[0:1] offset:-8192 scope:SCOPE_SYS
2468; GFX12-NEXT:    s_wait_loadcnt 0x0
2469; GFX12-NEXT:    global_store_b8 v[0:1], v0, off
2470; GFX12-NEXT:    s_endpgm
2471;
2472; GFX10-SDAG-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
2473; GFX10-SDAG:       ; %bb.0:
2474; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2475; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2476; GFX10-SDAG-NEXT:    v_add_co_u32 v0, s0, 0xffffe000, s0
2477; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
2478; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
2479; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
2480; GFX10-SDAG-NEXT:    global_store_byte v[0:1], v0, off
2481; GFX10-SDAG-NEXT:    s_endpgm
2482;
2483; GFX11-SDAG-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
2484; GFX11-SDAG:       ; %bb.0:
2485; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2486; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2487; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0xffffe000, s0
2488; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2489; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
2490; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
2491; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
2492; GFX11-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
2493; GFX11-SDAG-NEXT:    s_endpgm
2494  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -8192
2495  %load = load volatile i8, ptr addrspace(1) %gep, align 1
2496  store i8 %load, ptr addrspace(1) undef
2497  ret void
2498}
2499
; Kernel: volatile i8 load from %p - 16384 (-(2 * 2^13)); the loaded byte
; is stored to an undef pointer. Per the checks, only GFX12 encodes -16384
; in the instruction offset. GFX9 adds 0xffffc000 / -1 to the scalar base
; and loads with no offset; GFX10 and GFX11 compute the full address first
; (scalar adds under GlobalISel, VALU adds under SelectionDAG).
2500define amdgpu_kernel void @global_inst_salu_offset_2x_neg_13bit_max(ptr addrspace(1) %p) {
2501; GFX9-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
2502; GFX9:       ; %bb.0:
2503; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2504; GFX9-NEXT:    v_mov_b32_e32 v0, 0
2505; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2506; GFX9-NEXT:    s_add_u32 s0, s0, 0xffffc000
2507; GFX9-NEXT:    s_addc_u32 s1, s1, -1
2508; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
2509; GFX9-NEXT:    s_waitcnt vmcnt(0)
2510; GFX9-NEXT:    global_store_byte v[0:1], v0, off
2511; GFX9-NEXT:    s_endpgm
2512;
2513; GFX10-GISEL-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
2514; GFX10-GISEL:       ; %bb.0:
2515; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2516; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2517; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 0xffffc000
2518; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, -1
2519; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2520; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2521; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
2522; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
2523; GFX10-GISEL-NEXT:    global_store_byte v[0:1], v0, off
2524; GFX10-GISEL-NEXT:    s_endpgm
2525;
2526; GFX11-GISEL-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
2527; GFX11-GISEL:       ; %bb.0:
2528; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2529; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2530; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0xffffc000
2531; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, -1
2532; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2533; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2534; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
2535; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
2536; GFX11-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
2537; GFX11-GISEL-NEXT:    s_endpgm
2538;
2539; GFX12-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
2540; GFX12:       ; %bb.0:
2541; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2542; GFX12-NEXT:    v_mov_b32_e32 v0, 0
2543; GFX12-NEXT:    s_wait_kmcnt 0x0
2544; GFX12-NEXT:    global_load_u8 v0, v0, s[0:1] offset:-16384 scope:SCOPE_SYS
2545; GFX12-NEXT:    s_wait_loadcnt 0x0
2546; GFX12-NEXT:    global_store_b8 v[0:1], v0, off
2547; GFX12-NEXT:    s_endpgm
2548;
2549; GFX10-SDAG-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
2550; GFX10-SDAG:       ; %bb.0:
2551; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2552; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2553; GFX10-SDAG-NEXT:    v_add_co_u32 v0, s0, 0xffffc000, s0
2554; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
2555; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
2556; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
2557; GFX10-SDAG-NEXT:    global_store_byte v[0:1], v0, off
2558; GFX10-SDAG-NEXT:    s_endpgm
2559;
2560; GFX11-SDAG-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
2561; GFX11-SDAG:       ; %bb.0:
2562; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2563; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2564; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0xffffc000, s0
2565; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2566; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
2567; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
2568; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
2569; GFX11-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
2570; GFX11-SDAG-NEXT:    s_endpgm
2571  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -16384
2572  %load = load volatile i8, ptr addrspace(1) %gep, align 1
2573  store i8 %load, ptr addrspace(1) undef
2574  ret void
2575}
2576
2577; Fill 11-bit low-bits (1ull << 33) | 2047
; Kernel: volatile i8 load from %p + 8589936639, i.e. bit 33 plus all 11
; low bits set; the loaded byte is stored to an undef pointer. Per the
; checks, the SelectionDAG runs for GFX10, GFX11 and GFX12 add only the
; high half (2) to the base and keep 2047 as the instruction offset. GFX9
; and every GlobalISel run add the whole constant (0x7ff low, 2 high) and
; load with no offset.
2578define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split0(ptr addrspace(1) %p) {
2579; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split0:
2580; GFX9:       ; %bb.0:
2581; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2582; GFX9-NEXT:    v_mov_b32_e32 v0, 0
2583; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2584; GFX9-NEXT:    s_add_u32 s0, s0, 0x7ff
2585; GFX9-NEXT:    s_addc_u32 s1, s1, 2
2586; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
2587; GFX9-NEXT:    s_waitcnt vmcnt(0)
2588; GFX9-NEXT:    global_store_byte v[0:1], v0, off
2589; GFX9-NEXT:    s_endpgm
2590;
2591; GFX10-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_split0:
2592; GFX10-GISEL:       ; %bb.0:
2593; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2594; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2595; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 0x7ff
2596; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, 2
2597; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2598; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2599; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
2600; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
2601; GFX10-GISEL-NEXT:    global_store_byte v[0:1], v0, off
2602; GFX10-GISEL-NEXT:    s_endpgm
2603;
2604; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_split0:
2605; GFX11-GISEL:       ; %bb.0:
2606; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2607; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2608; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x7ff
2609; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 2
2610; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2611; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2612; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
2613; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
2614; GFX11-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
2615; GFX11-GISEL-NEXT:    s_endpgm
2616;
2617; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_split0:
2618; GFX12-GISEL:       ; %bb.0:
2619; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2620; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
2621; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x7ff
2622; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 2
2623; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2624; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2625; GFX12-GISEL-NEXT:    global_load_u8 v0, v[0:1], off scope:SCOPE_SYS
2626; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
2627; GFX12-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
2628; GFX12-GISEL-NEXT:    s_endpgm
2629;
2630; GFX10-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_split0:
2631; GFX10-SDAG:       ; %bb.0:
2632; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2633; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2634; GFX10-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
2635; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
2636; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
2637; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
2638; GFX10-SDAG-NEXT:    global_store_byte v[0:1], v0, off
2639; GFX10-SDAG-NEXT:    s_endpgm
2640;
2641; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_split0:
2642; GFX11-SDAG:       ; %bb.0:
2643; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2644; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2645; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
2646; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2647; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2648; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:2047 glc dlc
2649; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
2650; GFX11-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
2651; GFX11-SDAG-NEXT:    s_endpgm
2652;
2653; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_split0:
2654; GFX12-SDAG:       ; %bb.0:
2655; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2656; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
2657; GFX12-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
2658; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2659; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2660; GFX12-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:2047 scope:SCOPE_SYS
2661; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
2662; GFX12-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
2663; GFX12-SDAG-NEXT:    s_endpgm
2664  %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589936639
2665  %load = load volatile i8, ptr addrspace(1) %gep, align 1
2666  store i8 %load, ptr addrspace(1) undef
2667  ret void
2668}
2669
2670; Low bits one past the 11-bit maximum: (1ull << 33) | 2048
2671define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split1(ptr addrspace(1) %p) {
2672; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split1:
2673; GFX9:       ; %bb.0:
2674; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2675; GFX9-NEXT:    v_mov_b32_e32 v0, 0
2676; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2677; GFX9-NEXT:    s_add_u32 s0, s0, 0x800
2678; GFX9-NEXT:    s_addc_u32 s1, s1, 2
2679; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
2680; GFX9-NEXT:    s_waitcnt vmcnt(0)
2681; GFX9-NEXT:    global_store_byte v[0:1], v0, off
2682; GFX9-NEXT:    s_endpgm
2683;
2684; GFX10-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_split1:
2685; GFX10-GISEL:       ; %bb.0:
2686; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2687; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2688; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 0x800
2689; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, 2
2690; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2691; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2692; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
2693; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
2694; GFX10-GISEL-NEXT:    global_store_byte v[0:1], v0, off
2695; GFX10-GISEL-NEXT:    s_endpgm
2696;
2697; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_split1:
2698; GFX11-GISEL:       ; %bb.0:
2699; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2700; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2701; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x800
2702; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 2
2703; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2704; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2705; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
2706; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
2707; GFX11-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
2708; GFX11-GISEL-NEXT:    s_endpgm
2709;
2710; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_split1:
2711; GFX12-GISEL:       ; %bb.0:
2712; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2713; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
2714; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x800
2715; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 2
2716; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2717; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2718; GFX12-GISEL-NEXT:    global_load_u8 v0, v[0:1], off scope:SCOPE_SYS
2719; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
2720; GFX12-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
2721; GFX12-GISEL-NEXT:    s_endpgm
2722;
2723; GFX10-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_split1:
2724; GFX10-SDAG:       ; %bb.0:
2725; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2726; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2727; GFX10-SDAG-NEXT:    v_add_co_u32 v0, s0, 0x800, s0
2728; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
2729; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
2730; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
2731; GFX10-SDAG-NEXT:    global_store_byte v[0:1], v0, off
2732; GFX10-SDAG-NEXT:    s_endpgm
2733;
2734; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_split1:
2735; GFX11-SDAG:       ; %bb.0:
2736; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2737; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2738; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
2739; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2740; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2741; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:2048 glc dlc
2742; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
2743; GFX11-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
2744; GFX11-SDAG-NEXT:    s_endpgm
2745;
2746; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_split1:
2747; GFX12-SDAG:       ; %bb.0:
2748; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2749; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
2750; GFX12-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
2751; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2752; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2753; GFX12-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:2048 scope:SCOPE_SYS
2754; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
2755; GFX12-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
2756; GFX12-SDAG-NEXT:    s_endpgm
2757  %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589936640 ; byte offset (1 << 33) | 2048
2758  %load = load volatile i8, ptr addrspace(1) %gep, align 1
2759  store i8 %load, ptr addrspace(1) poison ; store address is a don't-care value
2760  ret void
2761}
2762
2763; Fill 12-bit low-bits (1ull << 33) | 4095
2764define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split0(ptr addrspace(1) %p) {
2765; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split0:
2766; GFX9:       ; %bb.0:
2767; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2768; GFX9-NEXT:    v_mov_b32_e32 v0, 0
2769; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2770; GFX9-NEXT:    s_add_u32 s0, s0, 0xfff
2771; GFX9-NEXT:    s_addc_u32 s1, s1, 2
2772; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
2773; GFX9-NEXT:    s_waitcnt vmcnt(0)
2774; GFX9-NEXT:    global_store_byte v[0:1], v0, off
2775; GFX9-NEXT:    s_endpgm
2776;
2777; GFX10-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_split0:
2778; GFX10-GISEL:       ; %bb.0:
2779; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2780; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2781; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 0xfff
2782; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, 2
2783; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2784; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2785; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
2786; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
2787; GFX10-GISEL-NEXT:    global_store_byte v[0:1], v0, off
2788; GFX10-GISEL-NEXT:    s_endpgm
2789;
2790; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_split0:
2791; GFX11-GISEL:       ; %bb.0:
2792; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2793; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2794; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0xfff
2795; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 2
2796; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2797; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2798; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
2799; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
2800; GFX11-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
2801; GFX11-GISEL-NEXT:    s_endpgm
2802;
2803; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_split0:
2804; GFX12-GISEL:       ; %bb.0:
2805; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2806; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
2807; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0xfff
2808; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 2
2809; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2810; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2811; GFX12-GISEL-NEXT:    global_load_u8 v0, v[0:1], off scope:SCOPE_SYS
2812; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
2813; GFX12-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
2814; GFX12-GISEL-NEXT:    s_endpgm
2815;
2816; GFX10-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_split0:
2817; GFX10-SDAG:       ; %bb.0:
2818; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2819; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2820; GFX10-SDAG-NEXT:    v_add_co_u32 v0, s0, 0x800, s0
2821; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
2822; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
2823; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
2824; GFX10-SDAG-NEXT:    global_store_byte v[0:1], v0, off
2825; GFX10-SDAG-NEXT:    s_endpgm
2826;
2827; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_split0:
2828; GFX11-SDAG:       ; %bb.0:
2829; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2830; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2831; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
2832; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2833; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2834; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:4095 glc dlc
2835; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
2836; GFX11-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
2837; GFX11-SDAG-NEXT:    s_endpgm
2838;
2839; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_split0:
2840; GFX12-SDAG:       ; %bb.0:
2841; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2842; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
2843; GFX12-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
2844; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2845; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2846; GFX12-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:4095 scope:SCOPE_SYS
2847; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
2848; GFX12-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
2849; GFX12-SDAG-NEXT:    s_endpgm
2850  %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589938687 ; byte offset (1 << 33) | 4095
2851  %load = load volatile i8, ptr addrspace(1) %gep, align 1
2852  store i8 %load, ptr addrspace(1) poison ; store address is a don't-care value
2853  ret void
2854}
2855
2856; Fill 12-bit low-bits (1ull << 33) | 4096 (one past the 12-bit maximum, 4095)
2857define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split1(ptr addrspace(1) %p) {
2858; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split1:
2859; GFX9:       ; %bb.0:
2860; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2861; GFX9-NEXT:    v_mov_b32_e32 v0, 0
2862; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2863; GFX9-NEXT:    s_add_u32 s0, s0, 0x1000
2864; GFX9-NEXT:    s_addc_u32 s1, s1, 2
2865; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
2866; GFX9-NEXT:    s_waitcnt vmcnt(0)
2867; GFX9-NEXT:    global_store_byte v[0:1], v0, off
2868; GFX9-NEXT:    s_endpgm
2869;
2870; GFX10-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_split1:
2871; GFX10-GISEL:       ; %bb.0:
2872; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2873; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2874; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 0x1000
2875; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, 2
2876; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2877; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2878; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
2879; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
2880; GFX10-GISEL-NEXT:    global_store_byte v[0:1], v0, off
2881; GFX10-GISEL-NEXT:    s_endpgm
2882;
2883; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_split1:
2884; GFX11-GISEL:       ; %bb.0:
2885; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2886; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2887; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x1000
2888; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 2
2889; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2890; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2891; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
2892; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
2893; GFX11-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
2894; GFX11-GISEL-NEXT:    s_endpgm
2895;
2896; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_split1:
2897; GFX12-GISEL:       ; %bb.0:
2898; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2899; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
2900; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x1000
2901; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 2
2902; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2903; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2904; GFX12-GISEL-NEXT:    global_load_u8 v0, v[0:1], off scope:SCOPE_SYS
2905; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
2906; GFX12-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
2907; GFX12-GISEL-NEXT:    s_endpgm
2908;
2909; GFX10-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_split1:
2910; GFX10-SDAG:       ; %bb.0:
2911; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2912; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2913; GFX10-SDAG-NEXT:    v_add_co_u32 v0, s0, 0x1000, s0
2914; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
2915; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
2916; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
2917; GFX10-SDAG-NEXT:    global_store_byte v[0:1], v0, off
2918; GFX10-SDAG-NEXT:    s_endpgm
2919;
2920; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_split1:
2921; GFX11-SDAG:       ; %bb.0:
2922; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2923; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2924; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0x1000, s0
2925; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2926; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2927; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
2928; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
2929; GFX11-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
2930; GFX11-SDAG-NEXT:    s_endpgm
2931;
2932; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_split1:
2933; GFX12-SDAG:       ; %bb.0:
2934; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2935; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
2936; GFX12-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
2937; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2938; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2939; GFX12-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:4096 scope:SCOPE_SYS
2940; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
2941; GFX12-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
2942; GFX12-SDAG-NEXT:    s_endpgm
2943  %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589938688 ; byte offset (1 << 33) | 4096
2944  %load = load volatile i8, ptr addrspace(1) %gep, align 1
2945  store i8 %load, ptr addrspace(1) poison ; store address is a don't-care value
2946  ret void
2947}
2948
2949; Fill 13-bit low-bits (1ull << 33) | 8191
2950define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split0(ptr addrspace(1) %p) {
2951; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split0:
2952; GFX9:       ; %bb.0:
2953; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2954; GFX9-NEXT:    v_mov_b32_e32 v0, 0
2955; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2956; GFX9-NEXT:    s_add_u32 s0, s0, 0x1fff
2957; GFX9-NEXT:    s_addc_u32 s1, s1, 2
2958; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
2959; GFX9-NEXT:    s_waitcnt vmcnt(0)
2960; GFX9-NEXT:    global_store_byte v[0:1], v0, off
2961; GFX9-NEXT:    s_endpgm
2962;
2963; GFX10-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_split0:
2964; GFX10-GISEL:       ; %bb.0:
2965; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2966; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2967; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 0x1fff
2968; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, 2
2969; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2970; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2971; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
2972; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
2973; GFX10-GISEL-NEXT:    global_store_byte v[0:1], v0, off
2974; GFX10-GISEL-NEXT:    s_endpgm
2975;
2976; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_split0:
2977; GFX11-GISEL:       ; %bb.0:
2978; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2979; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2980; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x1fff
2981; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 2
2982; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2983; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2984; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
2985; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
2986; GFX11-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
2987; GFX11-GISEL-NEXT:    s_endpgm
2988;
2989; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_split0:
2990; GFX12-GISEL:       ; %bb.0:
2991; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2992; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
2993; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x1fff
2994; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 2
2995; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2996; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2997; GFX12-GISEL-NEXT:    global_load_u8 v0, v[0:1], off scope:SCOPE_SYS
2998; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
2999; GFX12-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
3000; GFX12-GISEL-NEXT:    s_endpgm
3001;
3002; GFX10-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_split0:
3003; GFX10-SDAG:       ; %bb.0:
3004; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3005; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3006; GFX10-SDAG-NEXT:    v_add_co_u32 v0, s0, 0x1800, s0
3007; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
3008; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
3009; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
3010; GFX10-SDAG-NEXT:    global_store_byte v[0:1], v0, off
3011; GFX10-SDAG-NEXT:    s_endpgm
3012;
3013; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_split0:
3014; GFX11-SDAG:       ; %bb.0:
3015; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3016; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3017; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0x1000, s0
3018; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3019; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
3020; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:4095 glc dlc
3021; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
3022; GFX11-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
3023; GFX11-SDAG-NEXT:    s_endpgm
3024;
3025; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_split0:
3026; GFX12-SDAG:       ; %bb.0:
3027; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3028; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
3029; GFX12-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
3030; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3031; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
3032; GFX12-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:8191 scope:SCOPE_SYS
3033; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
3034; GFX12-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
3035; GFX12-SDAG-NEXT:    s_endpgm
3036  %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589942783 ; byte offset (1 << 33) | 8191
3037  %load = load volatile i8, ptr addrspace(1) %gep, align 1
3038  store i8 %load, ptr addrspace(1) poison ; store address is a don't-care value
3039  ret void
3040}
3041
3042; Fill 13-bit low-bits (1ull << 33) | 8192 (one past the 13-bit maximum, 8191)
3043define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split1(ptr addrspace(1) %p) {
3044; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split1:
3045; GFX9:       ; %bb.0:
3046; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3047; GFX9-NEXT:    v_mov_b32_e32 v0, 0
3048; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
3049; GFX9-NEXT:    s_add_u32 s0, s0, 0x2000
3050; GFX9-NEXT:    s_addc_u32 s1, s1, 2
3051; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
3052; GFX9-NEXT:    s_waitcnt vmcnt(0)
3053; GFX9-NEXT:    global_store_byte v[0:1], v0, off
3054; GFX9-NEXT:    s_endpgm
3055;
3056; GFX10-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_split1:
3057; GFX10-GISEL:       ; %bb.0:
3058; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3059; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3060; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 0x2000
3061; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, 2
3062; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
3063; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
3064; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
3065; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
3066; GFX10-GISEL-NEXT:    global_store_byte v[0:1], v0, off
3067; GFX10-GISEL-NEXT:    s_endpgm
3068;
3069; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_split1:
3070; GFX11-GISEL:       ; %bb.0:
3071; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3072; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3073; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x2000
3074; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 2
3075; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3076; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3077; GFX11-GISEL-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
3078; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
3079; GFX11-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
3080; GFX11-GISEL-NEXT:    s_endpgm
3081;
3082; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_split1:
3083; GFX12-GISEL:       ; %bb.0:
3084; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3085; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
3086; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x2000
3087; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 2
3088; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3089; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3090; GFX12-GISEL-NEXT:    global_load_u8 v0, v[0:1], off scope:SCOPE_SYS
3091; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
3092; GFX12-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
3093; GFX12-GISEL-NEXT:    s_endpgm
3094;
3095; GFX10-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_split1:
3096; GFX10-SDAG:       ; %bb.0:
3097; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3098; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3099; GFX10-SDAG-NEXT:    v_add_co_u32 v0, s0, 0x2000, s0
3100; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
3101; GFX10-SDAG-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
3102; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
3103; GFX10-SDAG-NEXT:    global_store_byte v[0:1], v0, off
3104; GFX10-SDAG-NEXT:    s_endpgm
3105;
3106; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_split1:
3107; GFX11-SDAG:       ; %bb.0:
3108; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3109; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3110; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0x2000, s0
3111; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3112; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
3113; GFX11-SDAG-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
3114; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
3115; GFX11-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
3116; GFX11-SDAG-NEXT:    s_endpgm
3117;
3118; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_split1:
3119; GFX12-SDAG:       ; %bb.0:
3120; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3121; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
3122; GFX12-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
3123; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3124; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
3125; GFX12-SDAG-NEXT:    global_load_u8 v0, v[0:1], off offset:8192 scope:SCOPE_SYS
3126; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
3127; GFX12-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
3128; GFX12-SDAG-NEXT:    s_endpgm
3129  %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589942784 ; byte offset (1 << 33) | 8192
3130  %load = load volatile i8, ptr addrspace(1) %gep, align 1
3131  store i8 %load, ptr addrspace(1) poison ; store address is a don't-care value
3132  ret void
3133}
3134
3135; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
3136define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(ptr addrspace(1) %p) {
3137; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
3138; GFX9:       ; %bb.0:
3139; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3140; GFX9-NEXT:    v_mov_b32_e32 v0, 0
3141; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
3142; GFX9-NEXT:    s_add_u32 s0, s0, 0x7ff
3143; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
3144; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
3145; GFX9-NEXT:    s_waitcnt vmcnt(0)
3146; GFX9-NEXT:    global_store_byte v[0:1], v0, off
3147; GFX9-NEXT:    s_endpgm
3148;
3149; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
3150; GFX10:       ; %bb.0:
3151; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3152; GFX10-NEXT:    v_mov_b32_e32 v0, 0
3153; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
3154; GFX10-NEXT:    s_add_u32 s0, s0, 0x7ff
3155; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
3156; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] glc dlc
3157; GFX10-NEXT:    s_waitcnt vmcnt(0)
3158; GFX10-NEXT:    global_store_byte v[0:1], v0, off
3159; GFX10-NEXT:    s_endpgm
3160;
3161; GFX11-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
3162; GFX11:       ; %bb.0:
3163; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3164; GFX11-NEXT:    v_mov_b32_e32 v0, 0
3165; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
3166; GFX11-NEXT:    s_add_u32 s0, s0, 0x7ff
3167; GFX11-NEXT:    s_addc_u32 s1, s1, 0x80000000
3168; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] glc dlc
3169; GFX11-NEXT:    s_waitcnt vmcnt(0)
3170; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
3171; GFX11-NEXT:    s_endpgm
3172;
3173; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
3174; GFX12-GISEL:       ; %bb.0:
3175; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3176; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
3177; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
3178; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x7ff
3179; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0x80000000
3180; GFX12-GISEL-NEXT:    global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS
3181; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
3182; GFX12-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
3183; GFX12-GISEL-NEXT:    s_endpgm
3184;
3185; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
3186; GFX12-SDAG:       ; %bb.0:
3187; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3188; GFX12-SDAG-NEXT:    s_movk_i32 s2, 0x7ff
3189; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0
3190; GFX12-SDAG-NEXT:    s_brev_b32 s3, 1
3191; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
3192; GFX12-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
3193; GFX12-SDAG-NEXT:    global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS
3194; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
3195; GFX12-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
3196; GFX12-SDAG-NEXT:    s_endpgm
3197  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854773761 ; byte offset (1 << 63) | 2047 as a signed i64
3198  %load = load volatile i8, ptr addrspace(1) %gep, align 1
3199  store i8 %load, ptr addrspace(1) poison ; store address is a don't-care value
3200  ret void
3201}
3202
3203; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048 (one past the 11-bit maximum, 2047)
3204define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(ptr addrspace(1) %p) {
3205; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
3206; GFX9:       ; %bb.0:
3207; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3208; GFX9-NEXT:    v_mov_b32_e32 v0, 0
3209; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
3210; GFX9-NEXT:    s_add_u32 s0, s0, 0x800
3211; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
3212; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
3213; GFX9-NEXT:    s_waitcnt vmcnt(0)
3214; GFX9-NEXT:    global_store_byte v[0:1], v0, off
3215; GFX9-NEXT:    s_endpgm
3216;
3217; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
3218; GFX10:       ; %bb.0:
3219; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3220; GFX10-NEXT:    v_mov_b32_e32 v0, 0
3221; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
3222; GFX10-NEXT:    s_add_u32 s0, s0, 0x800
3223; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
3224; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] glc dlc
3225; GFX10-NEXT:    s_waitcnt vmcnt(0)
3226; GFX10-NEXT:    global_store_byte v[0:1], v0, off
3227; GFX10-NEXT:    s_endpgm
3228;
3229; GFX11-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
3230; GFX11:       ; %bb.0:
3231; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3232; GFX11-NEXT:    v_mov_b32_e32 v0, 0
3233; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
3234; GFX11-NEXT:    s_add_u32 s0, s0, 0x800
3235; GFX11-NEXT:    s_addc_u32 s1, s1, 0x80000000
3236; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] glc dlc
3237; GFX11-NEXT:    s_waitcnt vmcnt(0)
3238; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
3239; GFX11-NEXT:    s_endpgm
3240;
3241; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
3242; GFX12-GISEL:       ; %bb.0:
3243; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3244; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
3245; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
3246; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x800
3247; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0x80000000
3248; GFX12-GISEL-NEXT:    global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS
3249; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
3250; GFX12-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
3251; GFX12-GISEL-NEXT:    s_endpgm
3252;
3253; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
3254; GFX12-SDAG:       ; %bb.0:
3255; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3256; GFX12-SDAG-NEXT:    s_movk_i32 s2, 0x800
3257; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0
3258; GFX12-SDAG-NEXT:    s_brev_b32 s3, 1
3259; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
3260; GFX12-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
3261; GFX12-SDAG-NEXT:    global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS
3262; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
3263; GFX12-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
3264; GFX12-SDAG-NEXT:    s_endpgm
3265  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854773760 ; byte offset (1 << 63) | 2048 as a signed i64
3266  %load = load volatile i8, ptr addrspace(1) %gep, align 1
3267  store i8 %load, ptr addrspace(1) poison ; store address is a don't-care value
3268  ret void
3269}
3270
3271; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
3272define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(ptr addrspace(1) %p) {
3273; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
3274; GFX9:       ; %bb.0:
3275; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3276; GFX9-NEXT:    v_mov_b32_e32 v0, 0
3277; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
3278; GFX9-NEXT:    s_add_u32 s0, s0, 0xfff
3279; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
3280; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
3281; GFX9-NEXT:    s_waitcnt vmcnt(0)
3282; GFX9-NEXT:    global_store_byte v[0:1], v0, off
3283; GFX9-NEXT:    s_endpgm
3284;
3285; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
3286; GFX10:       ; %bb.0:
3287; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3288; GFX10-NEXT:    v_mov_b32_e32 v0, 0
3289; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
3290; GFX10-NEXT:    s_add_u32 s0, s0, 0xfff
3291; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
3292; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] glc dlc
3293; GFX10-NEXT:    s_waitcnt vmcnt(0)
3294; GFX10-NEXT:    global_store_byte v[0:1], v0, off
3295; GFX10-NEXT:    s_endpgm
3296;
3297; GFX11-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
3298; GFX11:       ; %bb.0:
3299; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3300; GFX11-NEXT:    v_mov_b32_e32 v0, 0
3301; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
3302; GFX11-NEXT:    s_add_u32 s0, s0, 0xfff
3303; GFX11-NEXT:    s_addc_u32 s1, s1, 0x80000000
3304; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] glc dlc
3305; GFX11-NEXT:    s_waitcnt vmcnt(0)
3306; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
3307; GFX11-NEXT:    s_endpgm
3308;
3309; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
3310; GFX12-GISEL:       ; %bb.0:
3311; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3312; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
3313; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
3314; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0xfff
3315; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0x80000000
3316; GFX12-GISEL-NEXT:    global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS
3317; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
3318; GFX12-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
3319; GFX12-GISEL-NEXT:    s_endpgm
3320;
3321; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
3322; GFX12-SDAG:       ; %bb.0:
3323; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3324; GFX12-SDAG-NEXT:    s_movk_i32 s2, 0xfff
3325; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0
3326; GFX12-SDAG-NEXT:    s_brev_b32 s3, 1
3327; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
3328; GFX12-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
3329; GFX12-SDAG-NEXT:    global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS
3330; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
3331; GFX12-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
3332; GFX12-SDAG-NEXT:    s_endpgm
3333  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854771713 ; byte offset (1 << 63) | 4095 as a signed i64
3334  %load = load volatile i8, ptr addrspace(1) %gep, align 1
3335  store i8 %load, ptr addrspace(1) poison ; store address is a don't-care value
3336  ret void
3337}
3338
3339; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096 (one past the 12-bit maximum, 4095)
3340define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(ptr addrspace(1) %p) {
3341; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
3342; GFX9:       ; %bb.0:
3343; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3344; GFX9-NEXT:    v_mov_b32_e32 v0, 0
3345; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
3346; GFX9-NEXT:    s_add_u32 s0, s0, 0x1000
3347; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
3348; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
3349; GFX9-NEXT:    s_waitcnt vmcnt(0)
3350; GFX9-NEXT:    global_store_byte v[0:1], v0, off
3351; GFX9-NEXT:    s_endpgm
3352;
3353; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
3354; GFX10:       ; %bb.0:
3355; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3356; GFX10-NEXT:    v_mov_b32_e32 v0, 0
3357; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
3358; GFX10-NEXT:    s_add_u32 s0, s0, 0x1000
3359; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
3360; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] glc dlc
3361; GFX10-NEXT:    s_waitcnt vmcnt(0)
3362; GFX10-NEXT:    global_store_byte v[0:1], v0, off
3363; GFX10-NEXT:    s_endpgm
3364;
3365; GFX11-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
3366; GFX11:       ; %bb.0:
3367; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3368; GFX11-NEXT:    v_mov_b32_e32 v0, 0
3369; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
3370; GFX11-NEXT:    s_add_u32 s0, s0, 0x1000
3371; GFX11-NEXT:    s_addc_u32 s1, s1, 0x80000000
3372; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] glc dlc
3373; GFX11-NEXT:    s_waitcnt vmcnt(0)
3374; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
3375; GFX11-NEXT:    s_endpgm
3376;
3377; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
3378; GFX12-GISEL:       ; %bb.0:
3379; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3380; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
3381; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
3382; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x1000
3383; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0x80000000
3384; GFX12-GISEL-NEXT:    global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS
3385; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
3386; GFX12-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
3387; GFX12-GISEL-NEXT:    s_endpgm
3388;
3389; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
3390; GFX12-SDAG:       ; %bb.0:
3391; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3392; GFX12-SDAG-NEXT:    s_movk_i32 s2, 0x1000
3393; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0
3394; GFX12-SDAG-NEXT:    s_brev_b32 s3, 1
3395; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
3396; GFX12-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
3397; GFX12-SDAG-NEXT:    global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS
3398; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
3399; GFX12-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
3400; GFX12-SDAG-NEXT:    s_endpgm
3401  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854771712 ; byte offset (1 << 63) | 4096 as a signed i64
3402  %load = load volatile i8, ptr addrspace(1) %gep, align 1
3403  store i8 %load, ptr addrspace(1) poison ; store address is a don't-care value
3404  ret void
3405}
3406
3407; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
; Kernel-argument case: %p is loaded with a scalar load and the byte offset is
; 0x8000000000001fff (bit 63 plus 8191). In every expected sequence below the
; whole constant is added to the base with scalar instructions and the global
; load is issued without an immediate offset operand.
;  - gfx900 / gfx1010 / gfx1100: s_add_u32 0x1fff + s_addc_u32 0x80000000.
;  - gfx1200 with GlobalISel:    s_add_co_u32 0x1fff + s_add_co_ci_u32 0x80000000.
;  - gfx1200 with SelectionDAG:  s[2:3] set up by s_movk_i32 / s_brev_b32, then
;                                one s_add_nc_u64.
3408define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(ptr addrspace(1) %p) {
3409; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
3410; GFX9:       ; %bb.0:
3411; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3412; GFX9-NEXT:    v_mov_b32_e32 v0, 0
3413; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
3414; GFX9-NEXT:    s_add_u32 s0, s0, 0x1fff
3415; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
3416; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
3417; GFX9-NEXT:    s_waitcnt vmcnt(0)
3418; GFX9-NEXT:    global_store_byte v[0:1], v0, off
3419; GFX9-NEXT:    s_endpgm
3420;
3421; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
3422; GFX10:       ; %bb.0:
3423; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3424; GFX10-NEXT:    v_mov_b32_e32 v0, 0
3425; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
3426; GFX10-NEXT:    s_add_u32 s0, s0, 0x1fff
3427; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
3428; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] glc dlc
3429; GFX10-NEXT:    s_waitcnt vmcnt(0)
3430; GFX10-NEXT:    global_store_byte v[0:1], v0, off
3431; GFX10-NEXT:    s_endpgm
3432;
3433; GFX11-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
3434; GFX11:       ; %bb.0:
3435; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3436; GFX11-NEXT:    v_mov_b32_e32 v0, 0
3437; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
3438; GFX11-NEXT:    s_add_u32 s0, s0, 0x1fff
3439; GFX11-NEXT:    s_addc_u32 s1, s1, 0x80000000
3440; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] glc dlc
3441; GFX11-NEXT:    s_waitcnt vmcnt(0)
3442; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
3443; GFX11-NEXT:    s_endpgm
3444;
3445; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
3446; GFX12-GISEL:       ; %bb.0:
3447; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3448; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
3449; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
3450; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x1fff
3451; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0x80000000
3452; GFX12-GISEL-NEXT:    global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS
3453; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
3454; GFX12-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
3455; GFX12-GISEL-NEXT:    s_endpgm
3456;
3457; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
3458; GFX12-SDAG:       ; %bb.0:
3459; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3460; GFX12-SDAG-NEXT:    s_movk_i32 s2, 0x1fff
3461; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0
3462; GFX12-SDAG-NEXT:    s_brev_b32 s3, 1
3463; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
3464; GFX12-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
3465; GFX12-SDAG-NEXT:    global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS
3466; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
3467; GFX12-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
3468; GFX12-SDAG-NEXT:    s_endpgm
  ; i64 -9223372036854767617 is 0x8000000000001fff when read as unsigned 64-bit.
3469  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854767617
  ; Volatile byte load; the expected load instructions carry glc, glc dlc or
  ; scope:SCOPE_SYS depending on the target.
3470  %load = load volatile i8, ptr addrspace(1) %gep, align 1
  ; Store through an undef pointer, presumably only to give the loaded value a
  ; use; the expected store addresses v[0:1] without any prior setup.
3471  store i8 %load, ptr addrspace(1) undef
3472  ret void
3473}
3474
3475; One past the 13-bit low-bit range (bit 13 set), negative high bits: (1ull << 63) | 8192
; Kernel-argument case: %p is loaded with a scalar load and the byte offset is
; 0x8000000000002000 (bit 63 plus 8192). In every expected sequence below the
; whole constant is added to the base with scalar instructions and the global
; load is issued without an immediate offset operand.
;  - gfx900 / gfx1010 / gfx1100: s_add_u32 0x2000 + s_addc_u32 0x80000000.
;  - gfx1200 with GlobalISel:    s_add_co_u32 0x2000 + s_add_co_ci_u32 0x80000000.
;  - gfx1200 with SelectionDAG:  s[2:3] set up by s_movk_i32 / s_brev_b32, then
;                                one s_add_nc_u64.
3476define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(ptr addrspace(1) %p) {
3477; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
3478; GFX9:       ; %bb.0:
3479; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3480; GFX9-NEXT:    v_mov_b32_e32 v0, 0
3481; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
3482; GFX9-NEXT:    s_add_u32 s0, s0, 0x2000
3483; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
3484; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
3485; GFX9-NEXT:    s_waitcnt vmcnt(0)
3486; GFX9-NEXT:    global_store_byte v[0:1], v0, off
3487; GFX9-NEXT:    s_endpgm
3488;
3489; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
3490; GFX10:       ; %bb.0:
3491; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3492; GFX10-NEXT:    v_mov_b32_e32 v0, 0
3493; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
3494; GFX10-NEXT:    s_add_u32 s0, s0, 0x2000
3495; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
3496; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] glc dlc
3497; GFX10-NEXT:    s_waitcnt vmcnt(0)
3498; GFX10-NEXT:    global_store_byte v[0:1], v0, off
3499; GFX10-NEXT:    s_endpgm
3500;
3501; GFX11-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
3502; GFX11:       ; %bb.0:
3503; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3504; GFX11-NEXT:    v_mov_b32_e32 v0, 0
3505; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
3506; GFX11-NEXT:    s_add_u32 s0, s0, 0x2000
3507; GFX11-NEXT:    s_addc_u32 s1, s1, 0x80000000
3508; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] glc dlc
3509; GFX11-NEXT:    s_waitcnt vmcnt(0)
3510; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
3511; GFX11-NEXT:    s_endpgm
3512;
3513; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
3514; GFX12-GISEL:       ; %bb.0:
3515; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3516; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
3517; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
3518; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x2000
3519; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0x80000000
3520; GFX12-GISEL-NEXT:    global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS
3521; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
3522; GFX12-GISEL-NEXT:    global_store_b8 v[0:1], v0, off
3523; GFX12-GISEL-NEXT:    s_endpgm
3524;
3525; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
3526; GFX12-SDAG:       ; %bb.0:
3527; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
3528; GFX12-SDAG-NEXT:    s_movk_i32 s2, 0x2000
3529; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0
3530; GFX12-SDAG-NEXT:    s_brev_b32 s3, 1
3531; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
3532; GFX12-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
3533; GFX12-SDAG-NEXT:    global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS
3534; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
3535; GFX12-SDAG-NEXT:    global_store_b8 v[0:1], v0, off
3536; GFX12-SDAG-NEXT:    s_endpgm
  ; i64 -9223372036854767616 is 0x8000000000002000 when read as unsigned 64-bit.
3537  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854767616
  ; Volatile byte load; the expected load instructions carry glc, glc dlc or
  ; scope:SCOPE_SYS depending on the target.
3538  %load = load volatile i8, ptr addrspace(1) %gep, align 1
  ; Store through an undef pointer, presumably only to give the loaded value a
  ; use; the expected store addresses v[0:1] without any prior setup.
3539  store i8 %load, ptr addrspace(1) undef
3540  ret void
3541}
3542