xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll (revision 68694259b298614f16f87d83a56be1207f36fa53)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -global-isel -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -global-isel -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -global-isel -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
5; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -global-isel -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
6; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
7
8; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX9 %s
9; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX10 %s
10; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX940 %s
11; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX11 %s
12; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX12 %s
13
; Kernel entry point taking a single i32 argument %idx.
; It allocates a 32-element float array in addrspace(5), performs a volatile
; store of the constant 15 to element %idx, then a volatile load from element
; (%idx & 15). The expected output below accesses the array with scratch_*
; instructions, using an SGPR as the address operand on every tested target.
14define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
15; GFX9-LABEL: store_load_sindex_kernel:
16; GFX9:       ; %bb.0: ; %bb
17; GFX9-NEXT:    s_load_dword s0, s[4:5], 0x0
18; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
19; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
20; GFX9-NEXT:    v_mov_b32_e32 v0, 15
21; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
22; GFX9-NEXT:    s_lshl_b32 s1, s0, 2
23; GFX9-NEXT:    s_and_b32 s0, s0, 15
24; GFX9-NEXT:    s_lshl_b32 s0, s0, 2
25; GFX9-NEXT:    scratch_store_dword off, v0, s1
26; GFX9-NEXT:    s_waitcnt vmcnt(0)
27; GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
28; GFX9-NEXT:    s_waitcnt vmcnt(0)
29; GFX9-NEXT:    s_endpgm
30;
31; GFX10-LABEL: store_load_sindex_kernel:
32; GFX10:       ; %bb.0: ; %bb
33; GFX10-NEXT:    s_add_u32 s8, s8, s13
34; GFX10-NEXT:    s_addc_u32 s9, s9, 0
35; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
36; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
37; GFX10-NEXT:    s_load_dword s0, s[4:5], 0x0
38; GFX10-NEXT:    v_mov_b32_e32 v0, 15
39; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
40; GFX10-NEXT:    s_and_b32 s1, s0, 15
41; GFX10-NEXT:    s_lshl_b32 s0, s0, 2
42; GFX10-NEXT:    s_lshl_b32 s1, s1, 2
43; GFX10-NEXT:    scratch_store_dword off, v0, s0
44; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
45; GFX10-NEXT:    scratch_load_dword v0, off, s1 glc dlc
46; GFX10-NEXT:    s_waitcnt vmcnt(0)
47; GFX10-NEXT:    s_endpgm
48;
49; GFX940-LABEL: store_load_sindex_kernel:
50; GFX940:       ; %bb.0: ; %bb
51; GFX940-NEXT:    s_load_dword s0, s[4:5], 0x0
52; GFX940-NEXT:    v_mov_b32_e32 v0, 15
53; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
54; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
55; GFX940-NEXT:    s_and_b32 s0, s0, 15
56; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
57; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
58; GFX940-NEXT:    s_waitcnt vmcnt(0)
59; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
60; GFX940-NEXT:    s_waitcnt vmcnt(0)
61; GFX940-NEXT:    s_endpgm
62;
63; GFX11-LABEL: store_load_sindex_kernel:
64; GFX11:       ; %bb.0: ; %bb
65; GFX11-NEXT:    s_load_b32 s0, s[4:5], 0x0
66; GFX11-NEXT:    v_mov_b32_e32 v0, 15
67; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
68; GFX11-NEXT:    s_and_b32 s1, s0, 15
69; GFX11-NEXT:    s_lshl_b32 s0, s0, 2
70; GFX11-NEXT:    s_lshl_b32 s1, s1, 2
71; GFX11-NEXT:    scratch_store_b32 off, v0, s0 dlc
72; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
73; GFX11-NEXT:    scratch_load_b32 v0, off, s1 glc dlc
74; GFX11-NEXT:    s_waitcnt vmcnt(0)
75; GFX11-NEXT:    s_endpgm
76;
77; GFX12-LABEL: store_load_sindex_kernel:
78; GFX12:       ; %bb.0: ; %bb
79; GFX12-NEXT:    s_load_b32 s0, s[4:5], 0x0
80; GFX12-NEXT:    v_mov_b32_e32 v0, 15
81; GFX12-NEXT:    s_wait_kmcnt 0x0
82; GFX12-NEXT:    s_and_b32 s1, s0, 15
83; GFX12-NEXT:    s_lshl_b32 s0, s0, 2
84; GFX12-NEXT:    s_lshl_b32 s1, s1, 2
85; GFX12-NEXT:    scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
86; GFX12-NEXT:    s_wait_storecnt 0x0
87; GFX12-NEXT:    scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
88; GFX12-NEXT:    s_wait_loadcnt 0x0
89; GFX12-NEXT:    s_endpgm
90;
91; UNALIGNED_GFX9-LABEL: store_load_sindex_kernel:
92; UNALIGNED_GFX9:       ; %bb.0: ; %bb
93; UNALIGNED_GFX9-NEXT:    s_load_dword s0, s[4:5], 0x0
94; UNALIGNED_GFX9-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
95; UNALIGNED_GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
96; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v0, 15
97; UNALIGNED_GFX9-NEXT:    s_waitcnt lgkmcnt(0)
98; UNALIGNED_GFX9-NEXT:    s_lshl_b32 s1, s0, 2
99; UNALIGNED_GFX9-NEXT:    s_and_b32 s0, s0, 15
100; UNALIGNED_GFX9-NEXT:    s_lshl_b32 s0, s0, 2
101; UNALIGNED_GFX9-NEXT:    scratch_store_dword off, v0, s1
102; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
103; UNALIGNED_GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
104; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
105; UNALIGNED_GFX9-NEXT:    s_endpgm
106;
107; UNALIGNED_GFX10-LABEL: store_load_sindex_kernel:
108; UNALIGNED_GFX10:       ; %bb.0: ; %bb
109; UNALIGNED_GFX10-NEXT:    s_add_u32 s8, s8, s13
110; UNALIGNED_GFX10-NEXT:    s_addc_u32 s9, s9, 0
111; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
112; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
113; UNALIGNED_GFX10-NEXT:    s_load_dword s0, s[4:5], 0x0
114; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v0, 15
115; UNALIGNED_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
116; UNALIGNED_GFX10-NEXT:    s_and_b32 s1, s0, 15
117; UNALIGNED_GFX10-NEXT:    s_lshl_b32 s0, s0, 2
118; UNALIGNED_GFX10-NEXT:    s_lshl_b32 s1, s1, 2
119; UNALIGNED_GFX10-NEXT:    scratch_store_dword off, v0, s0
120; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
121; UNALIGNED_GFX10-NEXT:    scratch_load_dword v0, off, s1 glc dlc
122; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
123; UNALIGNED_GFX10-NEXT:    s_endpgm
124;
125; UNALIGNED_GFX940-LABEL: store_load_sindex_kernel:
126; UNALIGNED_GFX940:       ; %bb.0: ; %bb
127; UNALIGNED_GFX940-NEXT:    s_load_dword s0, s[4:5], 0x0
128; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v0, 15
129; UNALIGNED_GFX940-NEXT:    s_waitcnt lgkmcnt(0)
130; UNALIGNED_GFX940-NEXT:    s_lshl_b32 s1, s0, 2
131; UNALIGNED_GFX940-NEXT:    s_and_b32 s0, s0, 15
132; UNALIGNED_GFX940-NEXT:    s_lshl_b32 s0, s0, 2
133; UNALIGNED_GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
134; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
135; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
136; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
137; UNALIGNED_GFX940-NEXT:    s_endpgm
138;
139; UNALIGNED_GFX11-LABEL: store_load_sindex_kernel:
140; UNALIGNED_GFX11:       ; %bb.0: ; %bb
141; UNALIGNED_GFX11-NEXT:    s_load_b32 s0, s[4:5], 0x0
142; UNALIGNED_GFX11-NEXT:    v_mov_b32_e32 v0, 15
143; UNALIGNED_GFX11-NEXT:    s_waitcnt lgkmcnt(0)
144; UNALIGNED_GFX11-NEXT:    s_and_b32 s1, s0, 15
145; UNALIGNED_GFX11-NEXT:    s_lshl_b32 s0, s0, 2
146; UNALIGNED_GFX11-NEXT:    s_lshl_b32 s1, s1, 2
147; UNALIGNED_GFX11-NEXT:    scratch_store_b32 off, v0, s0 dlc
148; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
149; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v0, off, s1 glc dlc
150; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
151; UNALIGNED_GFX11-NEXT:    s_endpgm
152;
153; UNALIGNED_GFX12-LABEL: store_load_sindex_kernel:
154; UNALIGNED_GFX12:       ; %bb.0: ; %bb
155; UNALIGNED_GFX12-NEXT:    s_load_b32 s0, s[4:5], 0x0
156; UNALIGNED_GFX12-NEXT:    v_mov_b32_e32 v0, 15
157; UNALIGNED_GFX12-NEXT:    s_wait_kmcnt 0x0
158; UNALIGNED_GFX12-NEXT:    s_and_b32 s1, s0, 15
159; UNALIGNED_GFX12-NEXT:    s_lshl_b32 s0, s0, 2
160; UNALIGNED_GFX12-NEXT:    s_lshl_b32 s1, s1, 2
161; UNALIGNED_GFX12-NEXT:    scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
162; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
163; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
164; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
165; UNALIGNED_GFX12-NEXT:    s_endpgm
166bb:
  ; 32 x float array in addrspace(5); the only memory this function touches.
167  %i = alloca [32 x float], align 4, addrspace(5)
  ; Store target: element %idx (index taken directly from the kernel argument).
168  %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx
  ; volatile keeps the store in the output even though nothing reads it back.
169  store volatile i32 15, ptr addrspace(5) %i7, align 4
  ; Load target: element (%idx & 15), i.e. an index in the range 0..15.
170  %i9 = and i32 %idx, 15
171  %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9
  ; The loaded value is unused; volatile keeps the load in the output.
172  %i12 = load volatile i32, ptr addrspace(5) %i10, align 4
173  ret void
174}
175
; Kernel entry point with no arguments; the array index comes from the
; workitem id instead of a kernel argument.
; It allocates a 32-element float array in addrspace(5), performs a volatile
; store of the constant 15 to element <workitem.id.x>, then a volatile load
; from element (31 - <workitem.id.x>). In the expected output the load is
; emitted with a negated, shifted index plus "offset:124" (31 elements of
; 4 bytes each).
176define amdgpu_kernel void @store_load_vindex_kernel() {
177; GFX9-LABEL: store_load_vindex_kernel:
178; GFX9:       ; %bb.0: ; %bb
179; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
180; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
181; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
182; GFX9-NEXT:    v_mov_b32_e32 v2, 15
183; GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
184; GFX9-NEXT:    scratch_store_dword v1, v2, off
185; GFX9-NEXT:    s_waitcnt vmcnt(0)
186; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
187; GFX9-NEXT:    scratch_load_dword v0, v0, off offset:124 glc
188; GFX9-NEXT:    s_waitcnt vmcnt(0)
189; GFX9-NEXT:    s_endpgm
190;
191; GFX10-LABEL: store_load_vindex_kernel:
192; GFX10:       ; %bb.0: ; %bb
193; GFX10-NEXT:    s_add_u32 s8, s8, s13
194; GFX10-NEXT:    s_addc_u32 s9, s9, 0
195; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
196; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
197; GFX10-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
198; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
199; GFX10-NEXT:    v_mov_b32_e32 v2, 15
200; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
201; GFX10-NEXT:    scratch_store_dword v0, v2, off
202; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
203; GFX10-NEXT:    scratch_load_dword v0, v1, off offset:124 glc dlc
204; GFX10-NEXT:    s_waitcnt vmcnt(0)
205; GFX10-NEXT:    s_endpgm
206;
207; GFX940-LABEL: store_load_vindex_kernel:
208; GFX940:       ; %bb.0: ; %bb
209; GFX940-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
210; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
211; GFX940-NEXT:    v_mov_b32_e32 v2, 15
212; GFX940-NEXT:    v_sub_u32_e32 v0, 0, v0
213; GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
214; GFX940-NEXT:    s_waitcnt vmcnt(0)
215; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
216; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
217; GFX940-NEXT:    s_waitcnt vmcnt(0)
218; GFX940-NEXT:    s_endpgm
219;
220; GFX11-LABEL: store_load_vindex_kernel:
221; GFX11:       ; %bb.0: ; %bb
222; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
223; GFX11-NEXT:    v_mov_b32_e32 v2, 15
224; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
225; GFX11-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
226; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
227; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
228; GFX11-NEXT:    scratch_store_b32 v0, v2, off dlc
229; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
230; GFX11-NEXT:    scratch_load_b32 v0, v1, off offset:124 glc dlc
231; GFX11-NEXT:    s_waitcnt vmcnt(0)
232; GFX11-NEXT:    s_endpgm
233;
234; GFX12-LABEL: store_load_vindex_kernel:
235; GFX12:       ; %bb.0: ; %bb
236; GFX12-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
237; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
238; GFX12-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
239; GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
240; GFX12-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v1
241; GFX12-NEXT:    scratch_store_b32 v0, v2, off scope:SCOPE_SYS
242; GFX12-NEXT:    s_wait_storecnt 0x0
243; GFX12-NEXT:    scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS
244; GFX12-NEXT:    s_wait_loadcnt 0x0
245; GFX12-NEXT:    s_endpgm
246;
247; UNALIGNED_GFX9-LABEL: store_load_vindex_kernel:
248; UNALIGNED_GFX9:       ; %bb.0: ; %bb
249; UNALIGNED_GFX9-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
250; UNALIGNED_GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
251; UNALIGNED_GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
252; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v2, 15
253; UNALIGNED_GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
254; UNALIGNED_GFX9-NEXT:    scratch_store_dword v1, v2, off
255; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
256; UNALIGNED_GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
257; UNALIGNED_GFX9-NEXT:    scratch_load_dword v0, v0, off offset:124 glc
258; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
259; UNALIGNED_GFX9-NEXT:    s_endpgm
260;
261; UNALIGNED_GFX10-LABEL: store_load_vindex_kernel:
262; UNALIGNED_GFX10:       ; %bb.0: ; %bb
263; UNALIGNED_GFX10-NEXT:    s_add_u32 s8, s8, s13
264; UNALIGNED_GFX10-NEXT:    s_addc_u32 s9, s9, 0
265; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
266; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
267; UNALIGNED_GFX10-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
268; UNALIGNED_GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
269; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v2, 15
270; UNALIGNED_GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
271; UNALIGNED_GFX10-NEXT:    scratch_store_dword v0, v2, off
272; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
273; UNALIGNED_GFX10-NEXT:    scratch_load_dword v0, v1, off offset:124 glc dlc
274; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
275; UNALIGNED_GFX10-NEXT:    s_endpgm
276;
277; UNALIGNED_GFX940-LABEL: store_load_vindex_kernel:
278; UNALIGNED_GFX940:       ; %bb.0: ; %bb
279; UNALIGNED_GFX940-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
280; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
281; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v2, 15
282; UNALIGNED_GFX940-NEXT:    v_sub_u32_e32 v0, 0, v0
283; UNALIGNED_GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
284; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
285; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
286; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
287; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
288; UNALIGNED_GFX940-NEXT:    s_endpgm
289;
290; UNALIGNED_GFX11-LABEL: store_load_vindex_kernel:
291; UNALIGNED_GFX11:       ; %bb.0: ; %bb
292; UNALIGNED_GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
293; UNALIGNED_GFX11-NEXT:    v_mov_b32_e32 v2, 15
294; UNALIGNED_GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
295; UNALIGNED_GFX11-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
296; UNALIGNED_GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
297; UNALIGNED_GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
298; UNALIGNED_GFX11-NEXT:    scratch_store_b32 v0, v2, off dlc
299; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
300; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v0, v1, off offset:124 glc dlc
301; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
302; UNALIGNED_GFX11-NEXT:    s_endpgm
303;
304; UNALIGNED_GFX12-LABEL: store_load_vindex_kernel:
305; UNALIGNED_GFX12:       ; %bb.0: ; %bb
306; UNALIGNED_GFX12-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
307; UNALIGNED_GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
308; UNALIGNED_GFX12-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
309; UNALIGNED_GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
310; UNALIGNED_GFX12-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v1
311; UNALIGNED_GFX12-NEXT:    scratch_store_b32 v0, v2, off scope:SCOPE_SYS
312; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
313; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS
314; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
315; UNALIGNED_GFX12-NEXT:    s_endpgm
316bb:
  ; 32 x float array in addrspace(5); the only memory this function touches.
317  %i = alloca [32 x float], align 4, addrspace(5)
  ; Per-workitem index; this is what makes the address a vector value.
318  %i2 = tail call i32 @llvm.amdgcn.workitem.id.x()
  ; NOTE(review): %i3 is not used by any instruction in this function; it looks
  ; like a leftover. Confirm before removing, and regenerate the check lines.
319  %i3 = zext i32 %i2 to i64
  ; Store target: element <workitem.id.x>.
320  %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i2
  ; volatile keeps the store in the output even though nothing reads it back.
321  store volatile i32 15, ptr addrspace(5) %i7, align 4
  ; Load target: element (31 - <workitem.id.x>).
322  %i9 = sub nsw i32 31, %i2
323  %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9
  ; The loaded value is unused; volatile keeps the load in the output.
324  %i12 = load volatile i32, ptr addrspace(5) %i10, align 4
325  ret void
326}
327
; Non-kernel function (plain "define void") taking an i32 argument %idx.
; Same access pattern as a store to element %idx followed by a load from
; element (%idx & 15) of a 32-element float array in addrspace(5), both
; volatile. In the expected output the array is addressed relative to s32:
; either added into the VGPR address or passed as the SGPR operand of the
; scratch instruction, depending on the target. The function returns with
; s_setpc_b64 rather than s_endpgm.
328define void @store_load_vindex_foo(i32 %idx) {
329; GFX9-LABEL: store_load_vindex_foo:
330; GFX9:       ; %bb.0: ; %bb
331; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
332; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
333; GFX9-NEXT:    v_and_b32_e32 v0, 15, v0
334; GFX9-NEXT:    v_add_u32_e32 v1, s32, v1
335; GFX9-NEXT:    v_mov_b32_e32 v2, 15
336; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
337; GFX9-NEXT:    scratch_store_dword v1, v2, off
338; GFX9-NEXT:    s_waitcnt vmcnt(0)
339; GFX9-NEXT:    v_add_u32_e32 v0, s32, v0
340; GFX9-NEXT:    scratch_load_dword v0, v0, off glc
341; GFX9-NEXT:    s_waitcnt vmcnt(0)
342; GFX9-NEXT:    s_setpc_b64 s[30:31]
343;
344; GFX10-LABEL: store_load_vindex_foo:
345; GFX10:       ; %bb.0: ; %bb
346; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
347; GFX10-NEXT:    v_and_b32_e32 v1, 15, v0
348; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
349; GFX10-NEXT:    v_mov_b32_e32 v2, 15
350; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
351; GFX10-NEXT:    v_add_nc_u32_e32 v0, s32, v0
352; GFX10-NEXT:    v_add_nc_u32_e32 v1, s32, v1
353; GFX10-NEXT:    scratch_store_dword v0, v2, off
354; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
355; GFX10-NEXT:    scratch_load_dword v0, v1, off glc dlc
356; GFX10-NEXT:    s_waitcnt vmcnt(0)
357; GFX10-NEXT:    s_setpc_b64 s[30:31]
358;
359; GFX940-LABEL: store_load_vindex_foo:
360; GFX940:       ; %bb.0: ; %bb
361; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
362; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
363; GFX940-NEXT:    v_add_u32_e32 v1, s32, v1
364; GFX940-NEXT:    v_mov_b32_e32 v2, 15
365; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
366; GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
367; GFX940-NEXT:    s_waitcnt vmcnt(0)
368; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
369; GFX940-NEXT:    scratch_load_dword v0, v0, s32 sc0 sc1
370; GFX940-NEXT:    s_waitcnt vmcnt(0)
371; GFX940-NEXT:    s_setpc_b64 s[30:31]
372;
373; GFX11-LABEL: store_load_vindex_foo:
374; GFX11:       ; %bb.0: ; %bb
375; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
376; GFX11-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0
377; GFX11-NEXT:    v_and_b32_e32 v0, 15, v0
378; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
379; GFX11-NEXT:    v_add_nc_u32_e32 v1, s32, v1
380; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
381; GFX11-NEXT:    scratch_store_b32 v1, v2, off dlc
382; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
383; GFX11-NEXT:    scratch_load_b32 v0, v0, s32 glc dlc
384; GFX11-NEXT:    s_waitcnt vmcnt(0)
385; GFX11-NEXT:    s_setpc_b64 s[30:31]
386;
387; GFX12-LABEL: store_load_vindex_foo:
388; GFX12:       ; %bb.0: ; %bb
389; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
390; GFX12-NEXT:    s_wait_expcnt 0x0
391; GFX12-NEXT:    s_wait_samplecnt 0x0
392; GFX12-NEXT:    s_wait_bvhcnt 0x0
393; GFX12-NEXT:    s_wait_kmcnt 0x0
394; GFX12-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
395; GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
396; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
397; GFX12-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
398; GFX12-NEXT:    s_wait_storecnt 0x0
399; GFX12-NEXT:    scratch_store_b32 v0, v2, s32 scope:SCOPE_SYS
400; GFX12-NEXT:    s_wait_storecnt 0x0
401; GFX12-NEXT:    scratch_load_b32 v0, v1, s32 scope:SCOPE_SYS
402; GFX12-NEXT:    s_wait_loadcnt 0x0
403; GFX12-NEXT:    s_setpc_b64 s[30:31]
404;
405; UNALIGNED_GFX9-LABEL: store_load_vindex_foo:
406; UNALIGNED_GFX9:       ; %bb.0: ; %bb
407; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
408; UNALIGNED_GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
409; UNALIGNED_GFX9-NEXT:    v_and_b32_e32 v0, 15, v0
410; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v1, s32, v1
411; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v2, 15
412; UNALIGNED_GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
413; UNALIGNED_GFX9-NEXT:    scratch_store_dword v1, v2, off
414; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
415; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v0, s32, v0
416; UNALIGNED_GFX9-NEXT:    scratch_load_dword v0, v0, off glc
417; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
418; UNALIGNED_GFX9-NEXT:    s_setpc_b64 s[30:31]
419;
420; UNALIGNED_GFX10-LABEL: store_load_vindex_foo:
421; UNALIGNED_GFX10:       ; %bb.0: ; %bb
422; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
423; UNALIGNED_GFX10-NEXT:    v_and_b32_e32 v1, 15, v0
424; UNALIGNED_GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
425; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v2, 15
426; UNALIGNED_GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
427; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v0, s32, v0
428; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v1, s32, v1
429; UNALIGNED_GFX10-NEXT:    scratch_store_dword v0, v2, off
430; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
431; UNALIGNED_GFX10-NEXT:    scratch_load_dword v0, v1, off glc dlc
432; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
433; UNALIGNED_GFX10-NEXT:    s_setpc_b64 s[30:31]
434;
435; UNALIGNED_GFX940-LABEL: store_load_vindex_foo:
436; UNALIGNED_GFX940:       ; %bb.0: ; %bb
437; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
438; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
439; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v1, s32, v1
440; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v2, 15
441; UNALIGNED_GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
442; UNALIGNED_GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
443; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
444; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
445; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, v0, s32 sc0 sc1
446; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
447; UNALIGNED_GFX940-NEXT:    s_setpc_b64 s[30:31]
448;
449; UNALIGNED_GFX11-LABEL: store_load_vindex_foo:
450; UNALIGNED_GFX11:       ; %bb.0: ; %bb
451; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
452; UNALIGNED_GFX11-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0
453; UNALIGNED_GFX11-NEXT:    v_and_b32_e32 v0, 15, v0
454; UNALIGNED_GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
455; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v1, s32, v1
456; UNALIGNED_GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
457; UNALIGNED_GFX11-NEXT:    scratch_store_b32 v1, v2, off dlc
458; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
459; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v0, v0, s32 glc dlc
460; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
461; UNALIGNED_GFX11-NEXT:    s_setpc_b64 s[30:31]
462;
463; UNALIGNED_GFX12-LABEL: store_load_vindex_foo:
464; UNALIGNED_GFX12:       ; %bb.0: ; %bb
465; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
466; UNALIGNED_GFX12-NEXT:    s_wait_expcnt 0x0
467; UNALIGNED_GFX12-NEXT:    s_wait_samplecnt 0x0
468; UNALIGNED_GFX12-NEXT:    s_wait_bvhcnt 0x0
469; UNALIGNED_GFX12-NEXT:    s_wait_kmcnt 0x0
470; UNALIGNED_GFX12-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
471; UNALIGNED_GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
472; UNALIGNED_GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
473; UNALIGNED_GFX12-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
474; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
475; UNALIGNED_GFX12-NEXT:    scratch_store_b32 v0, v2, s32 scope:SCOPE_SYS
476; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
477; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v0, v1, s32 scope:SCOPE_SYS
478; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
479; UNALIGNED_GFX12-NEXT:    s_setpc_b64 s[30:31]
480bb:
  ; 32 x float array in addrspace(5); the only memory this function touches.
481  %i = alloca [32 x float], align 4, addrspace(5)
  ; Store target: element %idx (index taken directly from the argument).
482  %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx
  ; volatile keeps the store in the output even though nothing reads it back.
483  store volatile i32 15, ptr addrspace(5) %i7, align 4
  ; Load target: element (%idx & 15), i.e. an index in the range 0..15.
484  %i9 = and i32 %idx, 15
485  %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9
  ; The loaded value is unused; volatile keeps the load in the output.
486  %i12 = load volatile i32, ptr addrspace(5) %i10, align 4
487  ret void
488}
489
; Non-kernel function taking an addrspace(5) pointer %arg.
; It stores the float constant 10.0 to the element one float past %arg
; (non-volatile store, no alloca). In the expected output the constant appears
; as the bit pattern 0x41200000 and the one-element displacement is folded
; into the store as "offset:4".
490define void @private_ptr_foo(ptr addrspace(5) nocapture %arg) {
491; GFX9-LABEL: private_ptr_foo:
492; GFX9:       ; %bb.0:
493; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
494; GFX9-NEXT:    v_mov_b32_e32 v1, 0x41200000
495; GFX9-NEXT:    scratch_store_dword v0, v1, off offset:4
496; GFX9-NEXT:    s_waitcnt vmcnt(0)
497; GFX9-NEXT:    s_setpc_b64 s[30:31]
498;
499; GFX10-LABEL: private_ptr_foo:
500; GFX10:       ; %bb.0:
501; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
502; GFX10-NEXT:    v_mov_b32_e32 v1, 0x41200000
503; GFX10-NEXT:    scratch_store_dword v0, v1, off offset:4
504; GFX10-NEXT:    s_setpc_b64 s[30:31]
505;
506; GFX940-LABEL: private_ptr_foo:
507; GFX940:       ; %bb.0:
508; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
509; GFX940-NEXT:    v_mov_b32_e32 v1, 0x41200000
510; GFX940-NEXT:    scratch_store_dword v0, v1, off offset:4 sc0 sc1
511; GFX940-NEXT:    s_waitcnt vmcnt(0)
512; GFX940-NEXT:    s_setpc_b64 s[30:31]
513;
514; GFX11-LABEL: private_ptr_foo:
515; GFX11:       ; %bb.0:
516; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
517; GFX11-NEXT:    v_mov_b32_e32 v1, 0x41200000
518; GFX11-NEXT:    scratch_store_b32 v0, v1, off offset:4
519; GFX11-NEXT:    s_setpc_b64 s[30:31]
520;
521; GFX12-LABEL: private_ptr_foo:
522; GFX12:       ; %bb.0:
523; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
524; GFX12-NEXT:    s_wait_expcnt 0x0
525; GFX12-NEXT:    s_wait_samplecnt 0x0
526; GFX12-NEXT:    s_wait_bvhcnt 0x0
527; GFX12-NEXT:    s_wait_kmcnt 0x0
528; GFX12-NEXT:    v_mov_b32_e32 v1, 0x41200000
529; GFX12-NEXT:    scratch_store_b32 v0, v1, off offset:4
530; GFX12-NEXT:    s_setpc_b64 s[30:31]
531;
532; UNALIGNED_GFX9-LABEL: private_ptr_foo:
533; UNALIGNED_GFX9:       ; %bb.0:
534; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
535; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v1, 0x41200000
536; UNALIGNED_GFX9-NEXT:    scratch_store_dword v0, v1, off offset:4
537; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
538; UNALIGNED_GFX9-NEXT:    s_setpc_b64 s[30:31]
539;
540; UNALIGNED_GFX10-LABEL: private_ptr_foo:
541; UNALIGNED_GFX10:       ; %bb.0:
542; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
543; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v1, 0x41200000
544; UNALIGNED_GFX10-NEXT:    scratch_store_dword v0, v1, off offset:4
545; UNALIGNED_GFX10-NEXT:    s_setpc_b64 s[30:31]
546;
547; UNALIGNED_GFX940-LABEL: private_ptr_foo:
548; UNALIGNED_GFX940:       ; %bb.0:
549; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
550; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v1, 0x41200000
551; UNALIGNED_GFX940-NEXT:    scratch_store_dword v0, v1, off offset:4 sc0 sc1
552; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
553; UNALIGNED_GFX940-NEXT:    s_setpc_b64 s[30:31]
554;
555; UNALIGNED_GFX11-LABEL: private_ptr_foo:
556; UNALIGNED_GFX11:       ; %bb.0:
557; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
558; UNALIGNED_GFX11-NEXT:    v_mov_b32_e32 v1, 0x41200000
559; UNALIGNED_GFX11-NEXT:    scratch_store_b32 v0, v1, off offset:4
560; UNALIGNED_GFX11-NEXT:    s_setpc_b64 s[30:31]
561;
562; UNALIGNED_GFX12-LABEL: private_ptr_foo:
563; UNALIGNED_GFX12:       ; %bb.0:
564; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
565; UNALIGNED_GFX12-NEXT:    s_wait_expcnt 0x0
566; UNALIGNED_GFX12-NEXT:    s_wait_samplecnt 0x0
567; UNALIGNED_GFX12-NEXT:    s_wait_bvhcnt 0x0
568; UNALIGNED_GFX12-NEXT:    s_wait_kmcnt 0x0
569; UNALIGNED_GFX12-NEXT:    v_mov_b32_e32 v1, 0x41200000
570; UNALIGNED_GFX12-NEXT:    scratch_store_b32 v0, v1, off offset:4
571; UNALIGNED_GFX12-NEXT:    s_setpc_b64 s[30:31]
  ; Address of the float at index 1 relative to %arg.
572  %gep = getelementptr inbounds float, ptr addrspace(5) %arg, i32 1
  ; Plain (non-volatile) store of 10.0 through the computed pointer.
573  store float 1.000000e+01, ptr addrspace(5) %gep, align 4
574  ret void
575}
576
577define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
578; GFX9-LABEL: store_load_sindex_small_offset_kernel:
579; GFX9:       ; %bb.0: ; %bb
580; GFX9-NEXT:    s_load_dword s0, s[4:5], 0x0
581; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
582; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
583; GFX9-NEXT:    s_mov_b32 s1, 0
584; GFX9-NEXT:    scratch_load_dword v0, off, s1 glc
585; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
586; GFX9-NEXT:    s_lshl_b32 s1, s0, 2
587; GFX9-NEXT:    s_and_b32 s0, s0, 15
588; GFX9-NEXT:    v_mov_b32_e32 v0, 15
589; GFX9-NEXT:    s_addk_i32 s1, 0x100
590; GFX9-NEXT:    s_lshl_b32 s0, s0, 2
591; GFX9-NEXT:    scratch_store_dword off, v0, s1
592; GFX9-NEXT:    s_waitcnt vmcnt(0)
593; GFX9-NEXT:    s_addk_i32 s0, 0x100
594; GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
595; GFX9-NEXT:    s_waitcnt vmcnt(0)
596; GFX9-NEXT:    s_endpgm
597;
598; GFX10-LABEL: store_load_sindex_small_offset_kernel:
599; GFX10:       ; %bb.0: ; %bb
600; GFX10-NEXT:    s_add_u32 s8, s8, s13
601; GFX10-NEXT:    s_addc_u32 s9, s9, 0
602; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
603; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
604; GFX10-NEXT:    s_load_dword s0, s[4:5], 0x0
605; GFX10-NEXT:    scratch_load_dword v0, off, off glc dlc
606; GFX10-NEXT:    s_waitcnt vmcnt(0)
607; GFX10-NEXT:    v_mov_b32_e32 v0, 15
608; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
609; GFX10-NEXT:    s_and_b32 s1, s0, 15
610; GFX10-NEXT:    s_lshl_b32 s0, s0, 2
611; GFX10-NEXT:    s_lshl_b32 s1, s1, 2
612; GFX10-NEXT:    s_addk_i32 s0, 0x100
613; GFX10-NEXT:    s_addk_i32 s1, 0x100
614; GFX10-NEXT:    scratch_store_dword off, v0, s0
615; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
616; GFX10-NEXT:    scratch_load_dword v0, off, s1 glc dlc
617; GFX10-NEXT:    s_waitcnt vmcnt(0)
618; GFX10-NEXT:    s_endpgm
619;
620; GFX940-LABEL: store_load_sindex_small_offset_kernel:
621; GFX940:       ; %bb.0: ; %bb
622; GFX940-NEXT:    s_load_dword s0, s[4:5], 0x0
623; GFX940-NEXT:    scratch_load_dword v0, off, off sc0 sc1
624; GFX940-NEXT:    s_waitcnt vmcnt(0)
625; GFX940-NEXT:    v_mov_b32_e32 v0, 15
626; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
627; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
628; GFX940-NEXT:    s_and_b32 s0, s0, 15
629; GFX940-NEXT:    s_addk_i32 s1, 0x100
630; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
631; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
632; GFX940-NEXT:    s_waitcnt vmcnt(0)
633; GFX940-NEXT:    s_addk_i32 s0, 0x100
634; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
635; GFX940-NEXT:    s_waitcnt vmcnt(0)
636; GFX940-NEXT:    s_endpgm
637;
638; GFX11-LABEL: store_load_sindex_small_offset_kernel:
639; GFX11:       ; %bb.0: ; %bb
640; GFX11-NEXT:    s_load_b32 s0, s[4:5], 0x0
641; GFX11-NEXT:    scratch_load_b32 v0, off, off glc dlc
642; GFX11-NEXT:    s_waitcnt vmcnt(0)
643; GFX11-NEXT:    v_mov_b32_e32 v0, 15
644; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
645; GFX11-NEXT:    s_and_b32 s1, s0, 15
646; GFX11-NEXT:    s_lshl_b32 s0, s0, 2
647; GFX11-NEXT:    s_lshl_b32 s1, s1, 2
648; GFX11-NEXT:    s_addk_i32 s0, 0x100
649; GFX11-NEXT:    s_addk_i32 s1, 0x100
650; GFX11-NEXT:    scratch_store_b32 off, v0, s0 dlc
651; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
652; GFX11-NEXT:    scratch_load_b32 v0, off, s1 glc dlc
653; GFX11-NEXT:    s_waitcnt vmcnt(0)
654; GFX11-NEXT:    s_endpgm
655;
656; GFX12-LABEL: store_load_sindex_small_offset_kernel:
657; GFX12:       ; %bb.0: ; %bb
658; GFX12-NEXT:    s_load_b32 s0, s[4:5], 0x0
659; GFX12-NEXT:    scratch_load_b32 v0, off, off scope:SCOPE_SYS
660; GFX12-NEXT:    s_wait_loadcnt 0x0
661; GFX12-NEXT:    v_mov_b32_e32 v0, 15
662; GFX12-NEXT:    s_wait_kmcnt 0x0
663; GFX12-NEXT:    s_and_b32 s1, s0, 15
664; GFX12-NEXT:    s_lshl_b32 s0, s0, 2
665; GFX12-NEXT:    s_lshl_b32 s1, s1, 2
666; GFX12-NEXT:    s_addk_co_i32 s0, 0x100
667; GFX12-NEXT:    s_addk_co_i32 s1, 0x100
668; GFX12-NEXT:    scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
669; GFX12-NEXT:    s_wait_storecnt 0x0
670; GFX12-NEXT:    scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
671; GFX12-NEXT:    s_wait_loadcnt 0x0
672; GFX12-NEXT:    s_endpgm
673;
674; UNALIGNED_GFX9-LABEL: store_load_sindex_small_offset_kernel:
675; UNALIGNED_GFX9:       ; %bb.0: ; %bb
676; UNALIGNED_GFX9-NEXT:    s_load_dword s0, s[4:5], 0x0
677; UNALIGNED_GFX9-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
678; UNALIGNED_GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
679; UNALIGNED_GFX9-NEXT:    s_mov_b32 s1, 0
680; UNALIGNED_GFX9-NEXT:    scratch_load_dword v0, off, s1 glc
681; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
682; UNALIGNED_GFX9-NEXT:    s_lshl_b32 s1, s0, 2
683; UNALIGNED_GFX9-NEXT:    s_and_b32 s0, s0, 15
684; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v0, 15
685; UNALIGNED_GFX9-NEXT:    s_addk_i32 s1, 0x100
686; UNALIGNED_GFX9-NEXT:    s_lshl_b32 s0, s0, 2
687; UNALIGNED_GFX9-NEXT:    scratch_store_dword off, v0, s1
688; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
689; UNALIGNED_GFX9-NEXT:    s_addk_i32 s0, 0x100
690; UNALIGNED_GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
691; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
692; UNALIGNED_GFX9-NEXT:    s_endpgm
693;
694; UNALIGNED_GFX10-LABEL: store_load_sindex_small_offset_kernel:
695; UNALIGNED_GFX10:       ; %bb.0: ; %bb
696; UNALIGNED_GFX10-NEXT:    s_add_u32 s8, s8, s13
697; UNALIGNED_GFX10-NEXT:    s_addc_u32 s9, s9, 0
698; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
699; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
700; UNALIGNED_GFX10-NEXT:    s_load_dword s0, s[4:5], 0x0
701; UNALIGNED_GFX10-NEXT:    scratch_load_dword v0, off, off glc dlc
702; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
703; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v0, 15
704; UNALIGNED_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
705; UNALIGNED_GFX10-NEXT:    s_and_b32 s1, s0, 15
706; UNALIGNED_GFX10-NEXT:    s_lshl_b32 s0, s0, 2
707; UNALIGNED_GFX10-NEXT:    s_lshl_b32 s1, s1, 2
708; UNALIGNED_GFX10-NEXT:    s_addk_i32 s0, 0x100
709; UNALIGNED_GFX10-NEXT:    s_addk_i32 s1, 0x100
710; UNALIGNED_GFX10-NEXT:    scratch_store_dword off, v0, s0
711; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
712; UNALIGNED_GFX10-NEXT:    scratch_load_dword v0, off, s1 glc dlc
713; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
714; UNALIGNED_GFX10-NEXT:    s_endpgm
715;
716; UNALIGNED_GFX940-LABEL: store_load_sindex_small_offset_kernel:
717; UNALIGNED_GFX940:       ; %bb.0: ; %bb
718; UNALIGNED_GFX940-NEXT:    s_load_dword s0, s[4:5], 0x0
719; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, off, off sc0 sc1
720; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
721; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v0, 15
722; UNALIGNED_GFX940-NEXT:    s_waitcnt lgkmcnt(0)
723; UNALIGNED_GFX940-NEXT:    s_lshl_b32 s1, s0, 2
724; UNALIGNED_GFX940-NEXT:    s_and_b32 s0, s0, 15
725; UNALIGNED_GFX940-NEXT:    s_addk_i32 s1, 0x100
726; UNALIGNED_GFX940-NEXT:    s_lshl_b32 s0, s0, 2
727; UNALIGNED_GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
728; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
729; UNALIGNED_GFX940-NEXT:    s_addk_i32 s0, 0x100
730; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
731; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
732; UNALIGNED_GFX940-NEXT:    s_endpgm
733;
734; UNALIGNED_GFX11-LABEL: store_load_sindex_small_offset_kernel:
735; UNALIGNED_GFX11:       ; %bb.0: ; %bb
736; UNALIGNED_GFX11-NEXT:    s_load_b32 s0, s[4:5], 0x0
737; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v0, off, off glc dlc
738; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
739; UNALIGNED_GFX11-NEXT:    v_mov_b32_e32 v0, 15
740; UNALIGNED_GFX11-NEXT:    s_waitcnt lgkmcnt(0)
741; UNALIGNED_GFX11-NEXT:    s_and_b32 s1, s0, 15
742; UNALIGNED_GFX11-NEXT:    s_lshl_b32 s0, s0, 2
743; UNALIGNED_GFX11-NEXT:    s_lshl_b32 s1, s1, 2
744; UNALIGNED_GFX11-NEXT:    s_addk_i32 s0, 0x100
745; UNALIGNED_GFX11-NEXT:    s_addk_i32 s1, 0x100
746; UNALIGNED_GFX11-NEXT:    scratch_store_b32 off, v0, s0 dlc
747; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
748; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v0, off, s1 glc dlc
749; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
750; UNALIGNED_GFX11-NEXT:    s_endpgm
751;
752; UNALIGNED_GFX12-LABEL: store_load_sindex_small_offset_kernel:
753; UNALIGNED_GFX12:       ; %bb.0: ; %bb
754; UNALIGNED_GFX12-NEXT:    s_load_b32 s0, s[4:5], 0x0
755; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v0, off, off scope:SCOPE_SYS
756; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
757; UNALIGNED_GFX12-NEXT:    v_mov_b32_e32 v0, 15
758; UNALIGNED_GFX12-NEXT:    s_wait_kmcnt 0x0
759; UNALIGNED_GFX12-NEXT:    s_and_b32 s1, s0, 15
760; UNALIGNED_GFX12-NEXT:    s_lshl_b32 s0, s0, 2
761; UNALIGNED_GFX12-NEXT:    s_lshl_b32 s1, s1, 2
762; UNALIGNED_GFX12-NEXT:    s_addk_co_i32 s0, 0x100
763; UNALIGNED_GFX12-NEXT:    s_addk_co_i32 s1, 0x100
764; UNALIGNED_GFX12-NEXT:    scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
765; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
766; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
767; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
768; UNALIGNED_GFX12-NEXT:    s_endpgm
769bb:
770  %padding = alloca [64 x i32], align 4, addrspace(5)
771  %i = alloca [32 x float], align 4, addrspace(5)
772  %pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 undef
773  %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4
774  %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx
775  store volatile i32 15, ptr addrspace(5) %i7, align 4
776  %i9 = and i32 %idx, 15
777  %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9
778  %i12 = load volatile i32, ptr addrspace(5) %i10, align 4
779  ret void
780}
781
; Kernel variant of the "vindex small offset" case: the array index comes from
; llvm.amdgcn.workitem.id.x, and the indexed array %i is allocated after a
; 64 x i32 (256-byte) padding alloca. The body stores 15 to %i[tid] and then
; reloads from %i[31 - tid]; both accesses are volatile.
; In the expected output below, the 256-byte displacement appears either as an
; add of 0x100 or as an offset:256 immediate, and the 31 * 4 = 124 byte
; displacement appears in the final load's offset field (the gfx1200 output
; carries both as offset:380).
782define amdgpu_kernel void @store_load_vindex_small_offset_kernel() {
783; GFX9-LABEL: store_load_vindex_small_offset_kernel:
784; GFX9:       ; %bb.0: ; %bb
785; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
786; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
787; GFX9-NEXT:    s_mov_b32 s0, 0
788; GFX9-NEXT:    scratch_load_dword v1, off, s0 glc
789; GFX9-NEXT:    s_waitcnt vmcnt(0)
790; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
791; GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
792; GFX9-NEXT:    v_add_u32_e32 v1, 0x100, v1
793; GFX9-NEXT:    v_mov_b32_e32 v2, 15
794; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
795; GFX9-NEXT:    scratch_store_dword v1, v2, off
796; GFX9-NEXT:    s_waitcnt vmcnt(0)
797; GFX9-NEXT:    v_add_u32_e32 v0, 0x100, v0
798; GFX9-NEXT:    scratch_load_dword v0, v0, off offset:124 glc
799; GFX9-NEXT:    s_waitcnt vmcnt(0)
800; GFX9-NEXT:    s_endpgm
801;
802; GFX10-LABEL: store_load_vindex_small_offset_kernel:
803; GFX10:       ; %bb.0: ; %bb
804; GFX10-NEXT:    s_add_u32 s8, s8, s13
805; GFX10-NEXT:    s_addc_u32 s9, s9, 0
806; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
807; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
808; GFX10-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
809; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
810; GFX10-NEXT:    v_mov_b32_e32 v2, 15
811; GFX10-NEXT:    scratch_load_dword v3, off, off glc dlc
812; GFX10-NEXT:    s_waitcnt vmcnt(0)
813; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
814; GFX10-NEXT:    v_add_nc_u32_e32 v0, 0x100, v0
815; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x100, v1
816; GFX10-NEXT:    scratch_store_dword v0, v2, off
817; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
818; GFX10-NEXT:    scratch_load_dword v0, v1, off offset:124 glc dlc
819; GFX10-NEXT:    s_waitcnt vmcnt(0)
820; GFX10-NEXT:    s_endpgm
821;
822; GFX940-LABEL: store_load_vindex_small_offset_kernel:
823; GFX940:       ; %bb.0: ; %bb
824; GFX940-NEXT:    scratch_load_dword v1, off, off sc0 sc1
825; GFX940-NEXT:    s_waitcnt vmcnt(0)
826; GFX940-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
827; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
828; GFX940-NEXT:    v_sub_u32_e32 v0, 0, v0
829; GFX940-NEXT:    v_mov_b32_e32 v2, 15
830; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
831; GFX940-NEXT:    scratch_store_dword v1, v2, off offset:256 sc0 sc1
832; GFX940-NEXT:    s_waitcnt vmcnt(0)
833; GFX940-NEXT:    v_add_u32_e32 v0, 0x100, v0
834; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
835; GFX940-NEXT:    s_waitcnt vmcnt(0)
836; GFX940-NEXT:    s_endpgm
837;
838; GFX11-LABEL: store_load_vindex_small_offset_kernel:
839; GFX11:       ; %bb.0: ; %bb
840; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
841; GFX11-NEXT:    scratch_load_b32 v3, off, off glc dlc
842; GFX11-NEXT:    s_waitcnt vmcnt(0)
843; GFX11-NEXT:    v_mov_b32_e32 v2, 15
844; GFX11-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
845; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
846; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
847; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
848; GFX11-NEXT:    scratch_store_b32 v0, v2, off offset:256 dlc
849; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
850; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x100, v1
851; GFX11-NEXT:    scratch_load_b32 v0, v1, off offset:124 glc dlc
852; GFX11-NEXT:    s_waitcnt vmcnt(0)
853; GFX11-NEXT:    s_endpgm
854;
855; GFX12-LABEL: store_load_vindex_small_offset_kernel:
856; GFX12:       ; %bb.0: ; %bb
857; GFX12-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
858; GFX12-NEXT:    scratch_load_b32 v3, off, off scope:SCOPE_SYS
859; GFX12-NEXT:    s_wait_loadcnt 0x0
860; GFX12-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
861; GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
862; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
863; GFX12-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v1
864; GFX12-NEXT:    scratch_store_b32 v0, v2, off offset:256 scope:SCOPE_SYS
865; GFX12-NEXT:    s_wait_storecnt 0x0
866; GFX12-NEXT:    scratch_load_b32 v0, v1, off offset:380 scope:SCOPE_SYS
867; GFX12-NEXT:    s_wait_loadcnt 0x0
868; GFX12-NEXT:    s_endpgm
869;
870; UNALIGNED_GFX9-LABEL: store_load_vindex_small_offset_kernel:
871; UNALIGNED_GFX9:       ; %bb.0: ; %bb
872; UNALIGNED_GFX9-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
873; UNALIGNED_GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
874; UNALIGNED_GFX9-NEXT:    s_mov_b32 s0, 0
875; UNALIGNED_GFX9-NEXT:    scratch_load_dword v1, off, s0 glc
876; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
877; UNALIGNED_GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
878; UNALIGNED_GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
879; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v1, 0x100, v1
880; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v2, 15
881; UNALIGNED_GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
882; UNALIGNED_GFX9-NEXT:    scratch_store_dword v1, v2, off
883; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
884; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v0, 0x100, v0
885; UNALIGNED_GFX9-NEXT:    scratch_load_dword v0, v0, off offset:124 glc
886; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
887; UNALIGNED_GFX9-NEXT:    s_endpgm
888;
889; UNALIGNED_GFX10-LABEL: store_load_vindex_small_offset_kernel:
890; UNALIGNED_GFX10:       ; %bb.0: ; %bb
891; UNALIGNED_GFX10-NEXT:    s_add_u32 s8, s8, s13
892; UNALIGNED_GFX10-NEXT:    s_addc_u32 s9, s9, 0
893; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
894; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
895; UNALIGNED_GFX10-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
896; UNALIGNED_GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
897; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v2, 15
898; UNALIGNED_GFX10-NEXT:    scratch_load_dword v3, off, off glc dlc
899; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
900; UNALIGNED_GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
901; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v0, 0x100, v0
902; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x100, v1
903; UNALIGNED_GFX10-NEXT:    scratch_store_dword v0, v2, off
904; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
905; UNALIGNED_GFX10-NEXT:    scratch_load_dword v0, v1, off offset:124 glc dlc
906; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
907; UNALIGNED_GFX10-NEXT:    s_endpgm
908;
909; UNALIGNED_GFX940-LABEL: store_load_vindex_small_offset_kernel:
910; UNALIGNED_GFX940:       ; %bb.0: ; %bb
911; UNALIGNED_GFX940-NEXT:    scratch_load_dword v1, off, off sc0 sc1
912; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
913; UNALIGNED_GFX940-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
914; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
915; UNALIGNED_GFX940-NEXT:    v_sub_u32_e32 v0, 0, v0
916; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v2, 15
917; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
918; UNALIGNED_GFX940-NEXT:    scratch_store_dword v1, v2, off offset:256 sc0 sc1
919; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
920; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v0, 0x100, v0
921; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
922; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
923; UNALIGNED_GFX940-NEXT:    s_endpgm
924;
925; UNALIGNED_GFX11-LABEL: store_load_vindex_small_offset_kernel:
926; UNALIGNED_GFX11:       ; %bb.0: ; %bb
927; UNALIGNED_GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
928; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v3, off, off glc dlc
929; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
930; UNALIGNED_GFX11-NEXT:    v_mov_b32_e32 v2, 15
931; UNALIGNED_GFX11-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
932; UNALIGNED_GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
933; UNALIGNED_GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
934; UNALIGNED_GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
935; UNALIGNED_GFX11-NEXT:    scratch_store_b32 v0, v2, off offset:256 dlc
936; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
937; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x100, v1
938; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v0, v1, off offset:124 glc dlc
939; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
940; UNALIGNED_GFX11-NEXT:    s_endpgm
941;
942; UNALIGNED_GFX12-LABEL: store_load_vindex_small_offset_kernel:
943; UNALIGNED_GFX12:       ; %bb.0: ; %bb
944; UNALIGNED_GFX12-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
945; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v3, off, off scope:SCOPE_SYS
946; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
947; UNALIGNED_GFX12-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
948; UNALIGNED_GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
949; UNALIGNED_GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
950; UNALIGNED_GFX12-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v1
951; UNALIGNED_GFX12-NEXT:    scratch_store_b32 v0, v2, off offset:256 scope:SCOPE_SYS
952; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
953; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v0, v1, off offset:380 scope:SCOPE_SYS
954; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
955; UNALIGNED_GFX12-NEXT:    s_endpgm
956bb:
  ; 64 x i32 = 256 bytes of padding, allocated ahead of the indexed array %i.
957  %padding = alloca [64 x i32], align 4, addrspace(5)
  ; 32 x float = 128 bytes; this is the array the store/load pair indexes.
958  %i = alloca [32 x float], align 4, addrspace(5)
  ; Volatile read of %padding through an undef index; it shows up as the first
  ; scratch load in the expected output. Presumably its purpose is to keep
  ; %padding referenced so that %i lands at a non-zero displacement -- confirm.
959  %pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 undef
960  %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4
961  %i2 = tail call i32 @llvm.amdgcn.workitem.id.x()
  ; NOTE(review): %i3 is not used by any later instruction in this function.
962  %i3 = zext i32 %i2 to i64
  ; Store the constant 15 to %i[tid].
963  %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i2
964  store volatile i32 15, ptr addrspace(5) %i7, align 4
  ; Reload from the mirrored element %i[31 - tid].
965  %i9 = sub nsw i32 31, %i2
966  %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9
967  %i12 = load volatile i32, ptr addrspace(5) %i10, align 4
968  ret void
969}
970
; Callable-function (non-kernel) variant of the "vindex small offset" case:
; the index is the i32 argument %idx rather than a workitem id. The body
; stores 15 to %i[%idx] and then reloads from %i[%idx & 15]; both accesses are
; volatile, and %i is allocated after a 64 x i32 (256-byte) padding alloca.
; In the expected output below, addresses are formed relative to s32, the
; 256-byte displacement appears either as an add of 0x100 or as an offset:256
; immediate, and the function returns with s_setpc_b64 s[30:31].
971define void @store_load_vindex_small_offset_foo(i32 %idx) {
972; GFX9-LABEL: store_load_vindex_small_offset_foo:
973; GFX9:       ; %bb.0: ; %bb
974; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
975; GFX9-NEXT:    scratch_load_dword v1, off, s32 glc
976; GFX9-NEXT:    s_waitcnt vmcnt(0)
977; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
978; GFX9-NEXT:    v_and_b32_e32 v0, 15, v0
979; GFX9-NEXT:    v_add_u32_e32 v1, s32, v1
980; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
981; GFX9-NEXT:    v_add_u32_e32 v1, 0x100, v1
982; GFX9-NEXT:    v_mov_b32_e32 v2, 15
983; GFX9-NEXT:    v_add_u32_e32 v0, s32, v0
984; GFX9-NEXT:    scratch_store_dword v1, v2, off
985; GFX9-NEXT:    s_waitcnt vmcnt(0)
986; GFX9-NEXT:    v_add_u32_e32 v0, 0x100, v0
987; GFX9-NEXT:    scratch_load_dword v0, v0, off glc
988; GFX9-NEXT:    s_waitcnt vmcnt(0)
989; GFX9-NEXT:    s_setpc_b64 s[30:31]
990;
991; GFX10-LABEL: store_load_vindex_small_offset_foo:
992; GFX10:       ; %bb.0: ; %bb
993; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
994; GFX10-NEXT:    v_and_b32_e32 v1, 15, v0
995; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
996; GFX10-NEXT:    v_mov_b32_e32 v2, 15
997; GFX10-NEXT:    scratch_load_dword v3, off, s32 glc dlc
998; GFX10-NEXT:    s_waitcnt vmcnt(0)
999; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
1000; GFX10-NEXT:    v_add_nc_u32_e32 v0, s32, v0
1001; GFX10-NEXT:    v_add_nc_u32_e32 v1, s32, v1
1002; GFX10-NEXT:    v_add_nc_u32_e32 v0, 0x100, v0
1003; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x100, v1
1004; GFX10-NEXT:    scratch_store_dword v0, v2, off
1005; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1006; GFX10-NEXT:    scratch_load_dword v0, v1, off glc dlc
1007; GFX10-NEXT:    s_waitcnt vmcnt(0)
1008; GFX10-NEXT:    s_setpc_b64 s[30:31]
1009;
1010; GFX940-LABEL: store_load_vindex_small_offset_foo:
1011; GFX940:       ; %bb.0: ; %bb
1012; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1013; GFX940-NEXT:    scratch_load_dword v1, off, s32 sc0 sc1
1014; GFX940-NEXT:    s_waitcnt vmcnt(0)
1015; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
1016; GFX940-NEXT:    v_add_u32_e32 v1, s32, v1
1017; GFX940-NEXT:    v_add_u32_e32 v1, 0x100, v1
1018; GFX940-NEXT:    v_mov_b32_e32 v2, 15
1019; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
1020; GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
1021; GFX940-NEXT:    s_waitcnt vmcnt(0)
1022; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1023; GFX940-NEXT:    scratch_load_dword v0, v0, s32 offset:256 sc0 sc1
1024; GFX940-NEXT:    s_waitcnt vmcnt(0)
1025; GFX940-NEXT:    s_setpc_b64 s[30:31]
1026;
1027; GFX11-LABEL: store_load_vindex_small_offset_foo:
1028; GFX11:       ; %bb.0: ; %bb
1029; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1030; GFX11-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0
1031; GFX11-NEXT:    v_and_b32_e32 v0, 15, v0
1032; GFX11-NEXT:    scratch_load_b32 v3, off, s32 glc dlc
1033; GFX11-NEXT:    s_waitcnt vmcnt(0)
1034; GFX11-NEXT:    v_add_nc_u32_e32 v1, s32, v1
1035; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1036; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1037; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x100, v1
1038; GFX11-NEXT:    scratch_store_b32 v1, v2, off dlc
1039; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1040; GFX11-NEXT:    scratch_load_b32 v0, v0, s32 offset:256 glc dlc
1041; GFX11-NEXT:    s_waitcnt vmcnt(0)
1042; GFX11-NEXT:    s_setpc_b64 s[30:31]
1043;
1044; GFX12-LABEL: store_load_vindex_small_offset_foo:
1045; GFX12:       ; %bb.0: ; %bb
1046; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1047; GFX12-NEXT:    s_wait_expcnt 0x0
1048; GFX12-NEXT:    s_wait_samplecnt 0x0
1049; GFX12-NEXT:    s_wait_bvhcnt 0x0
1050; GFX12-NEXT:    s_wait_kmcnt 0x0
1051; GFX12-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
1052; GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1053; GFX12-NEXT:    scratch_load_b32 v3, off, s32 scope:SCOPE_SYS
1054; GFX12-NEXT:    s_wait_loadcnt 0x0
1055; GFX12-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
1056; GFX12-NEXT:    s_wait_storecnt 0x0
1057; GFX12-NEXT:    scratch_store_b32 v0, v2, s32 offset:256 scope:SCOPE_SYS
1058; GFX12-NEXT:    s_wait_storecnt 0x0
1059; GFX12-NEXT:    scratch_load_b32 v0, v1, s32 offset:256 scope:SCOPE_SYS
1060; GFX12-NEXT:    s_wait_loadcnt 0x0
1061; GFX12-NEXT:    s_setpc_b64 s[30:31]
1062;
1063; UNALIGNED_GFX9-LABEL: store_load_vindex_small_offset_foo:
1064; UNALIGNED_GFX9:       ; %bb.0: ; %bb
1065; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1066; UNALIGNED_GFX9-NEXT:    scratch_load_dword v1, off, s32 glc
1067; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
1068; UNALIGNED_GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
1069; UNALIGNED_GFX9-NEXT:    v_and_b32_e32 v0, 15, v0
1070; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v1, s32, v1
1071; UNALIGNED_GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1072; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v1, 0x100, v1
1073; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v2, 15
1074; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v0, s32, v0
1075; UNALIGNED_GFX9-NEXT:    scratch_store_dword v1, v2, off
1076; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
1077; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v0, 0x100, v0
1078; UNALIGNED_GFX9-NEXT:    scratch_load_dword v0, v0, off glc
1079; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
1080; UNALIGNED_GFX9-NEXT:    s_setpc_b64 s[30:31]
1081;
1082; UNALIGNED_GFX10-LABEL: store_load_vindex_small_offset_foo:
1083; UNALIGNED_GFX10:       ; %bb.0: ; %bb
1084; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1085; UNALIGNED_GFX10-NEXT:    v_and_b32_e32 v1, 15, v0
1086; UNALIGNED_GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1087; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v2, 15
1088; UNALIGNED_GFX10-NEXT:    scratch_load_dword v3, off, s32 glc dlc
1089; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
1090; UNALIGNED_GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
1091; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v0, s32, v0
1092; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v1, s32, v1
1093; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v0, 0x100, v0
1094; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x100, v1
1095; UNALIGNED_GFX10-NEXT:    scratch_store_dword v0, v2, off
1096; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1097; UNALIGNED_GFX10-NEXT:    scratch_load_dword v0, v1, off glc dlc
1098; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
1099; UNALIGNED_GFX10-NEXT:    s_setpc_b64 s[30:31]
1100;
1101; UNALIGNED_GFX940-LABEL: store_load_vindex_small_offset_foo:
1102; UNALIGNED_GFX940:       ; %bb.0: ; %bb
1103; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1104; UNALIGNED_GFX940-NEXT:    scratch_load_dword v1, off, s32 sc0 sc1
1105; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
1106; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
1107; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v1, s32, v1
1108; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v1, 0x100, v1
1109; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v2, 15
1110; UNALIGNED_GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
1111; UNALIGNED_GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
1112; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
1113; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1114; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, v0, s32 offset:256 sc0 sc1
1115; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
1116; UNALIGNED_GFX940-NEXT:    s_setpc_b64 s[30:31]
1117;
1118; UNALIGNED_GFX11-LABEL: store_load_vindex_small_offset_foo:
1119; UNALIGNED_GFX11:       ; %bb.0: ; %bb
1120; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1121; UNALIGNED_GFX11-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0
1122; UNALIGNED_GFX11-NEXT:    v_and_b32_e32 v0, 15, v0
1123; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v3, off, s32 glc dlc
1124; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
1125; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v1, s32, v1
1126; UNALIGNED_GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1127; UNALIGNED_GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1128; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x100, v1
1129; UNALIGNED_GFX11-NEXT:    scratch_store_b32 v1, v2, off dlc
1130; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1131; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v0, v0, s32 offset:256 glc dlc
1132; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
1133; UNALIGNED_GFX11-NEXT:    s_setpc_b64 s[30:31]
1134;
1135; UNALIGNED_GFX12-LABEL: store_load_vindex_small_offset_foo:
1136; UNALIGNED_GFX12:       ; %bb.0: ; %bb
1137; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1138; UNALIGNED_GFX12-NEXT:    s_wait_expcnt 0x0
1139; UNALIGNED_GFX12-NEXT:    s_wait_samplecnt 0x0
1140; UNALIGNED_GFX12-NEXT:    s_wait_bvhcnt 0x0
1141; UNALIGNED_GFX12-NEXT:    s_wait_kmcnt 0x0
1142; UNALIGNED_GFX12-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
1143; UNALIGNED_GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1144; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v3, off, s32 scope:SCOPE_SYS
1145; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
1146; UNALIGNED_GFX12-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
1147; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
1148; UNALIGNED_GFX12-NEXT:    scratch_store_b32 v0, v2, s32 offset:256 scope:SCOPE_SYS
1149; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
1150; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v0, v1, s32 offset:256 scope:SCOPE_SYS
1151; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
1152; UNALIGNED_GFX12-NEXT:    s_setpc_b64 s[30:31]
1153bb:
  ; 64 x i32 = 256 bytes of padding, allocated ahead of the indexed array %i.
1154  %padding = alloca [64 x i32], align 4, addrspace(5)
  ; 32 x float = 128 bytes; this is the array the store/load pair indexes.
1155  %i = alloca [32 x float], align 4, addrspace(5)
  ; Volatile read of %padding through an undef index; it shows up as the first
  ; scratch load in the expected output. Presumably its purpose is to keep
  ; %padding referenced so that %i lands at a non-zero displacement -- confirm.
1156  %pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 undef
1157  %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4
  ; Store the constant 15 to %i[%idx].
1158  %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx
1159  store volatile i32 15, ptr addrspace(5) %i7, align 4
  ; Reload from %i[%idx & 15], i.e. the index masked to its low four bits.
1160  %i9 = and i32 %idx, 15
1161  %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9
1162  %i12 = load volatile i32, ptr addrspace(5) %i10, align 4
1163  ret void
1164}
1165
1166define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
1167; GFX9-LABEL: store_load_sindex_large_offset_kernel:
1168; GFX9:       ; %bb.0: ; %bb
1169; GFX9-NEXT:    s_load_dword s0, s[4:5], 0x0
1170; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
1171; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
1172; GFX9-NEXT:    s_mov_b32 s1, 0
1173; GFX9-NEXT:    scratch_load_dword v0, off, s1 offset:4 glc
1174; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1175; GFX9-NEXT:    s_lshl_b32 s1, s0, 2
1176; GFX9-NEXT:    s_and_b32 s0, s0, 15
1177; GFX9-NEXT:    v_mov_b32_e32 v0, 15
1178; GFX9-NEXT:    s_addk_i32 s1, 0x4004
1179; GFX9-NEXT:    s_lshl_b32 s0, s0, 2
1180; GFX9-NEXT:    scratch_store_dword off, v0, s1
1181; GFX9-NEXT:    s_waitcnt vmcnt(0)
1182; GFX9-NEXT:    s_addk_i32 s0, 0x4004
1183; GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
1184; GFX9-NEXT:    s_waitcnt vmcnt(0)
1185; GFX9-NEXT:    s_endpgm
1186;
1187; GFX10-LABEL: store_load_sindex_large_offset_kernel:
1188; GFX10:       ; %bb.0: ; %bb
1189; GFX10-NEXT:    s_add_u32 s8, s8, s13
1190; GFX10-NEXT:    s_addc_u32 s9, s9, 0
1191; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
1192; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
1193; GFX10-NEXT:    s_load_dword s0, s[4:5], 0x0
1194; GFX10-NEXT:    scratch_load_dword v0, off, off offset:4 glc dlc
1195; GFX10-NEXT:    s_waitcnt vmcnt(0)
1196; GFX10-NEXT:    v_mov_b32_e32 v0, 15
1197; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1198; GFX10-NEXT:    s_and_b32 s1, s0, 15
1199; GFX10-NEXT:    s_lshl_b32 s0, s0, 2
1200; GFX10-NEXT:    s_lshl_b32 s1, s1, 2
1201; GFX10-NEXT:    s_addk_i32 s0, 0x4004
1202; GFX10-NEXT:    s_addk_i32 s1, 0x4004
1203; GFX10-NEXT:    scratch_store_dword off, v0, s0
1204; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1205; GFX10-NEXT:    scratch_load_dword v0, off, s1 glc dlc
1206; GFX10-NEXT:    s_waitcnt vmcnt(0)
1207; GFX10-NEXT:    s_endpgm
1208;
1209; GFX940-LABEL: store_load_sindex_large_offset_kernel:
1210; GFX940:       ; %bb.0: ; %bb
1211; GFX940-NEXT:    s_load_dword s0, s[4:5], 0x0
1212; GFX940-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
1213; GFX940-NEXT:    s_waitcnt vmcnt(0)
1214; GFX940-NEXT:    v_mov_b32_e32 v0, 15
1215; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1216; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
1217; GFX940-NEXT:    s_and_b32 s0, s0, 15
1218; GFX940-NEXT:    s_addk_i32 s1, 0x4004
1219; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
1220; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
1221; GFX940-NEXT:    s_waitcnt vmcnt(0)
1222; GFX940-NEXT:    s_addk_i32 s0, 0x4004
1223; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
1224; GFX940-NEXT:    s_waitcnt vmcnt(0)
1225; GFX940-NEXT:    s_endpgm
1226;
1227; GFX11-LABEL: store_load_sindex_large_offset_kernel:
1228; GFX11:       ; %bb.0: ; %bb
1229; GFX11-NEXT:    s_load_b32 s0, s[4:5], 0x0
1230; GFX11-NEXT:    scratch_load_b32 v0, off, off offset:4 glc dlc
1231; GFX11-NEXT:    s_waitcnt vmcnt(0)
1232; GFX11-NEXT:    v_mov_b32_e32 v0, 15
1233; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1234; GFX11-NEXT:    s_and_b32 s1, s0, 15
1235; GFX11-NEXT:    s_lshl_b32 s0, s0, 2
1236; GFX11-NEXT:    s_lshl_b32 s1, s1, 2
1237; GFX11-NEXT:    s_addk_i32 s0, 0x4004
1238; GFX11-NEXT:    s_addk_i32 s1, 0x4004
1239; GFX11-NEXT:    scratch_store_b32 off, v0, s0 dlc
1240; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1241; GFX11-NEXT:    scratch_load_b32 v0, off, s1 glc dlc
1242; GFX11-NEXT:    s_waitcnt vmcnt(0)
1243; GFX11-NEXT:    s_endpgm
1244;
1245; GFX12-LABEL: store_load_sindex_large_offset_kernel:
1246; GFX12:       ; %bb.0: ; %bb
1247; GFX12-NEXT:    s_load_b32 s0, s[4:5], 0x0
1248; GFX12-NEXT:    scratch_load_b32 v0, off, off scope:SCOPE_SYS
1249; GFX12-NEXT:    s_wait_loadcnt 0x0
1250; GFX12-NEXT:    v_mov_b32_e32 v0, 15
1251; GFX12-NEXT:    s_wait_kmcnt 0x0
1252; GFX12-NEXT:    s_and_b32 s1, s0, 15
1253; GFX12-NEXT:    s_lshl_b32 s0, s0, 2
1254; GFX12-NEXT:    s_lshl_b32 s1, s1, 2
1255; GFX12-NEXT:    s_addk_co_i32 s0, 0x4000
1256; GFX12-NEXT:    s_addk_co_i32 s1, 0x4000
1257; GFX12-NEXT:    scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
1258; GFX12-NEXT:    s_wait_storecnt 0x0
1259; GFX12-NEXT:    scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
1260; GFX12-NEXT:    s_wait_loadcnt 0x0
1261; GFX12-NEXT:    s_endpgm
1262;
1263; UNALIGNED_GFX9-LABEL: store_load_sindex_large_offset_kernel:
1264; UNALIGNED_GFX9:       ; %bb.0: ; %bb
1265; UNALIGNED_GFX9-NEXT:    s_load_dword s0, s[4:5], 0x0
1266; UNALIGNED_GFX9-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
1267; UNALIGNED_GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
1268; UNALIGNED_GFX9-NEXT:    s_mov_b32 s1, 0
1269; UNALIGNED_GFX9-NEXT:    scratch_load_dword v0, off, s1 offset:4 glc
1270; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1271; UNALIGNED_GFX9-NEXT:    s_lshl_b32 s1, s0, 2
1272; UNALIGNED_GFX9-NEXT:    s_and_b32 s0, s0, 15
1273; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v0, 15
1274; UNALIGNED_GFX9-NEXT:    s_addk_i32 s1, 0x4004
1275; UNALIGNED_GFX9-NEXT:    s_lshl_b32 s0, s0, 2
1276; UNALIGNED_GFX9-NEXT:    scratch_store_dword off, v0, s1
1277; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
1278; UNALIGNED_GFX9-NEXT:    s_addk_i32 s0, 0x4004
1279; UNALIGNED_GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
1280; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
1281; UNALIGNED_GFX9-NEXT:    s_endpgm
1282;
1283; UNALIGNED_GFX10-LABEL: store_load_sindex_large_offset_kernel:
1284; UNALIGNED_GFX10:       ; %bb.0: ; %bb
1285; UNALIGNED_GFX10-NEXT:    s_add_u32 s8, s8, s13
1286; UNALIGNED_GFX10-NEXT:    s_addc_u32 s9, s9, 0
1287; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
1288; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
1289; UNALIGNED_GFX10-NEXT:    s_load_dword s0, s[4:5], 0x0
1290; UNALIGNED_GFX10-NEXT:    scratch_load_dword v0, off, off offset:4 glc dlc
1291; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
1292; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v0, 15
1293; UNALIGNED_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1294; UNALIGNED_GFX10-NEXT:    s_and_b32 s1, s0, 15
1295; UNALIGNED_GFX10-NEXT:    s_lshl_b32 s0, s0, 2
1296; UNALIGNED_GFX10-NEXT:    s_lshl_b32 s1, s1, 2
1297; UNALIGNED_GFX10-NEXT:    s_addk_i32 s0, 0x4004
1298; UNALIGNED_GFX10-NEXT:    s_addk_i32 s1, 0x4004
1299; UNALIGNED_GFX10-NEXT:    scratch_store_dword off, v0, s0
1300; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1301; UNALIGNED_GFX10-NEXT:    scratch_load_dword v0, off, s1 glc dlc
1302; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
1303; UNALIGNED_GFX10-NEXT:    s_endpgm
1304;
1305; UNALIGNED_GFX940-LABEL: store_load_sindex_large_offset_kernel:
1306; UNALIGNED_GFX940:       ; %bb.0: ; %bb
1307; UNALIGNED_GFX940-NEXT:    s_load_dword s0, s[4:5], 0x0
1308; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
1309; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
1310; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v0, 15
1311; UNALIGNED_GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1312; UNALIGNED_GFX940-NEXT:    s_lshl_b32 s1, s0, 2
1313; UNALIGNED_GFX940-NEXT:    s_and_b32 s0, s0, 15
1314; UNALIGNED_GFX940-NEXT:    s_addk_i32 s1, 0x4004
1315; UNALIGNED_GFX940-NEXT:    s_lshl_b32 s0, s0, 2
1316; UNALIGNED_GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
1317; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
1318; UNALIGNED_GFX940-NEXT:    s_addk_i32 s0, 0x4004
1319; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
1320; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
1321; UNALIGNED_GFX940-NEXT:    s_endpgm
1322;
1323; UNALIGNED_GFX11-LABEL: store_load_sindex_large_offset_kernel:
1324; UNALIGNED_GFX11:       ; %bb.0: ; %bb
1325; UNALIGNED_GFX11-NEXT:    s_load_b32 s0, s[4:5], 0x0
1326; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v0, off, off offset:4 glc dlc
1327; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
1328; UNALIGNED_GFX11-NEXT:    v_mov_b32_e32 v0, 15
1329; UNALIGNED_GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1330; UNALIGNED_GFX11-NEXT:    s_and_b32 s1, s0, 15
1331; UNALIGNED_GFX11-NEXT:    s_lshl_b32 s0, s0, 2
1332; UNALIGNED_GFX11-NEXT:    s_lshl_b32 s1, s1, 2
1333; UNALIGNED_GFX11-NEXT:    s_addk_i32 s0, 0x4004
1334; UNALIGNED_GFX11-NEXT:    s_addk_i32 s1, 0x4004
1335; UNALIGNED_GFX11-NEXT:    scratch_store_b32 off, v0, s0 dlc
1336; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1337; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v0, off, s1 glc dlc
1338; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
1339; UNALIGNED_GFX11-NEXT:    s_endpgm
1340;
1341; UNALIGNED_GFX12-LABEL: store_load_sindex_large_offset_kernel:
1342; UNALIGNED_GFX12:       ; %bb.0: ; %bb
1343; UNALIGNED_GFX12-NEXT:    s_load_b32 s0, s[4:5], 0x0
1344; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v0, off, off scope:SCOPE_SYS
1345; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
1346; UNALIGNED_GFX12-NEXT:    v_mov_b32_e32 v0, 15
1347; UNALIGNED_GFX12-NEXT:    s_wait_kmcnt 0x0
1348; UNALIGNED_GFX12-NEXT:    s_and_b32 s1, s0, 15
1349; UNALIGNED_GFX12-NEXT:    s_lshl_b32 s0, s0, 2
1350; UNALIGNED_GFX12-NEXT:    s_lshl_b32 s1, s1, 2
1351; UNALIGNED_GFX12-NEXT:    s_addk_co_i32 s0, 0x4000
1352; UNALIGNED_GFX12-NEXT:    s_addk_co_i32 s1, 0x4000
1353; UNALIGNED_GFX12-NEXT:    scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
1354; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
1355; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
1356; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
1357; UNALIGNED_GFX12-NEXT:    s_endpgm
1358bb:
1359  %padding = alloca [4096 x i32], align 4, addrspace(5)
1360  %i = alloca [32 x float], align 4, addrspace(5)
1361  %pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 undef
1362  %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4
1363  %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx
1364  store volatile i32 15, ptr addrspace(5) %i7, align 4
1365  %i9 = and i32 %idx, 15
1366  %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9
1367  %i12 = load volatile i32, ptr addrspace(5) %i10, align 4
1368  ret void
1369}
1370
; Kernel variant with a per-workitem index into a scratch array that is
; allocated after a 16 KiB padding alloca. The index is workitem.id.x: 15 is
; stored to %i[tid] and %i[31 - tid] is then loaded, all with volatile accesses.
; In the expected output the gfx900/gfx1030/gfx940/gfx1100 checks use the
; literal 0x4004 (16384 + 4) plus offset:124 (31 * 4) for the reload, while the
; gfx1200 checks carry everything in the instruction offset field
; (16384 for the store, 16508 = 16384 + 31 * 4 for the load).
1371define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
1372; GFX9-LABEL: store_load_vindex_large_offset_kernel:
1373; GFX9:       ; %bb.0: ; %bb
1374; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
1375; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
1376; GFX9-NEXT:    s_mov_b32 s0, 0
1377; GFX9-NEXT:    scratch_load_dword v1, off, s0 offset:4 glc
1378; GFX9-NEXT:    s_waitcnt vmcnt(0)
1379; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
1380; GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
1381; GFX9-NEXT:    v_add_u32_e32 v1, 0x4004, v1
1382; GFX9-NEXT:    v_mov_b32_e32 v2, 15
1383; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1384; GFX9-NEXT:    scratch_store_dword v1, v2, off
1385; GFX9-NEXT:    s_waitcnt vmcnt(0)
1386; GFX9-NEXT:    v_add_u32_e32 v0, 0x4004, v0
1387; GFX9-NEXT:    scratch_load_dword v0, v0, off offset:124 glc
1388; GFX9-NEXT:    s_waitcnt vmcnt(0)
1389; GFX9-NEXT:    s_endpgm
1390;
1391; GFX10-LABEL: store_load_vindex_large_offset_kernel:
1392; GFX10:       ; %bb.0: ; %bb
1393; GFX10-NEXT:    s_add_u32 s8, s8, s13
1394; GFX10-NEXT:    s_addc_u32 s9, s9, 0
1395; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
1396; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
1397; GFX10-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
1398; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1399; GFX10-NEXT:    v_mov_b32_e32 v2, 15
1400; GFX10-NEXT:    scratch_load_dword v3, off, off offset:4 glc dlc
1401; GFX10-NEXT:    s_waitcnt vmcnt(0)
1402; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
1403; GFX10-NEXT:    v_add_nc_u32_e32 v0, 0x4004, v0
1404; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x4004, v1
1405; GFX10-NEXT:    scratch_store_dword v0, v2, off
1406; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1407; GFX10-NEXT:    scratch_load_dword v0, v1, off offset:124 glc dlc
1408; GFX10-NEXT:    s_waitcnt vmcnt(0)
1409; GFX10-NEXT:    s_endpgm
1410;
1411; GFX940-LABEL: store_load_vindex_large_offset_kernel:
1412; GFX940:       ; %bb.0: ; %bb
1413; GFX940-NEXT:    scratch_load_dword v1, off, off offset:4 sc0 sc1
1414; GFX940-NEXT:    s_waitcnt vmcnt(0)
1415; GFX940-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1416; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
1417; GFX940-NEXT:    v_sub_u32_e32 v0, 0, v0
1418; GFX940-NEXT:    v_mov_b32_e32 v2, 15
1419; GFX940-NEXT:    s_movk_i32 s0, 0x4004
1420; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1421; GFX940-NEXT:    scratch_store_dword v1, v2, s0 sc0 sc1
1422; GFX940-NEXT:    s_waitcnt vmcnt(0)
1423; GFX940-NEXT:    v_add_u32_e32 v0, 0x4004, v0
1424; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
1425; GFX940-NEXT:    s_waitcnt vmcnt(0)
1426; GFX940-NEXT:    s_endpgm
1427;
1428; GFX11-LABEL: store_load_vindex_large_offset_kernel:
1429; GFX11:       ; %bb.0: ; %bb
1430; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1431; GFX11-NEXT:    s_movk_i32 s0, 0x4004
1432; GFX11-NEXT:    scratch_load_b32 v3, off, off offset:4 glc dlc
1433; GFX11-NEXT:    s_waitcnt vmcnt(0)
1434; GFX11-NEXT:    v_mov_b32_e32 v2, 15
1435; GFX11-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
1436; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1437; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1438; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
1439; GFX11-NEXT:    scratch_store_b32 v0, v2, s0 dlc
1440; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1441; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x4004, v1
1442; GFX11-NEXT:    scratch_load_b32 v0, v1, off offset:124 glc dlc
1443; GFX11-NEXT:    s_waitcnt vmcnt(0)
1444; GFX11-NEXT:    s_endpgm
1445;
1446; GFX12-LABEL: store_load_vindex_large_offset_kernel:
1447; GFX12:       ; %bb.0: ; %bb
1448; GFX12-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1449; GFX12-NEXT:    scratch_load_b32 v3, off, off scope:SCOPE_SYS
1450; GFX12-NEXT:    s_wait_loadcnt 0x0
1451; GFX12-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
1452; GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1453; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1454; GFX12-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v1
1455; GFX12-NEXT:    scratch_store_b32 v0, v2, off offset:16384 scope:SCOPE_SYS
1456; GFX12-NEXT:    s_wait_storecnt 0x0
1457; GFX12-NEXT:    scratch_load_b32 v0, v1, off offset:16508 scope:SCOPE_SYS
1458; GFX12-NEXT:    s_wait_loadcnt 0x0
1459; GFX12-NEXT:    s_endpgm
1460;
1461; UNALIGNED_GFX9-LABEL: store_load_vindex_large_offset_kernel:
1462; UNALIGNED_GFX9:       ; %bb.0: ; %bb
1463; UNALIGNED_GFX9-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
1464; UNALIGNED_GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
1465; UNALIGNED_GFX9-NEXT:    s_mov_b32 s0, 0
1466; UNALIGNED_GFX9-NEXT:    scratch_load_dword v1, off, s0 offset:4 glc
1467; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
1468; UNALIGNED_GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
1469; UNALIGNED_GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
1470; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v1, 0x4004, v1
1471; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v2, 15
1472; UNALIGNED_GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1473; UNALIGNED_GFX9-NEXT:    scratch_store_dword v1, v2, off
1474; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
1475; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v0, 0x4004, v0
1476; UNALIGNED_GFX9-NEXT:    scratch_load_dword v0, v0, off offset:124 glc
1477; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
1478; UNALIGNED_GFX9-NEXT:    s_endpgm
1479;
1480; UNALIGNED_GFX10-LABEL: store_load_vindex_large_offset_kernel:
1481; UNALIGNED_GFX10:       ; %bb.0: ; %bb
1482; UNALIGNED_GFX10-NEXT:    s_add_u32 s8, s8, s13
1483; UNALIGNED_GFX10-NEXT:    s_addc_u32 s9, s9, 0
1484; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
1485; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
1486; UNALIGNED_GFX10-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
1487; UNALIGNED_GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1488; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v2, 15
1489; UNALIGNED_GFX10-NEXT:    scratch_load_dword v3, off, off offset:4 glc dlc
1490; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
1491; UNALIGNED_GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
1492; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v0, 0x4004, v0
1493; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x4004, v1
1494; UNALIGNED_GFX10-NEXT:    scratch_store_dword v0, v2, off
1495; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1496; UNALIGNED_GFX10-NEXT:    scratch_load_dword v0, v1, off offset:124 glc dlc
1497; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
1498; UNALIGNED_GFX10-NEXT:    s_endpgm
1499;
1500; UNALIGNED_GFX940-LABEL: store_load_vindex_large_offset_kernel:
1501; UNALIGNED_GFX940:       ; %bb.0: ; %bb
1502; UNALIGNED_GFX940-NEXT:    scratch_load_dword v1, off, off offset:4 sc0 sc1
1503; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
1504; UNALIGNED_GFX940-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1505; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
1506; UNALIGNED_GFX940-NEXT:    v_sub_u32_e32 v0, 0, v0
1507; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v2, 15
1508; UNALIGNED_GFX940-NEXT:    s_movk_i32 s0, 0x4004
1509; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1510; UNALIGNED_GFX940-NEXT:    scratch_store_dword v1, v2, s0 sc0 sc1
1511; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
1512; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v0, 0x4004, v0
1513; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, v0, off offset:124 sc0 sc1
1514; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
1515; UNALIGNED_GFX940-NEXT:    s_endpgm
1516;
1517; UNALIGNED_GFX11-LABEL: store_load_vindex_large_offset_kernel:
1518; UNALIGNED_GFX11:       ; %bb.0: ; %bb
1519; UNALIGNED_GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1520; UNALIGNED_GFX11-NEXT:    s_movk_i32 s0, 0x4004
1521; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v3, off, off offset:4 glc dlc
1522; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
1523; UNALIGNED_GFX11-NEXT:    v_mov_b32_e32 v2, 15
1524; UNALIGNED_GFX11-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
1525; UNALIGNED_GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1526; UNALIGNED_GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1527; UNALIGNED_GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
1528; UNALIGNED_GFX11-NEXT:    scratch_store_b32 v0, v2, s0 dlc
1529; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1530; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x4004, v1
1531; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v0, v1, off offset:124 glc dlc
1532; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
1533; UNALIGNED_GFX11-NEXT:    s_endpgm
1534;
1535; UNALIGNED_GFX12-LABEL: store_load_vindex_large_offset_kernel:
1536; UNALIGNED_GFX12:       ; %bb.0: ; %bb
1537; UNALIGNED_GFX12-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1538; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v3, off, off scope:SCOPE_SYS
1539; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
1540; UNALIGNED_GFX12-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
1541; UNALIGNED_GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1542; UNALIGNED_GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1543; UNALIGNED_GFX12-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v1
1544; UNALIGNED_GFX12-NEXT:    scratch_store_b32 v0, v2, off offset:16384 scope:SCOPE_SYS
1545; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
1546; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v0, v1, off offset:16508 scope:SCOPE_SYS
1547; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
1548; UNALIGNED_GFX12-NEXT:    s_endpgm
1549bb:
  ; 4096 x i32 = 16384 bytes of padding, allocated ahead of the 32-element array %i.
1550  %padding = alloca [4096 x i32], align 4, addrspace(5)
1551  %i = alloca [32 x float], align 4, addrspace(5)
  ; Volatile load from the padding through an undef index; presumably this only
  ; exists to give the padding alloca a use so it is not dropped.
1552  %pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 undef
1553  %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4
1554  %i2 = tail call i32 @llvm.amdgcn.workitem.id.x()
  ; NOTE(review): %i3 has no users in this function; the GEPs below index with
  ; the i32 value %i2 directly.
1555  %i3 = zext i32 %i2 to i64
  ; Store 15 to %i[tid].
1556  %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i2
1557  store volatile i32 15, ptr addrspace(5) %i7, align 4
  ; Load back from the mirrored element %i[31 - tid].
1558  %i9 = sub nsw i32 31, %i2
1559  %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9
1560  %i12 = load volatile i32, ptr addrspace(5) %i10, align 4
1561  ret void
1562}
1563
; Callable-function (non-kernel) variant: the index is the i32 argument %idx.
; 15 is stored to %i[%idx] and %i[%idx & 15] is loaded back, both volatile, with
; %i again allocated after a 16 KiB padding alloca. The expected output
; addresses scratch relative to s32 and returns with s_setpc_b64; the large
; base shows up as 0x4004 on gfx900/gfx1030/gfx940/gfx1100 and as
; offset:16384 on gfx1200.
1564define void @store_load_vindex_large_offset_foo(i32 %idx) {
1565; GFX9-LABEL: store_load_vindex_large_offset_foo:
1566; GFX9:       ; %bb.0: ; %bb
1567; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1568; GFX9-NEXT:    scratch_load_dword v1, off, s32 offset:4 glc
1569; GFX9-NEXT:    s_waitcnt vmcnt(0)
1570; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
1571; GFX9-NEXT:    v_and_b32_e32 v0, 15, v0
1572; GFX9-NEXT:    v_add_u32_e32 v1, s32, v1
1573; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1574; GFX9-NEXT:    v_add_u32_e32 v1, 0x4004, v1
1575; GFX9-NEXT:    v_mov_b32_e32 v2, 15
1576; GFX9-NEXT:    v_add_u32_e32 v0, s32, v0
1577; GFX9-NEXT:    scratch_store_dword v1, v2, off
1578; GFX9-NEXT:    s_waitcnt vmcnt(0)
1579; GFX9-NEXT:    v_add_u32_e32 v0, 0x4004, v0
1580; GFX9-NEXT:    scratch_load_dword v0, v0, off glc
1581; GFX9-NEXT:    s_waitcnt vmcnt(0)
1582; GFX9-NEXT:    s_setpc_b64 s[30:31]
1583;
1584; GFX10-LABEL: store_load_vindex_large_offset_foo:
1585; GFX10:       ; %bb.0: ; %bb
1586; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1587; GFX10-NEXT:    v_and_b32_e32 v1, 15, v0
1588; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1589; GFX10-NEXT:    v_mov_b32_e32 v2, 15
1590; GFX10-NEXT:    scratch_load_dword v3, off, s32 offset:4 glc dlc
1591; GFX10-NEXT:    s_waitcnt vmcnt(0)
1592; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
1593; GFX10-NEXT:    v_add_nc_u32_e32 v0, s32, v0
1594; GFX10-NEXT:    v_add_nc_u32_e32 v1, s32, v1
1595; GFX10-NEXT:    v_add_nc_u32_e32 v0, 0x4004, v0
1596; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x4004, v1
1597; GFX10-NEXT:    scratch_store_dword v0, v2, off
1598; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1599; GFX10-NEXT:    scratch_load_dword v0, v1, off glc dlc
1600; GFX10-NEXT:    s_waitcnt vmcnt(0)
1601; GFX10-NEXT:    s_setpc_b64 s[30:31]
1602;
1603; GFX940-LABEL: store_load_vindex_large_offset_foo:
1604; GFX940:       ; %bb.0: ; %bb
1605; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1606; GFX940-NEXT:    scratch_load_dword v1, off, s32 offset:4 sc0 sc1
1607; GFX940-NEXT:    s_waitcnt vmcnt(0)
1608; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
1609; GFX940-NEXT:    v_add_u32_e32 v1, s32, v1
1610; GFX940-NEXT:    v_add_u32_e32 v1, 0x4004, v1
1611; GFX940-NEXT:    v_mov_b32_e32 v2, 15
1612; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
1613; GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
1614; GFX940-NEXT:    s_waitcnt vmcnt(0)
1615; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1616; GFX940-NEXT:    s_add_i32 s0, s32, 0x4004
1617; GFX940-NEXT:    scratch_load_dword v0, v0, s0 sc0 sc1
1618; GFX940-NEXT:    s_waitcnt vmcnt(0)
1619; GFX940-NEXT:    s_setpc_b64 s[30:31]
1620;
1621; GFX11-LABEL: store_load_vindex_large_offset_foo:
1622; GFX11:       ; %bb.0: ; %bb
1623; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1624; GFX11-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0
1625; GFX11-NEXT:    v_and_b32_e32 v0, 15, v0
1626; GFX11-NEXT:    s_add_i32 s0, s32, 0x4004
1627; GFX11-NEXT:    scratch_load_b32 v3, off, s32 offset:4 glc dlc
1628; GFX11-NEXT:    s_waitcnt vmcnt(0)
1629; GFX11-NEXT:    v_add_nc_u32_e32 v1, s32, v1
1630; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1631; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1632; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x4004, v1
1633; GFX11-NEXT:    scratch_store_b32 v1, v2, off dlc
1634; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1635; GFX11-NEXT:    scratch_load_b32 v0, v0, s0 glc dlc
1636; GFX11-NEXT:    s_waitcnt vmcnt(0)
1637; GFX11-NEXT:    s_setpc_b64 s[30:31]
1638;
1639; GFX12-LABEL: store_load_vindex_large_offset_foo:
1640; GFX12:       ; %bb.0: ; %bb
1641; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1642; GFX12-NEXT:    s_wait_expcnt 0x0
1643; GFX12-NEXT:    s_wait_samplecnt 0x0
1644; GFX12-NEXT:    s_wait_bvhcnt 0x0
1645; GFX12-NEXT:    s_wait_kmcnt 0x0
1646; GFX12-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
1647; GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1648; GFX12-NEXT:    scratch_load_b32 v3, off, s32 scope:SCOPE_SYS
1649; GFX12-NEXT:    s_wait_loadcnt 0x0
1650; GFX12-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
1651; GFX12-NEXT:    s_wait_storecnt 0x0
1652; GFX12-NEXT:    scratch_store_b32 v0, v2, s32 offset:16384 scope:SCOPE_SYS
1653; GFX12-NEXT:    s_wait_storecnt 0x0
1654; GFX12-NEXT:    scratch_load_b32 v0, v1, s32 offset:16384 scope:SCOPE_SYS
1655; GFX12-NEXT:    s_wait_loadcnt 0x0
1656; GFX12-NEXT:    s_setpc_b64 s[30:31]
1657;
1658; UNALIGNED_GFX9-LABEL: store_load_vindex_large_offset_foo:
1659; UNALIGNED_GFX9:       ; %bb.0: ; %bb
1660; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1661; UNALIGNED_GFX9-NEXT:    scratch_load_dword v1, off, s32 offset:4 glc
1662; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
1663; UNALIGNED_GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
1664; UNALIGNED_GFX9-NEXT:    v_and_b32_e32 v0, 15, v0
1665; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v1, s32, v1
1666; UNALIGNED_GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1667; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v1, 0x4004, v1
1668; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v2, 15
1669; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v0, s32, v0
1670; UNALIGNED_GFX9-NEXT:    scratch_store_dword v1, v2, off
1671; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
1672; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v0, 0x4004, v0
1673; UNALIGNED_GFX9-NEXT:    scratch_load_dword v0, v0, off glc
1674; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
1675; UNALIGNED_GFX9-NEXT:    s_setpc_b64 s[30:31]
1676;
1677; UNALIGNED_GFX10-LABEL: store_load_vindex_large_offset_foo:
1678; UNALIGNED_GFX10:       ; %bb.0: ; %bb
1679; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1680; UNALIGNED_GFX10-NEXT:    v_and_b32_e32 v1, 15, v0
1681; UNALIGNED_GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1682; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v2, 15
1683; UNALIGNED_GFX10-NEXT:    scratch_load_dword v3, off, s32 offset:4 glc dlc
1684; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
1685; UNALIGNED_GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
1686; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v0, s32, v0
1687; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v1, s32, v1
1688; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v0, 0x4004, v0
1689; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x4004, v1
1690; UNALIGNED_GFX10-NEXT:    scratch_store_dword v0, v2, off
1691; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1692; UNALIGNED_GFX10-NEXT:    scratch_load_dword v0, v1, off glc dlc
1693; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
1694; UNALIGNED_GFX10-NEXT:    s_setpc_b64 s[30:31]
1695;
1696; UNALIGNED_GFX940-LABEL: store_load_vindex_large_offset_foo:
1697; UNALIGNED_GFX940:       ; %bb.0: ; %bb
1698; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1699; UNALIGNED_GFX940-NEXT:    scratch_load_dword v1, off, s32 offset:4 sc0 sc1
1700; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
1701; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
1702; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v1, s32, v1
1703; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v1, 0x4004, v1
1704; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v2, 15
1705; UNALIGNED_GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
1706; UNALIGNED_GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
1707; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
1708; UNALIGNED_GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1709; UNALIGNED_GFX940-NEXT:    s_add_i32 s0, s32, 0x4004
1710; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, v0, s0 sc0 sc1
1711; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
1712; UNALIGNED_GFX940-NEXT:    s_setpc_b64 s[30:31]
1713;
1714; UNALIGNED_GFX11-LABEL: store_load_vindex_large_offset_foo:
1715; UNALIGNED_GFX11:       ; %bb.0: ; %bb
1716; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1717; UNALIGNED_GFX11-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0
1718; UNALIGNED_GFX11-NEXT:    v_and_b32_e32 v0, 15, v0
1719; UNALIGNED_GFX11-NEXT:    s_add_i32 s0, s32, 0x4004
1720; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v3, off, s32 offset:4 glc dlc
1721; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
1722; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v1, s32, v1
1723; UNALIGNED_GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1724; UNALIGNED_GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1725; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x4004, v1
1726; UNALIGNED_GFX11-NEXT:    scratch_store_b32 v1, v2, off dlc
1727; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1728; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v0, v0, s0 glc dlc
1729; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
1730; UNALIGNED_GFX11-NEXT:    s_setpc_b64 s[30:31]
1731;
1732; UNALIGNED_GFX12-LABEL: store_load_vindex_large_offset_foo:
1733; UNALIGNED_GFX12:       ; %bb.0: ; %bb
1734; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1735; UNALIGNED_GFX12-NEXT:    s_wait_expcnt 0x0
1736; UNALIGNED_GFX12-NEXT:    s_wait_samplecnt 0x0
1737; UNALIGNED_GFX12-NEXT:    s_wait_bvhcnt 0x0
1738; UNALIGNED_GFX12-NEXT:    s_wait_kmcnt 0x0
1739; UNALIGNED_GFX12-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
1740; UNALIGNED_GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1741; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v3, off, s32 scope:SCOPE_SYS
1742; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
1743; UNALIGNED_GFX12-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
1744; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
1745; UNALIGNED_GFX12-NEXT:    scratch_store_b32 v0, v2, s32 offset:16384 scope:SCOPE_SYS
1746; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
1747; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v0, v1, s32 offset:16384 scope:SCOPE_SYS
1748; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
1749; UNALIGNED_GFX12-NEXT:    s_setpc_b64 s[30:31]
1750bb:
  ; 4096 x i32 = 16384 bytes of padding, allocated ahead of the 32-element array %i.
1751  %padding = alloca [4096 x i32], align 4, addrspace(5)
1752  %i = alloca [32 x float], align 4, addrspace(5)
  ; Volatile load from the padding through an undef index; presumably this only
  ; exists to give the padding alloca a use so it is not dropped.
1753  %pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 undef
1754  %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4
  ; Store 15 to %i[%idx]; the index is used unmasked here.
1755  %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx
1756  store volatile i32 15, ptr addrspace(5) %i7, align 4
  ; Load back from %i[%idx & 15], i.e. an element in the range 0..15.
1757  %i9 = and i32 %idx, 15
1758  %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9
1759  %i12 = load volatile i32, ptr addrspace(5) %i10, align 4
1760  ret void
1761}
1762
; Kernel variant with a constant index: element 4000 of a 4096 x i32 scratch
; array (byte offset 4000 * 4 = 16000 = 0x3e80) is stored and then reloaded.
; The gfx900/gfx1030/gfx940/gfx1100 checks materialize 0x3e80 in s0 and add 4
; before the access, while the gfx1200 checks encode the constant directly as
; offset:16000 on the scratch instructions.
1763define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1764; GFX9-LABEL: store_load_large_imm_offset_kernel:
1765; GFX9:       ; %bb.0: ; %bb
1766; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
1767; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
1768; GFX9-NEXT:    v_mov_b32_e32 v0, 13
1769; GFX9-NEXT:    s_mov_b32 s0, 0
1770; GFX9-NEXT:    scratch_store_dword off, v0, s0 offset:4
1771; GFX9-NEXT:    s_waitcnt vmcnt(0)
1772; GFX9-NEXT:    s_movk_i32 s0, 0x3e80
1773; GFX9-NEXT:    v_mov_b32_e32 v0, 15
1774; GFX9-NEXT:    s_add_i32 s0, s0, 4
1775; GFX9-NEXT:    scratch_store_dword off, v0, s0
1776; GFX9-NEXT:    s_waitcnt vmcnt(0)
1777; GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
1778; GFX9-NEXT:    s_waitcnt vmcnt(0)
1779; GFX9-NEXT:    s_endpgm
1780;
1781; GFX10-LABEL: store_load_large_imm_offset_kernel:
1782; GFX10:       ; %bb.0: ; %bb
1783; GFX10-NEXT:    s_add_u32 s8, s8, s13
1784; GFX10-NEXT:    s_addc_u32 s9, s9, 0
1785; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
1786; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
1787; GFX10-NEXT:    v_mov_b32_e32 v0, 13
1788; GFX10-NEXT:    v_mov_b32_e32 v1, 15
1789; GFX10-NEXT:    s_movk_i32 s0, 0x3e80
1790; GFX10-NEXT:    s_add_i32 s0, s0, 4
1791; GFX10-NEXT:    scratch_store_dword off, v0, off offset:4
1792; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1793; GFX10-NEXT:    scratch_store_dword off, v1, s0
1794; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1795; GFX10-NEXT:    scratch_load_dword v0, off, s0 glc dlc
1796; GFX10-NEXT:    s_waitcnt vmcnt(0)
1797; GFX10-NEXT:    s_endpgm
1798;
1799; GFX940-LABEL: store_load_large_imm_offset_kernel:
1800; GFX940:       ; %bb.0: ; %bb
1801; GFX940-NEXT:    v_mov_b32_e32 v0, 13
1802; GFX940-NEXT:    s_movk_i32 s0, 0x3e80
1803; GFX940-NEXT:    scratch_store_dword off, v0, off offset:4 sc0 sc1
1804; GFX940-NEXT:    s_waitcnt vmcnt(0)
1805; GFX940-NEXT:    v_mov_b32_e32 v0, 15
1806; GFX940-NEXT:    s_add_i32 s0, s0, 4
1807; GFX940-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
1808; GFX940-NEXT:    s_waitcnt vmcnt(0)
1809; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
1810; GFX940-NEXT:    s_waitcnt vmcnt(0)
1811; GFX940-NEXT:    s_endpgm
1812;
1813; GFX11-LABEL: store_load_large_imm_offset_kernel:
1814; GFX11:       ; %bb.0: ; %bb
1815; GFX11-NEXT:    v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1816; GFX11-NEXT:    s_movk_i32 s0, 0x3e80
1817; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1818; GFX11-NEXT:    s_add_i32 s0, s0, 4
1819; GFX11-NEXT:    scratch_store_b32 off, v0, off offset:4 dlc
1820; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1821; GFX11-NEXT:    scratch_store_b32 off, v1, s0 dlc
1822; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1823; GFX11-NEXT:    scratch_load_b32 v0, off, s0 glc dlc
1824; GFX11-NEXT:    s_waitcnt vmcnt(0)
1825; GFX11-NEXT:    s_endpgm
1826;
1827; GFX12-LABEL: store_load_large_imm_offset_kernel:
1828; GFX12:       ; %bb.0: ; %bb
1829; GFX12-NEXT:    v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1830; GFX12-NEXT:    scratch_store_b32 off, v0, off scope:SCOPE_SYS
1831; GFX12-NEXT:    s_wait_storecnt 0x0
1832; GFX12-NEXT:    scratch_store_b32 off, v1, off offset:16000 scope:SCOPE_SYS
1833; GFX12-NEXT:    s_wait_storecnt 0x0
1834; GFX12-NEXT:    scratch_load_b32 v0, off, off offset:16000 scope:SCOPE_SYS
1835; GFX12-NEXT:    s_wait_loadcnt 0x0
1836; GFX12-NEXT:    s_endpgm
1837;
1838; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_kernel:
1839; UNALIGNED_GFX9:       ; %bb.0: ; %bb
1840; UNALIGNED_GFX9-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
1841; UNALIGNED_GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
1842; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v0, 13
1843; UNALIGNED_GFX9-NEXT:    s_mov_b32 s0, 0
1844; UNALIGNED_GFX9-NEXT:    scratch_store_dword off, v0, s0 offset:4
1845; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
1846; UNALIGNED_GFX9-NEXT:    s_movk_i32 s0, 0x3e80
1847; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v0, 15
1848; UNALIGNED_GFX9-NEXT:    s_add_i32 s0, s0, 4
1849; UNALIGNED_GFX9-NEXT:    scratch_store_dword off, v0, s0
1850; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
1851; UNALIGNED_GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
1852; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
1853; UNALIGNED_GFX9-NEXT:    s_endpgm
1854;
1855; UNALIGNED_GFX10-LABEL: store_load_large_imm_offset_kernel:
1856; UNALIGNED_GFX10:       ; %bb.0: ; %bb
1857; UNALIGNED_GFX10-NEXT:    s_add_u32 s8, s8, s13
1858; UNALIGNED_GFX10-NEXT:    s_addc_u32 s9, s9, 0
1859; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
1860; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
1861; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v0, 13
1862; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v1, 15
1863; UNALIGNED_GFX10-NEXT:    s_movk_i32 s0, 0x3e80
1864; UNALIGNED_GFX10-NEXT:    s_add_i32 s0, s0, 4
1865; UNALIGNED_GFX10-NEXT:    scratch_store_dword off, v0, off offset:4
1866; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1867; UNALIGNED_GFX10-NEXT:    scratch_store_dword off, v1, s0
1868; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1869; UNALIGNED_GFX10-NEXT:    scratch_load_dword v0, off, s0 glc dlc
1870; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
1871; UNALIGNED_GFX10-NEXT:    s_endpgm
1872;
1873; UNALIGNED_GFX940-LABEL: store_load_large_imm_offset_kernel:
1874; UNALIGNED_GFX940:       ; %bb.0: ; %bb
1875; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v0, 13
1876; UNALIGNED_GFX940-NEXT:    s_movk_i32 s0, 0x3e80
1877; UNALIGNED_GFX940-NEXT:    scratch_store_dword off, v0, off offset:4 sc0 sc1
1878; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
1879; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v0, 15
1880; UNALIGNED_GFX940-NEXT:    s_add_i32 s0, s0, 4
1881; UNALIGNED_GFX940-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
1882; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
1883; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
1884; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
1885; UNALIGNED_GFX940-NEXT:    s_endpgm
1886;
1887; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_kernel:
1888; UNALIGNED_GFX11:       ; %bb.0: ; %bb
1889; UNALIGNED_GFX11-NEXT:    v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1890; UNALIGNED_GFX11-NEXT:    s_movk_i32 s0, 0x3e80
1891; UNALIGNED_GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1892; UNALIGNED_GFX11-NEXT:    s_add_i32 s0, s0, 4
1893; UNALIGNED_GFX11-NEXT:    scratch_store_b32 off, v0, off offset:4 dlc
1894; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1895; UNALIGNED_GFX11-NEXT:    scratch_store_b32 off, v1, s0 dlc
1896; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1897; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v0, off, s0 glc dlc
1898; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
1899; UNALIGNED_GFX11-NEXT:    s_endpgm
1900;
1901; UNALIGNED_GFX12-LABEL: store_load_large_imm_offset_kernel:
1902; UNALIGNED_GFX12:       ; %bb.0: ; %bb
1903; UNALIGNED_GFX12-NEXT:    v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1904; UNALIGNED_GFX12-NEXT:    scratch_store_b32 off, v0, off scope:SCOPE_SYS
1905; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
1906; UNALIGNED_GFX12-NEXT:    scratch_store_b32 off, v1, off offset:16000 scope:SCOPE_SYS
1907; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
1908; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v0, off, off offset:16000 scope:SCOPE_SYS
1909; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
1910; UNALIGNED_GFX12-NEXT:    s_endpgm
1911bb:
  ; Single 16384-byte scratch array; there is no separate padding alloca here.
1912  %i = alloca [4096 x i32], align 4, addrspace(5)
  ; Volatile store of 13 through an undef index.
1913  %i1 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 undef
1914  store volatile i32 13, ptr addrspace(5) %i1, align 4
  ; Store 15 to element 4000 (byte offset 16000).
1915  %i7 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 4000
1916  store volatile i32 15, ptr addrspace(5) %i7, align 4
  ; %i10 is the same address as %i7 (element 4000); load it back.
1917  %i10 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 4000
1918  %i12 = load volatile i32, ptr addrspace(5) %i10, align 4
1919  ret void
1920}
1921
; Callable-function (non-kernel) test: volatile scratch accesses into a
; 4096 x i32 addrspace(5) alloca, where the constant element index 4000 gives
; a byte offset of 16000 (0x3e80).
; Per the checks below, GFX9/GFX10/GFX940/GFX11 materialize that offset in
; SGPRs (s_movk_i32 0x3e80 followed by s_add_i32 on s32), while GFX12 encodes
; it directly as "offset:16000" on the scratch instructions.
; The UNALIGNED_* prefixes come from the RUN lines that pass
; -mattr=-unaligned-access-mode; their expected output for this function
; matches the corresponding default-prefix output.
1922define void @store_load_large_imm_offset_foo() {
1923; GFX9-LABEL: store_load_large_imm_offset_foo:
1924; GFX9:       ; %bb.0: ; %bb
1925; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1926; GFX9-NEXT:    s_movk_i32 s0, 0x3e80
1927; GFX9-NEXT:    v_mov_b32_e32 v0, 13
1928; GFX9-NEXT:    s_add_i32 s1, s32, s0
1929; GFX9-NEXT:    scratch_store_dword off, v0, s32 offset:4
1930; GFX9-NEXT:    s_waitcnt vmcnt(0)
1931; GFX9-NEXT:    v_mov_b32_e32 v0, 15
1932; GFX9-NEXT:    s_add_i32 s0, s1, 4
1933; GFX9-NEXT:    scratch_store_dword off, v0, s0
1934; GFX9-NEXT:    s_waitcnt vmcnt(0)
1935; GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
1936; GFX9-NEXT:    s_waitcnt vmcnt(0)
1937; GFX9-NEXT:    s_setpc_b64 s[30:31]
1938;
1939; GFX10-LABEL: store_load_large_imm_offset_foo:
1940; GFX10:       ; %bb.0: ; %bb
1941; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1942; GFX10-NEXT:    v_mov_b32_e32 v0, 13
1943; GFX10-NEXT:    s_movk_i32 s0, 0x3e80
1944; GFX10-NEXT:    v_mov_b32_e32 v1, 15
1945; GFX10-NEXT:    s_add_i32 s1, s32, s0
1946; GFX10-NEXT:    s_add_i32 s0, s1, 4
1947; GFX10-NEXT:    scratch_store_dword off, v0, s32 offset:4
1948; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1949; GFX10-NEXT:    scratch_store_dword off, v1, s0
1950; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1951; GFX10-NEXT:    scratch_load_dword v0, off, s0 glc dlc
1952; GFX10-NEXT:    s_waitcnt vmcnt(0)
1953; GFX10-NEXT:    s_setpc_b64 s[30:31]
1954;
1955; GFX940-LABEL: store_load_large_imm_offset_foo:
1956; GFX940:       ; %bb.0: ; %bb
1957; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1958; GFX940-NEXT:    s_movk_i32 s0, 0x3e80
1959; GFX940-NEXT:    v_mov_b32_e32 v0, 13
1960; GFX940-NEXT:    s_add_i32 s1, s32, s0
1961; GFX940-NEXT:    scratch_store_dword off, v0, s32 offset:4 sc0 sc1
1962; GFX940-NEXT:    s_waitcnt vmcnt(0)
1963; GFX940-NEXT:    v_mov_b32_e32 v0, 15
1964; GFX940-NEXT:    s_add_i32 s0, s1, 4
1965; GFX940-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
1966; GFX940-NEXT:    s_waitcnt vmcnt(0)
1967; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
1968; GFX940-NEXT:    s_waitcnt vmcnt(0)
1969; GFX940-NEXT:    s_setpc_b64 s[30:31]
1970;
1971; GFX11-LABEL: store_load_large_imm_offset_foo:
1972; GFX11:       ; %bb.0: ; %bb
1973; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1974; GFX11-NEXT:    v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1975; GFX11-NEXT:    s_movk_i32 s0, 0x3e80
1976; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1977; GFX11-NEXT:    s_add_i32 s1, s32, s0
1978; GFX11-NEXT:    s_add_i32 s0, s1, 4
1979; GFX11-NEXT:    scratch_store_b32 off, v0, s32 offset:4 dlc
1980; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1981; GFX11-NEXT:    scratch_store_b32 off, v1, s0 dlc
1982; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1983; GFX11-NEXT:    scratch_load_b32 v0, off, s0 glc dlc
1984; GFX11-NEXT:    s_waitcnt vmcnt(0)
1985; GFX11-NEXT:    s_setpc_b64 s[30:31]
1986;
1987; GFX12-LABEL: store_load_large_imm_offset_foo:
1988; GFX12:       ; %bb.0: ; %bb
1989; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1990; GFX12-NEXT:    s_wait_expcnt 0x0
1991; GFX12-NEXT:    s_wait_samplecnt 0x0
1992; GFX12-NEXT:    s_wait_bvhcnt 0x0
1993; GFX12-NEXT:    s_wait_kmcnt 0x0
1994; GFX12-NEXT:    v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1995; GFX12-NEXT:    s_wait_storecnt 0x0
1996; GFX12-NEXT:    scratch_store_b32 off, v0, s32 scope:SCOPE_SYS
1997; GFX12-NEXT:    s_wait_storecnt 0x0
1998; GFX12-NEXT:    scratch_store_b32 off, v1, s32 offset:16000 scope:SCOPE_SYS
1999; GFX12-NEXT:    s_wait_storecnt 0x0
2000; GFX12-NEXT:    scratch_load_b32 v0, off, s32 offset:16000 scope:SCOPE_SYS
2001; GFX12-NEXT:    s_wait_loadcnt 0x0
2002; GFX12-NEXT:    s_setpc_b64 s[30:31]
2003;
2004; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo:
2005; UNALIGNED_GFX9:       ; %bb.0: ; %bb
2006; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2007; UNALIGNED_GFX9-NEXT:    s_movk_i32 s0, 0x3e80
2008; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v0, 13
2009; UNALIGNED_GFX9-NEXT:    s_add_i32 s1, s32, s0
2010; UNALIGNED_GFX9-NEXT:    scratch_store_dword off, v0, s32 offset:4
2011; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2012; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v0, 15
2013; UNALIGNED_GFX9-NEXT:    s_add_i32 s0, s1, 4
2014; UNALIGNED_GFX9-NEXT:    scratch_store_dword off, v0, s0
2015; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2016; UNALIGNED_GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
2017; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2018; UNALIGNED_GFX9-NEXT:    s_setpc_b64 s[30:31]
2019;
2020; UNALIGNED_GFX10-LABEL: store_load_large_imm_offset_foo:
2021; UNALIGNED_GFX10:       ; %bb.0: ; %bb
2022; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2023; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v0, 13
2024; UNALIGNED_GFX10-NEXT:    s_movk_i32 s0, 0x3e80
2025; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v1, 15
2026; UNALIGNED_GFX10-NEXT:    s_add_i32 s1, s32, s0
2027; UNALIGNED_GFX10-NEXT:    s_add_i32 s0, s1, 4
2028; UNALIGNED_GFX10-NEXT:    scratch_store_dword off, v0, s32 offset:4
2029; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2030; UNALIGNED_GFX10-NEXT:    scratch_store_dword off, v1, s0
2031; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2032; UNALIGNED_GFX10-NEXT:    scratch_load_dword v0, off, s0 glc dlc
2033; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2034; UNALIGNED_GFX10-NEXT:    s_setpc_b64 s[30:31]
2035;
2036; UNALIGNED_GFX940-LABEL: store_load_large_imm_offset_foo:
2037; UNALIGNED_GFX940:       ; %bb.0: ; %bb
2038; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2039; UNALIGNED_GFX940-NEXT:    s_movk_i32 s0, 0x3e80
2040; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v0, 13
2041; UNALIGNED_GFX940-NEXT:    s_add_i32 s1, s32, s0
2042; UNALIGNED_GFX940-NEXT:    scratch_store_dword off, v0, s32 offset:4 sc0 sc1
2043; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2044; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v0, 15
2045; UNALIGNED_GFX940-NEXT:    s_add_i32 s0, s1, 4
2046; UNALIGNED_GFX940-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
2047; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2048; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
2049; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2050; UNALIGNED_GFX940-NEXT:    s_setpc_b64 s[30:31]
2051;
2052; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_foo:
2053; UNALIGNED_GFX11:       ; %bb.0: ; %bb
2054; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2055; UNALIGNED_GFX11-NEXT:    v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
2056; UNALIGNED_GFX11-NEXT:    s_movk_i32 s0, 0x3e80
2057; UNALIGNED_GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
2058; UNALIGNED_GFX11-NEXT:    s_add_i32 s1, s32, s0
2059; UNALIGNED_GFX11-NEXT:    s_add_i32 s0, s1, 4
2060; UNALIGNED_GFX11-NEXT:    scratch_store_b32 off, v0, s32 offset:4 dlc
2061; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2062; UNALIGNED_GFX11-NEXT:    scratch_store_b32 off, v1, s0 dlc
2063; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2064; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v0, off, s0 glc dlc
2065; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
2066; UNALIGNED_GFX11-NEXT:    s_setpc_b64 s[30:31]
2067;
2068; UNALIGNED_GFX12-LABEL: store_load_large_imm_offset_foo:
2069; UNALIGNED_GFX12:       ; %bb.0: ; %bb
2070; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2071; UNALIGNED_GFX12-NEXT:    s_wait_expcnt 0x0
2072; UNALIGNED_GFX12-NEXT:    s_wait_samplecnt 0x0
2073; UNALIGNED_GFX12-NEXT:    s_wait_bvhcnt 0x0
2074; UNALIGNED_GFX12-NEXT:    s_wait_kmcnt 0x0
2075; UNALIGNED_GFX12-NEXT:    v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
2076; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
2077; UNALIGNED_GFX12-NEXT:    scratch_store_b32 off, v0, s32 scope:SCOPE_SYS
2078; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
2079; UNALIGNED_GFX12-NEXT:    scratch_store_b32 off, v1, s32 offset:16000 scope:SCOPE_SYS
2080; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
2081; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v0, off, s32 offset:16000 scope:SCOPE_SYS
2082; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
2083; UNALIGNED_GFX12-NEXT:    s_setpc_b64 s[30:31]
2084bb:
2085  %i = alloca [4096 x i32], align 4, addrspace(5)
  ; First access: volatile store of 13 through an undef element index.
2086  %i1 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 undef
2087  store volatile i32 13, ptr addrspace(5) %i1, align 4
  ; Second access: volatile store of 15 to element 4000 (4000 * 4 = 16000 bytes
  ; from the start of the alloca).
2088  %i7 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 4000
2089  store volatile i32 15, ptr addrspace(5) %i7, align 4
  ; Volatile load back from the same element; %i12 is never used, yet the
  ; checks above still expect the load to be emitted.
2090  %i10 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 4000
2091  %i12 = load volatile i32, ptr addrspace(5) %i10, align 4
2092  ret void
2093}
2094
; Kernel test: the scratch element index is the sum of a kernel argument
; (%sidx, loaded with s_load into s0 per the checks), the workitem id
; (%vidx) and the constant 256.
; Per the checks below, every target forms the variable part with
; "v_add_lshl_u32 v0, s0, v0, 2" and carries the constant as the immediate
; "offset:1024" (256 * 4 bytes) on both the scratch store and the scratch load.
; The GFX940/GFX11/GFX12 checks additionally mask v0 with 0x3ff before the
; add; the GFX9/GFX10 checks contain no such mask.
2095define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) {
2096; GFX9-LABEL: store_load_vidx_sidx_offset:
2097; GFX9:       ; %bb.0: ; %bb
2098; GFX9-NEXT:    s_load_dword s0, s[4:5], 0x0
2099; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
2100; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
2101; GFX9-NEXT:    v_mov_b32_e32 v1, 15
2102; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2103; GFX9-NEXT:    v_add_lshl_u32 v0, s0, v0, 2
2104; GFX9-NEXT:    scratch_store_dword v0, v1, off offset:1024
2105; GFX9-NEXT:    s_waitcnt vmcnt(0)
2106; GFX9-NEXT:    scratch_load_dword v0, v0, off offset:1024 glc
2107; GFX9-NEXT:    s_waitcnt vmcnt(0)
2108; GFX9-NEXT:    s_endpgm
2109;
2110; GFX10-LABEL: store_load_vidx_sidx_offset:
2111; GFX10:       ; %bb.0: ; %bb
2112; GFX10-NEXT:    s_add_u32 s8, s8, s13
2113; GFX10-NEXT:    s_addc_u32 s9, s9, 0
2114; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
2115; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
2116; GFX10-NEXT:    s_load_dword s0, s[4:5], 0x0
2117; GFX10-NEXT:    v_mov_b32_e32 v1, 15
2118; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2119; GFX10-NEXT:    v_add_lshl_u32 v0, s0, v0, 2
2120; GFX10-NEXT:    scratch_store_dword v0, v1, off offset:1024
2121; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2122; GFX10-NEXT:    scratch_load_dword v0, v0, off offset:1024 glc dlc
2123; GFX10-NEXT:    s_waitcnt vmcnt(0)
2124; GFX10-NEXT:    s_endpgm
2125;
2126; GFX940-LABEL: store_load_vidx_sidx_offset:
2127; GFX940:       ; %bb.0: ; %bb
2128; GFX940-NEXT:    s_load_dword s0, s[4:5], 0x0
2129; GFX940-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
2130; GFX940-NEXT:    v_mov_b32_e32 v1, 15
2131; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
2132; GFX940-NEXT:    v_add_lshl_u32 v0, s0, v0, 2
2133; GFX940-NEXT:    scratch_store_dword v0, v1, off offset:1024 sc0 sc1
2134; GFX940-NEXT:    s_waitcnt vmcnt(0)
2135; GFX940-NEXT:    scratch_load_dword v0, v0, off offset:1024 sc0 sc1
2136; GFX940-NEXT:    s_waitcnt vmcnt(0)
2137; GFX940-NEXT:    s_endpgm
2138;
2139; GFX11-LABEL: store_load_vidx_sidx_offset:
2140; GFX11:       ; %bb.0: ; %bb
2141; GFX11-NEXT:    s_load_b32 s0, s[4:5], 0x0
2142; GFX11-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
2143; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2144; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2145; GFX11-NEXT:    v_add_lshl_u32 v0, s0, v0, 2
2146; GFX11-NEXT:    scratch_store_b32 v0, v1, off offset:1024 dlc
2147; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2148; GFX11-NEXT:    scratch_load_b32 v0, v0, off offset:1024 glc dlc
2149; GFX11-NEXT:    s_waitcnt vmcnt(0)
2150; GFX11-NEXT:    s_endpgm
2151;
2152; GFX12-LABEL: store_load_vidx_sidx_offset:
2153; GFX12:       ; %bb.0: ; %bb
2154; GFX12-NEXT:    s_load_b32 s0, s[4:5], 0x0
2155; GFX12-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
2156; GFX12-NEXT:    s_wait_kmcnt 0x0
2157; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2158; GFX12-NEXT:    v_add_lshl_u32 v0, s0, v0, 2
2159; GFX12-NEXT:    scratch_store_b32 v0, v1, off offset:1024 scope:SCOPE_SYS
2160; GFX12-NEXT:    s_wait_storecnt 0x0
2161; GFX12-NEXT:    scratch_load_b32 v0, v0, off offset:1024 scope:SCOPE_SYS
2162; GFX12-NEXT:    s_wait_loadcnt 0x0
2163; GFX12-NEXT:    s_endpgm
2164;
2165; UNALIGNED_GFX9-LABEL: store_load_vidx_sidx_offset:
2166; UNALIGNED_GFX9:       ; %bb.0: ; %bb
2167; UNALIGNED_GFX9-NEXT:    s_load_dword s0, s[4:5], 0x0
2168; UNALIGNED_GFX9-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
2169; UNALIGNED_GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
2170; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v1, 15
2171; UNALIGNED_GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2172; UNALIGNED_GFX9-NEXT:    v_add_lshl_u32 v0, s0, v0, 2
2173; UNALIGNED_GFX9-NEXT:    scratch_store_dword v0, v1, off offset:1024
2174; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2175; UNALIGNED_GFX9-NEXT:    scratch_load_dword v0, v0, off offset:1024 glc
2176; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2177; UNALIGNED_GFX9-NEXT:    s_endpgm
2178;
2179; UNALIGNED_GFX10-LABEL: store_load_vidx_sidx_offset:
2180; UNALIGNED_GFX10:       ; %bb.0: ; %bb
2181; UNALIGNED_GFX10-NEXT:    s_add_u32 s8, s8, s13
2182; UNALIGNED_GFX10-NEXT:    s_addc_u32 s9, s9, 0
2183; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
2184; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
2185; UNALIGNED_GFX10-NEXT:    s_load_dword s0, s[4:5], 0x0
2186; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v1, 15
2187; UNALIGNED_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2188; UNALIGNED_GFX10-NEXT:    v_add_lshl_u32 v0, s0, v0, 2
2189; UNALIGNED_GFX10-NEXT:    scratch_store_dword v0, v1, off offset:1024
2190; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2191; UNALIGNED_GFX10-NEXT:    scratch_load_dword v0, v0, off offset:1024 glc dlc
2192; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2193; UNALIGNED_GFX10-NEXT:    s_endpgm
2194;
2195; UNALIGNED_GFX940-LABEL: store_load_vidx_sidx_offset:
2196; UNALIGNED_GFX940:       ; %bb.0: ; %bb
2197; UNALIGNED_GFX940-NEXT:    s_load_dword s0, s[4:5], 0x0
2198; UNALIGNED_GFX940-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
2199; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v1, 15
2200; UNALIGNED_GFX940-NEXT:    s_waitcnt lgkmcnt(0)
2201; UNALIGNED_GFX940-NEXT:    v_add_lshl_u32 v0, s0, v0, 2
2202; UNALIGNED_GFX940-NEXT:    scratch_store_dword v0, v1, off offset:1024 sc0 sc1
2203; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2204; UNALIGNED_GFX940-NEXT:    scratch_load_dword v0, v0, off offset:1024 sc0 sc1
2205; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2206; UNALIGNED_GFX940-NEXT:    s_endpgm
2207;
2208; UNALIGNED_GFX11-LABEL: store_load_vidx_sidx_offset:
2209; UNALIGNED_GFX11:       ; %bb.0: ; %bb
2210; UNALIGNED_GFX11-NEXT:    s_load_b32 s0, s[4:5], 0x0
2211; UNALIGNED_GFX11-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
2212; UNALIGNED_GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2213; UNALIGNED_GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2214; UNALIGNED_GFX11-NEXT:    v_add_lshl_u32 v0, s0, v0, 2
2215; UNALIGNED_GFX11-NEXT:    scratch_store_b32 v0, v1, off offset:1024 dlc
2216; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2217; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v0, v0, off offset:1024 glc dlc
2218; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
2219; UNALIGNED_GFX11-NEXT:    s_endpgm
2220;
2221; UNALIGNED_GFX12-LABEL: store_load_vidx_sidx_offset:
2222; UNALIGNED_GFX12:       ; %bb.0: ; %bb
2223; UNALIGNED_GFX12-NEXT:    s_load_b32 s0, s[4:5], 0x0
2224; UNALIGNED_GFX12-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
2225; UNALIGNED_GFX12-NEXT:    s_wait_kmcnt 0x0
2226; UNALIGNED_GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2227; UNALIGNED_GFX12-NEXT:    v_add_lshl_u32 v0, s0, v0, 2
2228; UNALIGNED_GFX12-NEXT:    scratch_store_b32 v0, v1, off offset:1024 scope:SCOPE_SYS
2229; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
2230; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v0, v0, off offset:1024 scope:SCOPE_SYS
2231; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
2232; UNALIGNED_GFX12-NEXT:    s_endpgm
2233bb:
2234  %alloca = alloca [32 x i32], align 4, addrspace(5)
2235  %vidx = tail call i32 @llvm.amdgcn.workitem.id.x()
  ; index = %sidx + %vidx + 256; the two adds are kept separate so the
  ; constant is the outermost addend.
2236  %add1 = add nsw i32 %sidx, %vidx
2237  %add2 = add nsw i32 %add1, 256
2238  %gep = getelementptr inbounds [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %add2
  ; Volatile store/load pair through the same pointer; %load is never used.
2239  store volatile i32 15, ptr addrspace(5) %gep, align 4
2240  %load = load volatile i32, ptr addrspace(5) %gep, align 4
2241  ret void
2242}
2243
; Volatile i64 store followed by a volatile i64 load through an addrspace(5)
; pointer argument, both marked "align 8".
; Per the checks below, every prefix - including the UNALIGNED_* ones, whose
; RUN lines pass -mattr=-unaligned-access-mode - expects one 64-bit scratch
; store and one 64-bit scratch load (dwordx2 / b64).
; The GFX940 checks build the constant with a single v_mov_b64; the other
; targets use two 32-bit moves (15 and 0).
2244define void @store_load_i64_aligned(ptr addrspace(5) nocapture %arg) {
2245; GFX9-LABEL: store_load_i64_aligned:
2246; GFX9:       ; %bb.0: ; %bb
2247; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2248; GFX9-NEXT:    v_mov_b32_e32 v1, 15
2249; GFX9-NEXT:    v_mov_b32_e32 v2, 0
2250; GFX9-NEXT:    scratch_store_dwordx2 v0, v[1:2], off
2251; GFX9-NEXT:    s_waitcnt vmcnt(0)
2252; GFX9-NEXT:    scratch_load_dwordx2 v[0:1], v0, off glc
2253; GFX9-NEXT:    s_waitcnt vmcnt(0)
2254; GFX9-NEXT:    s_setpc_b64 s[30:31]
2255;
2256; GFX10-LABEL: store_load_i64_aligned:
2257; GFX10:       ; %bb.0: ; %bb
2258; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2259; GFX10-NEXT:    v_mov_b32_e32 v1, 15
2260; GFX10-NEXT:    v_mov_b32_e32 v2, 0
2261; GFX10-NEXT:    scratch_store_dwordx2 v0, v[1:2], off
2262; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2263; GFX10-NEXT:    scratch_load_dwordx2 v[0:1], v0, off glc dlc
2264; GFX10-NEXT:    s_waitcnt vmcnt(0)
2265; GFX10-NEXT:    s_setpc_b64 s[30:31]
2266;
2267; GFX940-LABEL: store_load_i64_aligned:
2268; GFX940:       ; %bb.0: ; %bb
2269; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2270; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 15
2271; GFX940-NEXT:    scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
2272; GFX940-NEXT:    s_waitcnt vmcnt(0)
2273; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
2274; GFX940-NEXT:    s_waitcnt vmcnt(0)
2275; GFX940-NEXT:    s_setpc_b64 s[30:31]
2276;
2277; GFX11-LABEL: store_load_i64_aligned:
2278; GFX11:       ; %bb.0: ; %bb
2279; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2280; GFX11-NEXT:    v_mov_b32_e32 v1, 15
2281; GFX11-NEXT:    v_mov_b32_e32 v2, 0
2282; GFX11-NEXT:    scratch_store_b64 v0, v[1:2], off dlc
2283; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2284; GFX11-NEXT:    scratch_load_b64 v[0:1], v0, off glc dlc
2285; GFX11-NEXT:    s_waitcnt vmcnt(0)
2286; GFX11-NEXT:    s_setpc_b64 s[30:31]
2287;
2288; GFX12-LABEL: store_load_i64_aligned:
2289; GFX12:       ; %bb.0: ; %bb
2290; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2291; GFX12-NEXT:    s_wait_expcnt 0x0
2292; GFX12-NEXT:    s_wait_samplecnt 0x0
2293; GFX12-NEXT:    s_wait_bvhcnt 0x0
2294; GFX12-NEXT:    s_wait_kmcnt 0x0
2295; GFX12-NEXT:    v_mov_b32_e32 v1, 15
2296; GFX12-NEXT:    v_mov_b32_e32 v2, 0
2297; GFX12-NEXT:    s_wait_storecnt 0x0
2298; GFX12-NEXT:    scratch_store_b64 v0, v[1:2], off scope:SCOPE_SYS
2299; GFX12-NEXT:    s_wait_storecnt 0x0
2300; GFX12-NEXT:    scratch_load_b64 v[0:1], v0, off scope:SCOPE_SYS
2301; GFX12-NEXT:    s_wait_loadcnt 0x0
2302; GFX12-NEXT:    s_setpc_b64 s[30:31]
2303;
2304; UNALIGNED_GFX9-LABEL: store_load_i64_aligned:
2305; UNALIGNED_GFX9:       ; %bb.0: ; %bb
2306; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2307; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v1, 15
2308; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v2, 0
2309; UNALIGNED_GFX9-NEXT:    scratch_store_dwordx2 v0, v[1:2], off
2310; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2311; UNALIGNED_GFX9-NEXT:    scratch_load_dwordx2 v[0:1], v0, off glc
2312; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2313; UNALIGNED_GFX9-NEXT:    s_setpc_b64 s[30:31]
2314;
2315; UNALIGNED_GFX10-LABEL: store_load_i64_aligned:
2316; UNALIGNED_GFX10:       ; %bb.0: ; %bb
2317; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2318; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v1, 15
2319; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v2, 0
2320; UNALIGNED_GFX10-NEXT:    scratch_store_dwordx2 v0, v[1:2], off
2321; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2322; UNALIGNED_GFX10-NEXT:    scratch_load_dwordx2 v[0:1], v0, off glc dlc
2323; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2324; UNALIGNED_GFX10-NEXT:    s_setpc_b64 s[30:31]
2325;
2326; UNALIGNED_GFX940-LABEL: store_load_i64_aligned:
2327; UNALIGNED_GFX940:       ; %bb.0: ; %bb
2328; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2329; UNALIGNED_GFX940-NEXT:    v_mov_b64_e32 v[2:3], 15
2330; UNALIGNED_GFX940-NEXT:    scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
2331; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2332; UNALIGNED_GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
2333; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2334; UNALIGNED_GFX940-NEXT:    s_setpc_b64 s[30:31]
2335;
2336; UNALIGNED_GFX11-LABEL: store_load_i64_aligned:
2337; UNALIGNED_GFX11:       ; %bb.0: ; %bb
2338; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2339; UNALIGNED_GFX11-NEXT:    v_mov_b32_e32 v1, 15
2340; UNALIGNED_GFX11-NEXT:    v_mov_b32_e32 v2, 0
2341; UNALIGNED_GFX11-NEXT:    scratch_store_b64 v0, v[1:2], off dlc
2342; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2343; UNALIGNED_GFX11-NEXT:    scratch_load_b64 v[0:1], v0, off glc dlc
2344; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
2345; UNALIGNED_GFX11-NEXT:    s_setpc_b64 s[30:31]
2346;
2347; UNALIGNED_GFX12-LABEL: store_load_i64_aligned:
2348; UNALIGNED_GFX12:       ; %bb.0: ; %bb
2349; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2350; UNALIGNED_GFX12-NEXT:    s_wait_expcnt 0x0
2351; UNALIGNED_GFX12-NEXT:    s_wait_samplecnt 0x0
2352; UNALIGNED_GFX12-NEXT:    s_wait_bvhcnt 0x0
2353; UNALIGNED_GFX12-NEXT:    s_wait_kmcnt 0x0
2354; UNALIGNED_GFX12-NEXT:    v_mov_b32_e32 v1, 15
2355; UNALIGNED_GFX12-NEXT:    v_mov_b32_e32 v2, 0
2356; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
2357; UNALIGNED_GFX12-NEXT:    scratch_store_b64 v0, v[1:2], off scope:SCOPE_SYS
2358; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
2359; UNALIGNED_GFX12-NEXT:    scratch_load_b64 v[0:1], v0, off scope:SCOPE_SYS
2360; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
2361; UNALIGNED_GFX12-NEXT:    s_setpc_b64 s[30:31]
2362bb:
  ; Both accesses are volatile and 8-byte aligned; %load is never used.
2363  store volatile i64 15, ptr addrspace(5) %arg, align 8
2364  %load = load volatile i64, ptr addrspace(5) %arg, align 8
2365  ret void
2366}
2367
2368define void @store_load_i64_unaligned(ptr addrspace(5) nocapture %arg) {
2369; GFX9-LABEL: store_load_i64_unaligned:
2370; GFX9:       ; %bb.0: ; %bb
2371; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2372; GFX9-NEXT:    v_mov_b32_e32 v1, 15
2373; GFX9-NEXT:    v_mov_b32_e32 v2, 0
2374; GFX9-NEXT:    scratch_store_dwordx2 v0, v[1:2], off
2375; GFX9-NEXT:    s_waitcnt vmcnt(0)
2376; GFX9-NEXT:    scratch_load_dwordx2 v[0:1], v0, off glc
2377; GFX9-NEXT:    s_waitcnt vmcnt(0)
2378; GFX9-NEXT:    s_setpc_b64 s[30:31]
2379;
2380; GFX10-LABEL: store_load_i64_unaligned:
2381; GFX10:       ; %bb.0: ; %bb
2382; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2383; GFX10-NEXT:    v_mov_b32_e32 v1, 15
2384; GFX10-NEXT:    v_mov_b32_e32 v2, 0
2385; GFX10-NEXT:    scratch_store_dwordx2 v0, v[1:2], off
2386; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2387; GFX10-NEXT:    scratch_load_dwordx2 v[0:1], v0, off glc dlc
2388; GFX10-NEXT:    s_waitcnt vmcnt(0)
2389; GFX10-NEXT:    s_setpc_b64 s[30:31]
2390;
2391; GFX940-LABEL: store_load_i64_unaligned:
2392; GFX940:       ; %bb.0: ; %bb
2393; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2394; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 15
2395; GFX940-NEXT:    scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
2396; GFX940-NEXT:    s_waitcnt vmcnt(0)
2397; GFX940-NEXT:    scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
2398; GFX940-NEXT:    s_waitcnt vmcnt(0)
2399; GFX940-NEXT:    s_setpc_b64 s[30:31]
2400;
2401; GFX11-LABEL: store_load_i64_unaligned:
2402; GFX11:       ; %bb.0: ; %bb
2403; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2404; GFX11-NEXT:    v_mov_b32_e32 v1, 15
2405; GFX11-NEXT:    v_mov_b32_e32 v2, 0
2406; GFX11-NEXT:    scratch_store_b64 v0, v[1:2], off dlc
2407; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2408; GFX11-NEXT:    scratch_load_b64 v[0:1], v0, off glc dlc
2409; GFX11-NEXT:    s_waitcnt vmcnt(0)
2410; GFX11-NEXT:    s_setpc_b64 s[30:31]
2411;
2412; GFX12-LABEL: store_load_i64_unaligned:
2413; GFX12:       ; %bb.0: ; %bb
2414; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2415; GFX12-NEXT:    s_wait_expcnt 0x0
2416; GFX12-NEXT:    s_wait_samplecnt 0x0
2417; GFX12-NEXT:    s_wait_bvhcnt 0x0
2418; GFX12-NEXT:    s_wait_kmcnt 0x0
2419; GFX12-NEXT:    v_mov_b32_e32 v1, 15
2420; GFX12-NEXT:    v_mov_b32_e32 v2, 0
2421; GFX12-NEXT:    s_wait_storecnt 0x0
2422; GFX12-NEXT:    scratch_store_b64 v0, v[1:2], off scope:SCOPE_SYS
2423; GFX12-NEXT:    s_wait_storecnt 0x0
2424; GFX12-NEXT:    scratch_load_b64 v[0:1], v0, off scope:SCOPE_SYS
2425; GFX12-NEXT:    s_wait_loadcnt 0x0
2426; GFX12-NEXT:    s_setpc_b64 s[30:31]
2427;
2428; UNALIGNED_GFX9-LABEL: store_load_i64_unaligned:
2429; UNALIGNED_GFX9:       ; %bb.0: ; %bb
2430; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2431; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v4, 15
2432; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v1, 4, v0
2433; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v2, 2, v0
2434; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
2435; UNALIGNED_GFX9-NEXT:    scratch_store_byte v0, v4, off
2436; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2437; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v4, 0
2438; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v6, 6, v0
2439; UNALIGNED_GFX9-NEXT:    scratch_store_byte v3, v4, off
2440; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2441; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v5, 3, v0
2442; UNALIGNED_GFX9-NEXT:    scratch_store_byte v2, v4, off
2443; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2444; UNALIGNED_GFX9-NEXT:    scratch_store_byte v5, v4, off
2445; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2446; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v7, 5, v0
2447; UNALIGNED_GFX9-NEXT:    scratch_store_byte v1, v4, off
2448; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2449; UNALIGNED_GFX9-NEXT:    scratch_store_byte v7, v4, off
2450; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2451; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v8, 7, v0
2452; UNALIGNED_GFX9-NEXT:    scratch_store_byte v6, v4, off
2453; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2454; UNALIGNED_GFX9-NEXT:    scratch_store_byte v8, v4, off
2455; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2456; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v4, v0, off glc
2457; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2458; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr7
2459; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr2
2460; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr6
2461; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr1
2462; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr3
2463; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr5
2464; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr8
2465; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr0
2466; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v4, v3, off glc
2467; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2468; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v4, v2, off glc
2469; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2470; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v4, v5, off glc
2471; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2472; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v4, v1, off glc
2473; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2474; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v4, v7, off glc
2475; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2476; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v4, v6, off glc
2477; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2478; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v4, v8, off glc
2479; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2480; UNALIGNED_GFX9-NEXT:    s_setpc_b64 s[30:31]
2481;
2482; UNALIGNED_GFX10-LABEL: store_load_i64_unaligned:
2483; UNALIGNED_GFX10:       ; %bb.0: ; %bb
2484; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2485; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v1, 15
2486; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v2, 0
2487; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v4, 1, v0
2488; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v3, 4, v0
2489; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v5, 2, v0
2490; UNALIGNED_GFX10-NEXT:    scratch_store_byte v0, v1, off
2491; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2492; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v1, 3, v0
2493; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v6, 5, v0
2494; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v7, 6, v0
2495; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v8, 7, v0
2496; UNALIGNED_GFX10-NEXT:    scratch_store_byte v4, v2, off
2497; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2498; UNALIGNED_GFX10-NEXT:    scratch_store_byte v5, v2, off
2499; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2500; UNALIGNED_GFX10-NEXT:    scratch_store_byte v1, v2, off
2501; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2502; UNALIGNED_GFX10-NEXT:    scratch_store_byte v3, v2, off
2503; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2504; UNALIGNED_GFX10-NEXT:    scratch_store_byte v6, v2, off
2505; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2506; UNALIGNED_GFX10-NEXT:    scratch_store_byte v7, v2, off
2507; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2508; UNALIGNED_GFX10-NEXT:    scratch_store_byte v8, v2, off
2509; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2510; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v0, off glc dlc
2511; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2512; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v4, off glc dlc
2513; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2514; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v5, off glc dlc
2515; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2516; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v1, off glc dlc
2517; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2518; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v3, off glc dlc
2519; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2520; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v6, off glc dlc
2521; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2522; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v7, off glc dlc
2523; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2524; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v8, off glc dlc
2525; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2526; UNALIGNED_GFX10-NEXT:    s_setpc_b64 s[30:31]
2527;
2528; UNALIGNED_GFX940-LABEL: store_load_i64_unaligned:
2529; UNALIGNED_GFX940:       ; %bb.0: ; %bb
2530; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2531; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v4, 15
2532; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v1, 4, v0
2533; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v2, 2, v0
2534; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v3, 1, v0
2535; UNALIGNED_GFX940-NEXT:    scratch_store_byte v0, v4, off sc0 sc1
2536; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2537; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v4, 0
2538; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v6, 6, v0
2539; UNALIGNED_GFX940-NEXT:    scratch_store_byte v3, v4, off sc0 sc1
2540; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2541; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v5, 3, v0
2542; UNALIGNED_GFX940-NEXT:    scratch_store_byte v2, v4, off sc0 sc1
2543; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2544; UNALIGNED_GFX940-NEXT:    scratch_store_byte v5, v4, off sc0 sc1
2545; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2546; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v7, 5, v0
2547; UNALIGNED_GFX940-NEXT:    scratch_store_byte v1, v4, off sc0 sc1
2548; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2549; UNALIGNED_GFX940-NEXT:    scratch_store_byte v7, v4, off sc0 sc1
2550; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2551; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v8, 7, v0
2552; UNALIGNED_GFX940-NEXT:    scratch_store_byte v6, v4, off sc0 sc1
2553; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2554; UNALIGNED_GFX940-NEXT:    scratch_store_byte v8, v4, off sc0 sc1
2555; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2556; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v4, v0, off sc0 sc1
2557; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2558; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr7
2559; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr2
2560; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr6
2561; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr1
2562; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr3
2563; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr5
2564; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr8
2565; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr0
2566; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v4, v3, off sc0 sc1
2567; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2568; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v4, v2, off sc0 sc1
2569; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2570; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v4, v5, off sc0 sc1
2571; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2572; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v4, v1, off sc0 sc1
2573; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2574; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v4, v7, off sc0 sc1
2575; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2576; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v4, v6, off sc0 sc1
2577; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2578; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v4, v8, off sc0 sc1
2579; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2580; UNALIGNED_GFX940-NEXT:    s_setpc_b64 s[30:31]
2581;
2582; UNALIGNED_GFX11-LABEL: store_load_i64_unaligned:
2583; UNALIGNED_GFX11:       ; %bb.0: ; %bb
2584; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2585; UNALIGNED_GFX11-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_mov_b32 v2, 0
2586; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v4, 1, v0
2587; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v3, 4, v0
2588; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v5, 2, v0
2589; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v0, v1, off dlc
2590; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2591; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v1, 3, v0
2592; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v6, 5, v0
2593; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v7, 6, v0
2594; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v8, 7, v0
2595; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v4, v2, off dlc
2596; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2597; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v5, v2, off dlc
2598; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2599; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v1, v2, off dlc
2600; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2601; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v3, v2, off dlc
2602; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2603; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v6, v2, off dlc
2604; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2605; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v7, v2, off dlc
2606; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2607; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v8, v2, off dlc
2608; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2609; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v0, off glc dlc
2610; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
2611; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v4, off glc dlc
2612; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
2613; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v5, off glc dlc
2614; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
2615; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v1, off glc dlc
2616; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
2617; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v3, off glc dlc
2618; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
2619; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v6, off glc dlc
2620; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
2621; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v7, off glc dlc
2622; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
2623; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v8, off glc dlc
2624; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
2625; UNALIGNED_GFX11-NEXT:    s_setpc_b64 s[30:31]
2626;
2627; UNALIGNED_GFX12-LABEL: store_load_i64_unaligned:
2628; UNALIGNED_GFX12:       ; %bb.0: ; %bb
2629; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2630; UNALIGNED_GFX12-NEXT:    s_wait_expcnt 0x0
2631; UNALIGNED_GFX12-NEXT:    s_wait_samplecnt 0x0
2632; UNALIGNED_GFX12-NEXT:    s_wait_bvhcnt 0x0
2633; UNALIGNED_GFX12-NEXT:    s_wait_kmcnt 0x0
2634; UNALIGNED_GFX12-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_mov_b32 v2, 0
2635; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
2636; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v1, off scope:SCOPE_SYS
2637; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
2638; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:1 scope:SCOPE_SYS
2639; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
2640; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
2641; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
2642; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:3 scope:SCOPE_SYS
2643; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
2644; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:4 scope:SCOPE_SYS
2645; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
2646; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:5 scope:SCOPE_SYS
2647; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
2648; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:6 scope:SCOPE_SYS
2649; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
2650; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:7 scope:SCOPE_SYS
2651; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
2652; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off scope:SCOPE_SYS
2653; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
2654; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:1 scope:SCOPE_SYS
2655; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
2656; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:2 scope:SCOPE_SYS
2657; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
2658; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:3 scope:SCOPE_SYS
2659; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
2660; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:4 scope:SCOPE_SYS
2661; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
2662; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:5 scope:SCOPE_SYS
2663; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
2664; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:6 scope:SCOPE_SYS
2665; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
2666; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v0, v0, off offset:7 scope:SCOPE_SYS
2667; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
2668; UNALIGNED_GFX12-NEXT:    s_setpc_b64 s[30:31]
2669bb:
2670  store volatile i64 15, ptr addrspace(5) %arg, align 1
2671  %load = load volatile i64, ptr addrspace(5) %arg, align 1
2672  ret void
2673}
2674
; Volatile store of <3 x i32> <1, 2, 3> followed by a volatile load of the same
; vector, both through the addrspace(5) pointer %arg and both marked align 1.
;
; Expected code, as recorded by the assertions below.
;  * Runs without -mattr=-unaligned-access-mode (prefixes GFX9, GFX10, GFX940,
;    GFX11 and GFX12) keep the access whole, giving one 96-bit scratch store
;    followed by one 96-bit scratch load.
;  * Runs with -mattr=-unaligned-access-mode (the UNALIGNED_* prefixes) split
;    the access into twelve single-byte scratch stores followed by twelve
;    single-byte scratch loads. The stored bytes are 1,0,0,0, 2,0,0,0, 3,0,0,0.
;    On gfx1200 every byte is addressed as v0 plus an immediate offset (1..11);
;    the other targets first materialise each byte address with a vector add.
;
; The assertions are autogenerated (see the NOTE at the top of the file), so
; they should be regenerated with the script rather than edited by hand.
2675define void @store_load_v3i32_unaligned(ptr addrspace(5) nocapture %arg) {
2676; GFX9-LABEL: store_load_v3i32_unaligned:
2677; GFX9:       ; %bb.0: ; %bb
2678; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2679; GFX9-NEXT:    s_mov_b32 s2, 3
2680; GFX9-NEXT:    s_mov_b32 s1, 2
2681; GFX9-NEXT:    s_mov_b32 s0, 1
2682; GFX9-NEXT:    v_mov_b32_e32 v3, s2
2683; GFX9-NEXT:    v_mov_b32_e32 v2, s1
2684; GFX9-NEXT:    v_mov_b32_e32 v1, s0
2685; GFX9-NEXT:    scratch_store_dwordx3 v0, v[1:3], off
2686; GFX9-NEXT:    s_waitcnt vmcnt(0)
2687; GFX9-NEXT:    scratch_load_dwordx3 v[0:2], v0, off glc
2688; GFX9-NEXT:    s_waitcnt vmcnt(0)
2689; GFX9-NEXT:    s_setpc_b64 s[30:31]
2690;
2691; GFX10-LABEL: store_load_v3i32_unaligned:
2692; GFX10:       ; %bb.0: ; %bb
2693; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2694; GFX10-NEXT:    s_mov_b32 s2, 3
2695; GFX10-NEXT:    s_mov_b32 s1, 2
2696; GFX10-NEXT:    s_mov_b32 s0, 1
2697; GFX10-NEXT:    v_mov_b32_e32 v3, s2
2698; GFX10-NEXT:    v_mov_b32_e32 v2, s1
2699; GFX10-NEXT:    v_mov_b32_e32 v1, s0
2700; GFX10-NEXT:    scratch_store_dwordx3 v0, v[1:3], off
2701; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2702; GFX10-NEXT:    scratch_load_dwordx3 v[0:2], v0, off glc dlc
2703; GFX10-NEXT:    s_waitcnt vmcnt(0)
2704; GFX10-NEXT:    s_setpc_b64 s[30:31]
2705;
2706; GFX940-LABEL: store_load_v3i32_unaligned:
2707; GFX940:       ; %bb.0: ; %bb
2708; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2709; GFX940-NEXT:    s_mov_b32 s2, 3
2710; GFX940-NEXT:    s_mov_b32 s1, 2
2711; GFX940-NEXT:    s_mov_b32 s0, 1
2712; GFX940-NEXT:    v_mov_b32_e32 v4, s2
2713; GFX940-NEXT:    v_mov_b32_e32 v3, s1
2714; GFX940-NEXT:    v_mov_b32_e32 v2, s0
2715; GFX940-NEXT:    scratch_store_dwordx3 v0, v[2:4], off sc0 sc1
2716; GFX940-NEXT:    s_waitcnt vmcnt(0)
2717; GFX940-NEXT:    scratch_load_dwordx3 v[0:2], v0, off sc0 sc1
2718; GFX940-NEXT:    s_waitcnt vmcnt(0)
2719; GFX940-NEXT:    s_setpc_b64 s[30:31]
2720;
2721; GFX11-LABEL: store_load_v3i32_unaligned:
2722; GFX11:       ; %bb.0: ; %bb
2723; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2724; GFX11-NEXT:    s_mov_b32 s2, 3
2725; GFX11-NEXT:    s_mov_b32 s1, 2
2726; GFX11-NEXT:    s_mov_b32 s0, 1
2727; GFX11-NEXT:    v_dual_mov_b32 v3, s2 :: v_dual_mov_b32 v2, s1
2728; GFX11-NEXT:    v_mov_b32_e32 v1, s0
2729; GFX11-NEXT:    scratch_store_b96 v0, v[1:3], off dlc
2730; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2731; GFX11-NEXT:    scratch_load_b96 v[0:2], v0, off glc dlc
2732; GFX11-NEXT:    s_waitcnt vmcnt(0)
2733; GFX11-NEXT:    s_setpc_b64 s[30:31]
2734;
2735; GFX12-LABEL: store_load_v3i32_unaligned:
2736; GFX12:       ; %bb.0: ; %bb
2737; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2738; GFX12-NEXT:    s_wait_expcnt 0x0
2739; GFX12-NEXT:    s_wait_samplecnt 0x0
2740; GFX12-NEXT:    s_wait_bvhcnt 0x0
2741; GFX12-NEXT:    s_wait_kmcnt 0x0
2742; GFX12-NEXT:    s_mov_b32 s2, 3
2743; GFX12-NEXT:    s_mov_b32 s1, 2
2744; GFX12-NEXT:    s_mov_b32 s0, 1
2745; GFX12-NEXT:    s_wait_alu 0xfffe
2746; GFX12-NEXT:    v_dual_mov_b32 v3, s2 :: v_dual_mov_b32 v2, s1
2747; GFX12-NEXT:    v_mov_b32_e32 v1, s0
2748; GFX12-NEXT:    s_wait_storecnt 0x0
2749; GFX12-NEXT:    scratch_store_b96 v0, v[1:3], off scope:SCOPE_SYS
2750; GFX12-NEXT:    s_wait_storecnt 0x0
2751; GFX12-NEXT:    scratch_load_b96 v[0:2], v0, off scope:SCOPE_SYS
2752; GFX12-NEXT:    s_wait_loadcnt 0x0
2753; GFX12-NEXT:    s_setpc_b64 s[30:31]
2754;
2755; UNALIGNED_GFX9-LABEL: store_load_v3i32_unaligned:
2756; UNALIGNED_GFX9:       ; %bb.0: ; %bb
2757; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2758; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v3, 1
2759; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v1, 2
2760; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v2, 2, v0
2761; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v4, 1, v0
2762; UNALIGNED_GFX9-NEXT:    scratch_store_byte v0, v3, off
2763; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2764; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v3, 0
2765; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v6, 4, v0
2766; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v7, 6, v0
2767; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v9, 8, v0
2768; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v10, 10, v0
2769; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v12, 3
2770; UNALIGNED_GFX9-NEXT:    scratch_store_byte v4, v3, off
2771; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2772; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v5, 3, v0
2773; UNALIGNED_GFX9-NEXT:    scratch_store_byte v2, v3, off
2774; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2775; UNALIGNED_GFX9-NEXT:    scratch_store_byte v5, v3, off
2776; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2777; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v8, 5, v0
2778; UNALIGNED_GFX9-NEXT:    scratch_store_byte v6, v1, off
2779; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2780; UNALIGNED_GFX9-NEXT:    scratch_store_byte v8, v3, off
2781; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2782; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v1, 7, v0
2783; UNALIGNED_GFX9-NEXT:    scratch_store_byte v7, v3, off
2784; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2785; UNALIGNED_GFX9-NEXT:    scratch_store_byte v1, v3, off
2786; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2787; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v11, 9, v0
2788; UNALIGNED_GFX9-NEXT:    scratch_store_byte v9, v12, off
2789; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2790; UNALIGNED_GFX9-NEXT:    scratch_store_byte v11, v3, off
2791; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2792; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v12, 11, v0
2793; UNALIGNED_GFX9-NEXT:    scratch_store_byte v10, v3, off
2794; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2795; UNALIGNED_GFX9-NEXT:    scratch_store_byte v12, v3, off
2796; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2797; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v0, off glc
2798; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2799; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr12
2800; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr4
2801; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr11
2802; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr7
2803; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr6
2804; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr10
2805; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr5
2806; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr9
2807; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr1
2808; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr8
2809; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr2
2810; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr0
2811; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v4, off glc
2812; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2813; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v2, off glc
2814; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2815; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v5, off glc
2816; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2817; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v6, off glc
2818; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2819; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v8, off glc
2820; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2821; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v7, off glc
2822; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2823; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v1, off glc
2824; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2825; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v9, off glc
2826; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2827; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v11, off glc
2828; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2829; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v10, off glc
2830; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2831; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v12, off glc
2832; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
2833; UNALIGNED_GFX9-NEXT:    s_setpc_b64 s[30:31]
2834;
2835; UNALIGNED_GFX10-LABEL: store_load_v3i32_unaligned:
2836; UNALIGNED_GFX10:       ; %bb.0: ; %bb
2837; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2838; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v1, 1
2839; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v3, 0
2840; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v5, 1, v0
2841; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v2, 2
2842; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v4, 2, v0
2843; UNALIGNED_GFX10-NEXT:    scratch_store_byte v0, v1, off
2844; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2845; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v1, 3, v0
2846; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v6, 4, v0
2847; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v7, 5, v0
2848; UNALIGNED_GFX10-NEXT:    scratch_store_byte v5, v3, off
2849; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2850; UNALIGNED_GFX10-NEXT:    scratch_store_byte v4, v3, off
2851; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2852; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v8, 6, v0
2853; UNALIGNED_GFX10-NEXT:    scratch_store_byte v1, v3, off
2854; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2855; UNALIGNED_GFX10-NEXT:    scratch_store_byte v6, v2, off
2856; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2857; UNALIGNED_GFX10-NEXT:    scratch_store_byte v7, v3, off
2858; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2859; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v2, 7, v0
2860; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v9, 8, v0
2861; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v10, 3
2862; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v11, 9, v0
2863; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v12, 10, v0
2864; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v13, 11, v0
2865; UNALIGNED_GFX10-NEXT:    scratch_store_byte v8, v3, off
2866; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2867; UNALIGNED_GFX10-NEXT:    scratch_store_byte v2, v3, off
2868; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2869; UNALIGNED_GFX10-NEXT:    scratch_store_byte v9, v10, off
2870; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2871; UNALIGNED_GFX10-NEXT:    scratch_store_byte v11, v3, off
2872; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2873; UNALIGNED_GFX10-NEXT:    scratch_store_byte v12, v3, off
2874; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2875; UNALIGNED_GFX10-NEXT:    scratch_store_byte v13, v3, off
2876; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2877; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v0, off glc dlc
2878; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2879; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v5, off glc dlc
2880; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2881; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v4, off glc dlc
2882; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2883; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v1, off glc dlc
2884; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2885; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v6, off glc dlc
2886; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2887; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v7, off glc dlc
2888; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2889; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v8, off glc dlc
2890; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2891; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v2, off glc dlc
2892; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2893; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v9, off glc dlc
2894; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2895; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v11, off glc dlc
2896; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2897; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v12, off glc dlc
2898; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2899; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v13, off glc dlc
2900; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
2901; UNALIGNED_GFX10-NEXT:    s_setpc_b64 s[30:31]
2902;
2903; UNALIGNED_GFX940-LABEL: store_load_v3i32_unaligned:
2904; UNALIGNED_GFX940:       ; %bb.0: ; %bb
2905; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2906; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v3, 1
2907; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v1, 2
2908; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v2, 2, v0
2909; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v4, 1, v0
2910; UNALIGNED_GFX940-NEXT:    scratch_store_byte v0, v3, off sc0 sc1
2911; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2912; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v3, 0
2913; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v6, 4, v0
2914; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v7, 6, v0
2915; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v9, 8, v0
2916; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v10, 10, v0
2917; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v12, 3
2918; UNALIGNED_GFX940-NEXT:    scratch_store_byte v4, v3, off sc0 sc1
2919; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2920; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v5, 3, v0
2921; UNALIGNED_GFX940-NEXT:    scratch_store_byte v2, v3, off sc0 sc1
2922; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2923; UNALIGNED_GFX940-NEXT:    scratch_store_byte v5, v3, off sc0 sc1
2924; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2925; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v8, 5, v0
2926; UNALIGNED_GFX940-NEXT:    scratch_store_byte v6, v1, off sc0 sc1
2927; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2928; UNALIGNED_GFX940-NEXT:    scratch_store_byte v8, v3, off sc0 sc1
2929; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2930; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v1, 7, v0
2931; UNALIGNED_GFX940-NEXT:    scratch_store_byte v7, v3, off sc0 sc1
2932; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2933; UNALIGNED_GFX940-NEXT:    scratch_store_byte v1, v3, off sc0 sc1
2934; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2935; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v11, 9, v0
2936; UNALIGNED_GFX940-NEXT:    scratch_store_byte v9, v12, off sc0 sc1
2937; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2938; UNALIGNED_GFX940-NEXT:    scratch_store_byte v11, v3, off sc0 sc1
2939; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2940; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v12, 11, v0
2941; UNALIGNED_GFX940-NEXT:    scratch_store_byte v10, v3, off sc0 sc1
2942; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2943; UNALIGNED_GFX940-NEXT:    scratch_store_byte v12, v3, off sc0 sc1
2944; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2945; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v0, off sc0 sc1
2946; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2947; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr12
2948; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr4
2949; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr11
2950; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr7
2951; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr6
2952; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr10
2953; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr5
2954; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr9
2955; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr1
2956; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr8
2957; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr2
2958; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr0
2959; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v4, off sc0 sc1
2960; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2961; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v2, off sc0 sc1
2962; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2963; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v5, off sc0 sc1
2964; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2965; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v6, off sc0 sc1
2966; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2967; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v8, off sc0 sc1
2968; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2969; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v7, off sc0 sc1
2970; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2971; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v1, off sc0 sc1
2972; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2973; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v9, off sc0 sc1
2974; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2975; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v11, off sc0 sc1
2976; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2977; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v10, off sc0 sc1
2978; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2979; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v12, off sc0 sc1
2980; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
2981; UNALIGNED_GFX940-NEXT:    s_setpc_b64 s[30:31]
2982;
2983; UNALIGNED_GFX11-LABEL: store_load_v3i32_unaligned:
2984; UNALIGNED_GFX11:       ; %bb.0: ; %bb
2985; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2986; UNALIGNED_GFX11-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
2987; UNALIGNED_GFX11-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_add_nc_u32 v4, 2, v0
2988; UNALIGNED_GFX11-NEXT:    v_dual_mov_b32 v10, 3 :: v_dual_add_nc_u32 v5, 1, v0
2989; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v0, v1, off dlc
2990; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2991; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v1, 3, v0
2992; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v6, 4, v0
2993; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v7, 5, v0
2994; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v5, v3, off dlc
2995; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2996; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v4, v3, off dlc
2997; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2998; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v8, 6, v0
2999; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v1, v3, off dlc
3000; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3001; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v6, v2, off dlc
3002; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3003; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v7, v3, off dlc
3004; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3005; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v2, 7, v0
3006; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v9, 8, v0
3007; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v11, 9, v0
3008; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v12, 10, v0
3009; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v13, 11, v0
3010; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v8, v3, off dlc
3011; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3012; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v2, v3, off dlc
3013; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3014; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v9, v10, off dlc
3015; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3016; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v11, v3, off dlc
3017; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3018; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v12, v3, off dlc
3019; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3020; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v13, v3, off dlc
3021; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3022; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v0, off glc dlc
3023; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3024; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v5, off glc dlc
3025; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3026; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v4, off glc dlc
3027; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3028; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v1, off glc dlc
3029; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3030; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v6, off glc dlc
3031; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3032; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v7, off glc dlc
3033; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3034; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v8, off glc dlc
3035; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3036; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v2, off glc dlc
3037; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3038; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v9, off glc dlc
3039; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3040; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v11, off glc dlc
3041; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3042; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v12, off glc dlc
3043; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3044; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v13, off glc dlc
3045; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3046; UNALIGNED_GFX11-NEXT:    s_setpc_b64 s[30:31]
3047;
3048; UNALIGNED_GFX12-LABEL: store_load_v3i32_unaligned:
3049; UNALIGNED_GFX12:       ; %bb.0: ; %bb
3050; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3051; UNALIGNED_GFX12-NEXT:    s_wait_expcnt 0x0
3052; UNALIGNED_GFX12-NEXT:    s_wait_samplecnt 0x0
3053; UNALIGNED_GFX12-NEXT:    s_wait_bvhcnt 0x0
3054; UNALIGNED_GFX12-NEXT:    s_wait_kmcnt 0x0
3055; UNALIGNED_GFX12-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 0
3056; UNALIGNED_GFX12-NEXT:    v_mov_b32_e32 v3, 2
3057; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3058; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v1, off scope:SCOPE_SYS
3059; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3060; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:1 scope:SCOPE_SYS
3061; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3062; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
3063; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3064; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:3 scope:SCOPE_SYS
3065; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3066; UNALIGNED_GFX12-NEXT:    v_mov_b32_e32 v1, 3
3067; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS
3068; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3069; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:5 scope:SCOPE_SYS
3070; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3071; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:6 scope:SCOPE_SYS
3072; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3073; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:7 scope:SCOPE_SYS
3074; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3075; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v1, off offset:8 scope:SCOPE_SYS
3076; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3077; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:9 scope:SCOPE_SYS
3078; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3079; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:10 scope:SCOPE_SYS
3080; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3081; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:11 scope:SCOPE_SYS
3082; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3083; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off scope:SCOPE_SYS
3084; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3085; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:1 scope:SCOPE_SYS
3086; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3087; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:2 scope:SCOPE_SYS
3088; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3089; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:3 scope:SCOPE_SYS
3090; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3091; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:4 scope:SCOPE_SYS
3092; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3093; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:5 scope:SCOPE_SYS
3094; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3095; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:6 scope:SCOPE_SYS
3096; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3097; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:7 scope:SCOPE_SYS
3098; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3099; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:8 scope:SCOPE_SYS
3100; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3101; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:9 scope:SCOPE_SYS
3102; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3103; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:10 scope:SCOPE_SYS
3104; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3105; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v0, v0, off offset:11 scope:SCOPE_SYS
3106; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3107; UNALIGNED_GFX12-NEXT:    s_setpc_b64 s[30:31]
3108bb:
  ; Both accesses are volatile and only byte-aligned; %load is never used and
  ; the function returns void.
3109  store volatile <3 x i32> <i32 1, i32 2, i32 3>, ptr addrspace(5) %arg, align 1
3110  %load = load volatile <3 x i32>, ptr addrspace(5) %arg, align 1
3111  ret void
3112}
3113
; Test: a volatile <4 x i32> (16-byte) store followed by a volatile load of the
; same type through a ptr addrspace(5) argument, both marked align 1.
;
; Expected output, per the assertions below:
;   * RUN lines without -unaligned-access-mode disabled (the plain GFX*
;     prefixes) check a single 128-bit scratch store and a single 128-bit
;     scratch load.
;   * RUN lines passing -mattr=-unaligned-access-mode (the UNALIGNED_GFX*
;     prefixes) check the access split into 16 single-byte scratch stores
;     followed by 16 single-byte scratch loads, each followed by its own wait.
;
; The assertions are autogenerated by utils/update_llc_test_checks.py (see the
; NOTE at the top of the file); regenerate them rather than editing by hand.
3114define void @store_load_v4i32_unaligned(ptr addrspace(5) nocapture %arg) {
3115; GFX9-LABEL: store_load_v4i32_unaligned:
3116; GFX9:       ; %bb.0: ; %bb
3117; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3118; GFX9-NEXT:    s_mov_b32 s3, 4
3119; GFX9-NEXT:    s_mov_b32 s2, 3
3120; GFX9-NEXT:    s_mov_b32 s1, 2
3121; GFX9-NEXT:    s_mov_b32 s0, 1
3122; GFX9-NEXT:    v_mov_b32_e32 v4, s3
3123; GFX9-NEXT:    v_mov_b32_e32 v3, s2
3124; GFX9-NEXT:    v_mov_b32_e32 v2, s1
3125; GFX9-NEXT:    v_mov_b32_e32 v1, s0
3126; GFX9-NEXT:    scratch_store_dwordx4 v0, v[1:4], off
3127; GFX9-NEXT:    s_waitcnt vmcnt(0)
3128; GFX9-NEXT:    scratch_load_dwordx4 v[0:3], v0, off glc
3129; GFX9-NEXT:    s_waitcnt vmcnt(0)
3130; GFX9-NEXT:    s_setpc_b64 s[30:31]
3131;
3132; GFX10-LABEL: store_load_v4i32_unaligned:
3133; GFX10:       ; %bb.0: ; %bb
3134; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3135; GFX10-NEXT:    s_mov_b32 s3, 4
3136; GFX10-NEXT:    s_mov_b32 s2, 3
3137; GFX10-NEXT:    s_mov_b32 s1, 2
3138; GFX10-NEXT:    s_mov_b32 s0, 1
3139; GFX10-NEXT:    v_mov_b32_e32 v4, s3
3140; GFX10-NEXT:    v_mov_b32_e32 v3, s2
3141; GFX10-NEXT:    v_mov_b32_e32 v2, s1
3142; GFX10-NEXT:    v_mov_b32_e32 v1, s0
3143; GFX10-NEXT:    scratch_store_dwordx4 v0, v[1:4], off
3144; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3145; GFX10-NEXT:    scratch_load_dwordx4 v[0:3], v0, off glc dlc
3146; GFX10-NEXT:    s_waitcnt vmcnt(0)
3147; GFX10-NEXT:    s_setpc_b64 s[30:31]
3148;
3149; GFX940-LABEL: store_load_v4i32_unaligned:
3150; GFX940:       ; %bb.0: ; %bb
3151; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3152; GFX940-NEXT:    s_mov_b32 s3, 4
3153; GFX940-NEXT:    s_mov_b32 s2, 3
3154; GFX940-NEXT:    s_mov_b32 s1, 2
3155; GFX940-NEXT:    s_mov_b32 s0, 1
3156; GFX940-NEXT:    v_mov_b64_e32 v[4:5], s[2:3]
3157; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
3158; GFX940-NEXT:    scratch_store_dwordx4 v0, v[2:5], off sc0 sc1
3159; GFX940-NEXT:    s_waitcnt vmcnt(0)
3160; GFX940-NEXT:    scratch_load_dwordx4 v[0:3], v0, off sc0 sc1
3161; GFX940-NEXT:    s_waitcnt vmcnt(0)
3162; GFX940-NEXT:    s_setpc_b64 s[30:31]
3163;
3164; GFX11-LABEL: store_load_v4i32_unaligned:
3165; GFX11:       ; %bb.0: ; %bb
3166; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3167; GFX11-NEXT:    s_mov_b32 s3, 4
3168; GFX11-NEXT:    s_mov_b32 s2, 3
3169; GFX11-NEXT:    s_mov_b32 s1, 2
3170; GFX11-NEXT:    s_mov_b32 s0, 1
3171; GFX11-NEXT:    v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2
3172; GFX11-NEXT:    v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
3173; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off dlc
3174; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3175; GFX11-NEXT:    scratch_load_b128 v[0:3], v0, off glc dlc
3176; GFX11-NEXT:    s_waitcnt vmcnt(0)
3177; GFX11-NEXT:    s_setpc_b64 s[30:31]
3178;
3179; GFX12-LABEL: store_load_v4i32_unaligned:
3180; GFX12:       ; %bb.0: ; %bb
3181; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3182; GFX12-NEXT:    s_wait_expcnt 0x0
3183; GFX12-NEXT:    s_wait_samplecnt 0x0
3184; GFX12-NEXT:    s_wait_bvhcnt 0x0
3185; GFX12-NEXT:    s_wait_kmcnt 0x0
3186; GFX12-NEXT:    s_mov_b32 s3, 4
3187; GFX12-NEXT:    s_mov_b32 s2, 3
3188; GFX12-NEXT:    s_mov_b32 s1, 2
3189; GFX12-NEXT:    s_mov_b32 s0, 1
3190; GFX12-NEXT:    s_wait_alu 0xfffe
3191; GFX12-NEXT:    v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2
3192; GFX12-NEXT:    v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
3193; GFX12-NEXT:    s_wait_storecnt 0x0
3194; GFX12-NEXT:    scratch_store_b128 v0, v[1:4], off scope:SCOPE_SYS
3195; GFX12-NEXT:    s_wait_storecnt 0x0
3196; GFX12-NEXT:    scratch_load_b128 v[0:3], v0, off scope:SCOPE_SYS
3197; GFX12-NEXT:    s_wait_loadcnt 0x0
3198; GFX12-NEXT:    s_setpc_b64 s[30:31]
3199;
3200; UNALIGNED_GFX9-LABEL: store_load_v4i32_unaligned:
3201; UNALIGNED_GFX9:       ; %bb.0: ; %bb
3202; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3203; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v3, 1
3204; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v1, 2
3205; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v2, 2, v0
3206; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v4, 1, v0
3207; UNALIGNED_GFX9-NEXT:    scratch_store_byte v0, v3, off
3208; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3209; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v3, 0
3210; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v6, 4
3211; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v7, 4, v0
3212; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v8, 6, v0
3213; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v10, 8, v0
3214; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v11, 10, v0
3215; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v13, 3
3216; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v14, 12, v0
3217; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v15, 14, v0
3218; UNALIGNED_GFX9-NEXT:    scratch_store_byte v4, v3, off
3219; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3220; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v5, 3, v0
3221; UNALIGNED_GFX9-NEXT:    scratch_store_byte v2, v3, off
3222; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3223; UNALIGNED_GFX9-NEXT:    scratch_store_byte v5, v3, off
3224; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3225; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v9, 5, v0
3226; UNALIGNED_GFX9-NEXT:    scratch_store_byte v7, v1, off
3227; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3228; UNALIGNED_GFX9-NEXT:    scratch_store_byte v9, v3, off
3229; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3230; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v1, 7, v0
3231; UNALIGNED_GFX9-NEXT:    scratch_store_byte v8, v3, off
3232; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3233; UNALIGNED_GFX9-NEXT:    scratch_store_byte v1, v3, off
3234; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3235; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v12, 9, v0
3236; UNALIGNED_GFX9-NEXT:    scratch_store_byte v10, v13, off
3237; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3238; UNALIGNED_GFX9-NEXT:    scratch_store_byte v12, v3, off
3239; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3240; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v13, 11, v0
3241; UNALIGNED_GFX9-NEXT:    scratch_store_byte v11, v3, off
3242; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3243; UNALIGNED_GFX9-NEXT:    scratch_store_byte v13, v3, off
3244; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3245; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v16, 13, v0
3246; UNALIGNED_GFX9-NEXT:    scratch_store_byte v14, v6, off
3247; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3248; UNALIGNED_GFX9-NEXT:    scratch_store_byte v16, v3, off
3249; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3250; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v6, 15, v0
3251; UNALIGNED_GFX9-NEXT:    scratch_store_byte v15, v3, off
3252; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3253; UNALIGNED_GFX9-NEXT:    scratch_store_byte v6, v3, off
3254; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3255; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v0, off glc
3256; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3257; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v4, off glc
3258; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3259; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v2, off glc
3260; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3261; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v5, off glc
3262; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3263; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v7, off glc
3264; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3265; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v9, off glc
3266; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3267; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v8, off glc
3268; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3269; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v1, off glc
3270; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3271; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v10, off glc
3272; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3273; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v12, off glc
3274; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3275; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v11, off glc
3276; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3277; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v13, off glc
3278; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3279; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v14, off glc
3280; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3281; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v16, off glc
3282; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3283; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v3, v15, off glc
3284; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3285; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr2
3286; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr1
3287; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr9
3288; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr16
3289; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr11
3290; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr4
3291; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr15
3292; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr10
3293; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr7
3294; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr13
3295; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr5
3296; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr14
3297; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr12
3298; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr8
3299; UNALIGNED_GFX9-NEXT:    ; kill: killed $vgpr0
3300; UNALIGNED_GFX9-NEXT:    scratch_load_ubyte v0, v6, off glc
3301; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3302; UNALIGNED_GFX9-NEXT:    s_setpc_b64 s[30:31]
3303;
3304; UNALIGNED_GFX10-LABEL: store_load_v4i32_unaligned:
3305; UNALIGNED_GFX10:       ; %bb.0: ; %bb
3306; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3307; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v1, 1
3308; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v2, 2
3309; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v3, 0
3310; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v4, 1, v0
3311; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v6, 4, v0
3312; UNALIGNED_GFX10-NEXT:    scratch_store_byte v0, v1, off
3313; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3314; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v1, 3, v0
3315; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v5, 2, v0
3316; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v7, 5, v0
3317; UNALIGNED_GFX10-NEXT:    scratch_store_byte v4, v3, off
3318; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3319; UNALIGNED_GFX10-NEXT:    scratch_store_byte v5, v3, off
3320; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3321; UNALIGNED_GFX10-NEXT:    scratch_store_byte v1, v3, off
3322; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3323; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v9, 6, v0
3324; UNALIGNED_GFX10-NEXT:    scratch_store_byte v6, v2, off
3325; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3326; UNALIGNED_GFX10-NEXT:    scratch_store_byte v7, v3, off
3327; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3328; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v2, 7, v0
3329; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v10, 8, v0
3330; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v11, 3
3331; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v12, 9, v0
3332; UNALIGNED_GFX10-NEXT:    scratch_store_byte v9, v3, off
3333; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3334; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v13, 10, v0
3335; UNALIGNED_GFX10-NEXT:    scratch_store_byte v2, v3, off
3336; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3337; UNALIGNED_GFX10-NEXT:    scratch_store_byte v10, v11, off
3338; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3339; UNALIGNED_GFX10-NEXT:    scratch_store_byte v12, v3, off
3340; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3341; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v11, 11, v0
3342; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v8, 4
3343; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v14, 12, v0
3344; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v15, 13, v0
3345; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v16, 14, v0
3346; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v17, 15, v0
3347; UNALIGNED_GFX10-NEXT:    scratch_store_byte v13, v3, off
3348; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3349; UNALIGNED_GFX10-NEXT:    scratch_store_byte v11, v3, off
3350; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3351; UNALIGNED_GFX10-NEXT:    scratch_store_byte v14, v8, off
3352; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3353; UNALIGNED_GFX10-NEXT:    scratch_store_byte v15, v3, off
3354; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3355; UNALIGNED_GFX10-NEXT:    scratch_store_byte v16, v3, off
3356; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3357; UNALIGNED_GFX10-NEXT:    scratch_store_byte v17, v3, off
3358; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3359; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v0, off glc dlc
3360; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
3361; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v4, off glc dlc
3362; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
3363; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v5, off glc dlc
3364; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
3365; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v1, off glc dlc
3366; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
3367; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v6, off glc dlc
3368; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
3369; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v7, off glc dlc
3370; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
3371; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v9, off glc dlc
3372; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
3373; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v2, off glc dlc
3374; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
3375; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v10, off glc dlc
3376; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
3377; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v12, off glc dlc
3378; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
3379; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v13, off glc dlc
3380; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
3381; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v11, off glc dlc
3382; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
3383; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v14, off glc dlc
3384; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
3385; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v15, off glc dlc
3386; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
3387; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v16, off glc dlc
3388; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
3389; UNALIGNED_GFX10-NEXT:    scratch_load_ubyte v0, v17, off glc dlc
3390; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
3391; UNALIGNED_GFX10-NEXT:    s_setpc_b64 s[30:31]
3392;
3393; UNALIGNED_GFX940-LABEL: store_load_v4i32_unaligned:
3394; UNALIGNED_GFX940:       ; %bb.0: ; %bb
3395; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3396; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v3, 1
3397; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v1, 2
3398; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v2, 2, v0
3399; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v4, 1, v0
3400; UNALIGNED_GFX940-NEXT:    scratch_store_byte v0, v3, off sc0 sc1
3401; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3402; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v3, 0
3403; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v6, 4
3404; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v7, 4, v0
3405; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v8, 6, v0
3406; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v10, 8, v0
3407; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v11, 10, v0
3408; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v13, 3
3409; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v14, 12, v0
3410; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v15, 14, v0
3411; UNALIGNED_GFX940-NEXT:    scratch_store_byte v4, v3, off sc0 sc1
3412; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3413; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v5, 3, v0
3414; UNALIGNED_GFX940-NEXT:    scratch_store_byte v2, v3, off sc0 sc1
3415; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3416; UNALIGNED_GFX940-NEXT:    scratch_store_byte v5, v3, off sc0 sc1
3417; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3418; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v9, 5, v0
3419; UNALIGNED_GFX940-NEXT:    scratch_store_byte v7, v1, off sc0 sc1
3420; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3421; UNALIGNED_GFX940-NEXT:    scratch_store_byte v9, v3, off sc0 sc1
3422; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3423; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v1, 7, v0
3424; UNALIGNED_GFX940-NEXT:    scratch_store_byte v8, v3, off sc0 sc1
3425; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3426; UNALIGNED_GFX940-NEXT:    scratch_store_byte v1, v3, off sc0 sc1
3427; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3428; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v12, 9, v0
3429; UNALIGNED_GFX940-NEXT:    scratch_store_byte v10, v13, off sc0 sc1
3430; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3431; UNALIGNED_GFX940-NEXT:    scratch_store_byte v12, v3, off sc0 sc1
3432; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3433; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v13, 11, v0
3434; UNALIGNED_GFX940-NEXT:    scratch_store_byte v11, v3, off sc0 sc1
3435; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3436; UNALIGNED_GFX940-NEXT:    scratch_store_byte v13, v3, off sc0 sc1
3437; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3438; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v16, 13, v0
3439; UNALIGNED_GFX940-NEXT:    scratch_store_byte v14, v6, off sc0 sc1
3440; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3441; UNALIGNED_GFX940-NEXT:    scratch_store_byte v16, v3, off sc0 sc1
3442; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3443; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v6, 15, v0
3444; UNALIGNED_GFX940-NEXT:    scratch_store_byte v15, v3, off sc0 sc1
3445; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3446; UNALIGNED_GFX940-NEXT:    scratch_store_byte v6, v3, off sc0 sc1
3447; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3448; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v0, off sc0 sc1
3449; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3450; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v4, off sc0 sc1
3451; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3452; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v2, off sc0 sc1
3453; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3454; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v5, off sc0 sc1
3455; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3456; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v7, off sc0 sc1
3457; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3458; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v9, off sc0 sc1
3459; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3460; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v8, off sc0 sc1
3461; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3462; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v1, off sc0 sc1
3463; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3464; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v10, off sc0 sc1
3465; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3466; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v12, off sc0 sc1
3467; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3468; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v11, off sc0 sc1
3469; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3470; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v13, off sc0 sc1
3471; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3472; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v14, off sc0 sc1
3473; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3474; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v16, off sc0 sc1
3475; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3476; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v3, v15, off sc0 sc1
3477; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3478; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr2
3479; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr1
3480; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr9
3481; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr16
3482; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr11
3483; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr4
3484; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr15
3485; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr10
3486; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr7
3487; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr13
3488; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr5
3489; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr14
3490; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr12
3491; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr8
3492; UNALIGNED_GFX940-NEXT:    ; kill: killed $vgpr0
3493; UNALIGNED_GFX940-NEXT:    scratch_load_ubyte v0, v6, off sc0 sc1
3494; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3495; UNALIGNED_GFX940-NEXT:    s_setpc_b64 s[30:31]
3496;
3497; UNALIGNED_GFX11-LABEL: store_load_v4i32_unaligned:
3498; UNALIGNED_GFX11:       ; %bb.0: ; %bb
3499; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3500; UNALIGNED_GFX11-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
3501; UNALIGNED_GFX11-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_add_nc_u32 v4, 1, v0
3502; UNALIGNED_GFX11-NEXT:    v_dual_mov_b32 v11, 3 :: v_dual_add_nc_u32 v6, 4, v0
3503; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v0, v1, off dlc
3504; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3505; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v1, 3, v0
3506; UNALIGNED_GFX11-NEXT:    v_dual_mov_b32 v8, 4 :: v_dual_add_nc_u32 v5, 2, v0
3507; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v7, 5, v0
3508; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v4, v3, off dlc
3509; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3510; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v5, v3, off dlc
3511; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3512; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v1, v3, off dlc
3513; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3514; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v9, 6, v0
3515; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v6, v2, off dlc
3516; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3517; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v7, v3, off dlc
3518; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3519; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v2, 7, v0
3520; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v10, 8, v0
3521; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v12, 9, v0
3522; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v9, v3, off dlc
3523; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3524; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v13, 10, v0
3525; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v2, v3, off dlc
3526; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3527; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v10, v11, off dlc
3528; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3529; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v12, v3, off dlc
3530; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3531; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v11, 11, v0
3532; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v14, 12, v0
3533; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v15, 13, v0
3534; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v16, 14, v0
3535; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v17, 15, v0
3536; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v13, v3, off dlc
3537; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3538; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v11, v3, off dlc
3539; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3540; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v14, v8, off dlc
3541; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3542; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v15, v3, off dlc
3543; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3544; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v16, v3, off dlc
3545; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3546; UNALIGNED_GFX11-NEXT:    scratch_store_b8 v17, v3, off dlc
3547; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3548; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v0, off glc dlc
3549; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3550; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v4, off glc dlc
3551; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3552; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v5, off glc dlc
3553; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3554; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v1, off glc dlc
3555; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3556; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v6, off glc dlc
3557; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3558; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v7, off glc dlc
3559; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3560; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v9, off glc dlc
3561; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3562; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v2, off glc dlc
3563; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3564; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v10, off glc dlc
3565; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3566; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v12, off glc dlc
3567; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3568; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v13, off glc dlc
3569; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3570; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v11, off glc dlc
3571; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3572; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v14, off glc dlc
3573; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3574; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v15, off glc dlc
3575; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3576; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v16, off glc dlc
3577; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3578; UNALIGNED_GFX11-NEXT:    scratch_load_u8 v0, v17, off glc dlc
3579; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3580; UNALIGNED_GFX11-NEXT:    s_setpc_b64 s[30:31]
3581;
3582; UNALIGNED_GFX12-LABEL: store_load_v4i32_unaligned:
3583; UNALIGNED_GFX12:       ; %bb.0: ; %bb
3584; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3585; UNALIGNED_GFX12-NEXT:    s_wait_expcnt 0x0
3586; UNALIGNED_GFX12-NEXT:    s_wait_samplecnt 0x0
3587; UNALIGNED_GFX12-NEXT:    s_wait_bvhcnt 0x0
3588; UNALIGNED_GFX12-NEXT:    s_wait_kmcnt 0x0
3589; UNALIGNED_GFX12-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 0
3590; UNALIGNED_GFX12-NEXT:    v_mov_b32_e32 v3, 2
3591; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3592; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v1, off scope:SCOPE_SYS
3593; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3594; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:1 scope:SCOPE_SYS
3595; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3596; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
3597; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3598; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:3 scope:SCOPE_SYS
3599; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3600; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS
3601; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3602; UNALIGNED_GFX12-NEXT:    v_mov_b32_e32 v1, 3
3603; UNALIGNED_GFX12-NEXT:    v_mov_b32_e32 v3, 4
3604; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:5 scope:SCOPE_SYS
3605; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3606; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:6 scope:SCOPE_SYS
3607; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3608; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:7 scope:SCOPE_SYS
3609; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3610; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v1, off offset:8 scope:SCOPE_SYS
3611; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3612; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:9 scope:SCOPE_SYS
3613; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3614; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:10 scope:SCOPE_SYS
3615; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3616; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:11 scope:SCOPE_SYS
3617; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3618; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v3, off offset:12 scope:SCOPE_SYS
3619; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3620; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:13 scope:SCOPE_SYS
3621; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3622; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:14 scope:SCOPE_SYS
3623; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3624; UNALIGNED_GFX12-NEXT:    scratch_store_b8 v0, v2, off offset:15 scope:SCOPE_SYS
3625; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3626; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off scope:SCOPE_SYS
3627; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3628; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:1 scope:SCOPE_SYS
3629; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3630; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:2 scope:SCOPE_SYS
3631; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3632; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:3 scope:SCOPE_SYS
3633; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3634; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:4 scope:SCOPE_SYS
3635; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3636; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:5 scope:SCOPE_SYS
3637; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3638; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:6 scope:SCOPE_SYS
3639; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3640; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:7 scope:SCOPE_SYS
3641; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3642; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:8 scope:SCOPE_SYS
3643; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3644; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:9 scope:SCOPE_SYS
3645; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3646; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:10 scope:SCOPE_SYS
3647; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3648; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:11 scope:SCOPE_SYS
3649; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3650; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:12 scope:SCOPE_SYS
3651; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3652; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:13 scope:SCOPE_SYS
3653; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3654; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v1, v0, off offset:14 scope:SCOPE_SYS
3655; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3656; UNALIGNED_GFX12-NEXT:    scratch_load_u8 v0, v0, off offset:15 scope:SCOPE_SYS
3657; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3658; UNALIGNED_GFX12-NEXT:    s_setpc_b64 s[30:31]
3659bb:
  ; Both accesses are volatile and align 1. %load has no IR use, yet every
  ; expected sequence above still contains the load instruction(s).
3660  store volatile <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr addrspace(5) %arg, align 1
3661  %load = load volatile <4 x i32>, ptr addrspace(5) %arg, align 1
3662  ret void
3663}
3664
; Test: load an i32 from a scratch (addrspace 5) pointer passed inreg, at a
; constant byte offset of 65512 (0xffe8), and store the result to %out.
; Per the assertions below, the gfx900, gfx1030, gfx940 and gfx1100 run lines
; expect the constant to be added to the base with a scalar s_add_u32 and the
; scratch load to carry no immediate offset, while the gfx1200 run lines expect
; the constant to be folded into the load as an immediate offset of 65512.
; The UNALIGNED_* run lines expect the same instruction sequences.
3665define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspace(5) inreg %sgpr_base) {
3666; GFX9-LABEL: sgpr_base_large_offset:
3667; GFX9:       ; %bb.0: ; %entry
3668; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
3669; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
3670; GFX9-NEXT:    s_add_u32 s0, s2, 0xffe8
3671; GFX9-NEXT:    scratch_load_dword v2, off, s0
3672; GFX9-NEXT:    s_waitcnt vmcnt(0)
3673; GFX9-NEXT:    global_store_dword v[0:1], v2, off
3674; GFX9-NEXT:    s_endpgm
3675;
3676; GFX10-LABEL: sgpr_base_large_offset:
3677; GFX10:       ; %bb.0: ; %entry
3678; GFX10-NEXT:    s_add_u32 s0, s0, s5
3679; GFX10-NEXT:    s_addc_u32 s1, s1, 0
3680; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
3681; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
3682; GFX10-NEXT:    s_add_u32 s0, s2, 0xffe8
3683; GFX10-NEXT:    scratch_load_dword v2, off, s0
3684; GFX10-NEXT:    s_waitcnt vmcnt(0)
3685; GFX10-NEXT:    global_store_dword v[0:1], v2, off
3686; GFX10-NEXT:    s_endpgm
3687;
3688; GFX940-LABEL: sgpr_base_large_offset:
3689; GFX940:       ; %bb.0: ; %entry
3690; GFX940-NEXT:    s_add_u32 s0, s0, 0xffe8
3691; GFX940-NEXT:    scratch_load_dword v2, off, s0
3692; GFX940-NEXT:    s_waitcnt vmcnt(0)
3693; GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
3694; GFX940-NEXT:    s_endpgm
3695;
3696; GFX11-LABEL: sgpr_base_large_offset:
3697; GFX11:       ; %bb.0: ; %entry
3698; GFX11-NEXT:    s_add_u32 s0, s0, 0xffe8
3699; GFX11-NEXT:    scratch_load_b32 v2, off, s0
3700; GFX11-NEXT:    s_waitcnt vmcnt(0)
3701; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
3702; GFX11-NEXT:    s_endpgm
3703;
3704; GFX12-LABEL: sgpr_base_large_offset:
3705; GFX12:       ; %bb.0: ; %entry
3706; GFX12-NEXT:    scratch_load_b32 v2, off, s0 offset:65512
3707; GFX12-NEXT:    s_wait_loadcnt 0x0
3708; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
3709; GFX12-NEXT:    s_endpgm
3710;
3711; UNALIGNED_GFX9-LABEL: sgpr_base_large_offset:
3712; UNALIGNED_GFX9:       ; %bb.0: ; %entry
3713; UNALIGNED_GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
3714; UNALIGNED_GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
3715; UNALIGNED_GFX9-NEXT:    s_add_u32 s0, s2, 0xffe8
3716; UNALIGNED_GFX9-NEXT:    scratch_load_dword v2, off, s0
3717; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3718; UNALIGNED_GFX9-NEXT:    global_store_dword v[0:1], v2, off
3719; UNALIGNED_GFX9-NEXT:    s_endpgm
3720;
3721; UNALIGNED_GFX10-LABEL: sgpr_base_large_offset:
3722; UNALIGNED_GFX10:       ; %bb.0: ; %entry
3723; UNALIGNED_GFX10-NEXT:    s_add_u32 s0, s0, s5
3724; UNALIGNED_GFX10-NEXT:    s_addc_u32 s1, s1, 0
3725; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
3726; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
3727; UNALIGNED_GFX10-NEXT:    s_add_u32 s0, s2, 0xffe8
3728; UNALIGNED_GFX10-NEXT:    scratch_load_dword v2, off, s0
3729; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
3730; UNALIGNED_GFX10-NEXT:    global_store_dword v[0:1], v2, off
3731; UNALIGNED_GFX10-NEXT:    s_endpgm
3732;
3733; UNALIGNED_GFX940-LABEL: sgpr_base_large_offset:
3734; UNALIGNED_GFX940:       ; %bb.0: ; %entry
3735; UNALIGNED_GFX940-NEXT:    s_add_u32 s0, s0, 0xffe8
3736; UNALIGNED_GFX940-NEXT:    scratch_load_dword v2, off, s0
3737; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3738; UNALIGNED_GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
3739; UNALIGNED_GFX940-NEXT:    s_endpgm
3740;
3741; UNALIGNED_GFX11-LABEL: sgpr_base_large_offset:
3742; UNALIGNED_GFX11:       ; %bb.0: ; %entry
3743; UNALIGNED_GFX11-NEXT:    s_add_u32 s0, s0, 0xffe8
3744; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v2, off, s0
3745; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3746; UNALIGNED_GFX11-NEXT:    global_store_b32 v[0:1], v2, off
3747; UNALIGNED_GFX11-NEXT:    s_endpgm
3748;
3749; UNALIGNED_GFX12-LABEL: sgpr_base_large_offset:
3750; UNALIGNED_GFX12:       ; %bb.0: ; %entry
3751; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v2, off, s0 offset:65512
3752; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3753; UNALIGNED_GFX12-NEXT:    global_store_b32 v[0:1], v2, off
3754; UNALIGNED_GFX12-NEXT:    s_endpgm
3755entry:
  ; Byte-wise GEP: 65512 == 0xffe8, the literal seen in the assertions above.
3756  %large_offset = getelementptr i8, ptr addrspace(5) %sgpr_base, i32 65512
3757  %load = load i32, ptr addrspace(5) %large_offset, align 4
  ; Storing the loaded value to %out gives the load a use in the checked output.
3758  store i32 %load, ptr addrspace(1) %out
3759  ret void
3760}
3761
; Test: round the inreg scratch pointer down to a multiple of 4, add a constant
; byte offset of 16842728 (0x100ffe8), do a volatile i32 load from the result
; and store the loaded value to %out.
; Per the assertions below, every run line expects an s_and_b32 with -4 followed
; by a scalar add of the full 0x100ffe8 constant (s_add_u32, or s_add_co_u32 on
; the gfx1200 run lines); the scratch load itself carries no immediate offset.
; The UNALIGNED_* run lines expect the same instruction sequences.
3762define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr addrspace(5) inreg %sgpr_base) {
3763; GFX9-LABEL: sgpr_base_large_offset_split:
3764; GFX9:       ; %bb.0: ; %entry
3765; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
3766; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
3767; GFX9-NEXT:    s_and_b32 s0, s2, -4
3768; GFX9-NEXT:    s_add_u32 s0, s0, 0x100ffe8
3769; GFX9-NEXT:    scratch_load_dword v2, off, s0 glc
3770; GFX9-NEXT:    s_waitcnt vmcnt(0)
3771; GFX9-NEXT:    global_store_dword v[0:1], v2, off
3772; GFX9-NEXT:    s_endpgm
3773;
3774; GFX10-LABEL: sgpr_base_large_offset_split:
3775; GFX10:       ; %bb.0: ; %entry
3776; GFX10-NEXT:    s_add_u32 s0, s0, s5
3777; GFX10-NEXT:    s_addc_u32 s1, s1, 0
3778; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
3779; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
3780; GFX10-NEXT:    s_and_b32 s0, s2, -4
3781; GFX10-NEXT:    s_add_u32 s0, s0, 0x100ffe8
3782; GFX10-NEXT:    scratch_load_dword v2, off, s0 glc dlc
3783; GFX10-NEXT:    s_waitcnt vmcnt(0)
3784; GFX10-NEXT:    global_store_dword v[0:1], v2, off
3785; GFX10-NEXT:    s_endpgm
3786;
3787; GFX940-LABEL: sgpr_base_large_offset_split:
3788; GFX940:       ; %bb.0: ; %entry
3789; GFX940-NEXT:    s_and_b32 s0, s0, -4
3790; GFX940-NEXT:    s_add_u32 s0, s0, 0x100ffe8
3791; GFX940-NEXT:    scratch_load_dword v2, off, s0 sc0 sc1
3792; GFX940-NEXT:    s_waitcnt vmcnt(0)
3793; GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
3794; GFX940-NEXT:    s_endpgm
3795;
3796; GFX11-LABEL: sgpr_base_large_offset_split:
3797; GFX11:       ; %bb.0: ; %entry
3798; GFX11-NEXT:    s_and_b32 s0, s0, -4
3799; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3800; GFX11-NEXT:    s_add_u32 s0, s0, 0x100ffe8
3801; GFX11-NEXT:    scratch_load_b32 v2, off, s0 glc dlc
3802; GFX11-NEXT:    s_waitcnt vmcnt(0)
3803; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
3804; GFX11-NEXT:    s_endpgm
3805;
3806; GFX12-LABEL: sgpr_base_large_offset_split:
3807; GFX12:       ; %bb.0: ; %entry
3808; GFX12-NEXT:    s_and_b32 s0, s0, -4
3809; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3810; GFX12-NEXT:    s_add_co_u32 s0, s0, 0x100ffe8
3811; GFX12-NEXT:    scratch_load_b32 v2, off, s0 scope:SCOPE_SYS
3812; GFX12-NEXT:    s_wait_loadcnt 0x0
3813; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
3814; GFX12-NEXT:    s_endpgm
3815;
3816; UNALIGNED_GFX9-LABEL: sgpr_base_large_offset_split:
3817; UNALIGNED_GFX9:       ; %bb.0: ; %entry
3818; UNALIGNED_GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
3819; UNALIGNED_GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
3820; UNALIGNED_GFX9-NEXT:    s_and_b32 s0, s2, -4
3821; UNALIGNED_GFX9-NEXT:    s_add_u32 s0, s0, 0x100ffe8
3822; UNALIGNED_GFX9-NEXT:    scratch_load_dword v2, off, s0 glc
3823; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3824; UNALIGNED_GFX9-NEXT:    global_store_dword v[0:1], v2, off
3825; UNALIGNED_GFX9-NEXT:    s_endpgm
3826;
3827; UNALIGNED_GFX10-LABEL: sgpr_base_large_offset_split:
3828; UNALIGNED_GFX10:       ; %bb.0: ; %entry
3829; UNALIGNED_GFX10-NEXT:    s_add_u32 s0, s0, s5
3830; UNALIGNED_GFX10-NEXT:    s_addc_u32 s1, s1, 0
3831; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
3832; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
3833; UNALIGNED_GFX10-NEXT:    s_and_b32 s0, s2, -4
3834; UNALIGNED_GFX10-NEXT:    s_add_u32 s0, s0, 0x100ffe8
3835; UNALIGNED_GFX10-NEXT:    scratch_load_dword v2, off, s0 glc dlc
3836; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
3837; UNALIGNED_GFX10-NEXT:    global_store_dword v[0:1], v2, off
3838; UNALIGNED_GFX10-NEXT:    s_endpgm
3839;
3840; UNALIGNED_GFX940-LABEL: sgpr_base_large_offset_split:
3841; UNALIGNED_GFX940:       ; %bb.0: ; %entry
3842; UNALIGNED_GFX940-NEXT:    s_and_b32 s0, s0, -4
3843; UNALIGNED_GFX940-NEXT:    s_add_u32 s0, s0, 0x100ffe8
3844; UNALIGNED_GFX940-NEXT:    scratch_load_dword v2, off, s0 sc0 sc1
3845; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3846; UNALIGNED_GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
3847; UNALIGNED_GFX940-NEXT:    s_endpgm
3848;
3849; UNALIGNED_GFX11-LABEL: sgpr_base_large_offset_split:
3850; UNALIGNED_GFX11:       ; %bb.0: ; %entry
3851; UNALIGNED_GFX11-NEXT:    s_and_b32 s0, s0, -4
3852; UNALIGNED_GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3853; UNALIGNED_GFX11-NEXT:    s_add_u32 s0, s0, 0x100ffe8
3854; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v2, off, s0 glc dlc
3855; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
3856; UNALIGNED_GFX11-NEXT:    global_store_b32 v[0:1], v2, off
3857; UNALIGNED_GFX11-NEXT:    s_endpgm
3858;
3859; UNALIGNED_GFX12-LABEL: sgpr_base_large_offset_split:
3860; UNALIGNED_GFX12:       ; %bb.0: ; %entry
3861; UNALIGNED_GFX12-NEXT:    s_and_b32 s0, s0, -4
3862; UNALIGNED_GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3863; UNALIGNED_GFX12-NEXT:    s_add_co_u32 s0, s0, 0x100ffe8
3864; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v2, off, s0 scope:SCOPE_SYS
3865; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
3866; UNALIGNED_GFX12-NEXT:    global_store_b32 v[0:1], v2, off
3867; UNALIGNED_GFX12-NEXT:    s_endpgm
3868entry:
  ; The alloca on the next line is commented out; the base pointer is derived
  ; from the %sgpr_base argument instead.
3869  ;%allignedBase = alloca [33554432 x i8], align 4, addrspace(5)
  ; 4294967292 == 0xfffffffc: the mask clears the two low bits of the pointer
  ; value (the assertions above show it as s_and_b32 ..., -4).
3870  %sgpr_base_i32 = ptrtoint ptr addrspace(5) %sgpr_base to i32
3871  %sgpr_base_i32_align4 = and i32 %sgpr_base_i32, 4294967292
3872  %sgpr_base_align4 = inttoptr i32 %sgpr_base_i32_align4 to ptr addrspace(5)
  ; Element index 16842728 into an i8 array == byte offset 0x100ffe8.
3873  %split_offset = getelementptr inbounds [33554432 x i8], ptr addrspace(5) %sgpr_base_align4, i32 0, i32 16842728
3874  %load = load volatile i32, ptr addrspace(5) %split_offset, align 4
3875  store i32 %load, ptr addrspace(1) %out
3876  ret void
3877}
3878
; Test: volatile store of the constant 15 to a scratch address formed from an
; inreg base pointer plus the byte index (%sidx + %vidx + 65512), where %sidx
; is passed inreg and %vidx is not.
; Per the assertions below:
;   - gfx900 and gfx940 run lines expect 0xffe8 to be moved into a VGPR and
;     combined with the other terms by v_add3_u32;
;   - gfx1030 and gfx1100 run lines expect v_add3_u32 with 0xffe8 as a literal;
;   - gfx1200 run lines expect the constant as an immediate offset of 65512 on
;     the scratch store.
; The UNALIGNED_* run lines expect the same instruction sequences.
3879define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr addrspace(5) inreg %sgpr_base, i32 inreg %sidx, i32 %vidx) {
3880; GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
3881; GFX9:       ; %bb.0: ; %bb
3882; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
3883; GFX9-NEXT:    v_add_u32_e32 v0, s3, v0
3884; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffe8
3885; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
3886; GFX9-NEXT:    v_add3_u32 v0, s2, v0, v1
3887; GFX9-NEXT:    v_mov_b32_e32 v1, 15
3888; GFX9-NEXT:    scratch_store_dword v0, v1, off
3889; GFX9-NEXT:    s_waitcnt vmcnt(0)
3890; GFX9-NEXT:    s_endpgm
3891;
3892; GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
3893; GFX10:       ; %bb.0: ; %bb
3894; GFX10-NEXT:    s_add_u32 s0, s0, s5
3895; GFX10-NEXT:    s_addc_u32 s1, s1, 0
3896; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
3897; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
3898; GFX10-NEXT:    v_add_nc_u32_e32 v0, s3, v0
3899; GFX10-NEXT:    v_mov_b32_e32 v1, 15
3900; GFX10-NEXT:    v_add3_u32 v0, s2, v0, 0xffe8
3901; GFX10-NEXT:    scratch_store_dword v0, v1, off
3902; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3903; GFX10-NEXT:    s_endpgm
3904;
3905; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
3906; GFX940:       ; %bb.0: ; %bb
3907; GFX940-NEXT:    v_add_u32_e32 v0, s1, v0
3908; GFX940-NEXT:    v_mov_b32_e32 v1, 0xffe8
3909; GFX940-NEXT:    v_add3_u32 v0, s0, v0, v1
3910; GFX940-NEXT:    v_mov_b32_e32 v1, 15
3911; GFX940-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
3912; GFX940-NEXT:    s_waitcnt vmcnt(0)
3913; GFX940-NEXT:    s_endpgm
3914;
3915; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
3916; GFX11:       ; %bb.0: ; %bb
3917; GFX11-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
3918; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3919; GFX11-NEXT:    v_add3_u32 v0, s0, v0, 0xffe8
3920; GFX11-NEXT:    scratch_store_b32 v0, v1, off dlc
3921; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3922; GFX11-NEXT:    s_endpgm
3923;
3924; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
3925; GFX12:       ; %bb.0: ; %bb
3926; GFX12-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
3927; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3928; GFX12-NEXT:    v_add_nc_u32_e32 v0, s0, v0
3929; GFX12-NEXT:    scratch_store_b32 v0, v1, off offset:65512 scope:SCOPE_SYS
3930; GFX12-NEXT:    s_wait_storecnt 0x0
3931; GFX12-NEXT:    s_endpgm
3932;
3933; UNALIGNED_GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
3934; UNALIGNED_GFX9:       ; %bb.0: ; %bb
3935; UNALIGNED_GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
3936; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v0, s3, v0
3937; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v1, 0xffe8
3938; UNALIGNED_GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
3939; UNALIGNED_GFX9-NEXT:    v_add3_u32 v0, s2, v0, v1
3940; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v1, 15
3941; UNALIGNED_GFX9-NEXT:    scratch_store_dword v0, v1, off
3942; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
3943; UNALIGNED_GFX9-NEXT:    s_endpgm
3944;
3945; UNALIGNED_GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
3946; UNALIGNED_GFX10:       ; %bb.0: ; %bb
3947; UNALIGNED_GFX10-NEXT:    s_add_u32 s0, s0, s5
3948; UNALIGNED_GFX10-NEXT:    s_addc_u32 s1, s1, 0
3949; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
3950; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
3951; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v0, s3, v0
3952; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v1, 15
3953; UNALIGNED_GFX10-NEXT:    v_add3_u32 v0, s2, v0, 0xffe8
3954; UNALIGNED_GFX10-NEXT:    scratch_store_dword v0, v1, off
3955; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3956; UNALIGNED_GFX10-NEXT:    s_endpgm
3957;
3958; UNALIGNED_GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
3959; UNALIGNED_GFX940:       ; %bb.0: ; %bb
3960; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v0, s1, v0
3961; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v1, 0xffe8
3962; UNALIGNED_GFX940-NEXT:    v_add3_u32 v0, s0, v0, v1
3963; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v1, 15
3964; UNALIGNED_GFX940-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
3965; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
3966; UNALIGNED_GFX940-NEXT:    s_endpgm
3967;
3968; UNALIGNED_GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
3969; UNALIGNED_GFX11:       ; %bb.0: ; %bb
3970; UNALIGNED_GFX11-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
3971; UNALIGNED_GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3972; UNALIGNED_GFX11-NEXT:    v_add3_u32 v0, s0, v0, 0xffe8
3973; UNALIGNED_GFX11-NEXT:    scratch_store_b32 v0, v1, off dlc
3974; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3975; UNALIGNED_GFX11-NEXT:    s_endpgm
3976;
3977; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
3978; UNALIGNED_GFX12:       ; %bb.0: ; %bb
3979; UNALIGNED_GFX12-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
3980; UNALIGNED_GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3981; UNALIGNED_GFX12-NEXT:    v_add_nc_u32_e32 v0, s0, v0
3982; UNALIGNED_GFX12-NEXT:    scratch_store_b32 v0, v1, off offset:65512 scope:SCOPE_SYS
3983; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
3984; UNALIGNED_GFX12-NEXT:    s_endpgm
3985bb:
  ; index = %sidx + %vidx + 65512 (65512 == 0xffe8); both adds are nsw.
3986  %add1 = add nsw i32 %sidx, %vidx
3987  %add2 = add nsw i32 %add1, 65512
  ; i8 array element index, so %add2 is a byte offset from %sgpr_base.
3988  %gep = getelementptr inbounds [33554432 x i8], ptr addrspace(5) %sgpr_base, i32 0, i32 %add2
3989  store volatile i32 15, ptr addrspace(5) %gep, align 4
3990  ret void
3991}
3992
; Test: volatile store of the constant 15 to a scratch address formed from an
; inreg base pointer plus the byte index (%sidx + %vidx - 16), where %sidx is
; passed inreg and %vidx is not.
; Per the assertions below:
;   - gfx900 and gfx940 run lines expect the -16 to be folded into v_add3_u32,
;     with no immediate offset on the scratch store;
;   - gfx1030, gfx1100 and gfx1200 run lines expect a plain VALU add of the
;     register terms and an immediate offset of -16 on the scratch store.
; The UNALIGNED_* run lines expect the same instruction sequences.
3993define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(ptr addrspace(5) inreg %sgpr_base, i32 inreg %sidx, i32 %vidx) {
3994; GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
3995; GFX9:       ; %bb.0: ; %bb
3996; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
3997; GFX9-NEXT:    v_add_u32_e32 v0, s3, v0
3998; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
3999; GFX9-NEXT:    v_add3_u32 v0, s2, v0, -16
4000; GFX9-NEXT:    v_mov_b32_e32 v1, 15
4001; GFX9-NEXT:    scratch_store_dword v0, v1, off
4002; GFX9-NEXT:    s_waitcnt vmcnt(0)
4003; GFX9-NEXT:    s_endpgm
4004;
4005; GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4006; GFX10:       ; %bb.0: ; %bb
4007; GFX10-NEXT:    s_add_u32 s0, s0, s5
4008; GFX10-NEXT:    s_addc_u32 s1, s1, 0
4009; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4010; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4011; GFX10-NEXT:    v_add_nc_u32_e32 v0, s3, v0
4012; GFX10-NEXT:    v_mov_b32_e32 v1, 15
4013; GFX10-NEXT:    v_add_nc_u32_e32 v0, s2, v0
4014; GFX10-NEXT:    scratch_store_dword v0, v1, off offset:-16
4015; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4016; GFX10-NEXT:    s_endpgm
4017;
4018; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4019; GFX940:       ; %bb.0: ; %bb
4020; GFX940-NEXT:    v_add_u32_e32 v0, s1, v0
4021; GFX940-NEXT:    v_add3_u32 v0, s0, v0, -16
4022; GFX940-NEXT:    v_mov_b32_e32 v1, 15
4023; GFX940-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
4024; GFX940-NEXT:    s_waitcnt vmcnt(0)
4025; GFX940-NEXT:    s_endpgm
4026;
4027; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4028; GFX11:       ; %bb.0: ; %bb
4029; GFX11-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4030; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4031; GFX11-NEXT:    v_add_nc_u32_e32 v0, s0, v0
4032; GFX11-NEXT:    scratch_store_b32 v0, v1, off offset:-16 dlc
4033; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4034; GFX11-NEXT:    s_endpgm
4035;
4036; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4037; GFX12:       ; %bb.0: ; %bb
4038; GFX12-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4039; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4040; GFX12-NEXT:    v_add_nc_u32_e32 v0, s0, v0
4041; GFX12-NEXT:    scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4042; GFX12-NEXT:    s_wait_storecnt 0x0
4043; GFX12-NEXT:    s_endpgm
4044;
4045; UNALIGNED_GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4046; UNALIGNED_GFX9:       ; %bb.0: ; %bb
4047; UNALIGNED_GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
4048; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v0, s3, v0
4049; UNALIGNED_GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
4050; UNALIGNED_GFX9-NEXT:    v_add3_u32 v0, s2, v0, -16
4051; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v1, 15
4052; UNALIGNED_GFX9-NEXT:    scratch_store_dword v0, v1, off
4053; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
4054; UNALIGNED_GFX9-NEXT:    s_endpgm
4055;
4056; UNALIGNED_GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4057; UNALIGNED_GFX10:       ; %bb.0: ; %bb
4058; UNALIGNED_GFX10-NEXT:    s_add_u32 s0, s0, s5
4059; UNALIGNED_GFX10-NEXT:    s_addc_u32 s1, s1, 0
4060; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4061; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4062; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v0, s3, v0
4063; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v1, 15
4064; UNALIGNED_GFX10-NEXT:    v_add_nc_u32_e32 v0, s2, v0
4065; UNALIGNED_GFX10-NEXT:    scratch_store_dword v0, v1, off offset:-16
4066; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4067; UNALIGNED_GFX10-NEXT:    s_endpgm
4068;
4069; UNALIGNED_GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4070; UNALIGNED_GFX940:       ; %bb.0: ; %bb
4071; UNALIGNED_GFX940-NEXT:    v_add_u32_e32 v0, s1, v0
4072; UNALIGNED_GFX940-NEXT:    v_add3_u32 v0, s0, v0, -16
4073; UNALIGNED_GFX940-NEXT:    v_mov_b32_e32 v1, 15
4074; UNALIGNED_GFX940-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
4075; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
4076; UNALIGNED_GFX940-NEXT:    s_endpgm
4077;
4078; UNALIGNED_GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4079; UNALIGNED_GFX11:       ; %bb.0: ; %bb
4080; UNALIGNED_GFX11-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4081; UNALIGNED_GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4082; UNALIGNED_GFX11-NEXT:    v_add_nc_u32_e32 v0, s0, v0
4083; UNALIGNED_GFX11-NEXT:    scratch_store_b32 v0, v1, off offset:-16 dlc
4084; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4085; UNALIGNED_GFX11-NEXT:    s_endpgm
4086;
4087; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4088; UNALIGNED_GFX12:       ; %bb.0: ; %bb
4089; UNALIGNED_GFX12-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4090; UNALIGNED_GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4091; UNALIGNED_GFX12-NEXT:    v_add_nc_u32_e32 v0, s0, v0
4092; UNALIGNED_GFX12-NEXT:    scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4093; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0
4094; UNALIGNED_GFX12-NEXT:    s_endpgm
4095bb:
  ; index = %sidx + %vidx - 16; both adds are nsw.
4096  %add1 = add nsw i32 %sidx, %vidx
4097  %add2 = add nsw i32 %add1, -16
  ; i8 array element index, so %add2 is a byte offset from %sgpr_base.
4098  %gep = getelementptr inbounds [16 x i8], ptr addrspace(5) %sgpr_base, i32 0, i32 %add2
4099  store volatile i32 15, ptr addrspace(5) %gep, align 4
4100  ret void
4101}
4102
; Test: load an i32 from a scratch (addrspace 5) pointer passed inreg, at a
; constant byte offset of -24, and store the result to %out.
; Per the assertions below:
;   - gfx900 and gfx940 run lines expect the offset to be applied with a scalar
;     s_add_u32 of 0xffffffe8 (-24 as a 32-bit value) and no immediate offset on
;     the scratch load;
;   - gfx1030, gfx1100 and gfx1200 run lines expect the scratch load to use the
;     SGPR base directly with an immediate offset of -24.
; The UNALIGNED_* run lines expect the same instruction sequences.
4103define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addrspace(5) inreg %scevgep) {
4104; GFX9-LABEL: sgpr_base_negative_offset:
4105; GFX9:       ; %bb.0: ; %entry
4106; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
4107; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
4108; GFX9-NEXT:    s_add_u32 s0, s2, 0xffffffe8
4109; GFX9-NEXT:    scratch_load_dword v2, off, s0
4110; GFX9-NEXT:    s_waitcnt vmcnt(0)
4111; GFX9-NEXT:    global_store_dword v[0:1], v2, off
4112; GFX9-NEXT:    s_endpgm
4113;
4114; GFX10-LABEL: sgpr_base_negative_offset:
4115; GFX10:       ; %bb.0: ; %entry
4116; GFX10-NEXT:    s_add_u32 s0, s0, s5
4117; GFX10-NEXT:    s_addc_u32 s1, s1, 0
4118; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4119; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4120; GFX10-NEXT:    scratch_load_dword v2, off, s2 offset:-24
4121; GFX10-NEXT:    s_waitcnt vmcnt(0)
4122; GFX10-NEXT:    global_store_dword v[0:1], v2, off
4123; GFX10-NEXT:    s_endpgm
4124;
4125; GFX940-LABEL: sgpr_base_negative_offset:
4126; GFX940:       ; %bb.0: ; %entry
4127; GFX940-NEXT:    s_add_u32 s0, s0, 0xffffffe8
4128; GFX940-NEXT:    scratch_load_dword v2, off, s0
4129; GFX940-NEXT:    s_waitcnt vmcnt(0)
4130; GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
4131; GFX940-NEXT:    s_endpgm
4132;
4133; GFX11-LABEL: sgpr_base_negative_offset:
4134; GFX11:       ; %bb.0: ; %entry
4135; GFX11-NEXT:    scratch_load_b32 v2, off, s0 offset:-24
4136; GFX11-NEXT:    s_waitcnt vmcnt(0)
4137; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
4138; GFX11-NEXT:    s_endpgm
4139;
4140; GFX12-LABEL: sgpr_base_negative_offset:
4141; GFX12:       ; %bb.0: ; %entry
4142; GFX12-NEXT:    scratch_load_b32 v2, off, s0 offset:-24
4143; GFX12-NEXT:    s_wait_loadcnt 0x0
4144; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
4145; GFX12-NEXT:    s_endpgm
4146;
4147; UNALIGNED_GFX9-LABEL: sgpr_base_negative_offset:
4148; UNALIGNED_GFX9:       ; %bb.0: ; %entry
4149; UNALIGNED_GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
4150; UNALIGNED_GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
4151; UNALIGNED_GFX9-NEXT:    s_add_u32 s0, s2, 0xffffffe8
4152; UNALIGNED_GFX9-NEXT:    scratch_load_dword v2, off, s0
4153; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
4154; UNALIGNED_GFX9-NEXT:    global_store_dword v[0:1], v2, off
4155; UNALIGNED_GFX9-NEXT:    s_endpgm
4156;
4157; UNALIGNED_GFX10-LABEL: sgpr_base_negative_offset:
4158; UNALIGNED_GFX10:       ; %bb.0: ; %entry
4159; UNALIGNED_GFX10-NEXT:    s_add_u32 s0, s0, s5
4160; UNALIGNED_GFX10-NEXT:    s_addc_u32 s1, s1, 0
4161; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4162; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4163; UNALIGNED_GFX10-NEXT:    scratch_load_dword v2, off, s2 offset:-24
4164; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0)
4165; UNALIGNED_GFX10-NEXT:    global_store_dword v[0:1], v2, off
4166; UNALIGNED_GFX10-NEXT:    s_endpgm
4167;
4168; UNALIGNED_GFX940-LABEL: sgpr_base_negative_offset:
4169; UNALIGNED_GFX940:       ; %bb.0: ; %entry
4170; UNALIGNED_GFX940-NEXT:    s_add_u32 s0, s0, 0xffffffe8
4171; UNALIGNED_GFX940-NEXT:    scratch_load_dword v2, off, s0
4172; UNALIGNED_GFX940-NEXT:    s_waitcnt vmcnt(0)
4173; UNALIGNED_GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
4174; UNALIGNED_GFX940-NEXT:    s_endpgm
4175;
4176; UNALIGNED_GFX11-LABEL: sgpr_base_negative_offset:
4177; UNALIGNED_GFX11:       ; %bb.0: ; %entry
4178; UNALIGNED_GFX11-NEXT:    scratch_load_b32 v2, off, s0 offset:-24
4179; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0)
4180; UNALIGNED_GFX11-NEXT:    global_store_b32 v[0:1], v2, off
4181; UNALIGNED_GFX11-NEXT:    s_endpgm
4182;
4183; UNALIGNED_GFX12-LABEL: sgpr_base_negative_offset:
4184; UNALIGNED_GFX12:       ; %bb.0: ; %entry
4185; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v2, off, s0 offset:-24
4186; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0
4187; UNALIGNED_GFX12-NEXT:    global_store_b32 v[0:1], v2, off
4188; UNALIGNED_GFX12-NEXT:    s_endpgm
4189entry:
  ; Byte-wise GEP with a negative constant: address = %scevgep - 24.
4190  %scevgep28 = getelementptr i8, ptr addrspace(5) %scevgep, i32 -24
4191  %0 = load i32, ptr addrspace(5) %scevgep28, align 4
  ; Storing the loaded value to %out gives the load a use in the checked output.
4192  store i32 %0, ptr addrspace(1) %out
4193  ret void
4194}
4195
; Declaration of the llvm.amdgcn.workitem.id.x intrinsic (takes no arguments,
; returns i32). None of the functions immediately above call it; presumably it
; is used by tests earlier in this file (TODO confirm).
4196declare i32 @llvm.amdgcn.workitem.id.x()
4197