xref: /llvm-project/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll (revision c8bbbaa5c70a32f31a072740c87708be8f15f831)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-SDAG
3; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-GISEL
4; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-SDAG
5; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-GISEL
6; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefixes=GFX12-SDAG
7; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefixes=GFX12-GISEL
8
9; Test flat scratch SVS addressing mode with various combinations of alignment
10; of soffset, voffset and inst_offset.
11
; The kernels below call this intrinsic to obtain %voff; in the checked output
; the value is taken from v0 masked with 0x3ff.
12declare i32 @llvm.amdgcn.workitem.id.x()
13
; soff1_voff1 - the kernel argument %soff and the workitem id %voff are each
; multiplied by 1 and applied as byte offsets to a 64-byte addrspace(5) alloca;
; volatile i8 stores of 1, 2 and 4 then go to byte offsets 1, 2 and 4 from that
; address.
; In the checks below only the GFX12 SelectionDAG run passes s0 as a separate
; scalar operand of scratch_store_b8 together with the immediate offset; the
; GFX12 GlobalISel run folds only the immediate offset, and the GFX940/GFX11
; runs add everything into a VGPR and store with "off" and no immediate.
14define amdgpu_kernel void @soff1_voff1(i32 %soff) {
15; GFX940-SDAG-LABEL: soff1_voff1:
16; GFX940-SDAG:       ; %bb.0: ; %bb
17; GFX940-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
18; GFX940-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
19; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
20; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
21; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, s0, v0
22; GFX940-SDAG-NEXT:    v_add_u32_e32 v2, 1, v0
23; GFX940-SDAG-NEXT:    v_add_u32_e32 v3, 2, v0
24; GFX940-SDAG-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
25; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
26; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 2
27; GFX940-SDAG-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
28; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
29; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
30; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
31; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
32; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
33; GFX940-SDAG-NEXT:    s_endpgm
34;
35; GFX940-GISEL-LABEL: soff1_voff1:
36; GFX940-GISEL:       ; %bb.0: ; %bb
37; GFX940-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
38; GFX940-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
39; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 1
40; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
41; GFX940-GISEL-NEXT:    s_add_u32 s0, 0, s0
42; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
43; GFX940-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
44; GFX940-GISEL-NEXT:    v_add_u32_e32 v3, 2, v0
45; GFX940-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
46; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
47; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
48; GFX940-GISEL-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
49; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
50; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
51; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
52; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
53; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
54; GFX940-GISEL-NEXT:    s_endpgm
55;
56; GFX11-SDAG-LABEL: soff1_voff1:
57; GFX11-SDAG:       ; %bb.0: ; %bb
58; GFX11-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x24
59; GFX11-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
60; GFX11-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
61; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
62; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
63; GFX11-SDAG-NEXT:    v_add3_u32 v0, 0, s0, v0
64; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v4, 1, v0
65; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v5, 2, v0
66; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, 4, v0
67; GFX11-SDAG-NEXT:    scratch_store_b8 v4, v1, off dlc
68; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
69; GFX11-SDAG-NEXT:    scratch_store_b8 v5, v2, off dlc
70; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
71; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, off dlc
72; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
73; GFX11-SDAG-NEXT:    s_endpgm
74;
75; GFX11-GISEL-LABEL: soff1_voff1:
76; GFX11-GISEL:       ; %bb.0: ; %bb
77; GFX11-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x24
78; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
79; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
80; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
81; GFX11-GISEL-NEXT:    s_add_u32 s0, 0, s0
82; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
83; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
84; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
85; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0
86; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v4, 1, v0
87; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 4, v0
88; GFX11-GISEL-NEXT:    scratch_store_b8 v4, v1, off dlc
89; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
90; GFX11-GISEL-NEXT:    scratch_store_b8 v5, v2, off dlc
91; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
92; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off dlc
93; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
94; GFX11-GISEL-NEXT:    s_endpgm
95;
96; GFX12-SDAG-LABEL: soff1_voff1:
97; GFX12-SDAG:       ; %bb.0: ; %bb
98; GFX12-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x24
99; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
100; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
101; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
102; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
103; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
104; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
105; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
106; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 scope:SCOPE_SYS
107; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
108; GFX12-SDAG-NEXT:    s_endpgm
109;
110; GFX12-GISEL-LABEL: soff1_voff1:
111; GFX12-GISEL:       ; %bb.0: ; %bb
112; GFX12-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x24
113; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
114; GFX12-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
115; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
116; GFX12-GISEL-NEXT:    s_add_co_u32 s0, 0, s0
117; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
118; GFX12-GISEL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
119; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
120; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
121; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
122; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
123; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS
124; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
125; GFX12-GISEL-NEXT:    s_endpgm
126bb:
; Scalar part of the address: kernel argument times 1, applied to the alloca.
127  %soff1 = mul i32 %soff, 1
128  %a = alloca i8, i32 64, align 4, addrspace(5)
129  %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff1
; Vector part of the address: workitem id times 1, added on top of %as.
130  %voff = call i32 @llvm.amdgcn.workitem.id.x()
131  %voff1 = mul i32 %voff, 1
132  %asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff1
; Constant byte offsets 1, 2 and 4 from %asv; each store is volatile.
133  %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 1
134  store volatile i8 1, ptr addrspace(5) %p1
135  %p2 = getelementptr i8, ptr addrspace(5) %asv, i32 2
136  store volatile i8 2, ptr addrspace(5) %p2
137  %p4 = getelementptr i8, ptr addrspace(5) %asv, i32 4
138  store volatile i8 4, ptr addrspace(5) %p4
139  ret void
140}
141
; soff1_voff2 - same store pattern as soff1_voff1, but the workitem id is
; multiplied by 2 before being used as a byte offset (the kernel argument %soff
; is still multiplied by 1).
; In the checks below the multiply by 2 appears as a left shift by 1
; (v_lshl_add_u32 on GFX940 SelectionDAG, v_lshlrev_b32 elsewhere). Only the
; GFX12 SelectionDAG run passes s0 as a separate scalar operand of
; scratch_store_b8 with the immediate offset; the GFX12 GlobalISel run folds
; only the immediate, and the GFX940/GFX11 runs store with "off" and no
; immediate.
142define amdgpu_kernel void @soff1_voff2(i32 %soff) {
143; GFX940-SDAG-LABEL: soff1_voff2:
144; GFX940-SDAG:       ; %bb.0: ; %bb
145; GFX940-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
146; GFX940-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
147; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
148; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
149; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, s0
150; GFX940-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 1, v2
151; GFX940-SDAG-NEXT:    v_add_u32_e32 v2, 1, v0
152; GFX940-SDAG-NEXT:    v_add_u32_e32 v3, 2, v0
153; GFX940-SDAG-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
154; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
155; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 2
156; GFX940-SDAG-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
157; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
158; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
159; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
160; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
161; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
162; GFX940-SDAG-NEXT:    s_endpgm
163;
164; GFX940-GISEL-LABEL: soff1_voff2:
165; GFX940-GISEL:       ; %bb.0: ; %bb
166; GFX940-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
167; GFX940-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
168; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
169; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 1
170; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
171; GFX940-GISEL-NEXT:    s_add_u32 s0, 0, s0
172; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
173; GFX940-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
174; GFX940-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
175; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
176; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, 2, v0
177; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 2
178; GFX940-GISEL-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
179; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
180; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
181; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
182; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
183; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
184; GFX940-GISEL-NEXT:    s_endpgm
185;
186; GFX11-SDAG-LABEL: soff1_voff2:
187; GFX11-SDAG:       ; %bb.0: ; %bb
188; GFX11-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x24
189; GFX11-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
190; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
191; GFX11-SDAG-NEXT:    v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0
192; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
193; GFX11-SDAG-NEXT:    v_add3_u32 v0, 0, s0, v0
194; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
195; GFX11-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0
196; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v4, 1, v0
197; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, 4, v0
198; GFX11-SDAG-NEXT:    scratch_store_b8 v4, v1, off dlc
199; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
200; GFX11-SDAG-NEXT:    scratch_store_b8 v5, v2, off dlc
201; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
202; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, off dlc
203; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
204; GFX11-SDAG-NEXT:    s_endpgm
205;
206; GFX11-GISEL-LABEL: soff1_voff2:
207; GFX11-GISEL:       ; %bb.0: ; %bb
208; GFX11-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x24
209; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
210; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
211; GFX11-GISEL-NEXT:    v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0
212; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
213; GFX11-GISEL-NEXT:    s_add_u32 s0, 0, s0
214; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
215; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
216; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
217; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0
218; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v4, 1, v0
219; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 4, v0
220; GFX11-GISEL-NEXT:    scratch_store_b8 v4, v1, off dlc
221; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
222; GFX11-GISEL-NEXT:    scratch_store_b8 v5, v2, off dlc
223; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
224; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off dlc
225; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
226; GFX11-GISEL-NEXT:    s_endpgm
227;
228; GFX12-SDAG-LABEL: soff1_voff2:
229; GFX12-SDAG:       ; %bb.0: ; %bb
230; GFX12-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x24
231; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
232; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
233; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
234; GFX12-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
235; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
236; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
237; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
238; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
239; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
240; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 scope:SCOPE_SYS
241; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
242; GFX12-SDAG-NEXT:    s_endpgm
243;
244; GFX12-GISEL-LABEL: soff1_voff2:
245; GFX12-GISEL:       ; %bb.0: ; %bb
246; GFX12-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x24
247; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
248; GFX12-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
249; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
250; GFX12-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
251; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
252; GFX12-GISEL-NEXT:    s_add_co_u32 s0, 0, s0
253; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
254; GFX12-GISEL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
255; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
256; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
257; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
258; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
259; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS
260; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
261; GFX12-GISEL-NEXT:    s_endpgm
262bb:
; Scalar part of the address: kernel argument times 1, applied to the alloca.
263  %soff1 = mul i32 %soff, 1
264  %a = alloca i8, i32 64, align 4, addrspace(5)
265  %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff1
; Vector part of the address: workitem id times 2, added on top of %as.
266  %voff = call i32 @llvm.amdgcn.workitem.id.x()
267  %voff2 = mul i32 %voff, 2
268  %asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff2
; Constant byte offsets 1, 2 and 4 from %asv; each store is volatile.
269  %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 1
270  store volatile i8 1, ptr addrspace(5) %p1
271  %p2 = getelementptr i8, ptr addrspace(5) %asv, i32 2
272  store volatile i8 2, ptr addrspace(5) %p2
273  %p4 = getelementptr i8, ptr addrspace(5) %asv, i32 4
274  store volatile i8 4, ptr addrspace(5) %p4
275  ret void
276}
277
; soff1_voff4 - same store pattern as soff1_voff1, but the workitem id is
; multiplied by 4 before being used as a byte offset (the kernel argument %soff
; is still multiplied by 1).
; In the checks below the multiply by 4 appears as a left shift by 2
; (v_lshl_add_u32 on GFX940 SelectionDAG, v_lshlrev_b32 elsewhere). Only the
; GFX12 SelectionDAG run passes s0 as a separate scalar operand of
; scratch_store_b8 with the immediate offset; the GFX12 GlobalISel run folds
; only the immediate, and the GFX940/GFX11 runs store with "off" and no
; immediate.
278define amdgpu_kernel void @soff1_voff4(i32 %soff) {
279; GFX940-SDAG-LABEL: soff1_voff4:
280; GFX940-SDAG:       ; %bb.0: ; %bb
281; GFX940-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
282; GFX940-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
283; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
284; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
285; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, s0
286; GFX940-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, v2
287; GFX940-SDAG-NEXT:    v_add_u32_e32 v2, 1, v0
288; GFX940-SDAG-NEXT:    v_add_u32_e32 v3, 2, v0
289; GFX940-SDAG-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
290; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
291; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 2
292; GFX940-SDAG-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
293; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
294; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
295; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
296; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
297; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
298; GFX940-SDAG-NEXT:    s_endpgm
299;
300; GFX940-GISEL-LABEL: soff1_voff4:
301; GFX940-GISEL:       ; %bb.0: ; %bb
302; GFX940-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
303; GFX940-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
304; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
305; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 1
306; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
307; GFX940-GISEL-NEXT:    s_add_u32 s0, 0, s0
308; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
309; GFX940-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
310; GFX940-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
311; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
312; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, 2, v0
313; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 2
314; GFX940-GISEL-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
315; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
316; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
317; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
318; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
319; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
320; GFX940-GISEL-NEXT:    s_endpgm
321;
322; GFX11-SDAG-LABEL: soff1_voff4:
323; GFX11-SDAG:       ; %bb.0: ; %bb
324; GFX11-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x24
325; GFX11-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
326; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
327; GFX11-SDAG-NEXT:    v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0
328; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
329; GFX11-SDAG-NEXT:    v_add3_u32 v0, 0, s0, v0
330; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
331; GFX11-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0
332; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v4, 1, v0
333; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, 4, v0
334; GFX11-SDAG-NEXT:    scratch_store_b8 v4, v1, off dlc
335; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
336; GFX11-SDAG-NEXT:    scratch_store_b8 v5, v2, off dlc
337; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
338; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, off dlc
339; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
340; GFX11-SDAG-NEXT:    s_endpgm
341;
342; GFX11-GISEL-LABEL: soff1_voff4:
343; GFX11-GISEL:       ; %bb.0: ; %bb
344; GFX11-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x24
345; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
346; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
347; GFX11-GISEL-NEXT:    v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0
348; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
349; GFX11-GISEL-NEXT:    s_add_u32 s0, 0, s0
350; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
351; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
352; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
353; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0
354; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v4, 1, v0
355; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 4, v0
356; GFX11-GISEL-NEXT:    scratch_store_b8 v4, v1, off dlc
357; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
358; GFX11-GISEL-NEXT:    scratch_store_b8 v5, v2, off dlc
359; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
360; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off dlc
361; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
362; GFX11-GISEL-NEXT:    s_endpgm
363;
364; GFX12-SDAG-LABEL: soff1_voff4:
365; GFX12-SDAG:       ; %bb.0: ; %bb
366; GFX12-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x24
367; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
368; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
369; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
370; GFX12-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
371; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
372; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
373; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
374; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
375; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
376; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 scope:SCOPE_SYS
377; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
378; GFX12-SDAG-NEXT:    s_endpgm
379;
380; GFX12-GISEL-LABEL: soff1_voff4:
381; GFX12-GISEL:       ; %bb.0: ; %bb
382; GFX12-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x24
383; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
384; GFX12-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
385; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
386; GFX12-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
387; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
388; GFX12-GISEL-NEXT:    s_add_co_u32 s0, 0, s0
389; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
390; GFX12-GISEL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
391; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
392; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
393; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
394; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
395; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS
396; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
397; GFX12-GISEL-NEXT:    s_endpgm
398bb:
; Scalar part of the address: kernel argument times 1, applied to the alloca.
399  %soff1 = mul i32 %soff, 1
400  %a = alloca i8, i32 64, align 4, addrspace(5)
401  %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff1
; Vector part of the address: workitem id times 4, added on top of %as.
402  %voff = call i32 @llvm.amdgcn.workitem.id.x()
403  %voff4 = mul i32 %voff, 4
404  %asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff4
; Constant byte offsets 1, 2 and 4 from %asv; each store is volatile.
405  %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 1
406  store volatile i8 1, ptr addrspace(5) %p1
407  %p2 = getelementptr i8, ptr addrspace(5) %asv, i32 2
408  store volatile i8 2, ptr addrspace(5) %p2
409  %p4 = getelementptr i8, ptr addrspace(5) %asv, i32 4
410  store volatile i8 4, ptr addrspace(5) %p4
411  ret void
412}
413
; soff2_voff1 - same store pattern as soff1_voff1, but the kernel argument
; %soff is multiplied by 2 before being used as a byte offset (the workitem id
; is still multiplied by 1).
; In the checks below the multiply by 2 appears as s_lshl_b32 s0, s0, 1 in
; every run. Only the GFX12 SelectionDAG run passes the shifted s0 as a
; separate scalar operand of scratch_store_b8 with the immediate offset; the
; GFX12 GlobalISel run folds only the immediate, and the GFX940/GFX11 runs
; store with "off" and no immediate.
414define amdgpu_kernel void @soff2_voff1(i32 %soff) {
415; GFX940-SDAG-LABEL: soff2_voff1:
416; GFX940-SDAG:       ; %bb.0: ; %bb
417; GFX940-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
418; GFX940-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
419; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
420; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
421; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
422; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, s0, v0
423; GFX940-SDAG-NEXT:    v_add_u32_e32 v2, 1, v0
424; GFX940-SDAG-NEXT:    v_add_u32_e32 v3, 2, v0
425; GFX940-SDAG-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
426; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
427; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 2
428; GFX940-SDAG-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
429; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
430; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
431; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
432; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
433; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
434; GFX940-SDAG-NEXT:    s_endpgm
435;
436; GFX940-GISEL-LABEL: soff2_voff1:
437; GFX940-GISEL:       ; %bb.0: ; %bb
438; GFX940-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
439; GFX940-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
440; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 1
441; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
442; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
443; GFX940-GISEL-NEXT:    s_add_u32 s0, 0, s0
444; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
445; GFX940-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
446; GFX940-GISEL-NEXT:    v_add_u32_e32 v3, 2, v0
447; GFX940-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
448; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
449; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
450; GFX940-GISEL-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
451; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
452; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
453; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
454; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
455; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
456; GFX940-GISEL-NEXT:    s_endpgm
457;
458; GFX11-SDAG-LABEL: soff2_voff1:
459; GFX11-SDAG:       ; %bb.0: ; %bb
460; GFX11-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x24
461; GFX11-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
462; GFX11-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
463; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
464; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
465; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
466; GFX11-SDAG-NEXT:    v_add3_u32 v0, 0, s0, v0
467; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
468; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v4, 1, v0
469; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v5, 2, v0
470; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, 4, v0
471; GFX11-SDAG-NEXT:    scratch_store_b8 v4, v1, off dlc
472; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
473; GFX11-SDAG-NEXT:    scratch_store_b8 v5, v2, off dlc
474; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
475; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, off dlc
476; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
477; GFX11-SDAG-NEXT:    s_endpgm
478;
479; GFX11-GISEL-LABEL: soff2_voff1:
480; GFX11-GISEL:       ; %bb.0: ; %bb
481; GFX11-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x24
482; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
483; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
484; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
485; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
486; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
487; GFX11-GISEL-NEXT:    s_add_u32 s0, 0, s0
488; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
489; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
490; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0
491; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v4, 1, v0
492; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 4, v0
493; GFX11-GISEL-NEXT:    scratch_store_b8 v4, v1, off dlc
494; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
495; GFX11-GISEL-NEXT:    scratch_store_b8 v5, v2, off dlc
496; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
497; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off dlc
498; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
499; GFX11-GISEL-NEXT:    s_endpgm
500;
501; GFX12-SDAG-LABEL: soff2_voff1:
502; GFX12-SDAG:       ; %bb.0: ; %bb
503; GFX12-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x24
504; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
505; GFX12-SDAG-NEXT:    v_dual_mov_b32 v3, 4 :: v_dual_and_b32 v0, 0x3ff, v0
506; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
507; GFX12-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
508; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
509; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
510; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
511; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
512; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 scope:SCOPE_SYS
513; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
514; GFX12-SDAG-NEXT:    s_endpgm
515;
516; GFX12-GISEL-LABEL: soff2_voff1:
517; GFX12-GISEL:       ; %bb.0: ; %bb
518; GFX12-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x24
519; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
520; GFX12-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
521; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
522; GFX12-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
523; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
524; GFX12-GISEL-NEXT:    s_add_co_u32 s0, 0, s0
525; GFX12-GISEL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
526; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
527; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
528; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
529; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
530; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS
531; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
532; GFX12-GISEL-NEXT:    s_endpgm
533bb:
; Scalar part of the address: kernel argument times 2, applied to the alloca.
534  %soff2 = mul i32 %soff, 2
535  %a = alloca i8, i32 64, align 4, addrspace(5)
536  %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff2
; Vector part of the address: workitem id times 1, added on top of %as.
537  %voff = call i32 @llvm.amdgcn.workitem.id.x()
538  %voff1 = mul i32 %voff, 1
539  %asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff1
; Constant byte offsets 1, 2 and 4 from %asv; each store is volatile.
540  %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 1
541  store volatile i8 1, ptr addrspace(5) %p1
542  %p2 = getelementptr i8, ptr addrspace(5) %asv, i32 2
543  store volatile i8 2, ptr addrspace(5) %p2
544  %p4 = getelementptr i8, ptr addrspace(5) %asv, i32 4
545  store volatile i8 4, ptr addrspace(5) %p4
546  ret void
547}
548
549define amdgpu_kernel void @soff2_voff2(i32 %soff) {
550; GFX940-SDAG-LABEL: soff2_voff2:
551; GFX940-SDAG:       ; %bb.0: ; %bb
552; GFX940-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
553; GFX940-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
554; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
555; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
556; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
557; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, s0
558; GFX940-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 1, v2
559; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off offset:1 sc0 sc1
560; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
561; GFX940-SDAG-NEXT:    v_add_u32_e32 v1, 2, v0
562; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
563; GFX940-SDAG-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
564; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
565; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
566; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
567; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
568; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
569; GFX940-SDAG-NEXT:    s_endpgm
570;
571; GFX940-GISEL-LABEL: soff2_voff2:
572; GFX940-GISEL:       ; %bb.0: ; %bb
573; GFX940-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
574; GFX940-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
575; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
576; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 1
577; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
578; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
579; GFX940-GISEL-NEXT:    s_add_u32 s0, 0, s0
580; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
581; GFX940-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
582; GFX940-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
583; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
584; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, 2, v0
585; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 2
586; GFX940-GISEL-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
587; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
588; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
589; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
590; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
591; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
592; GFX940-GISEL-NEXT:    s_endpgm
593;
594; GFX11-SDAG-LABEL: soff2_voff2:
595; GFX11-SDAG:       ; %bb.0: ; %bb
596; GFX11-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x24
597; GFX11-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
598; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
599; GFX11-SDAG-NEXT:    v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0
600; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
601; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
602; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
603; GFX11-SDAG-NEXT:    v_add3_u32 v0, 0, s0, v0
604; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
605; GFX11-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 4, v0
606; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v4, 2, v0
607; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
608; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
609; GFX11-SDAG-NEXT:    scratch_store_b8 v4, v2, off dlc
610; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
611; GFX11-SDAG-NEXT:    scratch_store_b8 v5, v3, off dlc
612; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
613; GFX11-SDAG-NEXT:    s_endpgm
614;
615; GFX11-GISEL-LABEL: soff2_voff2:
616; GFX11-GISEL:       ; %bb.0: ; %bb
617; GFX11-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x24
618; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
619; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
620; GFX11-GISEL-NEXT:    v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0
621; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
622; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
623; GFX11-GISEL-NEXT:    s_add_u32 s0, 0, s0
624; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
625; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
626; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
627; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0
628; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v4, 1, v0
629; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 4, v0
630; GFX11-GISEL-NEXT:    scratch_store_b8 v4, v1, off dlc
631; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
632; GFX11-GISEL-NEXT:    scratch_store_b8 v5, v2, off dlc
633; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
634; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off dlc
635; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
636; GFX11-GISEL-NEXT:    s_endpgm
637;
638; GFX12-SDAG-LABEL: soff2_voff2:
639; GFX12-SDAG:       ; %bb.0: ; %bb
640; GFX12-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x24
641; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
642; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
643; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
644; GFX12-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
645; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
646; GFX12-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
647; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
648; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
649; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
650; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
651; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 scope:SCOPE_SYS
652; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
653; GFX12-SDAG-NEXT:    s_endpgm
654;
655; GFX12-GISEL-LABEL: soff2_voff2:
656; GFX12-GISEL:       ; %bb.0: ; %bb
657; GFX12-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x24
658; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
659; GFX12-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
660; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
661; GFX12-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
662; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
663; GFX12-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
664; GFX12-GISEL-NEXT:    s_add_co_u32 s0, 0, s0
665; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
666; GFX12-GISEL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
667; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
668; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
669; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
670; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
671; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS
672; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
673; GFX12-GISEL-NEXT:    s_endpgm
674bb:
675  %soff2 = mul i32 %soff, 2
676  %a = alloca i8, i32 64, align 4, addrspace(5)
677  %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff2
678  %voff = call i32 @llvm.amdgcn.workitem.id.x()
679  %voff2 = mul i32 %voff, 2
680  %asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff2
681  %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 1
682  store volatile i8 1, ptr addrspace(5) %p1
683  %p2 = getelementptr i8, ptr addrspace(5) %asv, i32 2
684  store volatile i8 2, ptr addrspace(5) %p2
685  %p4 = getelementptr i8, ptr addrspace(5) %asv, i32 4
686  store volatile i8 4, ptr addrspace(5) %p4
687  ret void
688}
689
; soff2_voff4: the kernel argument %soff is multiplied by 2 and the workitem
; id x is multiplied by 4. Both are added to a 64-byte, 4-byte-aligned
; addrspace(5) alloca, and three volatile i8 stores are issued at constant
; offsets 1, 2 and 4 from the resulting address.
; The assertions inside the function are autogenerated (see the note on the
; first line of the file).
690define amdgpu_kernel void @soff2_voff4(i32 %soff) {
691; GFX940-SDAG-LABEL: soff2_voff4:
692; GFX940-SDAG:       ; %bb.0: ; %bb
693; GFX940-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
694; GFX940-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
695; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
696; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
697; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
698; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, s0
699; GFX940-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, v2
700; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off offset:1 sc0 sc1
701; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
702; GFX940-SDAG-NEXT:    v_add_u32_e32 v1, 2, v0
703; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
704; GFX940-SDAG-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
705; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
706; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
707; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
708; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
709; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
710; GFX940-SDAG-NEXT:    s_endpgm
711;
712; GFX940-GISEL-LABEL: soff2_voff4:
713; GFX940-GISEL:       ; %bb.0: ; %bb
714; GFX940-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
715; GFX940-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
716; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
717; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 1
718; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
719; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
720; GFX940-GISEL-NEXT:    s_add_u32 s0, 0, s0
721; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
722; GFX940-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
723; GFX940-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
724; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
725; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, 2, v0
726; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 2
727; GFX940-GISEL-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
728; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
729; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
730; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
731; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
732; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
733; GFX940-GISEL-NEXT:    s_endpgm
734;
735; GFX11-SDAG-LABEL: soff2_voff4:
736; GFX11-SDAG:       ; %bb.0: ; %bb
737; GFX11-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x24
738; GFX11-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
739; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
740; GFX11-SDAG-NEXT:    v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0
741; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
742; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
743; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
744; GFX11-SDAG-NEXT:    v_add3_u32 v0, 0, s0, v0
745; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
746; GFX11-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 4, v0
747; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v4, 2, v0
748; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
749; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
750; GFX11-SDAG-NEXT:    scratch_store_b8 v4, v2, off dlc
751; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
752; GFX11-SDAG-NEXT:    scratch_store_b8 v5, v3, off dlc
753; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
754; GFX11-SDAG-NEXT:    s_endpgm
755;
756; GFX11-GISEL-LABEL: soff2_voff4:
757; GFX11-GISEL:       ; %bb.0: ; %bb
758; GFX11-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x24
759; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
760; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
761; GFX11-GISEL-NEXT:    v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0
762; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
763; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
764; GFX11-GISEL-NEXT:    s_add_u32 s0, 0, s0
765; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
766; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
767; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
768; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0
769; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v4, 1, v0
770; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 4, v0
771; GFX11-GISEL-NEXT:    scratch_store_b8 v4, v1, off dlc
772; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
773; GFX11-GISEL-NEXT:    scratch_store_b8 v5, v2, off dlc
774; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
775; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off dlc
776; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
777; GFX11-GISEL-NEXT:    s_endpgm
778;
779; GFX12-SDAG-LABEL: soff2_voff4:
780; GFX12-SDAG:       ; %bb.0: ; %bb
781; GFX12-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x24
782; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
783; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
784; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
785; GFX12-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
786; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
787; GFX12-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
788; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
789; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
790; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
791; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
792; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 scope:SCOPE_SYS
793; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
794; GFX12-SDAG-NEXT:    s_endpgm
795;
796; GFX12-GISEL-LABEL: soff2_voff4:
797; GFX12-GISEL:       ; %bb.0: ; %bb
798; GFX12-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x24
799; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
800; GFX12-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
801; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
802; GFX12-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
803; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
804; GFX12-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
805; GFX12-GISEL-NEXT:    s_add_co_u32 s0, 0, s0
806; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
807; GFX12-GISEL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
808; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
809; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
810; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
811; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
812; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS
813; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
814; GFX12-GISEL-NEXT:    s_endpgm
815bb:
  ; Kernel-argument part of the address, scaled by 2.
816  %soff2 = mul i32 %soff, 2
  ; 64-byte addrspace(5) buffer, 4-byte aligned; base of every store below.
817  %a = alloca i8, i32 64, align 4, addrspace(5)
818  %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff2
  ; Per-workitem part of the address, scaled by 4.
819  %voff = call i32 @llvm.amdgcn.workitem.id.x()
820  %voff4 = mul i32 %voff, 4
821  %asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff4
  ; Volatile byte stores at constant offsets 1, 2 and 4; each one stores the
  ; value of its own offset.
822  %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 1
823  store volatile i8 1, ptr addrspace(5) %p1
824  %p2 = getelementptr i8, ptr addrspace(5) %asv, i32 2
825  store volatile i8 2, ptr addrspace(5) %p2
826  %p4 = getelementptr i8, ptr addrspace(5) %asv, i32 4
827  store volatile i8 4, ptr addrspace(5) %p4
828  ret void
829}
830
; soff4_voff1: the kernel argument %soff is multiplied by 4 and the workitem
; id x is multiplied by 1 (i.e. used unscaled). Both are added to a 64-byte,
; 4-byte-aligned addrspace(5) alloca, and three volatile i8 stores are issued
; at constant offsets 1, 2 and 4 from the resulting address.
; The assertions inside the function are autogenerated (see the note on the
; first line of the file).
831define amdgpu_kernel void @soff4_voff1(i32 %soff) {
832; GFX940-SDAG-LABEL: soff4_voff1:
833; GFX940-SDAG:       ; %bb.0: ; %bb
834; GFX940-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
835; GFX940-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
836; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
837; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
838; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
839; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, s0, v0
840; GFX940-SDAG-NEXT:    v_add_u32_e32 v2, 1, v0
841; GFX940-SDAG-NEXT:    v_add_u32_e32 v3, 2, v0
842; GFX940-SDAG-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
843; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
844; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 2
845; GFX940-SDAG-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
846; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
847; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
848; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
849; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
850; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
851; GFX940-SDAG-NEXT:    s_endpgm
852;
853; GFX940-GISEL-LABEL: soff4_voff1:
854; GFX940-GISEL:       ; %bb.0: ; %bb
855; GFX940-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
856; GFX940-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
857; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 1
858; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
859; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
860; GFX940-GISEL-NEXT:    s_add_u32 s0, 0, s0
861; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
862; GFX940-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
863; GFX940-GISEL-NEXT:    v_add_u32_e32 v3, 2, v0
864; GFX940-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
865; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
866; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
867; GFX940-GISEL-NEXT:    scratch_store_byte v3, v1, off sc0 sc1
868; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
869; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
870; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
871; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
872; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
873; GFX940-GISEL-NEXT:    s_endpgm
874;
875; GFX11-SDAG-LABEL: soff4_voff1:
876; GFX11-SDAG:       ; %bb.0: ; %bb
877; GFX11-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x24
878; GFX11-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
879; GFX11-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
880; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
881; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
882; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
883; GFX11-SDAG-NEXT:    v_add3_u32 v0, 0, s0, v0
884; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
885; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v4, 1, v0
886; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v5, 2, v0
887; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, 4, v0
888; GFX11-SDAG-NEXT:    scratch_store_b8 v4, v1, off dlc
889; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
890; GFX11-SDAG-NEXT:    scratch_store_b8 v5, v2, off dlc
891; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
892; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, off dlc
893; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
894; GFX11-SDAG-NEXT:    s_endpgm
895;
896; GFX11-GISEL-LABEL: soff4_voff1:
897; GFX11-GISEL:       ; %bb.0: ; %bb
898; GFX11-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x24
899; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
900; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
901; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
902; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
903; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
904; GFX11-GISEL-NEXT:    s_add_u32 s0, 0, s0
905; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
906; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
907; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0
908; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v4, 1, v0
909; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 4, v0
910; GFX11-GISEL-NEXT:    scratch_store_b8 v4, v1, off dlc
911; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
912; GFX11-GISEL-NEXT:    scratch_store_b8 v5, v2, off dlc
913; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
914; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off dlc
915; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
916; GFX11-GISEL-NEXT:    s_endpgm
917;
918; GFX12-SDAG-LABEL: soff4_voff1:
919; GFX12-SDAG:       ; %bb.0: ; %bb
920; GFX12-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x24
921; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
922; GFX12-SDAG-NEXT:    v_dual_mov_b32 v3, 4 :: v_dual_and_b32 v0, 0x3ff, v0
923; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
924; GFX12-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
925; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
926; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
927; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
928; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
929; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 scope:SCOPE_SYS
930; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
931; GFX12-SDAG-NEXT:    s_endpgm
932;
933; GFX12-GISEL-LABEL: soff4_voff1:
934; GFX12-GISEL:       ; %bb.0: ; %bb
935; GFX12-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x24
936; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
937; GFX12-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
938; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
939; GFX12-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
940; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
941; GFX12-GISEL-NEXT:    s_add_co_u32 s0, 0, s0
942; GFX12-GISEL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
943; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
944; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
945; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
946; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
947; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS
948; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
949; GFX12-GISEL-NEXT:    s_endpgm
950bb:
  ; Kernel-argument part of the address, scaled by 4.
951  %soff4 = mul i32 %soff, 4
  ; 64-byte addrspace(5) buffer, 4-byte aligned; base of every store below.
952  %a = alloca i8, i32 64, align 4, addrspace(5)
953  %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff4
  ; Per-workitem part of the address. The multiply by 1 leaves the id
  ; unchanged; presumably written as a mul to mirror the sibling
  ; soffN_voffM tests -- TODO confirm.
954  %voff = call i32 @llvm.amdgcn.workitem.id.x()
955  %voff1 = mul i32 %voff, 1
956  %asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff1
  ; Volatile byte stores at constant offsets 1, 2 and 4; each one stores the
  ; value of its own offset.
957  %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 1
958  store volatile i8 1, ptr addrspace(5) %p1
959  %p2 = getelementptr i8, ptr addrspace(5) %asv, i32 2
960  store volatile i8 2, ptr addrspace(5) %p2
961  %p4 = getelementptr i8, ptr addrspace(5) %asv, i32 4
962  store volatile i8 4, ptr addrspace(5) %p4
963  ret void
964}
965
; soff4_voff2: the kernel argument %soff is multiplied by 4 and the workitem
; id x is multiplied by 2. Both are added to a 64-byte, 4-byte-aligned
; addrspace(5) alloca, and three volatile i8 stores are issued at constant
; offsets 1, 2 and 4 from the resulting address.
; The assertions inside the function are autogenerated (see the note on the
; first line of the file).
966define amdgpu_kernel void @soff4_voff2(i32 %soff) {
967; GFX940-SDAG-LABEL: soff4_voff2:
968; GFX940-SDAG:       ; %bb.0: ; %bb
969; GFX940-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
970; GFX940-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
971; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
972; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
973; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
974; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, s0
975; GFX940-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 1, v2
976; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off offset:1 sc0 sc1
977; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
978; GFX940-SDAG-NEXT:    v_add_u32_e32 v1, 2, v0
979; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
980; GFX940-SDAG-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
981; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
982; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
983; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
984; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
985; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
986; GFX940-SDAG-NEXT:    s_endpgm
987;
988; GFX940-GISEL-LABEL: soff4_voff2:
989; GFX940-GISEL:       ; %bb.0: ; %bb
990; GFX940-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
991; GFX940-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
992; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
993; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 1
994; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
995; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
996; GFX940-GISEL-NEXT:    s_add_u32 s0, 0, s0
997; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
998; GFX940-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
999; GFX940-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
1000; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
1001; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, 2, v0
1002; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 2
1003; GFX940-GISEL-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
1004; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
1005; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
1006; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
1007; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
1008; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
1009; GFX940-GISEL-NEXT:    s_endpgm
1010;
1011; GFX11-SDAG-LABEL: soff4_voff2:
1012; GFX11-SDAG:       ; %bb.0: ; %bb
1013; GFX11-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x24
1014; GFX11-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
1015; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1016; GFX11-SDAG-NEXT:    v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0
1017; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1018; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
1019; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1020; GFX11-SDAG-NEXT:    v_add3_u32 v0, 0, s0, v0
1021; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1022; GFX11-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 4, v0
1023; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v4, 2, v0
1024; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
1025; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
1026; GFX11-SDAG-NEXT:    scratch_store_b8 v4, v2, off dlc
1027; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
1028; GFX11-SDAG-NEXT:    scratch_store_b8 v5, v3, off dlc
1029; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
1030; GFX11-SDAG-NEXT:    s_endpgm
1031;
1032; GFX11-GISEL-LABEL: soff4_voff2:
1033; GFX11-GISEL:       ; %bb.0: ; %bb
1034; GFX11-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x24
1035; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
1036; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
1037; GFX11-GISEL-NEXT:    v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0
1038; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1039; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
1040; GFX11-GISEL-NEXT:    s_add_u32 s0, 0, s0
1041; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1042; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
1043; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1044; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0
1045; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v4, 1, v0
1046; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 4, v0
1047; GFX11-GISEL-NEXT:    scratch_store_b8 v4, v1, off dlc
1048; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
1049; GFX11-GISEL-NEXT:    scratch_store_b8 v5, v2, off dlc
1050; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
1051; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off dlc
1052; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
1053; GFX11-GISEL-NEXT:    s_endpgm
1054;
1055; GFX12-SDAG-LABEL: soff4_voff2:
1056; GFX12-SDAG:       ; %bb.0: ; %bb
1057; GFX12-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x24
1058; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
1059; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
1060; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1061; GFX12-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1062; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
1063; GFX12-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
1064; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
1065; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
1066; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
1067; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
1068; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 scope:SCOPE_SYS
1069; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
1070; GFX12-SDAG-NEXT:    s_endpgm
1071;
1072; GFX12-GISEL-LABEL: soff4_voff2:
1073; GFX12-GISEL:       ; %bb.0: ; %bb
1074; GFX12-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x24
1075; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
1076; GFX12-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
1077; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
1078; GFX12-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1079; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
1080; GFX12-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
1081; GFX12-GISEL-NEXT:    s_add_co_u32 s0, 0, s0
1082; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1083; GFX12-GISEL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
1084; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
1085; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
1086; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
1087; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
1088; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS
1089; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
1090; GFX12-GISEL-NEXT:    s_endpgm
1091bb:
  ; Kernel-argument part of the address, scaled by 4.
1092  %soff4 = mul i32 %soff, 4
  ; 64-byte addrspace(5) buffer, 4-byte aligned; base of every store below.
1093  %a = alloca i8, i32 64, align 4, addrspace(5)
1094  %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff4
  ; Per-workitem part of the address, scaled by 2.
1095  %voff = call i32 @llvm.amdgcn.workitem.id.x()
1096  %voff2 = mul i32 %voff, 2
1097  %asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff2
  ; Volatile byte stores at constant offsets 1, 2 and 4; each one stores the
  ; value of its own offset.
1098  %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 1
1099  store volatile i8 1, ptr addrspace(5) %p1
1100  %p2 = getelementptr i8, ptr addrspace(5) %asv, i32 2
1101  store volatile i8 2, ptr addrspace(5) %p2
1102  %p4 = getelementptr i8, ptr addrspace(5) %asv, i32 4
1103  store volatile i8 4, ptr addrspace(5) %p4
1104  ret void
1105}
1106
; soff4_voff4: the kernel argument %soff and the workitem id x are both
; multiplied by 4. Both are added to a 64-byte, 4-byte-aligned addrspace(5)
; alloca, and three volatile i8 stores are issued at constant offsets 1, 2
; and 4 from the resulting address.
; The assertions inside the function are autogenerated (see the note on the
; first line of the file).
1107define amdgpu_kernel void @soff4_voff4(i32 %soff) {
1108; GFX940-SDAG-LABEL: soff4_voff4:
1109; GFX940-SDAG:       ; %bb.0: ; %bb
1110; GFX940-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
1111; GFX940-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1112; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
1113; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
1114; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1115; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
1116; GFX940-SDAG-NEXT:    v_mov_b32_e32 v3, s0
1117; GFX940-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, v3
1118; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off offset:1 sc0 sc1
1119; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
1120; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, off offset:2 sc0 sc1
1121; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
1122; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, 4, v0
1123; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
1124; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
1125; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
1126; GFX940-SDAG-NEXT:    s_endpgm
1127;
1128; GFX940-GISEL-LABEL: soff4_voff4:
1129; GFX940-GISEL:       ; %bb.0: ; %bb
1130; GFX940-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
1131; GFX940-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1132; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1133; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 1
1134; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1135; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
1136; GFX940-GISEL-NEXT:    s_add_u32 s0, 0, s0
1137; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
1138; GFX940-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
1139; GFX940-GISEL-NEXT:    scratch_store_byte v2, v1, off sc0 sc1
1140; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
1141; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, 2, v0
1142; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 2
1143; GFX940-GISEL-NEXT:    scratch_store_byte v1, v2, off sc0 sc1
1144; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
1145; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, 4, v0
1146; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
1147; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
1148; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
1149; GFX940-GISEL-NEXT:    s_endpgm
1150;
1151; GFX11-SDAG-LABEL: soff4_voff4:
1152; GFX11-SDAG:       ; %bb.0: ; %bb
1153; GFX11-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x24
1154; GFX11-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
1155; GFX11-SDAG-NEXT:    v_mov_b32_e32 v4, 4
1156; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1157; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1158; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1159; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
1160; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1161; GFX11-SDAG-NEXT:    v_add3_u32 v0, 0, s0, v0
1162; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1163; GFX11-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v3, 4, v0
1164; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
1165; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
1166; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
1167; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
1168; GFX11-SDAG-NEXT:    scratch_store_b8 v3, v4, off dlc
1169; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
1170; GFX11-SDAG-NEXT:    s_endpgm
1171;
1172; GFX11-GISEL-LABEL: soff4_voff4:
1173; GFX11-GISEL:       ; %bb.0: ; %bb
1174; GFX11-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x24
1175; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
1176; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
1177; GFX11-GISEL-NEXT:    v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0
1178; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1179; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
1180; GFX11-GISEL-NEXT:    s_add_u32 s0, 0, s0
1181; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1182; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
1183; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1184; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0
1185; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v4, 1, v0
1186; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 4, v0
1187; GFX11-GISEL-NEXT:    scratch_store_b8 v4, v1, off dlc
1188; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
1189; GFX11-GISEL-NEXT:    scratch_store_b8 v5, v2, off dlc
1190; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
1191; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off dlc
1192; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
1193; GFX11-GISEL-NEXT:    s_endpgm
1194;
1195; GFX12-SDAG-LABEL: soff4_voff4:
1196; GFX12-SDAG:       ; %bb.0: ; %bb
1197; GFX12-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x24
1198; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
1199; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
1200; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1201; GFX12-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1202; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
1203; GFX12-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
1204; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
1205; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
1206; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
1207; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
1208; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 scope:SCOPE_SYS
1209; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
1210; GFX12-SDAG-NEXT:    s_endpgm
1211;
1212; GFX12-GISEL-LABEL: soff4_voff4:
1213; GFX12-GISEL:       ; %bb.0: ; %bb
1214; GFX12-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x24
1215; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
1216; GFX12-GISEL-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
1217; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
1218; GFX12-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1219; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
1220; GFX12-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
1221; GFX12-GISEL-NEXT:    s_add_co_u32 s0, 0, s0
1222; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1223; GFX12-GISEL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
1224; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
1225; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
1226; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
1227; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
1228; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS
1229; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
1230; GFX12-GISEL-NEXT:    s_endpgm
1231bb:
  ; Kernel-argument part of the address, scaled by 4.
1232  %soff4 = mul i32 %soff, 4
  ; 64-byte addrspace(5) buffer, 4-byte aligned; base of every store below.
1233  %a = alloca i8, i32 64, align 4, addrspace(5)
1234  %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff4
  ; Per-workitem part of the address, scaled by 4.
1235  %voff = call i32 @llvm.amdgcn.workitem.id.x()
1236  %voff4 = mul i32 %voff, 4
1237  %asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff4
  ; Volatile byte stores at constant offsets 1, 2 and 4; each one stores the
  ; value of its own offset.
1238  %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 1
1239  store volatile i8 1, ptr addrspace(5) %p1
1240  %p2 = getelementptr i8, ptr addrspace(5) %asv, i32 2
1241  store volatile i8 2, ptr addrspace(5) %p2
1242  %p4 = getelementptr i8, ptr addrspace(5) %asv, i32 4
1243  store volatile i8 4, ptr addrspace(5) %p4
1244  ret void
1245}
1246
; soff1_voff1_negative: store a single byte at
;   (scratch alloca) + %soff + workitem.id.x + (-1)
; i.e. the soffset + voffset address computation with a negative constant
; offset and no scaling applied to either index.
; As currently checked below, the gfx11 and gfx12 runs expect the -1 to appear
; in the store's "offset:-1" field, while the gfx940 runs expect it to be added
; into the address VGPR first (v_add_u32 / v_add3_u32 with -1) and the store to
; carry no offset.
1247define amdgpu_kernel void @soff1_voff1_negative(i32 %soff) {
1248; GFX940-SDAG-LABEL: soff1_voff1_negative:
1249; GFX940-SDAG:       ; %bb.0: ; %bb
1250; GFX940-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x24
1251; GFX940-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1252; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
1253; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1254; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, s0, v0
1255; GFX940-SDAG-NEXT:    v_add_u32_e32 v0, -1, v0
1256; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
1257; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
1258; GFX940-SDAG-NEXT:    s_endpgm
1259;
1260; GFX940-GISEL-LABEL: soff1_voff1_negative:
1261; GFX940-GISEL:       ; %bb.0: ; %bb
1262; GFX940-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x24
1263; GFX940-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1264; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 1
1265; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1266; GFX940-GISEL-NEXT:    s_add_u32 s0, 0, s0
1267; GFX940-GISEL-NEXT:    v_add3_u32 v0, s0, v0, -1
1268; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
1269; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
1270; GFX940-GISEL-NEXT:    s_endpgm
1271;
1272; GFX11-SDAG-LABEL: soff1_voff1_negative:
1273; GFX11-SDAG:       ; %bb.0: ; %bb
1274; GFX11-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x24
1275; GFX11-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
1276; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1277; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1278; GFX11-SDAG-NEXT:    v_add3_u32 v0, 0, s0, v0
1279; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, off offset:-1 dlc
1280; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
1281; GFX11-SDAG-NEXT:    s_endpgm
1282;
1283; GFX11-GISEL-LABEL: soff1_voff1_negative:
1284; GFX11-GISEL:       ; %bb.0: ; %bb
1285; GFX11-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x24
1286; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
1287; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1288; GFX11-GISEL-NEXT:    s_add_u32 s0, 0, s0
1289; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1290; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
1291; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:-1 dlc
1292; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
1293; GFX11-GISEL-NEXT:    s_endpgm
1294;
1295; GFX12-SDAG-LABEL: soff1_voff1_negative:
1296; GFX12-SDAG:       ; %bb.0: ; %bb
1297; GFX12-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x24
1298; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
1299; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
1300; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:-1 scope:SCOPE_SYS
1301; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
1302; GFX12-SDAG-NEXT:    s_endpgm
1303;
1304; GFX12-GISEL-LABEL: soff1_voff1_negative:
1305; GFX12-GISEL:       ; %bb.0: ; %bb
1306; GFX12-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x24
1307; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
1308; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
1309; GFX12-GISEL-NEXT:    s_add_co_u32 s0, 0, s0
1310; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1311; GFX12-GISEL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
1312; GFX12-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:-1 scope:SCOPE_SYS
1313; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
1314; GFX12-GISEL-NEXT:    s_endpgm
1315bb:
  ; 64-byte, 4-byte-aligned allocation in addrspace(5).
1316  %a = alloca [64 x i8], align 4, addrspace(5)
  ; Index by the kernel argument %soff (loaded into s0 in the checks above),
  ; used unscaled as a byte offset.
1317  %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff
  ; Index by the workitem id (the v0 value masked with 0x3ff in the checks
  ; above), also unscaled.
1318  %voff = call i32 @llvm.amdgcn.workitem.id.x()
1319  %asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff
  ; The constant offset under test: one byte below the combined address.
1320  %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 -1
1321  store volatile i8 1, ptr addrspace(5) %p1
1322  ret void
1323}
1324