xref: /llvm-project/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll (revision 11b040192640ef3b1f481124c440f464ed6ec86a)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX906 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX908 %s
4
5; Due to high register pressure, regalloc would split the live range of the WWM VGPR used for SGPR spills
6; and introduce a copy. The copy should be a whole-wave copy with exec mask manipulation around it.
7; FIXME: The destination register of the whole-wave copy should be considered for preserving all of its lanes
8; with a spill/restore at the function prolog/epilog. Otherwise the copy might clobber its inactive lanes.
9define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
10; GFX906-LABEL: preserve_wwm_copy_dstreg:
11; GFX906:       ; %bb.0:
12; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13; GFX906-NEXT:    s_mov_b32 s16, s33
14; GFX906-NEXT:    s_mov_b32 s33, s32
15; GFX906-NEXT:    s_xor_saveexec_b64 s[18:19], -1
16; GFX906-NEXT:    buffer_store_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
17; GFX906-NEXT:    s_mov_b64 exec, -1
18; GFX906-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill
19; GFX906-NEXT:    s_mov_b64 exec, s[18:19]
20; GFX906-NEXT:    s_mov_b32 s21, s15
21; GFX906-NEXT:    ; implicit-def: $vgpr39 : SGPR spill to VGPR lane
22; GFX906-NEXT:    s_mov_b32 s22, s14
23; GFX906-NEXT:    v_writelane_b32 v39, s21, 0
24; GFX906-NEXT:    v_writelane_b32 v39, s22, 1
25; GFX906-NEXT:    s_mov_b32 s23, s13
26; GFX906-NEXT:    v_writelane_b32 v39, s23, 2
27; GFX906-NEXT:    s_mov_b32 s24, s12
28; GFX906-NEXT:    v_writelane_b32 v39, s24, 3
29; GFX906-NEXT:    s_mov_b64 s[26:27], s[10:11]
30; GFX906-NEXT:    v_writelane_b32 v39, s26, 4
31; GFX906-NEXT:    v_writelane_b32 v39, s27, 5
32; GFX906-NEXT:    v_writelane_b32 v39, s8, 6
33; GFX906-NEXT:    v_writelane_b32 v41, s16, 4
34; GFX906-NEXT:    v_writelane_b32 v39, s9, 7
35; GFX906-NEXT:    v_writelane_b32 v41, s34, 2
36; GFX906-NEXT:    v_writelane_b32 v39, s6, 8
37; GFX906-NEXT:    v_writelane_b32 v41, s35, 3
38; GFX906-NEXT:    v_writelane_b32 v39, s7, 9
39; GFX906-NEXT:    v_writelane_b32 v41, s30, 0
40; GFX906-NEXT:    v_writelane_b32 v39, s4, 10
41; GFX906-NEXT:    s_addk_i32 s32, 0x2800
42; GFX906-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
43; GFX906-NEXT:    v_writelane_b32 v41, s31, 1
44; GFX906-NEXT:    v_mov_b32_e32 v32, v31
45; GFX906-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
46; GFX906-NEXT:    s_nop 0
47; GFX906-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
48; GFX906-NEXT:    v_writelane_b32 v39, s5, 11
49; GFX906-NEXT:    s_or_saveexec_b64 s[34:35], -1
50; GFX906-NEXT:    s_mov_b64 exec, s[34:35]
51; GFX906-NEXT:    ;;#ASMSTART
52; GFX906-NEXT:    ; def v[0:31]
53; GFX906-NEXT:    ;;#ASMEND
54; GFX906-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
55; GFX906-NEXT:    s_nop 0
56; GFX906-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
57; GFX906-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
58; GFX906-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
59; GFX906-NEXT:    buffer_store_dword v4, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
60; GFX906-NEXT:    buffer_store_dword v5, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
61; GFX906-NEXT:    buffer_store_dword v6, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
62; GFX906-NEXT:    buffer_store_dword v7, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
63; GFX906-NEXT:    buffer_store_dword v8, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
64; GFX906-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
65; GFX906-NEXT:    buffer_store_dword v10, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
66; GFX906-NEXT:    buffer_store_dword v11, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
67; GFX906-NEXT:    buffer_store_dword v12, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill
68; GFX906-NEXT:    buffer_store_dword v13, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill
69; GFX906-NEXT:    buffer_store_dword v14, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill
70; GFX906-NEXT:    buffer_store_dword v15, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill
71; GFX906-NEXT:    buffer_store_dword v16, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill
72; GFX906-NEXT:    buffer_store_dword v17, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill
73; GFX906-NEXT:    buffer_store_dword v18, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill
74; GFX906-NEXT:    buffer_store_dword v19, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
75; GFX906-NEXT:    buffer_store_dword v20, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill
76; GFX906-NEXT:    buffer_store_dword v21, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill
77; GFX906-NEXT:    buffer_store_dword v22, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill
78; GFX906-NEXT:    buffer_store_dword v23, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill
79; GFX906-NEXT:    buffer_store_dword v24, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill
80; GFX906-NEXT:    buffer_store_dword v25, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill
81; GFX906-NEXT:    buffer_store_dword v26, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill
82; GFX906-NEXT:    buffer_store_dword v27, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
83; GFX906-NEXT:    buffer_store_dword v28, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
84; GFX906-NEXT:    buffer_store_dword v29, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
85; GFX906-NEXT:    buffer_store_dword v30, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill
86; GFX906-NEXT:    buffer_store_dword v31, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill
87; GFX906-NEXT:    ;;#ASMSTART
88; GFX906-NEXT:    ; def v40
89; GFX906-NEXT:    ;;#ASMEND
90; GFX906-NEXT:    ;;#ASMSTART
91; GFX906-NEXT:    ; def s11
92; GFX906-NEXT:    ;;#ASMEND
93; GFX906-NEXT:    v_writelane_b32 v39, s11, 12
94; GFX906-NEXT:    ;;#ASMSTART
95; GFX906-NEXT:    ; def s12
96; GFX906-NEXT:    ;;#ASMEND
97; GFX906-NEXT:    v_writelane_b32 v39, s12, 13
98; GFX906-NEXT:    ;;#ASMSTART
99; GFX906-NEXT:    ; def s13
100; GFX906-NEXT:    ;;#ASMEND
101; GFX906-NEXT:    v_writelane_b32 v39, s13, 14
102; GFX906-NEXT:    ;;#ASMSTART
103; GFX906-NEXT:    ; def s14
104; GFX906-NEXT:    ;;#ASMEND
105; GFX906-NEXT:    v_writelane_b32 v39, s14, 15
106; GFX906-NEXT:    ;;#ASMSTART
107; GFX906-NEXT:    ; def s15
108; GFX906-NEXT:    ;;#ASMEND
109; GFX906-NEXT:    v_writelane_b32 v39, s15, 16
110; GFX906-NEXT:    s_getpc_b64 s[10:11]
111; GFX906-NEXT:    s_add_u32 s10, s10, foo@gotpcrel32@lo+4
112; GFX906-NEXT:    s_addc_u32 s11, s11, foo@gotpcrel32@hi+12
113; GFX906-NEXT:    ;;#ASMSTART
114; GFX906-NEXT:    ; def s16
115; GFX906-NEXT:    ;;#ASMEND
116; GFX906-NEXT:    v_writelane_b32 v39, s16, 17
117; GFX906-NEXT:    s_load_dwordx2 s[10:11], s[10:11], 0x0
118; GFX906-NEXT:    ;;#ASMSTART
119; GFX906-NEXT:    ; def s17
120; GFX906-NEXT:    ;;#ASMEND
121; GFX906-NEXT:    v_writelane_b32 v39, s17, 18
122; GFX906-NEXT:    ;;#ASMSTART
123; GFX906-NEXT:    ; def s18
124; GFX906-NEXT:    ;;#ASMEND
125; GFX906-NEXT:    v_writelane_b32 v39, s18, 19
126; GFX906-NEXT:    ;;#ASMSTART
127; GFX906-NEXT:    ; def s19
128; GFX906-NEXT:    ;;#ASMEND
129; GFX906-NEXT:    v_writelane_b32 v39, s19, 20
130; GFX906-NEXT:    ;;#ASMSTART
131; GFX906-NEXT:    ; def s20
132; GFX906-NEXT:    ;;#ASMEND
133; GFX906-NEXT:    v_writelane_b32 v39, s20, 21
134; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
135; GFX906-NEXT:    v_writelane_b32 v39, s10, 22
136; GFX906-NEXT:    v_writelane_b32 v39, s11, 23
137; GFX906-NEXT:    s_or_saveexec_b64 s[34:35], -1
138; GFX906-NEXT:    s_mov_b64 exec, s[34:35]
139; GFX906-NEXT:    s_or_saveexec_b64 s[34:35], -1
140; GFX906-NEXT:    buffer_store_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
141; GFX906-NEXT:    s_mov_b64 exec, s[34:35]
142; GFX906-NEXT:    v_readlane_b32 s16, v39, 22
143; GFX906-NEXT:    s_mov_b32 s12, s24
144; GFX906-NEXT:    s_mov_b32 s13, s23
145; GFX906-NEXT:    s_mov_b32 s14, s22
146; GFX906-NEXT:    v_mov_b32_e32 v31, v32
147; GFX906-NEXT:    s_mov_b32 s15, s21
148; GFX906-NEXT:    s_mov_b64 s[10:11], s[26:27]
149; GFX906-NEXT:    v_readlane_b32 s17, v39, 23
150; GFX906-NEXT:    v_mov_b32_e32 v40, v32
151; GFX906-NEXT:    s_swappc_b64 s[30:31], s[16:17]
152; GFX906-NEXT:    s_or_saveexec_b64 s[34:35], -1
153; GFX906-NEXT:    buffer_load_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
154; GFX906-NEXT:    s_mov_b64 exec, s[34:35]
155; GFX906-NEXT:    s_waitcnt vmcnt(0)
156; GFX906-NEXT:    v_readlane_b32 s11, v39, 12
157; GFX906-NEXT:    ;;#ASMSTART
158; GFX906-NEXT:    ; use s11
159; GFX906-NEXT:    ;;#ASMEND
160; GFX906-NEXT:    v_readlane_b32 s12, v39, 13
161; GFX906-NEXT:    ;;#ASMSTART
162; GFX906-NEXT:    ; use s12
163; GFX906-NEXT:    ;;#ASMEND
164; GFX906-NEXT:    v_readlane_b32 s13, v39, 14
165; GFX906-NEXT:    ;;#ASMSTART
166; GFX906-NEXT:    ; use s13
167; GFX906-NEXT:    ;;#ASMEND
168; GFX906-NEXT:    v_readlane_b32 s14, v39, 15
169; GFX906-NEXT:    ;;#ASMSTART
170; GFX906-NEXT:    ; use s14
171; GFX906-NEXT:    ;;#ASMEND
172; GFX906-NEXT:    v_readlane_b32 s15, v39, 16
173; GFX906-NEXT:    ;;#ASMSTART
174; GFX906-NEXT:    ; use s15
175; GFX906-NEXT:    ;;#ASMEND
176; GFX906-NEXT:    v_readlane_b32 s16, v39, 17
177; GFX906-NEXT:    ;;#ASMSTART
178; GFX906-NEXT:    ; use s16
179; GFX906-NEXT:    ;;#ASMEND
180; GFX906-NEXT:    v_readlane_b32 s17, v39, 18
181; GFX906-NEXT:    ;;#ASMSTART
182; GFX906-NEXT:    ; use s17
183; GFX906-NEXT:    ;;#ASMEND
184; GFX906-NEXT:    v_readlane_b32 s18, v39, 19
185; GFX906-NEXT:    ;;#ASMSTART
186; GFX906-NEXT:    ; use s18
187; GFX906-NEXT:    ;;#ASMEND
188; GFX906-NEXT:    v_readlane_b32 s19, v39, 20
189; GFX906-NEXT:    ;;#ASMSTART
190; GFX906-NEXT:    ; use s19
191; GFX906-NEXT:    ;;#ASMEND
192; GFX906-NEXT:    v_readlane_b32 s20, v39, 21
193; GFX906-NEXT:    ;;#ASMSTART
194; GFX906-NEXT:    ; use s20
195; GFX906-NEXT:    ;;#ASMEND
196; GFX906-NEXT:    ;;#ASMSTART
197; GFX906-NEXT:    ; def s21
198; GFX906-NEXT:    ;;#ASMEND
199; GFX906-NEXT:    v_writelane_b32 v39, s21, 12
200; GFX906-NEXT:    ;;#ASMSTART
201; GFX906-NEXT:    ; def s22
202; GFX906-NEXT:    ;;#ASMEND
203; GFX906-NEXT:    v_writelane_b32 v39, s22, 13
204; GFX906-NEXT:    ;;#ASMSTART
205; GFX906-NEXT:    ; def s23
206; GFX906-NEXT:    ;;#ASMEND
207; GFX906-NEXT:    v_writelane_b32 v39, s23, 14
208; GFX906-NEXT:    ;;#ASMSTART
209; GFX906-NEXT:    ; def s24
210; GFX906-NEXT:    ;;#ASMEND
211; GFX906-NEXT:    v_writelane_b32 v39, s24, 15
212; GFX906-NEXT:    ;;#ASMSTART
213; GFX906-NEXT:    ; def s25
214; GFX906-NEXT:    ;;#ASMEND
215; GFX906-NEXT:    v_writelane_b32 v39, s25, 16
216; GFX906-NEXT:    ;;#ASMSTART
217; GFX906-NEXT:    ; def s26
218; GFX906-NEXT:    ;;#ASMEND
219; GFX906-NEXT:    v_writelane_b32 v39, s26, 17
220; GFX906-NEXT:    ;;#ASMSTART
221; GFX906-NEXT:    ; def s27
222; GFX906-NEXT:    ;;#ASMEND
223; GFX906-NEXT:    v_writelane_b32 v39, s27, 18
224; GFX906-NEXT:    ;;#ASMSTART
225; GFX906-NEXT:    ; def s28
226; GFX906-NEXT:    ;;#ASMEND
227; GFX906-NEXT:    v_writelane_b32 v39, s28, 19
228; GFX906-NEXT:    ;;#ASMSTART
229; GFX906-NEXT:    ; def s29
230; GFX906-NEXT:    ;;#ASMEND
231; GFX906-NEXT:    v_writelane_b32 v39, s29, 20
232; GFX906-NEXT:    s_or_saveexec_b64 s[34:35], -1
233; GFX906-NEXT:    buffer_store_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
234; GFX906-NEXT:    s_mov_b64 exec, s[34:35]
235; GFX906-NEXT:    v_readlane_b32 s4, v39, 10
236; GFX906-NEXT:    v_readlane_b32 s6, v39, 8
237; GFX906-NEXT:    v_readlane_b32 s8, v39, 6
238; GFX906-NEXT:    v_readlane_b32 s10, v39, 4
239; GFX906-NEXT:    v_readlane_b32 s16, v39, 22
240; GFX906-NEXT:    v_readlane_b32 s12, v39, 3
241; GFX906-NEXT:    v_mov_b32_e32 v31, v40
242; GFX906-NEXT:    v_readlane_b32 s13, v39, 2
243; GFX906-NEXT:    v_readlane_b32 s14, v39, 1
244; GFX906-NEXT:    v_readlane_b32 s15, v39, 0
245; GFX906-NEXT:    v_readlane_b32 s5, v39, 11
246; GFX906-NEXT:    v_readlane_b32 s7, v39, 9
247; GFX906-NEXT:    v_readlane_b32 s9, v39, 7
248; GFX906-NEXT:    v_readlane_b32 s11, v39, 5
249; GFX906-NEXT:    v_readlane_b32 s17, v39, 23
250; GFX906-NEXT:    s_swappc_b64 s[30:31], s[16:17]
251; GFX906-NEXT:    s_or_saveexec_b64 s[34:35], -1
252; GFX906-NEXT:    buffer_load_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
253; GFX906-NEXT:    s_mov_b64 exec, s[34:35]
254; GFX906-NEXT:    s_waitcnt vmcnt(0)
255; GFX906-NEXT:    v_readlane_b32 s4, v39, 10
256; GFX906-NEXT:    v_readlane_b32 s6, v39, 8
257; GFX906-NEXT:    v_readlane_b32 s8, v39, 6
258; GFX906-NEXT:    v_readlane_b32 s10, v39, 4
259; GFX906-NEXT:    v_readlane_b32 s16, v39, 22
260; GFX906-NEXT:    v_readlane_b32 s5, v39, 11
261; GFX906-NEXT:    v_readlane_b32 s7, v39, 9
262; GFX906-NEXT:    v_readlane_b32 s9, v39, 7
263; GFX906-NEXT:    v_readlane_b32 s11, v39, 5
264; GFX906-NEXT:    v_readlane_b32 s12, v39, 3
265; GFX906-NEXT:    v_readlane_b32 s13, v39, 2
266; GFX906-NEXT:    v_readlane_b32 s14, v39, 1
267; GFX906-NEXT:    v_readlane_b32 s15, v39, 0
268; GFX906-NEXT:    v_mov_b32_e32 v31, v40
269; GFX906-NEXT:    v_readlane_b32 s17, v39, 23
270; GFX906-NEXT:    v_readlane_b32 s21, v39, 12
271; GFX906-NEXT:    ;;#ASMSTART
272; GFX906-NEXT:    ; use s21
273; GFX906-NEXT:    ;;#ASMEND
274; GFX906-NEXT:    v_readlane_b32 s22, v39, 13
275; GFX906-NEXT:    ;;#ASMSTART
276; GFX906-NEXT:    ; use s22
277; GFX906-NEXT:    ;;#ASMEND
278; GFX906-NEXT:    v_readlane_b32 s23, v39, 14
279; GFX906-NEXT:    ;;#ASMSTART
280; GFX906-NEXT:    ; use s23
281; GFX906-NEXT:    ;;#ASMEND
282; GFX906-NEXT:    v_readlane_b32 s24, v39, 15
283; GFX906-NEXT:    ;;#ASMSTART
284; GFX906-NEXT:    ; use s24
285; GFX906-NEXT:    ;;#ASMEND
286; GFX906-NEXT:    v_readlane_b32 s25, v39, 16
287; GFX906-NEXT:    ;;#ASMSTART
288; GFX906-NEXT:    ; use s25
289; GFX906-NEXT:    ;;#ASMEND
290; GFX906-NEXT:    v_readlane_b32 s26, v39, 17
291; GFX906-NEXT:    ;;#ASMSTART
292; GFX906-NEXT:    ; use s26
293; GFX906-NEXT:    ;;#ASMEND
294; GFX906-NEXT:    v_readlane_b32 s27, v39, 18
295; GFX906-NEXT:    ;;#ASMSTART
296; GFX906-NEXT:    ; use s27
297; GFX906-NEXT:    ;;#ASMEND
298; GFX906-NEXT:    v_readlane_b32 s28, v39, 19
299; GFX906-NEXT:    ;;#ASMSTART
300; GFX906-NEXT:    ; use s28
301; GFX906-NEXT:    ;;#ASMEND
302; GFX906-NEXT:    v_readlane_b32 s29, v39, 20
303; GFX906-NEXT:    ;;#ASMSTART
304; GFX906-NEXT:    ; use s29
305; GFX906-NEXT:    ;;#ASMEND
306; GFX906-NEXT:    s_swappc_b64 s[30:31], s[16:17]
307; GFX906-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
308; GFX906-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
309; GFX906-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
310; GFX906-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
311; GFX906-NEXT:    buffer_load_dword v4, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
312; GFX906-NEXT:    buffer_load_dword v5, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
313; GFX906-NEXT:    buffer_load_dword v6, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
314; GFX906-NEXT:    buffer_load_dword v7, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
315; GFX906-NEXT:    buffer_load_dword v8, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
316; GFX906-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
317; GFX906-NEXT:    buffer_load_dword v10, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
318; GFX906-NEXT:    buffer_load_dword v11, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
319; GFX906-NEXT:    buffer_load_dword v12, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload
320; GFX906-NEXT:    buffer_load_dword v13, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
321; GFX906-NEXT:    buffer_load_dword v14, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload
322; GFX906-NEXT:    buffer_load_dword v15, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload
323; GFX906-NEXT:    buffer_load_dword v16, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload
324; GFX906-NEXT:    buffer_load_dword v17, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload
325; GFX906-NEXT:    buffer_load_dword v18, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload
326; GFX906-NEXT:    buffer_load_dword v19, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload
327; GFX906-NEXT:    buffer_load_dword v20, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload
328; GFX906-NEXT:    buffer_load_dword v21, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload
329; GFX906-NEXT:    buffer_load_dword v22, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload
330; GFX906-NEXT:    buffer_load_dword v23, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload
331; GFX906-NEXT:    buffer_load_dword v24, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload
332; GFX906-NEXT:    buffer_load_dword v25, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload
333; GFX906-NEXT:    buffer_load_dword v26, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload
334; GFX906-NEXT:    buffer_load_dword v27, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload
335; GFX906-NEXT:    buffer_load_dword v28, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload
336; GFX906-NEXT:    buffer_load_dword v29, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
337; GFX906-NEXT:    buffer_load_dword v30, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
338; GFX906-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
339; GFX906-NEXT:    buffer_load_dword v32, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload
340; GFX906-NEXT:    buffer_load_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload
341; GFX906-NEXT:    v_readlane_b32 s31, v41, 1
342; GFX906-NEXT:    v_readlane_b32 s30, v41, 0
343; GFX906-NEXT:    s_mov_b32 s32, s33
344; GFX906-NEXT:    v_readlane_b32 s4, v41, 4
345; GFX906-NEXT:    v_readlane_b32 s34, v41, 2
346; GFX906-NEXT:    v_readlane_b32 s35, v41, 3
347; GFX906-NEXT:    s_waitcnt vmcnt(0)
348; GFX906-NEXT:    flat_store_dwordx4 v[0:1], v[30:33] offset:112
349; GFX906-NEXT:    s_waitcnt vmcnt(0)
350; GFX906-NEXT:    flat_store_dwordx4 v[0:1], v[26:29] offset:96
351; GFX906-NEXT:    s_waitcnt vmcnt(0)
352; GFX906-NEXT:    flat_store_dwordx4 v[0:1], v[22:25] offset:80
353; GFX906-NEXT:    s_waitcnt vmcnt(0)
354; GFX906-NEXT:    flat_store_dwordx4 v[0:1], v[18:21] offset:64
355; GFX906-NEXT:    s_waitcnt vmcnt(0)
356; GFX906-NEXT:    flat_store_dwordx4 v[0:1], v[14:17] offset:48
357; GFX906-NEXT:    s_waitcnt vmcnt(0)
358; GFX906-NEXT:    flat_store_dwordx4 v[0:1], v[10:13] offset:32
359; GFX906-NEXT:    s_waitcnt vmcnt(0)
360; GFX906-NEXT:    flat_store_dwordx4 v[0:1], v[6:9] offset:16
361; GFX906-NEXT:    s_waitcnt vmcnt(0)
362; GFX906-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
363; GFX906-NEXT:    s_waitcnt vmcnt(0)
364; GFX906-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
365; GFX906-NEXT:    s_xor_saveexec_b64 s[6:7], -1
366; GFX906-NEXT:    buffer_load_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
367; GFX906-NEXT:    s_mov_b64 exec, -1
368; GFX906-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload
369; GFX906-NEXT:    s_mov_b64 exec, s[6:7]
370; GFX906-NEXT:    s_mov_b32 s33, s4
371; GFX906-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
372; GFX906-NEXT:    s_setpc_b64 s[30:31]
373;
374; GFX908-LABEL: preserve_wwm_copy_dstreg:
375; GFX908:       ; %bb.0:
376; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377; GFX908-NEXT:    s_mov_b32 s16, s33
378; GFX908-NEXT:    s_mov_b32 s33, s32
379; GFX908-NEXT:    s_xor_saveexec_b64 s[18:19], -1
380; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
381; GFX908-NEXT:    s_mov_b64 exec, s[18:19]
382; GFX908-NEXT:    v_mov_b32_e32 v2, s16
383; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill
384; GFX908-NEXT:    v_mov_b32_e32 v2, s34
385; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill
386; GFX908-NEXT:    v_mov_b32_e32 v2, s35
387; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill
388; GFX908-NEXT:    s_addk_i32 s32, 0x2c00
389; GFX908-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
390; GFX908-NEXT:    s_mov_b64 s[16:17], exec
391; GFX908-NEXT:    s_mov_b64 exec, 1
392; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:168
393; GFX908-NEXT:    v_writelane_b32 v2, s30, 0
394; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
395; GFX908-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:168
396; GFX908-NEXT:    s_waitcnt vmcnt(0)
397; GFX908-NEXT:    s_mov_b64 exec, s[16:17]
398; GFX908-NEXT:    s_mov_b64 s[16:17], exec
399; GFX908-NEXT:    s_mov_b64 exec, 1
400; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:168
401; GFX908-NEXT:    v_writelane_b32 v2, s31, 0
402; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
403; GFX908-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:168
404; GFX908-NEXT:    s_waitcnt vmcnt(0)
405; GFX908-NEXT:    s_mov_b64 exec, s[16:17]
406; GFX908-NEXT:    s_mov_b32 s21, s15
407; GFX908-NEXT:    ; implicit-def: $vgpr39 : SGPR spill to VGPR lane
408; GFX908-NEXT:    s_mov_b32 s22, s14
409; GFX908-NEXT:    v_writelane_b32 v39, s21, 0
410; GFX908-NEXT:    v_writelane_b32 v39, s22, 1
411; GFX908-NEXT:    s_mov_b32 s23, s13
412; GFX908-NEXT:    v_writelane_b32 v39, s23, 2
413; GFX908-NEXT:    s_mov_b32 s24, s12
414; GFX908-NEXT:    v_writelane_b32 v39, s24, 3
415; GFX908-NEXT:    s_mov_b64 s[26:27], s[10:11]
416; GFX908-NEXT:    v_writelane_b32 v39, s26, 4
417; GFX908-NEXT:    v_writelane_b32 v39, s27, 5
418; GFX908-NEXT:    v_writelane_b32 v39, s8, 6
419; GFX908-NEXT:    v_writelane_b32 v39, s9, 7
420; GFX908-NEXT:    v_writelane_b32 v39, s6, 8
421; GFX908-NEXT:    v_writelane_b32 v39, s7, 9
422; GFX908-NEXT:    v_writelane_b32 v39, s4, 10
423; GFX908-NEXT:    v_mov_b32_e32 v32, v31
424; GFX908-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
425; GFX908-NEXT:    s_nop 0
426; GFX908-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
427; GFX908-NEXT:    v_writelane_b32 v39, s5, 11
428; GFX908-NEXT:    s_or_saveexec_b64 s[34:35], -1
429; GFX908-NEXT:    s_mov_b64 exec, s[34:35]
430; GFX908-NEXT:    ;;#ASMSTART
431; GFX908-NEXT:    ; def v[0:31]
432; GFX908-NEXT:    ;;#ASMEND
433; GFX908-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
434; GFX908-NEXT:    s_nop 0
435; GFX908-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
436; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
437; GFX908-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
438; GFX908-NEXT:    buffer_store_dword v4, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
439; GFX908-NEXT:    buffer_store_dword v5, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
440; GFX908-NEXT:    buffer_store_dword v6, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
441; GFX908-NEXT:    buffer_store_dword v7, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
442; GFX908-NEXT:    buffer_store_dword v8, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
443; GFX908-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
444; GFX908-NEXT:    buffer_store_dword v10, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill
445; GFX908-NEXT:    buffer_store_dword v11, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill
446; GFX908-NEXT:    buffer_store_dword v12, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill
447; GFX908-NEXT:    buffer_store_dword v13, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill
448; GFX908-NEXT:    buffer_store_dword v14, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill
449; GFX908-NEXT:    buffer_store_dword v15, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill
450; GFX908-NEXT:    buffer_store_dword v16, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill
451; GFX908-NEXT:    buffer_store_dword v17, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
452; GFX908-NEXT:    buffer_store_dword v18, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill
453; GFX908-NEXT:    buffer_store_dword v19, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill
454; GFX908-NEXT:    buffer_store_dword v20, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill
455; GFX908-NEXT:    buffer_store_dword v21, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill
456; GFX908-NEXT:    buffer_store_dword v22, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill
457; GFX908-NEXT:    buffer_store_dword v23, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill
458; GFX908-NEXT:    buffer_store_dword v24, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill
459; GFX908-NEXT:    buffer_store_dword v25, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
460; GFX908-NEXT:    buffer_store_dword v26, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
461; GFX908-NEXT:    buffer_store_dword v27, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
462; GFX908-NEXT:    buffer_store_dword v28, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill
463; GFX908-NEXT:    buffer_store_dword v29, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill
464; GFX908-NEXT:    buffer_store_dword v30, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill
465; GFX908-NEXT:    buffer_store_dword v31, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill
466; GFX908-NEXT:    ;;#ASMSTART
467; GFX908-NEXT:    ; def v40
468; GFX908-NEXT:    ;;#ASMEND
469; GFX908-NEXT:    ;;#ASMSTART
470; GFX908-NEXT:    ; def s11
471; GFX908-NEXT:    ;;#ASMEND
472; GFX908-NEXT:    v_writelane_b32 v39, s11, 12
473; GFX908-NEXT:    ;;#ASMSTART
474; GFX908-NEXT:    ; def s12
475; GFX908-NEXT:    ;;#ASMEND
476; GFX908-NEXT:    v_writelane_b32 v39, s12, 13
477; GFX908-NEXT:    ;;#ASMSTART
478; GFX908-NEXT:    ; def s13
479; GFX908-NEXT:    ;;#ASMEND
480; GFX908-NEXT:    v_writelane_b32 v39, s13, 14
481; GFX908-NEXT:    ;;#ASMSTART
482; GFX908-NEXT:    ; def s14
483; GFX908-NEXT:    ;;#ASMEND
484; GFX908-NEXT:    v_writelane_b32 v39, s14, 15
485; GFX908-NEXT:    ;;#ASMSTART
486; GFX908-NEXT:    ; def s15
487; GFX908-NEXT:    ;;#ASMEND
488; GFX908-NEXT:    v_writelane_b32 v39, s15, 16
489; GFX908-NEXT:    s_getpc_b64 s[10:11]
490; GFX908-NEXT:    s_add_u32 s10, s10, foo@gotpcrel32@lo+4
491; GFX908-NEXT:    s_addc_u32 s11, s11, foo@gotpcrel32@hi+12
492; GFX908-NEXT:    ;;#ASMSTART
493; GFX908-NEXT:    ; def s16
494; GFX908-NEXT:    ;;#ASMEND
495; GFX908-NEXT:    v_writelane_b32 v39, s16, 17
496; GFX908-NEXT:    s_load_dwordx2 s[10:11], s[10:11], 0x0
497; GFX908-NEXT:    ;;#ASMSTART
498; GFX908-NEXT:    ; def s17
499; GFX908-NEXT:    ;;#ASMEND
500; GFX908-NEXT:    v_writelane_b32 v39, s17, 18
501; GFX908-NEXT:    ;;#ASMSTART
502; GFX908-NEXT:    ; def s18
503; GFX908-NEXT:    ;;#ASMEND
504; GFX908-NEXT:    v_writelane_b32 v39, s18, 19
505; GFX908-NEXT:    ;;#ASMSTART
506; GFX908-NEXT:    ; def s19
507; GFX908-NEXT:    ;;#ASMEND
508; GFX908-NEXT:    v_writelane_b32 v39, s19, 20
509; GFX908-NEXT:    ;;#ASMSTART
510; GFX908-NEXT:    ; def s20
511; GFX908-NEXT:    ;;#ASMEND
512; GFX908-NEXT:    v_writelane_b32 v39, s20, 21
513; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
514; GFX908-NEXT:    v_writelane_b32 v39, s10, 22
515; GFX908-NEXT:    v_writelane_b32 v39, s11, 23
516; GFX908-NEXT:    s_or_saveexec_b64 s[34:35], -1
517; GFX908-NEXT:    s_mov_b64 exec, s[34:35]
518; GFX908-NEXT:    s_or_saveexec_b64 s[34:35], -1
519; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
520; GFX908-NEXT:    s_mov_b64 exec, s[34:35]
521; GFX908-NEXT:    v_readlane_b32 s16, v39, 22
522; GFX908-NEXT:    s_mov_b32 s12, s24
523; GFX908-NEXT:    s_mov_b32 s13, s23
524; GFX908-NEXT:    s_mov_b32 s14, s22
525; GFX908-NEXT:    v_mov_b32_e32 v31, v32
526; GFX908-NEXT:    s_mov_b32 s15, s21
527; GFX908-NEXT:    s_mov_b64 s[10:11], s[26:27]
528; GFX908-NEXT:    v_readlane_b32 s17, v39, 23
529; GFX908-NEXT:    v_mov_b32_e32 v40, v32
530; GFX908-NEXT:    s_swappc_b64 s[30:31], s[16:17]
531; GFX908-NEXT:    s_or_saveexec_b64 s[34:35], -1
532; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
533; GFX908-NEXT:    s_mov_b64 exec, s[34:35]
534; GFX908-NEXT:    s_waitcnt vmcnt(0)
535; GFX908-NEXT:    v_readlane_b32 s11, v39, 12
536; GFX908-NEXT:    ;;#ASMSTART
537; GFX908-NEXT:    ; use s11
538; GFX908-NEXT:    ;;#ASMEND
539; GFX908-NEXT:    v_readlane_b32 s12, v39, 13
540; GFX908-NEXT:    ;;#ASMSTART
541; GFX908-NEXT:    ; use s12
542; GFX908-NEXT:    ;;#ASMEND
543; GFX908-NEXT:    v_readlane_b32 s13, v39, 14
544; GFX908-NEXT:    ;;#ASMSTART
545; GFX908-NEXT:    ; use s13
546; GFX908-NEXT:    ;;#ASMEND
547; GFX908-NEXT:    v_readlane_b32 s14, v39, 15
548; GFX908-NEXT:    ;;#ASMSTART
549; GFX908-NEXT:    ; use s14
550; GFX908-NEXT:    ;;#ASMEND
551; GFX908-NEXT:    v_readlane_b32 s15, v39, 16
552; GFX908-NEXT:    ;;#ASMSTART
553; GFX908-NEXT:    ; use s15
554; GFX908-NEXT:    ;;#ASMEND
555; GFX908-NEXT:    v_readlane_b32 s16, v39, 17
556; GFX908-NEXT:    ;;#ASMSTART
557; GFX908-NEXT:    ; use s16
558; GFX908-NEXT:    ;;#ASMEND
559; GFX908-NEXT:    v_readlane_b32 s17, v39, 18
560; GFX908-NEXT:    ;;#ASMSTART
561; GFX908-NEXT:    ; use s17
562; GFX908-NEXT:    ;;#ASMEND
563; GFX908-NEXT:    v_readlane_b32 s18, v39, 19
564; GFX908-NEXT:    ;;#ASMSTART
565; GFX908-NEXT:    ; use s18
566; GFX908-NEXT:    ;;#ASMEND
567; GFX908-NEXT:    v_readlane_b32 s19, v39, 20
568; GFX908-NEXT:    ;;#ASMSTART
569; GFX908-NEXT:    ; use s19
570; GFX908-NEXT:    ;;#ASMEND
571; GFX908-NEXT:    v_readlane_b32 s20, v39, 21
572; GFX908-NEXT:    ;;#ASMSTART
573; GFX908-NEXT:    ; use s20
574; GFX908-NEXT:    ;;#ASMEND
575; GFX908-NEXT:    ;;#ASMSTART
576; GFX908-NEXT:    ; def s21
577; GFX908-NEXT:    ;;#ASMEND
578; GFX908-NEXT:    v_writelane_b32 v39, s21, 12
579; GFX908-NEXT:    ;;#ASMSTART
580; GFX908-NEXT:    ; def s22
581; GFX908-NEXT:    ;;#ASMEND
582; GFX908-NEXT:    v_writelane_b32 v39, s22, 13
583; GFX908-NEXT:    ;;#ASMSTART
584; GFX908-NEXT:    ; def s23
585; GFX908-NEXT:    ;;#ASMEND
586; GFX908-NEXT:    v_writelane_b32 v39, s23, 14
587; GFX908-NEXT:    ;;#ASMSTART
588; GFX908-NEXT:    ; def s24
589; GFX908-NEXT:    ;;#ASMEND
590; GFX908-NEXT:    v_writelane_b32 v39, s24, 15
591; GFX908-NEXT:    ;;#ASMSTART
592; GFX908-NEXT:    ; def s25
593; GFX908-NEXT:    ;;#ASMEND
594; GFX908-NEXT:    v_writelane_b32 v39, s25, 16
595; GFX908-NEXT:    ;;#ASMSTART
596; GFX908-NEXT:    ; def s26
597; GFX908-NEXT:    ;;#ASMEND
598; GFX908-NEXT:    v_writelane_b32 v39, s26, 17
599; GFX908-NEXT:    ;;#ASMSTART
600; GFX908-NEXT:    ; def s27
601; GFX908-NEXT:    ;;#ASMEND
602; GFX908-NEXT:    v_writelane_b32 v39, s27, 18
603; GFX908-NEXT:    ;;#ASMSTART
604; GFX908-NEXT:    ; def s28
605; GFX908-NEXT:    ;;#ASMEND
606; GFX908-NEXT:    v_writelane_b32 v39, s28, 19
607; GFX908-NEXT:    ;;#ASMSTART
608; GFX908-NEXT:    ; def s29
609; GFX908-NEXT:    ;;#ASMEND
610; GFX908-NEXT:    v_writelane_b32 v39, s29, 20
611; GFX908-NEXT:    s_or_saveexec_b64 s[34:35], -1
612; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
613; GFX908-NEXT:    s_mov_b64 exec, s[34:35]
614; GFX908-NEXT:    v_readlane_b32 s4, v39, 10
615; GFX908-NEXT:    v_readlane_b32 s6, v39, 8
616; GFX908-NEXT:    v_readlane_b32 s8, v39, 6
617; GFX908-NEXT:    v_readlane_b32 s10, v39, 4
618; GFX908-NEXT:    v_readlane_b32 s16, v39, 22
619; GFX908-NEXT:    v_readlane_b32 s12, v39, 3
620; GFX908-NEXT:    v_mov_b32_e32 v31, v40
621; GFX908-NEXT:    v_readlane_b32 s13, v39, 2
622; GFX908-NEXT:    v_readlane_b32 s14, v39, 1
623; GFX908-NEXT:    v_readlane_b32 s15, v39, 0
624; GFX908-NEXT:    v_readlane_b32 s5, v39, 11
625; GFX908-NEXT:    v_readlane_b32 s7, v39, 9
626; GFX908-NEXT:    v_readlane_b32 s9, v39, 7
627; GFX908-NEXT:    v_readlane_b32 s11, v39, 5
628; GFX908-NEXT:    v_readlane_b32 s17, v39, 23
629; GFX908-NEXT:    s_swappc_b64 s[30:31], s[16:17]
630; GFX908-NEXT:    s_or_saveexec_b64 s[34:35], -1
631; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
632; GFX908-NEXT:    s_mov_b64 exec, s[34:35]
633; GFX908-NEXT:    s_waitcnt vmcnt(0)
634; GFX908-NEXT:    v_readlane_b32 s4, v39, 10
635; GFX908-NEXT:    v_readlane_b32 s6, v39, 8
636; GFX908-NEXT:    v_readlane_b32 s8, v39, 6
637; GFX908-NEXT:    v_readlane_b32 s10, v39, 4
638; GFX908-NEXT:    v_readlane_b32 s16, v39, 22
639; GFX908-NEXT:    v_readlane_b32 s5, v39, 11
640; GFX908-NEXT:    v_readlane_b32 s7, v39, 9
641; GFX908-NEXT:    v_readlane_b32 s9, v39, 7
642; GFX908-NEXT:    v_readlane_b32 s11, v39, 5
643; GFX908-NEXT:    v_readlane_b32 s12, v39, 3
644; GFX908-NEXT:    v_readlane_b32 s13, v39, 2
645; GFX908-NEXT:    v_readlane_b32 s14, v39, 1
646; GFX908-NEXT:    v_readlane_b32 s15, v39, 0
647; GFX908-NEXT:    v_mov_b32_e32 v31, v40
648; GFX908-NEXT:    v_readlane_b32 s17, v39, 23
649; GFX908-NEXT:    v_readlane_b32 s21, v39, 12
650; GFX908-NEXT:    ;;#ASMSTART
651; GFX908-NEXT:    ; use s21
652; GFX908-NEXT:    ;;#ASMEND
653; GFX908-NEXT:    v_readlane_b32 s22, v39, 13
654; GFX908-NEXT:    ;;#ASMSTART
655; GFX908-NEXT:    ; use s22
656; GFX908-NEXT:    ;;#ASMEND
657; GFX908-NEXT:    v_readlane_b32 s23, v39, 14
658; GFX908-NEXT:    ;;#ASMSTART
659; GFX908-NEXT:    ; use s23
660; GFX908-NEXT:    ;;#ASMEND
661; GFX908-NEXT:    v_readlane_b32 s24, v39, 15
662; GFX908-NEXT:    ;;#ASMSTART
663; GFX908-NEXT:    ; use s24
664; GFX908-NEXT:    ;;#ASMEND
665; GFX908-NEXT:    v_readlane_b32 s25, v39, 16
666; GFX908-NEXT:    ;;#ASMSTART
667; GFX908-NEXT:    ; use s25
668; GFX908-NEXT:    ;;#ASMEND
669; GFX908-NEXT:    v_readlane_b32 s26, v39, 17
670; GFX908-NEXT:    ;;#ASMSTART
671; GFX908-NEXT:    ; use s26
672; GFX908-NEXT:    ;;#ASMEND
673; GFX908-NEXT:    v_readlane_b32 s27, v39, 18
674; GFX908-NEXT:    ;;#ASMSTART
675; GFX908-NEXT:    ; use s27
676; GFX908-NEXT:    ;;#ASMEND
677; GFX908-NEXT:    v_readlane_b32 s28, v39, 19
678; GFX908-NEXT:    ;;#ASMSTART
679; GFX908-NEXT:    ; use s28
680; GFX908-NEXT:    ;;#ASMEND
681; GFX908-NEXT:    v_readlane_b32 s29, v39, 20
682; GFX908-NEXT:    ;;#ASMSTART
683; GFX908-NEXT:    ; use s29
684; GFX908-NEXT:    ;;#ASMEND
685; GFX908-NEXT:    s_swappc_b64 s[30:31], s[16:17]
686; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
687; GFX908-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
688; GFX908-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
689; GFX908-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
690; GFX908-NEXT:    buffer_load_dword v4, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
691; GFX908-NEXT:    buffer_load_dword v5, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
692; GFX908-NEXT:    buffer_load_dword v6, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
693; GFX908-NEXT:    buffer_load_dword v7, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
694; GFX908-NEXT:    buffer_load_dword v8, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
695; GFX908-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
696; GFX908-NEXT:    buffer_load_dword v10, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload
697; GFX908-NEXT:    buffer_load_dword v11, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
698; GFX908-NEXT:    buffer_load_dword v12, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload
699; GFX908-NEXT:    buffer_load_dword v13, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload
700; GFX908-NEXT:    buffer_load_dword v14, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload
701; GFX908-NEXT:    buffer_load_dword v15, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload
702; GFX908-NEXT:    buffer_load_dword v16, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload
703; GFX908-NEXT:    buffer_load_dword v17, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload
704; GFX908-NEXT:    buffer_load_dword v18, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload
705; GFX908-NEXT:    buffer_load_dword v19, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload
706; GFX908-NEXT:    buffer_load_dword v20, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload
707; GFX908-NEXT:    buffer_load_dword v21, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload
708; GFX908-NEXT:    buffer_load_dword v22, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload
709; GFX908-NEXT:    buffer_load_dword v23, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload
710; GFX908-NEXT:    buffer_load_dword v24, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload
711; GFX908-NEXT:    buffer_load_dword v25, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload
712; GFX908-NEXT:    buffer_load_dword v26, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload
713; GFX908-NEXT:    buffer_load_dword v27, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
714; GFX908-NEXT:    buffer_load_dword v28, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
715; GFX908-NEXT:    buffer_load_dword v29, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
716; GFX908-NEXT:    buffer_load_dword v30, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload
717; GFX908-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload
718; GFX908-NEXT:    buffer_load_dword v32, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload
719; GFX908-NEXT:    buffer_load_dword v33, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload
720; GFX908-NEXT:    s_mov_b64 s[4:5], exec
721; GFX908-NEXT:    s_waitcnt vmcnt(0)
722; GFX908-NEXT:    flat_store_dwordx4 v[0:1], v[30:33] offset:112
723; GFX908-NEXT:    s_waitcnt vmcnt(0)
724; GFX908-NEXT:    flat_store_dwordx4 v[0:1], v[26:29] offset:96
725; GFX908-NEXT:    s_waitcnt vmcnt(0)
726; GFX908-NEXT:    flat_store_dwordx4 v[0:1], v[22:25] offset:80
727; GFX908-NEXT:    s_waitcnt vmcnt(0)
728; GFX908-NEXT:    flat_store_dwordx4 v[0:1], v[18:21] offset:64
729; GFX908-NEXT:    s_waitcnt vmcnt(0)
730; GFX908-NEXT:    flat_store_dwordx4 v[0:1], v[14:17] offset:48
731; GFX908-NEXT:    s_waitcnt vmcnt(0)
732; GFX908-NEXT:    flat_store_dwordx4 v[0:1], v[10:13] offset:32
733; GFX908-NEXT:    s_waitcnt vmcnt(0)
734; GFX908-NEXT:    flat_store_dwordx4 v[0:1], v[6:9] offset:16
735; GFX908-NEXT:    s_waitcnt vmcnt(0)
736; GFX908-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
737; GFX908-NEXT:    s_waitcnt vmcnt(0)
738; GFX908-NEXT:    s_mov_b64 exec, 1
739; GFX908-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:168
740; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
741; GFX908-NEXT:    s_waitcnt vmcnt(0)
742; GFX908-NEXT:    v_readlane_b32 s31, v0, 0
743; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:168
744; GFX908-NEXT:    s_waitcnt vmcnt(0)
745; GFX908-NEXT:    s_mov_b64 exec, s[4:5]
746; GFX908-NEXT:    s_mov_b64 s[4:5], exec
747; GFX908-NEXT:    s_mov_b64 exec, 1
748; GFX908-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:168
749; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
750; GFX908-NEXT:    s_waitcnt vmcnt(0)
751; GFX908-NEXT:    v_readlane_b32 s30, v0, 0
752; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:168
753; GFX908-NEXT:    s_waitcnt vmcnt(0)
754; GFX908-NEXT:    s_mov_b64 exec, s[4:5]
755; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload
756; GFX908-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
757; GFX908-NEXT:    s_mov_b32 s32, s33
758; GFX908-NEXT:    s_waitcnt vmcnt(0)
759; GFX908-NEXT:    v_readfirstlane_b32 s4, v0
760; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload
761; GFX908-NEXT:    s_waitcnt vmcnt(0)
762; GFX908-NEXT:    v_readfirstlane_b32 s34, v0
763; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload
764; GFX908-NEXT:    s_waitcnt vmcnt(0)
765; GFX908-NEXT:    v_readfirstlane_b32 s35, v0
766; GFX908-NEXT:    s_xor_saveexec_b64 s[6:7], -1
767; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
768; GFX908-NEXT:    s_mov_b64 exec, s[6:7]
769; GFX908-NEXT:    s_mov_b32 s33, s4
770; GFX908-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
771; GFX908-NEXT:    s_setpc_b64 s[30:31]
772  %vreg0 = call <32 x float> asm sideeffect "; def $0", "=v"()
773  %v40 = call i32 asm sideeffect "; def $0","=${v40}"()
774
775  %s11 = call i32 asm sideeffect "; def $0","=${s11}"()
776  %s12 = call i32 asm sideeffect "; def $0","=${s12}"()
777  %s13 = call i32 asm sideeffect "; def $0","=${s13}"()
778  %s14 = call i32 asm sideeffect "; def $0","=${s14}"()
779  %s15 = call i32 asm sideeffect "; def $0","=${s15}"()
780  %s16 = call i32 asm sideeffect "; def $0","=${s16}"()
781  %s17 = call i32 asm sideeffect "; def $0","=${s17}"()
782  %s18 = call i32 asm sideeffect "; def $0","=${s18}"()
783  %s19 = call i32 asm sideeffect "; def $0","=${s19}"()
784  %s20 = call i32 asm sideeffect "; def $0","=${s20}"()
785  call void @foo()
786  call void asm sideeffect "; use $0","${s11}"(i32 %s11)
787  call void asm sideeffect "; use $0","${s12}"(i32 %s12)
788  call void asm sideeffect "; use $0","${s13}"(i32 %s13)
789  call void asm sideeffect "; use $0","${s14}"(i32 %s14)
790  call void asm sideeffect "; use $0","${s15}"(i32 %s15)
791  call void asm sideeffect "; use $0","${s16}"(i32 %s16)
792  call void asm sideeffect "; use $0","${s17}"(i32 %s17)
793  call void asm sideeffect "; use $0","${s18}"(i32 %s18)
794  call void asm sideeffect "; use $0","${s19}"(i32 %s19)
795  call void asm sideeffect "; use $0","${s20}"(i32 %s20)
796
797  %s21 = call i32 asm sideeffect "; def $0","=${s21}"()
798  %s22 = call i32 asm sideeffect "; def $0","=${s22}"()
799  %s23 = call i32 asm sideeffect "; def $0","=${s23}"()
800  %s24 = call i32 asm sideeffect "; def $0","=${s24}"()
801  %s25 = call i32 asm sideeffect "; def $0","=${s25}"()
802  %s26 = call i32 asm sideeffect "; def $0","=${s26}"()
803  %s27 = call i32 asm sideeffect "; def $0","=${s27}"()
804  %s28 = call i32 asm sideeffect "; def $0","=${s28}"()
805  %s29 = call i32 asm sideeffect "; def $0","=${s29}"()
806  call void @foo()
807  call void asm sideeffect "; use $0","${s21}"(i32 %s21)
808  call void asm sideeffect "; use $0","${s22}"(i32 %s22)
809  call void asm sideeffect "; use $0","${s23}"(i32 %s23)
810  call void asm sideeffect "; use $0","${s24}"(i32 %s24)
811  call void asm sideeffect "; use $0","${s25}"(i32 %s25)
812  call void asm sideeffect "; use $0","${s26}"(i32 %s26)
813  call void asm sideeffect "; use $0","${s27}"(i32 %s27)
814  call void asm sideeffect "; use $0","${s28}"(i32 %s28)
815  call void asm sideeffect "; use $0","${s29}"(i32 %s29)
816
817  call void @foo()
818
819  store volatile <32 x float> %vreg0, ptr %parg0
820
821  ret void
822}
823
; External callee, declared here without a body. The test function calls it
; three times: after the s11-s20 inline-asm defs, after the s21-s29 defs, and
; once more right before the final volatile store.
824declare void @foo()
825
; Attribute group #0, attached to @preserve_wwm_copy_dstreg.
; NOTE(review): the "amdgpu-num-vgpr"/"amdgpu-num-sgpr" values presumably cap
; the register budget to create the high register pressure described in the
; comment at the top of this file -- confirm against the AMDGPU backend docs.
; The check lines are autogenerated, so regenerate them with
; utils/update_llc_test_checks.py if these values are changed.
826attributes #0 = { "amdgpu-num-vgpr"="42" "amdgpu-num-sgpr"="40"}
827
; Module-level flags: a single entry (!0) with behavior value 1, key
; "amdhsa_code_object_version" and value 500.
; NOTE(review): presumably this selects AMDHSA code object v5 for the test;
; verify against the AMDGPU usage guide before changing it.
828!llvm.module.flags = !{!0}
829!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
830