xref: /openbsd-src/sys/dev/pci/drm/amd/amdgpu/gfx_v9_0.c (revision c1a45aed656e7d5627c30c92421893a76f370ccb)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45 
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47 
48 #include "amdgpu_ras.h"
49 
50 #include "gfx_v9_4.h"
51 #include "gfx_v9_0.h"
52 #include "gfx_v9_4_2.h"
53 
54 #include "asic_reg/pwr/pwr_10_0_offset.h"
55 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
56 #include "asic_reg/gc/gc_9_0_default.h"
57 
58 #define GFX9_NUM_GFX_RINGS     1
59 #define GFX9_MEC_HPD_SIZE 4096
60 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
61 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
62 
63 #define mmGCEA_PROBE_MAP                        0x070c
64 #define mmGCEA_PROBE_MAP_BASE_IDX               0
65 
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
72 
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
79 
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
86 
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
93 
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
101 
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
109 
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
112 
113 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
114 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
118 
119 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
120 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
121 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
122 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
125 
126 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
127 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
128 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
129 
130 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
131 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
132 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
133 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
134 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
135 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
136 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
137 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
138 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
139 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
140 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
141 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
142 
143 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
144 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
145 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
146 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
147 
148 enum ta_ras_gfx_subblock {
149 	/*CPC*/
150 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
151 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
152 	TA_RAS_BLOCK__GFX_CPC_UCODE,
153 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
154 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
155 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
156 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
157 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
158 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
159 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
160 	/* CPF*/
161 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
162 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
163 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
164 	TA_RAS_BLOCK__GFX_CPF_TAG,
165 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
166 	/* CPG*/
167 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
168 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
169 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
170 	TA_RAS_BLOCK__GFX_CPG_TAG,
171 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
172 	/* GDS*/
173 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
174 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
175 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
176 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
177 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
178 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
179 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
180 	/* SPI*/
181 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
182 	/* SQ*/
183 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
184 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
185 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
186 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
187 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
188 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
189 	/* SQC (3 ranges)*/
190 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
191 	/* SQC range 0*/
192 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
193 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
194 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
195 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
196 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
197 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
198 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
199 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
200 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
201 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
202 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
203 	/* SQC range 1*/
204 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
205 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
206 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
207 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
208 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
209 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
210 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
211 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
212 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
213 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
214 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
215 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
216 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
217 	/* SQC range 2*/
218 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
219 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
220 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
221 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
222 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
223 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
224 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
225 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
226 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
227 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
228 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
229 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
230 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
231 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
232 	/* TA*/
233 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
234 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
235 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
236 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
237 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
238 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
239 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
240 	/* TCA*/
241 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
242 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
243 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
244 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
245 	/* TCC (5 sub-ranges)*/
246 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
247 	/* TCC range 0*/
248 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
249 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
250 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
251 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
252 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
253 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
254 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
255 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
256 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
257 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
258 	/* TCC range 1*/
259 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
260 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
261 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
262 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
263 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
264 	/* TCC range 2*/
265 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
266 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
267 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
268 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
269 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
270 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
271 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
272 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
273 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
274 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
275 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
276 	/* TCC range 3*/
277 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
278 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
279 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
280 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
281 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
282 	/* TCC range 4*/
283 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
284 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
285 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
286 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
287 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
288 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
289 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
290 	/* TCI*/
291 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
292 	/* TCP*/
293 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
294 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
295 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
296 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
297 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
298 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
299 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
300 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
301 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
302 	/* TD*/
303 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
304 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
305 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
306 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
307 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
308 	/* EA (3 sub-ranges)*/
309 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
310 	/* EA range 0*/
311 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
312 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
313 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
314 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
315 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
316 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
317 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
318 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
319 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
320 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
321 	/* EA range 1*/
322 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
323 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
324 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
325 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
326 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
327 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
328 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
329 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
330 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
331 	/* EA range 2*/
332 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
333 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
334 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
335 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
336 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
337 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
338 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
339 	/* UTC VM L2 bank*/
340 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
341 	/* UTC VM walker*/
342 	TA_RAS_BLOCK__UTC_VML2_WALKER,
343 	/* UTC ATC L2 2MB cache*/
344 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
345 	/* UTC ATC L2 4KB cache*/
346 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
347 	TA_RAS_BLOCK__GFX_MAX
348 };
349 
350 struct ras_gfx_subblock {
351 	unsigned char *name;
352 	int ta_subblock;
353 	int hw_supported_error_type;
354 	int sw_supported_error_type;
355 };
356 
357 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
358 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
359 		#subblock,                                                     \
360 		TA_RAS_BLOCK__##subblock,                                      \
361 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
362 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
363 	}
364 
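/*
 * Each AMDGPU_RAS_SUB_BLOCK() entry above packs the eight flags a..h into the
 * two bitmask fields of struct ras_gfx_subblock: a..d become bits 0..3 of
 * hw_supported_error_type, and e..h are scattered (note the bit ordering in
 * the macro) into sw_supported_error_type.  For example,
 * AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1) expands to
 *   [AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH] =
 *       { "GFX_CPC_SCRATCH", TA_RAS_BLOCK__GFX_CPC_SCRATCH, 0xe, 0x6 }.
 * The meaning of the individual bits is defined by the RAS TA interface, not
 * in this file.
 */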
365 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
366 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
367 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
368 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
369 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
370 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
371 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
380 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
382 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
383 			     0),
384 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
385 			     0),
386 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
387 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
388 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
389 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
390 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
392 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
394 			     0, 0),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
396 			     0),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
398 			     0, 0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
400 			     0),
401 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
402 			     0, 0),
403 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
404 			     0),
405 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
406 			     1),
407 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
408 			     0, 0, 0),
409 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
410 			     0),
411 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
412 			     0),
413 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
414 			     0),
415 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
416 			     0),
417 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
418 			     0),
419 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
420 			     0, 0),
421 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
422 			     0),
423 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
424 			     0),
425 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
426 			     0, 0, 0),
427 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
428 			     0),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
430 			     0),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
432 			     0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
434 			     0),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
436 			     0),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
438 			     0, 0),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
440 			     0),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
442 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
444 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
445 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
447 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
450 			     1),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
452 			     1),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
454 			     1),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
456 			     0),
457 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
458 			     0),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
460 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
462 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
464 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
466 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
469 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
471 			     0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
473 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
474 			     0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
476 			     0, 0),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
478 			     0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
480 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
495 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
499 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
500 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
508 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
509 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
510 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
511 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
512 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
513 };
514 
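/*
 * Golden register settings.  Each SOC15_REG_GOLDEN_VALUE(ip, inst, reg,
 * and_mask, or_value) entry is applied by soc15_program_register_sequence()
 * (see soc15.c) as a read-modify-write: the bits selected by and_mask are
 * cleared and replaced with the corresponding bits of or_value, while an
 * all-ones mask writes or_value directly.
 */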
515 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
516 {
517 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
518 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
523 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
524 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
525 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
526 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
535 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
537 };
538 
539 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
540 {
541 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
542 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
545 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
546 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
547 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
548 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
557 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
559 };
560 
561 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
562 {
563 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
564 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
565 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
572 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
574 };
575 
576 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
577 {
578 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
579 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
581 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
588 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
589 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
590 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
591 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
599 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
600 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
601 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
602 };
603 
604 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
605 {
606 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
607 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
608 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
609 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
611 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
613 };
614 
615 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
616 {
617 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
618 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
619 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
620 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
622 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
623 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
624 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
625 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
634 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
636 };
637 
638 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
639 {
640 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
641 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
642 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
643 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
645 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
646 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
647 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
648 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
649 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
650 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
652 };
653 
654 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
655 {
656 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
657 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
659 };
660 
661 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
662 {
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
666 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
667 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
668 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
679 };
680 
681 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
682 {
683 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
684 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
685 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
693 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
694 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
695 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
696 };
697 
698 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
699 {
700 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
701 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
702 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
703 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
704 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
705 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
706 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
707 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
708 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
709 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
710 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
711 };
712 
713 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
714 	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
715 	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
716 };
717 
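/*
 * Offsets of the eight RLC_SRM_INDEX_CNTL_ADDR/DATA register pairs relative
 * to entry 0, so the save/restore list index controls can be programmed in a
 * simple loop.
 */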
718 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
719 {
720 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
721 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
722 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
723 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
724 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
725 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
726 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
727 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
728 };
729 
730 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
731 {
732 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
733 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
734 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
735 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
736 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
737 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
738 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
739 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
740 };
741 
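/*
 * Write a GC register through the RLC (used by gfx_v9_0_sriov_wreg() below
 * when the host grants full access).  GRBM_GFX_CNTL/GRBM_GFX_INDEX are
 * mirrored into SCRATCH_REG2/3 and then written directly; any other register
 * is handed to the RLC by placing the value in SCRATCH_REG0 and the offset
 * (with bit 31 set as a completion flag) in SCRATCH_REG1, ringing
 * RLC_SPARE_INT, and polling until the RLC clears bit 31.
 */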
742 static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag)
743 {
744 	static void *scratch_reg0;
745 	static void *scratch_reg1;
746 	static void *scratch_reg2;
747 	static void *scratch_reg3;
748 	static void *spare_int;
749 	static uint32_t grbm_cntl;
750 	static uint32_t grbm_idx;
751 
752 	scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
753 	scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
754 	scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
755 	scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
756 	spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
757 
758 	grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
759 	grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
760 
761 	if (amdgpu_sriov_runtime(adev)) {
762 		pr_err("shouldn't call rlcg register write during runtime\n");
763 		return;
764 	}
765 
766 	if (offset == grbm_cntl || offset == grbm_idx) {
767 		if (offset  == grbm_cntl)
768 			writel(v, scratch_reg2);
769 		else if (offset == grbm_idx)
770 			writel(v, scratch_reg3);
771 
772 		writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
773 	} else {
774 		uint32_t i = 0;
775 		uint32_t retries = 50000;
776 
777 		writel(v, scratch_reg0);
778 		writel(offset | 0x80000000, scratch_reg1);
779 		writel(1, spare_int);
780 		for (i = 0; i < retries; i++) {
781 			u32 tmp;
782 
783 			tmp = readl(scratch_reg1);
784 			if (!(tmp & 0x80000000))
785 				break;
786 
787 			udelay(10);
788 		}
789 		if (i >= retries)
790 			pr_err("timeout: rlcg program reg:0x%05x failed!\n", offset);
791 	}
792 
793 }
794 
795 static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, u32 offset,
796 			       u32 v, u32 acc_flags, u32 hwip)
797 {
798 	if ((acc_flags & AMDGPU_REGS_RLC) &&
799 	    amdgpu_sriov_fullaccess(adev)) {
800 		gfx_v9_0_rlcg_w(adev, offset, v, acc_flags);
801 
802 		return;
803 	}
804 
805 	if (acc_flags & AMDGPU_REGS_NO_KIQ)
806 		WREG32_NO_KIQ(offset, v);
807 	else
808 		WREG32(offset, v);
809 }
810 
811 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
812 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
813 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
814 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
815 
816 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
817 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
818 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
819 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
820 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
821 				struct amdgpu_cu_info *cu_info);
822 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
823 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
824 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
825 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
826 					  void *ras_error_status);
827 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
828 				     void *inject_if);
829 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
830 
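/*
 * KIQ (kernel interface queue) packet helpers: each function below assembles
 * one PM4 packet on the KIQ ring (SET_RESOURCES, MAP_QUEUES, UNMAP_QUEUES,
 * QUERY_STATUS, INVALIDATE_TLBS) on behalf of the generic KIQ management
 * code.
 */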
831 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
832 				uint64_t queue_mask)
833 {
834 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
835 	amdgpu_ring_write(kiq_ring,
836 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
837 		/* vmid_mask:0, queue_type:0 (KIQ) */
838 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
839 	amdgpu_ring_write(kiq_ring,
840 			lower_32_bits(queue_mask));	/* queue mask lo */
841 	amdgpu_ring_write(kiq_ring,
842 			upper_32_bits(queue_mask));	/* queue mask hi */
843 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
844 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
845 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
846 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
847 }
848 
849 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
850 				 struct amdgpu_ring *ring)
851 {
852 	struct amdgpu_device *adev = kiq_ring->adev;
853 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
854 	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
855 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
856 
857 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
858 	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
859 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
860 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
861 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
862 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
863 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
864 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
865 			 /*queue_type: normal compute queue */
866 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
867 			 /* alloc format: all_on_one_pipe */
868 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
869 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
870 			 /* num_queues: must be 1 */
871 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
872 	amdgpu_ring_write(kiq_ring,
873 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
874 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
875 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
876 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
877 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
878 }
879 
880 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
881 				   struct amdgpu_ring *ring,
882 				   enum amdgpu_unmap_queues_action action,
883 				   u64 gpu_addr, u64 seq)
884 {
885 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
886 
887 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
888 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
889 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
890 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
891 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
892 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
893 	amdgpu_ring_write(kiq_ring,
894 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
895 
896 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
897 		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
898 		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
899 		amdgpu_ring_write(kiq_ring, seq);
900 	} else {
901 		amdgpu_ring_write(kiq_ring, 0);
902 		amdgpu_ring_write(kiq_ring, 0);
903 		amdgpu_ring_write(kiq_ring, 0);
904 	}
905 }
906 
907 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
908 				   struct amdgpu_ring *ring,
909 				   u64 addr,
910 				   u64 seq)
911 {
912 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
913 
914 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
915 	amdgpu_ring_write(kiq_ring,
916 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
917 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
918 			  PACKET3_QUERY_STATUS_COMMAND(2));
919 	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
920 	amdgpu_ring_write(kiq_ring,
921 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
922 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
923 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
924 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
925 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
926 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
927 }
928 
929 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
930 				uint16_t pasid, uint32_t flush_type,
931 				bool all_hub)
932 {
933 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
934 	amdgpu_ring_write(kiq_ring,
935 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
936 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
937 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
938 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
939 }
940 
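/*
 * The *_size fields below are the dword lengths of the packets emitted by the
 * corresponding helpers above; the generic KIQ code uses them to size its
 * ring allocations.
 */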
941 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
942 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
943 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
944 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
945 	.kiq_query_status = gfx_v9_0_kiq_query_status,
946 	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
947 	.set_resources_size = 8,
948 	.map_queues_size = 7,
949 	.unmap_queues_size = 6,
950 	.query_status_size = 7,
951 	.invalidate_tlbs_size = 2,
952 };
953 
954 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
955 {
956 	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
957 }
958 
959 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
960 {
961 	switch (adev->asic_type) {
962 	case CHIP_VEGA10:
963 		soc15_program_register_sequence(adev,
964 						golden_settings_gc_9_0,
965 						ARRAY_SIZE(golden_settings_gc_9_0));
966 		soc15_program_register_sequence(adev,
967 						golden_settings_gc_9_0_vg10,
968 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
969 		break;
970 	case CHIP_VEGA12:
971 		soc15_program_register_sequence(adev,
972 						golden_settings_gc_9_2_1,
973 						ARRAY_SIZE(golden_settings_gc_9_2_1));
974 		soc15_program_register_sequence(adev,
975 						golden_settings_gc_9_2_1_vg12,
976 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
977 		break;
978 	case CHIP_VEGA20:
979 		soc15_program_register_sequence(adev,
980 						golden_settings_gc_9_0,
981 						ARRAY_SIZE(golden_settings_gc_9_0));
982 		soc15_program_register_sequence(adev,
983 						golden_settings_gc_9_0_vg20,
984 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
985 		break;
986 	case CHIP_ARCTURUS:
987 		soc15_program_register_sequence(adev,
988 						golden_settings_gc_9_4_1_arct,
989 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
990 		break;
991 	case CHIP_RAVEN:
992 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
993 						ARRAY_SIZE(golden_settings_gc_9_1));
994 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
995 			soc15_program_register_sequence(adev,
996 							golden_settings_gc_9_1_rv2,
997 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
998 		else
999 			soc15_program_register_sequence(adev,
1000 							golden_settings_gc_9_1_rv1,
1001 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
1002 		break;
1003 	case CHIP_RENOIR:
1004 		soc15_program_register_sequence(adev,
1005 						golden_settings_gc_9_1_rn,
1006 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
1007 		return; /* for renoir, don't need the common golden settings */
1008 	case CHIP_ALDEBARAN:
1009 		gfx_v9_4_2_init_golden_registers(adev,
1010 						 adev->smuio.funcs->get_die_id(adev));
1011 		break;
1012 	default:
1013 		break;
1014 	}
1015 
1016 	if ((adev->asic_type != CHIP_ARCTURUS) &&
1017 	    (adev->asic_type != CHIP_ALDEBARAN))
1018 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1019 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
1020 }
1021 
1022 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
1023 {
1024 	adev->gfx.scratch.num_reg = 8;
1025 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1026 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
1027 }
1028 
1029 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1030 				       bool wc, uint32_t reg, uint32_t val)
1031 {
1032 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1033 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1034 				WRITE_DATA_DST_SEL(0) |
1035 				(wc ? WR_CONFIRM : 0));
1036 	amdgpu_ring_write(ring, reg);
1037 	amdgpu_ring_write(ring, 0);
1038 	amdgpu_ring_write(ring, val);
1039 }
1040 
1041 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1042 				  int mem_space, int opt, uint32_t addr0,
1043 				  uint32_t addr1, uint32_t ref, uint32_t mask,
1044 				  uint32_t inv)
1045 {
1046 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1047 	amdgpu_ring_write(ring,
1048 				 /* memory (1) or register (0) */
1049 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1050 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
1051 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1052 				 WAIT_REG_MEM_ENGINE(eng_sel)));
1053 
1054 	if (mem_space)
1055 		BUG_ON(addr0 & 0x3); /* Dword align */
1056 	amdgpu_ring_write(ring, addr0);
1057 	amdgpu_ring_write(ring, addr1);
1058 	amdgpu_ring_write(ring, ref);
1059 	amdgpu_ring_write(ring, mask);
1060 	amdgpu_ring_write(ring, inv); /* poll interval */
1061 }
1062 
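/*
 * Basic ring sanity test: seed a scratch register with 0xCAFEDEAD, submit a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, and poll until the
 * value shows up or the usec timeout expires.
 */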
1063 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1064 {
1065 	struct amdgpu_device *adev = ring->adev;
1066 	uint32_t scratch;
1067 	uint32_t tmp = 0;
1068 	unsigned i;
1069 	int r;
1070 
1071 	r = amdgpu_gfx_scratch_get(adev, &scratch);
1072 	if (r)
1073 		return r;
1074 
1075 	WREG32(scratch, 0xCAFEDEAD);
1076 	r = amdgpu_ring_alloc(ring, 3);
1077 	if (r)
1078 		goto error_free_scratch;
1079 
1080 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1081 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
1082 	amdgpu_ring_write(ring, 0xDEADBEEF);
1083 	amdgpu_ring_commit(ring);
1084 
1085 	for (i = 0; i < adev->usec_timeout; i++) {
1086 		tmp = RREG32(scratch);
1087 		if (tmp == 0xDEADBEEF)
1088 			break;
1089 		udelay(1);
1090 	}
1091 
1092 	if (i >= adev->usec_timeout)
1093 		r = -ETIMEDOUT;
1094 
1095 error_free_scratch:
1096 	amdgpu_gfx_scratch_free(adev, scratch);
1097 	return r;
1098 }
1099 
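/*
 * Indirect buffer test: seed a writeback slot with 0xCAFEDEAD, submit a small
 * IB containing a WRITE_DATA packet that stores 0xDEADBEEF to it, then wait
 * on the fence and check the slot.
 */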
1100 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1101 {
1102 	struct amdgpu_device *adev = ring->adev;
1103 	struct amdgpu_ib ib;
1104 	struct dma_fence *f = NULL;
1105 
1106 	unsigned index;
1107 	uint64_t gpu_addr;
1108 	uint32_t tmp;
1109 	long r;
1110 
1111 	r = amdgpu_device_wb_get(adev, &index);
1112 	if (r)
1113 		return r;
1114 
1115 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1116 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1117 	memset(&ib, 0, sizeof(ib));
1118 	r = amdgpu_ib_get(adev, NULL, 16,
1119 					AMDGPU_IB_POOL_DIRECT, &ib);
1120 	if (r)
1121 		goto err1;
1122 
1123 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1124 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1125 	ib.ptr[2] = lower_32_bits(gpu_addr);
1126 	ib.ptr[3] = upper_32_bits(gpu_addr);
1127 	ib.ptr[4] = 0xDEADBEEF;
1128 	ib.length_dw = 5;
1129 
1130 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1131 	if (r)
1132 		goto err2;
1133 
1134 	r = dma_fence_wait_timeout(f, false, timeout);
1135 	if (r == 0) {
1136 		r = -ETIMEDOUT;
1137 		goto err2;
1138 	} else if (r < 0) {
1139 		goto err2;
1140 	}
1141 
1142 	tmp = adev->wb.wb[index];
1143 	if (tmp == 0xDEADBEEF)
1144 		r = 0;
1145 	else
1146 		r = -EINVAL;
1147 
1148 err2:
1149 	amdgpu_ib_free(adev, &ib, NULL);
1150 	dma_fence_put(f);
1151 err1:
1152 	amdgpu_device_wb_free(adev, index);
1153 	return r;
1154 }
1155 
1156 
1157 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1158 {
1159 	release_firmware(adev->gfx.pfp_fw);
1160 	adev->gfx.pfp_fw = NULL;
1161 	release_firmware(adev->gfx.me_fw);
1162 	adev->gfx.me_fw = NULL;
1163 	release_firmware(adev->gfx.ce_fw);
1164 	adev->gfx.ce_fw = NULL;
1165 	release_firmware(adev->gfx.rlc_fw);
1166 	adev->gfx.rlc_fw = NULL;
1167 	release_firmware(adev->gfx.mec_fw);
1168 	adev->gfx.mec_fw = NULL;
1169 	release_firmware(adev->gfx.mec2_fw);
1170 	adev->gfx.mec2_fw = NULL;
1171 
1172 	kfree(adev->gfx.rlc.register_list_format);
1173 }
1174 
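/*
 * Parse the v2_1 RLC firmware header and record the versions, sizes and
 * payload pointers of the three save/restore lists (CNTL, GPM, SRM) embedded
 * in the RLC image.
 */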
1175 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1176 {
1177 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
1178 
1179 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1180 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1181 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1182 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1183 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1184 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1185 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1186 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1187 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1188 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1189 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1190 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1191 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1192 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1193 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1194 }
1195 
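/*
 * Record whether the loaded ME/MEC firmware is recent enough for the
 * write-then-wait register path used elsewhere in this file (the
 * me_fw_write_wait / mec_fw_write_wait flags); a separate check warns once
 * when the CP firmware predates the minimum versions listed here.
 */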
1196 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1197 {
1198 	adev->gfx.me_fw_write_wait = false;
1199 	adev->gfx.mec_fw_write_wait = false;
1200 
1201 	if ((adev->asic_type != CHIP_ARCTURUS) &&
1202 	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1203 	    (adev->gfx.mec_feature_version < 46) ||
1204 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
1205 	    (adev->gfx.pfp_feature_version < 46)))
1206 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1207 
1208 	switch (adev->asic_type) {
1209 	case CHIP_VEGA10:
1210 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1211 		    (adev->gfx.me_feature_version >= 42) &&
1212 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1213 		    (adev->gfx.pfp_feature_version >= 42))
1214 			adev->gfx.me_fw_write_wait = true;
1215 
1216 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1217 		    (adev->gfx.mec_feature_version >= 42))
1218 			adev->gfx.mec_fw_write_wait = true;
1219 		break;
1220 	case CHIP_VEGA12:
1221 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1222 		    (adev->gfx.me_feature_version >= 44) &&
1223 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1224 		    (adev->gfx.pfp_feature_version >= 44))
1225 			adev->gfx.me_fw_write_wait = true;
1226 
1227 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1228 		    (adev->gfx.mec_feature_version >= 44))
1229 			adev->gfx.mec_fw_write_wait = true;
1230 		break;
1231 	case CHIP_VEGA20:
1232 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1233 		    (adev->gfx.me_feature_version >= 44) &&
1234 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1235 		    (adev->gfx.pfp_feature_version >= 44))
1236 			adev->gfx.me_fw_write_wait = true;
1237 
1238 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1239 		    (adev->gfx.mec_feature_version >= 44))
1240 			adev->gfx.mec_fw_write_wait = true;
1241 		break;
1242 	case CHIP_RAVEN:
1243 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1244 		    (adev->gfx.me_feature_version >= 42) &&
1245 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1246 		    (adev->gfx.pfp_feature_version >= 42))
1247 			adev->gfx.me_fw_write_wait = true;
1248 
1249 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1250 		    (adev->gfx.mec_feature_version >= 42))
1251 			adev->gfx.mec_fw_write_wait = true;
1252 		break;
1253 	default:
1254 		adev->gfx.me_fw_write_wait = true;
1255 		adev->gfx.mec_fw_write_wait = true;
1256 		break;
1257 	}
1258 }
1259 
1260 struct amdgpu_gfxoff_quirk {
1261 	u16 chip_vendor;
1262 	u16 chip_device;
1263 	u16 subsys_vendor;
1264 	u16 subsys_device;
1265 	u8 revision;
1266 };
1267 
1268 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1269 	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1270 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1271 	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1272 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1273 	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1274 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1275 	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1276 	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1277 	{ 0, 0, 0, 0, 0 },
1278 };
1279 
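/*
 * Walk the quirk table above and return true if this exact
 * vendor/device/subsystem/revision combination is known to have broken
 * GFXOFF.
 */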
1280 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1281 {
1282 	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1283 
1284 	while (p && p->chip_device != 0) {
1285 		if (pdev->vendor == p->chip_vendor &&
1286 		    pdev->device == p->chip_device &&
1287 		    pdev->subsystem_vendor == p->subsys_vendor &&
1288 		    pdev->subsystem_device == p->subsys_device &&
1289 		    pdev->revision == p->revision) {
1290 			return true;
1291 		}
1292 		++p;
1293 	}
1294 	return false;
1295 }
1296 
1297 static bool is_raven_kicker(struct amdgpu_device *adev)
1298 {
1299 	if (adev->pm.fw_version >= 0x41e2b)
1300 		return true;
1301 	else
1302 		return false;
1303 }
1304 
1305 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1306 {
1307 	if ((adev->asic_type == CHIP_RENOIR) &&
1308 	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1309 	    (adev->gfx.me_feature_version >= 52))
1310 		return true;
1311 	else
1312 		return false;
1313 }
1314 
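/*
 * Disable GFXOFF on quirked boards, and on Raven require new enough RLC
 * firmware before leaving it enabled; when GFXOFF stays enabled, set the
 * matching GFX powergating flags for Raven and Renoir.
 */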
1315 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1316 {
1317 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1318 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1319 
1320 	switch (adev->asic_type) {
1321 	case CHIP_VEGA10:
1322 	case CHIP_VEGA12:
1323 	case CHIP_VEGA20:
1324 		break;
1325 	case CHIP_RAVEN:
1326 		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1327 		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1328 		    ((!is_raven_kicker(adev) &&
1329 		      adev->gfx.rlc_fw_version < 531) ||
1330 		     (adev->gfx.rlc_feature_version < 1) ||
1331 		     !adev->gfx.rlc.is_rlc_v2_1))
1332 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1333 
1334 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1335 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1336 				AMD_PG_SUPPORT_CP |
1337 				AMD_PG_SUPPORT_RLC_SMU_HS;
1338 		break;
1339 	case CHIP_RENOIR:
1340 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1341 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1342 				AMD_PG_SUPPORT_CP |
1343 				AMD_PG_SUPPORT_RLC_SMU_HS;
1344 		break;
1345 	default:
1346 		break;
1347 	}
1348 }
1349 
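/*
 * Fetch and validate the PFP, ME and CE firmware images for the given chip,
 * record their ucode/feature versions, and, for PSP front-door loading,
 * register each image in adev->firmware.ucode[] and account for its size.
 */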
1350 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1351 					  const char *chip_name)
1352 {
1353 	char fw_name[30];
1354 	int err;
1355 	struct amdgpu_firmware_info *info = NULL;
1356 	const struct common_firmware_header *header = NULL;
1357 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1358 
1359 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1360 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1361 	if (err)
1362 		goto out;
1363 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1364 	if (err)
1365 		goto out;
1366 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1367 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1368 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1369 
1370 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1371 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1372 	if (err)
1373 		goto out;
1374 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1375 	if (err)
1376 		goto out;
1377 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1378 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1379 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1380 
1381 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1382 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1383 	if (err)
1384 		goto out;
1385 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1386 	if (err)
1387 		goto out;
1388 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1389 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1390 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1391 
1392 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1393 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1394 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1395 		info->fw = adev->gfx.pfp_fw;
1396 		header = (const struct common_firmware_header *)info->fw->data;
1397 		adev->firmware.fw_size +=
1398 			roundup2(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1399 
1400 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1401 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1402 		info->fw = adev->gfx.me_fw;
1403 		header = (const struct common_firmware_header *)info->fw->data;
1404 		adev->firmware.fw_size +=
1405 			roundup2(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1406 
1407 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1408 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1409 		info->fw = adev->gfx.ce_fw;
1410 		header = (const struct common_firmware_header *)info->fw->data;
1411 		adev->firmware.fw_size +=
1412 			roundup2(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1413 	}
1414 
1415 out:
1416 	if (err) {
1417 		dev_err(adev->dev,
1418 			"gfx9: Failed to load firmware \"%s\"\n",
1419 			fw_name);
1420 		release_firmware(adev->gfx.pfp_fw);
1421 		adev->gfx.pfp_fw = NULL;
1422 		release_firmware(adev->gfx.me_fw);
1423 		adev->gfx.me_fw = NULL;
1424 		release_firmware(adev->gfx.ce_fw);
1425 		adev->gfx.ce_fw = NULL;
1426 	}
1427 	return err;
1428 }
1429 
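/*
 * Fetch and validate the RLC firmware (selecting the AM4 or "kicker"
 * variants where needed), copy the register list format/restore tables out
 * of the image, parse the v2.1 extension if present, and register the RLC
 * ucode(s) for PSP loading.
 */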
1430 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1431 					  const char *chip_name)
1432 {
1433 	char fw_name[30];
1434 	int err;
1435 	struct amdgpu_firmware_info *info = NULL;
1436 	const struct common_firmware_header *header = NULL;
1437 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1438 	unsigned int *tmp = NULL;
1439 	unsigned int i = 0;
1440 	uint16_t version_major;
1441 	uint16_t version_minor;
1442 	uint32_t smu_version;
1443 
1444 	/*
1445 	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1446 	 * instead of picasso_rlc.bin.
1447 	 * Judgment method:
1448 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1449 	 *          or revision >= 0xD8 && revision <= 0xDF
1450 	 * otherwise is PCO FP5
1451 	 * otherwise it is PCO FP5
1452 	if (!strcmp(chip_name, "picasso") &&
1453 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1454 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1455 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1456 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1457 		(smu_version >= 0x41e2b))
1458 		/*
1459 		 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1460 		 */
1461 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1462 	else
1463 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1464 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1465 	if (err)
1466 		goto out;
1467 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1468 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1469 
1470 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1471 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1472 	if (version_major == 2 && version_minor == 1)
1473 		adev->gfx.rlc.is_rlc_v2_1 = true;
1474 
1475 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1476 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1477 	adev->gfx.rlc.save_and_restore_offset =
1478 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1479 	adev->gfx.rlc.clear_state_descriptor_offset =
1480 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1481 	adev->gfx.rlc.avail_scratch_ram_locations =
1482 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1483 	adev->gfx.rlc.reg_restore_list_size =
1484 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1485 	adev->gfx.rlc.reg_list_format_start =
1486 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1487 	adev->gfx.rlc.reg_list_format_separate_start =
1488 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1489 	adev->gfx.rlc.starting_offsets_start =
1490 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1491 	adev->gfx.rlc.reg_list_format_size_bytes =
1492 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1493 	adev->gfx.rlc.reg_list_size_bytes =
1494 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1495 	adev->gfx.rlc.register_list_format =
1496 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1497 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1498 	if (!adev->gfx.rlc.register_list_format) {
1499 		err = -ENOMEM;
1500 		goto out;
1501 	}
1502 
1503 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1504 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1505 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1506 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1507 
1508 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1509 
1510 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1511 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1512 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1513 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1514 
1515 	if (adev->gfx.rlc.is_rlc_v2_1)
1516 		gfx_v9_0_init_rlc_ext_microcode(adev);
1517 
1518 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1519 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1520 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1521 		info->fw = adev->gfx.rlc_fw;
1522 		header = (const struct common_firmware_header *)info->fw->data;
1523 		adev->firmware.fw_size +=
1524 			roundup2(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1525 
1526 		if (adev->gfx.rlc.is_rlc_v2_1 &&
1527 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1528 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1529 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1530 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1531 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1532 			info->fw = adev->gfx.rlc_fw;
1533 			adev->firmware.fw_size +=
1534 				roundup2(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1535 
1536 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1537 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1538 			info->fw = adev->gfx.rlc_fw;
1539 			adev->firmware.fw_size +=
1540 				roundup2(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1541 
1542 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1543 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1544 			info->fw = adev->gfx.rlc_fw;
1545 			adev->firmware.fw_size +=
1546 				roundup2(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1547 		}
1548 	}
1549 
1550 out:
1551 	if (err) {
1552 		dev_err(adev->dev,
1553 			"gfx9: Failed to load firmware \"%s\"\n",
1554 			fw_name);
1555 		release_firmware(adev->gfx.rlc_fw);
1556 		adev->gfx.rlc_fw = NULL;
1557 	}
1558 	return err;
1559 }
1560 
1561 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1562 {
1563 	if (adev->asic_type == CHIP_ALDEBARAN ||
1564 	    adev->asic_type == CHIP_ARCTURUS ||
1565 	    adev->asic_type == CHIP_RENOIR)
1566 		return false;
1567 
1568 	return true;
1569 }
1570 
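/*
 * Fetch and validate the MEC firmware, optionally the separate MEC2 image on
 * ASICs that still ship one, and register the MEC/MEC2 ucode and jump tables
 * for PSP loading. The GFXOFF and fw_write_wait checks run once all CP
 * firmware versions are known.
 */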
1571 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1572 					  const char *chip_name)
1573 {
1574 	char fw_name[30];
1575 	int err;
1576 	struct amdgpu_firmware_info *info = NULL;
1577 	const struct common_firmware_header *header = NULL;
1578 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1579 
1580 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1581 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1582 	if (err)
1583 		goto out;
1584 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1585 	if (err)
1586 		goto out;
1587 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1588 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1589 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1590 
1591 
1592 	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1593 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1594 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1595 		if (!err) {
1596 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1597 			if (err)
1598 				goto out;
1599 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1600 				adev->gfx.mec2_fw->data;
1601 			adev->gfx.mec2_fw_version =
1602 				le32_to_cpu(cp_hdr->header.ucode_version);
1603 			adev->gfx.mec2_feature_version =
1604 				le32_to_cpu(cp_hdr->ucode_feature_version);
1605 		} else {
1606 			err = 0;
1607 			adev->gfx.mec2_fw = NULL;
1608 		}
1609 	} else {
1610 		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1611 		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1612 	}
1613 
1614 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1615 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1616 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1617 		info->fw = adev->gfx.mec_fw;
1618 		header = (const struct common_firmware_header *)info->fw->data;
1619 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1620 		adev->firmware.fw_size +=
1621 			roundup2(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1622 
1623 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1624 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1625 		info->fw = adev->gfx.mec_fw;
1626 		adev->firmware.fw_size +=
1627 			roundup2(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1628 
1629 		if (adev->gfx.mec2_fw) {
1630 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1631 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1632 			info->fw = adev->gfx.mec2_fw;
1633 			header = (const struct common_firmware_header *)info->fw->data;
1634 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1635 			adev->firmware.fw_size +=
1636 				roundup2(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1637 
1638 			/* TODO: Determine if MEC2 JT FW loading can be removed
1639 			 * for all GFX V9 asics and above */
1640 			if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1641 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1642 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1643 				info->fw = adev->gfx.mec2_fw;
1644 				adev->firmware.fw_size +=
1645 					roundup2(le32_to_cpu(cp_hdr->jt_size) * 4,
1646 					PAGE_SIZE);
1647 			}
1648 		}
1649 	}
1650 
1651 out:
1652 	gfx_v9_0_check_if_need_gfxoff(adev);
1653 	gfx_v9_0_check_fw_write_wait(adev);
1654 	if (err) {
1655 		dev_err(adev->dev,
1656 			"gfx9: Failed to load firmware \"%s\"\n",
1657 			fw_name);
1658 		release_firmware(adev->gfx.mec_fw);
1659 		adev->gfx.mec_fw = NULL;
1660 		release_firmware(adev->gfx.mec2_fw);
1661 		adev->gfx.mec2_fw = NULL;
1662 	}
1663 	return err;
1664 }
1665 
1666 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1667 {
1668 	const char *chip_name;
1669 	int r;
1670 
1671 	DRM_DEBUG("\n");
1672 
1673 	switch (adev->asic_type) {
1674 	case CHIP_VEGA10:
1675 		chip_name = "vega10";
1676 		break;
1677 	case CHIP_VEGA12:
1678 		chip_name = "vega12";
1679 		break;
1680 	case CHIP_VEGA20:
1681 		chip_name = "vega20";
1682 		break;
1683 	case CHIP_RAVEN:
1684 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1685 			chip_name = "raven2";
1686 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1687 			chip_name = "picasso";
1688 		else
1689 			chip_name = "raven";
1690 		break;
1691 	case CHIP_ARCTURUS:
1692 		chip_name = "arcturus";
1693 		break;
1694 	case CHIP_RENOIR:
1695 		if (adev->apu_flags & AMD_APU_IS_RENOIR)
1696 			chip_name = "renoir";
1697 		else
1698 			chip_name = "green_sardine";
1699 		break;
1700 	case CHIP_ALDEBARAN:
1701 		chip_name = "aldebaran";
1702 		break;
1703 	default:
1704 		BUG();
1705 	}
1706 
1707 	/* No CPG in Arcturus */
1708 	if (adev->gfx.num_gfx_rings) {
1709 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1710 		if (r)
1711 			return r;
1712 	}
1713 
1714 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1715 	if (r)
1716 		return r;
1717 
1718 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1719 	if (r)
1720 		return r;
1721 
1722 	return r;
1723 }
1724 
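/*
 * Compute the number of dwords needed for the clear-state buffer: preamble
 * and context control, one SET_CONTEXT_REG header per extent plus its
 * registers, and the trailing preamble-end and CLEAR_STATE packets.
 */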
1725 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1726 {
1727 	u32 count = 0;
1728 	const struct cs_section_def *sect = NULL;
1729 	const struct cs_extent_def *ext = NULL;
1730 
1731 	/* begin clear state */
1732 	count += 2;
1733 	/* context control state */
1734 	count += 3;
1735 
1736 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1737 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1738 			if (sect->id == SECT_CONTEXT)
1739 				count += 2 + ext->reg_count;
1740 			else
1741 				return 0;
1742 		}
1743 	}
1744 
1745 	/* end clear state */
1746 	count += 2;
1747 	/* clear state */
1748 	count += 2;
1749 
1750 	return count;
1751 }
1752 
1753 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1754 				    volatile u32 *buffer)
1755 {
1756 	u32 count = 0, i;
1757 	const struct cs_section_def *sect = NULL;
1758 	const struct cs_extent_def *ext = NULL;
1759 
1760 	if (adev->gfx.rlc.cs_data == NULL)
1761 		return;
1762 	if (buffer == NULL)
1763 		return;
1764 
1765 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1766 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1767 
1768 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1769 	buffer[count++] = cpu_to_le32(0x80000000);
1770 	buffer[count++] = cpu_to_le32(0x80000000);
1771 
1772 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1773 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1774 			if (sect->id == SECT_CONTEXT) {
1775 				buffer[count++] =
1776 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1777 				buffer[count++] = cpu_to_le32(ext->reg_index -
1778 						PACKET3_SET_CONTEXT_REG_START);
1779 				for (i = 0; i < ext->reg_count; i++)
1780 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1781 			} else {
1782 				return;
1783 			}
1784 		}
1785 	}
1786 
1787 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1788 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1789 
1790 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1791 	buffer[count++] = cpu_to_le32(0);
1792 }
1793 
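/*
 * Build the per-SE/SH always-on CU bitmaps (4 CUs on APUs, 8 on Vega12,
 * 12 otherwise), program RLC_PG_ALWAYS_ON_CU_MASK and
 * RLC_LB_ALWAYS_ACTIVE_CU_MASK, and cache the result in cu_info.
 */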
1794 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1795 {
1796 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1797 	uint32_t pg_always_on_cu_num = 2;
1798 	uint32_t always_on_cu_num;
1799 	uint32_t i, j, k;
1800 	uint32_t mask, cu_bitmap, counter;
1801 
1802 	if (adev->flags & AMD_IS_APU)
1803 		always_on_cu_num = 4;
1804 	else if (adev->asic_type == CHIP_VEGA12)
1805 		always_on_cu_num = 8;
1806 	else
1807 		always_on_cu_num = 12;
1808 
1809 	mutex_lock(&adev->grbm_idx_mutex);
1810 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1811 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1812 			mask = 1;
1813 			cu_bitmap = 0;
1814 			counter = 0;
1815 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1816 
1817 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1818 				if (cu_info->bitmap[i][j] & mask) {
1819 					if (counter == pg_always_on_cu_num)
1820 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1821 					if (counter < always_on_cu_num)
1822 						cu_bitmap |= mask;
1823 					else
1824 						break;
1825 					counter++;
1826 				}
1827 				mask <<= 1;
1828 			}
1829 
1830 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1831 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1832 		}
1833 	}
1834 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1835 	mutex_unlock(&adev->grbm_idx_mutex);
1836 }
1837 
1838 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1839 {
1840 	uint32_t data;
1841 
1842 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1843 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1844 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1845 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1846 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1847 
1848 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1849 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1850 
1851 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1852 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1853 
1854 	mutex_lock(&adev->grbm_idx_mutex);
1855 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH */
1856 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1857 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1858 
1859 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1860 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1861 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1862 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1863 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1864 
1865 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1866 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1867 	data &= 0x0000FFFF;
1868 	data |= 0x00C00000;
1869 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1870 
1871 	/*
1872 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1873 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1874 	 */
1875 
1876 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1877 	 * but used for RLC_LB_CNTL configuration */
1878 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1879 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1880 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1881 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1882 	mutex_unlock(&adev->grbm_idx_mutex);
1883 
1884 	gfx_v9_0_init_always_on_cu_mask(adev);
1885 }
1886 
1887 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1888 {
1889 	uint32_t data;
1890 
1891 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1892 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1893 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1894 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1895 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1896 
1897 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1898 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1899 
1900 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1901 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1902 
1903 	mutex_lock(&adev->grbm_idx_mutex);
1904 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH */
1905 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1906 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1907 
1908 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1909 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1910 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1911 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1912 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1913 
1914 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1915 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1916 	data &= 0x0000FFFF;
1917 	data |= 0x00C00000;
1918 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1919 
1920 	/*
1921 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1922 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1923 	 */
1924 
1925 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1926 	 * but used for RLC_LB_CNTL configuration */
1927 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1928 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1929 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1930 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1931 	mutex_unlock(&adev->grbm_idx_mutex);
1932 
1933 	gfx_v9_0_init_always_on_cu_mask(adev);
1934 }
1935 
1936 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1937 {
1938 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1939 }
1940 
1941 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1942 {
1943 	if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1944 		return 5;
1945 	else
1946 		return 4;
1947 }
1948 
1949 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1950 {
1951 	const struct cs_section_def *cs_data;
1952 	int r;
1953 
1954 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1955 
1956 	cs_data = adev->gfx.rlc.cs_data;
1957 
1958 	if (cs_data) {
1959 		/* init clear state block */
1960 		r = amdgpu_gfx_rlc_init_csb(adev);
1961 		if (r)
1962 			return r;
1963 	}
1964 
1965 	if (adev->flags & AMD_IS_APU) {
1966 		/* TODO: double check the cp_table_size for RV */
1967 		adev->gfx.rlc.cp_table_size = roundup2(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1968 		r = amdgpu_gfx_rlc_init_cpt(adev);
1969 		if (r)
1970 			return r;
1971 	}
1972 
1973 	switch (adev->asic_type) {
1974 	case CHIP_RAVEN:
1975 		gfx_v9_0_init_lbpw(adev);
1976 		break;
1977 	case CHIP_VEGA20:
1978 		gfx_v9_4_init_lbpw(adev);
1979 		break;
1980 	default:
1981 		break;
1982 	}
1983 
1984 	/* init spm vmid with 0xf */
1985 	if (adev->gfx.rlc.funcs->update_spm_vmid)
1986 		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1987 
1988 	return 0;
1989 }
1990 
1991 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1992 {
1993 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1994 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1995 }
1996 
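/*
 * Allocate the MEC HPD EOP buffer in VRAM (sized for the acquired compute
 * rings), clear it, and copy the MEC microcode into a GTT buffer so the CP
 * can fetch it.
 */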
1997 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1998 {
1999 	int r;
2000 	u32 *hpd;
2001 	const __le32 *fw_data;
2002 	unsigned fw_size;
2003 	u32 *fw;
2004 	size_t mec_hpd_size;
2005 
2006 	const struct gfx_firmware_header_v1_0 *mec_hdr;
2007 
2008 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2009 
2010 	/* take ownership of the relevant compute queues */
2011 	amdgpu_gfx_compute_queue_acquire(adev);
2012 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
2013 	if (mec_hpd_size) {
2014 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
2015 					      AMDGPU_GEM_DOMAIN_VRAM,
2016 					      &adev->gfx.mec.hpd_eop_obj,
2017 					      &adev->gfx.mec.hpd_eop_gpu_addr,
2018 					      (void **)&hpd);
2019 		if (r) {
2020 			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
2021 			gfx_v9_0_mec_fini(adev);
2022 			return r;
2023 		}
2024 
2025 		memset(hpd, 0, mec_hpd_size);
2026 
2027 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
2028 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
2029 	}
2030 
2031 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2032 
2033 	fw_data = (const __le32 *)
2034 		(adev->gfx.mec_fw->data +
2035 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2036 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
2037 
2038 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
2039 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2040 				      &adev->gfx.mec.mec_fw_obj,
2041 				      &adev->gfx.mec.mec_fw_gpu_addr,
2042 				      (void **)&fw);
2043 	if (r) {
2044 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
2045 		gfx_v9_0_mec_fini(adev);
2046 		return r;
2047 	}
2048 
2049 	memcpy(fw, fw_data, fw_size);
2050 
2051 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
2052 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
2053 
2054 	return 0;
2055 }
2056 
2057 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
2058 {
2059 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2060 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2061 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2062 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
2063 		(SQ_IND_INDEX__FORCE_READ_MASK));
2064 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2065 }
2066 
2067 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
2068 			   uint32_t wave, uint32_t thread,
2069 			   uint32_t regno, uint32_t num, uint32_t *out)
2070 {
2071 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2072 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2073 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2074 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
2075 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2076 		(SQ_IND_INDEX__FORCE_READ_MASK) |
2077 		(SQ_IND_INDEX__AUTO_INCR_MASK));
2078 	while (num--)
2079 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2080 }
2081 
2082 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2083 {
2084 	/* type 1 wave data */
2085 	dst[(*no_fields)++] = 1;
2086 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2087 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2088 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2089 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2090 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2091 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2092 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2093 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2094 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2095 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2096 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2097 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2098 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2099 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2100 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
2101 }
2102 
2103 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2104 				     uint32_t wave, uint32_t start,
2105 				     uint32_t size, uint32_t *dst)
2106 {
2107 	wave_read_regs(
2108 		adev, simd, wave, 0,
2109 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
2110 }
2111 
2112 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2113 				     uint32_t wave, uint32_t thread,
2114 				     uint32_t start, uint32_t size,
2115 				     uint32_t *dst)
2116 {
2117 	wave_read_regs(
2118 		adev, simd, wave, thread,
2119 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
2120 }
2121 
2122 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2123 				  u32 me, u32 pipe, u32 q, u32 vm)
2124 {
2125 	soc15_grbm_select(adev, me, pipe, q, vm);
2126 }
2127 
2128 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2129 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2130 	.select_se_sh = &gfx_v9_0_select_se_sh,
2131 	.read_wave_data = &gfx_v9_0_read_wave_data,
2132 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2133 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2134 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2135 };
2136 
2137 static const struct amdgpu_gfx_ras_funcs gfx_v9_0_ras_funcs = {
2138 	.ras_late_init = amdgpu_gfx_ras_late_init,
2139 	.ras_fini = amdgpu_gfx_ras_fini,
2140 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
2141 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2142 	.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2143 };
2144 
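/*
 * Per-ASIC gfx configuration: hook up the gfx function table (and RAS
 * callbacks where supported), pick the GB_ADDR_CONFIG value (golden setting
 * or massaged register readback), and derive the pipe/bank/RB/SE counts from
 * its fields.
 */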
2145 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2146 {
2147 	u32 gb_addr_config;
2148 	int err;
2149 
2150 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2151 
2152 	switch (adev->asic_type) {
2153 	case CHIP_VEGA10:
2154 		adev->gfx.config.max_hw_contexts = 8;
2155 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2156 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2157 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2158 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2159 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2160 		break;
2161 	case CHIP_VEGA12:
2162 		adev->gfx.config.max_hw_contexts = 8;
2163 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2164 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2165 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2166 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2167 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2168 		DRM_INFO("fix gfx.config for vega12\n");
2169 		break;
2170 	case CHIP_VEGA20:
2171 		adev->gfx.ras_funcs = &gfx_v9_0_ras_funcs;
2172 		adev->gfx.config.max_hw_contexts = 8;
2173 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2174 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2175 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2176 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2177 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2178 		gb_addr_config &= ~0xf3e777ff;
2179 		gb_addr_config |= 0x22014042;
2180 		/* check vbios table if gpu info is not available */
2181 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2182 		if (err)
2183 			return err;
2184 		break;
2185 	case CHIP_RAVEN:
2186 		adev->gfx.config.max_hw_contexts = 8;
2187 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2188 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2189 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2190 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2191 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2192 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2193 		else
2194 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2195 		break;
2196 	case CHIP_ARCTURUS:
2197 		adev->gfx.ras_funcs = &gfx_v9_4_ras_funcs;
2198 		adev->gfx.config.max_hw_contexts = 8;
2199 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2200 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2201 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2202 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2203 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2204 		gb_addr_config &= ~0xf3e777ff;
2205 		gb_addr_config |= 0x22014042;
2206 		break;
2207 	case CHIP_RENOIR:
2208 		adev->gfx.config.max_hw_contexts = 8;
2209 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2210 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2211 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2212 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2213 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2214 		gb_addr_config &= ~0xf3e777ff;
2215 		gb_addr_config |= 0x22010042;
2216 		break;
2217 	case CHIP_ALDEBARAN:
2218 		adev->gfx.ras_funcs = &gfx_v9_4_2_ras_funcs;
2219 		adev->gfx.config.max_hw_contexts = 8;
2220 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2221 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2222 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2223 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2224 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2225 		gb_addr_config &= ~0xf3e777ff;
2226 		gb_addr_config |= 0x22014042;
2227 		/* check vbios table if gpu info is not available */
2228 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2229 		if (err)
2230 			return err;
2231 		break;
2232 	default:
2233 		BUG();
2234 		break;
2235 	}
2236 
2237 	adev->gfx.config.gb_addr_config = gb_addr_config;
2238 
2239 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2240 			REG_GET_FIELD(
2241 					adev->gfx.config.gb_addr_config,
2242 					GB_ADDR_CONFIG,
2243 					NUM_PIPES);
2244 
2245 	adev->gfx.config.max_tile_pipes =
2246 		adev->gfx.config.gb_addr_config_fields.num_pipes;
2247 
2248 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2249 			REG_GET_FIELD(
2250 					adev->gfx.config.gb_addr_config,
2251 					GB_ADDR_CONFIG,
2252 					NUM_BANKS);
2253 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2254 			REG_GET_FIELD(
2255 					adev->gfx.config.gb_addr_config,
2256 					GB_ADDR_CONFIG,
2257 					MAX_COMPRESSED_FRAGS);
2258 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2259 			REG_GET_FIELD(
2260 					adev->gfx.config.gb_addr_config,
2261 					GB_ADDR_CONFIG,
2262 					NUM_RB_PER_SE);
2263 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2264 			REG_GET_FIELD(
2265 					adev->gfx.config.gb_addr_config,
2266 					GB_ADDR_CONFIG,
2267 					NUM_SHADER_ENGINES);
2268 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2269 			REG_GET_FIELD(
2270 					adev->gfx.config.gb_addr_config,
2271 					GB_ADDR_CONFIG,
2272 					PIPE_INTERLEAVE_SIZE));
2273 
2274 	return 0;
2275 }
2276 
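/*
 * Initialize one compute ring: map it onto the given MEC/pipe/queue, assign
 * its doorbell and EOP slot, pick the EOP interrupt source and hardware
 * priority, then hand off to amdgpu_ring_init().
 */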
2277 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2278 				      int mec, int pipe, int queue)
2279 {
2280 	unsigned irq_type;
2281 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2282 	unsigned int hw_prio;
2283 
2284 	ring = &adev->gfx.compute_ring[ring_id];
2285 
2286 	/* mec0 is me1 */
2287 	ring->me = mec + 1;
2288 	ring->pipe = pipe;
2289 	ring->queue = queue;
2290 
2291 	ring->ring_obj = NULL;
2292 	ring->use_doorbell = true;
2293 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2294 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2295 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2296 	snprintf(ring->name, sizeof(ring->name), "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2297 
2298 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2299 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2300 		+ ring->pipe;
2301 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2302 			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
2303 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2304 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2305 				hw_prio, NULL);
2306 }
2307 
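/*
 * sw_init: register the CP interrupt sources, load microcode, set up the RLC
 * and MEC buffers, create the gfx and compute rings (plus KIQ and MQDs), and
 * run the early per-ASIC configuration.
 */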
2308 static int gfx_v9_0_sw_init(void *handle)
2309 {
2310 	int i, j, k, r, ring_id;
2311 	struct amdgpu_ring *ring;
2312 	struct amdgpu_kiq *kiq;
2313 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2314 
2315 	switch (adev->asic_type) {
2316 	case CHIP_VEGA10:
2317 	case CHIP_VEGA12:
2318 	case CHIP_VEGA20:
2319 	case CHIP_RAVEN:
2320 	case CHIP_ARCTURUS:
2321 	case CHIP_RENOIR:
2322 	case CHIP_ALDEBARAN:
2323 		adev->gfx.mec.num_mec = 2;
2324 		break;
2325 	default:
2326 		adev->gfx.mec.num_mec = 1;
2327 		break;
2328 	}
2329 
2330 	adev->gfx.mec.num_pipe_per_mec = 4;
2331 	adev->gfx.mec.num_queue_per_pipe = 8;
2332 
2333 	/* EOP Event */
2334 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2335 	if (r)
2336 		return r;
2337 
2338 	/* Privileged reg */
2339 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2340 			      &adev->gfx.priv_reg_irq);
2341 	if (r)
2342 		return r;
2343 
2344 	/* Privileged inst */
2345 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2346 			      &adev->gfx.priv_inst_irq);
2347 	if (r)
2348 		return r;
2349 
2350 	/* ECC error */
2351 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2352 			      &adev->gfx.cp_ecc_error_irq);
2353 	if (r)
2354 		return r;
2355 
2356 	/* FUE error */
2357 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2358 			      &adev->gfx.cp_ecc_error_irq);
2359 	if (r)
2360 		return r;
2361 
2362 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2363 
2364 	gfx_v9_0_scratch_init(adev);
2365 
2366 	r = gfx_v9_0_init_microcode(adev);
2367 	if (r) {
2368 		DRM_ERROR("Failed to load gfx firmware!\n");
2369 		return r;
2370 	}
2371 
2372 	r = adev->gfx.rlc.funcs->init(adev);
2373 	if (r) {
2374 		DRM_ERROR("Failed to init rlc BOs!\n");
2375 		return r;
2376 	}
2377 
2378 	r = gfx_v9_0_mec_init(adev);
2379 	if (r) {
2380 		DRM_ERROR("Failed to init MEC BOs!\n");
2381 		return r;
2382 	}
2383 
2384 	/* set up the gfx ring */
2385 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2386 		ring = &adev->gfx.gfx_ring[i];
2387 		ring->ring_obj = NULL;
2388 		if (!i)
2389 			snprintf(ring->name, sizeof(ring->name), "gfx");
2390 		else
2391 			snprintf(ring->name, sizeof(ring->name), "gfx_%d", i);
2392 		ring->use_doorbell = true;
2393 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2394 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2395 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2396 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2397 		if (r)
2398 			return r;
2399 	}
2400 
2401 	/* set up the compute queues - allocate horizontally across pipes */
2402 	ring_id = 0;
2403 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2404 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2405 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2406 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2407 					continue;
2408 
2409 				r = gfx_v9_0_compute_ring_init(adev,
2410 							       ring_id,
2411 							       i, k, j);
2412 				if (r)
2413 					return r;
2414 
2415 				ring_id++;
2416 			}
2417 		}
2418 	}
2419 
2420 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2421 	if (r) {
2422 		DRM_ERROR("Failed to init KIQ BOs!\n");
2423 		return r;
2424 	}
2425 
2426 	kiq = &adev->gfx.kiq;
2427 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2428 	if (r)
2429 		return r;
2430 
2431 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2432 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2433 	if (r)
2434 		return r;
2435 
2436 	adev->gfx.ce_ram_size = 0x8000;
2437 
2438 	r = gfx_v9_0_gpu_early_init(adev);
2439 	if (r)
2440 		return r;
2441 
2442 	return 0;
2443 }
2444 
2445 
2446 static int gfx_v9_0_sw_fini(void *handle)
2447 {
2448 	int i;
2449 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2450 
2451 	if (adev->gfx.ras_funcs &&
2452 	    adev->gfx.ras_funcs->ras_fini)
2453 		adev->gfx.ras_funcs->ras_fini(adev);
2454 
2455 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2456 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2457 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2458 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2459 
2460 	amdgpu_gfx_mqd_sw_fini(adev);
2461 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2462 	amdgpu_gfx_kiq_fini(adev);
2463 
2464 	gfx_v9_0_mec_fini(adev);
2465 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2466 	if (adev->flags & AMD_IS_APU) {
2467 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2468 				&adev->gfx.rlc.cp_table_gpu_addr,
2469 				(void **)&adev->gfx.rlc.cp_table_ptr);
2470 	}
2471 	gfx_v9_0_free_microcode(adev);
2472 
2473 	return 0;
2474 }
2475 
2476 
2477 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2478 {
2479 	/* TODO */
2480 }
2481 
2482 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2483 			   u32 instance)
2484 {
2485 	u32 data;
2486 
2487 	if (instance == 0xffffffff)
2488 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2489 	else
2490 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2491 
2492 	if (se_num == 0xffffffff)
2493 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2494 	else
2495 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2496 
2497 	if (sh_num == 0xffffffff)
2498 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2499 	else
2500 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2501 
2502 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2503 }
2504 
2505 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2506 {
2507 	u32 data, mask;
2508 
2509 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2510 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2511 
2512 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2513 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2514 
2515 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2516 					 adev->gfx.config.max_sh_per_se);
2517 
2518 	return (~data) & mask;
2519 }
2520 
2521 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2522 {
2523 	int i, j;
2524 	u32 data;
2525 	u32 active_rbs = 0;
2526 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2527 					adev->gfx.config.max_sh_per_se;
2528 
2529 	mutex_lock(&adev->grbm_idx_mutex);
2530 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2531 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2532 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2533 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2534 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2535 					       rb_bitmap_width_per_sh);
2536 		}
2537 	}
2538 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2539 	mutex_unlock(&adev->grbm_idx_mutex);
2540 
2541 	adev->gfx.config.backend_enable_mask = active_rbs;
2542 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2543 }
2544 
2545 #define DEFAULT_SH_MEM_BASES	(0x6000)
2546 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2547 {
2548 	int i;
2549 	uint32_t sh_mem_config;
2550 	uint32_t sh_mem_bases;
2551 
2552 	/*
2553 	 * Configure apertures:
2554 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2555 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2556 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2557 	 */
2558 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2559 
2560 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2561 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2562 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2563 
2564 	mutex_lock(&adev->srbm_mutex);
2565 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2566 		soc15_grbm_select(adev, 0, 0, 0, i);
2567 		/* CP and shaders */
2568 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2569 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2570 	}
2571 	soc15_grbm_select(adev, 0, 0, 0, 0);
2572 	mutex_unlock(&adev->srbm_mutex);
2573 
2574 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2575 	   access. These should be enabled by FW for target VMIDs. */
2576 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2577 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2578 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2579 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2580 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2581 	}
2582 }
2583 
2584 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2585 {
2586 	int vmid;
2587 
2588 	/*
2589 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2590 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2591 	 * the driver can enable them for graphics. VMID0 should maintain
2592 	 * access so that HWS firmware can save/restore entries.
2593 	 */
2594 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2595 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2596 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2597 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2598 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2599 	}
2600 }
2601 
2602 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2603 {
2604 	uint32_t tmp;
2605 
2606 	switch (adev->asic_type) {
2607 	case CHIP_ARCTURUS:
2608 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2609 		tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2610 					DISABLE_BARRIER_WAITCNT, 1);
2611 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2612 		break;
2613 	default:
2614 		break;
2615 	}
2616 }
2617 
2618 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2619 {
2620 	u32 tmp;
2621 	int i;
2622 
2623 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2624 
2625 	gfx_v9_0_tiling_mode_table_init(adev);
2626 
2627 	gfx_v9_0_setup_rb(adev);
2628 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2629 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2630 
2631 	/* XXX SH_MEM regs */
2632 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2633 	mutex_lock(&adev->srbm_mutex);
2634 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2635 		soc15_grbm_select(adev, 0, 0, 0, i);
2636 		/* CP and shaders */
2637 		if (i == 0) {
2638 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2639 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2640 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2641 					    !!adev->gmc.noretry);
2642 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2643 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2644 		} else {
2645 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2646 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2647 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2648 					    !!adev->gmc.noretry);
2649 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2650 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2651 				(adev->gmc.private_aperture_start >> 48));
2652 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2653 				(adev->gmc.shared_aperture_start >> 48));
2654 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2655 		}
2656 	}
2657 	soc15_grbm_select(adev, 0, 0, 0, 0);
2658 
2659 	mutex_unlock(&adev->srbm_mutex);
2660 
2661 	gfx_v9_0_init_compute_vmid(adev);
2662 	gfx_v9_0_init_gds_vmid(adev);
2663 	gfx_v9_0_init_sq_config(adev);
2664 }
2665 
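/*
 * Poll the RLC serdes busy registers for every SE/SH (and the non-CU
 * masters) until they go idle or the usec timeout expires.
 */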
2666 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2667 {
2668 	u32 i, j, k;
2669 	u32 mask;
2670 
2671 	mutex_lock(&adev->grbm_idx_mutex);
2672 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2673 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2674 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2675 			for (k = 0; k < adev->usec_timeout; k++) {
2676 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2677 					break;
2678 				udelay(1);
2679 			}
2680 			if (k == adev->usec_timeout) {
2681 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2682 						      0xffffffff, 0xffffffff);
2683 				mutex_unlock(&adev->grbm_idx_mutex);
2684 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2685 					 i, j);
2686 				return;
2687 			}
2688 		}
2689 	}
2690 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2691 	mutex_unlock(&adev->grbm_idx_mutex);
2692 
2693 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2694 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2695 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2696 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2697 	for (k = 0; k < adev->usec_timeout; k++) {
2698 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2699 			break;
2700 		udelay(1);
2701 	}
2702 }
2703 
2704 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2705 					       bool enable)
2706 {
2707 	u32 tmp;
2708 
2709 	/* These interrupts should be enabled to drive DS clock */
2710 
2711 	tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2712 
2713 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2714 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2715 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2716 	if (adev->gfx.num_gfx_rings)
2717 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2718 
2719 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2720 }
2721 
2722 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2723 {
2724 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2725 	/* csib */
2726 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2727 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2728 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2729 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2730 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2731 			adev->gfx.rlc.clear_state_size);
2732 }
2733 
2734 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2735 				int indirect_offset,
2736 				int list_size,
2737 				int *unique_indirect_regs,
2738 				int unique_indirect_reg_count,
2739 				int *indirect_start_offsets,
2740 				int *indirect_start_offsets_count,
2741 				int max_start_offsets_count)
2742 {
2743 	int idx;
2744 
2745 	for (; indirect_offset < list_size; indirect_offset++) {
2746 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2747 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2748 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2749 
2750 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2751 			indirect_offset += 2;
2752 
2753 			/* look for the matching index */
2754 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2755 				if (unique_indirect_regs[idx] ==
2756 					register_list_format[indirect_offset] ||
2757 					!unique_indirect_regs[idx])
2758 					break;
2759 			}
2760 
2761 			BUG_ON(idx >= unique_indirect_reg_count);
2762 
2763 			if (!unique_indirect_regs[idx])
2764 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2765 
2766 			indirect_offset++;
2767 		}
2768 	}
2769 }
2770 
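/*
 * Program the RLC save/restore machinery: upload the register-restore table
 * to ARAM, stream the direct and indirect register list format (plus list
 * size and starting offsets) into RLC scratch, and fill the indirect
 * index/data control registers with the unique indirect registers found by
 * gfx_v9_1_parse_ind_reg_list().
 */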
2771 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2772 {
2773 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2774 	int unique_indirect_reg_count = 0;
2775 
2776 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2777 	int indirect_start_offsets_count = 0;
2778 
2779 	int list_size = 0;
2780 	int i = 0, j = 0;
2781 	u32 tmp = 0;
2782 
2783 	u32 *register_list_format =
2784 		kmemdup(adev->gfx.rlc.register_list_format,
2785 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2786 	if (!register_list_format)
2787 		return -ENOMEM;
2788 
2789 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2790 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2791 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2792 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2793 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2794 				    unique_indirect_regs,
2795 				    unique_indirect_reg_count,
2796 				    indirect_start_offsets,
2797 				    &indirect_start_offsets_count,
2798 				    ARRAY_SIZE(indirect_start_offsets));
2799 
2800 	/* enable auto address increment in case it is disabled */
2801 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2802 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2803 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2804 
2805 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2806 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2807 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2808 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2809 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2810 			adev->gfx.rlc.register_restore[i]);
2811 
2812 	/* load indirect register */
2813 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2814 		adev->gfx.rlc.reg_list_format_start);
2815 
2816 	/* direct register portion */
2817 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2818 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2819 			register_list_format[i]);
2820 
2821 	/* indirect register portion */
2822 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2823 		if (register_list_format[i] == 0xFFFFFFFF) {
2824 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2825 			continue;
2826 		}
2827 
2828 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2829 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2830 
2831 		for (j = 0; j < unique_indirect_reg_count; j++) {
2832 			if (register_list_format[i] == unique_indirect_regs[j]) {
2833 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2834 				break;
2835 			}
2836 		}
2837 
2838 		BUG_ON(j >= unique_indirect_reg_count);
2839 
2840 		i++;
2841 	}
2842 
2843 	/* set save/restore list size */
2844 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2845 	list_size = list_size >> 1;
2846 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2847 		adev->gfx.rlc.reg_restore_list_size);
2848 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2849 
2850 	/* write the starting offsets to RLC scratch ram */
2851 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2852 		adev->gfx.rlc.starting_offsets_start);
2853 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2854 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2855 		       indirect_start_offsets[i]);
2856 
2857 	/* load unique indirect regs */
2858 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2859 		if (unique_indirect_regs[i] != 0) {
2860 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2861 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2862 			       unique_indirect_regs[i] & 0x3FFFF);
2863 
2864 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2865 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2866 			       unique_indirect_regs[i] >> 20);
2867 		}
2868 	}
2869 
2870 	kfree(register_list_format);
2871 	return 0;
2872 }
2873 
2874 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2875 {
2876 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2877 }
2878 
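/* Give GFXIP control over CGPG (or hand it back) via PWR_MISC_CNTL_STATUS. */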
2879 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2880 					     bool enable)
2881 {
2882 	uint32_t data = 0;
2883 	uint32_t default_data = 0;
2884 
2885 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2886 	if (enable) {
2887 		/* enable GFXIP control over CGPG */
2888 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2889 		if (default_data != data)
2890 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2891 
2892 		/* update status */
2893 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2894 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2895 		if (default_data != data)
2896 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2897 	} else {
2898 		/* restore GFXIP control over CGPG */
2899 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2900 		if (default_data != data)
2901 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2902 	}
2903 }
2904 
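/* Program GFX power-gating timing when some form of GFX PG is supported:
 * idle poll count, RLC power up/down and command propagation delays, SERDES
 * command delay, CGCG-active-before-CGPG delay and the GRBM register save
 * idle threshold.
 */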
2905 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2906 {
2907 	uint32_t data = 0;
2908 
2909 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2910 			      AMD_PG_SUPPORT_GFX_SMG |
2911 			      AMD_PG_SUPPORT_GFX_DMG)) {
2912 		/* init IDLE_POLL_COUNT = 60 */
2913 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2914 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2915 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2916 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2917 
2918 		/* init RLC PG Delay */
2919 		data = 0;
2920 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2921 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2922 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2923 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2924 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2925 
2926 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2927 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2928 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2929 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2930 
2931 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2932 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2933 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2934 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2935 
2936 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2937 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2938 
2939 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2940 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2941 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2942 		if (adev->asic_type != CHIP_RENOIR)
2943 			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2944 	}
2945 }
2946 
2947 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2948 						bool enable)
2949 {
2950 	uint32_t data = 0;
2951 	uint32_t default_data = 0;
2952 
2953 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2954 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2955 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2956 			     enable ? 1 : 0);
2957 	if (default_data != data)
2958 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2959 }
2960 
2961 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2962 						bool enable)
2963 {
2964 	uint32_t data = 0;
2965 	uint32_t default_data = 0;
2966 
2967 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2968 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2969 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2970 			     enable ? 1 : 0);
2971 	if (default_data != data)
2972 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2973 }
2974 
2975 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2976 					bool enable)
2977 {
2978 	uint32_t data = 0;
2979 	uint32_t default_data = 0;
2980 
2981 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2982 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2983 			     CP_PG_DISABLE,
2984 			     enable ? 0 : 1);
2985 	if (default_data != data)
2986 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2987 }
2988 
2989 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2990 						bool enable)
2991 {
2992 	uint32_t data, default_data;
2993 
2994 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2995 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2996 			     GFX_POWER_GATING_ENABLE,
2997 			     enable ? 1 : 0);
2998 	if (default_data != data)
2999 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3000 }
3001 
3002 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
3003 						bool enable)
3004 {
3005 	uint32_t data, default_data;
3006 
3007 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3008 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3009 			     GFX_PIPELINE_PG_ENABLE,
3010 			     enable ? 1 : 0);
3011 	if (default_data != data)
3012 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3013 
3014 	if (!enable)
3015 		/* read any GFX register to wake up GFX */
3016 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
3017 }
3018 
3019 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3020 						       bool enable)
3021 {
3022 	uint32_t data, default_data;
3023 
3024 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3025 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3026 			     STATIC_PER_CU_PG_ENABLE,
3027 			     enable ? 1 : 0);
3028 	if (default_data != data)
3029 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3030 }
3031 
3032 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3033 						bool enable)
3034 {
3035 	uint32_t data, default_data;
3036 
3037 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3038 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3039 			     DYN_PER_CU_PG_ENABLE,
3040 			     enable ? 1 : 0);
3041 	if (default_data != data)
3042 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3043 }
3044 
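/* Power-gating init: program the clear-state buffer, set up the RLC
 * save/restore list and enable the save/restore machine (RLC v2_1), and,
 * when PG is supported, program the RLC jump table and the GFX power-gating
 * parameters.
 */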
3045 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3046 {
3047 	gfx_v9_0_init_csb(adev);
3048 
3049 	/*
3050 	 * The RLC save/restore list is supported since RLC v2_1,
3051 	 * and it's needed by the gfxoff feature.
3052 	 */
3053 	if (adev->gfx.rlc.is_rlc_v2_1) {
3054 		if (adev->asic_type == CHIP_VEGA12 ||
3055 		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
3056 			gfx_v9_1_init_rlc_save_restore_list(adev);
3057 		gfx_v9_0_enable_save_restore_machine(adev);
3058 	}
3059 
3060 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3061 			      AMD_PG_SUPPORT_GFX_SMG |
3062 			      AMD_PG_SUPPORT_GFX_DMG |
3063 			      AMD_PG_SUPPORT_CP |
3064 			      AMD_PG_SUPPORT_GDS |
3065 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
3066 		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3067 			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
3068 		gfx_v9_0_init_gfx_power_gating(adev);
3069 	}
3070 }
3071 
3072 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3073 {
3074 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3075 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3076 	gfx_v9_0_wait_for_rlc_serdes(adev);
3077 }
3078 
3079 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3080 {
3081 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3082 	udelay(50);
3083 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3084 	udelay(50);
3085 }
3086 
3087 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3088 {
3089 #ifdef AMDGPU_RLC_DEBUG_RETRY
3090 	u32 rlc_ucode_ver;
3091 #endif
3092 
3093 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3094 	udelay(50);
3095 
3096 	/* on APUs (e.g. Carrizo) the CP interrupt is enabled after the CP is initialized */
3097 	if (!(adev->flags & AMD_IS_APU)) {
3098 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3099 		udelay(50);
3100 	}
3101 
3102 #ifdef AMDGPU_RLC_DEBUG_RETRY
3103 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
3104 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3105 	if (rlc_ucode_ver == 0x108) {
3106 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3107 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
3108 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3109 		 * default is 0x9C4 to create a 100us interval */
3110 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3111 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3112 		 * to disable the page fault retry interrupts, default is
3113 		 * 0x100 (256) */
3114 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3115 	}
3116 #endif
3117 }
3118 
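/* Legacy (non-PSP) RLC firmware load: stream the ucode words through
 * RLC_GPM_UCODE_DATA and finish by writing the firmware version to
 * RLC_GPM_UCODE_ADDR.
 */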
3119 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3120 {
3121 	const struct rlc_firmware_header_v2_0 *hdr;
3122 	const __le32 *fw_data;
3123 	unsigned i, fw_size;
3124 
3125 	if (!adev->gfx.rlc_fw)
3126 		return -EINVAL;
3127 
3128 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3129 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3130 
3131 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3132 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3133 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3134 
3135 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3136 			RLCG_UCODE_LOADING_START_ADDRESS);
3137 	for (i = 0; i < fw_size; i++)
3138 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3139 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3140 
3141 	return 0;
3142 }
3143 
3144 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3145 {
3146 	int r;
3147 
3148 	if (amdgpu_sriov_vf(adev)) {
3149 		gfx_v9_0_init_csb(adev);
3150 		return 0;
3151 	}
3152 
3153 	adev->gfx.rlc.funcs->stop(adev);
3154 
3155 	/* disable CG */
3156 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3157 
3158 	gfx_v9_0_init_pg(adev);
3159 
3160 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3161 		/* legacy rlc firmware loading */
3162 		r = gfx_v9_0_rlc_load_microcode(adev);
3163 		if (r)
3164 			return r;
3165 	}
3166 
3167 	switch (adev->asic_type) {
3168 	case CHIP_RAVEN:
3169 		if (amdgpu_lbpw == 0)
3170 			gfx_v9_0_enable_lbpw(adev, false);
3171 		else
3172 			gfx_v9_0_enable_lbpw(adev, true);
3173 		break;
3174 	case CHIP_VEGA20:
3175 		if (amdgpu_lbpw > 0)
3176 			gfx_v9_0_enable_lbpw(adev, true);
3177 		else
3178 			gfx_v9_0_enable_lbpw(adev, false);
3179 		break;
3180 	default:
3181 		break;
3182 	}
3183 
3184 	adev->gfx.rlc.funcs->start(adev);
3185 
3186 	return 0;
3187 }
3188 
3189 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3190 {
3191 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3192 
3193 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3194 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3195 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3196 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3197 	udelay(50);
3198 }
3199 
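/* Legacy (non-PSP) CP gfx firmware load: halt the gfx CP, then stream the
 * PFP, CE and ME ucode into their UCODE/RAM data registers.
 */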
3200 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3201 {
3202 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3203 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3204 	const struct gfx_firmware_header_v1_0 *me_hdr;
3205 	const __le32 *fw_data;
3206 	unsigned i, fw_size;
3207 
3208 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3209 		return -EINVAL;
3210 
3211 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3212 		adev->gfx.pfp_fw->data;
3213 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3214 		adev->gfx.ce_fw->data;
3215 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3216 		adev->gfx.me_fw->data;
3217 
3218 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3219 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3220 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3221 
3222 	gfx_v9_0_cp_gfx_enable(adev, false);
3223 
3224 	/* PFP */
3225 	fw_data = (const __le32 *)
3226 		(adev->gfx.pfp_fw->data +
3227 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3228 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3229 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3230 	for (i = 0; i < fw_size; i++)
3231 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3232 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3233 
3234 	/* CE */
3235 	fw_data = (const __le32 *)
3236 		(adev->gfx.ce_fw->data +
3237 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3238 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3239 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3240 	for (i = 0; i < fw_size; i++)
3241 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3242 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3243 
3244 	/* ME */
3245 	fw_data = (const __le32 *)
3246 		(adev->gfx.me_fw->data +
3247 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3248 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3249 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3250 	for (i = 0; i < fw_size; i++)
3251 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3252 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3253 
3254 	return 0;
3255 }
3256 
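/* Initialize the gfx CP and emit the clear-state sequence on the first gfx
 * ring: preamble, context control, the golden context registers from
 * gfx9_cs_data, clear state and the CE partition bases.
 */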
3257 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3258 {
3259 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3260 	const struct cs_section_def *sect = NULL;
3261 	const struct cs_extent_def *ext = NULL;
3262 	int r, i, tmp;
3263 
3264 	/* init the CP */
3265 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3266 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3267 
3268 	gfx_v9_0_cp_gfx_enable(adev, true);
3269 
3270 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3271 	if (r) {
3272 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3273 		return r;
3274 	}
3275 
3276 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3277 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3278 
3279 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3280 	amdgpu_ring_write(ring, 0x80000000);
3281 	amdgpu_ring_write(ring, 0x80000000);
3282 
3283 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3284 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3285 			if (sect->id == SECT_CONTEXT) {
3286 				amdgpu_ring_write(ring,
3287 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3288 					       ext->reg_count));
3289 				amdgpu_ring_write(ring,
3290 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3291 				for (i = 0; i < ext->reg_count; i++)
3292 					amdgpu_ring_write(ring, ext->extent[i]);
3293 			}
3294 		}
3295 	}
3296 
3297 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3298 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3299 
3300 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3301 	amdgpu_ring_write(ring, 0);
3302 
3303 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3304 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3305 	amdgpu_ring_write(ring, 0x8000);
3306 	amdgpu_ring_write(ring, 0x8000);
3307 
3308 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3309 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3310 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3311 	amdgpu_ring_write(ring, tmp);
3312 	amdgpu_ring_write(ring, 0);
3313 
3314 	amdgpu_ring_commit(ring);
3315 
3316 	return 0;
3317 }
3318 
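/* Program gfx ring buffer 0: size, read/write pointers, writeback addresses,
 * base address and doorbell range, then start the ring with
 * gfx_v9_0_cp_gfx_start().
 */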
3319 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3320 {
3321 	struct amdgpu_ring *ring;
3322 	u32 tmp;
3323 	u32 rb_bufsz;
3324 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3325 
3326 	/* Set the write pointer delay */
3327 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3328 
3329 	/* set the RB to use vmid 0 */
3330 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3331 
3332 	/* Set ring buffer size */
3333 	ring = &adev->gfx.gfx_ring[0];
3334 	rb_bufsz = order_base_2(ring->ring_size / 8);
3335 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3336 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3337 #ifdef __BIG_ENDIAN
3338 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3339 #endif
3340 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3341 
3342 	/* Initialize the ring buffer's write pointers */
3343 	ring->wptr = 0;
3344 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3345 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3346 
3347 	/* set the wb address whether it's enabled or not */
3348 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3349 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3350 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3351 
3352 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3353 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3354 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3355 
3356 	mdelay(1);
3357 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3358 
3359 	rb_addr = ring->gpu_addr >> 8;
3360 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3361 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3362 
3363 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3364 	if (ring->use_doorbell) {
3365 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3366 				    DOORBELL_OFFSET, ring->doorbell_index);
3367 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3368 				    DOORBELL_EN, 1);
3369 	} else {
3370 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3371 	}
3372 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3373 
3374 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3375 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3376 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3377 
3378 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3379 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3380 
3381 
3382 	/* start the ring */
3383 	gfx_v9_0_cp_gfx_start(adev);
3384 	ring->sched.ready = true;
3385 
3386 	return 0;
3387 }
3388 
3389 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3390 {
3391 	if (enable) {
3392 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3393 	} else {
3394 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3395 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3396 		adev->gfx.kiq.ring.sched.ready = false;
3397 	}
3398 	udelay(50);
3399 }
3400 
3401 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3402 {
3403 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3404 	const __le32 *fw_data;
3405 	unsigned i;
3406 	u32 tmp;
3407 
3408 	if (!adev->gfx.mec_fw)
3409 		return -EINVAL;
3410 
3411 	gfx_v9_0_cp_compute_enable(adev, false);
3412 
3413 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3414 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3415 
3416 	fw_data = (const __le32 *)
3417 		(adev->gfx.mec_fw->data +
3418 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3419 	tmp = 0;
3420 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3421 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3422 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3423 
3424 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3425 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3426 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3427 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3428 
3429 	/* MEC1 */
3430 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3431 			 mec_hdr->jt_offset);
3432 	for (i = 0; i < mec_hdr->jt_size; i++)
3433 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3434 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3435 
3436 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3437 			adev->gfx.mec_fw_version);
3438 	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3439 
3440 	return 0;
3441 }
3442 
3443 /* KIQ functions */
3444 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3445 {
3446 	uint32_t tmp;
3447 	struct amdgpu_device *adev = ring->adev;
3448 
3449 	/* tell the RLC which queue is the KIQ */
3450 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3451 	tmp &= 0xffffff00;
3452 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3453 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3454 	tmp |= 0x80;
3455 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3456 }
3457 
3458 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3459 {
3460 	struct amdgpu_device *adev = ring->adev;
3461 
3462 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3463 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3464 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3465 			mqd->cp_hqd_queue_priority =
3466 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3467 		}
3468 	}
3469 }
3470 
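/* Fill the memory queue descriptor (MQD) for a compute or KIQ ring: EOP
 * buffer, doorbell control, MQD/HQD base addresses, PQ control and writeback
 * addresses. The values are committed to the HQD registers later, either
 * directly for the KIQ or via KIQ map_queues packets for the compute queues.
 */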
3471 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3472 {
3473 	struct amdgpu_device *adev = ring->adev;
3474 	struct v9_mqd *mqd = ring->mqd_ptr;
3475 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3476 	uint32_t tmp;
3477 
3478 	mqd->header = 0xC0310800;
3479 	mqd->compute_pipelinestat_enable = 0x00000001;
3480 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3481 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3482 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3483 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3484 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3485 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3486 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3487 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3488 	mqd->compute_misc_reserved = 0x00000003;
3489 
3490 	mqd->dynamic_cu_mask_addr_lo =
3491 		lower_32_bits(ring->mqd_gpu_addr
3492 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3493 	mqd->dynamic_cu_mask_addr_hi =
3494 		upper_32_bits(ring->mqd_gpu_addr
3495 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3496 
3497 	eop_base_addr = ring->eop_gpu_addr >> 8;
3498 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3499 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3500 
3501 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3502 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3503 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3504 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3505 
3506 	mqd->cp_hqd_eop_control = tmp;
3507 
3508 	/* enable doorbell? */
3509 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3510 
3511 	if (ring->use_doorbell) {
3512 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3513 				    DOORBELL_OFFSET, ring->doorbell_index);
3514 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3515 				    DOORBELL_EN, 1);
3516 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3517 				    DOORBELL_SOURCE, 0);
3518 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3519 				    DOORBELL_HIT, 0);
3520 	} else {
3521 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3522 					 DOORBELL_EN, 0);
3523 	}
3524 
3525 	mqd->cp_hqd_pq_doorbell_control = tmp;
3526 
3527 	/* disable the queue if it's active */
3528 	ring->wptr = 0;
3529 	mqd->cp_hqd_dequeue_request = 0;
3530 	mqd->cp_hqd_pq_rptr = 0;
3531 	mqd->cp_hqd_pq_wptr_lo = 0;
3532 	mqd->cp_hqd_pq_wptr_hi = 0;
3533 
3534 	/* set the pointer to the MQD */
3535 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3536 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3537 
3538 	/* set MQD vmid to 0 */
3539 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3540 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3541 	mqd->cp_mqd_control = tmp;
3542 
3543 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3544 	hqd_gpu_addr = ring->gpu_addr >> 8;
3545 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3546 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3547 
3548 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3549 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3550 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3551 			    (order_base_2(ring->ring_size / 4) - 1));
3552 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3553 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3554 #ifdef __BIG_ENDIAN
3555 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3556 #endif
3557 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3558 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3559 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3560 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3561 	mqd->cp_hqd_pq_control = tmp;
3562 
3563 	/* set the wb address whether it's enabled or not */
3564 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3565 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3566 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3567 		upper_32_bits(wb_gpu_addr) & 0xffff;
3568 
3569 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3570 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3571 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3572 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3573 
3574 	tmp = 0;
3575 	/* enable the doorbell if requested */
3576 	if (ring->use_doorbell) {
3577 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3578 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3579 				DOORBELL_OFFSET, ring->doorbell_index);
3580 
3581 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3582 					 DOORBELL_EN, 1);
3583 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3584 					 DOORBELL_SOURCE, 0);
3585 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3586 					 DOORBELL_HIT, 0);
3587 	}
3588 
3589 	mqd->cp_hqd_pq_doorbell_control = tmp;
3590 
3591 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3592 	ring->wptr = 0;
3593 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3594 
3595 	/* set the vmid for the queue */
3596 	mqd->cp_hqd_vmid = 0;
3597 
3598 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3599 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3600 	mqd->cp_hqd_persistent_state = tmp;
3601 
3602 	/* set MIN_IB_AVAIL_SIZE */
3603 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3604 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3605 	mqd->cp_hqd_ib_control = tmp;
3606 
3607 	/* set static priority for a queue/ring */
3608 	gfx_v9_0_mqd_set_priority(ring, mqd);
3609 	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3610 
3611 	/* the map_queues packet doesn't need to activate the queue,
3612 	 * so only the KIQ needs to set this field.
3613 	 */
3614 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3615 		mqd->cp_hqd_active = 1;
3616 
3617 	return 0;
3618 }
3619 
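/* Write the MQD contents straight into the CP HQD registers; used for the
 * KIQ, which is programmed directly rather than mapped through a map_queues
 * packet.
 */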
3620 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3621 {
3622 	struct amdgpu_device *adev = ring->adev;
3623 	struct v9_mqd *mqd = ring->mqd_ptr;
3624 	int j;
3625 
3626 	/* disable wptr polling */
3627 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3628 
3629 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3630 	       mqd->cp_hqd_eop_base_addr_lo);
3631 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3632 	       mqd->cp_hqd_eop_base_addr_hi);
3633 
3634 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3635 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3636 	       mqd->cp_hqd_eop_control);
3637 
3638 	/* enable doorbell? */
3639 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3640 	       mqd->cp_hqd_pq_doorbell_control);
3641 
3642 	/* disable the queue if it's active */
3643 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3644 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3645 		for (j = 0; j < adev->usec_timeout; j++) {
3646 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3647 				break;
3648 			udelay(1);
3649 		}
3650 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3651 		       mqd->cp_hqd_dequeue_request);
3652 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3653 		       mqd->cp_hqd_pq_rptr);
3654 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3655 		       mqd->cp_hqd_pq_wptr_lo);
3656 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3657 		       mqd->cp_hqd_pq_wptr_hi);
3658 	}
3659 
3660 	/* set the pointer to the MQD */
3661 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3662 	       mqd->cp_mqd_base_addr_lo);
3663 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3664 	       mqd->cp_mqd_base_addr_hi);
3665 
3666 	/* set MQD vmid to 0 */
3667 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3668 	       mqd->cp_mqd_control);
3669 
3670 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3671 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3672 	       mqd->cp_hqd_pq_base_lo);
3673 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3674 	       mqd->cp_hqd_pq_base_hi);
3675 
3676 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3677 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3678 	       mqd->cp_hqd_pq_control);
3679 
3680 	/* set the wb address whether it's enabled or not */
3681 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3682 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3683 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3684 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3685 
3686 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3687 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3688 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3689 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3690 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3691 
3692 	/* enable the doorbell if requested */
3693 	if (ring->use_doorbell) {
3694 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3695 					(adev->doorbell_index.kiq * 2) << 2);
3696 		/* If GC has entered CGPG, ringing doorbell > first page
3697 		 * doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3698 		 * work around this issue. This change has to align with the firmware
3699 		 * update.
3700 		 */
3701 		if (check_if_enlarge_doorbell_range(adev))
3702 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3703 					(adev->doorbell.size - 4));
3704 		else
3705 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3706 					(adev->doorbell_index.userqueue_end * 2) << 2);
3707 	}
3708 
3709 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3710 	       mqd->cp_hqd_pq_doorbell_control);
3711 
3712 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3713 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3714 	       mqd->cp_hqd_pq_wptr_lo);
3715 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3716 	       mqd->cp_hqd_pq_wptr_hi);
3717 
3718 	/* set the vmid for the queue */
3719 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3720 
3721 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3722 	       mqd->cp_hqd_persistent_state);
3723 
3724 	/* activate the queue */
3725 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3726 	       mqd->cp_hqd_active);
3727 
3728 	if (ring->use_doorbell)
3729 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3730 
3731 	return 0;
3732 }
3733 
3734 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3735 {
3736 	struct amdgpu_device *adev = ring->adev;
3737 	int j;
3738 
3739 	/* disable the queue if it's active */
3740 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3741 
3742 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3743 
3744 		for (j = 0; j < adev->usec_timeout; j++) {
3745 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3746 				break;
3747 			udelay(1);
3748 		}
3749 
3750 		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3751 			DRM_DEBUG("KIQ dequeue request failed.\n");
3752 
3753 			/* Manual disable if dequeue request times out */
3754 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3755 		}
3756 
3757 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3758 		      0);
3759 	}
3760 
3761 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3762 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3763 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3764 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3765 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3766 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3767 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3768 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3769 
3770 	return 0;
3771 }
3772 
3773 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3774 {
3775 	struct amdgpu_device *adev = ring->adev;
3776 	struct v9_mqd *mqd = ring->mqd_ptr;
3777 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3778 	struct v9_mqd *tmp_mqd;
3779 
3780 	gfx_v9_0_kiq_setting(ring);
3781 
3782 	/* The GPU could be in a bad state during probe and the driver may trigger
3783 	 * a reset after loading the SMU; in that case the MQD has not been
3784 	 * initialized and the driver needs to re-init it.
3785 	 * Check mqd->cp_hqd_pq_control since this value should not be 0.
3786 	 */
3787 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3788 	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3789 		/* for the GPU_RESET case, reset the MQD to a clean state */
3790 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3791 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3792 
3793 		/* reset ring buffer */
3794 		ring->wptr = 0;
3795 		amdgpu_ring_clear_ring(ring);
3796 
3797 		mutex_lock(&adev->srbm_mutex);
3798 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3799 		gfx_v9_0_kiq_init_register(ring);
3800 		soc15_grbm_select(adev, 0, 0, 0, 0);
3801 		mutex_unlock(&adev->srbm_mutex);
3802 	} else {
3803 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3804 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3805 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3806 		mutex_lock(&adev->srbm_mutex);
3807 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3808 		gfx_v9_0_mqd_init(ring);
3809 		gfx_v9_0_kiq_init_register(ring);
3810 		soc15_grbm_select(adev, 0, 0, 0, 0);
3811 		mutex_unlock(&adev->srbm_mutex);
3812 
3813 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3814 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3815 	}
3816 
3817 	return 0;
3818 }
3819 
3820 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3821 {
3822 	struct amdgpu_device *adev = ring->adev;
3823 	struct v9_mqd *mqd = ring->mqd_ptr;
3824 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3825 	struct v9_mqd *tmp_mqd;
3826 
3827 	/* Same as the KIQ init above: the driver needs to re-init the MQD if
3828 	 * mqd->cp_hqd_pq_control was not initialized before.
3829 	 */
3830 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3831 
3832 	if (!tmp_mqd->cp_hqd_pq_control ||
3833 	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3834 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3835 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3836 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3837 		mutex_lock(&adev->srbm_mutex);
3838 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3839 		gfx_v9_0_mqd_init(ring);
3840 		soc15_grbm_select(adev, 0, 0, 0, 0);
3841 		mutex_unlock(&adev->srbm_mutex);
3842 
3843 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3844 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3845 	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3846 		/* reset MQD to a clean status */
3847 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3848 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3849 
3850 		/* reset ring buffer */
3851 		ring->wptr = 0;
3852 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3853 		amdgpu_ring_clear_ring(ring);
3854 	} else {
3855 		amdgpu_ring_clear_ring(ring);
3856 	}
3857 
3858 	return 0;
3859 }
3860 
3861 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3862 {
3863 	struct amdgpu_ring *ring;
3864 	int r;
3865 
3866 	ring = &adev->gfx.kiq.ring;
3867 
3868 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3869 	if (unlikely(r != 0))
3870 		return r;
3871 
3872 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3873 	if (unlikely(r != 0))
3874 		return r;
3875 
3876 	gfx_v9_0_kiq_init_queue(ring);
3877 	amdgpu_bo_kunmap(ring->mqd_obj);
3878 	ring->mqd_ptr = NULL;
3879 	amdgpu_bo_unreserve(ring->mqd_obj);
3880 	ring->sched.ready = true;
3881 	return 0;
3882 }
3883 
3884 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3885 {
3886 	struct amdgpu_ring *ring = NULL;
3887 	int r = 0, i;
3888 
3889 	gfx_v9_0_cp_compute_enable(adev, true);
3890 
3891 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3892 		ring = &adev->gfx.compute_ring[i];
3893 
3894 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3895 		if (unlikely(r != 0))
3896 			goto done;
3897 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3898 		if (!r) {
3899 			r = gfx_v9_0_kcq_init_queue(ring);
3900 			amdgpu_bo_kunmap(ring->mqd_obj);
3901 			ring->mqd_ptr = NULL;
3902 		}
3903 		amdgpu_bo_unreserve(ring->mqd_obj);
3904 		if (r)
3905 			goto done;
3906 	}
3907 
3908 	r = amdgpu_gfx_enable_kcq(adev);
3909 done:
3910 	return r;
3911 }
3912 
3913 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3914 {
3915 	int r, i;
3916 	struct amdgpu_ring *ring;
3917 
3918 	if (!(adev->flags & AMD_IS_APU))
3919 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3920 
3921 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3922 		if (adev->gfx.num_gfx_rings) {
3923 			/* legacy firmware loading */
3924 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3925 			if (r)
3926 				return r;
3927 		}
3928 
3929 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3930 		if (r)
3931 			return r;
3932 	}
3933 
3934 	r = gfx_v9_0_kiq_resume(adev);
3935 	if (r)
3936 		return r;
3937 
3938 	if (adev->gfx.num_gfx_rings) {
3939 		r = gfx_v9_0_cp_gfx_resume(adev);
3940 		if (r)
3941 			return r;
3942 	}
3943 
3944 	r = gfx_v9_0_kcq_resume(adev);
3945 	if (r)
3946 		return r;
3947 
3948 	if (adev->gfx.num_gfx_rings) {
3949 		ring = &adev->gfx.gfx_ring[0];
3950 		r = amdgpu_ring_test_helper(ring);
3951 		if (r)
3952 			return r;
3953 	}
3954 
3955 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3956 		ring = &adev->gfx.compute_ring[i];
3957 		amdgpu_ring_test_helper(ring);
3958 	}
3959 
3960 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3961 
3962 	return 0;
3963 }
3964 
3965 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3966 {
3967 	u32 tmp;
3968 
3969 	if (adev->asic_type != CHIP_ARCTURUS &&
3970 	    adev->asic_type != CHIP_ALDEBARAN)
3971 		return;
3972 
3973 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3974 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3975 				adev->df.hash_status.hash_64k);
3976 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3977 				adev->df.hash_status.hash_2m);
3978 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3979 				adev->df.hash_status.hash_1g);
3980 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3981 }
3982 
3983 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3984 {
3985 	if (adev->gfx.num_gfx_rings)
3986 		gfx_v9_0_cp_gfx_enable(adev, enable);
3987 	gfx_v9_0_cp_compute_enable(adev, enable);
3988 }
3989 
3990 static int gfx_v9_0_hw_init(void *handle)
3991 {
3992 	int r;
3993 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3994 
3995 	if (!amdgpu_sriov_vf(adev))
3996 		gfx_v9_0_init_golden_registers(adev);
3997 
3998 	gfx_v9_0_constants_init(adev);
3999 
4000 	gfx_v9_0_init_tcp_config(adev);
4001 
4002 	r = adev->gfx.rlc.funcs->resume(adev);
4003 	if (r)
4004 		return r;
4005 
4006 	r = gfx_v9_0_cp_resume(adev);
4007 	if (r)
4008 		return r;
4009 
4010 	if (adev->asic_type == CHIP_ALDEBARAN)
4011 		gfx_v9_4_2_set_power_brake_sequence(adev);
4012 
4013 	return r;
4014 }
4015 
4016 static int gfx_v9_0_hw_fini(void *handle)
4017 {
4018 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4019 
4020 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4021 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4022 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4023 
4024 	/* DF freeze and kcq disable will fail */
4025 	if (!amdgpu_ras_intr_triggered())
4026 		/* disable KCQ to avoid CPC touch memory not valid anymore */
4027 		amdgpu_gfx_disable_kcq(adev);
4028 
4029 	if (amdgpu_sriov_vf(adev)) {
4030 		gfx_v9_0_cp_gfx_enable(adev, false);
4031 		/* must disable polling for SRIOV when hw is finished, otherwise
4032 		 * the CPC engine may keep fetching the WB address, which is already
4033 		 * invalid after sw is finished, and trigger a DMAR read error on
4034 		 * the hypervisor side.
4035 		 */
4036 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4037 		return 0;
4038 	}
4039 
4040 	/* Use deinitialize sequence from CAIL when unbinding device from driver,
4041 	 * otherwise the KIQ hangs when binding back.
4042 	 */
4043 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4044 		mutex_lock(&adev->srbm_mutex);
4045 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
4046 				adev->gfx.kiq.ring.pipe,
4047 				adev->gfx.kiq.ring.queue, 0);
4048 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
4049 		soc15_grbm_select(adev, 0, 0, 0, 0);
4050 		mutex_unlock(&adev->srbm_mutex);
4051 	}
4052 
4053 	gfx_v9_0_cp_enable(adev, false);
4054 
4055 	/* Skip suspend with A+A reset */
4056 	if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) {
4057 		dev_dbg(adev->dev, "Device in reset. Skipping RLC halt\n");
4058 		return 0;
4059 	}
4060 
4061 	adev->gfx.rlc.funcs->stop(adev);
4062 	return 0;
4063 }
4064 
4065 static int gfx_v9_0_suspend(void *handle)
4066 {
4067 	return gfx_v9_0_hw_fini(handle);
4068 }
4069 
4070 static int gfx_v9_0_resume(void *handle)
4071 {
4072 	return gfx_v9_0_hw_init(handle);
4073 }
4074 
4075 static bool gfx_v9_0_is_idle(void *handle)
4076 {
4077 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4078 
4079 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4080 				GRBM_STATUS, GUI_ACTIVE))
4081 		return false;
4082 	else
4083 		return true;
4084 }
4085 
4086 static int gfx_v9_0_wait_for_idle(void *handle)
4087 {
4088 	unsigned i;
4089 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4090 
4091 	for (i = 0; i < adev->usec_timeout; i++) {
4092 		if (gfx_v9_0_is_idle(handle))
4093 			return 0;
4094 		udelay(1);
4095 	}
4096 	return -ETIMEDOUT;
4097 }
4098 
4099 static int gfx_v9_0_soft_reset(void *handle)
4100 {
4101 	u32 grbm_soft_reset = 0;
4102 	u32 tmp;
4103 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4104 
4105 	/* GRBM_STATUS */
4106 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4107 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4108 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4109 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4110 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4111 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4112 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4113 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4114 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4115 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4116 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4117 	}
4118 
4119 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4120 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4121 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4122 	}
4123 
4124 	/* GRBM_STATUS2 */
4125 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4126 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4127 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4128 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4129 
4130 
4131 	if (grbm_soft_reset) {
4132 		/* stop the rlc */
4133 		adev->gfx.rlc.funcs->stop(adev);
4134 
4135 		if (adev->gfx.num_gfx_rings)
4136 			/* Disable GFX parsing/prefetching */
4137 			gfx_v9_0_cp_gfx_enable(adev, false);
4138 
4139 		/* Disable MEC parsing/prefetching */
4140 		gfx_v9_0_cp_compute_enable(adev, false);
4141 
4142 		if (grbm_soft_reset) {
4143 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4144 			tmp |= grbm_soft_reset;
4145 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4146 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4147 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4148 
4149 			udelay(50);
4150 
4151 			tmp &= ~grbm_soft_reset;
4152 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4153 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4154 		}
4155 
4156 		/* Wait a little for things to settle down */
4157 		udelay(50);
4158 	}
4159 	return 0;
4160 }
4161 
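/* Read the GPU clock counter through the KIQ: emit a COPY_DATA packet that
 * copies the 64-bit clock count into a writeback slot and poll the fence.
 * Used when the RLC clock capture registers can't be read directly, e.g. on
 * Vega10 under SR-IOV.
 */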
4162 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4163 {
4164 	signed long r, cnt = 0;
4165 	unsigned long flags;
4166 	uint32_t seq, reg_val_offs = 0;
4167 	uint64_t value = 0;
4168 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4169 	struct amdgpu_ring *ring = &kiq->ring;
4170 
4171 	BUG_ON(!ring->funcs->emit_rreg);
4172 
4173 	spin_lock_irqsave(&kiq->ring_lock, flags);
4174 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4175 		pr_err("critical bug! too many kiq readers\n");
4176 		goto failed_unlock;
4177 	}
4178 	amdgpu_ring_alloc(ring, 32);
4179 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4180 	amdgpu_ring_write(ring, 9 |	/* src: register*/
4181 				(5 << 8) |	/* dst: memory */
4182 				(1 << 16) |	/* count sel */
4183 				(1 << 20));	/* write confirm */
4184 	amdgpu_ring_write(ring, 0);
4185 	amdgpu_ring_write(ring, 0);
4186 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4187 				reg_val_offs * 4));
4188 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4189 				reg_val_offs * 4));
4190 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4191 	if (r)
4192 		goto failed_undo;
4193 
4194 	amdgpu_ring_commit(ring);
4195 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4196 
4197 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4198 
4199 	/* don't wait any longer in the gpu reset case because doing so may
4200 	 * block the gpu_recover() routine forever, e.g. this virt_kiq_rreg
4201 	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
4202 	 * never return if we keep waiting in virt_kiq_rreg, which causes
4203 	 * gpu_recover() to hang there.
4204 	 *
4205 	 * also don't wait any longer in IRQ context
4206 	 */
4207 	if (r < 1 && (amdgpu_in_reset(adev)))
4208 		goto failed_kiq_read;
4209 
4210 	might_sleep();
4211 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4212 		drm_msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4213 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4214 	}
4215 
4216 	if (cnt > MAX_KIQ_REG_TRY)
4217 		goto failed_kiq_read;
4218 
4219 	mb();
4220 	value = (uint64_t)adev->wb.wb[reg_val_offs] |
4221 		(uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4222 	amdgpu_device_wb_free(adev, reg_val_offs);
4223 	return value;
4224 
4225 failed_undo:
4226 	amdgpu_ring_undo(ring);
4227 failed_unlock:
4228 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4229 failed_kiq_read:
4230 	if (reg_val_offs)
4231 		amdgpu_device_wb_free(adev, reg_val_offs);
4232 	pr_err("failed to read gpu clock\n");
4233 	return ~0;
4234 }
4235 
4236 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4237 {
4238 	uint64_t clock, clock_lo, clock_hi, hi_check;
4239 
4240 	switch (adev->asic_type) {
4241 	case CHIP_RENOIR:
4242 		preempt_disable();
4243 		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4244 		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4245 		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4246 		/* The SMUIO TSC clock runs at 100MHz, so the 32-bit counter
4247 		 * carries over roughly every 42 seconds.
4248 		 */
4249 		if (hi_check != clock_hi) {
4250 			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4251 			clock_hi = hi_check;
4252 		}
4253 		preempt_enable();
4254 		clock = clock_lo | (clock_hi << 32ULL);
4255 		break;
4256 	default:
4257 		amdgpu_gfx_off_ctrl(adev, false);
4258 		mutex_lock(&adev->gfx.gpu_clock_mutex);
4259 		if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
4260 			clock = gfx_v9_0_kiq_read_clock(adev);
4261 		} else {
4262 			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4263 			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4264 				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4265 		}
4266 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
4267 		amdgpu_gfx_off_ctrl(adev, true);
4268 		break;
4269 	}
4270 	return clock;
4271 }
4272 
4273 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4274 					  uint32_t vmid,
4275 					  uint32_t gds_base, uint32_t gds_size,
4276 					  uint32_t gws_base, uint32_t gws_size,
4277 					  uint32_t oa_base, uint32_t oa_size)
4278 {
4279 	struct amdgpu_device *adev = ring->adev;
4280 
4281 	/* GDS Base */
4282 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4283 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4284 				   gds_base);
4285 
4286 	/* GDS Size */
4287 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4288 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4289 				   gds_size);
4290 
4291 	/* GWS */
4292 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4293 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4294 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4295 
4296 	/* OA */
4297 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4298 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4299 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4300 }
4301 
4302 static const u32 vgpr_init_compute_shader[] =
4303 {
4304 	0xb07c0000, 0xbe8000ff,
4305 	0x000000f8, 0xbf110800,
4306 	0x7e000280, 0x7e020280,
4307 	0x7e040280, 0x7e060280,
4308 	0x7e080280, 0x7e0a0280,
4309 	0x7e0c0280, 0x7e0e0280,
4310 	0x80808800, 0xbe803200,
4311 	0xbf84fff5, 0xbf9c0000,
4312 	0xd28c0001, 0x0001007f,
4313 	0xd28d0001, 0x0002027e,
4314 	0x10020288, 0xb8810904,
4315 	0xb7814000, 0xd1196a01,
4316 	0x00000301, 0xbe800087,
4317 	0xbefc00c1, 0xd89c4000,
4318 	0x00020201, 0xd89cc080,
4319 	0x00040401, 0x320202ff,
4320 	0x00000800, 0x80808100,
4321 	0xbf84fff8, 0x7e020280,
4322 	0xbf810000, 0x00000000,
4323 };
4324 
4325 static const u32 sgpr_init_compute_shader[] =
4326 {
4327 	0xb07c0000, 0xbe8000ff,
4328 	0x0000005f, 0xbee50080,
4329 	0xbe812c65, 0xbe822c65,
4330 	0xbe832c65, 0xbe842c65,
4331 	0xbe852c65, 0xb77c0005,
4332 	0x80808500, 0xbf84fff8,
4333 	0xbe800080, 0xbf810000,
4334 };
4335 
4336 static const u32 vgpr_init_compute_shader_arcturus[] = {
4337 	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4338 	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4339 	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4340 	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4341 	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4342 	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4343 	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4344 	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4345 	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4346 	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4347 	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4348 	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4349 	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4350 	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4351 	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4352 	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4353 	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4354 	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4355 	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4356 	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4357 	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4358 	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4359 	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4360 	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4361 	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4362 	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4363 	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4364 	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4365 	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4366 	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4367 	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4368 	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4369 	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4370 	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4371 	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4372 	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4373 	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4374 	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4375 	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4376 	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4377 	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4378 	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4379 	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4380 	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4381 	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4382 	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4383 	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4384 	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4385 	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4386 	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4387 	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4388 	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4389 	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4390 	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4391 	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4392 	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4393 	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4394 	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4395 	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4396 	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4397 	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4398 	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4399 	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4400 	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4401 	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4402 	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4403 	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4404 	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4405 	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4406 	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4407 	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4408 	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4409 	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4410 	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4411 	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4412 	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4413 	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4414 	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4415 	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4416 	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4417 	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4418 	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4419 	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4420 	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4421 	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4422 	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4423 	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4424 	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4425 	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4426 	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4427 	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4428 	0xbf84fff8, 0xbf810000,
4429 };
4430 
4431 /* When the register arrays below are changed, please also update gpr_reg_size
4432    and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds() to keep
4433    all gfx9 ASICs covered. */
4434 static const struct soc15_reg_entry vgpr_init_regs[] = {
4435    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4436    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4437    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4438    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4439    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4440    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4441    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4442    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4443    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4444    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4445    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4446    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4447    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4448    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4449 };
4450 
4451 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4452    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4453    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4454    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4455    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4456    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4457    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4458    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4459    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4460    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4461    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4462    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4463    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4464    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4465    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4466 };
4467 
4468 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4469    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4470    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4471    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4472    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4473    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4474    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4475    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4476    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4477    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4478    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4479    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4480    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4481    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4482    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4483 };
4484 
4485 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4486    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4487    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4488    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4489    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4490    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4491    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4492    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4493    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4494    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4495    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4496    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4497    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4498    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4499    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4500 };
4501 
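/*
 * EDC/ECC error counter registers.  The trailing numbers of each entry are
 * the soc15_reg_entry reg_value, se_num and instance fields, i.e. how many
 * shader-engine and per-instance copies exist for that counter.
 */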
4502 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4503    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4504    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4505    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4506    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4507    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4508    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4509    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4510    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4511    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4512    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4513    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4514    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4515    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4516    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4517    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4518    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4519    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4520    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4521    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4522    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4523    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4524    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4525    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4526    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4527    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4528    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4529    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4530    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4531    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4532    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4533    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4534    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4535    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4536 };
4537 
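/*
 * Clear the whole GDS with a CP DMA_DATA packet so that the GDS EDC counters
 * start from a known state.  Only done when GFX RAS is enabled.
 */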
4538 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4539 {
4540 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4541 	int i, r;
4542 
4543 	/* only supported when RAS is enabled */
4544 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4545 		return 0;
4546 
4547 	r = amdgpu_ring_alloc(ring, 7);
4548 	if (r) {
4549 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4550 			ring->name, r);
4551 		return r;
4552 	}
4553 
4554 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4555 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4556 
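	/*
	 * DMA_DATA with DST_SEL(1) targets GDS and SRC_SEL(2) takes the literal
	 * source data (zero here), so this packet clears the whole GDS aperture
	 * mapped for VMID0 above.
	 */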
4557 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4558 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4559 				PACKET3_DMA_DATA_DST_SEL(1) |
4560 				PACKET3_DMA_DATA_SRC_SEL(2) |
4561 				PACKET3_DMA_DATA_ENGINE(0)));
4562 	amdgpu_ring_write(ring, 0);
4563 	amdgpu_ring_write(ring, 0);
4564 	amdgpu_ring_write(ring, 0);
4565 	amdgpu_ring_write(ring, 0);
4566 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4567 				adev->gds.gds_size);
4568 
4569 	amdgpu_ring_commit(ring);
4570 
4571 	for (i = 0; i < adev->usec_timeout; i++) {
4572 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4573 			break;
4574 		udelay(1);
4575 	}
4576 
4577 	if (i >= adev->usec_timeout)
4578 		r = -ETIMEDOUT;
4579 
4580 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4581 
4582 	return r;
4583 }
4584 
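/*
 * Dispatch small compute shaders that initialize every VGPR and SGPR so the
 * GPR EDC counters are left in a known state before RAS starts counting.
 * One dispatch covers the VGPRs; the SGPR pass is split into two dispatches
 * that target different CU masks.
 */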
4585 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4586 {
4587 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4588 	struct amdgpu_ib ib;
4589 	struct dma_fence *f = NULL;
4590 	int r, i;
4591 	unsigned total_size, vgpr_offset, sgpr_offset;
4592 	u64 gpu_addr;
4593 
4594 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4595 						adev->gfx.config.max_cu_per_sh *
4596 						adev->gfx.config.max_sh_per_se;
4597 	int sgpr_work_group_size = 5;
4598 	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4599 	int vgpr_init_shader_size;
4600 	const u32 *vgpr_init_shader_ptr;
4601 	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4602 
4603 	/* only supported when RAS is enabled */
4604 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4605 		return 0;
4606 
4607 	/* bail if the compute ring is not ready */
4608 	if (!ring->sched.ready)
4609 		return 0;
4610 
4611 	if (adev->asic_type == CHIP_ARCTURUS) {
4612 		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4613 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4614 		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4615 	} else {
4616 		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4617 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4618 		vgpr_init_regs_ptr = vgpr_init_regs;
4619 	}
4620 
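	/*
	 * Per-dispatch command stream size in dwords: 3 per SET_SH_REG register
	 * write, 4 for the shader address (mmCOMPUTE_PGM_LO/HI), 5 for the
	 * DISPATCH_DIRECT packet and 2 for the CS partial flush event.
	 */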
4621 	total_size =
4622 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4623 	total_size +=
4624 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4625 	total_size +=
4626 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4627 	total_size = roundup2(total_size, 256);
4628 	vgpr_offset = total_size;
4629 	total_size += roundup2(vgpr_init_shader_size, 256);
4630 	sgpr_offset = total_size;
4631 	total_size += sizeof(sgpr_init_compute_shader);
4632 
4633 	/* allocate an indirect buffer to put the commands in */
4634 	memset(&ib, 0, sizeof(ib));
4635 	r = amdgpu_ib_get(adev, NULL, total_size,
4636 					AMDGPU_IB_POOL_DIRECT, &ib);
4637 	if (r) {
4638 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4639 		return r;
4640 	}
4641 
4642 	/* load the compute shaders */
4643 	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4644 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4645 
4646 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4647 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4648 
4649 	/* init the ib length to 0 */
4650 	ib.length_dw = 0;
4651 
4652 	/* VGPR */
4653 	/* write the register state for the compute dispatch */
4654 	for (i = 0; i < gpr_reg_size; i++) {
4655 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4656 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4657 								- PACKET3_SET_SH_REG_START;
4658 		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4659 	}
4660 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4661 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4662 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4663 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4664 							- PACKET3_SET_SH_REG_START;
4665 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4666 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4667 
4668 	/* write dispatch packet */
4669 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4670 	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4671 	ib.ptr[ib.length_dw++] = 1; /* y */
4672 	ib.ptr[ib.length_dw++] = 1; /* z */
4673 	ib.ptr[ib.length_dw++] =
4674 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4675 
4676 	/* write CS partial flush packet */
4677 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4678 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4679 
4680 	/* SGPR1 */
4681 	/* write the register state for the compute dispatch */
4682 	for (i = 0; i < gpr_reg_size; i++) {
4683 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4684 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4685 								- PACKET3_SET_SH_REG_START;
4686 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4687 	}
4688 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4689 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4690 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4691 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4692 							- PACKET3_SET_SH_REG_START;
4693 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4694 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4695 
4696 	/* write dispatch packet */
4697 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4698 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4699 	ib.ptr[ib.length_dw++] = 1; /* y */
4700 	ib.ptr[ib.length_dw++] = 1; /* z */
4701 	ib.ptr[ib.length_dw++] =
4702 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4703 
4704 	/* write CS partial flush packet */
4705 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4706 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4707 
4708 	/* SGPR2 */
4709 	/* write the register state for the compute dispatch */
4710 	for (i = 0; i < gpr_reg_size; i++) {
4711 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4712 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4713 								- PACKET3_SET_SH_REG_START;
4714 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4715 	}
4716 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4717 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4718 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4719 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4720 							- PACKET3_SET_SH_REG_START;
4721 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4722 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4723 
4724 	/* write dispatch packet */
4725 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4726 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4727 	ib.ptr[ib.length_dw++] = 1; /* y */
4728 	ib.ptr[ib.length_dw++] = 1; /* z */
4729 	ib.ptr[ib.length_dw++] =
4730 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4731 
4732 	/* write CS partial flush packet */
4733 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4734 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4735 
4736 	/* schedule the ib on the ring */
4737 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4738 	if (r) {
4739 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4740 		goto fail;
4741 	}
4742 
4743 	/* wait for the GPU to finish processing the IB */
4744 	r = dma_fence_wait(f, false);
4745 	if (r) {
4746 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4747 		goto fail;
4748 	}
4749 
4750 fail:
4751 	amdgpu_ib_free(adev, &ib, NULL);
4752 	dma_fence_put(f);
4753 
4754 	return r;
4755 }
4756 
4757 static int gfx_v9_0_early_init(void *handle)
4758 {
4759 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4760 
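	/* Arcturus and Aldebaran are compute-only ASICs with no gfx ring */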
4761 	if (adev->asic_type == CHIP_ARCTURUS ||
4762 	    adev->asic_type == CHIP_ALDEBARAN)
4763 		adev->gfx.num_gfx_rings = 0;
4764 	else
4765 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4766 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4767 					  AMDGPU_MAX_COMPUTE_RINGS);
4768 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4769 	gfx_v9_0_set_ring_funcs(adev);
4770 	gfx_v9_0_set_irq_funcs(adev);
4771 	gfx_v9_0_set_gds_init(adev);
4772 	gfx_v9_0_set_rlc_funcs(adev);
4773 
4774 	return 0;
4775 }
4776 
4777 static int gfx_v9_0_ecc_late_init(void *handle)
4778 {
4779 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4780 	int r;
4781 
4782 	/*
4783 	 * Temporary workaround: on several cards the CP firmware fails to
4784 	 * update the read pointer while CPDMA writes the clearing operation
4785 	 * to GDS during the suspend/resume sequence. So just limit this
4786 	 * operation to the cold-boot sequence.
4787 	 */
4788 	if ((!adev->in_suspend) &&
4789 	    (adev->gds.gds_size)) {
4790 		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4791 		if (r)
4792 			return r;
4793 	}
4794 
4795 	/* requires IBs, so do this in late init after the IB pool is initialized */
4796 	if (adev->asic_type == CHIP_ALDEBARAN)
4797 		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4798 	else
4799 		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4800 
4801 	if (r)
4802 		return r;
4803 
4804 	if (adev->gfx.ras_funcs &&
4805 	    adev->gfx.ras_funcs->ras_late_init) {
4806 		r = adev->gfx.ras_funcs->ras_late_init(adev);
4807 		if (r)
4808 			return r;
4809 	}
4810 
4811 	if (adev->gfx.ras_funcs &&
4812 	    adev->gfx.ras_funcs->enable_watchdog_timer)
4813 		adev->gfx.ras_funcs->enable_watchdog_timer(adev);
4814 
4815 	return 0;
4816 }
4817 
4818 static int gfx_v9_0_late_init(void *handle)
4819 {
4820 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4821 	int r;
4822 
4823 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4824 	if (r)
4825 		return r;
4826 
4827 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4828 	if (r)
4829 		return r;
4830 
4831 	r = gfx_v9_0_ecc_late_init(handle);
4832 	if (r)
4833 		return r;
4834 
4835 	return 0;
4836 }
4837 
4838 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4839 {
4840 	uint32_t rlc_setting;
4841 
4842 	/* if RLC is not enabled, do nothing */
4843 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4844 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4845 		return false;
4846 
4847 	return true;
4848 }
4849 
4850 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4851 {
4852 	uint32_t data;
4853 	unsigned i;
4854 
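	/*
	 * Request RLC safe mode: write CMD with MESSAGE = 1, then poll below
	 * until the RLC acknowledges by clearing the CMD field.
	 */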
4855 	data = RLC_SAFE_MODE__CMD_MASK;
4856 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4857 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4858 
4859 	/* wait for RLC_SAFE_MODE */
4860 	for (i = 0; i < adev->usec_timeout; i++) {
4861 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4862 			break;
4863 		udelay(1);
4864 	}
4865 }
4866 
4867 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4868 {
4869 	uint32_t data;
4870 
4871 	data = RLC_SAFE_MODE__CMD_MASK;
4872 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4873 }
4874 
4875 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4876 						bool enable)
4877 {
4878 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4879 
4880 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4881 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4882 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4883 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4884 	} else {
4885 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4886 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4887 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4888 	}
4889 
4890 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4891 }
4892 
4893 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4894 						bool enable)
4895 {
4896 	/* TODO: double check whether this needs to run under RLC safe mode */
4897 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4898 
4899 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4900 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4901 	else
4902 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4903 
4904 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4905 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4906 	else
4907 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4908 
4909 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4910 }
4911 
4912 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4913 						      bool enable)
4914 {
4915 	uint32_t data, def;
4916 
4917 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4918 
4919 	/* It is disabled by HW by default */
4920 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4921 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4922 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4923 
4924 		if (adev->asic_type != CHIP_VEGA12)
4925 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4926 
4927 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4928 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4929 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4930 
4931 		/* only for Vega10 & Raven1 */
4932 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4933 
4934 		if (def != data)
4935 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4936 
4937 		/* MGLS is a global flag to control all MGLS in GFX */
4938 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4939 			/* 2 - RLC memory Light sleep */
4940 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4941 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4942 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4943 				if (def != data)
4944 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4945 			}
4946 			/* 3 - CP memory Light sleep */
4947 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4948 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4949 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4950 				if (def != data)
4951 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4952 			}
4953 		}
4954 	} else {
4955 		/* 1 - MGCG_OVERRIDE */
4956 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4957 
4958 		if (adev->asic_type != CHIP_VEGA12)
4959 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4960 
4961 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4962 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4963 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4964 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4965 
4966 		if (def != data)
4967 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4968 
4969 		/* 2 - disable MGLS in RLC */
4970 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4971 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4972 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4973 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4974 		}
4975 
4976 		/* 3 - disable MGLS in CP */
4977 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4978 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4979 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4980 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4981 		}
4982 	}
4983 
4984 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4985 }
4986 
4987 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4988 					   bool enable)
4989 {
4990 	uint32_t data, def;
4991 
4992 	if (!adev->gfx.num_gfx_rings)
4993 		return;
4994 
4995 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4996 
4997 	/* Enable 3D CGCG/CGLS */
4998 	if (enable) {
4999 		/* write cmd to clear the cgcg/cgls override */
5000 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5001 		/* unset CGCG override */
5002 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
5003 		/* update CGCG and CGLS override bits */
5004 		if (def != data)
5005 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5006 
5007 		/* enable 3D CGCG FSM (0x0000363f) */
5008 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5009 
5010 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5011 			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5012 				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5013 		else
5014 			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
5015 
5016 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5017 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5018 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5019 		if (def != data)
5020 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5021 
5022 		/* set IDLE_POLL_COUNT(0x00900100) */
5023 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5024 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5025 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5026 		if (def != data)
5027 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5028 	} else {
5029 		/* Disable CGCG/CGLS */
5030 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5031 		/* disable cgcg; cgls should be disabled as well */
5032 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5033 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5034 		/* disable cgcg and cgls in FSM */
5035 		if (def != data)
5036 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5037 	}
5038 
5039 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5040 }
5041 
5042 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5043 						      bool enable)
5044 {
5045 	uint32_t def, data;
5046 
5047 	amdgpu_gfx_rlc_enter_safe_mode(adev);
5048 
5049 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5050 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5051 		/* unset CGCG override */
5052 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5053 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5054 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5055 		else
5056 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5057 		/* update CGCG and CGLS override bits */
5058 		if (def != data)
5059 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5060 
5061 		/* enable CGCG FSM (0x0000363F) */
5062 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5063 
5064 		if (adev->asic_type == CHIP_ARCTURUS)
5065 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5066 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5067 		else
5068 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5069 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5070 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5071 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5072 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5073 		if (def != data)
5074 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5075 
5076 		/* set IDLE_POLL_COUNT(0x00900100) */
5077 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5078 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5079 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5080 		if (def != data)
5081 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5082 	} else {
5083 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5084 		/* reset CGCG/CGLS bits */
5085 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5086 		/* disable cgcg and cgls in FSM */
5087 		if (def != data)
5088 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5089 	}
5090 
5091 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5092 }
5093 
5094 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5095 					    bool enable)
5096 {
5097 	if (enable) {
5098 		/* CGCG/CGLS should be enabled after MGCG/MGLS
5099 		 * ===  MGCG + MGLS ===
5100 		 */
5101 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5102 		/* ===  CGCG/CGLS for GFX 3D Only === */
5103 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5104 		/* ===  CGCG + CGLS === */
5105 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5106 	} else {
5107 		/* CGCG/CGLS should be disabled before MGCG/MGLS
5108 		 * ===  CGCG + CGLS ===
5109 		 */
5110 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5111 		/* ===  CGCG/CGLS for GFX 3D Only === */
5112 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5113 		/* ===  MGCG + MGLS === */
5114 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5115 	}
5116 	return 0;
5117 }
5118 
5119 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5120 {
5121 	u32 reg, data;
5122 
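	/*
	 * Select which VMID the RLC streaming performance monitor (SPM) uses
	 * for its memory accesses.  In SR-IOV one-VF mode the register is
	 * accessed directly (NO_KIQ) instead of going through the KIQ.
	 */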
5123 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5124 	if (amdgpu_sriov_is_pp_one_vf(adev))
5125 		data = RREG32_NO_KIQ(reg);
5126 	else
5127 		data = RREG32(reg);
5128 
5129 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5130 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5131 
5132 	if (amdgpu_sriov_is_pp_one_vf(adev))
5133 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5134 	else
5135 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5136 }
5137 
5138 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5139 					uint32_t offset,
5140 					struct soc15_reg_rlcg *entries, int arr_size)
5141 {
5142 	int i;
5143 	uint32_t reg;
5144 
5145 	if (!entries)
5146 		return false;
5147 
5148 	for (i = 0; i < arr_size; i++) {
5149 		const struct soc15_reg_rlcg *entry;
5150 
5151 		entry = &entries[i];
5152 		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5153 		if (offset == reg)
5154 			return true;
5155 	}
5156 
5157 	return false;
5158 }
5159 
5160 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5161 {
5162 	return gfx_v9_0_check_rlcg_range(adev, offset,
5163 					(void *)rlcg_access_gc_9_0,
5164 					ARRAY_SIZE(rlcg_access_gc_9_0));
5165 }
5166 
5167 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5168 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5169 	.set_safe_mode = gfx_v9_0_set_safe_mode,
5170 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
5171 	.init = gfx_v9_0_rlc_init,
5172 	.get_csb_size = gfx_v9_0_get_csb_size,
5173 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
5174 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5175 	.resume = gfx_v9_0_rlc_resume,
5176 	.stop = gfx_v9_0_rlc_stop,
5177 	.reset = gfx_v9_0_rlc_reset,
5178 	.start = gfx_v9_0_rlc_start,
5179 	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
5180 	.sriov_wreg = gfx_v9_0_sriov_wreg,
5181 	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5182 };
5183 
5184 static int gfx_v9_0_set_powergating_state(void *handle,
5185 					  enum amd_powergating_state state)
5186 {
5187 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5188 	bool enable = (state == AMD_PG_STATE_GATE);
5189 
5190 	switch (adev->asic_type) {
5191 	case CHIP_RAVEN:
5192 	case CHIP_RENOIR:
5193 		if (!enable)
5194 			amdgpu_gfx_off_ctrl(adev, false);
5195 
5196 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5197 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5198 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5199 		} else {
5200 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5201 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5202 		}
5203 
5204 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5205 			gfx_v9_0_enable_cp_power_gating(adev, true);
5206 		else
5207 			gfx_v9_0_enable_cp_power_gating(adev, false);
5208 
5209 		/* update gfx cgpg state */
5210 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5211 
5212 		/* update mgcg state */
5213 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5214 
5215 		if (enable)
5216 			amdgpu_gfx_off_ctrl(adev, true);
5217 		break;
5218 	case CHIP_VEGA12:
5219 		amdgpu_gfx_off_ctrl(adev, enable);
5220 		break;
5221 	default:
5222 		break;
5223 	}
5224 
5225 	return 0;
5226 }
5227 
5228 static int gfx_v9_0_set_clockgating_state(void *handle,
5229 					  enum amd_clockgating_state state)
5230 {
5231 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5232 
5233 	if (amdgpu_sriov_vf(adev))
5234 		return 0;
5235 
5236 	switch (adev->asic_type) {
5237 	case CHIP_VEGA10:
5238 	case CHIP_VEGA12:
5239 	case CHIP_VEGA20:
5240 	case CHIP_RAVEN:
5241 	case CHIP_ARCTURUS:
5242 	case CHIP_RENOIR:
5243 	case CHIP_ALDEBARAN:
5244 		gfx_v9_0_update_gfx_clock_gating(adev,
5245 						 state == AMD_CG_STATE_GATE);
5246 		break;
5247 	default:
5248 		break;
5249 	}
5250 	return 0;
5251 }
5252 
5253 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
5254 {
5255 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5256 	int data;
5257 
5258 	if (amdgpu_sriov_vf(adev))
5259 		*flags = 0;
5260 
5261 	/* AMD_CG_SUPPORT_GFX_MGCG */
5262 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5263 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5264 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5265 
5266 	/* AMD_CG_SUPPORT_GFX_CGCG */
5267 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5268 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5269 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5270 
5271 	/* AMD_CG_SUPPORT_GFX_CGLS */
5272 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5273 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5274 
5275 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5276 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5277 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5278 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5279 
5280 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5281 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5282 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5283 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5284 
5285 	if (adev->asic_type != CHIP_ARCTURUS) {
5286 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5287 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5288 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5289 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5290 
5291 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5292 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5293 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5294 	}
5295 }
5296 
5297 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5298 {
5299 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
5300 }
5301 
5302 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5303 {
5304 	struct amdgpu_device *adev = ring->adev;
5305 	u64 wptr;
5306 
5307 	/* XXX check if swapping is necessary on BE */
5308 	if (ring->use_doorbell) {
5309 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
5310 	} else {
5311 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5312 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5313 	}
5314 
5315 	return wptr;
5316 }
5317 
5318 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5319 {
5320 	struct amdgpu_device *adev = ring->adev;
5321 
5322 	if (ring->use_doorbell) {
5323 		/* XXX check if swapping is necessary on BE */
5324 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5325 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5326 	} else {
5327 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5328 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5329 	}
5330 }
5331 
5332 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5333 {
5334 	struct amdgpu_device *adev = ring->adev;
5335 	u32 ref_and_mask, reg_mem_engine;
5336 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5337 
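	/*
	 * Each compute MEC/pipe has its own HDP flush request/done bit in the
	 * NBIO registers; the gfx ring uses the CP0 bit and waits via the PFP.
	 */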
5338 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5339 		switch (ring->me) {
5340 		case 1:
5341 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5342 			break;
5343 		case 2:
5344 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5345 			break;
5346 		default:
5347 			return;
5348 		}
5349 		reg_mem_engine = 0;
5350 	} else {
5351 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5352 		reg_mem_engine = 1; /* pfp */
5353 	}
5354 
5355 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5356 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5357 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5358 			      ref_and_mask, ref_and_mask, 0x20);
5359 }
5360 
5361 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5362 					struct amdgpu_job *job,
5363 					struct amdgpu_ib *ib,
5364 					uint32_t flags)
5365 {
5366 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5367 	u32 header, control = 0;
5368 
5369 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5370 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5371 	else
5372 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5373 
5374 	control |= ib->length_dw | (vmid << 24);
5375 
5376 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5377 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5378 
5379 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5380 			gfx_v9_0_ring_emit_de_meta(ring);
5381 	}
5382 
5383 	amdgpu_ring_write(ring, header);
5384 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5385 	amdgpu_ring_write(ring,
5386 #ifdef __BIG_ENDIAN
5387 		(2 << 0) |
5388 #endif
5389 		lower_32_bits(ib->gpu_addr));
5390 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5391 	amdgpu_ring_write(ring, control);
5392 }
5393 
5394 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5395 					  struct amdgpu_job *job,
5396 					  struct amdgpu_ib *ib,
5397 					  uint32_t flags)
5398 {
5399 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5400 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5401 
5402 	/* Currently, there is a high possibility of getting a wave ID mismatch
5403 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5404 	 * different wave IDs than the GDS expects. This situation happens
5405 	 * randomly when at least 5 compute pipes use GDS ordered append.
5406 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5407 	 * Those are probably bugs somewhere else in the kernel driver.
5408 	 *
5409 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5410 	 * GDS to 0 for this ring (me/pipe).
5411 	 */
5412 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5413 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5414 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5415 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5416 	}
5417 
5418 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5419 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5420 	amdgpu_ring_write(ring,
5421 #ifdef __BIG_ENDIAN
5422 				(2 << 0) |
5423 #endif
5424 				lower_32_bits(ib->gpu_addr));
5425 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5426 	amdgpu_ring_write(ring, control);
5427 }
5428 
5429 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5430 				     u64 seq, unsigned flags)
5431 {
5432 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5433 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5434 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5435 
5436 	/* RELEASE_MEM - flush caches, send int */
5437 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5438 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5439 					       EOP_TC_NC_ACTION_EN) :
5440 					      (EOP_TCL1_ACTION_EN |
5441 					       EOP_TC_ACTION_EN |
5442 					       EOP_TC_WB_ACTION_EN |
5443 					       EOP_TC_MD_ACTION_EN)) |
5444 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5445 				 EVENT_INDEX(5)));
5446 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5447 
5448 	/*
5449 	 * The address must be qword aligned for a 64-bit write and dword
5450 	 * aligned when only the low 32 bits are sent (the high bits are discarded).
5451 	 */
5452 	if (write64bit)
5453 		BUG_ON(addr & 0x7);
5454 	else
5455 		BUG_ON(addr & 0x3);
5456 	amdgpu_ring_write(ring, lower_32_bits(addr));
5457 	amdgpu_ring_write(ring, upper_32_bits(addr));
5458 	amdgpu_ring_write(ring, lower_32_bits(seq));
5459 	amdgpu_ring_write(ring, upper_32_bits(seq));
5460 	amdgpu_ring_write(ring, 0);
5461 }
5462 
5463 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5464 {
5465 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5466 	uint32_t seq = ring->fence_drv.sync_seq;
5467 	uint64_t addr = ring->fence_drv.gpu_addr;
5468 
5469 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5470 			      lower_32_bits(addr), upper_32_bits(addr),
5471 			      seq, 0xffffffff, 4);
5472 }
5473 
5474 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5475 					unsigned vmid, uint64_t pd_addr)
5476 {
5477 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5478 
5479 	/* compute doesn't have PFP */
5480 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5481 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5482 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5483 		amdgpu_ring_write(ring, 0x0);
5484 	}
5485 }
5486 
5487 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5488 {
5489 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5490 }
5491 
5492 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5493 {
5494 	u64 wptr;
5495 
5496 	/* XXX check if swapping is necessary on BE */
5497 	if (ring->use_doorbell)
5498 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5499 	else
5500 		BUG();
5501 	return wptr;
5502 }
5503 
5504 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5505 {
5506 	struct amdgpu_device *adev = ring->adev;
5507 
5508 	/* XXX check if swapping is necessary on BE */
5509 	if (ring->use_doorbell) {
5510 		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5511 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5512 	} else {
5513 		BUG(); /* only DOORBELL method supported on gfx9 now */
5514 	}
5515 }
5516 
5517 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5518 					 u64 seq, unsigned int flags)
5519 {
5520 	struct amdgpu_device *adev = ring->adev;
5521 
5522 	/* we only allocate 32 bits for each seq writeback address */
5523 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5524 
5525 	/* write fence seq to the "addr" */
5526 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5527 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5528 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5529 	amdgpu_ring_write(ring, lower_32_bits(addr));
5530 	amdgpu_ring_write(ring, upper_32_bits(addr));
5531 	amdgpu_ring_write(ring, lower_32_bits(seq));
5532 
5533 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5534 		/* set register to trigger INT */
5535 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5536 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5537 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5538 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5539 		amdgpu_ring_write(ring, 0);
5540 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5541 	}
5542 }
5543 
5544 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5545 {
5546 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5547 	amdgpu_ring_write(ring, 0);
5548 }
5549 
5550 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5551 {
5552 	struct v9_ce_ib_state ce_payload = {0};
5553 	uint64_t csa_addr;
5554 	int cnt;
5555 
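	/*
	 * Write a zero-initialized CE payload into the gfx CSA, which is used
	 * as preemption metadata by the CP.
	 */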
5556 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5557 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5558 
5559 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5560 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5561 				 WRITE_DATA_DST_SEL(8) |
5562 				 WR_CONFIRM) |
5563 				 WRITE_DATA_CACHE_POLICY(0));
5564 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5565 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5566 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5567 }
5568 
5569 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5570 {
5571 	struct v9_de_ib_state de_payload = {0};
5572 	uint64_t csa_addr, gds_addr;
5573 	int cnt;
5574 
5575 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5576 	gds_addr = csa_addr + 4096;
5577 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5578 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5579 
5580 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5581 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5582 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5583 				 WRITE_DATA_DST_SEL(8) |
5584 				 WR_CONFIRM) |
5585 				 WRITE_DATA_CACHE_POLICY(0));
5586 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5587 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5588 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5589 }
5590 
5591 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5592 				   bool secure)
5593 {
5594 	uint32_t v = secure ? FRAME_TMZ : 0;
5595 
5596 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5597 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5598 }
5599 
5600 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5601 {
5602 	uint32_t dw2 = 0;
5603 
5604 	if (amdgpu_sriov_vf(ring->adev))
5605 		gfx_v9_0_ring_emit_ce_meta(ring);
5606 
5607 	dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
5608 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5609 		/* set load_global_config & load_global_uconfig */
5610 		dw2 |= 0x8001;
5611 		/* set load_cs_sh_regs */
5612 		dw2 |= 0x01000000;
5613 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5614 		dw2 |= 0x10002;
5615 
5616 		/* set load_ce_ram if a preamble is present */
5617 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5618 			dw2 |= 0x10000000;
5619 	} else {
5620 		/* still load_ce_ram the first time a preamble is presented,
5621 		 * even though no context switch happens.
5622 		 */
5623 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5624 			dw2 |= 0x10000000;
5625 	}
5626 
5627 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5628 	amdgpu_ring_write(ring, dw2);
5629 	amdgpu_ring_write(ring, 0);
5630 }
5631 
5632 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5633 {
5634 	unsigned ret;
5635 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5636 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5637 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5638 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5639 	ret = ring->wptr & ring->buf_mask;
5640 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5641 	return ret;
5642 }
5643 
5644 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5645 {
5646 	unsigned cur;
5647 	BUG_ON(offset > ring->buf_mask);
5648 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5649 
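	/*
	 * Patch the dummy dword written by emit_init_cond_exec with the number
	 * of dwords between the COND_EXEC packet and the current write pointer,
	 * wrapping around the ring buffer if necessary.
	 */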
5650 	cur = (ring->wptr & ring->buf_mask) - 1;
5651 	if (likely(cur > offset))
5652 		ring->ring[offset] = cur - offset;
5653 	else
5654 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5655 }
5656 
5657 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5658 				    uint32_t reg_val_offs)
5659 {
5660 	struct amdgpu_device *adev = ring->adev;
5661 
5662 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5663 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5664 				(5 << 8) |	/* dst: memory */
5665 				(1 << 20));	/* write confirm */
5666 	amdgpu_ring_write(ring, reg);
5667 	amdgpu_ring_write(ring, 0);
5668 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5669 				reg_val_offs * 4));
5670 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5671 				reg_val_offs * 4));
5672 }
5673 
5674 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5675 				    uint32_t val)
5676 {
5677 	uint32_t cmd = 0;
5678 
5679 	switch (ring->funcs->type) {
5680 	case AMDGPU_RING_TYPE_GFX:
5681 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5682 		break;
5683 	case AMDGPU_RING_TYPE_KIQ:
5684 		cmd = (1 << 16); /* no inc addr */
5685 		break;
5686 	default:
5687 		cmd = WR_CONFIRM;
5688 		break;
5689 	}
5690 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5691 	amdgpu_ring_write(ring, cmd);
5692 	amdgpu_ring_write(ring, reg);
5693 	amdgpu_ring_write(ring, 0);
5694 	amdgpu_ring_write(ring, val);
5695 }
5696 
5697 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5698 					uint32_t val, uint32_t mask)
5699 {
5700 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5701 }
5702 
5703 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5704 						  uint32_t reg0, uint32_t reg1,
5705 						  uint32_t ref, uint32_t mask)
5706 {
5707 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5708 	struct amdgpu_device *adev = ring->adev;
5709 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5710 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5711 
5712 	if (fw_version_ok)
5713 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5714 				      ref, mask, 0x20);
5715 	else
5716 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5717 							   ref, mask);
5718 }
5719 
5720 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5721 {
5722 	struct amdgpu_device *adev = ring->adev;
5723 	uint32_t value = 0;
5724 
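	/* ask the SQ to kill the waves of the hung VMID (soft ring recovery) */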
5725 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5726 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5727 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5728 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5729 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5730 }
5731 
5732 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5733 						 enum amdgpu_interrupt_state state)
5734 {
5735 	switch (state) {
5736 	case AMDGPU_IRQ_STATE_DISABLE:
5737 	case AMDGPU_IRQ_STATE_ENABLE:
5738 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5739 			       TIME_STAMP_INT_ENABLE,
5740 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5741 		break;
5742 	default:
5743 		break;
5744 	}
5745 }
5746 
5747 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5748 						     int me, int pipe,
5749 						     enum amdgpu_interrupt_state state)
5750 {
5751 	u32 mec_int_cntl, mec_int_cntl_reg;
5752 
5753 	/*
5754 	 * amdgpu controls only the first MEC. That's why this function only
5755 	 * handles the setting of interrupts for this specific MEC. All other
5756 	 * pipes' interrupts are set by amdkfd.
5757 	 */
5758 
5759 	if (me == 1) {
5760 		switch (pipe) {
5761 		case 0:
5762 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5763 			break;
5764 		case 1:
5765 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5766 			break;
5767 		case 2:
5768 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5769 			break;
5770 		case 3:
5771 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5772 			break;
5773 		default:
5774 			DRM_DEBUG("invalid pipe %d\n", pipe);
5775 			return;
5776 		}
5777 	} else {
5778 		DRM_DEBUG("invalid me %d\n", me);
5779 		return;
5780 	}
5781 
5782 	switch (state) {
5783 	case AMDGPU_IRQ_STATE_DISABLE:
5784 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5785 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5786 					     TIME_STAMP_INT_ENABLE, 0);
5787 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5788 		break;
5789 	case AMDGPU_IRQ_STATE_ENABLE:
5790 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5791 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5792 					     TIME_STAMP_INT_ENABLE, 1);
5793 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5794 		break;
5795 	default:
5796 		break;
5797 	}
5798 }
5799 
5800 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5801 					     struct amdgpu_irq_src *source,
5802 					     unsigned type,
5803 					     enum amdgpu_interrupt_state state)
5804 {
5805 	switch (state) {
5806 	case AMDGPU_IRQ_STATE_DISABLE:
5807 	case AMDGPU_IRQ_STATE_ENABLE:
5808 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5809 			       PRIV_REG_INT_ENABLE,
5810 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5811 		break;
5812 	default:
5813 		break;
5814 	}
5815 
5816 	return 0;
5817 }
5818 
5819 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5820 					      struct amdgpu_irq_src *source,
5821 					      unsigned type,
5822 					      enum amdgpu_interrupt_state state)
5823 {
5824 	switch (state) {
5825 	case AMDGPU_IRQ_STATE_DISABLE:
5826 	case AMDGPU_IRQ_STATE_ENABLE:
5827 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5828 			       PRIV_INSTR_INT_ENABLE,
5829 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5830 		break;
5831 	default:
5832 		break;
5833 	}
5834 
5835 	return 0;
5836 }
5837 
5838 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5839 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5840 			CP_ECC_ERROR_INT_ENABLE, 1)
5841 
5842 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5843 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5844 			CP_ECC_ERROR_INT_ENABLE, 0)
5845 
5846 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5847 					      struct amdgpu_irq_src *source,
5848 					      unsigned type,
5849 					      enum amdgpu_interrupt_state state)
5850 {
5851 	switch (state) {
5852 	case AMDGPU_IRQ_STATE_DISABLE:
5853 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5854 				CP_ECC_ERROR_INT_ENABLE, 0);
5855 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5856 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5857 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5858 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5859 		break;
5860 
5861 	case AMDGPU_IRQ_STATE_ENABLE:
5862 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5863 				CP_ECC_ERROR_INT_ENABLE, 1);
5864 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5865 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5866 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5867 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5868 		break;
5869 	default:
5870 		break;
5871 	}
5872 
5873 	return 0;
5874 }
5875 
5876 
5877 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5878 					    struct amdgpu_irq_src *src,
5879 					    unsigned type,
5880 					    enum amdgpu_interrupt_state state)
5881 {
5882 	switch (type) {
5883 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5884 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5885 		break;
5886 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5887 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5888 		break;
5889 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5890 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5891 		break;
5892 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5893 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5894 		break;
5895 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5896 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5897 		break;
5898 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5899 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5900 		break;
5901 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5902 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5903 		break;
5904 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5905 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5906 		break;
5907 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5908 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5909 		break;
5910 	default:
5911 		break;
5912 	}
5913 	return 0;
5914 }
5915 
5916 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5917 			    struct amdgpu_irq_src *source,
5918 			    struct amdgpu_iv_entry *entry)
5919 {
5920 	int i;
5921 	u8 me_id, pipe_id, queue_id;
5922 	struct amdgpu_ring *ring;
5923 
5924 	DRM_DEBUG("IH: CP EOP\n");
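	/* The IV ring_id encodes the source queue: bits [3:2] hold the ME,
	 * bits [1:0] the pipe and bits [6:4] the queue within that pipe.
	 */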
5925 	me_id = (entry->ring_id & 0x0c) >> 2;
5926 	pipe_id = (entry->ring_id & 0x03) >> 0;
5927 	queue_id = (entry->ring_id & 0x70) >> 4;
5928 
5929 	switch (me_id) {
5930 	case 0:
5931 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5932 		break;
5933 	case 1:
5934 	case 2:
5935 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5936 			ring = &adev->gfx.compute_ring[i];
5937 			/* Per-queue interrupt is supported for MEC starting from VI.
5938 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5939 			 */
5940 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5941 				amdgpu_fence_process(ring);
5942 		}
5943 		break;
5944 	}
5945 	return 0;
5946 }
5947 
5948 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5949 			   struct amdgpu_iv_entry *entry)
5950 {
5951 	u8 me_id, pipe_id, queue_id;
5952 	struct amdgpu_ring *ring;
5953 	int i;
5954 
5955 	me_id = (entry->ring_id & 0x0c) >> 2;
5956 	pipe_id = (entry->ring_id & 0x03) >> 0;
5957 	queue_id = (entry->ring_id & 0x70) >> 4;
5958 
5959 	switch (me_id) {
5960 	case 0:
5961 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5962 		break;
5963 	case 1:
5964 	case 2:
5965 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5966 			ring = &adev->gfx.compute_ring[i];
5967 			if (ring->me == me_id && ring->pipe == pipe_id &&
5968 			    ring->queue == queue_id)
5969 				drm_sched_fault(&ring->sched);
5970 		}
5971 		break;
5972 	}
5973 }
5974 
5975 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5976 				 struct amdgpu_irq_src *source,
5977 				 struct amdgpu_iv_entry *entry)
5978 {
5979 	DRM_ERROR("Illegal register access in command stream\n");
5980 	gfx_v9_0_fault(adev, entry);
5981 	return 0;
5982 }
5983 
5984 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5985 				  struct amdgpu_irq_src *source,
5986 				  struct amdgpu_iv_entry *entry)
5987 {
5988 	DRM_ERROR("Illegal instruction in command stream\n");
5989 	gfx_v9_0_fault(adev, entry);
5990 	return 0;
5991 }
5992 
5993 
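/* Table mapping each EDC counter register to its SEC (correctable) and
 * DED (uncorrectable) count fields; gfx_v9_0_ras_error_count() walks this
 * table to turn raw counter values into RAS error totals.
 */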
5994 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5995 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5996 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5997 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5998 	},
5999 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
6000 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
6001 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
6002 	},
6003 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6004 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
6005 	  0, 0
6006 	},
6007 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6008 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
6009 	  0, 0
6010 	},
6011 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6012 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6013 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6014 	},
6015 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6016 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6017 	  0, 0
6018 	},
6019 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6020 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6021 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6022 	},
6023 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6024 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6025 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6026 	},
6027 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6028 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6029 	  0, 0
6030 	},
6031 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6032 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6033 	  0, 0
6034 	},
6035 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6036 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6037 	  0, 0
6038 	},
6039 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6040 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6041 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6042 	},
6043 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6044 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6045 	  0, 0
6046 	},
6047 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6048 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6049 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6050 	},
6051 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6052 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6053 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6054 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6055 	},
6056 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6057 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6058 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6059 	  0, 0
6060 	},
6061 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6062 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6063 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6064 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6065 	},
6066 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6067 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6068 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6069 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6070 	},
6071 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6072 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6073 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6074 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6075 	},
6076 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6077 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6078 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6079 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6080 	},
6081 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6082 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6083 	  0, 0
6084 	},
6085 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6086 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6087 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6088 	},
6089 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6090 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6091 	  0, 0
6092 	},
6093 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6094 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6095 	  0, 0
6096 	},
6097 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6098 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6099 	  0, 0
6100 	},
6101 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6102 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6103 	  0, 0
6104 	},
6105 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6106 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6107 	  0, 0
6108 	},
6109 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6110 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6111 	  0, 0
6112 	},
6113 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6114 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6115 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6116 	},
6117 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6118 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6119 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6120 	},
6121 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6122 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6123 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6124 	},
6125 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6126 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6127 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6128 	},
6129 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6130 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6131 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6132 	},
6133 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6134 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6135 	  0, 0
6136 	},
6137 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6138 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6139 	  0, 0
6140 	},
6141 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6142 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6143 	  0, 0
6144 	},
6145 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6146 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6147 	  0, 0
6148 	},
6149 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6150 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6151 	  0, 0
6152 	},
6153 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6154 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6155 	  0, 0
6156 	},
6157 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6158 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6159 	  0, 0
6160 	},
6161 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6162 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6163 	  0, 0
6164 	},
6165 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6166 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6167 	  0, 0
6168 	},
6169 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6170 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6171 	  0, 0
6172 	},
6173 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6174 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6175 	  0, 0
6176 	},
6177 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6178 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6179 	  0, 0
6180 	},
6181 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6182 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6183 	  0, 0
6184 	},
6185 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6186 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6187 	  0, 0
6188 	},
6189 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6190 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6191 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6192 	},
6193 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6194 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6195 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6196 	},
6197 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6198 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6199 	  0, 0
6200 	},
6201 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6202 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6203 	  0, 0
6204 	},
6205 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6206 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6207 	  0, 0
6208 	},
6209 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6210 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6211 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6212 	},
6213 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6214 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6215 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6216 	},
6217 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6218 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6219 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6220 	},
6221 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6222 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6223 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6224 	},
6225 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6226 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6227 	  0, 0
6228 	},
6229 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6230 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6231 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6232 	},
6233 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6234 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6235 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6236 	},
6237 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6238 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6239 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6240 	},
6241 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6242 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6243 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6244 	},
6245 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6246 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6247 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6248 	},
6249 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6250 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6251 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6252 	},
6253 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6254 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6255 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6256 	},
6257 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6258 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6259 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6260 	},
6261 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6262 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6263 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6264 	},
6265 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6266 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6267 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6268 	},
6269 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6270 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6271 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6272 	},
6273 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6274 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6275 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6276 	},
6277 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6278 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6279 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6280 	},
6281 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6282 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6283 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6284 	},
6285 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6286 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6287 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6288 	},
6289 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6290 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6291 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6292 	},
6293 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6294 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6295 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6296 	},
6297 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6298 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6299 	  0, 0
6300 	},
6301 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6302 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6303 	  0, 0
6304 	},
6305 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6306 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6307 	  0, 0
6308 	},
6309 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6310 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6311 	  0, 0
6312 	},
6313 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6314 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6315 	  0, 0
6316 	},
6317 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6318 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6319 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6320 	},
6321 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6322 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6323 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6324 	},
6325 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6326 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6327 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6328 	},
6329 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6330 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6331 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6332 	},
6333 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6334 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6335 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6336 	},
6337 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6338 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6339 	  0, 0
6340 	},
6341 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6342 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6343 	  0, 0
6344 	},
6345 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6346 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6347 	  0, 0
6348 	},
6349 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6350 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6351 	  0, 0
6352 	},
6353 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6354 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6355 	  0, 0
6356 	},
6357 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6358 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6359 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6360 	},
6361 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6362 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6363 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6364 	},
6365 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6366 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6367 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6368 	},
6369 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6370 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6371 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6372 	},
6373 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6374 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6375 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6376 	},
6377 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6378 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6379 	  0, 0
6380 	},
6381 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6382 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6383 	  0, 0
6384 	},
6385 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6386 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6387 	  0, 0
6388 	},
6389 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6390 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6391 	  0, 0
6392 	},
6393 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6394 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6395 	  0, 0
6396 	},
6397 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6398 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6399 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6400 	},
6401 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6402 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6403 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6404 	},
6405 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6406 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6407 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6408 	},
6409 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6410 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6411 	  0, 0
6412 	},
6413 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6414 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6415 	  0, 0
6416 	},
6417 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6418 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6419 	  0, 0
6420 	},
6421 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6422 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6423 	  0, 0
6424 	},
6425 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6426 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6427 	  0, 0
6428 	},
6429 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6430 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6431 	  0, 0
6432 	}
6433 };
6434 
6435 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6436 				     void *inject_if)
6437 {
6438 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6439 	int ret;
6440 	struct ta_ras_trigger_error_input block_info = { 0 };
6441 
6442 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6443 		return -EINVAL;
6444 
6445 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6446 		return -EINVAL;
6447 
6448 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6449 		return -EPERM;
6450 
6451 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6452 	      info->head.type)) {
6453 		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6454 			ras_gfx_subblocks[info->head.sub_block_index].name,
6455 			info->head.type);
6456 		return -EPERM;
6457 	}
6458 
6459 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6460 	      info->head.type)) {
6461 		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6462 			ras_gfx_subblocks[info->head.sub_block_index].name,
6463 			info->head.type);
6464 		return -EPERM;
6465 	}
6466 
6467 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6468 	block_info.sub_block_index =
6469 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6470 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6471 	block_info.address = info->address;
6472 	block_info.value = info->value;
6473 
6474 	mutex_lock(&adev->grbm_idx_mutex);
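	/* The injection itself is carried out by the PSP RAS TA; grbm_idx_mutex,
	 * which serialises GRBM_GFX_INDEX programming, is held across the request.
	 */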
6475 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6476 	mutex_unlock(&adev->grbm_idx_mutex);
6477 
6478 	return ret;
6479 }
6480 
6481 static const char *vml2_mems[] = {
6482 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6483 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6484 	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
6485 	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
6486 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6487 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6488 	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
6489 	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6490 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6491 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6492 	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6493 	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6494 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6495 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6496 	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6497 	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6498 };
6499 
6500 static const char *vml2_walker_mems[] = {
6501 	"UTC_VML2_CACHE_PDE0_MEM0",
6502 	"UTC_VML2_CACHE_PDE0_MEM1",
6503 	"UTC_VML2_CACHE_PDE1_MEM0",
6504 	"UTC_VML2_CACHE_PDE1_MEM1",
6505 	"UTC_VML2_CACHE_PDE2_MEM0",
6506 	"UTC_VML2_CACHE_PDE2_MEM1",
6507 	"UTC_VML2_RDIF_LOG_FIFO",
6508 };
6509 
6510 static const char *atc_l2_cache_2m_mems[] = {
6511 	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6512 	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6513 	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6514 	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6515 };
6516 
6517 static const char *atc_l2_cache_4k_mems[] = {
6518 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6519 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6520 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6521 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6522 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6523 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6524 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6525 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6526 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6527 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6528 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6529 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6530 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6531 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6532 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6533 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6534 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6535 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6536 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6537 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6538 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6539 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6540 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6541 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6542 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6543 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6544 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6545 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6546 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6547 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6548 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6549 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6550 };
6551 
6552 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6553 					 struct ras_err_data *err_data)
6554 {
6555 	uint32_t i, data;
6556 	uint32_t sec_count, ded_count;
6557 
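	/* Reset the VML2 and ATC L2 EDC counters (select index 255, clear the
	 * CNT registers) before sampling each memory instance below.
	 */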
6558 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6559 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6560 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6561 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6562 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6563 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6564 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6565 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6566 
6567 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6568 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6569 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6570 
6571 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6572 		if (sec_count) {
6573 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6574 				"SEC %d\n", i, vml2_mems[i], sec_count);
6575 			err_data->ce_count += sec_count;
6576 		}
6577 
6578 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6579 		if (ded_count) {
6580 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6581 				"DED %d\n", i, vml2_mems[i], ded_count);
6582 			err_data->ue_count += ded_count;
6583 		}
6584 	}
6585 
6586 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6587 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6588 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6589 
6590 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6591 						SEC_COUNT);
6592 		if (sec_count) {
6593 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6594 				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6595 			err_data->ce_count += sec_count;
6596 		}
6597 
6598 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6599 						DED_COUNT);
6600 		if (ded_count) {
6601 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6602 				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6603 			err_data->ue_count += ded_count;
6604 		}
6605 	}
6606 
6607 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6608 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6609 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6610 
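		/* Only the SEC field, bits [14:13] of the 2M EDC counter, is
		 * sampled here; the 4K loop below also extracts a DED count
		 * from bits [16:15].
		 */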
6611 		sec_count = (data & 0x00006000L) >> 0xd;
6612 		if (sec_count) {
6613 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6614 				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6615 				sec_count);
6616 			err_data->ce_count += sec_count;
6617 		}
6618 	}
6619 
6620 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6621 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6622 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6623 
6624 		sec_count = (data & 0x00006000L) >> 0xd;
6625 		if (sec_count) {
6626 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6627 				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6628 				sec_count);
6629 			err_data->ce_count += sec_count;
6630 		}
6631 
6632 		ded_count = (data & 0x00018000L) >> 0xf;
6633 		if (ded_count) {
6634 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6635 				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6636 				ded_count);
6637 			err_data->ue_count += ded_count;
6638 		}
6639 	}
6640 
6641 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6642 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6643 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6644 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6645 
6646 	return 0;
6647 }
6648 
6649 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6650 	const struct soc15_reg_entry *reg,
6651 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6652 	uint32_t *sec_count, uint32_t *ded_count)
6653 {
6654 	uint32_t i;
6655 	uint32_t sec_cnt, ded_cnt;
6656 
6657 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6658 		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6659 			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6660 			gfx_v9_0_ras_fields[i].inst != reg->inst)
6661 			continue;
6662 
6663 		sec_cnt = (value &
6664 				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6665 				gfx_v9_0_ras_fields[i].sec_count_shift;
6666 		if (sec_cnt) {
6667 			dev_info(adev->dev, "GFX SubBlock %s, "
6668 				"Instance[%d][%d], SEC %d\n",
6669 				gfx_v9_0_ras_fields[i].name,
6670 				se_id, inst_id,
6671 				sec_cnt);
6672 			*sec_count += sec_cnt;
6673 		}
6674 
6675 		ded_cnt = (value &
6676 				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6677 				gfx_v9_0_ras_fields[i].ded_count_shift;
6678 		if (ded_cnt) {
6679 			dev_info(adev->dev, "GFX SubBlock %s, "
6680 				"Instance[%d][%d], DED %d\n",
6681 				gfx_v9_0_ras_fields[i].name,
6682 				se_id, inst_id,
6683 				ded_cnt);
6684 			*ded_count += ded_cnt;
6685 		}
6686 	}
6687 
6688 	return 0;
6689 }
6690 
6691 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6692 {
6693 	int i, j, k;
6694 
6695 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6696 		return;
6697 
6698 	/* read back registers to clear the counters */
6699 	mutex_lock(&adev->grbm_idx_mutex);
6700 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6701 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6702 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6703 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6704 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6705 			}
6706 		}
6707 	}
6708 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
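	/* Restore GRBM_GFX_INDEX to broadcast mode (SE, SH and instance
	 * broadcast bits set) after the per-instance selects above.
	 */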
6709 	mutex_unlock(&adev->grbm_idx_mutex);
6710 
6711 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6712 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6713 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6714 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6715 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6716 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6717 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6718 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6719 
6720 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6721 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6722 		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6723 	}
6724 
6725 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6726 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6727 		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6728 	}
6729 
6730 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6731 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6732 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6733 	}
6734 
6735 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6736 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6737 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6738 	}
6739 
6740 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6741 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6742 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6743 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6744 }
6745 
6746 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6747 					  void *ras_error_status)
6748 {
6749 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6750 	uint32_t sec_count = 0, ded_count = 0;
6751 	uint32_t i, j, k;
6752 	uint32_t reg_value;
6753 
6754 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6755 		return -EINVAL;
6756 
6757 	err_data->ue_count = 0;
6758 	err_data->ce_count = 0;
6759 
6760 	mutex_lock(&adev->grbm_idx_mutex);
6761 
6762 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
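	/* Read every EDC counter register on every SE/instance; non-zero
	 * values are decoded into SEC/DED totals by gfx_v9_0_ras_error_count().
	 */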
6763 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6764 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6765 				gfx_v9_0_select_se_sh(adev, j, 0, k);
6766 				reg_value =
6767 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6768 				if (reg_value)
6769 					gfx_v9_0_ras_error_count(adev,
6770 						&gfx_v9_0_edc_counter_regs[i],
6771 						j, k, reg_value,
6772 						&sec_count, &ded_count);
6773 			}
6774 		}
6775 	}
6776 
6777 	err_data->ce_count += sec_count;
6778 	err_data->ue_count += ded_count;
6779 
6780 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6781 	mutex_unlock(&adev->grbm_idx_mutex);
6782 
6783 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6784 
6785 	return 0;
6786 }
6787 
6788 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6789 {
6790 	const unsigned int cp_coher_cntl =
6791 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6792 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6793 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6794 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6795 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6796 
6797 	/* ACQUIRE_MEM - make one or more surfaces valid for use by subsequent operations */
6798 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6799 	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6800 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6801 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6802 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6803 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6804 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6805 }
6806 
6807 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6808 					uint32_t pipe, bool enable)
6809 {
6810 	struct amdgpu_device *adev = ring->adev;
6811 	uint32_t val;
6812 	uint32_t wcl_cs_reg;
6813 
6814 	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
6815 	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
6816 
6817 	switch (pipe) {
6818 	case 0:
6819 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6820 		break;
6821 	case 1:
6822 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6823 		break;
6824 	case 2:
6825 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6826 		break;
6827 	case 3:
6828 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6829 		break;
6830 	default:
6831 		DRM_DEBUG("invalid pipe %d\n", pipe);
6832 		return;
6833 	}
6834 
6835 	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6836 }
6837 
6838 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6839 {
6840 	struct amdgpu_device *adev = ring->adev;
6841 	uint32_t val;
6842 	int i;
6843 
6845 	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to
6846 	 * limit the number of gfx waves. Setting the low 5 bits (0x1f) makes
6847 	 * sure gfx only gets around 25% of GPU resources.
6848 	 */
6849 	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6850 	amdgpu_ring_emit_wreg(ring,
6851 			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6852 			      val);
6853 
6854 	/* Restrict waves for normal/low priority compute queues as well,
6855 	 * to get the best QoS for high priority compute jobs.
6856 	 *
6857 	 * amdgpu controls only the first ME (CS pipes 0-3).
6858 	 */
6859 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6860 		if (i != ring->pipe)
6861 			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6862 	}
6864 }
6865 
6866 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6867 	.name = "gfx_v9_0",
6868 	.early_init = gfx_v9_0_early_init,
6869 	.late_init = gfx_v9_0_late_init,
6870 	.sw_init = gfx_v9_0_sw_init,
6871 	.sw_fini = gfx_v9_0_sw_fini,
6872 	.hw_init = gfx_v9_0_hw_init,
6873 	.hw_fini = gfx_v9_0_hw_fini,
6874 	.suspend = gfx_v9_0_suspend,
6875 	.resume = gfx_v9_0_resume,
6876 	.is_idle = gfx_v9_0_is_idle,
6877 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6878 	.soft_reset = gfx_v9_0_soft_reset,
6879 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6880 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6881 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6882 };
6883 
6884 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6885 	.type = AMDGPU_RING_TYPE_GFX,
6886 	.align_mask = 0xff,
6887 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6888 	.support_64bit_ptrs = true,
6889 	.vmhub = AMDGPU_GFXHUB_0,
6890 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6891 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6892 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6893 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6894 		5 +  /* COND_EXEC */
6895 		7 +  /* PIPELINE_SYNC */
6896 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6897 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6898 		2 + /* VM_FLUSH */
6899 		8 +  /* FENCE for VM_FLUSH */
6900 		20 + /* GDS switch */
6901 		4 + /* double SWITCH_BUFFER,
6902 		       the first COND_EXEC jumps to the place just
6903 		       prior to this double SWITCH_BUFFER */
6904 		5 + /* COND_EXEC */
6905 		7 +	 /*	HDP_flush */
6906 		4 +	 /*	VGT_flush */
6907 		14 + /*	CE_META */
6908 		31 + /*	DE_META */
6909 		3 + /* CNTX_CTRL */
6910 		5 + /* HDP_INVL */
6911 		8 + 8 + /* FENCE x2 */
6912 		2 + /* SWITCH_BUFFER */
6913 		7, /* gfx_v9_0_emit_mem_sync */
6914 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6915 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6916 	.emit_fence = gfx_v9_0_ring_emit_fence,
6917 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6918 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6919 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6920 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6921 	.test_ring = gfx_v9_0_ring_test_ring,
6922 	.test_ib = gfx_v9_0_ring_test_ib,
6923 	.insert_nop = amdgpu_ring_insert_nop,
6924 	.pad_ib = amdgpu_ring_generic_pad_ib,
6925 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6926 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6927 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6928 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6929 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6930 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6931 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6932 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6933 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6934 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6935 };
6936 
6937 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6938 	.type = AMDGPU_RING_TYPE_COMPUTE,
6939 	.align_mask = 0xff,
6940 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6941 	.support_64bit_ptrs = true,
6942 	.vmhub = AMDGPU_GFXHUB_0,
6943 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6944 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6945 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6946 	.emit_frame_size =
6947 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6948 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6949 		5 + /* hdp invalidate */
6950 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6951 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6952 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6953 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6954 		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6955 		7 + /* gfx_v9_0_emit_mem_sync */
6956 		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6957 		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6958 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6959 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6960 	.emit_fence = gfx_v9_0_ring_emit_fence,
6961 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6962 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6963 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6964 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6965 	.test_ring = gfx_v9_0_ring_test_ring,
6966 	.test_ib = gfx_v9_0_ring_test_ib,
6967 	.insert_nop = amdgpu_ring_insert_nop,
6968 	.pad_ib = amdgpu_ring_generic_pad_ib,
6969 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6970 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6971 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6972 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6973 	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
6974 };
6975 
6976 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6977 	.type = AMDGPU_RING_TYPE_KIQ,
6978 	.align_mask = 0xff,
6979 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6980 	.support_64bit_ptrs = true,
6981 	.vmhub = AMDGPU_GFXHUB_0,
6982 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6983 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6984 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6985 	.emit_frame_size =
6986 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6987 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6988 		5 + /* hdp invalidate */
6989 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6990 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6991 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6992 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6993 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6994 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6995 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6996 	.test_ring = gfx_v9_0_ring_test_ring,
6997 	.insert_nop = amdgpu_ring_insert_nop,
6998 	.pad_ib = amdgpu_ring_generic_pad_ib,
6999 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
7000 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7001 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7002 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7003 };
7004 
7005 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7006 {
7007 	int i;
7008 
7009 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7010 
7011 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7012 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7013 
7014 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7015 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7016 }
7017 
7018 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7019 	.set = gfx_v9_0_set_eop_interrupt_state,
7020 	.process = gfx_v9_0_eop_irq,
7021 };
7022 
7023 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7024 	.set = gfx_v9_0_set_priv_reg_fault_state,
7025 	.process = gfx_v9_0_priv_reg_irq,
7026 };
7027 
7028 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7029 	.set = gfx_v9_0_set_priv_inst_fault_state,
7030 	.process = gfx_v9_0_priv_inst_irq,
7031 };
7032 
7033 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7034 	.set = gfx_v9_0_set_cp_ecc_error_state,
7035 	.process = amdgpu_gfx_cp_ecc_error_irq,
7036 };
7037 
7038 
7039 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7040 {
7041 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7042 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7043 
7044 	adev->gfx.priv_reg_irq.num_types = 1;
7045 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7046 
7047 	adev->gfx.priv_inst_irq.num_types = 1;
7048 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7049 
7050 	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
7051 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7052 }
7053 
7054 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7055 {
7056 	switch (adev->asic_type) {
7057 	case CHIP_VEGA10:
7058 	case CHIP_VEGA12:
7059 	case CHIP_VEGA20:
7060 	case CHIP_RAVEN:
7061 	case CHIP_ARCTURUS:
7062 	case CHIP_RENOIR:
7063 	case CHIP_ALDEBARAN:
7064 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7065 		break;
7066 	default:
7067 		break;
7068 	}
7069 }
7070 
7071 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7072 {
7073 	/* init ASIC gds info */
7074 	switch (adev->asic_type) {
7075 	case CHIP_VEGA10:
7076 	case CHIP_VEGA12:
7077 	case CHIP_VEGA20:
7078 		adev->gds.gds_size = 0x10000;
7079 		break;
7080 	case CHIP_RAVEN:
7081 	case CHIP_ARCTURUS:
7082 		adev->gds.gds_size = 0x1000;
7083 		break;
7084 	case CHIP_ALDEBARAN:
7085 		/* Aldebaran removed all the GDS internal memory;
7086 		 * only GWS opcodes, such as barrier and semaphore,
7087 		 * are supported in the kernel. */
7088 		adev->gds.gds_size = 0;
7089 		break;
7090 	default:
7091 		adev->gds.gds_size = 0x10000;
7092 		break;
7093 	}
7094 
7095 	switch (adev->asic_type) {
7096 	case CHIP_VEGA10:
7097 	case CHIP_VEGA20:
7098 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7099 		break;
7100 	case CHIP_VEGA12:
7101 		adev->gds.gds_compute_max_wave_id = 0x27f;
7102 		break;
7103 	case CHIP_RAVEN:
7104 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7105 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7106 		else
7107 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7108 		break;
7109 	case CHIP_ARCTURUS:
7110 		adev->gds.gds_compute_max_wave_id = 0xfff;
7111 		break;
7112 	case CHIP_ALDEBARAN:
7113 		/* deprecated for Aldebaran, no usage at all */
7114 		adev->gds.gds_compute_max_wave_id = 0;
7115 		break;
7116 	default:
7117 		/* this really depends on the chip */
7118 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7119 		break;
7120 	}
7121 
7122 	adev->gds.gws_size = 64;
7123 	adev->gds.oa_size = 16;
7124 }
7125 
7126 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7127 						 u32 bitmap)
7128 {
7129 	u32 data;
7130 
7131 	if (!bitmap)
7132 		return;
7133 
7134 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7135 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7136 
7137 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7138 }
7139 
7140 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7141 {
7142 	u32 data, mask;
7143 
7144 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7145 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7146 
7147 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7148 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7149 
7150 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7151 
7152 	return (~data) & mask;
7153 }
7154 
7155 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7156 				 struct amdgpu_cu_info *cu_info)
7157 {
7158 	int i, j, k, counter, active_cu_number = 0;
7159 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7160 	unsigned disable_masks[4 * 4];
7161 
7162 	if (!adev || !cu_info)
7163 		return -EINVAL;
7164 
7165 	/*
7166 	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
7167 	 */
7168 	if (adev->gfx.config.max_shader_engines *
7169 		adev->gfx.config.max_sh_per_se > 16)
7170 		return -EINVAL;
7171 
7172 	amdgpu_gfx_parse_disable_cu(disable_masks,
7173 				    adev->gfx.config.max_shader_engines,
7174 				    adev->gfx.config.max_sh_per_se);
7175 
7176 	mutex_lock(&adev->grbm_idx_mutex);
7177 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7178 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7179 			mask = 1;
7180 			ao_bitmap = 0;
7181 			counter = 0;
7182 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
7183 			gfx_v9_0_set_user_cu_inactive_bitmap(
7184 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7185 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7186 
7187 			/*
7188 			 * The bitmap (and ao_cu_bitmap) in the cu_info structure is
7189 			 * a 4x4 array, which suits Vega ASICs with their 4*2 SE/SH
7190 			 * layout.
7191 			 * For Arcturus, however, the SE/SH layout changed to 8*1.
7192 			 * To minimize the impact, we keep it compatible with the
7193 			 * current bitmap array as below:
7194 			 *    SE4,SH0 --> bitmap[0][1]
7195 			 *    SE5,SH0 --> bitmap[1][1]
7196 			 *    SE6,SH0 --> bitmap[2][1]
7197 			 *    SE7,SH0 --> bitmap[3][1]
7198 			 */
7199 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
7200 
7201 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7202 				if (bitmap & mask) {
7203 					if (counter < adev->gfx.config.max_cu_per_sh)
7204 						ao_bitmap |= mask;
7205 					counter++;
7206 				}
7207 				mask <<= 1;
7208 			}
7209 			active_cu_number += counter;
7210 			if (i < 2 && j < 2)
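			/* Only the first two SEs and first two SHs contribute to the
			 * 32-bit ao_cu_mask; the remaining entries are tracked solely
			 * in ao_cu_bitmap.
			 */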
7211 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7212 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7213 		}
7214 	}
7215 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7216 	mutex_unlock(&adev->grbm_idx_mutex);
7217 
7218 	cu_info->number = active_cu_number;
7219 	cu_info->ao_cu_mask = ao_cu_mask;
7220 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7221 
7222 	return 0;
7223 }
7224 
7225 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7226 {
7227 	.type = AMD_IP_BLOCK_TYPE_GFX,
7228 	.major = 9,
7229 	.minor = 0,
7230 	.rev = 0,
7231 	.funcs = &gfx_v9_0_ip_funcs,
7232 };
7233