xref: /netbsd-src/sys/external/bsd/drm2/dist/drm/radeon/radeon_cik.c (revision fb5eed702691094bd687fbf1ded189c87457cd35)
1 /*	$NetBSD: radeon_cik.c,v 1.2 2020/02/14 04:35:20 riastradh Exp $	*/
2 
3 /*
4  * Copyright 2012 Advanced Micro Devices, Inc.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Alex Deucher
25  */
26 #include <sys/cdefs.h>
27 __KERNEL_RCSID(0, "$NetBSD: radeon_cik.c,v 1.2 2020/02/14 04:35:20 riastradh Exp $");
28 
29 #include <linux/firmware.h>
30 #include <linux/slab.h>
31 #include <linux/module.h>
32 #include "drmP.h"
33 #include "radeon.h"
34 #include "radeon_asic.h"
35 #include "radeon_audio.h"
36 #include "cikd.h"
37 #include "atom.h"
38 #include "cik_blit_shaders.h"
39 #include "radeon_ucode.h"
40 #include "clearstate_ci.h"
41 #include "radeon_kfd.h"
42 
43 #include <linux/nbsd-namespace.h>
44 
45 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
47 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
48 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
49 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
50 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
51 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
53 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
54 
55 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
56 MODULE_FIRMWARE("radeon/bonaire_me.bin");
57 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
58 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
59 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
60 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
61 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
62 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
63 
64 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
66 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
67 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
68 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
69 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
70 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
71 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
72 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
73 
74 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
75 MODULE_FIRMWARE("radeon/hawaii_me.bin");
76 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
77 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
78 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
79 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
80 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
81 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
82 
83 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
84 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
85 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
86 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
87 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
88 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
89 
90 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
91 MODULE_FIRMWARE("radeon/kaveri_me.bin");
92 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
93 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
94 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
95 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
96 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
97 
98 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
99 MODULE_FIRMWARE("radeon/KABINI_me.bin");
100 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
101 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
102 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
103 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
104 
105 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
106 MODULE_FIRMWARE("radeon/kabini_me.bin");
107 MODULE_FIRMWARE("radeon/kabini_ce.bin");
108 MODULE_FIRMWARE("radeon/kabini_mec.bin");
109 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
110 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
111 
112 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
113 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
114 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
115 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
116 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
117 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
118 
119 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
120 MODULE_FIRMWARE("radeon/mullins_me.bin");
121 MODULE_FIRMWARE("radeon/mullins_ce.bin");
122 MODULE_FIRMWARE("radeon/mullins_mec.bin");
123 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
124 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
125 
126 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
127 extern void r600_ih_ring_fini(struct radeon_device *rdev);
128 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
129 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
130 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
131 extern void sumo_rlc_fini(struct radeon_device *rdev);
132 extern int sumo_rlc_init(struct radeon_device *rdev);
133 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
134 extern void si_rlc_reset(struct radeon_device *rdev);
135 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
136 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
137 extern int cik_sdma_resume(struct radeon_device *rdev);
138 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
139 extern void cik_sdma_fini(struct radeon_device *rdev);
140 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
141 static void cik_rlc_stop(struct radeon_device *rdev);
142 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
143 static void cik_program_aspm(struct radeon_device *rdev);
144 static void cik_init_pg(struct radeon_device *rdev);
145 static void cik_init_cg(struct radeon_device *rdev);
146 static void cik_fini_pg(struct radeon_device *rdev);
147 static void cik_fini_cg(struct radeon_device *rdev);
148 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
149 					  bool enable);
150 
151 /**
152  * cik_get_allowed_info_register - fetch the register for the info ioctl
153  *
154  * @rdev: radeon_device pointer
155  * @reg: register offset in bytes
156  * @val: register value
157  *
158  * Returns 0 for success or -EINVAL for an invalid register
159  *
160  */
161 int cik_get_allowed_info_register(struct radeon_device *rdev,
162 				  u32 reg, u32 *val)
163 {
164 	switch (reg) {
165 	case GRBM_STATUS:
166 	case GRBM_STATUS2:
167 	case GRBM_STATUS_SE0:
168 	case GRBM_STATUS_SE1:
169 	case GRBM_STATUS_SE2:
170 	case GRBM_STATUS_SE3:
171 	case SRBM_STATUS:
172 	case SRBM_STATUS2:
173 	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
174 	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
175 	case UVD_STATUS:
176 	/* TODO VCE */
177 		*val = RREG32(reg);
178 		return 0;
179 	default:
180 		return -EINVAL;
181 	}
182 }
183 
184 /*
185  * Indirect registers accessor
186  */
187 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
188 {
189 	unsigned long flags;
190 	u32 r;
191 
192 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
193 	WREG32(CIK_DIDT_IND_INDEX, (reg));
194 	r = RREG32(CIK_DIDT_IND_DATA);
195 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
196 	return r;
197 }
198 
199 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
200 {
201 	unsigned long flags;
202 
203 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
204 	WREG32(CIK_DIDT_IND_INDEX, (reg));
205 	WREG32(CIK_DIDT_IND_DATA, (v));
206 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
207 }
208 
209 /* get temperature in millidegrees */
210 int ci_get_temp(struct radeon_device *rdev)
211 {
212 	u32 temp;
213 	int actual_temp = 0;
214 
215 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
216 		CTF_TEMP_SHIFT;
217 
218 	if (temp & 0x200)
219 		actual_temp = 255;
220 	else
221 		actual_temp = temp & 0x1ff;
222 
223 	actual_temp = actual_temp * 1000;
224 
225 	return actual_temp;
226 }
227 
228 /* get temperature in millidegrees */
229 int kv_get_temp(struct radeon_device *rdev)
230 {
231 	u32 temp;
232 	int actual_temp = 0;
233 
234 	temp = RREG32_SMC(0xC0300E0C);
235 
236 	if (temp)
237 		actual_temp = (temp / 8) - 49;
238 	else
239 		actual_temp = 0;
240 
241 	actual_temp = actual_temp * 1000;
242 
243 	return actual_temp;
244 }
245 
246 /*
247  * Indirect registers accessor
248  */
249 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
250 {
251 	unsigned long flags;
252 	u32 r;
253 
254 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
255 	WREG32(PCIE_INDEX, reg);
256 	(void)RREG32(PCIE_INDEX);
257 	r = RREG32(PCIE_DATA);
258 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
259 	return r;
260 }
261 
262 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
263 {
264 	unsigned long flags;
265 
266 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
267 	WREG32(PCIE_INDEX, reg);
268 	(void)RREG32(PCIE_INDEX);
269 	WREG32(PCIE_DATA, v);
270 	(void)RREG32(PCIE_DATA);
271 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
272 }
273 
/*
 * RLC save/restore register list for Spectre-class (Kaveri) parts.
 *
 * Entries come in pairs: an encoded register descriptor followed by a
 * 0x00000000 placeholder.  The descriptor packs a selector in the upper
 * 16 bits (presumably a GRBM SE/SH/instance broadcast index -- TODO
 * confirm against the RLC microcode documentation) and the register's
 * dword offset (byte offset >> 2) in the lower 16 bits.  The bare 0x3
 * and 0x5 words appear to be section markers; the final five
 * descriptors after 0x5 carry no placeholder word.  Generated data --
 * do not reorder or edit by hand.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
720 
/*
 * RLC save/restore register list for Kalindi-class (Kabini/Mullins)
 * parts.  Same encoding as spectre_rlc_save_restore_register_list:
 * pairs of an encoded register descriptor (selector in the upper 16
 * bits -- presumably a GRBM SE/SH/instance broadcast index, TODO
 * confirm -- and the register's dword offset in the lower 16 bits)
 * followed by a 0x00000000 placeholder, with bare 0x3/0x5 words that
 * appear to be section markers and a final run of five descriptors
 * carrying no placeholder.  Generated data -- do not edit by hand.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
1045 
/*
 * Golden (recommended power-on) setting for Bonaire's SPM block.
 * Each entry appears to be a { register offset, AND mask, OR value }
 * triple -- NOTE(review): confirm against the register-sequence
 * programming helper that consumes these tables.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1050 
/*
 * Golden register settings common to Bonaire boards.  Each entry
 * appears to be a { register offset, AND mask, OR value } triple --
 * NOTE(review): confirm against the register-sequence programming
 * helper that consumes these tables.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1058 
/*
 * Golden register settings for Bonaire.  Each entry appears to be a
 * { register offset, AND mask, OR value } triple -- NOTE(review):
 * confirm against the register-sequence programming helper that
 * consumes these tables.  Vendor-supplied data; do not edit by hand.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1103 
1104 static const u32 bonaire_mgcg_cgcg_init[] =
1105 {
1106 	0xc420, 0xffffffff, 0xfffffffc,
1107 	0x30800, 0xffffffff, 0xe0000000,
1108 	0x3c2a0, 0xffffffff, 0x00000100,
1109 	0x3c208, 0xffffffff, 0x00000100,
1110 	0x3c2c0, 0xffffffff, 0xc0000100,
1111 	0x3c2c8, 0xffffffff, 0xc0000100,
1112 	0x3c2c4, 0xffffffff, 0xc0000100,
1113 	0x55e4, 0xffffffff, 0x00600100,
1114 	0x3c280, 0xffffffff, 0x00000100,
1115 	0x3c214, 0xffffffff, 0x06000100,
1116 	0x3c220, 0xffffffff, 0x00000100,
1117 	0x3c218, 0xffffffff, 0x06000100,
1118 	0x3c204, 0xffffffff, 0x00000100,
1119 	0x3c2e0, 0xffffffff, 0x00000100,
1120 	0x3c224, 0xffffffff, 0x00000100,
1121 	0x3c200, 0xffffffff, 0x00000100,
1122 	0x3c230, 0xffffffff, 0x00000100,
1123 	0x3c234, 0xffffffff, 0x00000100,
1124 	0x3c250, 0xffffffff, 0x00000100,
1125 	0x3c254, 0xffffffff, 0x00000100,
1126 	0x3c258, 0xffffffff, 0x00000100,
1127 	0x3c25c, 0xffffffff, 0x00000100,
1128 	0x3c260, 0xffffffff, 0x00000100,
1129 	0x3c27c, 0xffffffff, 0x00000100,
1130 	0x3c278, 0xffffffff, 0x00000100,
1131 	0x3c210, 0xffffffff, 0x06000100,
1132 	0x3c290, 0xffffffff, 0x00000100,
1133 	0x3c274, 0xffffffff, 0x00000100,
1134 	0x3c2b4, 0xffffffff, 0x00000100,
1135 	0x3c2b0, 0xffffffff, 0x00000100,
1136 	0x3c270, 0xffffffff, 0x00000100,
1137 	0x30800, 0xffffffff, 0xe0000000,
1138 	0x3c020, 0xffffffff, 0x00010000,
1139 	0x3c024, 0xffffffff, 0x00030002,
1140 	0x3c028, 0xffffffff, 0x00040007,
1141 	0x3c02c, 0xffffffff, 0x00060005,
1142 	0x3c030, 0xffffffff, 0x00090008,
1143 	0x3c034, 0xffffffff, 0x00010000,
1144 	0x3c038, 0xffffffff, 0x00030002,
1145 	0x3c03c, 0xffffffff, 0x00040007,
1146 	0x3c040, 0xffffffff, 0x00060005,
1147 	0x3c044, 0xffffffff, 0x00090008,
1148 	0x3c048, 0xffffffff, 0x00010000,
1149 	0x3c04c, 0xffffffff, 0x00030002,
1150 	0x3c050, 0xffffffff, 0x00040007,
1151 	0x3c054, 0xffffffff, 0x00060005,
1152 	0x3c058, 0xffffffff, 0x00090008,
1153 	0x3c05c, 0xffffffff, 0x00010000,
1154 	0x3c060, 0xffffffff, 0x00030002,
1155 	0x3c064, 0xffffffff, 0x00040007,
1156 	0x3c068, 0xffffffff, 0x00060005,
1157 	0x3c06c, 0xffffffff, 0x00090008,
1158 	0x3c070, 0xffffffff, 0x00010000,
1159 	0x3c074, 0xffffffff, 0x00030002,
1160 	0x3c078, 0xffffffff, 0x00040007,
1161 	0x3c07c, 0xffffffff, 0x00060005,
1162 	0x3c080, 0xffffffff, 0x00090008,
1163 	0x3c084, 0xffffffff, 0x00010000,
1164 	0x3c088, 0xffffffff, 0x00030002,
1165 	0x3c08c, 0xffffffff, 0x00040007,
1166 	0x3c090, 0xffffffff, 0x00060005,
1167 	0x3c094, 0xffffffff, 0x00090008,
1168 	0x3c098, 0xffffffff, 0x00010000,
1169 	0x3c09c, 0xffffffff, 0x00030002,
1170 	0x3c0a0, 0xffffffff, 0x00040007,
1171 	0x3c0a4, 0xffffffff, 0x00060005,
1172 	0x3c0a8, 0xffffffff, 0x00090008,
1173 	0x3c000, 0xffffffff, 0x96e00200,
1174 	0x8708, 0xffffffff, 0x00900100,
1175 	0xc424, 0xffffffff, 0x0020003f,
1176 	0x38, 0xffffffff, 0x0140001c,
1177 	0x3c, 0x000f0000, 0x000f0000,
1178 	0x220, 0xffffffff, 0xC060000C,
1179 	0x224, 0xc0000fff, 0x00000100,
1180 	0xf90, 0xffffffff, 0x00000100,
1181 	0xf98, 0x00000101, 0x00000000,
1182 	0x20a8, 0xffffffff, 0x00000104,
1183 	0x55e4, 0xff000fff, 0x00000100,
1184 	0x30cc, 0xc0000fff, 0x00000104,
1185 	0xc1e4, 0x00000001, 0x00000001,
1186 	0xd00c, 0xff000ff0, 0x00000100,
1187 	0xd80c, 0xff000ff0, 0x00000100
1188 };
1189 
/* Spectre (Kaveri variant): SPM golden settings */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

/* Spectre: settings shared in layout with the other CIK parts */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

/* Spectre: chip-specific golden settings */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

/* Spectre: clock-gating init sequence (presumably MGCG/CGCG setup) */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1322 
/* Kalindi (Kabini; also reused for Mullins): SPM golden settings */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

/* Kalindi: settings shared in layout with the other CIK parts */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

/* Kalindi: chip-specific golden settings */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

/* Kalindi: clock-gating init sequence (presumably MGCG/CGCG setup) */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1428 
/* Hawaii: SPM golden settings */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

/* Hawaii: common golden settings (layout differs from the other parts) */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

/* Hawaii: chip-specific golden settings */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

/* Hawaii: clock-gating init sequence (presumably MGCG/CGCG setup) */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1593 
/*
 * Godavari (Mullins): chip-specific golden settings.  Mullins otherwise
 * reuses the kalindi common/spm/mgcg tables (see cik_init_golden_registers).
 *
 * NOTE(review): the 0x98302 offset below is not dword-aligned and breaks the
 * pattern of the surrounding entries (possibly intended as 0x9834, which the
 * other tables pair with mask 0xf00fffff) — it matches the upstream vendor
 * data, so it is left as-is; verify against AMD's register tables before
 * changing.
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1629 
1630 
1631 static void cik_init_golden_registers(struct radeon_device *rdev)
1632 {
1633 	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
1634 	mutex_lock(&rdev->grbm_idx_mutex);
1635 	switch (rdev->family) {
1636 	case CHIP_BONAIRE:
1637 		radeon_program_register_sequence(rdev,
1638 						 bonaire_mgcg_cgcg_init,
1639 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1640 		radeon_program_register_sequence(rdev,
1641 						 bonaire_golden_registers,
1642 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1643 		radeon_program_register_sequence(rdev,
1644 						 bonaire_golden_common_registers,
1645 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1646 		radeon_program_register_sequence(rdev,
1647 						 bonaire_golden_spm_registers,
1648 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1649 		break;
1650 	case CHIP_KABINI:
1651 		radeon_program_register_sequence(rdev,
1652 						 kalindi_mgcg_cgcg_init,
1653 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1654 		radeon_program_register_sequence(rdev,
1655 						 kalindi_golden_registers,
1656 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1657 		radeon_program_register_sequence(rdev,
1658 						 kalindi_golden_common_registers,
1659 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1660 		radeon_program_register_sequence(rdev,
1661 						 kalindi_golden_spm_registers,
1662 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1663 		break;
1664 	case CHIP_MULLINS:
1665 		radeon_program_register_sequence(rdev,
1666 						 kalindi_mgcg_cgcg_init,
1667 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1668 		radeon_program_register_sequence(rdev,
1669 						 godavari_golden_registers,
1670 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1671 		radeon_program_register_sequence(rdev,
1672 						 kalindi_golden_common_registers,
1673 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1674 		radeon_program_register_sequence(rdev,
1675 						 kalindi_golden_spm_registers,
1676 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1677 		break;
1678 	case CHIP_KAVERI:
1679 		radeon_program_register_sequence(rdev,
1680 						 spectre_mgcg_cgcg_init,
1681 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1682 		radeon_program_register_sequence(rdev,
1683 						 spectre_golden_registers,
1684 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1685 		radeon_program_register_sequence(rdev,
1686 						 spectre_golden_common_registers,
1687 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1688 		radeon_program_register_sequence(rdev,
1689 						 spectre_golden_spm_registers,
1690 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1691 		break;
1692 	case CHIP_HAWAII:
1693 		radeon_program_register_sequence(rdev,
1694 						 hawaii_mgcg_cgcg_init,
1695 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1696 		radeon_program_register_sequence(rdev,
1697 						 hawaii_golden_registers,
1698 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1699 		radeon_program_register_sequence(rdev,
1700 						 hawaii_golden_common_registers,
1701 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1702 		radeon_program_register_sequence(rdev,
1703 						 hawaii_golden_spm_registers,
1704 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1705 		break;
1706 	default:
1707 		break;
1708 	}
1709 	mutex_unlock(&rdev->grbm_idx_mutex);
1710 }
1711 
1712 /**
1713  * cik_get_xclk - get the xclk
1714  *
1715  * @rdev: radeon_device pointer
1716  *
1717  * Returns the reference clock used by the gfx engine
1718  * (CIK).
1719  */
1720 u32 cik_get_xclk(struct radeon_device *rdev)
1721 {
1722         u32 reference_clock = rdev->clock.spll.reference_freq;
1723 
1724 	if (rdev->flags & RADEON_IS_IGP) {
1725 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1726 			return reference_clock / 2;
1727 	} else {
1728 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1729 			return reference_clock / 4;
1730 	}
1731 	return reference_clock;
1732 }
1733 
1734 /**
1735  * cik_mm_rdoorbell - read a doorbell dword
1736  *
1737  * @rdev: radeon_device pointer
1738  * @index: doorbell index
1739  *
1740  * Returns the value in the doorbell aperture at the
1741  * requested doorbell index (CIK).
1742  */
1743 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1744 {
1745 	if (index < rdev->doorbell.num_doorbells) {
1746 #ifdef __NetBSD__
1747 		return bus_space_read_4(rdev->doorbell.bst, rdev->doorbell.bsh,
1748 		    index*4);
1749 #else
1750 		return readl(rdev->doorbell.ptr + index);
1751 #endif
1752 	} else {
1753 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1754 		return 0;
1755 	}
1756 }
1757 
1758 /**
1759  * cik_mm_wdoorbell - write a doorbell dword
1760  *
1761  * @rdev: radeon_device pointer
1762  * @index: doorbell index
1763  * @v: value to write
1764  *
1765  * Writes @v to the doorbell aperture at the
1766  * requested doorbell index (CIK).
1767  */
1768 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1769 {
1770 	if (index < rdev->doorbell.num_doorbells) {
1771 #ifdef __NetBSD__
1772 		bus_space_write_4(rdev->doorbell.bst, rdev->doorbell.bsh,
1773 		    index*4, v);
1774 #else
1775 		writel(v, rdev->doorbell.ptr + index);
1776 #endif
1777 	} else {
1778 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1779 	}
1780 }
1781 
#define BONAIRE_IO_MC_REGS_SIZE 36

/*
 * Bonaire MC IO debug register pairs used with legacy (non-unified) MC
 * firmware.  Each row is {MC_SEQ_IO_DEBUG_INDEX value, MC_SEQ_IO_DEBUG_DATA
 * value}; written back-to-back by ci_mc_load_microcode().
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1823 
#define HAWAII_IO_MC_REGS_SIZE 22

/*
 * Hawaii MC IO debug register pairs used with legacy (non-unified) MC
 * firmware; same {index, data} layout as bonaire_io_mc_regs above.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1851 
1852 
1853 /**
1854  * cik_srbm_select - select specific register instances
1855  *
1856  * @rdev: radeon_device pointer
1857  * @me: selected ME (micro engine)
1858  * @pipe: pipe
1859  * @queue: queue
1860  * @vmid: VMID
1861  *
1862  * Switches the currently active registers instances.  Some
1863  * registers are instanced per VMID, others are instanced per
1864  * me/pipe/queue combination.
1865  */
1866 static void cik_srbm_select(struct radeon_device *rdev,
1867 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1868 {
1869 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1870 			     MEID(me & 0x3) |
1871 			     VMID(vmid & 0xf) |
1872 			     QUEUEID(queue & 0x7));
1873 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1874 }
1875 
1876 /* ucode loading */
1877 /**
1878  * ci_mc_load_microcode - load MC ucode into the hw
1879  *
1880  * @rdev: radeon_device pointer
1881  *
1882  * Load the GDDR MC ucode into the hw (CIK).
1883  * Returns 0 on success, error on failure.
1884  */
1885 int ci_mc_load_microcode(struct radeon_device *rdev)
1886 {
1887 	const __be32 *fw_data = NULL;
1888 	const __le32 *new_fw_data = NULL;
1889 	u32 running, blackout = 0, tmp;
1890 	const u32 *io_mc_regs = NULL;
1891 	const __le32 *new_io_mc_regs = NULL;
1892 	int i, regs_size, ucode_size;
1893 
1894 	if (!rdev->mc_fw)
1895 		return -EINVAL;
1896 
1897 	if (rdev->new_fw) {
1898 		const struct mc_firmware_header_v1_0 *hdr =
1899 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1900 
1901 		radeon_ucode_print_mc_hdr(&hdr->header);
1902 
1903 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1904 		new_io_mc_regs = (const __le32 *)
1905 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1906 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1907 		new_fw_data = (const __le32 *)
1908 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1909 	} else {
1910 		ucode_size = rdev->mc_fw->size / 4;
1911 
1912 		switch (rdev->family) {
1913 		case CHIP_BONAIRE:
1914 			io_mc_regs = &bonaire_io_mc_regs[0][0];
1915 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1916 			break;
1917 		case CHIP_HAWAII:
1918 			io_mc_regs = &hawaii_io_mc_regs[0][0];
1919 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1920 			break;
1921 		default:
1922 			return -EINVAL;
1923 		}
1924 		fw_data = (const __be32 *)rdev->mc_fw->data;
1925 	}
1926 
1927 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1928 
1929 	if (running == 0) {
1930 		if (running) {
1931 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1932 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1933 		}
1934 
1935 		/* reset the engine and set to writable */
1936 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1937 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1938 
1939 		/* load mc io regs */
1940 		for (i = 0; i < regs_size; i++) {
1941 			if (rdev->new_fw) {
1942 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1943 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1944 			} else {
1945 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1946 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1947 			}
1948 		}
1949 
1950 		tmp = RREG32(MC_SEQ_MISC0);
1951 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1952 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1953 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1954 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1955 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1956 		}
1957 
1958 		/* load the MC ucode */
1959 		for (i = 0; i < ucode_size; i++) {
1960 			if (rdev->new_fw)
1961 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1962 			else
1963 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1964 		}
1965 
1966 		/* put the engine back into the active state */
1967 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1968 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1969 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1970 
1971 		/* wait for training to complete */
1972 		for (i = 0; i < rdev->usec_timeout; i++) {
1973 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1974 				break;
1975 			udelay(1);
1976 		}
1977 		for (i = 0; i < rdev->usec_timeout; i++) {
1978 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1979 				break;
1980 			udelay(1);
1981 		}
1982 
1983 		if (running)
1984 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1985 	}
1986 
1987 	return 0;
1988 }
1989 
1990 /**
1991  * cik_init_microcode - load ucode images from disk
1992  *
1993  * @rdev: radeon_device pointer
1994  *
1995  * Use the firmware interface to load the ucode images into
1996  * the driver (not loaded into hw).
1997  * Returns 0 on success, error on failure.
1998  */
1999 static int cik_init_microcode(struct radeon_device *rdev)
2000 {
2001 	const char *chip_name;
2002 	const char *new_chip_name;
2003 	size_t pfp_req_size, me_req_size, ce_req_size,
2004 		mec_req_size, rlc_req_size, mc_req_size = 0,
2005 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
2006 	char fw_name[30];
2007 	int new_fw = 0;
2008 	int err;
2009 	int num_fw;
2010 
2011 	DRM_DEBUG("\n");
2012 
2013 	switch (rdev->family) {
2014 	case CHIP_BONAIRE:
2015 		chip_name = "BONAIRE";
2016 		new_chip_name = "bonaire";
2017 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2018 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2019 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2020 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2021 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2022 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2023 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2024 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2025 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2026 		num_fw = 8;
2027 		break;
2028 	case CHIP_HAWAII:
2029 		chip_name = "HAWAII";
2030 		new_chip_name = "hawaii";
2031 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2032 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2033 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2034 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2035 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2036 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2037 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2038 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2039 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2040 		num_fw = 8;
2041 		break;
2042 	case CHIP_KAVERI:
2043 		chip_name = "KAVERI";
2044 		new_chip_name = "kaveri";
2045 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2046 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2047 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2048 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2049 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2050 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2051 		num_fw = 7;
2052 		break;
2053 	case CHIP_KABINI:
2054 		chip_name = "KABINI";
2055 		new_chip_name = "kabini";
2056 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2057 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2058 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2059 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2060 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2061 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2062 		num_fw = 6;
2063 		break;
2064 	case CHIP_MULLINS:
2065 		chip_name = "MULLINS";
2066 		new_chip_name = "mullins";
2067 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2068 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2069 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2070 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2071 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2072 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2073 		num_fw = 6;
2074 		break;
2075 	default: BUG();
2076 	}
2077 
2078 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2079 
2080 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2081 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2082 	if (err) {
2083 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2084 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2085 		if (err)
2086 			goto out;
2087 		if (rdev->pfp_fw->size != pfp_req_size) {
2088 			printk(KERN_ERR
2089 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2090 			       rdev->pfp_fw->size, fw_name);
2091 			err = -EINVAL;
2092 			goto out;
2093 		}
2094 	} else {
2095 		err = radeon_ucode_validate(rdev->pfp_fw);
2096 		if (err) {
2097 			printk(KERN_ERR
2098 			       "cik_fw: validation failed for firmware \"%s\"\n",
2099 			       fw_name);
2100 			goto out;
2101 		} else {
2102 			new_fw++;
2103 		}
2104 	}
2105 
2106 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2107 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2108 	if (err) {
2109 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2110 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2111 		if (err)
2112 			goto out;
2113 		if (rdev->me_fw->size != me_req_size) {
2114 			printk(KERN_ERR
2115 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2116 			       rdev->me_fw->size, fw_name);
2117 			err = -EINVAL;
2118 		}
2119 	} else {
2120 		err = radeon_ucode_validate(rdev->me_fw);
2121 		if (err) {
2122 			printk(KERN_ERR
2123 			       "cik_fw: validation failed for firmware \"%s\"\n",
2124 			       fw_name);
2125 			goto out;
2126 		} else {
2127 			new_fw++;
2128 		}
2129 	}
2130 
2131 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2132 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2133 	if (err) {
2134 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2135 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2136 		if (err)
2137 			goto out;
2138 		if (rdev->ce_fw->size != ce_req_size) {
2139 			printk(KERN_ERR
2140 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2141 			       rdev->ce_fw->size, fw_name);
2142 			err = -EINVAL;
2143 		}
2144 	} else {
2145 		err = radeon_ucode_validate(rdev->ce_fw);
2146 		if (err) {
2147 			printk(KERN_ERR
2148 			       "cik_fw: validation failed for firmware \"%s\"\n",
2149 			       fw_name);
2150 			goto out;
2151 		} else {
2152 			new_fw++;
2153 		}
2154 	}
2155 
2156 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2157 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2158 	if (err) {
2159 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2160 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2161 		if (err)
2162 			goto out;
2163 		if (rdev->mec_fw->size != mec_req_size) {
2164 			printk(KERN_ERR
2165 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2166 			       rdev->mec_fw->size, fw_name);
2167 			err = -EINVAL;
2168 		}
2169 	} else {
2170 		err = radeon_ucode_validate(rdev->mec_fw);
2171 		if (err) {
2172 			printk(KERN_ERR
2173 			       "cik_fw: validation failed for firmware \"%s\"\n",
2174 			       fw_name);
2175 			goto out;
2176 		} else {
2177 			new_fw++;
2178 		}
2179 	}
2180 
2181 	if (rdev->family == CHIP_KAVERI) {
2182 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2183 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2184 		if (err) {
2185 			goto out;
2186 		} else {
2187 			err = radeon_ucode_validate(rdev->mec2_fw);
2188 			if (err) {
2189 				goto out;
2190 			} else {
2191 				new_fw++;
2192 			}
2193 		}
2194 	}
2195 
2196 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2197 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2198 	if (err) {
2199 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2200 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2201 		if (err)
2202 			goto out;
2203 		if (rdev->rlc_fw->size != rlc_req_size) {
2204 			printk(KERN_ERR
2205 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2206 			       rdev->rlc_fw->size, fw_name);
2207 			err = -EINVAL;
2208 		}
2209 	} else {
2210 		err = radeon_ucode_validate(rdev->rlc_fw);
2211 		if (err) {
2212 			printk(KERN_ERR
2213 			       "cik_fw: validation failed for firmware \"%s\"\n",
2214 			       fw_name);
2215 			goto out;
2216 		} else {
2217 			new_fw++;
2218 		}
2219 	}
2220 
2221 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2222 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2223 	if (err) {
2224 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2225 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2226 		if (err)
2227 			goto out;
2228 		if (rdev->sdma_fw->size != sdma_req_size) {
2229 			printk(KERN_ERR
2230 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2231 			       rdev->sdma_fw->size, fw_name);
2232 			err = -EINVAL;
2233 		}
2234 	} else {
2235 		err = radeon_ucode_validate(rdev->sdma_fw);
2236 		if (err) {
2237 			printk(KERN_ERR
2238 			       "cik_fw: validation failed for firmware \"%s\"\n",
2239 			       fw_name);
2240 			goto out;
2241 		} else {
2242 			new_fw++;
2243 		}
2244 	}
2245 
2246 	/* No SMC, MC ucode on APUs */
2247 	if (!(rdev->flags & RADEON_IS_IGP)) {
2248 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2249 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2250 		if (err) {
2251 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2252 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2253 			if (err) {
2254 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2255 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2256 				if (err)
2257 					goto out;
2258 			}
2259 			if ((rdev->mc_fw->size != mc_req_size) &&
2260 			    (rdev->mc_fw->size != mc2_req_size)){
2261 				printk(KERN_ERR
2262 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2263 				       rdev->mc_fw->size, fw_name);
2264 				err = -EINVAL;
2265 			}
2266 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2267 		} else {
2268 			err = radeon_ucode_validate(rdev->mc_fw);
2269 			if (err) {
2270 				printk(KERN_ERR
2271 				       "cik_fw: validation failed for firmware \"%s\"\n",
2272 				       fw_name);
2273 				goto out;
2274 			} else {
2275 				new_fw++;
2276 			}
2277 		}
2278 
2279 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2280 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2281 		if (err) {
2282 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2283 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2284 			if (err) {
2285 				printk(KERN_ERR
2286 				       "smc: error loading firmware \"%s\"\n",
2287 				       fw_name);
2288 				release_firmware(rdev->smc_fw);
2289 				rdev->smc_fw = NULL;
2290 				err = 0;
2291 			} else if (rdev->smc_fw->size != smc_req_size) {
2292 				printk(KERN_ERR
2293 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2294 				       rdev->smc_fw->size, fw_name);
2295 				err = -EINVAL;
2296 			}
2297 		} else {
2298 			err = radeon_ucode_validate(rdev->smc_fw);
2299 			if (err) {
2300 				printk(KERN_ERR
2301 				       "cik_fw: validation failed for firmware \"%s\"\n",
2302 				       fw_name);
2303 				goto out;
2304 			} else {
2305 				new_fw++;
2306 			}
2307 		}
2308 	}
2309 
2310 	if (new_fw == 0) {
2311 		rdev->new_fw = false;
2312 	} else if (new_fw < num_fw) {
2313 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2314 		err = -EINVAL;
2315 	} else {
2316 		rdev->new_fw = true;
2317 	}
2318 
2319 out:
2320 	if (err) {
2321 		if (err != -EINVAL)
2322 			printk(KERN_ERR
2323 			       "cik_cp: Failed to load firmware \"%s\"\n",
2324 			       fw_name);
2325 		release_firmware(rdev->pfp_fw);
2326 		rdev->pfp_fw = NULL;
2327 		release_firmware(rdev->me_fw);
2328 		rdev->me_fw = NULL;
2329 		release_firmware(rdev->ce_fw);
2330 		rdev->ce_fw = NULL;
2331 		release_firmware(rdev->mec_fw);
2332 		rdev->mec_fw = NULL;
2333 		release_firmware(rdev->mec2_fw);
2334 		rdev->mec2_fw = NULL;
2335 		release_firmware(rdev->rlc_fw);
2336 		rdev->rlc_fw = NULL;
2337 		release_firmware(rdev->sdma_fw);
2338 		rdev->sdma_fw = NULL;
2339 		release_firmware(rdev->mc_fw);
2340 		rdev->mc_fw = NULL;
2341 		release_firmware(rdev->smc_fw);
2342 		rdev->smc_fw = NULL;
2343 	}
2344 	return err;
2345 }
2346 
2347 /*
2348  * Core functions
2349  */
2350 /**
2351  * cik_tiling_mode_table_init - init the hw tiling table
2352  *
2353  * @rdev: radeon_device pointer
2354  *
2355  * Starting with SI, the tiling setup is done globally in a
2356  * set of 32 tiling modes.  Rather than selecting each set of
2357  * parameters per surface as on older asics, we just select
2358  * which index in the tiling table we want to use, and the
2359  * surface uses those parameters (CIK).
2360  */
2361 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2362 {
2363 	const u32 num_tile_mode_states = 32;
2364 	const u32 num_secondary_tile_mode_states = 16;
2365 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2366 	u32 num_pipe_configs;
2367 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2368 		rdev->config.cik.max_shader_engines;
2369 
2370 	switch (rdev->config.cik.mem_row_size_in_kb) {
2371 	case 1:
2372 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2373 		break;
2374 	case 2:
2375 	default:
2376 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2377 		break;
2378 	case 4:
2379 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2380 		break;
2381 	}
2382 
2383 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2384 	if (num_pipe_configs > 8)
2385 		num_pipe_configs = 16;
2386 
2387 	if (num_pipe_configs == 16) {
2388 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2389 			switch (reg_offset) {
2390 			case 0:
2391 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2392 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2393 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2395 				break;
2396 			case 1:
2397 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2401 				break;
2402 			case 2:
2403 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2404 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2405 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2407 				break;
2408 			case 3:
2409 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2410 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2411 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2413 				break;
2414 			case 4:
2415 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2416 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2417 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418 						 TILE_SPLIT(split_equal_to_row_size));
2419 				break;
2420 			case 5:
2421 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2422 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2424 				break;
2425 			case 6:
2426 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2427 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2428 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2429 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2430 				break;
2431 			case 7:
2432 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2433 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2434 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435 						 TILE_SPLIT(split_equal_to_row_size));
2436 				break;
2437 			case 8:
2438 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2439 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2440 				break;
2441 			case 9:
2442 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2443 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2444 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2445 				break;
2446 			case 10:
2447 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2448 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2449 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2451 				break;
2452 			case 11:
2453 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2454 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2455 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2456 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457 				break;
2458 			case 12:
2459 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2460 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2461 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2462 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463 				break;
2464 			case 13:
2465 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2466 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2467 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2468 				break;
2469 			case 14:
2470 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2471 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2472 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2473 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2474 				break;
2475 			case 16:
2476 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2477 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2478 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2479 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2480 				break;
2481 			case 17:
2482 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2483 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2484 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2485 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2486 				break;
2487 			case 27:
2488 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2489 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2490 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2491 				break;
2492 			case 28:
2493 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2494 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2495 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2496 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2497 				break;
2498 			case 29:
2499 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2500 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2501 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2502 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2503 				break;
2504 			case 30:
2505 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2506 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2507 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2508 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2509 				break;
2510 			default:
2511 				gb_tile_moden = 0;
2512 				break;
2513 			}
2514 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2515 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2516 		}
2517 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2518 			switch (reg_offset) {
2519 			case 0:
2520 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2523 						 NUM_BANKS(ADDR_SURF_16_BANK));
2524 				break;
2525 			case 1:
2526 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2528 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529 						 NUM_BANKS(ADDR_SURF_16_BANK));
2530 				break;
2531 			case 2:
2532 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2534 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2535 						 NUM_BANKS(ADDR_SURF_16_BANK));
2536 				break;
2537 			case 3:
2538 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2540 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2541 						 NUM_BANKS(ADDR_SURF_16_BANK));
2542 				break;
2543 			case 4:
2544 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2546 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2547 						 NUM_BANKS(ADDR_SURF_8_BANK));
2548 				break;
2549 			case 5:
2550 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2552 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2553 						 NUM_BANKS(ADDR_SURF_4_BANK));
2554 				break;
2555 			case 6:
2556 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2558 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2559 						 NUM_BANKS(ADDR_SURF_2_BANK));
2560 				break;
2561 			case 8:
2562 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2563 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2564 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2565 						 NUM_BANKS(ADDR_SURF_16_BANK));
2566 				break;
2567 			case 9:
2568 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2569 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2570 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2571 						 NUM_BANKS(ADDR_SURF_16_BANK));
2572 				break;
2573 			case 10:
2574 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2575 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2576 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2577 						 NUM_BANKS(ADDR_SURF_16_BANK));
2578 				break;
2579 			case 11:
2580 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2581 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2582 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2583 						 NUM_BANKS(ADDR_SURF_8_BANK));
2584 				break;
2585 			case 12:
2586 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2587 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2588 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2589 						 NUM_BANKS(ADDR_SURF_4_BANK));
2590 				break;
2591 			case 13:
2592 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2594 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2595 						 NUM_BANKS(ADDR_SURF_2_BANK));
2596 				break;
2597 			case 14:
2598 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2600 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2601 						 NUM_BANKS(ADDR_SURF_2_BANK));
2602 				break;
2603 			default:
2604 				gb_tile_moden = 0;
2605 				break;
2606 			}
2607 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2608 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2609 		}
2610 	} else if (num_pipe_configs == 8) {
2611 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2612 			switch (reg_offset) {
2613 			case 0:
2614 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2615 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2616 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2617 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2618 				break;
2619 			case 1:
2620 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2621 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2622 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2623 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2624 				break;
2625 			case 2:
2626 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2627 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2628 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2630 				break;
2631 			case 3:
2632 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2633 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2634 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2635 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2636 				break;
2637 			case 4:
2638 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2639 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2640 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641 						 TILE_SPLIT(split_equal_to_row_size));
2642 				break;
2643 			case 5:
2644 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2645 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2646 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2647 				break;
2648 			case 6:
2649 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2650 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2651 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2652 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2653 				break;
2654 			case 7:
2655 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2656 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2657 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2658 						 TILE_SPLIT(split_equal_to_row_size));
2659 				break;
2660 			case 8:
2661 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2662 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2663 				break;
2664 			case 9:
2665 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2666 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2667 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2668 				break;
2669 			case 10:
2670 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2671 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2672 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2673 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2674 				break;
2675 			case 11:
2676 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2677 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2678 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2679 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2680 				break;
2681 			case 12:
2682 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2683 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2684 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2685 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2686 				break;
2687 			case 13:
2688 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2689 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2690 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2691 				break;
2692 			case 14:
2693 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2695 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2696 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697 				break;
2698 			case 16:
2699 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2700 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2701 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2702 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703 				break;
2704 			case 17:
2705 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2706 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2707 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2708 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2709 				break;
2710 			case 27:
2711 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2712 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2713 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2714 				break;
2715 			case 28:
2716 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2717 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2718 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2719 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720 				break;
2721 			case 29:
2722 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2723 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2724 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2725 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2726 				break;
2727 			case 30:
2728 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2729 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2730 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2731 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2732 				break;
2733 			default:
2734 				gb_tile_moden = 0;
2735 				break;
2736 			}
2737 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2738 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2739 		}
2740 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2741 			switch (reg_offset) {
2742 			case 0:
2743 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2745 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2746 						 NUM_BANKS(ADDR_SURF_16_BANK));
2747 				break;
2748 			case 1:
2749 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2750 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2751 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2752 						 NUM_BANKS(ADDR_SURF_16_BANK));
2753 				break;
2754 			case 2:
2755 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2757 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2758 						 NUM_BANKS(ADDR_SURF_16_BANK));
2759 				break;
2760 			case 3:
2761 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2762 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2763 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2764 						 NUM_BANKS(ADDR_SURF_16_BANK));
2765 				break;
2766 			case 4:
2767 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2769 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2770 						 NUM_BANKS(ADDR_SURF_8_BANK));
2771 				break;
2772 			case 5:
2773 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2774 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2775 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2776 						 NUM_BANKS(ADDR_SURF_4_BANK));
2777 				break;
2778 			case 6:
2779 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2781 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2782 						 NUM_BANKS(ADDR_SURF_2_BANK));
2783 				break;
2784 			case 8:
2785 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2786 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2787 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2788 						 NUM_BANKS(ADDR_SURF_16_BANK));
2789 				break;
2790 			case 9:
2791 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2792 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2793 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2794 						 NUM_BANKS(ADDR_SURF_16_BANK));
2795 				break;
2796 			case 10:
2797 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2798 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2799 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2800 						 NUM_BANKS(ADDR_SURF_16_BANK));
2801 				break;
2802 			case 11:
2803 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2804 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2805 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2806 						 NUM_BANKS(ADDR_SURF_16_BANK));
2807 				break;
2808 			case 12:
2809 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2810 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2811 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2812 						 NUM_BANKS(ADDR_SURF_8_BANK));
2813 				break;
2814 			case 13:
2815 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2816 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2817 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2818 						 NUM_BANKS(ADDR_SURF_4_BANK));
2819 				break;
2820 			case 14:
2821 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2823 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2824 						 NUM_BANKS(ADDR_SURF_2_BANK));
2825 				break;
2826 			default:
2827 				gb_tile_moden = 0;
2828 				break;
2829 			}
2830 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2831 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2832 		}
2833 	} else if (num_pipe_configs == 4) {
2834 		if (num_rbs == 4) {
2835 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2836 				switch (reg_offset) {
2837 				case 0:
2838 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2839 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2840 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2841 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2842 					break;
2843 				case 1:
2844 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2845 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2846 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2847 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2848 					break;
2849 				case 2:
2850 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2851 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2852 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2854 					break;
2855 				case 3:
2856 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2857 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2858 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2860 					break;
2861 				case 4:
2862 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2863 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2864 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865 							 TILE_SPLIT(split_equal_to_row_size));
2866 					break;
2867 				case 5:
2868 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2869 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2870 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2871 					break;
2872 				case 6:
2873 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2874 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2875 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2876 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2877 					break;
2878 				case 7:
2879 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2880 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2881 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2882 							 TILE_SPLIT(split_equal_to_row_size));
2883 					break;
2884 				case 8:
2885 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2886 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2887 					break;
2888 				case 9:
2889 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2890 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2891 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2892 					break;
2893 				case 10:
2894 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2895 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2896 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2898 					break;
2899 				case 11:
2900 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2901 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2902 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2903 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2904 					break;
2905 				case 12:
2906 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2907 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2908 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2909 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2910 					break;
2911 				case 13:
2912 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2913 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2914 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2915 					break;
2916 				case 14:
2917 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2919 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2920 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921 					break;
2922 				case 16:
2923 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2924 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2925 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2926 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927 					break;
2928 				case 17:
2929 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2930 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2931 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2932 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933 					break;
2934 				case 27:
2935 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2936 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2937 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2938 					break;
2939 				case 28:
2940 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2941 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2942 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2943 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944 					break;
2945 				case 29:
2946 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2947 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2948 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2949 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2950 					break;
2951 				case 30:
2952 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2953 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2954 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2955 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2956 					break;
2957 				default:
2958 					gb_tile_moden = 0;
2959 					break;
2960 				}
2961 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2962 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2963 			}
2964 		} else if (num_rbs < 4) {
2965 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2966 				switch (reg_offset) {
2967 				case 0:
2968 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2969 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2970 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2971 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2972 					break;
2973 				case 1:
2974 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2975 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2976 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2977 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2978 					break;
2979 				case 2:
2980 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2981 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2982 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2983 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2984 					break;
2985 				case 3:
2986 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2987 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2988 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2989 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2990 					break;
2991 				case 4:
2992 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2993 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2994 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2995 							 TILE_SPLIT(split_equal_to_row_size));
2996 					break;
2997 				case 5:
2998 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2999 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3000 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3001 					break;
3002 				case 6:
3003 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3004 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3005 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3006 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3007 					break;
3008 				case 7:
3009 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3010 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3011 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3012 							 TILE_SPLIT(split_equal_to_row_size));
3013 					break;
3014 				case 8:
3015 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3016 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
3017 					break;
3018 				case 9:
3019 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3020 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3021 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
3022 					break;
3023 				case 10:
3024 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3025 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3026 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3027 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3028 					break;
3029 				case 11:
3030 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3031 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3032 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3033 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3034 					break;
3035 				case 12:
3036 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3037 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3038 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3039 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3040 					break;
3041 				case 13:
3042 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3043 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3044 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3045 					break;
3046 				case 14:
3047 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3048 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3049 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3050 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3051 					break;
3052 				case 16:
3053 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3054 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3055 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3056 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3057 					break;
3058 				case 17:
3059 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3060 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3061 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3062 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3063 					break;
3064 				case 27:
3065 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3066 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3067 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
3068 					break;
3069 				case 28:
3070 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3071 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3072 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3073 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3074 					break;
3075 				case 29:
3076 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3077 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3078 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3079 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3080 					break;
3081 				case 30:
3082 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3083 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3084 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3085 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3086 					break;
3087 				default:
3088 					gb_tile_moden = 0;
3089 					break;
3090 				}
3091 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3092 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3093 			}
3094 		}
3095 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3096 			switch (reg_offset) {
3097 			case 0:
3098 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3099 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3100 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3101 						 NUM_BANKS(ADDR_SURF_16_BANK));
3102 				break;
3103 			case 1:
3104 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3105 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3106 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3107 						 NUM_BANKS(ADDR_SURF_16_BANK));
3108 				break;
3109 			case 2:
3110 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3111 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3112 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3113 						 NUM_BANKS(ADDR_SURF_16_BANK));
3114 				break;
3115 			case 3:
3116 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3117 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3118 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3119 						 NUM_BANKS(ADDR_SURF_16_BANK));
3120 				break;
3121 			case 4:
3122 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3123 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3124 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3125 						 NUM_BANKS(ADDR_SURF_16_BANK));
3126 				break;
3127 			case 5:
3128 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3129 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3130 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3131 						 NUM_BANKS(ADDR_SURF_8_BANK));
3132 				break;
3133 			case 6:
3134 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3135 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3136 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3137 						 NUM_BANKS(ADDR_SURF_4_BANK));
3138 				break;
3139 			case 8:
3140 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3141 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3142 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3143 						 NUM_BANKS(ADDR_SURF_16_BANK));
3144 				break;
3145 			case 9:
3146 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3147 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3148 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3149 						 NUM_BANKS(ADDR_SURF_16_BANK));
3150 				break;
3151 			case 10:
3152 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3153 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3154 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3155 						 NUM_BANKS(ADDR_SURF_16_BANK));
3156 				break;
3157 			case 11:
3158 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3159 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3160 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3161 						 NUM_BANKS(ADDR_SURF_16_BANK));
3162 				break;
3163 			case 12:
3164 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3165 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3166 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3167 						 NUM_BANKS(ADDR_SURF_16_BANK));
3168 				break;
3169 			case 13:
3170 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3171 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3172 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3173 						 NUM_BANKS(ADDR_SURF_8_BANK));
3174 				break;
3175 			case 14:
3176 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3177 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3178 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3179 						 NUM_BANKS(ADDR_SURF_4_BANK));
3180 				break;
3181 			default:
3182 				gb_tile_moden = 0;
3183 				break;
3184 			}
3185 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3186 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3187 		}
3188 	} else if (num_pipe_configs == 2) {
3189 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3190 			switch (reg_offset) {
3191 			case 0:
3192 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3193 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3194 						 PIPE_CONFIG(ADDR_SURF_P2) |
3195 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3196 				break;
3197 			case 1:
3198 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3199 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3200 						 PIPE_CONFIG(ADDR_SURF_P2) |
3201 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3202 				break;
3203 			case 2:
3204 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3205 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3206 						 PIPE_CONFIG(ADDR_SURF_P2) |
3207 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3208 				break;
3209 			case 3:
3210 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3211 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3212 						 PIPE_CONFIG(ADDR_SURF_P2) |
3213 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3214 				break;
3215 			case 4:
3216 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3217 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3218 						 PIPE_CONFIG(ADDR_SURF_P2) |
3219 						 TILE_SPLIT(split_equal_to_row_size));
3220 				break;
3221 			case 5:
3222 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3223 						 PIPE_CONFIG(ADDR_SURF_P2) |
3224 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3225 				break;
3226 			case 6:
3227 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3228 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3229 						 PIPE_CONFIG(ADDR_SURF_P2) |
3230 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3231 				break;
3232 			case 7:
3233 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3234 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3235 						 PIPE_CONFIG(ADDR_SURF_P2) |
3236 						 TILE_SPLIT(split_equal_to_row_size));
3237 				break;
3238 			case 8:
3239 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3240 						PIPE_CONFIG(ADDR_SURF_P2);
3241 				break;
3242 			case 9:
3243 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3244 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3245 						 PIPE_CONFIG(ADDR_SURF_P2));
3246 				break;
3247 			case 10:
3248 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3249 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3250 						 PIPE_CONFIG(ADDR_SURF_P2) |
3251 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3252 				break;
3253 			case 11:
3254 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3255 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3256 						 PIPE_CONFIG(ADDR_SURF_P2) |
3257 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3258 				break;
3259 			case 12:
3260 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3261 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3262 						 PIPE_CONFIG(ADDR_SURF_P2) |
3263 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3264 				break;
3265 			case 13:
3266 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3267 						 PIPE_CONFIG(ADDR_SURF_P2) |
3268 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3269 				break;
3270 			case 14:
3271 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3272 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3273 						 PIPE_CONFIG(ADDR_SURF_P2) |
3274 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3275 				break;
3276 			case 16:
3277 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3278 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3279 						 PIPE_CONFIG(ADDR_SURF_P2) |
3280 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3281 				break;
3282 			case 17:
3283 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3284 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3285 						 PIPE_CONFIG(ADDR_SURF_P2) |
3286 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3287 				break;
3288 			case 27:
3289 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3290 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3291 						 PIPE_CONFIG(ADDR_SURF_P2));
3292 				break;
3293 			case 28:
3294 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3295 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3296 						 PIPE_CONFIG(ADDR_SURF_P2) |
3297 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3298 				break;
3299 			case 29:
3300 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3301 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3302 						 PIPE_CONFIG(ADDR_SURF_P2) |
3303 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3304 				break;
3305 			case 30:
3306 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3307 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3308 						 PIPE_CONFIG(ADDR_SURF_P2) |
3309 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3310 				break;
3311 			default:
3312 				gb_tile_moden = 0;
3313 				break;
3314 			}
3315 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3316 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3317 		}
3318 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3319 			switch (reg_offset) {
3320 			case 0:
3321 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3322 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3323 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3324 						 NUM_BANKS(ADDR_SURF_16_BANK));
3325 				break;
3326 			case 1:
3327 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3328 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3329 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3330 						 NUM_BANKS(ADDR_SURF_16_BANK));
3331 				break;
3332 			case 2:
3333 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3334 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3335 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3336 						 NUM_BANKS(ADDR_SURF_16_BANK));
3337 				break;
3338 			case 3:
3339 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3340 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3341 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3342 						 NUM_BANKS(ADDR_SURF_16_BANK));
3343 				break;
3344 			case 4:
3345 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3346 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3347 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3348 						 NUM_BANKS(ADDR_SURF_16_BANK));
3349 				break;
3350 			case 5:
3351 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3352 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3353 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3354 						 NUM_BANKS(ADDR_SURF_16_BANK));
3355 				break;
3356 			case 6:
3357 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3359 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3360 						 NUM_BANKS(ADDR_SURF_8_BANK));
3361 				break;
3362 			case 8:
3363 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3364 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3365 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3366 						 NUM_BANKS(ADDR_SURF_16_BANK));
3367 				break;
3368 			case 9:
3369 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3370 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3371 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3372 						 NUM_BANKS(ADDR_SURF_16_BANK));
3373 				break;
3374 			case 10:
3375 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3376 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3377 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3378 						 NUM_BANKS(ADDR_SURF_16_BANK));
3379 				break;
3380 			case 11:
3381 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3382 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3383 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3384 						 NUM_BANKS(ADDR_SURF_16_BANK));
3385 				break;
3386 			case 12:
3387 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3388 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3389 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3390 						 NUM_BANKS(ADDR_SURF_16_BANK));
3391 				break;
3392 			case 13:
3393 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3394 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3395 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3396 						 NUM_BANKS(ADDR_SURF_16_BANK));
3397 				break;
3398 			case 14:
3399 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3400 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3401 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3402 						 NUM_BANKS(ADDR_SURF_8_BANK));
3403 				break;
3404 			default:
3405 				gb_tile_moden = 0;
3406 				break;
3407 			}
3408 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3409 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3410 		}
3411 	} else
3412 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3413 }
3414 
3415 /**
3416  * cik_select_se_sh - select which SE, SH to address
3417  *
3418  * @rdev: radeon_device pointer
3419  * @se_num: shader engine to address
3420  * @sh_num: sh block to address
3421  *
3422  * Select which SE, SH combinations to address. Certain
3423  * registers are instanced per SE or SH.  0xffffffff means
3424  * broadcast to all SEs or SHs (CIK).
3425  */
3426 static void cik_select_se_sh(struct radeon_device *rdev,
3427 			     u32 se_num, u32 sh_num)
3428 {
3429 	u32 data = INSTANCE_BROADCAST_WRITES;
3430 
3431 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3432 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3433 	else if (se_num == 0xffffffff)
3434 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3435 	else if (sh_num == 0xffffffff)
3436 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3437 	else
3438 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3439 	WREG32(GRBM_GFX_INDEX, data);
3440 }
3441 
3442 /**
3443  * cik_create_bitmask - create a bitmask
3444  *
3445  * @bit_width: length of the mask
3446  *
3447  * create a variable length bit mask (CIK).
3448  * Returns the bitmask.
3449  */
3450 static u32 cik_create_bitmask(u32 bit_width)
3451 {
3452 	u32 i, mask = 0;
3453 
3454 	for (i = 0; i < bit_width; i++) {
3455 		mask <<= 1;
3456 		mask |= 1;
3457 	}
3458 	return mask;
3459 }
3460 
3461 /**
3462  * cik_get_rb_disabled - computes the mask of disabled RBs
3463  *
3464  * @rdev: radeon_device pointer
3465  * @max_rb_num: max RBs (render backends) for the asic
3466  * @se_num: number of SEs (shader engines) for the asic
3467  * @sh_per_se: number of SH blocks per SE for the asic
3468  *
3469  * Calculates the bitmask of disabled RBs (CIK).
3470  * Returns the disabled RB bitmask.
3471  */
3472 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3473 			      u32 max_rb_num_per_se,
3474 			      u32 sh_per_se)
3475 {
3476 	u32 data, mask;
3477 
3478 	data = RREG32(CC_RB_BACKEND_DISABLE);
3479 	if (data & 1)
3480 		data &= BACKEND_DISABLE_MASK;
3481 	else
3482 		data = 0;
3483 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3484 
3485 	data >>= BACKEND_DISABLE_SHIFT;
3486 
3487 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3488 
3489 	return data & mask;
3490 }
3491 
3492 /**
3493  * cik_setup_rb - setup the RBs on the asic
3494  *
3495  * @rdev: radeon_device pointer
3496  * @se_num: number of SEs (shader engines) for the asic
3497  * @sh_per_se: number of SH blocks per SE for the asic
3498  * @max_rb_num: max RBs (render backends) for the asic
3499  *
3500  * Configures per-SE/SH RB registers (CIK).
3501  */
3502 static void cik_setup_rb(struct radeon_device *rdev,
3503 			 u32 se_num, u32 sh_per_se,
3504 			 u32 max_rb_num_per_se)
3505 {
3506 	int i, j;
3507 	u32 data, mask;
3508 	u32 disabled_rbs = 0;
3509 	u32 enabled_rbs = 0;
3510 
3511 	mutex_lock(&rdev->grbm_idx_mutex);
3512 	for (i = 0; i < se_num; i++) {
3513 		for (j = 0; j < sh_per_se; j++) {
3514 			cik_select_se_sh(rdev, i, j);
3515 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3516 			if (rdev->family == CHIP_HAWAII)
3517 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3518 			else
3519 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3520 		}
3521 	}
3522 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3523 	mutex_unlock(&rdev->grbm_idx_mutex);
3524 
3525 	mask = 1;
3526 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3527 		if (!(disabled_rbs & mask))
3528 			enabled_rbs |= mask;
3529 		mask <<= 1;
3530 	}
3531 
3532 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3533 
3534 	mutex_lock(&rdev->grbm_idx_mutex);
3535 	for (i = 0; i < se_num; i++) {
3536 		cik_select_se_sh(rdev, i, 0xffffffff);
3537 		data = 0;
3538 		for (j = 0; j < sh_per_se; j++) {
3539 			switch (enabled_rbs & 3) {
3540 			case 0:
3541 				if (j == 0)
3542 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3543 				else
3544 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3545 				break;
3546 			case 1:
3547 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3548 				break;
3549 			case 2:
3550 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3551 				break;
3552 			case 3:
3553 			default:
3554 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3555 				break;
3556 			}
3557 			enabled_rbs >>= 2;
3558 		}
3559 		WREG32(PA_SC_RASTER_CONFIG, data);
3560 	}
3561 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3562 	mutex_unlock(&rdev->grbm_idx_mutex);
3563 }
3564 
3565 /**
3566  * cik_gpu_init - setup the 3D engine
3567  *
3568  * @rdev: radeon_device pointer
3569  *
3570  * Configures the 3D engine and tiling configuration
3571  * registers so that the 3D engine is usable.
3572  */
3573 static void cik_gpu_init(struct radeon_device *rdev)
3574 {
3575 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3576 	u32 mc_shared_chmap __unused, mc_arb_ramcfg;
3577 	u32 hdp_host_path_cntl;
3578 	u32 tmp;
3579 	int i, j;
3580 
3581 	switch (rdev->family) {
3582 	case CHIP_BONAIRE:
3583 		rdev->config.cik.max_shader_engines = 2;
3584 		rdev->config.cik.max_tile_pipes = 4;
3585 		rdev->config.cik.max_cu_per_sh = 7;
3586 		rdev->config.cik.max_sh_per_se = 1;
3587 		rdev->config.cik.max_backends_per_se = 2;
3588 		rdev->config.cik.max_texture_channel_caches = 4;
3589 		rdev->config.cik.max_gprs = 256;
3590 		rdev->config.cik.max_gs_threads = 32;
3591 		rdev->config.cik.max_hw_contexts = 8;
3592 
3593 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3594 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3595 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3596 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3597 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3598 		break;
3599 	case CHIP_HAWAII:
3600 		rdev->config.cik.max_shader_engines = 4;
3601 		rdev->config.cik.max_tile_pipes = 16;
3602 		rdev->config.cik.max_cu_per_sh = 11;
3603 		rdev->config.cik.max_sh_per_se = 1;
3604 		rdev->config.cik.max_backends_per_se = 4;
3605 		rdev->config.cik.max_texture_channel_caches = 16;
3606 		rdev->config.cik.max_gprs = 256;
3607 		rdev->config.cik.max_gs_threads = 32;
3608 		rdev->config.cik.max_hw_contexts = 8;
3609 
3610 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3611 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3612 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3613 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3614 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3615 		break;
3616 	case CHIP_KAVERI:
3617 		rdev->config.cik.max_shader_engines = 1;
3618 		rdev->config.cik.max_tile_pipes = 4;
3619 		rdev->config.cik.max_cu_per_sh = 8;
3620 		rdev->config.cik.max_backends_per_se = 2;
3621 		rdev->config.cik.max_sh_per_se = 1;
3622 		rdev->config.cik.max_texture_channel_caches = 4;
3623 		rdev->config.cik.max_gprs = 256;
3624 		rdev->config.cik.max_gs_threads = 16;
3625 		rdev->config.cik.max_hw_contexts = 8;
3626 
3627 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3628 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3629 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3630 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3631 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3632 		break;
3633 	case CHIP_KABINI:
3634 	case CHIP_MULLINS:
3635 	default:
3636 		rdev->config.cik.max_shader_engines = 1;
3637 		rdev->config.cik.max_tile_pipes = 2;
3638 		rdev->config.cik.max_cu_per_sh = 2;
3639 		rdev->config.cik.max_sh_per_se = 1;
3640 		rdev->config.cik.max_backends_per_se = 1;
3641 		rdev->config.cik.max_texture_channel_caches = 2;
3642 		rdev->config.cik.max_gprs = 256;
3643 		rdev->config.cik.max_gs_threads = 16;
3644 		rdev->config.cik.max_hw_contexts = 8;
3645 
3646 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3647 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3648 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3649 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3650 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3651 		break;
3652 	}
3653 
3654 	/* Initialize HDP */
3655 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3656 		WREG32((0x2c14 + j), 0x00000000);
3657 		WREG32((0x2c18 + j), 0x00000000);
3658 		WREG32((0x2c1c + j), 0x00000000);
3659 		WREG32((0x2c20 + j), 0x00000000);
3660 		WREG32((0x2c24 + j), 0x00000000);
3661 	}
3662 
3663 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3664 	WREG32(SRBM_INT_CNTL, 0x1);
3665 	WREG32(SRBM_INT_ACK, 0x1);
3666 
3667 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3668 
3669 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3670 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3671 
3672 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3673 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3674 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3675 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3676 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3677 		rdev->config.cik.mem_row_size_in_kb = 4;
3678 	/* XXX use MC settings? */
3679 	rdev->config.cik.shader_engine_tile_size = 32;
3680 	rdev->config.cik.num_gpus = 1;
3681 	rdev->config.cik.multi_gpu_tile_size = 64;
3682 
3683 	/* fix up row size */
3684 	gb_addr_config &= ~ROW_SIZE_MASK;
3685 	switch (rdev->config.cik.mem_row_size_in_kb) {
3686 	case 1:
3687 	default:
3688 		gb_addr_config |= ROW_SIZE(0);
3689 		break;
3690 	case 2:
3691 		gb_addr_config |= ROW_SIZE(1);
3692 		break;
3693 	case 4:
3694 		gb_addr_config |= ROW_SIZE(2);
3695 		break;
3696 	}
3697 
3698 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3699 	 * not have bank info, so create a custom tiling dword.
3700 	 * bits 3:0   num_pipes
3701 	 * bits 7:4   num_banks
3702 	 * bits 11:8  group_size
3703 	 * bits 15:12 row_size
3704 	 */
3705 	rdev->config.cik.tile_config = 0;
3706 	switch (rdev->config.cik.num_tile_pipes) {
3707 	case 1:
3708 		rdev->config.cik.tile_config |= (0 << 0);
3709 		break;
3710 	case 2:
3711 		rdev->config.cik.tile_config |= (1 << 0);
3712 		break;
3713 	case 4:
3714 		rdev->config.cik.tile_config |= (2 << 0);
3715 		break;
3716 	case 8:
3717 	default:
3718 		/* XXX what about 12? */
3719 		rdev->config.cik.tile_config |= (3 << 0);
3720 		break;
3721 	}
3722 	rdev->config.cik.tile_config |=
3723 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3724 	rdev->config.cik.tile_config |=
3725 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3726 	rdev->config.cik.tile_config |=
3727 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3728 
3729 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3730 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3731 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3732 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3733 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3734 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3735 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3736 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3737 
3738 	cik_tiling_mode_table_init(rdev);
3739 
3740 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3741 		     rdev->config.cik.max_sh_per_se,
3742 		     rdev->config.cik.max_backends_per_se);
3743 
3744 	rdev->config.cik.active_cus = 0;
3745 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3746 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3747 			rdev->config.cik.active_cus +=
3748 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3749 		}
3750 	}
3751 
3752 	/* set HW defaults for 3D engine */
3753 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3754 
3755 	mutex_lock(&rdev->grbm_idx_mutex);
3756 	/*
3757 	 * making sure that the following register writes will be broadcasted
3758 	 * to all the shaders
3759 	 */
3760 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3761 	WREG32(SX_DEBUG_1, 0x20);
3762 
3763 	WREG32(TA_CNTL_AUX, 0x00010000);
3764 
3765 	tmp = RREG32(SPI_CONFIG_CNTL);
3766 	tmp |= 0x03000000;
3767 	WREG32(SPI_CONFIG_CNTL, tmp);
3768 
3769 	WREG32(SQ_CONFIG, 1);
3770 
3771 	WREG32(DB_DEBUG, 0);
3772 
3773 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3774 	tmp |= 0x00000400;
3775 	WREG32(DB_DEBUG2, tmp);
3776 
3777 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3778 	tmp |= 0x00020200;
3779 	WREG32(DB_DEBUG3, tmp);
3780 
3781 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3782 	tmp |= 0x00018208;
3783 	WREG32(CB_HW_CONTROL, tmp);
3784 
3785 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3786 
3787 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3788 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3789 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3790 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3791 
3792 	WREG32(VGT_NUM_INSTANCES, 1);
3793 
3794 	WREG32(CP_PERFMON_CNTL, 0);
3795 
3796 	WREG32(SQ_CONFIG, 0);
3797 
3798 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3799 					  FORCE_EOV_MAX_REZ_CNT(255)));
3800 
3801 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3802 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3803 
3804 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3805 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3806 
3807 	tmp = RREG32(HDP_MISC_CNTL);
3808 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3809 	WREG32(HDP_MISC_CNTL, tmp);
3810 
3811 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3812 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3813 
3814 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3815 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3816 	mutex_unlock(&rdev->grbm_idx_mutex);
3817 
3818 	udelay(50);
3819 }
3820 
3821 /*
3822  * GPU scratch registers helpers function.
3823  */
3824 /**
3825  * cik_scratch_init - setup driver info for CP scratch regs
3826  *
3827  * @rdev: radeon_device pointer
3828  *
3829  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
3831  * is not used by default on newer asics (r6xx+).  On newer asics,
3832  * memory buffers are used for fences rather than scratch regs.
3833  */
3834 static void cik_scratch_init(struct radeon_device *rdev)
3835 {
3836 	int i;
3837 
3838 	rdev->scratch.num_reg = 7;
3839 	rdev->scratch.reg_base = SCRATCH_REG0;
3840 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3841 		rdev->scratch.free[i] = true;
3842 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3843 	}
3844 }
3845 
3846 /**
3847  * cik_ring_test - basic gfx ring test
3848  *
3849  * @rdev: radeon_device pointer
3850  * @ring: radeon_ring structure holding ring information
3851  *
3852  * Allocate a scratch register and write to it using the gfx ring (CIK).
3853  * Provides a basic gfx ring test to verify that the ring is working.
3854  * Used by cik_cp_gfx_resume();
3855  * Returns 0 on success, error on failure.
3856  */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* Borrow a CP scratch register to serve as the test target. */
	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Seed with a sentinel so we can tell whether the CP wrote it. */
	WREG32(scratch, 0xCAFEDEAD);
	/* 3 dwords: SET_UCONFIG_REG header, register offset, value */
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* Ask the CP to write 0xDEADBEEF into the scratch register. */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	/* Poll until the value lands or the usec timeout expires. */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3897 
3898 /**
3899  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3900  *
3901  * @rdev: radeon_device pointer
3902  * @ridx: radeon ring index
3903  *
3904  * Emits an hdp flush on the cp.
3905  */
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
				       int ridx)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 ref_and_mask;

	/* Pick the GPU_HDP_FLUSH ack bit for this ring.  The gfx ring
	 * uses CP0; compute rings (and the default case) derive the bit
	 * from the ME/pipe the ring is mapped to.
	 */
	switch (ring->idx) {
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
	default:
		switch (ring->me) {
		case 0:
			ref_and_mask = CP2 << ring->pipe;
			break;
		case 1:
			ref_and_mask = CP6 << ring->pipe;
			break;
		default:
			/* unknown ME: no flush bit we could wait on */
			return;
		}
		break;
	case RADEON_RING_TYPE_GFX_INDEX:
		ref_and_mask = CP0;
		break;
	}

	/* WAIT_REG_MEM in write/wait/write mode: request the HDP flush,
	 * then poll GPU_HDP_FLUSH_DONE until our bit is acknowledged.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, 0x20); /* poll interval */
}
3942 
3943 /**
3944  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3945  *
3946  * @rdev: radeon_device pointer
3947  * @fence: radeon fence object
3948  *
 * Emits a fence sequence number on the gfx ring and flushes
3950  * GPU caches.
3951  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL(1): write the 32-bit seq; INT_SEL(0): no interrupt
	 * for the dummy event
	 */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* INT_SEL(2): write the real seq and raise the fence interrupt */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3983 
3984 /**
3985  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3986  *
3987  * @rdev: radeon_device pointer
3988  * @fence: radeon fence object
3989  *
 * Emits a fence sequence number on the compute ring and flushes
3991  * GPU caches.
3992  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL(1): write the 32-bit seq; INT_SEL(2): raise interrupt */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
4011 
4012 /**
4013  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
4014  *
4015  * @rdev: radeon_device pointer
4016  * @ring: radeon ring buffer object
4017  * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
4019  *
4020  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4021  * from running ahead of semaphore waits.
4022  */
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	/* choose between a wait and a signal packet */
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, lower_32_bits(addr));
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);

	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
		/* Prevent the PFP from running ahead of the semaphore wait */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}

	/* emission always succeeds on CIK */
	return true;
}
4043 
4044 /**
4045  * cik_copy_cpdma - copy pages using the CP DMA engine
4046  *
4047  * @rdev: radeon_device pointer
4048  * @src_offset: src GPU address
4049  * @dst_offset: dst GPU address
4050  * @num_gpu_pages: number of GPU pages to xfer
4051  * @resv: reservation object to sync to
4052  *
4053  * Copy GPU paging using the CP DMA engine (CIK+).
4054  * Used by the radeon ttm implementation to move pages if
4055  * registered as the asic copy callback.
4056  */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	/* A DMA_DATA packet moves at most 0x1fffff bytes, so split the
	 * copy into chunks.  Each chunk emits 7 dwords; 18 extra dwords
	 * cover sync and fence emission.
	 */
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* Wait on fences attached to the reservation object first. */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC; /* sync CP on last chunk */
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	/* Fence the copy so the caller can wait for completion. */
	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
4115 
4116 /*
4117  * IB stuff
4118  */
4119 /**
4120  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4121  *
4122  * @rdev: radeon_device pointer
4123  * @ib: radeon indirect buffer object
4124  *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
4129  * on the gfx ring for execution by the GPU.
4130  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	/* vmid 0 is used when the IB is not bound to a VM */
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* save the post-IB rptr in the scratch register:
			 * 3 dwords for this write + 4 for the IB packet
			 */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* otherwise record it in the writeback buffer:
			 * 5 dwords for this write + 4 for the IB packet
			 */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords, VM id in bits 31:24 */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
4170 
4171 /**
4172  * cik_ib_test - basic gfx ring IB test
4173  *
4174  * @rdev: radeon_device pointer
4175  * @ring: radeon_ring structure holding ring information
4176  *
4177  * Allocate an IB and execute it on the gfx ring (CIK).
4178  * Provides a basic gfx ring test to verify that IBs are working.
4179  * Returns 0 on success, error on failure.
4180  */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* Use a CP scratch register as the IB's write target. */
	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* sentinel value so we can detect the IB's write */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* 3-dword IB that writes 0xDEADBEEF into the scratch register */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	/* wait on the IB's fence before polling the scratch register */
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
4236 
4237 /*
4238  * CP.
 * On CIK, gfx and compute now have independent command processors.
4240  *
4241  * GFX
4242  * Gfx consists of a single ring and can process both gfx jobs and
4243  * compute jobs.  The gfx CP consists of three microengines (ME):
4244  * PFP - Pre-Fetch Parser
4245  * ME - Micro Engine
4246  * CE - Constant Engine
4247  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
4249  * used by the DE so that they can be loaded into cache in parallel
4250  * while the DE is processing state update packets.
4251  *
4252  * Compute
4253  * The compute CP consists of two microengines (ME):
4254  * MEC1 - Compute MicroEngine 1
4255  * MEC2 - Compute MicroEngine 2
4256  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4257  * The queues are exposed to userspace and are programmed directly
4258  * by the compute runtime.
4259  */
4260 /**
4261  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4262  *
4263  * @rdev: radeon_device pointer
4264  * @enable: enable or disable the MEs
4265  *
4266  * Halts or unhalts the gfx MEs.
4267  */
4268 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4269 {
4270 	if (enable)
4271 		WREG32(CP_ME_CNTL, 0);
4272 	else {
4273 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4274 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4275 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4276 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4277 	}
4278 	udelay(50);
4279 }
4280 
4281 /**
4282  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4283  *
4284  * @rdev: radeon_device pointer
4285  *
4286  * Loads the gfx PFP, ME, and CE ucode.
4287  * Returns 0 for success, -EINVAL if the ucode is not available.
4288  */
4289 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4290 {
4291 	int i;
4292 
4293 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4294 		return -EINVAL;
4295 
4296 	cik_cp_gfx_enable(rdev, false);
4297 
4298 	if (rdev->new_fw) {
4299 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4300 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4301 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4302 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4303 		const struct gfx_firmware_header_v1_0 *me_hdr =
4304 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4305 		const __le32 *fw_data;
4306 		u32 fw_size;
4307 
4308 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4309 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4310 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4311 
4312 		/* PFP */
4313 		fw_data = (const __le32 *)
4314 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4315 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4316 		WREG32(CP_PFP_UCODE_ADDR, 0);
4317 		for (i = 0; i < fw_size; i++)
4318 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4319 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4320 
4321 		/* CE */
4322 		fw_data = (const __le32 *)
4323 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4324 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4325 		WREG32(CP_CE_UCODE_ADDR, 0);
4326 		for (i = 0; i < fw_size; i++)
4327 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4328 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4329 
4330 		/* ME */
4331 		fw_data = (const __be32 *)
4332 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4333 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4334 		WREG32(CP_ME_RAM_WADDR, 0);
4335 		for (i = 0; i < fw_size; i++)
4336 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4337 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4338 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4339 	} else {
4340 		const __be32 *fw_data;
4341 
4342 		/* PFP */
4343 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4344 		WREG32(CP_PFP_UCODE_ADDR, 0);
4345 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4346 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4347 		WREG32(CP_PFP_UCODE_ADDR, 0);
4348 
4349 		/* CE */
4350 		fw_data = (const __be32 *)rdev->ce_fw->data;
4351 		WREG32(CP_CE_UCODE_ADDR, 0);
4352 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4353 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4354 		WREG32(CP_CE_UCODE_ADDR, 0);
4355 
4356 		/* ME */
4357 		fw_data = (const __be32 *)rdev->me_fw->data;
4358 		WREG32(CP_ME_RAM_WADDR, 0);
4359 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4360 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4361 		WREG32(CP_ME_RAM_WADDR, 0);
4362 	}
4363 
4364 	return 0;
4365 }
4366 
4367 /**
4368  * cik_cp_gfx_start - start the gfx ring
4369  *
4370  * @rdev: radeon_device pointer
4371  *
4372  * Enables the ring and loads the clear state context and other
4373  * packets required to init the ring.
4374  * Returns 0 for success, error for failure.
4375  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* clear-state payload plus 17 dwords of fixed packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* emit the default (golden) context register state */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4427 
4428 /**
4429  * cik_cp_gfx_fini - stop the gfx ring
4430  *
4431  * @rdev: radeon_device pointer
4432  *
4433  * Stop the gfx ring and tear down the driver ring
4434  * info.
4435  */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the gfx MEs before tearing down the ring */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
4441 
4442 /**
4443  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4444  *
4445  * @rdev: radeon_device pointer
4446  *
4447  * Program the location and size of the gfx ring buffer
4448  * and test it to make sure it's working.
4449  * Returns 0 for success, error for failure.
4450  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* program the ring buffer base address (256-byte aligned) */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* copy ring is up again: expose the full VRAM size to TTM */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4517 
4518 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4519 		     struct radeon_ring *ring)
4520 {
4521 	u32 rptr;
4522 
4523 	if (rdev->wb.enabled)
4524 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4525 	else
4526 		rptr = RREG32(CP_RB0_RPTR);
4527 
4528 	return rptr;
4529 }
4530 
4531 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4532 		     struct radeon_ring *ring)
4533 {
4534 	u32 wptr;
4535 
4536 	wptr = RREG32(CP_RB0_WPTR);
4537 
4538 	return wptr;
4539 }
4540 
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back to flush the posted register write */
	(void)RREG32(CP_RB0_WPTR);
}
4547 
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		/* writeback buffer shadows the HQD read pointer */
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		/* no writeback: select this ring's HQD via SRBM and read
		 * the register; srbm_mutex serializes the shared select
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4565 
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		/* select this ring's HQD via SRBM and read the register;
		 * srbm_mutex serializes the shared select
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}
4584 
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	/* publish the new wptr in the writeback buffer, then ring the
	 * queue's doorbell so the MEC notices it
	 */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4592 
/* Deactivate a compute queue's HQD.  Called with srbm_mutex held
 * (see cik_cp_compute_enable()), since the SRBM select is shared.
 */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		/* request dequeue and wait for the HQD to go idle */
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		/* reset the queue pointers */
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	/* restore the default SRBM select */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4617 
4618 /**
4619  * cik_cp_compute_enable - enable/disable the compute CP MEs
4620  *
4621  * @rdev: radeon_device pointer
4622  * @enable: enable or disable the MEs
4623  *
4624  * Halts or unhalts the compute MEs.
4625  */
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_MEC_CNTL, 0);
	else {
		/*
		 * To make hibernation reliable we need to clear compute ring
		 * configuration before halting the compute ring.
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
		mutex_unlock(&rdev->srbm_mutex);

		/* Halt both MEC micro engines and mark the rings unusable. */
		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}
	/* brief delay to let the halt/un-halt take effect */
	udelay(50);
}
4646 
4647 /**
4648  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4649  *
4650  * @rdev: radeon_device pointer
4651  *
4652  * Loads the compute MEC1&2 ucode.
4653  * Returns 0 for success, -EINVAL if the ucode is not available.
4654  */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the MECs before touching their ucode memories */
	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		/* new-style firmware: little-endian image with a header
		 * describing offset and size of the ucode payload */
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 */
		if (rdev->family == CHIP_KAVERI) {
			/* NOTE(review): assumes mec2_fw was loaded for
			 * Kaveri — confirm against the microcode init path */
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		/* legacy firmware: headerless big-endian image of fixed size */
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2: the legacy path loads MEC2 from the same
			 * mec_fw image, not a separate mec2_fw */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4717 
4718 /**
4719  * cik_cp_compute_start - start the compute queues
4720  *
4721  * @rdev: radeon_device pointer
4722  *
4723  * Enable the compute queues.
4724  * Returns 0 for success, error for failure.
4725  */
4726 static int cik_cp_compute_start(struct radeon_device *rdev)
4727 {
4728 	cik_cp_compute_enable(rdev, true);
4729 
4730 	return 0;
4731 }
4732 
4733 /**
4734  * cik_cp_compute_fini - stop the compute queues
4735  *
4736  * @rdev: radeon_device pointer
4737  *
4738  * Stop the compute queues and tear down the driver queue
4739  * info.
4740  */
4741 static void cik_cp_compute_fini(struct radeon_device *rdev)
4742 {
4743 	int i, idx, r;
4744 
4745 	cik_cp_compute_enable(rdev, false);
4746 
4747 	for (i = 0; i < 2; i++) {
4748 		if (i == 0)
4749 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4750 		else
4751 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4752 
4753 		if (rdev->ring[idx].mqd_obj) {
4754 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4755 			if (unlikely(r != 0))
4756 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4757 
4758 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4759 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4760 
4761 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4762 			rdev->ring[idx].mqd_obj = NULL;
4763 		}
4764 	}
4765 }
4766 
4767 static void cik_mec_fini(struct radeon_device *rdev)
4768 {
4769 	int r;
4770 
4771 	if (rdev->mec.hpd_eop_obj) {
4772 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4773 		if (unlikely(r != 0))
4774 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4775 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4776 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4777 
4778 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4779 		rdev->mec.hpd_eop_obj = NULL;
4780 	}
4781 }
4782 
4783 #define MEC_HPD_SIZE 2048
4784 
/*
 * cik_mec_init - allocate, pin, and zero the MEC HPD EOP buffer.
 * Returns 0 on success or a negative error code; any partially
 * acquired state is released via cik_mec_fini() on failure.
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 * Nonetheless, we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 1;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4839 
/*
 * CPU-side mirror of the per-queue hardware queue descriptor (HQD)
 * register state.  Each field corresponds to the CP_MQD_ / CP_HQD_
 * register of the same name; cik_cp_compute_resume() fills this in
 * while programming those registers so the MQD in memory matches
 * the hardware state.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4878 
/*
 * Memory queue descriptor (MQD) for CIK compute queues.  One of these
 * is allocated in a GTT buffer object per compute ring and initialized
 * by cik_cp_compute_resume(); queue_state embeds the saved HQD
 * register image.  Layout is consumed by the CP — do not reorder.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4906 
4907 /**
4908  * cik_cp_compute_resume - setup the compute queue registers
4909  *
4910  * @rdev: radeon_device pointer
4911  *
4912  * Program the compute queues and test them to make sure they
4913  * are working.
4914  * Returns 0 for success, error for failure.
4915  */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	/* un-halt the MECs first */
	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);

	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;

	cik_srbm_select(rdev, 0, 0, 0, 0);

	/* write the EOP addr */
	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

	/* set the VMID assigned */
	WREG32(CP_HPD_EOP_VMID, 0);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(CP_HPD_EOP_CONTROL);
	tmp &= ~EOP_SIZE_MASK;
	tmp |= order_base_2(MEC_HPD_SIZE / 8);
	WREG32(CP_HPD_EOP_CONTROL, tmp);

	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		/* allocate the per-queue MQD buffer on first resume */
		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* program this queue's HQD registers under SRBM selection */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* wait (up to usec_timeout) for the HQD to drain */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		/* smoke-test the queue; mark it unusable if the test fails */
		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
5147 
/* Enable or disable both the gfx and compute command processors. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
5153 
/*
 * Load microcode for both the gfx and compute CPs; stop at the first
 * failure and propagate its error code.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r == 0)
		r = cik_cp_compute_load_microcode(rdev);
	return r;
}
5167 
/* Tear down both the gfx and compute command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
5173 
5174 static int cik_cp_resume(struct radeon_device *rdev)
5175 {
5176 	int r;
5177 
5178 	cik_enable_gui_idle_interrupt(rdev, false);
5179 
5180 	r = cik_cp_load_microcode(rdev);
5181 	if (r)
5182 		return r;
5183 
5184 	r = cik_cp_gfx_resume(rdev);
5185 	if (r)
5186 		return r;
5187 	r = cik_cp_compute_resume(rdev);
5188 	if (r)
5189 		return r;
5190 
5191 	cik_enable_gui_idle_interrupt(rdev, true);
5192 
5193 	return 0;
5194 }
5195 
/*
 * Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log —
 * used while diagnosing a hung GPU (see cik_gpu_soft_reset()).
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
5235 
5236 /**
5237  * cik_gpu_check_soft_reset - check which blocks are busy
5238  *
5239  * @rdev: radeon_device pointer
5240  *
5241  * Check which blocks are busy and return the relevant reset
5242  * mask to be used by cik_gpu_soft_reset().
5243  * Returns a mask of the blocks to be reset.
5244  */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS: gfx pipeline busy bits */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS: system blocks (IH, semaphores, GRBM, VM, MC) */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
5316 
5317 /**
5318  * cik_gpu_soft_reset - soft reset GPU
5319  *
5320  * @rdev: radeon_device pointer
5321  * @reset_mask: mask of which blocks to reset
5322  *
5323  * Soft reset the blocks specified in @reset_mask.
5324  */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* log current state for post-mortem debugging */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* stop memory traffic before resetting blocks */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* pulse the GRBM reset bits: set, settle, clear
	 * (read-backs flush the posted writes) */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* pulse the SRBM reset bits the same way */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
5447 
/* GMCON register state saved across a pci config reset on IGPs
 * (see kv_save_regs_for_reset() / kv_restore_regs_for_reset()). */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5453 
/*
 * Save the GMCON registers before an IGP pci config reset, then
 * disable the render-engine execute and stutter bits so the GMCON
 * state machine is quiescent across the reset.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5465 
/*
 * Reprogram the GMCON power-gating state machine after an IGP pci
 * config reset and restore the registers captured by
 * kv_save_regs_for_reset().
 *
 * NOTE(review): the PGFSM_WRITE/PGFSM_CONFIG value sequence below is
 * an opaque hardware programming recipe — presumably from AMD's
 * register documentation; do not reorder or coalesce the writes.
 */
static void kv_restore_regs_for_reset(struct radeon_device *rdev,
				      struct kv_reset_save_regs *save)
{
	int i;

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x210000);
	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x21003);
	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x420000);
	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x120202);
	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);

	/* put back the registers saved before the reset */
	WREG32(GMCON_MISC3, save->gmcon_misc3);
	WREG32(GMCON_MISC, save->gmcon_misc);
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
}
5538 
/*
 * Hard-reset the GPU through pci config space: halt every engine,
 * quiesce the memory controller, trigger the reset, and wait for the
 * ASIC to respond again.  On IGPs the GMCON state is saved and
 * restored around the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads as
	 * all-ones until the device responds to MMIO again */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5601 
5602 /**
5603  * cik_asic_reset - soft reset GPU
5604  *
5605  * @rdev: radeon_device pointer
5606  *
5607  * Look up which blocks are hung and attempt
5608  * to reset them.
5609  * Returns 0 for success.
5610  */
5611 int cik_asic_reset(struct radeon_device *rdev)
5612 {
5613 	u32 reset_mask;
5614 
5615 	reset_mask = cik_gpu_check_soft_reset(rdev);
5616 
5617 	if (reset_mask)
5618 		r600_set_bios_scratch_engine_hung(rdev, true);
5619 
5620 	/* try soft reset */
5621 	cik_gpu_soft_reset(rdev, reset_mask);
5622 
5623 	reset_mask = cik_gpu_check_soft_reset(rdev);
5624 
5625 	/* try pci config reset */
5626 	if (reset_mask && radeon_hard_reset)
5627 		cik_gpu_pci_config_reset(rdev);
5628 
5629 	reset_mask = cik_gpu_check_soft_reset(rdev);
5630 
5631 	if (!reset_mask)
5632 		r600_set_bios_scratch_engine_hung(rdev, false);
5633 
5634 	return 0;
5635 }
5636 
5637 /**
5638  * cik_gfx_is_lockup - check if the 3D engine is locked up
5639  *
5640  * @rdev: radeon_device pointer
5641  * @ring: radeon_ring structure holding ring information
5642  *
5643  * Check if the 3D engine is locked up (CIK).
5644  * Returns true if the engine is locked, false if not.
5645  */
5646 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5647 {
5648 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5649 
5650 	if (!(reset_mask & (RADEON_RESET_GFX |
5651 			    RADEON_RESET_COMPUTE |
5652 			    RADEON_RESET_CP))) {
5653 		radeon_ring_lockup_update(rdev, ring);
5654 		return false;
5655 	}
5656 	return radeon_ring_test_lockup(rdev, ring);
5657 }
5658 
5659 /* MC */
5660 /**
5661  * cik_mc_program - program the GPU memory controller
5662  *
5663  * @rdev: radeon_device pointer
5664  *
5665  * Set the location of vram, gart, and AGP in the GPU's
5666  * physical address space (CIK).
5667  */
5668 static void cik_mc_program(struct radeon_device *rdev)
5669 {
5670 	struct evergreen_mc_save save;
5671 	u32 tmp;
5672 	int i, j;
5673 
5674 	/* Initialize HDP */
5675 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5676 		WREG32((0x2c14 + j), 0x00000000);
5677 		WREG32((0x2c18 + j), 0x00000000);
5678 		WREG32((0x2c1c + j), 0x00000000);
5679 		WREG32((0x2c20 + j), 0x00000000);
5680 		WREG32((0x2c24 + j), 0x00000000);
5681 	}
5682 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5683 
5684 	evergreen_mc_stop(rdev, &save);
5685 	if (radeon_mc_wait_for_idle(rdev)) {
5686 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5687 	}
5688 	/* Lockout access through VGA aperture*/
5689 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5690 	/* Update configuration */
5691 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5692 	       rdev->mc.vram_start >> 12);
5693 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5694 	       rdev->mc.vram_end >> 12);
5695 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5696 	       rdev->vram_scratch.gpu_addr >> 12);
5697 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5698 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5699 	WREG32(MC_VM_FB_LOCATION, tmp);
5700 	/* XXX double check these! */
5701 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5702 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5703 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5704 	WREG32(MC_VM_AGP_BASE, 0);
5705 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5706 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5707 	if (radeon_mc_wait_for_idle(rdev)) {
5708 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5709 	}
5710 	evergreen_mc_resume(rdev, &save);
5711 	/* we need to own VRAM, so turn off the VGA renderer here
5712 	 * to stop it overwriting our objects */
5713 	rv515_vga_render_disable(rdev);
5714 }
5715 
5716 /**
5717  * cik_mc_init - initialize the memory controller driver params
5718  *
5719  * @rdev: radeon_device pointer
5720  *
5721  * Look up the amount of vram, vram width, and decide how to place
5722  * vram and gart within the GPU's physical address space (CIK).
5723  * Returns 0 for success.
5724  */
5725 static int cik_mc_init(struct radeon_device *rdev)
5726 {
5727 	u32 tmp;
5728 	int chansize, numchan;
5729 
5730 	/* Get VRAM informations */
5731 	rdev->mc.vram_is_ddr = true;
5732 	tmp = RREG32(MC_ARB_RAMCFG);
5733 	if (tmp & CHANSIZE_MASK) {
5734 		chansize = 64;
5735 	} else {
5736 		chansize = 32;
5737 	}
5738 	tmp = RREG32(MC_SHARED_CHMAP);
5739 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5740 	case 0:
5741 	default:
5742 		numchan = 1;
5743 		break;
5744 	case 1:
5745 		numchan = 2;
5746 		break;
5747 	case 2:
5748 		numchan = 4;
5749 		break;
5750 	case 3:
5751 		numchan = 8;
5752 		break;
5753 	case 4:
5754 		numchan = 3;
5755 		break;
5756 	case 5:
5757 		numchan = 6;
5758 		break;
5759 	case 6:
5760 		numchan = 10;
5761 		break;
5762 	case 7:
5763 		numchan = 12;
5764 		break;
5765 	case 8:
5766 		numchan = 16;
5767 		break;
5768 	}
5769 	rdev->mc.vram_width = numchan * chansize;
5770 	/* Could aper size report 0 ? */
5771 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5772 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5773 	/* size in MB on si */
5774 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5775 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5776 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5777 	si_vram_gtt_location(rdev, &rdev->mc);
5778 	radeon_update_bandwidth_info(rdev);
5779 
5780 	return 0;
5781 }
5782 
5783 /*
5784  * GART
5785  * VMID 0 is the physical GPU addresses as used by the kernel.
5786  * VMIDs 1-15 are used for userspace clients and are handled
5787  * by the radeon vm/hsa code.
5788  */
5789 /**
5790  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5791  *
5792  * @rdev: radeon_device pointer
5793  *
5794  * Flush the TLB for the VMID 0 page table (CIK).
5795  */
5796 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5797 {
5798 	/* flush hdp cache */
5799 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5800 
5801 	/* bits 0-15 are the VM contexts0-15 */
5802 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5803 }
5804 
/*
 * cik_pcie_init_compute_vmid - program SH_MEM state for the compute VMIDs
 *
 * Writes the SH_MEM_* registers for VMIDs 8-15 (the VMIDs that
 * cik_vm_init() reserves for amdkfd) under the srbm mutex, then
 * switches the srbm back to VMID 0.
 */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
{
	int i;
	uint32_t sh_mem_bases, sh_mem_config;

	/* 0x6000 in both 16-bit halves — presumably the private/shared
	 * aperture bases; verify against the SH_MEM_BASES layout */
	sh_mem_bases = 0x6000 | 0x6000 << 16;
	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);

	mutex_lock(&rdev->srbm_mutex);
	for (i = 8; i < 16; i++) {
		/* srbm select routes the register writes to VMID i */
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, sh_mem_config);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, sh_mem_bases);
	}
	/* back to VMID 0 before dropping the lock */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
}
5826 
5827 /**
5828  * cik_pcie_gart_enable - gart enable
5829  *
5830  * @rdev: radeon_device pointer
5831  *
5832  * This sets up the TLBs, programs the page tables for VMID0,
5833  * sets up the hw for VMIDs 1-15 which are allocated on
5834  * demand, and sets up the global locations for the LDS, GDS,
5835  * and GPUVM for FSA64 clients (CIK).
5836  * Returns 0 for success, errors for failure.
5837  */
5838 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5839 {
5840 	int r, i;
5841 
5842 	if (rdev->gart.robj == NULL) {
5843 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5844 		return -EINVAL;
5845 	}
5846 	r = radeon_gart_table_vram_pin(rdev);
5847 	if (r)
5848 		return r;
5849 	/* Setup TLB control */
5850 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5851 	       (0xA << 7) |
5852 	       ENABLE_L1_TLB |
5853 	       ENABLE_L1_FRAGMENT_PROCESSING |
5854 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5855 	       ENABLE_ADVANCED_DRIVER_MODEL |
5856 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5857 	/* Setup L2 cache */
5858 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5859 	       ENABLE_L2_FRAGMENT_PROCESSING |
5860 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5861 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5862 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5863 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5864 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5865 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5866 	       BANK_SELECT(4) |
5867 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5868 	/* setup context0 */
5869 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5870 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5871 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5872 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5873 			(u32)(rdev->dummy_page.addr >> 12));
5874 	WREG32(VM_CONTEXT0_CNTL2, 0);
5875 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5876 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5877 
5878 	WREG32(0x15D4, 0);
5879 	WREG32(0x15D8, 0);
5880 	WREG32(0x15DC, 0);
5881 
5882 	/* restore context1-15 */
5883 	/* set vm size, must be a multiple of 4 */
5884 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5885 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5886 	for (i = 1; i < 16; i++) {
5887 		if (i < 8)
5888 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5889 			       rdev->vm_manager.saved_table_addr[i]);
5890 		else
5891 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5892 			       rdev->vm_manager.saved_table_addr[i]);
5893 	}
5894 
5895 	/* enable context1-15 */
5896 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5897 	       (u32)(rdev->dummy_page.addr >> 12));
5898 	WREG32(VM_CONTEXT1_CNTL2, 4);
5899 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5900 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5901 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5902 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5903 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5904 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5905 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5906 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5907 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5908 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5909 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5910 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5911 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5912 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5913 
5914 	if (rdev->family == CHIP_KAVERI) {
5915 		u32 tmp = RREG32(CHUB_CONTROL);
5916 		tmp &= ~BYPASS_VM;
5917 		WREG32(CHUB_CONTROL, tmp);
5918 	}
5919 
5920 	/* XXX SH_MEM regs */
5921 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5922 	mutex_lock(&rdev->srbm_mutex);
5923 	for (i = 0; i < 16; i++) {
5924 		cik_srbm_select(rdev, 0, 0, 0, i);
5925 		/* CP and shaders */
5926 		WREG32(SH_MEM_CONFIG, 0);
5927 		WREG32(SH_MEM_APE1_BASE, 1);
5928 		WREG32(SH_MEM_APE1_LIMIT, 0);
5929 		WREG32(SH_MEM_BASES, 0);
5930 		/* SDMA GFX */
5931 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5932 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5933 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5934 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5935 		/* XXX SDMA RLC - todo */
5936 	}
5937 	cik_srbm_select(rdev, 0, 0, 0, 0);
5938 	mutex_unlock(&rdev->srbm_mutex);
5939 
5940 	cik_pcie_init_compute_vmid(rdev);
5941 
5942 	cik_pcie_gart_tlb_flush(rdev);
5943 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5944 		 (unsigned)(rdev->mc.gtt_size >> 20),
5945 		 (unsigned long long)rdev->gart.table_addr);
5946 	rdev->gart.ready = true;
5947 	return 0;
5948 }
5949 
5950 /**
5951  * cik_pcie_gart_disable - gart disable
5952  *
5953  * @rdev: radeon_device pointer
5954  *
5955  * This disables all VM page table (CIK).
5956  */
5957 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5958 {
5959 	unsigned i;
5960 
5961 	for (i = 1; i < 16; ++i) {
5962 		uint32_t reg;
5963 		if (i < 8)
5964 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5965 		else
5966 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5967 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5968 	}
5969 
5970 	/* Disable all tables */
5971 	WREG32(VM_CONTEXT0_CNTL, 0);
5972 	WREG32(VM_CONTEXT1_CNTL, 0);
5973 	/* Setup TLB control */
5974 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5975 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5976 	/* Setup L2 cache */
5977 	WREG32(VM_L2_CNTL,
5978 	       ENABLE_L2_FRAGMENT_PROCESSING |
5979 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5980 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5981 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5982 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5983 	WREG32(VM_L2_CNTL2, 0);
5984 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5985 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5986 	radeon_gart_table_vram_unpin(rdev);
5987 }
5988 
5989 /**
5990  * cik_pcie_gart_fini - vm fini callback
5991  *
5992  * @rdev: radeon_device pointer
5993  *
5994  * Tears down the driver GART/VM setup (CIK).
5995  */
5996 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5997 {
5998 	cik_pcie_gart_disable(rdev);
5999 	radeon_gart_table_vram_free(rdev);
6000 	radeon_gart_fini(rdev);
6001 }
6002 
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
6016 
6017 /*
6018  * vm
6019  * VMID 0 is the physical GPU addresses as used by the kernel.
6020  * VMIDs 1-15 are used for userspace clients and are handled
6021  * by the radeon vm/hsa code.
6022  */
6023 /**
6024  * cik_vm_init - cik vm init callback
6025  *
6026  * @rdev: radeon_device pointer
6027  *
6028  * Inits cik specific vm parameters (number of VMs, base of vram for
6029  * VMIDs 1-15) (CIK).
6030  * Returns 0 for success.
6031  */
6032 int cik_vm_init(struct radeon_device *rdev)
6033 {
6034 	/*
6035 	 * number of VMs
6036 	 * VMID 0 is reserved for System
6037 	 * radeon graphics/compute will use VMIDs 1-7
6038 	 * amdkfd will use VMIDs 8-15
6039 	 */
6040 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
6041 	/* base offset of vram pages */
6042 	if (rdev->flags & RADEON_IS_IGP) {
6043 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
6044 		tmp <<= 22;
6045 		rdev->vm_manager.vram_base_offset = tmp;
6046 	} else
6047 		rdev->vm_manager.vram_base_offset = 0;
6048 
6049 	return 0;
6050 }
6051 
6052 /**
6053  * cik_vm_fini - cik vm fini callback
6054  *
6055  * @rdev: radeon_device pointer
6056  *
6057  * Tear down any asic specific VM setup (CIK).
6058  */
6059 void cik_vm_fini(struct radeon_device *rdev)
6060 {
6061 }
6062 
6063 /**
6064  * cik_vm_decode_fault - print human readable fault info
6065  *
6066  * @rdev: radeon_device pointer
6067  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
6068  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
6069  *
6070  * Print human readable fault information (CIK).
6071  */
6072 static void cik_vm_decode_fault(struct radeon_device *rdev,
6073 				u32 status, u32 addr, u32 mc_client)
6074 {
6075 	u32 mc_id;
6076 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
6077 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6078 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6079 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6080 
6081 	if (rdev->family == CHIP_HAWAII)
6082 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6083 	else
6084 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6085 
6086 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6087 	       protections, vmid, addr,
6088 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6089 	       block, mc_client, mc_id);
6090 }
6091 
6092 /**
6093  * cik_vm_flush - cik vm flush using the CP
6094  *
6095  * @rdev: radeon_device pointer
6096  *
6097  * Update the page table base and flush the VM TLB
6098  * using the CP (CIK).
6099  */
6100 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6101 		  unsigned vm_id, uint64_t pd_addr)
6102 {
6103 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6104 
6105 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6106 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6107 				 WRITE_DATA_DST_SEL(0)));
6108 	if (vm_id < 8) {
6109 		radeon_ring_write(ring,
6110 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6111 	} else {
6112 		radeon_ring_write(ring,
6113 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6114 	}
6115 	radeon_ring_write(ring, 0);
6116 	radeon_ring_write(ring, pd_addr >> 12);
6117 
6118 	/* update SH_MEM_* regs */
6119 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6120 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6121 				 WRITE_DATA_DST_SEL(0)));
6122 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6123 	radeon_ring_write(ring, 0);
6124 	radeon_ring_write(ring, VMID(vm_id));
6125 
6126 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6127 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6128 				 WRITE_DATA_DST_SEL(0)));
6129 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
6130 	radeon_ring_write(ring, 0);
6131 
6132 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6133 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6134 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6135 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6136 
6137 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6138 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6139 				 WRITE_DATA_DST_SEL(0)));
6140 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6141 	radeon_ring_write(ring, 0);
6142 	radeon_ring_write(ring, VMID(0));
6143 
6144 	/* HDP flush */
6145 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6146 
6147 	/* bits 0-15 are the VM contexts0-15 */
6148 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6149 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6150 				 WRITE_DATA_DST_SEL(0)));
6151 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6152 	radeon_ring_write(ring, 0);
6153 	radeon_ring_write(ring, 1 << vm_id);
6154 
6155 	/* wait for the invalidate to complete */
6156 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6157 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6158 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6159 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6160 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6161 	radeon_ring_write(ring, 0);
6162 	radeon_ring_write(ring, 0); /* ref */
6163 	radeon_ring_write(ring, 0); /* mask */
6164 	radeon_ring_write(ring, 0x20); /* poll interval */
6165 
6166 	/* compute doesn't have PFP */
6167 	if (usepfp) {
6168 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6169 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6170 		radeon_ring_write(ring, 0x0);
6171 	}
6172 }
6173 
6174 /*
6175  * RLC
6176  * The RLC is a multi-purpose microengine that handles a
6177  * variety of functions, the most important of which is
6178  * the interrupt controller.
6179  */
6180 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6181 					  bool enable)
6182 {
6183 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6184 
6185 	if (enable)
6186 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6187 	else
6188 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6189 	WREG32(CP_INT_CNTL_RING0, tmp);
6190 }
6191 
6192 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6193 {
6194 	u32 tmp;
6195 
6196 	tmp = RREG32(RLC_LB_CNTL);
6197 	if (enable)
6198 		tmp |= LOAD_BALANCE_ENABLE;
6199 	else
6200 		tmp &= ~LOAD_BALANCE_ENABLE;
6201 	WREG32(RLC_LB_CNTL, tmp);
6202 }
6203 
/*
 * cik_wait_for_rlc_serdes - wait for the RLC serdes masters to go idle
 *
 * Polls the per-SE/SH CU master busy register for every shader engine
 * and shader array, then the non-CU master busy register, each with
 * the usual usec timeout.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			/* index the grbm at SE i / SH j for the read below */
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast indexing */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
6230 
6231 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6232 {
6233 	u32 tmp;
6234 
6235 	tmp = RREG32(RLC_CNTL);
6236 	if (tmp != rlc)
6237 		WREG32(RLC_CNTL, rlc);
6238 }
6239 
/*
 * cik_halt_rlc - halt the RLC if it is running
 *
 * Clears RLC_ENABLE, waits for the GPM to go idle and for the serdes
 * masters to drain.  Returns the previous RLC_CNTL value so the caller
 * can restore it later via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait for the GPM microengine to go idle */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
6263 
/*
 * cik_enter_rlc_safe_mode - request RLC safe mode and wait for it
 *
 * Posts the enter-safe-mode message to RLC_GPR_REG2, waits for the
 * GFX power/clock status bits to assert, then waits for the RLC to
 * clear the REQ bit acknowledging the request.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* REQ clears when the RLC has consumed the message */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
6284 
6285 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6286 {
6287 	u32 tmp;
6288 
6289 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6290 	WREG32(RLC_GPR_REG2, tmp);
6291 }
6292 
6293 /**
6294  * cik_rlc_stop - stop the RLC ME
6295  *
6296  * @rdev: radeon_device pointer
6297  *
6298  * Halt the RLC ME (MicroEngine) (CIK).
6299  */
6300 static void cik_rlc_stop(struct radeon_device *rdev)
6301 {
6302 	WREG32(RLC_CNTL, 0);
6303 
6304 	cik_enable_gui_idle_interrupt(rdev, false);
6305 
6306 	cik_wait_for_rlc_serdes(rdev);
6307 }
6308 
6309 /**
6310  * cik_rlc_start - start the RLC ME
6311  *
6312  * @rdev: radeon_device pointer
6313  *
6314  * Unhalt the RLC ME (MicroEngine) (CIK).
6315  */
6316 static void cik_rlc_start(struct radeon_device *rdev)
6317 {
6318 	WREG32(RLC_CNTL, RLC_ENABLE);
6319 
6320 	cik_enable_gui_idle_interrupt(rdev, true);
6321 
6322 	udelay(50);
6323 }
6324 
6325 /**
6326  * cik_rlc_resume - setup the RLC hw
6327  *
6328  * @rdev: radeon_device pointer
6329  *
6330  * Initialize the RLC registers, load the ucode,
6331  * and start the RLC (CIK).
6332  * Returns 0 for success, -EINVAL if the ucode is not available.
6333  */
6334 static int cik_rlc_resume(struct radeon_device *rdev)
6335 {
6336 	u32 i, size, tmp;
6337 
6338 	if (!rdev->rlc_fw)
6339 		return -EINVAL;
6340 
6341 	cik_rlc_stop(rdev);
6342 
6343 	/* disable CG */
6344 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6345 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6346 
6347 	si_rlc_reset(rdev);
6348 
6349 	cik_init_pg(rdev);
6350 
6351 	cik_init_cg(rdev);
6352 
6353 	WREG32(RLC_LB_CNTR_INIT, 0);
6354 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6355 
6356 	mutex_lock(&rdev->grbm_idx_mutex);
6357 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6358 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6359 	WREG32(RLC_LB_PARAMS, 0x00600408);
6360 	WREG32(RLC_LB_CNTL, 0x80000004);
6361 	mutex_unlock(&rdev->grbm_idx_mutex);
6362 
6363 	WREG32(RLC_MC_CNTL, 0);
6364 	WREG32(RLC_UCODE_CNTL, 0);
6365 
6366 	if (rdev->new_fw) {
6367 		const struct rlc_firmware_header_v1_0 *hdr =
6368 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6369 		const __le32 *fw_data = (const __le32 *)
6370 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6371 
6372 		radeon_ucode_print_rlc_hdr(&hdr->header);
6373 
6374 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6375 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6376 		for (i = 0; i < size; i++)
6377 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6378 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6379 	} else {
6380 		const __be32 *fw_data;
6381 
6382 		switch (rdev->family) {
6383 		case CHIP_BONAIRE:
6384 		case CHIP_HAWAII:
6385 		default:
6386 			size = BONAIRE_RLC_UCODE_SIZE;
6387 			break;
6388 		case CHIP_KAVERI:
6389 			size = KV_RLC_UCODE_SIZE;
6390 			break;
6391 		case CHIP_KABINI:
6392 			size = KB_RLC_UCODE_SIZE;
6393 			break;
6394 		case CHIP_MULLINS:
6395 			size = ML_RLC_UCODE_SIZE;
6396 			break;
6397 		}
6398 
6399 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6400 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6401 		for (i = 0; i < size; i++)
6402 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6403 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6404 	}
6405 
6406 	/* XXX - find out what chips support lbpw */
6407 	cik_enable_lbpw(rdev, false);
6408 
6409 	if (rdev->family == CHIP_BONAIRE)
6410 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6411 
6412 	cik_rlc_start(rdev);
6413 
6414 	return 0;
6415 }
6416 
/*
 * cik_enable_cgcg - enable/disable coarse-grain clock gating
 *
 * Toggles CGCG/CGLS in RLC_CGCG_CGLS_CTRL, programming the RLC
 * serdes masters (with the RLC halted) on the enable path.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while touching the serdes; restored below */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): repeated dummy reads appear to act as a
		 * flush/settle step in the vendor sequence — confirm */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6454 
/*
 * cik_enable_mgcg - enable/disable medium-grain clock gating
 *
 * Configures CP memory light sleep, the RLC MGCG override, the RLC
 * serdes masters (with the RLC halted), and the CGTS SM control
 * register, gated on the relevant RADEON_CG_SUPPORT_* flags.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* enable CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* set bit 0, clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while touching the serdes; restored below */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* force both override bits on, disabling MGCG */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* halt the RLC while touching the serdes; restored below */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);
	}
}
6538 
/* Memory controller registers whose clock-gating / light-sleep enable
 * bits are toggled by cik_enable_mc_ls() and cik_enable_mc_mgcg(). */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6551 
6552 static void cik_enable_mc_ls(struct radeon_device *rdev,
6553 			     bool enable)
6554 {
6555 	int i;
6556 	u32 orig, data;
6557 
6558 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6559 		orig = data = RREG32(mc_cg_registers[i]);
6560 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6561 			data |= MC_LS_ENABLE;
6562 		else
6563 			data &= ~MC_LS_ENABLE;
6564 		if (data != orig)
6565 			WREG32(mc_cg_registers[i], data);
6566 	}
6567 }
6568 
6569 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6570 			       bool enable)
6571 {
6572 	int i;
6573 	u32 orig, data;
6574 
6575 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6576 		orig = data = RREG32(mc_cg_registers[i]);
6577 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6578 			data |= MC_CG_ENABLE;
6579 		else
6580 			data &= ~MC_CG_ENABLE;
6581 		if (data != orig)
6582 			WREG32(mc_cg_registers[i], data);
6583 	}
6584 }
6585 
6586 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6587 				 bool enable)
6588 {
6589 	u32 orig, data;
6590 
6591 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6592 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6593 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6594 	} else {
6595 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6596 		data |= 0xff000000;
6597 		if (data != orig)
6598 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6599 
6600 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6601 		data |= 0xff000000;
6602 		if (data != orig)
6603 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6604 	}
6605 }
6606 
6607 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6608 				 bool enable)
6609 {
6610 	u32 orig, data;
6611 
6612 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6613 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6614 		data |= 0x100;
6615 		if (orig != data)
6616 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6617 
6618 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6619 		data |= 0x100;
6620 		if (orig != data)
6621 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6622 	} else {
6623 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6624 		data &= ~0x100;
6625 		if (orig != data)
6626 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6627 
6628 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6629 		data &= ~0x100;
6630 		if (orig != data)
6631 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6632 	}
6633 }
6634 
/*
 * cik_enable_uvd_mgcg - toggle medium-grain clock gating on the UVD block.
 * Programs UVD_CGC_MEM_CTRL (via the UVD context register aperture) and
 * the DCM bit in UVD_CGC_CTRL.
 */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the value just read is discarded and the
		 * register is written wholesale with 0xfff; presumably the
		 * read is only needed to latch the indirect UVD context
		 * register before the write — confirm against hw docs
		 * (this matches the upstream Linux driver). */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* disable: clear the low 12 bits and the DCM bit */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6660 
6661 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6662 			       bool enable)
6663 {
6664 	u32 orig, data;
6665 
6666 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6667 
6668 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6669 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6670 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6671 	else
6672 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6673 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6674 
6675 	if (orig != data)
6676 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6677 }
6678 
6679 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6680 				bool enable)
6681 {
6682 	u32 orig, data;
6683 
6684 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6685 
6686 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6687 		data &= ~CLOCK_GATING_DIS;
6688 	else
6689 		data |= CLOCK_GATING_DIS;
6690 
6691 	if (orig != data)
6692 		WREG32(HDP_HOST_PATH_CNTL, data);
6693 }
6694 
6695 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6696 			      bool enable)
6697 {
6698 	u32 orig, data;
6699 
6700 	orig = data = RREG32(HDP_MEM_POWER_LS);
6701 
6702 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6703 		data |= HDP_LS_ENABLE;
6704 	else
6705 		data &= ~HDP_LS_ENABLE;
6706 
6707 	if (orig != data)
6708 		WREG32(HDP_MEM_POWER_LS, data);
6709 }
6710 
/**
 * cik_update_cg - enable/disable clock gating for a set of IP blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable (true) or disable (false) clock gating
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		/* mask GUI idle interrupts while reprogramming GFX gating */
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			/* enabling: MGCG before CGCG */
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			/* disabling: CGCG before MGCG (reverse order) */
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC clock gating is skipped on IGPs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		/* only touch UVD registers when the block exists */
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6758 
/*
 * cik_init_cg - enable clock gating on all supported blocks at init time.
 * GFX is enabled first, then UVD internal CG (if present), then the
 * remaining blocks in one batch.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6773 
/*
 * cik_fini_cg - disable clock gating on teardown, in the reverse order
 * of cik_init_cg(): non-GFX blocks first, GFX last.
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6784 
6785 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6786 					  bool enable)
6787 {
6788 	u32 data, orig;
6789 
6790 	orig = data = RREG32(RLC_PG_CNTL);
6791 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6792 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6793 	else
6794 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6795 	if (orig != data)
6796 		WREG32(RLC_PG_CNTL, data);
6797 }
6798 
6799 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6800 					  bool enable)
6801 {
6802 	u32 data, orig;
6803 
6804 	orig = data = RREG32(RLC_PG_CNTL);
6805 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6806 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6807 	else
6808 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6809 	if (orig != data)
6810 		WREG32(RLC_PG_CNTL, data);
6811 }
6812 
6813 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6814 {
6815 	u32 data, orig;
6816 
6817 	orig = data = RREG32(RLC_PG_CNTL);
6818 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6819 		data &= ~DISABLE_CP_PG;
6820 	else
6821 		data |= DISABLE_CP_PG;
6822 	if (orig != data)
6823 		WREG32(RLC_PG_CNTL, data);
6824 }
6825 
6826 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6827 {
6828 	u32 data, orig;
6829 
6830 	orig = data = RREG32(RLC_PG_CNTL);
6831 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6832 		data &= ~DISABLE_GDS_PG;
6833 	else
6834 		data |= DISABLE_GDS_PG;
6835 	if (orig != data)
6836 		WREG32(RLC_PG_CNTL, data);
6837 }
6838 
6839 #define CP_ME_TABLE_SIZE    96
6840 #define CP_ME_TABLE_OFFSET  2048
6841 #define CP_MEC_TABLE_OFFSET 4096
6842 
/**
 * cik_init_cp_pg_table - copy the CP jump tables into the RLC table buffer
 *
 * @rdev: radeon_device pointer
 *
 * Fills rdev->rlc.cp_table_ptr with the jump table of each microengine
 * (CE, PFP, ME, MEC, plus MEC2 on Kaveri), taken from the loaded
 * firmware images.  Used by CP power gating.  No-op if the table
 * buffer has not been allocated.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Kaveri additionally carries a second MEC (MEC2) table */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			/* new-style firmware: jump table offset/size come
			 * from the little-endian gfx firmware header */
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			/* copy the table, keeping little-endian layout in the bo */
			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			/* old-style firmware: big-endian words at fixed offsets */
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
6926 
/*
 * cik_enable_gfx_cgpg - toggle coarse-grain GFX power gating by
 * programming GFX_PG_ENABLE in RLC_PG_CNTL and AUTO_PG_EN in
 * RLC_AUTO_PG_CTRL.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): result of this read is unused; presumably a
		 * dummy read to make sure the block is powered up/flushed
		 * after disabling PG — matches upstream Linux, confirm
		 * against hw docs before removing. */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6956 
/*
 * cik_get_cu_active_bitmap - return a bitmap of the active CUs in the
 * given shader engine / shader array.
 *
 * Selects the se/sh via GRBM (under grbm_idx_mutex), reads the fixed and
 * user shader-array config registers, then restores broadcast selection.
 * One bit per CU, up to max_cu_per_sh bits; a set bit means active.
 */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 mask = 0, tmp, tmp1;
	int i;

	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, se, sh);
	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	/* restore broadcast (all se/sh) selection */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* combine the inactive-CU bits from both registers; the bits of
	 * interest live in the upper 16 bits */
	tmp &= 0xffff0000;

	tmp |= tmp1;
	tmp >>= 16;

	/* build a mask with one bit per possible CU */
	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
		mask <<= 1;
		mask |= 1;
	}

	/* invert: registers flag inactive CUs, we return active ones */
	return (~tmp) & mask;
}
6981 
6982 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6983 {
6984 	u32 i, j, k, active_cu_number = 0;
6985 	u32 mask, counter, cu_bitmap;
6986 	u32 tmp = 0;
6987 
6988 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6989 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6990 			mask = 1;
6991 			cu_bitmap = 0;
6992 			counter = 0;
6993 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6994 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6995 					if (counter < 2)
6996 						cu_bitmap |= mask;
6997 					counter ++;
6998 				}
6999 				mask <<= 1;
7000 			}
7001 
7002 			active_cu_number += counter;
7003 			tmp |= (cu_bitmap << (i * 16 + j * 8));
7004 		}
7005 	}
7006 
7007 	WREG32(RLC_PG_AO_CU_MASK, tmp);
7008 
7009 	tmp = RREG32(RLC_MAX_PG_CU);
7010 	tmp &= ~MAX_PU_CU_MASK;
7011 	tmp |= MAX_PU_CU(active_cu_number);
7012 	WREG32(RLC_MAX_PG_CU, tmp);
7013 }
7014 
7015 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
7016 				       bool enable)
7017 {
7018 	u32 data, orig;
7019 
7020 	orig = data = RREG32(RLC_PG_CNTL);
7021 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
7022 		data |= STATIC_PER_CU_PG_ENABLE;
7023 	else
7024 		data &= ~STATIC_PER_CU_PG_ENABLE;
7025 	if (orig != data)
7026 		WREG32(RLC_PG_CNTL, data);
7027 }
7028 
7029 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
7030 					bool enable)
7031 {
7032 	u32 data, orig;
7033 
7034 	orig = data = RREG32(RLC_PG_CNTL);
7035 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
7036 		data |= DYN_PER_CU_PG_ENABLE;
7037 	else
7038 		data &= ~DYN_PER_CU_PG_ENABLE;
7039 	if (orig != data)
7040 		WREG32(RLC_PG_CNTL, data);
7041 }
7042 
7043 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
7044 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
7045 
/*
 * cik_init_gfx_cgpg - one-time setup for GFX power gating: seed the RLC
 * scratch area with the clear-state descriptor and save/restore list,
 * point the RLC at the save/restore and CP-table buffers, and program
 * the PG delay / idle-poll parameters.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* write the clear-state descriptor (addr hi/lo + size) */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear-state data: zero out the descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the register save/restore list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* buffers are 256-byte aligned; registers take addr >> 8 */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
7094 
/*
 * cik_update_gfx_pg - toggle all three GFX power gating modes
 * (coarse-grain, static per-CU, dynamic per-CU) together.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
7101 
/**
 * cik_get_csb_size - compute the size of the clear-state buffer
 *
 * @rdev: radeon_device pointer
 *
 * Returns the number of dwords cik_get_csb_buffer() will emit, or 0 if
 * there is no clear-state data or a non-SECT_CONTEXT section is found.
 * Must be kept in sync with cik_get_csb_buffer().
 */
u32 cik_get_csb_size(struct radeon_device *rdev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return 0;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				/* SET_CONTEXT_REG header + offset + payload */
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}
7133 
/**
 * cik_get_csb_buffer - fill the clear-state buffer with PM4 packets
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (little-endian dwords); must be at least
 *          cik_get_csb_size() dwords; no-op if NULL or no cs_data
 *
 * Emits: preamble begin, context control, the SECT_CONTEXT register
 * extents, the per-family raster config, preamble end, and clear state.
 * The emitted layout must match cik_get_csb_size().
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* register offset relative to the context reg base */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only SECT_CONTEXT sections are supported */
				return;
			}
		}
	}

	/* per-family PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG1 values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
7198 
/*
 * cik_init_pg - enable power gating at init time, gated on pg_flags.
 * GFX PG additionally requires the cgpg init sequence plus CP/GDS PG.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
7213 
/*
 * cik_fini_pg - disable power gating on teardown, in the reverse order
 * of cik_init_pg().
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
7224 
7225 /*
7226  * Interrupts
7227  * Starting with r6xx, interrupts are handled via a ring buffer.
7228  * Ring buffers are areas of GPU accessible memory that the GPU
7229  * writes interrupt vectors into and the host reads vectors out of.
7230  * There is a rptr (read pointer) that determines where the
7231  * host is currently reading, and a wptr (write pointer)
7232  * which determines where the GPU has written.  When the
7233  * pointers are equal, the ring is idle.  When the GPU
7234  * writes vectors to the ring buffer, it increments the
7235  * wptr.  When there is an interrupt, the host then starts
7236  * fetching commands and processing them until the pointers are
7237  * equal again at which point it updates the rptr.
7238  */
7239 
7240 /**
7241  * cik_enable_interrupts - Enable the interrupt ring buffer
7242  *
7243  * @rdev: radeon_device pointer
7244  *
7245  * Enable the interrupt ring buffer (CIK).
7246  */
7247 static void cik_enable_interrupts(struct radeon_device *rdev)
7248 {
7249 	u32 ih_cntl = RREG32(IH_CNTL);
7250 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7251 
7252 	ih_cntl |= ENABLE_INTR;
7253 	ih_rb_cntl |= IH_RB_ENABLE;
7254 	WREG32(IH_CNTL, ih_cntl);
7255 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7256 	rdev->ih.enabled = true;
7257 }
7258 
7259 /**
7260  * cik_disable_interrupts - Disable the interrupt ring buffer
7261  *
7262  * @rdev: radeon_device pointer
7263  *
7264  * Disable the interrupt ring buffer (CIK).
7265  */
7266 static void cik_disable_interrupts(struct radeon_device *rdev)
7267 {
7268 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7269 	u32 ih_cntl = RREG32(IH_CNTL);
7270 
7271 	ih_rb_cntl &= ~IH_RB_ENABLE;
7272 	ih_cntl &= ~ENABLE_INTR;
7273 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7274 	WREG32(IH_CNTL, ih_cntl);
7275 	/* set rptr, wptr to 0 */
7276 	WREG32(IH_RB_RPTR, 0);
7277 	WREG32(IH_RB_WPTR, 0);
7278 	rdev->ih.enabled = false;
7279 	rdev->ih.rptr = 0;
7280 }
7281 
7282 /**
7283  * cik_disable_interrupt_state - Disable all interrupt sources
7284  *
7285  * @rdev: radeon_device pointer
7286  *
7287  * Clear all interrupt enable bits used by the driver (CIK).
7288  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: keep only the polarity bit of each HPD pad */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
7358 
7359 /**
7360  * cik_irq_init - init and enable the interrupt ring
7361  *
7362  * @rdev: radeon_device pointer
7363  *
7364  * Allocate a ring buffer for the interrupt controller,
7365  * enable the RLC, disable interrupts, enable the IH
7366  * ring buffer and enable it (CIK).
 * Called at device load and resume.
7368  * Returns 0 for success, errors for failure.
7369  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* undo the ring allocation on failure */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* log2 of the ring size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7440 
7441 /**
7442  * cik_irq_set - enable/disable interrupt sources
7443  *
7444  * @rdev: radeon_device pointer
7445  *
7446  * Enable interrupt sources on the GPU (vblanks, hpd,
7447  * etc.) (CIK).
7448  * Returns 0 for success, errors for failure.
7449  */
int cik_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_m1p0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		cik_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		cik_disable_interrupt_state(rdev);
		return 0;
	}

	/* read current state with the enable bits of interest masked off;
	 * they are OR-ed back in below based on rdev->irq */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;

	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);

	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	/* compute rings: only ME1 pipe 0 is supported here */
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
		}
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
		}
	}

	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	/* vblank: enabled if either the vblank interrupt or a pending
	 * page flip wants it */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("cik_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("cik_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("cik_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("cik_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("cik_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("cik_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("cik_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("cik_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("cik_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("cik_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("cik_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("cik_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}

	/* commit the assembled values to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);

	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* pflip interrupts are unconditionally unmasked per CRTC */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	WREG32(DC_HPD1_INT_CONTROL, hpd1);
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
	WREG32(DC_HPD6_INT_CONTROL, hpd6);

	/* posting read */
	RREG32(SRBM_STATUS);

	return 0;
}
7641 
7642 /**
7643  * cik_irq_ack - ack interrupt sources
7644  *
7645  * @rdev: radeon_device pointer
7646  *
7647  * Ack interrupt sources on the GPU (vblanks, hpd,
7648  * etc.) (CIK).  Certain interrupts sources are sw
7649  * generated and do not require an explicit ack.
7650  */
static inline void cik_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	/* Snapshot all display interrupt status registers; the cached
	 * copies in stat_regs are consumed (and bits cleared) later by
	 * cik_irq_process(). */
	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);

	/* Page-flip status for the first two CRTCs (always present);
	 * CRTC2-5 are only read on asics that actually have them. */
	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC0_REGISTER_OFFSET);
	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC1_REGISTER_OFFSET);
	if (rdev->num_crtc >= 4) {
		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC2_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC3_REGISTER_OFFSET);
	}
	if (rdev->num_crtc >= 6) {
		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC4_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC5_REGISTER_OFFSET);
	}

	/* Ack any pending pageflip/vblank/vline interrupts on CRTC0/1
	 * by writing the corresponding clear/ack bits back. */
	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	/* Ack HPD (hotplug detect) and HPD RX interrupts.  These are
	 * read-modify-write because the DC_HPDx_INT_CONTROL registers
	 * also carry the enable bits programmed by cik_irq_set(). */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
7790 
7791 /**
7792  * cik_irq_disable - disable interrupts
7793  *
7794  * @rdev: radeon_device pointer
7795  *
7796  * Disable interrupts on the hw (CIK).
7797  */
static void cik_irq_disable(struct radeon_device *rdev)
{
	/* Stop interrupt generation first, then drain anything pending. */
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* Finally mask every interrupt source so nothing re-asserts. */
	cik_disable_interrupt_state(rdev);
}
7806 
7807 /**
7808  * cik_irq_disable - disable interrupts for suspend
7809  *
7810  * @rdev: radeon_device pointer
7811  *
7812  * Disable interrupts and stop the RLC (CIK).
7813  * Used for suspend.
7814  */
7815 static void cik_irq_suspend(struct radeon_device *rdev)
7816 {
7817 	cik_irq_disable(rdev);
7818 	cik_rlc_stop(rdev);
7819 }
7820 
7821 /**
7822  * cik_irq_fini - tear down interrupt support
7823  *
7824  * @rdev: radeon_device pointer
7825  *
7826  * Disable interrupts on the hw and free the IH ring
7827  * buffer (CIK).
7828  * Used for driver unload.
7829  */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	/* Free the IH ring buffer only after interrupts are fully quiesced. */
	r600_ih_ring_fini(rdev);
}
7835 
7836 /**
7837  * cik_get_ih_wptr - get the IH ring buffer wptr
7838  *
7839  * @rdev: radeon_device pointer
7840  *
7841  * Get the IH ring buffer wptr from either the register
7842  * or the writeback memory buffer (CIK).  Also check for
7843  * ring buffer overflow and deal with it.
7844  * Used by cik_irq_process().
7845  * Returns the value of the wptr.
7846  */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	/* Prefer the writeback copy of the wptr when writeback is enabled;
	 * otherwise fall back to an MMIO register read. */
	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happen start parsing interrupt
		 * from the last not overwritten vector (wptr + 16). Hopefully
		 * this should allow us to catchup.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		/* Clear the hardware overflow flag so the next overflow can
		 * be detected again. */
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}
7871 
7872 /*        CIK IV Ring
7873  * Each IV ring entry is 128 bits:
7874  * [7:0]    - interrupt source id
7875  * [31:8]   - reserved
7876  * [59:32]  - interrupt source data
7877  * [63:60]  - reserved
7878  * [71:64]  - RINGID
7879  *            CP:
7880  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7881  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7882  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7883  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7884  *            PIPE_ID - ME0 0=3D
7885  *                    - ME1&2 compute dispatcher (4 pipes each)
7886  *            SDMA:
7887  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7888  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7889  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7890  * [79:72]  - VMID
7891  * [95:80]  - PASID
7892  * [127:96] - reserved
7893  */
7894 /**
7895  * cik_irq_process - interrupt handler
7896  *
7897  * @rdev: radeon_device pointer
7898  *
7899  * Interrupt hander (CIK).  Walk the IH ring,
7900  * ack interrupts and schedule work to handle
7901  * interrupt events.
7902  * Returns irq process return code.
7903  */
7904 int cik_irq_process(struct radeon_device *rdev)
7905 {
7906 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7907 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7908 	u32 wptr;
7909 	u32 rptr;
7910 	u32 src_id, src_data, ring_id;
7911 	u8 me_id, pipe_id, queue_id;
7912 	u32 ring_index;
7913 	bool queue_hotplug = false;
7914 	bool queue_dp = false;
7915 	bool queue_reset = false;
7916 	u32 addr, status, mc_client;
7917 	bool queue_thermal = false;
7918 
7919 	if (!rdev->ih.enabled || rdev->shutdown)
7920 		return IRQ_NONE;
7921 
7922 	wptr = cik_get_ih_wptr(rdev);
7923 
7924 restart_ih:
7925 	/* is somebody else already processing irqs? */
7926 	if (atomic_xchg(&rdev->ih.lock, 1))
7927 		return IRQ_NONE;
7928 
7929 	rptr = rdev->ih.rptr;
7930 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7931 
7932 	/* Order reading of wptr vs. reading of IH ring data */
7933 	rmb();
7934 
7935 	/* display interrupts */
7936 	cik_irq_ack(rdev);
7937 
7938 	while (rptr != wptr) {
7939 		/* wptr/rptr are in bytes! */
7940 		ring_index = rptr / 4;
7941 
7942 		radeon_kfd_interrupt(rdev,
7943 		    (const void *)__UNVOLATILE(&rdev->ih.ring[ring_index]));
7944 
7945 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7946 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7947 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7948 
7949 		switch (src_id) {
7950 		case 1: /* D1 vblank/vline */
7951 			switch (src_data) {
7952 			case 0: /* D1 vblank */
7953 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7954 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7955 
7956 				if (rdev->irq.crtc_vblank_int[0]) {
7957 					drm_handle_vblank(rdev->ddev, 0);
7958 #ifdef __NetBSD__
7959 						spin_lock(&rdev->irq.vblank_lock);
7960 						rdev->pm.vblank_sync = true;
7961 						DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
7962 						spin_unlock(&rdev->irq.vblank_lock);
7963 #else
7964 					rdev->pm.vblank_sync = true;
7965 					wake_up(&rdev->irq.vblank_queue);
7966 #endif
7967 				}
7968 				if (atomic_read(&rdev->irq.pflip[0]))
7969 					radeon_crtc_handle_vblank(rdev, 0);
7970 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7971 				DRM_DEBUG("IH: D1 vblank\n");
7972 
7973 				break;
7974 			case 1: /* D1 vline */
7975 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7976 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7977 
7978 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7979 				DRM_DEBUG("IH: D1 vline\n");
7980 
7981 				break;
7982 			default:
7983 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7984 				break;
7985 			}
7986 			break;
7987 		case 2: /* D2 vblank/vline */
7988 			switch (src_data) {
7989 			case 0: /* D2 vblank */
7990 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7991 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7992 
7993 				if (rdev->irq.crtc_vblank_int[1]) {
7994 					drm_handle_vblank(rdev->ddev, 1);
7995 #ifdef __NetBSD__
7996 						spin_lock(&rdev->irq.vblank_lock);
7997 						rdev->pm.vblank_sync = true;
7998 						DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
7999 						spin_unlock(&rdev->irq.vblank_lock);
8000 #else
8001 					rdev->pm.vblank_sync = true;
8002 					wake_up(&rdev->irq.vblank_queue);
8003 #endif
8004 				}
8005 				if (atomic_read(&rdev->irq.pflip[1]))
8006 					radeon_crtc_handle_vblank(rdev, 1);
8007 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
8008 				DRM_DEBUG("IH: D2 vblank\n");
8009 
8010 				break;
8011 			case 1: /* D2 vline */
8012 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
8013 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8014 
8015 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
8016 				DRM_DEBUG("IH: D2 vline\n");
8017 
8018 				break;
8019 			default:
8020 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8021 				break;
8022 			}
8023 			break;
8024 		case 3: /* D3 vblank/vline */
8025 			switch (src_data) {
8026 			case 0: /* D3 vblank */
8027 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
8028 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8029 
8030 				if (rdev->irq.crtc_vblank_int[2]) {
8031 					drm_handle_vblank(rdev->ddev, 2);
8032 #ifdef __NetBSD__
8033 						spin_lock(&rdev->irq.vblank_lock);
8034 						rdev->pm.vblank_sync = true;
8035 						DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
8036 						spin_unlock(&rdev->irq.vblank_lock);
8037 #else
8038 					rdev->pm.vblank_sync = true;
8039 					wake_up(&rdev->irq.vblank_queue);
8040 #endif
8041 				}
8042 				if (atomic_read(&rdev->irq.pflip[2]))
8043 					radeon_crtc_handle_vblank(rdev, 2);
8044 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
8045 				DRM_DEBUG("IH: D3 vblank\n");
8046 
8047 				break;
8048 			case 1: /* D3 vline */
8049 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
8050 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8051 
8052 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
8053 				DRM_DEBUG("IH: D3 vline\n");
8054 
8055 				break;
8056 			default:
8057 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8058 				break;
8059 			}
8060 			break;
8061 		case 4: /* D4 vblank/vline */
8062 			switch (src_data) {
8063 			case 0: /* D4 vblank */
8064 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
8065 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8066 
8067 				if (rdev->irq.crtc_vblank_int[3]) {
8068 					drm_handle_vblank(rdev->ddev, 3);
8069 #ifdef __NetBSD__
8070 						spin_lock(&rdev->irq.vblank_lock);
8071 						rdev->pm.vblank_sync = true;
8072 						DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
8073 						spin_unlock(&rdev->irq.vblank_lock);
8074 #else
8075 					rdev->pm.vblank_sync = true;
8076 					wake_up(&rdev->irq.vblank_queue);
8077 #endif
8078 				}
8079 				if (atomic_read(&rdev->irq.pflip[3]))
8080 					radeon_crtc_handle_vblank(rdev, 3);
8081 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
8082 				DRM_DEBUG("IH: D4 vblank\n");
8083 
8084 				break;
8085 			case 1: /* D4 vline */
8086 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
8087 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8088 
8089 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
8090 				DRM_DEBUG("IH: D4 vline\n");
8091 
8092 				break;
8093 			default:
8094 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8095 				break;
8096 			}
8097 			break;
8098 		case 5: /* D5 vblank/vline */
8099 			switch (src_data) {
8100 			case 0: /* D5 vblank */
8101 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
8102 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8103 
8104 				if (rdev->irq.crtc_vblank_int[4]) {
8105 					drm_handle_vblank(rdev->ddev, 4);
8106 #ifdef __NetBSD__
8107 						spin_lock(&rdev->irq.vblank_lock);
8108 						rdev->pm.vblank_sync = true;
8109 						DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
8110 						spin_unlock(&rdev->irq.vblank_lock);
8111 #else
8112 					rdev->pm.vblank_sync = true;
8113 					wake_up(&rdev->irq.vblank_queue);
8114 #endif
8115 				}
8116 				if (atomic_read(&rdev->irq.pflip[4]))
8117 					radeon_crtc_handle_vblank(rdev, 4);
8118 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
8119 				DRM_DEBUG("IH: D5 vblank\n");
8120 
8121 				break;
8122 			case 1: /* D5 vline */
8123 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
8124 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8125 
8126 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
8127 				DRM_DEBUG("IH: D5 vline\n");
8128 
8129 				break;
8130 			default:
8131 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8132 				break;
8133 			}
8134 			break;
8135 		case 6: /* D6 vblank/vline */
8136 			switch (src_data) {
8137 			case 0: /* D6 vblank */
8138 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
8139 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8140 
8141 				if (rdev->irq.crtc_vblank_int[5]) {
8142 					drm_handle_vblank(rdev->ddev, 5);
8143 #ifdef __NetBSD__
8144 						spin_lock(&rdev->irq.vblank_lock);
8145 						rdev->pm.vblank_sync = true;
8146 						DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
8147 						spin_unlock(&rdev->irq.vblank_lock);
8148 #else
8149 					rdev->pm.vblank_sync = true;
8150 					wake_up(&rdev->irq.vblank_queue);
8151 #endif
8152 				}
8153 				if (atomic_read(&rdev->irq.pflip[5]))
8154 					radeon_crtc_handle_vblank(rdev, 5);
8155 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8156 				DRM_DEBUG("IH: D6 vblank\n");
8157 
8158 				break;
8159 			case 1: /* D6 vline */
8160 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
8161 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8162 
8163 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8164 				DRM_DEBUG("IH: D6 vline\n");
8165 
8166 				break;
8167 			default:
8168 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8169 				break;
8170 			}
8171 			break;
8172 		case 8: /* D1 page flip */
8173 		case 10: /* D2 page flip */
8174 		case 12: /* D3 page flip */
8175 		case 14: /* D4 page flip */
8176 		case 16: /* D5 page flip */
8177 		case 18: /* D6 page flip */
8178 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8179 			if (radeon_use_pflipirq > 0)
8180 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8181 			break;
8182 		case 42: /* HPD hotplug */
8183 			switch (src_data) {
8184 			case 0:
8185 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
8186 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8187 
8188 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8189 				queue_hotplug = true;
8190 				DRM_DEBUG("IH: HPD1\n");
8191 
8192 				break;
8193 			case 1:
8194 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
8195 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8196 
8197 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8198 				queue_hotplug = true;
8199 				DRM_DEBUG("IH: HPD2\n");
8200 
8201 				break;
8202 			case 2:
8203 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
8204 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8205 
8206 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8207 				queue_hotplug = true;
8208 				DRM_DEBUG("IH: HPD3\n");
8209 
8210 				break;
8211 			case 3:
8212 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
8213 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8214 
8215 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8216 				queue_hotplug = true;
8217 				DRM_DEBUG("IH: HPD4\n");
8218 
8219 				break;
8220 			case 4:
8221 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
8222 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8223 
8224 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8225 				queue_hotplug = true;
8226 				DRM_DEBUG("IH: HPD5\n");
8227 
8228 				break;
8229 			case 5:
8230 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
8231 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8232 
8233 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8234 				queue_hotplug = true;
8235 				DRM_DEBUG("IH: HPD6\n");
8236 
8237 				break;
8238 			case 6:
8239 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
8240 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8241 
8242 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
8243 				queue_dp = true;
8244 				DRM_DEBUG("IH: HPD_RX 1\n");
8245 
8246 				break;
8247 			case 7:
8248 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
8249 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8250 
8251 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
8252 				queue_dp = true;
8253 				DRM_DEBUG("IH: HPD_RX 2\n");
8254 
8255 				break;
8256 			case 8:
8257 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
8258 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8259 
8260 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
8261 				queue_dp = true;
8262 				DRM_DEBUG("IH: HPD_RX 3\n");
8263 
8264 				break;
8265 			case 9:
8266 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
8267 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8268 
8269 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
8270 				queue_dp = true;
8271 				DRM_DEBUG("IH: HPD_RX 4\n");
8272 
8273 				break;
8274 			case 10:
8275 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
8276 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8277 
8278 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
8279 				queue_dp = true;
8280 				DRM_DEBUG("IH: HPD_RX 5\n");
8281 
8282 				break;
8283 			case 11:
8284 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
8285 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8286 
8287 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
8288 				queue_dp = true;
8289 				DRM_DEBUG("IH: HPD_RX 6\n");
8290 
8291 				break;
8292 			default:
8293 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8294 				break;
8295 			}
8296 			break;
8297 		case 96:
8298 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8299 			WREG32(SRBM_INT_ACK, 0x1);
8300 			break;
8301 		case 124: /* UVD */
8302 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8303 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8304 			break;
8305 		case 146:
8306 		case 147:
8307 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8308 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8309 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8310 			/* reset addr and status */
8311 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8312 			if (addr == 0x0 && status == 0x0)
8313 				break;
8314 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8315 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8316 				addr);
8317 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8318 				status);
8319 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8320 			break;
8321 		case 167: /* VCE */
8322 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8323 			switch (src_data) {
8324 			case 0:
8325 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8326 				break;
8327 			case 1:
8328 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8329 				break;
8330 			default:
8331 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8332 				break;
8333 			}
8334 			break;
8335 		case 176: /* GFX RB CP_INT */
8336 		case 177: /* GFX IB CP_INT */
8337 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8338 			break;
8339 		case 181: /* CP EOP event */
8340 			DRM_DEBUG("IH: CP EOP\n");
8341 			/* XXX check the bitfield order! */
8342 			me_id = (ring_id & 0x60) >> 5;
8343 			pipe_id = (ring_id & 0x18) >> 3;
8344 			queue_id = (ring_id & 0x7) >> 0;
8345 			switch (me_id) {
8346 			case 0:
8347 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8348 				break;
8349 			case 1:
8350 			case 2:
8351 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8352 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8353 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8354 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8355 				break;
8356 			}
8357 			break;
8358 		case 184: /* CP Privileged reg access */
8359 			DRM_ERROR("Illegal register access in command stream\n");
8360 			/* XXX check the bitfield order! */
8361 			me_id = (ring_id & 0x60) >> 5;
8362 			pipe_id = (ring_id & 0x18) >> 3;
8363 			queue_id = (ring_id & 0x7) >> 0;
8364 			switch (me_id) {
8365 			case 0:
8366 				/* This results in a full GPU reset, but all we need to do is soft
8367 				 * reset the CP for gfx
8368 				 */
8369 				queue_reset = true;
8370 				break;
8371 			case 1:
8372 				/* XXX compute */
8373 				queue_reset = true;
8374 				break;
8375 			case 2:
8376 				/* XXX compute */
8377 				queue_reset = true;
8378 				break;
8379 			}
8380 			break;
8381 		case 185: /* CP Privileged inst */
8382 			DRM_ERROR("Illegal instruction in command stream\n");
8383 			/* XXX check the bitfield order! */
8384 			me_id = (ring_id & 0x60) >> 5;
8385 			pipe_id = (ring_id & 0x18) >> 3;
8386 			queue_id = (ring_id & 0x7) >> 0;
8387 			switch (me_id) {
8388 			case 0:
8389 				/* This results in a full GPU reset, but all we need to do is soft
8390 				 * reset the CP for gfx
8391 				 */
8392 				queue_reset = true;
8393 				break;
8394 			case 1:
8395 				/* XXX compute */
8396 				queue_reset = true;
8397 				break;
8398 			case 2:
8399 				/* XXX compute */
8400 				queue_reset = true;
8401 				break;
8402 			}
8403 			break;
8404 		case 224: /* SDMA trap event */
8405 			/* XXX check the bitfield order! */
8406 			me_id = (ring_id & 0x3) >> 0;
8407 			queue_id = (ring_id & 0xc) >> 2;
8408 			DRM_DEBUG("IH: SDMA trap\n");
8409 			switch (me_id) {
8410 			case 0:
8411 				switch (queue_id) {
8412 				case 0:
8413 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8414 					break;
8415 				case 1:
8416 					/* XXX compute */
8417 					break;
8418 				case 2:
8419 					/* XXX compute */
8420 					break;
8421 				}
8422 				break;
8423 			case 1:
8424 				switch (queue_id) {
8425 				case 0:
8426 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8427 					break;
8428 				case 1:
8429 					/* XXX compute */
8430 					break;
8431 				case 2:
8432 					/* XXX compute */
8433 					break;
8434 				}
8435 				break;
8436 			}
8437 			break;
8438 		case 230: /* thermal low to high */
8439 			DRM_DEBUG("IH: thermal low to high\n");
8440 			rdev->pm.dpm.thermal.high_to_low = false;
8441 			queue_thermal = true;
8442 			break;
8443 		case 231: /* thermal high to low */
8444 			DRM_DEBUG("IH: thermal high to low\n");
8445 			rdev->pm.dpm.thermal.high_to_low = true;
8446 			queue_thermal = true;
8447 			break;
8448 		case 233: /* GUI IDLE */
8449 			DRM_DEBUG("IH: GUI idle\n");
8450 			break;
8451 		case 241: /* SDMA Privileged inst */
8452 		case 247: /* SDMA Privileged inst */
8453 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8454 			/* XXX check the bitfield order! */
8455 			me_id = (ring_id & 0x3) >> 0;
8456 			queue_id = (ring_id & 0xc) >> 2;
8457 			switch (me_id) {
8458 			case 0:
8459 				switch (queue_id) {
8460 				case 0:
8461 					queue_reset = true;
8462 					break;
8463 				case 1:
8464 					/* XXX compute */
8465 					queue_reset = true;
8466 					break;
8467 				case 2:
8468 					/* XXX compute */
8469 					queue_reset = true;
8470 					break;
8471 				}
8472 				break;
8473 			case 1:
8474 				switch (queue_id) {
8475 				case 0:
8476 					queue_reset = true;
8477 					break;
8478 				case 1:
8479 					/* XXX compute */
8480 					queue_reset = true;
8481 					break;
8482 				case 2:
8483 					/* XXX compute */
8484 					queue_reset = true;
8485 					break;
8486 				}
8487 				break;
8488 			}
8489 			break;
8490 		default:
8491 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8492 			break;
8493 		}
8494 
8495 		/* wptr/rptr are in bytes! */
8496 		rptr += 16;
8497 		rptr &= rdev->ih.ptr_mask;
8498 		WREG32(IH_RB_RPTR, rptr);
8499 	}
8500 	if (queue_dp)
8501 		schedule_work(&rdev->dp_work);
8502 	if (queue_hotplug)
8503 		schedule_delayed_work(&rdev->hotplug_work, 0);
8504 	if (queue_reset) {
8505 #ifdef __NetBSD__
8506 		spin_lock(&rdev->fence_lock);
8507 		rdev->needs_reset = true;
8508 		radeon_fence_wakeup_locked(rdev);
8509 		spin_unlock(&rdev->fence_lock);
8510 #else
8511 		rdev->needs_reset = true;
8512 		wake_up_all(&rdev->fence_queue);
8513 #endif
8514 	}
8515 	if (queue_thermal)
8516 		schedule_work(&rdev->pm.dpm.thermal.work);
8517 	rdev->ih.rptr = rptr;
8518 	atomic_set(&rdev->ih.lock, 0);
8519 
8520 	/* make sure wptr hasn't changed while processing */
8521 	wptr = cik_get_ih_wptr(rdev);
8522 	if (wptr != rptr)
8523 		goto restart_ih;
8524 
8525 	return IRQ_HANDLED;
8526 }
8527 
8528 /*
8529  * startup/shutdown callbacks
8530  */
8531 /**
8532  * cik_startup - program the asic to a functional state
8533  *
8534  * @rdev: radeon_device pointer
8535  *
8536  * Programs the asic to a functional state (CIK).
8537  * Called by cik_init() and cik_resume().
8538  * Returns 0 for success, error for failure.
8539  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* Load MC firmware on dGPUs only; skipped when DPM is already
	 * enabled (NOTE(review): presumably DPM handles it then — confirm).
	 */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* Kaveri (Spectre) and the other APUs (Kalindi) use
		 * different RLC save/restore register lists.
		 */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* Start fence processing for each ring we will use: GFX, two
	 * compute rings and two SDMA rings.
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: any failure just disables its ring (size 0). */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* VCE is likewise optional: failure disables both VCE rings. */
	r = radeon_vce_resume(rdev);
	if (!r) {
		r = vce_v2_0_resume(rdev);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE1_INDEX);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE2_INDEX);
	}
	if (r) {
		dev_err(rdev->dev, "VCE init error (%d).\n", r);
		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* Pick the ring pad packet: on Hawaii with old firmware we fall
	 * back to type-2 packets (NOTE(review): inferred from the new_fw
	 * check — confirm against firmware release notes).
	 */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* ring_size == 0 means UVD was disabled above */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	/* -ENOENT marks "no VCE ring present": it suppresses the error
	 * message below and skips vce_v1_0_init().
	 */
	r = -ENOENT;

	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	if (!r)
		r = vce_v1_0_init(rdev);
	else if (r != -ENOENT)
		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	r = radeon_kfd_resume(rdev);
	if (r)
		return r;

	return 0;
}
8785 
8786 /**
8787  * cik_resume - resume the asic to a functional state
8788  *
8789  * @rdev: radeon_device pointer
8790  *
8791  * Programs the asic to a functional state (CIK).
8792  * Called at resume.
8793  * Returns 0 for success, error for failure.
8794  */
8795 int cik_resume(struct radeon_device *rdev)
8796 {
8797 	int r;
8798 
8799 	/* post card */
8800 	atom_asic_init(rdev->mode_info.atom_context);
8801 
8802 	/* init golden registers */
8803 	cik_init_golden_registers(rdev);
8804 
8805 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8806 		radeon_pm_resume(rdev);
8807 
8808 	rdev->accel_working = true;
8809 	r = cik_startup(rdev);
8810 	if (r) {
8811 		DRM_ERROR("cik startup failed on resume\n");
8812 		rdev->accel_working = false;
8813 		return r;
8814 	}
8815 
8816 	return r;
8817 
8818 }
8819 
8820 /**
8821  * cik_suspend - suspend the asic
8822  *
8823  * @rdev: radeon_device pointer
8824  *
8825  * Bring the chip into a state suitable for suspend (CIK).
8826  * Called at suspend.
8827  * Returns 0 for success.
8828  */
int cik_suspend(struct radeon_device *rdev)
{
	/* Quiesce everything started by cik_startup(), roughly in reverse. */
	radeon_kfd_suspend(rdev);
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* stop the CP and SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	/* shut down UVD/VCE before their backing memory goes away */
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	radeon_vce_suspend(rdev);
	/* tear down powergating/clockgating state and interrupts */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8847 
/* The plan is to move initialization into this function and to use
 * helper functions so that radeon_device_init does little more than
 * call the asic-specific functions.  This should also allow us to
 * remove a number of callback functions, such as vram_info.
 */
8854 /**
8855  * cik_init - asic specific driver and hw init
8856  *
8857  * @rdev: radeon_device pointer
8858  *
8859  * Setup asic specific driver variables and program the hw
8860  * to a functional state (CIK).
8861  * Called at driver startup.
8862  * Returns 0 for success, errors for failure.
8863  */
8864 int cik_init(struct radeon_device *rdev)
8865 {
8866 	struct radeon_ring *ring;
8867 	int r;
8868 
8869 	/* Read BIOS */
8870 	if (!radeon_get_bios(rdev)) {
8871 		if (ASIC_IS_AVIVO(rdev))
8872 			return -EINVAL;
8873 	}
8874 	/* Must be an ATOMBIOS */
8875 	if (!rdev->is_atom_bios) {
8876 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8877 		return -EINVAL;
8878 	}
8879 	r = radeon_atombios_init(rdev);
8880 	if (r)
8881 		return r;
8882 
8883 	/* Post card if necessary */
8884 	if (!radeon_card_posted(rdev)) {
8885 		if (!rdev->bios) {
8886 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8887 			return -EINVAL;
8888 		}
8889 		DRM_INFO("GPU not posted. posting now...\n");
8890 		atom_asic_init(rdev->mode_info.atom_context);
8891 	}
8892 	/* init golden registers */
8893 	cik_init_golden_registers(rdev);
8894 	/* Initialize scratch registers */
8895 	cik_scratch_init(rdev);
8896 	/* Initialize surface registers */
8897 	radeon_surface_init(rdev);
8898 	/* Initialize clocks */
8899 	radeon_get_clock_info(rdev->ddev);
8900 
8901 	/* Fence driver */
8902 	r = radeon_fence_driver_init(rdev);
8903 	if (r)
8904 		return r;
8905 
8906 	/* initialize memory controller */
8907 	r = cik_mc_init(rdev);
8908 	if (r)
8909 		return r;
8910 	/* Memory manager */
8911 	r = radeon_bo_init(rdev);
8912 	if (r)
8913 		return r;
8914 
8915 	if (rdev->flags & RADEON_IS_IGP) {
8916 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8917 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8918 			r = cik_init_microcode(rdev);
8919 			if (r) {
8920 				DRM_ERROR("Failed to load firmware!\n");
8921 				return r;
8922 			}
8923 		}
8924 	} else {
8925 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8926 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8927 		    !rdev->mc_fw) {
8928 			r = cik_init_microcode(rdev);
8929 			if (r) {
8930 				DRM_ERROR("Failed to load firmware!\n");
8931 				return r;
8932 			}
8933 		}
8934 	}
8935 
8936 	/* Initialize power management */
8937 	radeon_pm_init(rdev);
8938 
8939 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8940 	ring->ring_obj = NULL;
8941 	r600_ring_init(rdev, ring, 1024 * 1024);
8942 
8943 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8944 	ring->ring_obj = NULL;
8945 	r600_ring_init(rdev, ring, 1024 * 1024);
8946 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8947 	if (r)
8948 		return r;
8949 
8950 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8951 	ring->ring_obj = NULL;
8952 	r600_ring_init(rdev, ring, 1024 * 1024);
8953 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8954 	if (r)
8955 		return r;
8956 
8957 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8958 	ring->ring_obj = NULL;
8959 	r600_ring_init(rdev, ring, 256 * 1024);
8960 
8961 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8962 	ring->ring_obj = NULL;
8963 	r600_ring_init(rdev, ring, 256 * 1024);
8964 
8965 	r = radeon_uvd_init(rdev);
8966 	if (!r) {
8967 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8968 		ring->ring_obj = NULL;
8969 		r600_ring_init(rdev, ring, 4096);
8970 	}
8971 
8972 	r = radeon_vce_init(rdev);
8973 	if (!r) {
8974 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8975 		ring->ring_obj = NULL;
8976 		r600_ring_init(rdev, ring, 4096);
8977 
8978 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8979 		ring->ring_obj = NULL;
8980 		r600_ring_init(rdev, ring, 4096);
8981 	}
8982 
8983 	rdev->ih.ring_obj = NULL;
8984 	r600_ih_ring_init(rdev, 64 * 1024);
8985 
8986 	r = r600_pcie_gart_init(rdev);
8987 	if (r)
8988 		return r;
8989 
8990 	rdev->accel_working = true;
8991 	r = cik_startup(rdev);
8992 	if (r) {
8993 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8994 		cik_cp_fini(rdev);
8995 		cik_sdma_fini(rdev);
8996 		cik_irq_fini(rdev);
8997 		sumo_rlc_fini(rdev);
8998 		cik_mec_fini(rdev);
8999 		radeon_wb_fini(rdev);
9000 		radeon_ib_pool_fini(rdev);
9001 		radeon_vm_manager_fini(rdev);
9002 		radeon_irq_kms_fini(rdev);
9003 		cik_pcie_gart_fini(rdev);
9004 		rdev->accel_working = false;
9005 	}
9006 
9007 	/* Don't start up if the MC ucode is missing.
9008 	 * The default clocks and voltages before the MC ucode
9009 	 * is loaded are not suffient for advanced operations.
9010 	 */
9011 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
9012 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
9013 		return -EINVAL;
9014 	}
9015 
9016 	return 0;
9017 }
9018 
9019 /**
9020  * cik_fini - asic specific driver and hw fini
9021  *
9022  * @rdev: radeon_device pointer
9023  *
9024  * Tear down the asic specific driver variables and program the hw
9025  * to an idle state (CIK).
9026  * Called at driver unload.
9027  */
void cik_fini(struct radeon_device *rdev)
{
	/* Full teardown at driver unload: stop the engines first, then
	 * release the buffers and software state behind them.
	 */
	radeon_pm_fini(rdev);
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* free the BIOS copy; NOTE(review): allocated outside this file,
	 * presumably by radeon_get_bios() — confirm ownership
	 */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
9054 
9055 void dce8_program_fmt(struct drm_encoder *encoder)
9056 {
9057 	struct drm_device *dev = encoder->dev;
9058 	struct radeon_device *rdev = dev->dev_private;
9059 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
9060 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
9061 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
9062 	int bpc = 0;
9063 	u32 tmp = 0;
9064 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
9065 
9066 	if (connector) {
9067 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
9068 		bpc = radeon_get_monitor_bpc(connector);
9069 		dither = radeon_connector->dither;
9070 	}
9071 
9072 	/* LVDS/eDP FMT is set up by atom */
9073 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
9074 		return;
9075 
9076 	/* not needed for analog */
9077 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
9078 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
9079 		return;
9080 
9081 	if (bpc == 0)
9082 		return;
9083 
9084 	switch (bpc) {
9085 	case 6:
9086 		if (dither == RADEON_FMT_DITHER_ENABLE)
9087 			/* XXX sort out optimal dither settings */
9088 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9089 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
9090 		else
9091 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
9092 		break;
9093 	case 8:
9094 		if (dither == RADEON_FMT_DITHER_ENABLE)
9095 			/* XXX sort out optimal dither settings */
9096 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9097 				FMT_RGB_RANDOM_ENABLE |
9098 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
9099 		else
9100 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
9101 		break;
9102 	case 10:
9103 		if (dither == RADEON_FMT_DITHER_ENABLE)
9104 			/* XXX sort out optimal dither settings */
9105 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9106 				FMT_RGB_RANDOM_ENABLE |
9107 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
9108 		else
9109 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
9110 		break;
9111 	default:
9112 		/* not needed */
9113 		break;
9114 	}
9115 
9116 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
9117 }
9118 
9119 /* display watermark setup */
9120 /**
9121  * dce8_line_buffer_adjust - Set up the line buffer
9122  *
9123  * @rdev: radeon_device pointer
9124  * @radeon_crtc: the selected display controller
9125  * @mode: the current display mode on the selected display
9126  * controller
9127  *
9128  * Setup up the line buffer allocation for
9129  * the selected display controller (CIK).
9130  * Returns the line buffer size in pixels.
9131  */
9132 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
9133 				   struct radeon_crtc *radeon_crtc,
9134 				   struct drm_display_mode *mode)
9135 {
9136 	u32 tmp, buffer_alloc, i;
9137 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
9138 	/*
9139 	 * Line Buffer Setup
9140 	 * There are 6 line buffers, one for each display controllers.
9141 	 * There are 3 partitions per LB. Select the number of partitions
9142 	 * to enable based on the display width.  For display widths larger
9143 	 * than 4096, you need use to use 2 display controllers and combine
9144 	 * them using the stereo blender.
9145 	 */
9146 	if (radeon_crtc->base.enabled && mode) {
9147 		if (mode->crtc_hdisplay < 1920) {
9148 			tmp = 1;
9149 			buffer_alloc = 2;
9150 		} else if (mode->crtc_hdisplay < 2560) {
9151 			tmp = 2;
9152 			buffer_alloc = 2;
9153 		} else if (mode->crtc_hdisplay < 4096) {
9154 			tmp = 0;
9155 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9156 		} else {
9157 			DRM_DEBUG_KMS("Mode too big for LB!\n");
9158 			tmp = 0;
9159 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9160 		}
9161 	} else {
9162 		tmp = 1;
9163 		buffer_alloc = 0;
9164 	}
9165 
9166 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
9167 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
9168 
9169 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
9170 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
9171 	for (i = 0; i < rdev->usec_timeout; i++) {
9172 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
9173 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
9174 			break;
9175 		udelay(1);
9176 	}
9177 
9178 	if (radeon_crtc->base.enabled && mode) {
9179 		switch (tmp) {
9180 		case 0:
9181 		default:
9182 			return 4096 * 2;
9183 		case 1:
9184 			return 1920 * 2;
9185 		case 2:
9186 			return 2560 * 2;
9187 		}
9188 	}
9189 
9190 	/* controller not enabled, so no lb used */
9191 	return 0;
9192 }
9193 
9194 /**
9195  * cik_get_number_of_dram_channels - get the number of dram channels
9196  *
9197  * @rdev: radeon_device pointer
9198  *
9199  * Look up the number of video ram channels (CIK).
9200  * Used for display watermark bandwidth calculations
9201  * Returns the number of dram channels
9202  */
9203 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
9204 {
9205 	u32 tmp = RREG32(MC_SHARED_CHMAP);
9206 
9207 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
9208 	case 0:
9209 	default:
9210 		return 1;
9211 	case 1:
9212 		return 2;
9213 	case 2:
9214 		return 4;
9215 	case 3:
9216 		return 8;
9217 	case 4:
9218 		return 3;
9219 	case 5:
9220 		return 6;
9221 	case 6:
9222 		return 10;
9223 	case 7:
9224 		return 12;
9225 	case 8:
9226 		return 16;
9227 	}
9228 }
9229 
/* Input parameters for the DCE8 display watermark calculations below. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
9245 
9246 /**
9247  * dce8_dram_bandwidth - get the dram bandwidth
9248  *
9249  * @wm: watermark calculation data
9250  *
9251  * Calculate the raw dram bandwidth (CIK).
9252  * Used for display watermark bandwidth calculations
9253  * Returns the dram bandwidth in MBytes/s
9254  */
9255 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9256 {
9257 	/* Calculate raw DRAM Bandwidth */
9258 	fixed20_12 dram_efficiency; /* 0.7 */
9259 	fixed20_12 yclk, dram_channels, bandwidth;
9260 	fixed20_12 a;
9261 
9262 	a.full = dfixed_const(1000);
9263 	yclk.full = dfixed_const(wm->yclk);
9264 	yclk.full = dfixed_div(yclk, a);
9265 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9266 	a.full = dfixed_const(10);
9267 	dram_efficiency.full = dfixed_const(7);
9268 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9269 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9270 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9271 
9272 	return dfixed_trunc(bandwidth);
9273 }
9274 
9275 /**
9276  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9277  *
9278  * @wm: watermark calculation data
9279  *
9280  * Calculate the dram bandwidth used for display (CIK).
9281  * Used for display watermark bandwidth calculations
9282  * Returns the dram bandwidth for display in MBytes/s
9283  */
9284 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9285 {
9286 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9287 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9288 	fixed20_12 yclk, dram_channels, bandwidth;
9289 	fixed20_12 a;
9290 
9291 	a.full = dfixed_const(1000);
9292 	yclk.full = dfixed_const(wm->yclk);
9293 	yclk.full = dfixed_div(yclk, a);
9294 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9295 	a.full = dfixed_const(10);
9296 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9297 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9298 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9299 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9300 
9301 	return dfixed_trunc(bandwidth);
9302 }
9303 
9304 /**
9305  * dce8_data_return_bandwidth - get the data return bandwidth
9306  *
9307  * @wm: watermark calculation data
9308  *
9309  * Calculate the data return bandwidth used for display (CIK).
9310  * Used for display watermark bandwidth calculations
9311  * Returns the data return bandwidth in MBytes/s
9312  */
9313 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9314 {
9315 	/* Calculate the display Data return Bandwidth */
9316 	fixed20_12 return_efficiency; /* 0.8 */
9317 	fixed20_12 sclk, bandwidth;
9318 	fixed20_12 a;
9319 
9320 	a.full = dfixed_const(1000);
9321 	sclk.full = dfixed_const(wm->sclk);
9322 	sclk.full = dfixed_div(sclk, a);
9323 	a.full = dfixed_const(10);
9324 	return_efficiency.full = dfixed_const(8);
9325 	return_efficiency.full = dfixed_div(return_efficiency, a);
9326 	a.full = dfixed_const(32);
9327 	bandwidth.full = dfixed_mul(a, sclk);
9328 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9329 
9330 	return dfixed_trunc(bandwidth);
9331 }
9332 
9333 /**
9334  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9335  *
9336  * @wm: watermark calculation data
9337  *
9338  * Calculate the dmif bandwidth used for display (CIK).
9339  * Used for display watermark bandwidth calculations
9340  * Returns the dmif bandwidth in MBytes/s
9341  */
9342 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9343 {
9344 	/* Calculate the DMIF Request Bandwidth */
9345 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9346 	fixed20_12 disp_clk, bandwidth;
9347 	fixed20_12 a, b;
9348 
9349 	a.full = dfixed_const(1000);
9350 	disp_clk.full = dfixed_const(wm->disp_clk);
9351 	disp_clk.full = dfixed_div(disp_clk, a);
9352 	a.full = dfixed_const(32);
9353 	b.full = dfixed_mul(a, disp_clk);
9354 
9355 	a.full = dfixed_const(10);
9356 	disp_clk_request_efficiency.full = dfixed_const(8);
9357 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9358 
9359 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9360 
9361 	return dfixed_trunc(bandwidth);
9362 }
9363 
9364 /**
9365  * dce8_available_bandwidth - get the min available bandwidth
9366  *
9367  * @wm: watermark calculation data
9368  *
9369  * Calculate the min available bandwidth used for display (CIK).
9370  * Used for display watermark bandwidth calculations
9371  * Returns the min available bandwidth in MBytes/s
9372  */
9373 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9374 {
9375 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9376 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9377 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9378 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9379 
9380 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9381 }
9382 
9383 /**
9384  * dce8_average_bandwidth - get the average available bandwidth
9385  *
9386  * @wm: watermark calculation data
9387  *
9388  * Calculate the average available bandwidth used for display (CIK).
9389  * Used for display watermark bandwidth calculations
9390  * Returns the average available bandwidth in MBytes/s
9391  */
9392 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9393 {
9394 	/* Calculate the display mode Average Bandwidth
9395 	 * DisplayMode should contain the source and destination dimensions,
9396 	 * timing, etc.
9397 	 */
9398 	fixed20_12 bpp;
9399 	fixed20_12 line_time;
9400 	fixed20_12 src_width;
9401 	fixed20_12 bandwidth;
9402 	fixed20_12 a;
9403 
9404 	a.full = dfixed_const(1000);
9405 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9406 	line_time.full = dfixed_div(line_time, a);
9407 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9408 	src_width.full = dfixed_const(wm->src_width);
9409 	bandwidth.full = dfixed_mul(src_width, bpp);
9410 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9411 	bandwidth.full = dfixed_div(bandwidth, line_time);
9412 
9413 	return dfixed_trunc(bandwidth);
9414 }
9415 
9416 /**
9417  * dce8_latency_watermark - get the latency watermark
9418  *
9419  * @wm: watermark calculation data
9420  *
9421  * Calculate the latency watermark (CIK).
9422  * Used for display watermark bandwidth calculations
9423  * Returns the latency watermark in ns
9424  */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* time to return a 512-byte*8 chunk / a 128-byte*4 cursor line pair
	 * at the available bandwidth
	 */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads: nothing contributes latency */
	if (wm->num_heads == 0)
		return 0;

	/* large downscale ratios, many vertical taps, or interlace require
	 * more source lines per destination line
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = available bandwidth divided evenly among the heads */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk) */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* further capped by (disp_clk / 1000) * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill the worst-case line at lb_fill_bw */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line cannot be filled within the active time, the excess
	 * adds to the watermark
	 */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
9487 
9488 /**
9489  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9490  * average and available dram bandwidth
9491  *
9492  * @wm: watermark calculation data
9493  *
9494  * Check if the display average bandwidth fits in the display
9495  * dram bandwidth (CIK).
9496  * Used for display watermark bandwidth calculations
9497  * Returns true if the display fits, false if not.
9498  */
9499 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9500 {
9501 	if (dce8_average_bandwidth(wm) <=
9502 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9503 		return true;
9504 	else
9505 		return false;
9506 }
9507 
9508 /**
9509  * dce8_average_bandwidth_vs_available_bandwidth - check
9510  * average and available bandwidth
9511  *
9512  * @wm: watermark calculation data
9513  *
9514  * Check if the display average bandwidth fits in the display
9515  * available bandwidth (CIK).
9516  * Used for display watermark bandwidth calculations
9517  * Returns true if the display fits, false if not.
9518  */
9519 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9520 {
9521 	if (dce8_average_bandwidth(wm) <=
9522 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9523 		return true;
9524 	else
9525 		return false;
9526 }
9527 
9528 /**
9529  * dce8_check_latency_hiding - check latency hiding
9530  *
9531  * @wm: watermark calculation data
9532  *
9533  * Check latency hiding (CIK).
9534  * Used for display watermark bandwidth calculations
9535  * Returns true if the display fits, false if not.
9536  */
9537 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9538 {
9539 	u32 lb_partitions = wm->lb_size / wm->src_width;
9540 	u32 line_time = wm->active_time + wm->blank_time;
9541 	u32 latency_tolerant_lines;
9542 	u32 latency_hiding;
9543 	fixed20_12 a;
9544 
9545 	a.full = dfixed_const(1);
9546 	if (wm->vsc.full > a.full)
9547 		latency_tolerant_lines = 1;
9548 	else {
9549 		if (lb_partitions <= (wm->vtaps + 1))
9550 			latency_tolerant_lines = 1;
9551 		else
9552 			latency_tolerant_lines = 2;
9553 	}
9554 
9555 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9556 
9557 	if (dce8_latency_watermark(wm) <= latency_hiding)
9558 		return true;
9559 	else
9560 		return false;
9561 }
9562 
9563 /**
9564  * dce8_program_watermarks - program display watermarks
9565  *
9566  * @rdev: radeon_device pointer
9567  * @radeon_crtc: the selected display controller
9568  * @lb_size: line buffer size
9569  * @num_heads: number of display controllers in use
9570  *
9571  * Calculate and program the display watermarks for the
9572  * selected display controller (CIK).
9573  */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	/* Only compute watermarks when this CRTC is actively scanning out;
	 * otherwise line_time/watermarks stay 0 and we just program defaults. */
	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* pixel period in ns (mode->clock is in kHz per DRM convention) */
		pixel_period = 1000000 / (u32)mode->clock;
		/* line time in ns, clamped to the 16-bit register field */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			/* DPM reports clocks in 10 kHz units; scale to kHz */
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			/* same parameters as above but with the low DPM clocks */
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* Save number of lines the linebuffer leads before the scanout */
		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
9698 
9699 /**
9700  * dce8_bandwidth_update - program display watermarks
9701  *
9702  * @rdev: radeon_device pointer
9703  *
9704  * Calculate and program the display watermarks and line
9705  * buffer allocation (CIK).
9706  */
9707 void dce8_bandwidth_update(struct radeon_device *rdev)
9708 {
9709 	struct drm_display_mode *mode = NULL;
9710 	u32 num_heads = 0, lb_size;
9711 	int i;
9712 
9713 	if (!rdev->mode_info.mode_config_initialized)
9714 		return;
9715 
9716 	radeon_update_display_priority(rdev);
9717 
9718 	for (i = 0; i < rdev->num_crtc; i++) {
9719 		if (rdev->mode_info.crtcs[i]->base.enabled)
9720 			num_heads++;
9721 	}
9722 	for (i = 0; i < rdev->num_crtc; i++) {
9723 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9724 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9725 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9726 	}
9727 }
9728 
9729 /**
9730  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9731  *
9732  * @rdev: radeon_device pointer
9733  *
9734  * Fetches a GPU clock counter snapshot (SI).
9735  * Returns the 64 bit clock counter snapshot.
9736  */
9737 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9738 {
9739 	uint64_t clock;
9740 
9741 	mutex_lock(&rdev->gpu_clock_mutex);
9742 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9743 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9744 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9745 	mutex_unlock(&rdev->gpu_clock_mutex);
9746 	return clock;
9747 }
9748 
9749 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9750                               u32 cntl_reg, u32 status_reg)
9751 {
9752 	int r, i;
9753 	struct atom_clock_dividers dividers;
9754 	uint32_t tmp;
9755 
9756 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9757 					   clock, false, &dividers);
9758 	if (r)
9759 		return r;
9760 
9761 	tmp = RREG32_SMC(cntl_reg);
9762 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9763 	tmp |= dividers.post_divider;
9764 	WREG32_SMC(cntl_reg, tmp);
9765 
9766 	for (i = 0; i < 100; i++) {
9767 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9768 			break;
9769 		mdelay(10);
9770 	}
9771 	if (i == 100)
9772 		return -ETIMEDOUT;
9773 
9774 	return 0;
9775 }
9776 
9777 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9778 {
9779 	int r = 0;
9780 
9781 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9782 	if (r)
9783 		return r;
9784 
9785 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9786 	return r;
9787 }
9788 
9789 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9790 {
9791 	int r, i;
9792 	struct atom_clock_dividers dividers;
9793 	u32 tmp;
9794 
9795 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9796 					   ecclk, false, &dividers);
9797 	if (r)
9798 		return r;
9799 
9800 	for (i = 0; i < 100; i++) {
9801 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9802 			break;
9803 		mdelay(10);
9804 	}
9805 	if (i == 100)
9806 		return -ETIMEDOUT;
9807 
9808 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9809 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9810 	tmp |= dividers.post_divider;
9811 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9812 
9813 	for (i = 0; i < 100; i++) {
9814 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9815 			break;
9816 		mdelay(10);
9817 	}
9818 	if (i == 100)
9819 		return -ETIMEDOUT;
9820 
9821 	return 0;
9822 }
9823 
/*
 * cik_pcie_gen3_enable - attempt to bring the PCIe link up to gen2/gen3
 *
 * Entirely compiled out on NetBSD (no PCIe config-space helpers yet), in
 * which case this is a no-op.  Otherwise: checks the speed capability of
 * the root port, optionally re-runs gen3 equalization via the bridge and
 * GPU LNKCTL/LNKCTL2 registers, then requests the link speed change.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
#ifndef __NetBSD__		/* XXX radeon pcie */
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* Nothing to train if we sit directly on the root bus. */
	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	/* User opted out via the radeon.pcie_gen2 module parameter. */
	if (radeon_pcie_gen2 == 0)
		return;

	/* IGPs and non-PCIE parts have no PCIe link to retrain. */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* Only bother if the platform supports at least gen2. */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* Need the PCIe capability offsets on both ends of the link. */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* Save link control on both sides, then set HAWD
			 * (hardware autonomous width disable) during training. */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* Renegotiate back up to the maximum detected width
			 * if the link came up narrower than it could be. */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* Equalization retry loop: quiesce, redo EQ, then
			 * restore the saved LNKCTL/LNKCTL2 fields each pass. */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 (bit 4 = enter compliance, bits 9:7
				 * per the masks used here; restored from saved cfg) */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* Target link speed in LNKCTL2 low nibble: 1/2/3 = gen1/2/3. */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* Wait for the hardware to clear the initiate bit. */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
#endif
}
9985 
/*
 * cik_program_aspm - configure PCIe Active State Power Management
 *
 * Programs L0s/L1 inactivity timers, PLL power-down in L1, and (when the
 * root port advertises clock power management) switches the bypass clocks
 * so the reference clock can be gated.  Disabled entirely when the
 * radeon.aspm module parameter is 0, and skipped for IGP/non-PCIE parts.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* Local policy knobs; all hardcoded false here, i.e. L0s, L1 and
	 * PLL-off-in-L1 are all left enabled. */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* N_FTS: fast training sequence count advertised to the link partner */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* Build the L0s/L1 inactivity configuration; only written back
	 * inside the branches below once the final value is known. */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* Allow the PIF to power down both PLLs in L1/TXS2
			 * on both bundles (PB0/PB1). */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ# support: only usable when we have a root
			 * port to query; config-space access is stubbed out
			 * on NetBSD, so clk_req_support stays false there. */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
#ifndef __NetBSD__		/* XXX radeon pcie */
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;
#endif

				clk_req_support = false;
#ifndef __NetBSD__		/* XXX radeon pcie */
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
#endif
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* Switch thermal monitor, deep-sleep/Z clocks
				 * and MPLL bypass onto alternate sources so
				 * the reference clock can be gated. */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: write back the L0s-only configuration. */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* Only keep L0s if the transmitted N_FTS took effect and
		 * the link is not reversed on either side. */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
10138