xref: /dflybsd-src/sys/dev/drm/radeon/cik.c (revision 081e4509d0b74e3f878e66bfcd9c6a5d4555b09a)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include "cikd.h"
30 #include "atom.h"
31 #include "cik_blit_shaders.h"
32 #include "radeon_ucode.h"
33 #include "clearstate_ci.h"
34 #include "radeon_kfd.h"
35 
/*
 * Firmware images required by the CIK (Sea Islands) ASIC families.
 * Each family is listed twice: the legacy UPPERCASE names and the
 * newer lowercase names; the loader tries the new naming first and
 * falls back to the legacy set (see the *_init/ucode fetch code).
 */
/* Bonaire (dGPU) */
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
45 
46 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
47 MODULE_FIRMWARE("radeon/bonaire_me.bin");
48 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
49 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
50 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
51 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
52 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
53 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
54 
/* Hawaii (dGPU) */
55 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
56 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
57 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
64 
65 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
66 MODULE_FIRMWARE("radeon/hawaii_me.bin");
67 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
68 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
69 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
70 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
71 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
72 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
73 
/* Kaveri (APU — no MC/SMC images; lowercase set adds mec2) */
74 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
75 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
76 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
80 
81 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
82 MODULE_FIRMWARE("radeon/kaveri_me.bin");
83 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
84 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
85 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
86 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
87 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
88 
/* Kabini (APU) */
89 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
90 MODULE_FIRMWARE("radeon/KABINI_me.bin");
91 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
92 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
93 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
94 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
95 
96 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
97 MODULE_FIRMWARE("radeon/kabini_me.bin");
98 MODULE_FIRMWARE("radeon/kabini_ce.bin");
99 MODULE_FIRMWARE("radeon/kabini_mec.bin");
100 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
101 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
102 
/* Mullins (APU) */
103 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
104 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
105 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
109 
110 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
111 MODULE_FIRMWARE("radeon/mullins_me.bin");
112 MODULE_FIRMWARE("radeon/mullins_ce.bin");
113 MODULE_FIRMWARE("radeon/mullins_mec.bin");
114 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
115 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
116 
/* Forward declarations for static helpers defined later in this file. */
117 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
118 static void cik_rlc_stop(struct radeon_device *rdev);
119 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
120 static void cik_program_aspm(struct radeon_device *rdev);
121 static void cik_init_pg(struct radeon_device *rdev);
122 static void cik_init_cg(struct radeon_device *rdev);
123 static void cik_fini_pg(struct radeon_device *rdev);
124 static void cik_fini_cg(struct radeon_device *rdev);
125 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
126 					  bool enable);
127 
128 /* get temperature in millidegrees */
129 int ci_get_temp(struct radeon_device *rdev)
130 {
131 	u32 temp;
132 	int actual_temp = 0;
133 
134 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
135 		CTF_TEMP_SHIFT;
136 
137 	if (temp & 0x200)
138 		actual_temp = 255;
139 	else
140 		actual_temp = temp & 0x1ff;
141 
142 	actual_temp = actual_temp * 1000;
143 
144 	return actual_temp;
145 }
146 
147 /* get temperature in millidegrees */
148 int kv_get_temp(struct radeon_device *rdev)
149 {
150 	u32 temp;
151 	int actual_temp = 0;
152 
153 	temp = RREG32_SMC(0xC0300E0C);
154 
155 	if (temp)
156 		actual_temp = (temp / 8) - 49;
157 	else
158 		actual_temp = 0;
159 
160 	actual_temp = actual_temp * 1000;
161 
162 	return actual_temp;
163 }
164 
165 /*
166  * Indirect register accessors (PCIE_INDEX/PCIE_DATA pair)
167  */
/*
 * cik_pciep_rreg - read a PCIE port indirect register.
 *
 * Writes the register offset to PCIE_INDEX, then returns the value
 * read from PCIE_DATA.  The statement order is load-bearing: the
 * readback of PCIE_INDEX ensures the index write has landed before
 * the data read.  pciep_idx_lock serializes access to the shared
 * index/data register pair.
 */
168 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
169 {
170 	u32 r;
171 
172 	spin_lock(&rdev->pciep_idx_lock);
173 	WREG32(PCIE_INDEX, reg);
174 	(void)RREG32(PCIE_INDEX); /* read back to flush the index write */
175 	r = RREG32(PCIE_DATA);
176 	spin_unlock(&rdev->pciep_idx_lock);
177 	return r;
178 }
179 
/*
 * cik_pciep_wreg - write a PCIE port indirect register.
 *
 * Writes the register offset to PCIE_INDEX, then the value to
 * PCIE_DATA.  Both writes are followed by a readback of the same
 * register so each write is flushed before the next access; keep
 * the statement order intact.  pciep_idx_lock serializes access to
 * the shared index/data register pair.
 */
180 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
181 {
182 	spin_lock(&rdev->pciep_idx_lock);
183 	WREG32(PCIE_INDEX, reg);
184 	(void)RREG32(PCIE_INDEX); /* flush index write */
185 	WREG32(PCIE_DATA, v);
186 	(void)RREG32(PCIE_DATA); /* flush data write */
187 	spin_unlock(&rdev->pciep_idx_lock);
188 }
189 
/*
 * RLC save/restore register list for Spectre (Kaveri) GFX7 parts.
 * Entries come in pairs: the first word packs a per-SE/SH/instance
 * selector in the upper 16 bits with a register dword offset
 * (byte offset >> 2) in the lower 16 bits; the second word is a
 * 0x00000000 placeholder filled in at runtime.  The bare literals
 * 0x3 and 0x5 below separate sections of the list.
 * NOTE(review): exact selector/marker semantics should be confirmed
 * against the RLC setup code that consumes this table.
 */
190 static const u32 spectre_rlc_save_restore_register_list[] =
191 {
192 	(0x0e00 << 16) | (0xc12c >> 2),
193 	0x00000000,
194 	(0x0e00 << 16) | (0xc140 >> 2),
195 	0x00000000,
196 	(0x0e00 << 16) | (0xc150 >> 2),
197 	0x00000000,
198 	(0x0e00 << 16) | (0xc15c >> 2),
199 	0x00000000,
200 	(0x0e00 << 16) | (0xc168 >> 2),
201 	0x00000000,
202 	(0x0e00 << 16) | (0xc170 >> 2),
203 	0x00000000,
204 	(0x0e00 << 16) | (0xc178 >> 2),
205 	0x00000000,
206 	(0x0e00 << 16) | (0xc204 >> 2),
207 	0x00000000,
208 	(0x0e00 << 16) | (0xc2b4 >> 2),
209 	0x00000000,
210 	(0x0e00 << 16) | (0xc2b8 >> 2),
211 	0x00000000,
212 	(0x0e00 << 16) | (0xc2bc >> 2),
213 	0x00000000,
214 	(0x0e00 << 16) | (0xc2c0 >> 2),
215 	0x00000000,
216 	(0x0e00 << 16) | (0x8228 >> 2),
217 	0x00000000,
218 	(0x0e00 << 16) | (0x829c >> 2),
219 	0x00000000,
220 	(0x0e00 << 16) | (0x869c >> 2),
221 	0x00000000,
222 	(0x0600 << 16) | (0x98f4 >> 2),
223 	0x00000000,
224 	(0x0e00 << 16) | (0x98f8 >> 2),
225 	0x00000000,
226 	(0x0e00 << 16) | (0x9900 >> 2),
227 	0x00000000,
228 	(0x0e00 << 16) | (0xc260 >> 2),
229 	0x00000000,
230 	(0x0e00 << 16) | (0x90e8 >> 2),
231 	0x00000000,
232 	(0x0e00 << 16) | (0x3c000 >> 2),
233 	0x00000000,
234 	(0x0e00 << 16) | (0x3c00c >> 2),
235 	0x00000000,
236 	(0x0e00 << 16) | (0x8c1c >> 2),
237 	0x00000000,
238 	(0x0e00 << 16) | (0x9700 >> 2),
239 	0x00000000,
240 	(0x0e00 << 16) | (0xcd20 >> 2),
241 	0x00000000,
242 	(0x4e00 << 16) | (0xcd20 >> 2),
243 	0x00000000,
244 	(0x5e00 << 16) | (0xcd20 >> 2),
245 	0x00000000,
246 	(0x6e00 << 16) | (0xcd20 >> 2),
247 	0x00000000,
248 	(0x7e00 << 16) | (0xcd20 >> 2),
249 	0x00000000,
250 	(0x8e00 << 16) | (0xcd20 >> 2),
251 	0x00000000,
252 	(0x9e00 << 16) | (0xcd20 >> 2),
253 	0x00000000,
254 	(0xae00 << 16) | (0xcd20 >> 2),
255 	0x00000000,
256 	(0xbe00 << 16) | (0xcd20 >> 2),
257 	0x00000000,
258 	(0x0e00 << 16) | (0x89bc >> 2),
259 	0x00000000,
260 	(0x0e00 << 16) | (0x8900 >> 2),
261 	0x00000000,
262 	0x3, /* section marker */
263 	(0x0e00 << 16) | (0xc130 >> 2),
264 	0x00000000,
265 	(0x0e00 << 16) | (0xc134 >> 2),
266 	0x00000000,
267 	(0x0e00 << 16) | (0xc1fc >> 2),
268 	0x00000000,
269 	(0x0e00 << 16) | (0xc208 >> 2),
270 	0x00000000,
271 	(0x0e00 << 16) | (0xc264 >> 2),
272 	0x00000000,
273 	(0x0e00 << 16) | (0xc268 >> 2),
274 	0x00000000,
275 	(0x0e00 << 16) | (0xc26c >> 2),
276 	0x00000000,
277 	(0x0e00 << 16) | (0xc270 >> 2),
278 	0x00000000,
279 	(0x0e00 << 16) | (0xc274 >> 2),
280 	0x00000000,
281 	(0x0e00 << 16) | (0xc278 >> 2),
282 	0x00000000,
283 	(0x0e00 << 16) | (0xc27c >> 2),
284 	0x00000000,
285 	(0x0e00 << 16) | (0xc280 >> 2),
286 	0x00000000,
287 	(0x0e00 << 16) | (0xc284 >> 2),
288 	0x00000000,
289 	(0x0e00 << 16) | (0xc288 >> 2),
290 	0x00000000,
291 	(0x0e00 << 16) | (0xc28c >> 2),
292 	0x00000000,
293 	(0x0e00 << 16) | (0xc290 >> 2),
294 	0x00000000,
295 	(0x0e00 << 16) | (0xc294 >> 2),
296 	0x00000000,
297 	(0x0e00 << 16) | (0xc298 >> 2),
298 	0x00000000,
299 	(0x0e00 << 16) | (0xc29c >> 2),
300 	0x00000000,
301 	(0x0e00 << 16) | (0xc2a0 >> 2),
302 	0x00000000,
303 	(0x0e00 << 16) | (0xc2a4 >> 2),
304 	0x00000000,
305 	(0x0e00 << 16) | (0xc2a8 >> 2),
306 	0x00000000,
307 	(0x0e00 << 16) | (0xc2ac  >> 2),
308 	0x00000000,
309 	(0x0e00 << 16) | (0xc2b0 >> 2),
310 	0x00000000,
311 	(0x0e00 << 16) | (0x301d0 >> 2),
312 	0x00000000,
313 	(0x0e00 << 16) | (0x30238 >> 2),
314 	0x00000000,
315 	(0x0e00 << 16) | (0x30250 >> 2),
316 	0x00000000,
317 	(0x0e00 << 16) | (0x30254 >> 2),
318 	0x00000000,
319 	(0x0e00 << 16) | (0x30258 >> 2),
320 	0x00000000,
321 	(0x0e00 << 16) | (0x3025c >> 2),
322 	0x00000000,
323 	(0x4e00 << 16) | (0xc900 >> 2),
324 	0x00000000,
325 	(0x5e00 << 16) | (0xc900 >> 2),
326 	0x00000000,
327 	(0x6e00 << 16) | (0xc900 >> 2),
328 	0x00000000,
329 	(0x7e00 << 16) | (0xc900 >> 2),
330 	0x00000000,
331 	(0x8e00 << 16) | (0xc900 >> 2),
332 	0x00000000,
333 	(0x9e00 << 16) | (0xc900 >> 2),
334 	0x00000000,
335 	(0xae00 << 16) | (0xc900 >> 2),
336 	0x00000000,
337 	(0xbe00 << 16) | (0xc900 >> 2),
338 	0x00000000,
339 	(0x4e00 << 16) | (0xc904 >> 2),
340 	0x00000000,
341 	(0x5e00 << 16) | (0xc904 >> 2),
342 	0x00000000,
343 	(0x6e00 << 16) | (0xc904 >> 2),
344 	0x00000000,
345 	(0x7e00 << 16) | (0xc904 >> 2),
346 	0x00000000,
347 	(0x8e00 << 16) | (0xc904 >> 2),
348 	0x00000000,
349 	(0x9e00 << 16) | (0xc904 >> 2),
350 	0x00000000,
351 	(0xae00 << 16) | (0xc904 >> 2),
352 	0x00000000,
353 	(0xbe00 << 16) | (0xc904 >> 2),
354 	0x00000000,
355 	(0x4e00 << 16) | (0xc908 >> 2),
356 	0x00000000,
357 	(0x5e00 << 16) | (0xc908 >> 2),
358 	0x00000000,
359 	(0x6e00 << 16) | (0xc908 >> 2),
360 	0x00000000,
361 	(0x7e00 << 16) | (0xc908 >> 2),
362 	0x00000000,
363 	(0x8e00 << 16) | (0xc908 >> 2),
364 	0x00000000,
365 	(0x9e00 << 16) | (0xc908 >> 2),
366 	0x00000000,
367 	(0xae00 << 16) | (0xc908 >> 2),
368 	0x00000000,
369 	(0xbe00 << 16) | (0xc908 >> 2),
370 	0x00000000,
371 	(0x4e00 << 16) | (0xc90c >> 2),
372 	0x00000000,
373 	(0x5e00 << 16) | (0xc90c >> 2),
374 	0x00000000,
375 	(0x6e00 << 16) | (0xc90c >> 2),
376 	0x00000000,
377 	(0x7e00 << 16) | (0xc90c >> 2),
378 	0x00000000,
379 	(0x8e00 << 16) | (0xc90c >> 2),
380 	0x00000000,
381 	(0x9e00 << 16) | (0xc90c >> 2),
382 	0x00000000,
383 	(0xae00 << 16) | (0xc90c >> 2),
384 	0x00000000,
385 	(0xbe00 << 16) | (0xc90c >> 2),
386 	0x00000000,
387 	(0x4e00 << 16) | (0xc910 >> 2),
388 	0x00000000,
389 	(0x5e00 << 16) | (0xc910 >> 2),
390 	0x00000000,
391 	(0x6e00 << 16) | (0xc910 >> 2),
392 	0x00000000,
393 	(0x7e00 << 16) | (0xc910 >> 2),
394 	0x00000000,
395 	(0x8e00 << 16) | (0xc910 >> 2),
396 	0x00000000,
397 	(0x9e00 << 16) | (0xc910 >> 2),
398 	0x00000000,
399 	(0xae00 << 16) | (0xc910 >> 2),
400 	0x00000000,
401 	(0xbe00 << 16) | (0xc910 >> 2),
402 	0x00000000,
403 	(0x0e00 << 16) | (0xc99c >> 2),
404 	0x00000000,
405 	(0x0e00 << 16) | (0x9834 >> 2),
406 	0x00000000,
407 	(0x0000 << 16) | (0x30f00 >> 2),
408 	0x00000000,
409 	(0x0001 << 16) | (0x30f00 >> 2),
410 	0x00000000,
411 	(0x0000 << 16) | (0x30f04 >> 2),
412 	0x00000000,
413 	(0x0001 << 16) | (0x30f04 >> 2),
414 	0x00000000,
415 	(0x0000 << 16) | (0x30f08 >> 2),
416 	0x00000000,
417 	(0x0001 << 16) | (0x30f08 >> 2),
418 	0x00000000,
419 	(0x0000 << 16) | (0x30f0c >> 2),
420 	0x00000000,
421 	(0x0001 << 16) | (0x30f0c >> 2),
422 	0x00000000,
423 	(0x0600 << 16) | (0x9b7c >> 2),
424 	0x00000000,
425 	(0x0e00 << 16) | (0x8a14 >> 2),
426 	0x00000000,
427 	(0x0e00 << 16) | (0x8a18 >> 2),
428 	0x00000000,
429 	(0x0600 << 16) | (0x30a00 >> 2),
430 	0x00000000,
431 	(0x0e00 << 16) | (0x8bf0 >> 2),
432 	0x00000000,
433 	(0x0e00 << 16) | (0x8bcc >> 2),
434 	0x00000000,
435 	(0x0e00 << 16) | (0x8b24 >> 2),
436 	0x00000000,
437 	(0x0e00 << 16) | (0x30a04 >> 2),
438 	0x00000000,
439 	(0x0600 << 16) | (0x30a10 >> 2),
440 	0x00000000,
441 	(0x0600 << 16) | (0x30a14 >> 2),
442 	0x00000000,
443 	(0x0600 << 16) | (0x30a18 >> 2),
444 	0x00000000,
445 	(0x0600 << 16) | (0x30a2c >> 2),
446 	0x00000000,
447 	(0x0e00 << 16) | (0xc700 >> 2),
448 	0x00000000,
449 	(0x0e00 << 16) | (0xc704 >> 2),
450 	0x00000000,
451 	(0x0e00 << 16) | (0xc708 >> 2),
452 	0x00000000,
453 	(0x0e00 << 16) | (0xc768 >> 2),
454 	0x00000000,
455 	(0x0400 << 16) | (0xc770 >> 2),
456 	0x00000000,
457 	(0x0400 << 16) | (0xc774 >> 2),
458 	0x00000000,
459 	(0x0400 << 16) | (0xc778 >> 2),
460 	0x00000000,
461 	(0x0400 << 16) | (0xc77c >> 2),
462 	0x00000000,
463 	(0x0400 << 16) | (0xc780 >> 2),
464 	0x00000000,
465 	(0x0400 << 16) | (0xc784 >> 2),
466 	0x00000000,
467 	(0x0400 << 16) | (0xc788 >> 2),
468 	0x00000000,
469 	(0x0400 << 16) | (0xc78c >> 2),
470 	0x00000000,
471 	(0x0400 << 16) | (0xc798 >> 2),
472 	0x00000000,
473 	(0x0400 << 16) | (0xc79c >> 2),
474 	0x00000000,
475 	(0x0400 << 16) | (0xc7a0 >> 2),
476 	0x00000000,
477 	(0x0400 << 16) | (0xc7a4 >> 2),
478 	0x00000000,
479 	(0x0400 << 16) | (0xc7a8 >> 2),
480 	0x00000000,
481 	(0x0400 << 16) | (0xc7ac >> 2),
482 	0x00000000,
483 	(0x0400 << 16) | (0xc7b0 >> 2),
484 	0x00000000,
485 	(0x0400 << 16) | (0xc7b4 >> 2),
486 	0x00000000,
487 	(0x0e00 << 16) | (0x9100 >> 2),
488 	0x00000000,
489 	(0x0e00 << 16) | (0x3c010 >> 2),
490 	0x00000000,
491 	(0x0e00 << 16) | (0x92a8 >> 2),
492 	0x00000000,
493 	(0x0e00 << 16) | (0x92ac >> 2),
494 	0x00000000,
495 	(0x0e00 << 16) | (0x92b4 >> 2),
496 	0x00000000,
497 	(0x0e00 << 16) | (0x92b8 >> 2),
498 	0x00000000,
499 	(0x0e00 << 16) | (0x92bc >> 2),
500 	0x00000000,
501 	(0x0e00 << 16) | (0x92c0 >> 2),
502 	0x00000000,
503 	(0x0e00 << 16) | (0x92c4 >> 2),
504 	0x00000000,
505 	(0x0e00 << 16) | (0x92c8 >> 2),
506 	0x00000000,
507 	(0x0e00 << 16) | (0x92cc >> 2),
508 	0x00000000,
509 	(0x0e00 << 16) | (0x92d0 >> 2),
510 	0x00000000,
511 	(0x0e00 << 16) | (0x8c00 >> 2),
512 	0x00000000,
513 	(0x0e00 << 16) | (0x8c04 >> 2),
514 	0x00000000,
515 	(0x0e00 << 16) | (0x8c20 >> 2),
516 	0x00000000,
517 	(0x0e00 << 16) | (0x8c38 >> 2),
518 	0x00000000,
519 	(0x0e00 << 16) | (0x8c3c >> 2),
520 	0x00000000,
521 	(0x0e00 << 16) | (0xae00 >> 2),
522 	0x00000000,
523 	(0x0e00 << 16) | (0x9604 >> 2),
524 	0x00000000,
525 	(0x0e00 << 16) | (0xac08 >> 2),
526 	0x00000000,
527 	(0x0e00 << 16) | (0xac0c >> 2),
528 	0x00000000,
529 	(0x0e00 << 16) | (0xac10 >> 2),
530 	0x00000000,
531 	(0x0e00 << 16) | (0xac14 >> 2),
532 	0x00000000,
533 	(0x0e00 << 16) | (0xac58 >> 2),
534 	0x00000000,
535 	(0x0e00 << 16) | (0xac68 >> 2),
536 	0x00000000,
537 	(0x0e00 << 16) | (0xac6c >> 2),
538 	0x00000000,
539 	(0x0e00 << 16) | (0xac70 >> 2),
540 	0x00000000,
541 	(0x0e00 << 16) | (0xac74 >> 2),
542 	0x00000000,
543 	(0x0e00 << 16) | (0xac78 >> 2),
544 	0x00000000,
545 	(0x0e00 << 16) | (0xac7c >> 2),
546 	0x00000000,
547 	(0x0e00 << 16) | (0xac80 >> 2),
548 	0x00000000,
549 	(0x0e00 << 16) | (0xac84 >> 2),
550 	0x00000000,
551 	(0x0e00 << 16) | (0xac88 >> 2),
552 	0x00000000,
553 	(0x0e00 << 16) | (0xac8c >> 2),
554 	0x00000000,
555 	(0x0e00 << 16) | (0x970c >> 2),
556 	0x00000000,
557 	(0x0e00 << 16) | (0x9714 >> 2),
558 	0x00000000,
559 	(0x0e00 << 16) | (0x9718 >> 2),
560 	0x00000000,
561 	(0x0e00 << 16) | (0x971c >> 2),
562 	0x00000000,
563 	(0x0e00 << 16) | (0x31068 >> 2),
564 	0x00000000,
565 	(0x4e00 << 16) | (0x31068 >> 2),
566 	0x00000000,
567 	(0x5e00 << 16) | (0x31068 >> 2),
568 	0x00000000,
569 	(0x6e00 << 16) | (0x31068 >> 2),
570 	0x00000000,
571 	(0x7e00 << 16) | (0x31068 >> 2),
572 	0x00000000,
573 	(0x8e00 << 16) | (0x31068 >> 2),
574 	0x00000000,
575 	(0x9e00 << 16) | (0x31068 >> 2),
576 	0x00000000,
577 	(0xae00 << 16) | (0x31068 >> 2),
578 	0x00000000,
579 	(0xbe00 << 16) | (0x31068 >> 2),
580 	0x00000000,
581 	(0x0e00 << 16) | (0xcd10 >> 2),
582 	0x00000000,
583 	(0x0e00 << 16) | (0xcd14 >> 2),
584 	0x00000000,
585 	(0x0e00 << 16) | (0x88b0 >> 2),
586 	0x00000000,
587 	(0x0e00 << 16) | (0x88b4 >> 2),
588 	0x00000000,
589 	(0x0e00 << 16) | (0x88b8 >> 2),
590 	0x00000000,
591 	(0x0e00 << 16) | (0x88bc >> 2),
592 	0x00000000,
593 	(0x0400 << 16) | (0x89c0 >> 2),
594 	0x00000000,
595 	(0x0e00 << 16) | (0x88c4 >> 2),
596 	0x00000000,
597 	(0x0e00 << 16) | (0x88c8 >> 2),
598 	0x00000000,
599 	(0x0e00 << 16) | (0x88d0 >> 2),
600 	0x00000000,
601 	(0x0e00 << 16) | (0x88d4 >> 2),
602 	0x00000000,
603 	(0x0e00 << 16) | (0x88d8 >> 2),
604 	0x00000000,
605 	(0x0e00 << 16) | (0x8980 >> 2),
606 	0x00000000,
607 	(0x0e00 << 16) | (0x30938 >> 2),
608 	0x00000000,
609 	(0x0e00 << 16) | (0x3093c >> 2),
610 	0x00000000,
611 	(0x0e00 << 16) | (0x30940 >> 2),
612 	0x00000000,
613 	(0x0e00 << 16) | (0x89a0 >> 2),
614 	0x00000000,
615 	(0x0e00 << 16) | (0x30900 >> 2),
616 	0x00000000,
617 	(0x0e00 << 16) | (0x30904 >> 2),
618 	0x00000000,
619 	(0x0e00 << 16) | (0x89b4 >> 2),
620 	0x00000000,
621 	(0x0e00 << 16) | (0x3c210 >> 2),
622 	0x00000000,
623 	(0x0e00 << 16) | (0x3c214 >> 2),
624 	0x00000000,
625 	(0x0e00 << 16) | (0x3c218 >> 2),
626 	0x00000000,
627 	(0x0e00 << 16) | (0x8904 >> 2),
628 	0x00000000,
629 	0x5, /* section marker; following entries have no placeholder word */
630 	(0x0e00 << 16) | (0x8c28 >> 2),
631 	(0x0e00 << 16) | (0x8c2c >> 2),
632 	(0x0e00 << 16) | (0x8c30 >> 2),
633 	(0x0e00 << 16) | (0x8c34 >> 2),
634 	(0x0e00 << 16) | (0x9600 >> 2),
635 };
636 
/*
 * RLC save/restore register list for Kalindi (Kabini/Mullins) GFX7
 * parts.  Same encoding as spectre_rlc_save_restore_register_list:
 * pairs of (selector << 16 | register dword offset) and a 0x00000000
 * placeholder, with bare literals 0x3 and 0x5 separating sections.
 * Kalindi has fewer shader engines, so fewer per-SE selector variants
 * appear here than in the Spectre table.
 */
637 static const u32 kalindi_rlc_save_restore_register_list[] =
638 {
639 	(0x0e00 << 16) | (0xc12c >> 2),
640 	0x00000000,
641 	(0x0e00 << 16) | (0xc140 >> 2),
642 	0x00000000,
643 	(0x0e00 << 16) | (0xc150 >> 2),
644 	0x00000000,
645 	(0x0e00 << 16) | (0xc15c >> 2),
646 	0x00000000,
647 	(0x0e00 << 16) | (0xc168 >> 2),
648 	0x00000000,
649 	(0x0e00 << 16) | (0xc170 >> 2),
650 	0x00000000,
651 	(0x0e00 << 16) | (0xc204 >> 2),
652 	0x00000000,
653 	(0x0e00 << 16) | (0xc2b4 >> 2),
654 	0x00000000,
655 	(0x0e00 << 16) | (0xc2b8 >> 2),
656 	0x00000000,
657 	(0x0e00 << 16) | (0xc2bc >> 2),
658 	0x00000000,
659 	(0x0e00 << 16) | (0xc2c0 >> 2),
660 	0x00000000,
661 	(0x0e00 << 16) | (0x8228 >> 2),
662 	0x00000000,
663 	(0x0e00 << 16) | (0x829c >> 2),
664 	0x00000000,
665 	(0x0e00 << 16) | (0x869c >> 2),
666 	0x00000000,
667 	(0x0600 << 16) | (0x98f4 >> 2),
668 	0x00000000,
669 	(0x0e00 << 16) | (0x98f8 >> 2),
670 	0x00000000,
671 	(0x0e00 << 16) | (0x9900 >> 2),
672 	0x00000000,
673 	(0x0e00 << 16) | (0xc260 >> 2),
674 	0x00000000,
675 	(0x0e00 << 16) | (0x90e8 >> 2),
676 	0x00000000,
677 	(0x0e00 << 16) | (0x3c000 >> 2),
678 	0x00000000,
679 	(0x0e00 << 16) | (0x3c00c >> 2),
680 	0x00000000,
681 	(0x0e00 << 16) | (0x8c1c >> 2),
682 	0x00000000,
683 	(0x0e00 << 16) | (0x9700 >> 2),
684 	0x00000000,
685 	(0x0e00 << 16) | (0xcd20 >> 2),
686 	0x00000000,
687 	(0x4e00 << 16) | (0xcd20 >> 2),
688 	0x00000000,
689 	(0x5e00 << 16) | (0xcd20 >> 2),
690 	0x00000000,
691 	(0x6e00 << 16) | (0xcd20 >> 2),
692 	0x00000000,
693 	(0x7e00 << 16) | (0xcd20 >> 2),
694 	0x00000000,
695 	(0x0e00 << 16) | (0x89bc >> 2),
696 	0x00000000,
697 	(0x0e00 << 16) | (0x8900 >> 2),
698 	0x00000000,
699 	0x3, /* section marker */
700 	(0x0e00 << 16) | (0xc130 >> 2),
701 	0x00000000,
702 	(0x0e00 << 16) | (0xc134 >> 2),
703 	0x00000000,
704 	(0x0e00 << 16) | (0xc1fc >> 2),
705 	0x00000000,
706 	(0x0e00 << 16) | (0xc208 >> 2),
707 	0x00000000,
708 	(0x0e00 << 16) | (0xc264 >> 2),
709 	0x00000000,
710 	(0x0e00 << 16) | (0xc268 >> 2),
711 	0x00000000,
712 	(0x0e00 << 16) | (0xc26c >> 2),
713 	0x00000000,
714 	(0x0e00 << 16) | (0xc270 >> 2),
715 	0x00000000,
716 	(0x0e00 << 16) | (0xc274 >> 2),
717 	0x00000000,
718 	(0x0e00 << 16) | (0xc28c >> 2),
719 	0x00000000,
720 	(0x0e00 << 16) | (0xc290 >> 2),
721 	0x00000000,
722 	(0x0e00 << 16) | (0xc294 >> 2),
723 	0x00000000,
724 	(0x0e00 << 16) | (0xc298 >> 2),
725 	0x00000000,
726 	(0x0e00 << 16) | (0xc2a0 >> 2),
727 	0x00000000,
728 	(0x0e00 << 16) | (0xc2a4 >> 2),
729 	0x00000000,
730 	(0x0e00 << 16) | (0xc2a8 >> 2),
731 	0x00000000,
732 	(0x0e00 << 16) | (0xc2ac >> 2),
733 	0x00000000,
734 	(0x0e00 << 16) | (0x301d0 >> 2),
735 	0x00000000,
736 	(0x0e00 << 16) | (0x30238 >> 2),
737 	0x00000000,
738 	(0x0e00 << 16) | (0x30250 >> 2),
739 	0x00000000,
740 	(0x0e00 << 16) | (0x30254 >> 2),
741 	0x00000000,
742 	(0x0e00 << 16) | (0x30258 >> 2),
743 	0x00000000,
744 	(0x0e00 << 16) | (0x3025c >> 2),
745 	0x00000000,
746 	(0x4e00 << 16) | (0xc900 >> 2),
747 	0x00000000,
748 	(0x5e00 << 16) | (0xc900 >> 2),
749 	0x00000000,
750 	(0x6e00 << 16) | (0xc900 >> 2),
751 	0x00000000,
752 	(0x7e00 << 16) | (0xc900 >> 2),
753 	0x00000000,
754 	(0x4e00 << 16) | (0xc904 >> 2),
755 	0x00000000,
756 	(0x5e00 << 16) | (0xc904 >> 2),
757 	0x00000000,
758 	(0x6e00 << 16) | (0xc904 >> 2),
759 	0x00000000,
760 	(0x7e00 << 16) | (0xc904 >> 2),
761 	0x00000000,
762 	(0x4e00 << 16) | (0xc908 >> 2),
763 	0x00000000,
764 	(0x5e00 << 16) | (0xc908 >> 2),
765 	0x00000000,
766 	(0x6e00 << 16) | (0xc908 >> 2),
767 	0x00000000,
768 	(0x7e00 << 16) | (0xc908 >> 2),
769 	0x00000000,
770 	(0x4e00 << 16) | (0xc90c >> 2),
771 	0x00000000,
772 	(0x5e00 << 16) | (0xc90c >> 2),
773 	0x00000000,
774 	(0x6e00 << 16) | (0xc90c >> 2),
775 	0x00000000,
776 	(0x7e00 << 16) | (0xc90c >> 2),
777 	0x00000000,
778 	(0x4e00 << 16) | (0xc910 >> 2),
779 	0x00000000,
780 	(0x5e00 << 16) | (0xc910 >> 2),
781 	0x00000000,
782 	(0x6e00 << 16) | (0xc910 >> 2),
783 	0x00000000,
784 	(0x7e00 << 16) | (0xc910 >> 2),
785 	0x00000000,
786 	(0x0e00 << 16) | (0xc99c >> 2),
787 	0x00000000,
788 	(0x0e00 << 16) | (0x9834 >> 2),
789 	0x00000000,
790 	(0x0000 << 16) | (0x30f00 >> 2),
791 	0x00000000,
792 	(0x0000 << 16) | (0x30f04 >> 2),
793 	0x00000000,
794 	(0x0000 << 16) | (0x30f08 >> 2),
795 	0x00000000,
796 	(0x0000 << 16) | (0x30f0c >> 2),
797 	0x00000000,
798 	(0x0600 << 16) | (0x9b7c >> 2),
799 	0x00000000,
800 	(0x0e00 << 16) | (0x8a14 >> 2),
801 	0x00000000,
802 	(0x0e00 << 16) | (0x8a18 >> 2),
803 	0x00000000,
804 	(0x0600 << 16) | (0x30a00 >> 2),
805 	0x00000000,
806 	(0x0e00 << 16) | (0x8bf0 >> 2),
807 	0x00000000,
808 	(0x0e00 << 16) | (0x8bcc >> 2),
809 	0x00000000,
810 	(0x0e00 << 16) | (0x8b24 >> 2),
811 	0x00000000,
812 	(0x0e00 << 16) | (0x30a04 >> 2),
813 	0x00000000,
814 	(0x0600 << 16) | (0x30a10 >> 2),
815 	0x00000000,
816 	(0x0600 << 16) | (0x30a14 >> 2),
817 	0x00000000,
818 	(0x0600 << 16) | (0x30a18 >> 2),
819 	0x00000000,
820 	(0x0600 << 16) | (0x30a2c >> 2),
821 	0x00000000,
822 	(0x0e00 << 16) | (0xc700 >> 2),
823 	0x00000000,
824 	(0x0e00 << 16) | (0xc704 >> 2),
825 	0x00000000,
826 	(0x0e00 << 16) | (0xc708 >> 2),
827 	0x00000000,
828 	(0x0e00 << 16) | (0xc768 >> 2),
829 	0x00000000,
830 	(0x0400 << 16) | (0xc770 >> 2),
831 	0x00000000,
832 	(0x0400 << 16) | (0xc774 >> 2),
833 	0x00000000,
834 	(0x0400 << 16) | (0xc798 >> 2),
835 	0x00000000,
836 	(0x0400 << 16) | (0xc79c >> 2),
837 	0x00000000,
838 	(0x0e00 << 16) | (0x9100 >> 2),
839 	0x00000000,
840 	(0x0e00 << 16) | (0x3c010 >> 2),
841 	0x00000000,
842 	(0x0e00 << 16) | (0x8c00 >> 2),
843 	0x00000000,
844 	(0x0e00 << 16) | (0x8c04 >> 2),
845 	0x00000000,
846 	(0x0e00 << 16) | (0x8c20 >> 2),
847 	0x00000000,
848 	(0x0e00 << 16) | (0x8c38 >> 2),
849 	0x00000000,
850 	(0x0e00 << 16) | (0x8c3c >> 2),
851 	0x00000000,
852 	(0x0e00 << 16) | (0xae00 >> 2),
853 	0x00000000,
854 	(0x0e00 << 16) | (0x9604 >> 2),
855 	0x00000000,
856 	(0x0e00 << 16) | (0xac08 >> 2),
857 	0x00000000,
858 	(0x0e00 << 16) | (0xac0c >> 2),
859 	0x00000000,
860 	(0x0e00 << 16) | (0xac10 >> 2),
861 	0x00000000,
862 	(0x0e00 << 16) | (0xac14 >> 2),
863 	0x00000000,
864 	(0x0e00 << 16) | (0xac58 >> 2),
865 	0x00000000,
866 	(0x0e00 << 16) | (0xac68 >> 2),
867 	0x00000000,
868 	(0x0e00 << 16) | (0xac6c >> 2),
869 	0x00000000,
870 	(0x0e00 << 16) | (0xac70 >> 2),
871 	0x00000000,
872 	(0x0e00 << 16) | (0xac74 >> 2),
873 	0x00000000,
874 	(0x0e00 << 16) | (0xac78 >> 2),
875 	0x00000000,
876 	(0x0e00 << 16) | (0xac7c >> 2),
877 	0x00000000,
878 	(0x0e00 << 16) | (0xac80 >> 2),
879 	0x00000000,
880 	(0x0e00 << 16) | (0xac84 >> 2),
881 	0x00000000,
882 	(0x0e00 << 16) | (0xac88 >> 2),
883 	0x00000000,
884 	(0x0e00 << 16) | (0xac8c >> 2),
885 	0x00000000,
886 	(0x0e00 << 16) | (0x970c >> 2),
887 	0x00000000,
888 	(0x0e00 << 16) | (0x9714 >> 2),
889 	0x00000000,
890 	(0x0e00 << 16) | (0x9718 >> 2),
891 	0x00000000,
892 	(0x0e00 << 16) | (0x971c >> 2),
893 	0x00000000,
894 	(0x0e00 << 16) | (0x31068 >> 2),
895 	0x00000000,
896 	(0x4e00 << 16) | (0x31068 >> 2),
897 	0x00000000,
898 	(0x5e00 << 16) | (0x31068 >> 2),
899 	0x00000000,
900 	(0x6e00 << 16) | (0x31068 >> 2),
901 	0x00000000,
902 	(0x7e00 << 16) | (0x31068 >> 2),
903 	0x00000000,
904 	(0x0e00 << 16) | (0xcd10 >> 2),
905 	0x00000000,
906 	(0x0e00 << 16) | (0xcd14 >> 2),
907 	0x00000000,
908 	(0x0e00 << 16) | (0x88b0 >> 2),
909 	0x00000000,
910 	(0x0e00 << 16) | (0x88b4 >> 2),
911 	0x00000000,
912 	(0x0e00 << 16) | (0x88b8 >> 2),
913 	0x00000000,
914 	(0x0e00 << 16) | (0x88bc >> 2),
915 	0x00000000,
916 	(0x0400 << 16) | (0x89c0 >> 2),
917 	0x00000000,
918 	(0x0e00 << 16) | (0x88c4 >> 2),
919 	0x00000000,
920 	(0x0e00 << 16) | (0x88c8 >> 2),
921 	0x00000000,
922 	(0x0e00 << 16) | (0x88d0 >> 2),
923 	0x00000000,
924 	(0x0e00 << 16) | (0x88d4 >> 2),
925 	0x00000000,
926 	(0x0e00 << 16) | (0x88d8 >> 2),
927 	0x00000000,
928 	(0x0e00 << 16) | (0x8980 >> 2),
929 	0x00000000,
930 	(0x0e00 << 16) | (0x30938 >> 2),
931 	0x00000000,
932 	(0x0e00 << 16) | (0x3093c >> 2),
933 	0x00000000,
934 	(0x0e00 << 16) | (0x30940 >> 2),
935 	0x00000000,
936 	(0x0e00 << 16) | (0x89a0 >> 2),
937 	0x00000000,
938 	(0x0e00 << 16) | (0x30900 >> 2),
939 	0x00000000,
940 	(0x0e00 << 16) | (0x30904 >> 2),
941 	0x00000000,
942 	(0x0e00 << 16) | (0x89b4 >> 2),
943 	0x00000000,
944 	(0x0e00 << 16) | (0x3e1fc >> 2),
945 	0x00000000,
946 	(0x0e00 << 16) | (0x3c210 >> 2),
947 	0x00000000,
948 	(0x0e00 << 16) | (0x3c214 >> 2),
949 	0x00000000,
950 	(0x0e00 << 16) | (0x3c218 >> 2),
951 	0x00000000,
952 	(0x0e00 << 16) | (0x8904 >> 2),
953 	0x00000000,
954 	0x5, /* section marker; following entries have no placeholder word */
955 	(0x0e00 << 16) | (0x8c28 >> 2),
956 	(0x0e00 << 16) | (0x8c2c >> 2),
957 	(0x0e00 << 16) | (0x8c30 >> 2),
958 	(0x0e00 << 16) | (0x8c34 >> 2),
959 	(0x0e00 << 16) | (0x9600 >> 2),
960 };
961 
/*
 * Bonaire "golden" SPM register settings as {reg, mask, value}
 * triplets; applied by the golden-register programming helper.
 * NOTE(review): confirm mask/value semantics against the sequence
 * walker that consumes these tables.
 */
962 static const u32 bonaire_golden_spm_registers[] =
963 {
964 	0x30800, 0xe0ffffff, 0xe0000000
965 };
966 
/*
 * Bonaire "golden" common register settings as {reg, mask, value}
 * triplets; same layout and consumer as the other golden tables.
 */
967 static const u32 bonaire_golden_common_registers[] =
968 {
969 	0xc770, 0xffffffff, 0x00000800,
970 	0xc774, 0xffffffff, 0x00000800,
971 	0xc798, 0xffffffff, 0x00007fbf,
972 	0xc79c, 0xffffffff, 0x00007faf
973 };
974 
/*
 * Bonaire ASIC-specific "golden" register settings as
 * {reg, mask, value} triplets.
 */
975 static const u32 bonaire_golden_registers[] =
976 {
977 	0x3354, 0x00000333, 0x00000333,
978 	0x3350, 0x000c0fc0, 0x00040200,
979 	0x9a10, 0x00010000, 0x00058208,
980 	0x3c000, 0xffff1fff, 0x00140000,
981 	0x3c200, 0xfdfc0fff, 0x00000100,
982 	0x3c234, 0x40000000, 0x40000200,
983 	0x9830, 0xffffffff, 0x00000000,
984 	0x9834, 0xf00fffff, 0x00000400,
985 	0x9838, 0x0002021c, 0x00020200,
986 	0xc78, 0x00000080, 0x00000000,
987 	0x5bb0, 0x000000f0, 0x00000070,
988 	0x5bc0, 0xf0311fff, 0x80300000,
989 	0x98f8, 0x73773777, 0x12010001,
990 	0x350c, 0x00810000, 0x408af000,
991 	0x7030, 0x31000111, 0x00000011,
992 	0x2f48, 0x73773777, 0x12010001,
993 	0x220c, 0x00007fb6, 0x0021a1b1,
994 	0x2210, 0x00007fb6, 0x002021b1,
995 	0x2180, 0x00007fb6, 0x00002191,
996 	0x2218, 0x00007fb6, 0x002121b1,
997 	0x221c, 0x00007fb6, 0x002021b1,
998 	0x21dc, 0x00007fb6, 0x00002191,
999 	0x21e0, 0x00007fb6, 0x00002191,
1000 	0x3628, 0x0000003f, 0x0000000a,
1001 	0x362c, 0x0000003f, 0x0000000a,
1002 	0x2ae4, 0x00073ffe, 0x000022a2,
1003 	0x240c, 0x000007ff, 0x00000000,
1004 	0x8a14, 0xf000003f, 0x00000007,
1005 	0x8bf0, 0x00002001, 0x00000001,
1006 	0x8b24, 0xffffffff, 0x00ffffff,
1007 	0x30a04, 0x0000ff0f, 0x00000000,
1008 	0x28a4c, 0x07ffffff, 0x06000000,
1009 	0x4d8, 0x00000fff, 0x00000100,
1010 	0x3e78, 0x00000001, 0x00000002,
1011 	0x9100, 0x03000000, 0x0362c688,
1012 	0x8c00, 0x000000ff, 0x00000001,
1013 	0xe40, 0x00001fff, 0x00001fff,
1014 	0x9060, 0x0000007f, 0x00000020,
1015 	0x9508, 0x00010000, 0x00010000,
1016 	0xac14, 0x000003ff, 0x000000f3,
1017 	0xac0c, 0xffffffff, 0x00001032
1018 };
1019 
/*
 * Bonaire medium-grain / coarse-grain clock gating init values as
 * {reg, mask, value} triplets, applied during clockgating setup.
 */
1020 static const u32 bonaire_mgcg_cgcg_init[] =
1021 {
1022 	0xc420, 0xffffffff, 0xfffffffc,
1023 	0x30800, 0xffffffff, 0xe0000000,
1024 	0x3c2a0, 0xffffffff, 0x00000100,
1025 	0x3c208, 0xffffffff, 0x00000100,
1026 	0x3c2c0, 0xffffffff, 0xc0000100,
1027 	0x3c2c8, 0xffffffff, 0xc0000100,
1028 	0x3c2c4, 0xffffffff, 0xc0000100,
1029 	0x55e4, 0xffffffff, 0x00600100,
1030 	0x3c280, 0xffffffff, 0x00000100,
1031 	0x3c214, 0xffffffff, 0x06000100,
1032 	0x3c220, 0xffffffff, 0x00000100,
1033 	0x3c218, 0xffffffff, 0x06000100,
1034 	0x3c204, 0xffffffff, 0x00000100,
1035 	0x3c2e0, 0xffffffff, 0x00000100,
1036 	0x3c224, 0xffffffff, 0x00000100,
1037 	0x3c200, 0xffffffff, 0x00000100,
1038 	0x3c230, 0xffffffff, 0x00000100,
1039 	0x3c234, 0xffffffff, 0x00000100,
1040 	0x3c250, 0xffffffff, 0x00000100,
1041 	0x3c254, 0xffffffff, 0x00000100,
1042 	0x3c258, 0xffffffff, 0x00000100,
1043 	0x3c25c, 0xffffffff, 0x00000100,
1044 	0x3c260, 0xffffffff, 0x00000100,
1045 	0x3c27c, 0xffffffff, 0x00000100,
1046 	0x3c278, 0xffffffff, 0x00000100,
1047 	0x3c210, 0xffffffff, 0x06000100,
1048 	0x3c290, 0xffffffff, 0x00000100,
1049 	0x3c274, 0xffffffff, 0x00000100,
1050 	0x3c2b4, 0xffffffff, 0x00000100,
1051 	0x3c2b0, 0xffffffff, 0x00000100,
1052 	0x3c270, 0xffffffff, 0x00000100,
1053 	0x30800, 0xffffffff, 0xe0000000,
1054 	0x3c020, 0xffffffff, 0x00010000,
1055 	0x3c024, 0xffffffff, 0x00030002,
1056 	0x3c028, 0xffffffff, 0x00040007,
1057 	0x3c02c, 0xffffffff, 0x00060005,
1058 	0x3c030, 0xffffffff, 0x00090008,
1059 	0x3c034, 0xffffffff, 0x00010000,
1060 	0x3c038, 0xffffffff, 0x00030002,
1061 	0x3c03c, 0xffffffff, 0x00040007,
1062 	0x3c040, 0xffffffff, 0x00060005,
1063 	0x3c044, 0xffffffff, 0x00090008,
1064 	0x3c048, 0xffffffff, 0x00010000,
1065 	0x3c04c, 0xffffffff, 0x00030002,
1066 	0x3c050, 0xffffffff, 0x00040007,
1067 	0x3c054, 0xffffffff, 0x00060005,
1068 	0x3c058, 0xffffffff, 0x00090008,
1069 	0x3c05c, 0xffffffff, 0x00010000,
1070 	0x3c060, 0xffffffff, 0x00030002,
1071 	0x3c064, 0xffffffff, 0x00040007,
1072 	0x3c068, 0xffffffff, 0x00060005,
1073 	0x3c06c, 0xffffffff, 0x00090008,
1074 	0x3c070, 0xffffffff, 0x00010000,
1075 	0x3c074, 0xffffffff, 0x00030002,
1076 	0x3c078, 0xffffffff, 0x00040007,
1077 	0x3c07c, 0xffffffff, 0x00060005,
1078 	0x3c080, 0xffffffff, 0x00090008,
1079 	0x3c084, 0xffffffff, 0x00010000,
1080 	0x3c088, 0xffffffff, 0x00030002,
1081 	0x3c08c, 0xffffffff, 0x00040007,
1082 	0x3c090, 0xffffffff, 0x00060005,
1083 	0x3c094, 0xffffffff, 0x00090008,
1084 	0x3c098, 0xffffffff, 0x00010000,
1085 	0x3c09c, 0xffffffff, 0x00030002,
1086 	0x3c0a0, 0xffffffff, 0x00040007,
1087 	0x3c0a4, 0xffffffff, 0x00060005,
1088 	0x3c0a8, 0xffffffff, 0x00090008,
1089 	0x3c000, 0xffffffff, 0x96e00200,
1090 	0x8708, 0xffffffff, 0x00900100,
1091 	0xc424, 0xffffffff, 0x0020003f,
1092 	0x38, 0xffffffff, 0x0140001c,
1093 	0x3c, 0x000f0000, 0x000f0000,
1094 	0x220, 0xffffffff, 0xC060000C,
1095 	0x224, 0xc0000fff, 0x00000100,
1096 	0xf90, 0xffffffff, 0x00000100,
1097 	0xf98, 0x00000101, 0x00000000,
1098 	0x20a8, 0xffffffff, 0x00000104,
1099 	0x55e4, 0xff000fff, 0x00000100,
1100 	0x30cc, 0xc0000fff, 0x00000104,
1101 	0xc1e4, 0x00000001, 0x00000001,
1102 	0xd00c, 0xff000ff0, 0x00000100,
1103 	0xd80c, 0xff000ff0, 0x00000100
1104 };
1105 
/* Spectre (Kaveri) SPM golden register fixups.
 * Entries are {register offset, mask, value} triplets consumed by
 * radeon_program_register_sequence(). */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1110 
/* Spectre (Kaveri) common golden register fixups ({offset, mask, value}). */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1118 
/* Spectre (Kaveri) golden register fixups ({offset, mask, value}),
 * applied by cik_init_golden_registers() for CHIP_KAVERI. */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1147 
/* Spectre (Kaveri) medium-grain / coarse-grain clock-gating init sequence
 * ({offset, mask, value} triplets for radeon_program_register_sequence()). */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1238 
/* Kalindi (Kabini) SPM golden register fixups ({offset, mask, value}).
 * Also reused for Mullins (see cik_init_golden_registers()). */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1243 
/* Kalindi (Kabini) common golden register fixups ({offset, mask, value}).
 * Also reused for Mullins. */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1251 
/* Kalindi (Kabini) golden register fixups ({offset, mask, value}),
 * applied by cik_init_golden_registers() for CHIP_KABINI. */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1285 
/* Kalindi (Kabini/Mullins) clock-gating init sequence
 * ({offset, mask, value} triplets for radeon_program_register_sequence()). */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1344 
/* Hawaii SPM golden register fixups ({offset, mask, value}). */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1349 
/* Hawaii common golden register fixups ({offset, mask, value}). */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1358 
/* Hawaii golden register fixups ({offset, mask, value}),
 * applied by cik_init_golden_registers() for CHIP_HAWAII. */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1398 
/* Hawaii clock-gating init sequence
 * ({offset, mask, value} triplets for radeon_program_register_sequence()). */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1509 
/* Godavari (Mullins) golden register fixups ({offset, mask, value}),
 * applied by cik_init_golden_registers() for CHIP_MULLINS. */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400, /* NOTE(review): non-dword-aligned offset; kalindi table uses 0x9834 here — possibly a typo inherited from upstream, left as-is */
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1545 
1546 
1547 static void cik_init_golden_registers(struct radeon_device *rdev)
1548 {
1549 	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
1550 	mutex_lock(&rdev->grbm_idx_mutex);
1551 	switch (rdev->family) {
1552 	case CHIP_BONAIRE:
1553 		radeon_program_register_sequence(rdev,
1554 						 bonaire_mgcg_cgcg_init,
1555 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1556 		radeon_program_register_sequence(rdev,
1557 						 bonaire_golden_registers,
1558 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1559 		radeon_program_register_sequence(rdev,
1560 						 bonaire_golden_common_registers,
1561 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1562 		radeon_program_register_sequence(rdev,
1563 						 bonaire_golden_spm_registers,
1564 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1565 		break;
1566 	case CHIP_KABINI:
1567 		radeon_program_register_sequence(rdev,
1568 						 kalindi_mgcg_cgcg_init,
1569 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1570 		radeon_program_register_sequence(rdev,
1571 						 kalindi_golden_registers,
1572 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1573 		radeon_program_register_sequence(rdev,
1574 						 kalindi_golden_common_registers,
1575 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1576 		radeon_program_register_sequence(rdev,
1577 						 kalindi_golden_spm_registers,
1578 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1579 		break;
1580 	case CHIP_MULLINS:
1581 		radeon_program_register_sequence(rdev,
1582 						 kalindi_mgcg_cgcg_init,
1583 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1584 		radeon_program_register_sequence(rdev,
1585 						 godavari_golden_registers,
1586 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1587 		radeon_program_register_sequence(rdev,
1588 						 kalindi_golden_common_registers,
1589 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1590 		radeon_program_register_sequence(rdev,
1591 						 kalindi_golden_spm_registers,
1592 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1593 		break;
1594 	case CHIP_KAVERI:
1595 		radeon_program_register_sequence(rdev,
1596 						 spectre_mgcg_cgcg_init,
1597 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1598 		radeon_program_register_sequence(rdev,
1599 						 spectre_golden_registers,
1600 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1601 		radeon_program_register_sequence(rdev,
1602 						 spectre_golden_common_registers,
1603 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1604 		radeon_program_register_sequence(rdev,
1605 						 spectre_golden_spm_registers,
1606 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1607 		break;
1608 	case CHIP_HAWAII:
1609 		radeon_program_register_sequence(rdev,
1610 						 hawaii_mgcg_cgcg_init,
1611 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1612 		radeon_program_register_sequence(rdev,
1613 						 hawaii_golden_registers,
1614 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1615 		radeon_program_register_sequence(rdev,
1616 						 hawaii_golden_common_registers,
1617 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1618 		radeon_program_register_sequence(rdev,
1619 						 hawaii_golden_spm_registers,
1620 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1621 		break;
1622 	default:
1623 		break;
1624 	}
1625 	mutex_unlock(&rdev->grbm_idx_mutex);
1626 }
1627 
1628 /**
1629  * cik_get_xclk - get the xclk
1630  *
1631  * @rdev: radeon_device pointer
1632  *
1633  * Returns the reference clock used by the gfx engine
1634  * (CIK).
1635  */
1636 u32 cik_get_xclk(struct radeon_device *rdev)
1637 {
1638         u32 reference_clock = rdev->clock.spll.reference_freq;
1639 
1640 	if (rdev->flags & RADEON_IS_IGP) {
1641 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1642 			return reference_clock / 2;
1643 	} else {
1644 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1645 			return reference_clock / 4;
1646 	}
1647 	return reference_clock;
1648 }
1649 
1650 /**
1651  * cik_mm_rdoorbell - read a doorbell dword
1652  *
1653  * @rdev: radeon_device pointer
1654  * @index: doorbell index
1655  *
1656  * Returns the value in the doorbell aperture at the
1657  * requested doorbell index (CIK).
1658  */
1659 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1660 {
1661 	if (index < rdev->doorbell.num_doorbells) {
1662 		return readl(rdev->doorbell.ptr + index);
1663 	} else {
1664 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1665 		return 0;
1666 	}
1667 }
1668 
1669 /**
1670  * cik_mm_wdoorbell - write a doorbell dword
1671  *
1672  * @rdev: radeon_device pointer
1673  * @index: doorbell index
1674  * @v: value to write
1675  *
1676  * Writes @v to the doorbell aperture at the
1677  * requested doorbell index (CIK).
1678  */
1679 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1680 {
1681 	if (index < rdev->doorbell.num_doorbells) {
1682 		writel(v, rdev->doorbell.ptr + index);
1683 	} else {
1684 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1685 	}
1686 }
1687 
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC io-debug register setup used with legacy MC firmware.
 * Each pair is {MC_SEQ_IO_DEBUG_INDEX value, MC_SEQ_IO_DEBUG_DATA value};
 * see ci_mc_load_microcode(). */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1729 
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC io-debug register setup used with legacy MC firmware.
 * Each pair is {MC_SEQ_IO_DEBUG_INDEX value, MC_SEQ_IO_DEBUG_DATA value};
 * see ci_mc_load_microcode(). */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1757 
1758 
1759 /**
1760  * cik_srbm_select - select specific register instances
1761  *
1762  * @rdev: radeon_device pointer
1763  * @me: selected ME (micro engine)
1764  * @pipe: pipe
1765  * @queue: queue
1766  * @vmid: VMID
1767  *
1768  * Switches the currently active registers instances.  Some
1769  * registers are instanced per VMID, others are instanced per
1770  * me/pipe/queue combination.
1771  */
1772 static void cik_srbm_select(struct radeon_device *rdev,
1773 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1774 {
1775 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1776 			     MEID(me & 0x3) |
1777 			     VMID(vmid & 0xf) |
1778 			     QUEUEID(queue & 0x7));
1779 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1780 }
1781 
1782 /* ucode loading */
1783 /**
1784  * ci_mc_load_microcode - load MC ucode into the hw
1785  *
1786  * @rdev: radeon_device pointer
1787  *
1788  * Load the GDDR MC ucode into the hw (CIK).
1789  * Returns 0 on success, error on failure.
1790  */
1791 int ci_mc_load_microcode(struct radeon_device *rdev)
1792 {
1793 	const __be32 *fw_data = NULL;
1794 	const __le32 *new_fw_data = NULL;
1795 	u32 running, blackout = 0, tmp;
1796 	u32 *io_mc_regs = NULL;
1797 	const __le32 *new_io_mc_regs = NULL;
1798 	int i, regs_size, ucode_size;
1799 
1800 	if (!rdev->mc_fw)
1801 		return -EINVAL;
1802 
1803 	if (rdev->new_fw) {
1804 		const struct mc_firmware_header_v1_0 *hdr =
1805 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1806 
1807 		radeon_ucode_print_mc_hdr(&hdr->header);
1808 
1809 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1810 		new_io_mc_regs = (const __le32 *)
1811 			((const char *)rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1812 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1813 		new_fw_data = (const __le32 *)
1814 			((const char *)rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1815 	} else {
1816 		ucode_size = rdev->mc_fw->datasize / 4;
1817 
1818 		switch (rdev->family) {
1819 		case CHIP_BONAIRE:
1820 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1821 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1822 			break;
1823 		case CHIP_HAWAII:
1824 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1825 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1826 			break;
1827 		default:
1828 			return -EINVAL;
1829 		}
1830 		fw_data = (const __be32 *)rdev->mc_fw->data;
1831 	}
1832 
1833 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1834 
1835 	if (running == 0) {
1836 		if (running) {
1837 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1838 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1839 		}
1840 
1841 		/* reset the engine and set to writable */
1842 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1843 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1844 
1845 		/* load mc io regs */
1846 		for (i = 0; i < regs_size; i++) {
1847 			if (rdev->new_fw) {
1848 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1849 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1850 			} else {
1851 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1852 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1853 			}
1854 		}
1855 
1856 		tmp = RREG32(MC_SEQ_MISC0);
1857 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1858 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1859 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1860 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1861 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1862 		}
1863 
1864 		/* load the MC ucode */
1865 		for (i = 0; i < ucode_size; i++) {
1866 			if (rdev->new_fw)
1867 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1868 			else
1869 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1870 		}
1871 
1872 		/* put the engine back into the active state */
1873 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1874 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1875 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1876 
1877 		/* wait for training to complete */
1878 		for (i = 0; i < rdev->usec_timeout; i++) {
1879 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1880 				break;
1881 			udelay(1);
1882 		}
1883 		for (i = 0; i < rdev->usec_timeout; i++) {
1884 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1885 				break;
1886 			udelay(1);
1887 		}
1888 
1889 		if (running)
1890 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1891 	}
1892 
1893 	return 0;
1894 }
1895 
1896 /**
1897  * cik_init_microcode - load ucode images from disk
1898  *
1899  * @rdev: radeon_device pointer
1900  *
1901  * Use the firmware interface to load the ucode images into
1902  * the driver (not loaded into hw).
1903  * Returns 0 on success, error on failure.
1904  */
1905 static int cik_init_microcode(struct radeon_device *rdev)
1906 {
1907 	const char *chip_name;
1908 	const char *new_chip_name;
1909 	size_t pfp_req_size, me_req_size, ce_req_size,
1910 		mec_req_size, rlc_req_size, mc_req_size = 0,
1911 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1912 	char fw_name[30];
1913 	int new_fw = 0;
1914 	int err;
1915 	int num_fw;
1916 
1917 	DRM_DEBUG("\n");
1918 
1919 	switch (rdev->family) {
1920 	case CHIP_BONAIRE:
1921 		chip_name = "BONAIRE";
1922 		new_chip_name = "bonaire";
1923 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1924 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1925 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1926 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1927 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1928 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1929 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1930 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1931 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1932 		num_fw = 8;
1933 		break;
1934 	case CHIP_HAWAII:
1935 		chip_name = "HAWAII";
1936 		new_chip_name = "hawaii";
1937 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1938 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1939 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1940 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1941 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1942 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1943 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1944 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1945 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1946 		num_fw = 8;
1947 		break;
1948 	case CHIP_KAVERI:
1949 		chip_name = "KAVERI";
1950 		new_chip_name = "kaveri";
1951 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1952 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1953 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1954 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1955 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1956 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1957 		num_fw = 7;
1958 		break;
1959 	case CHIP_KABINI:
1960 		chip_name = "KABINI";
1961 		new_chip_name = "kabini";
1962 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1963 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1964 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1965 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1966 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1967 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1968 		num_fw = 6;
1969 		break;
1970 	case CHIP_MULLINS:
1971 		chip_name = "MULLINS";
1972 		new_chip_name = "mullins";
1973 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1974 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1975 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1976 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1977 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1978 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1979 		num_fw = 6;
1980 		break;
1981 	default: BUG();
1982 	}
1983 
1984 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1985 
1986 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
1987 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1988 	if (err) {
1989 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
1990 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1991 		if (err)
1992 			goto out;
1993 		if (rdev->pfp_fw->datasize != pfp_req_size) {
1994 			printk(KERN_ERR
1995 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1996 			       rdev->pfp_fw->datasize, fw_name);
1997 			err = -EINVAL;
1998 			goto out;
1999 		}
2000 	} else {
2001 		err = radeon_ucode_validate(rdev->pfp_fw);
2002 		if (err) {
2003 			printk(KERN_ERR
2004 			       "cik_fw: validation failed for firmware \"%s\"\n",
2005 			       fw_name);
2006 			goto out;
2007 		} else {
2008 			new_fw++;
2009 		}
2010 	}
2011 
2012 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
2013 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2014 	if (err) {
2015 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
2016 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2017 		if (err)
2018 			goto out;
2019 		if (rdev->me_fw->datasize != me_req_size) {
2020 			printk(KERN_ERR
2021 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2022 			       rdev->me_fw->datasize, fw_name);
2023 			err = -EINVAL;
2024 		}
2025 	} else {
2026 		err = radeon_ucode_validate(rdev->me_fw);
2027 		if (err) {
2028 			printk(KERN_ERR
2029 			       "cik_fw: validation failed for firmware \"%s\"\n",
2030 			       fw_name);
2031 			goto out;
2032 		} else {
2033 			new_fw++;
2034 		}
2035 	}
2036 
2037 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
2038 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2039 	if (err) {
2040 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
2041 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2042 		if (err)
2043 			goto out;
2044 		if (rdev->ce_fw->datasize != ce_req_size) {
2045 			printk(KERN_ERR
2046 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2047 			       rdev->ce_fw->datasize, fw_name);
2048 			err = -EINVAL;
2049 		}
2050 	} else {
2051 		err = radeon_ucode_validate(rdev->ce_fw);
2052 		if (err) {
2053 			printk(KERN_ERR
2054 			       "cik_fw: validation failed for firmware \"%s\"\n",
2055 			       fw_name);
2056 			goto out;
2057 		} else {
2058 			new_fw++;
2059 		}
2060 	}
2061 
2062 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", new_chip_name);
2063 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2064 	if (err) {
2065 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
2066 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2067 		if (err)
2068 			goto out;
2069 		if (rdev->mec_fw->datasize != mec_req_size) {
2070 			printk(KERN_ERR
2071 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2072 			       rdev->mec_fw->datasize, fw_name);
2073 			err = -EINVAL;
2074 		}
2075 	} else {
2076 		err = radeon_ucode_validate(rdev->mec_fw);
2077 		if (err) {
2078 			printk(KERN_ERR
2079 			       "cik_fw: validation failed for firmware \"%s\"\n",
2080 			       fw_name);
2081 			goto out;
2082 		} else {
2083 			new_fw++;
2084 		}
2085 	}
2086 
2087 	if (rdev->family == CHIP_KAVERI) {
2088 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec2", new_chip_name);
2089 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2090 		if (err) {
2091 			goto out;
2092 		} else {
2093 			err = radeon_ucode_validate(rdev->mec2_fw);
2094 			if (err) {
2095 				goto out;
2096 			} else {
2097 				new_fw++;
2098 			}
2099 		}
2100 	}
2101 
2102 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
2103 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2104 	if (err) {
2105 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
2106 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2107 		if (err)
2108 			goto out;
2109 		if (rdev->rlc_fw->datasize != rlc_req_size) {
2110 			printk(KERN_ERR
2111 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2112 			       rdev->rlc_fw->datasize, fw_name);
2113 			err = -EINVAL;
2114 		}
2115 	} else {
2116 		err = radeon_ucode_validate(rdev->rlc_fw);
2117 		if (err) {
2118 			printk(KERN_ERR
2119 			       "cik_fw: validation failed for firmware \"%s\"\n",
2120 			       fw_name);
2121 			goto out;
2122 		} else {
2123 			new_fw++;
2124 		}
2125 	}
2126 
2127 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", new_chip_name);
2128 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2129 	if (err) {
2130 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
2131 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2132 		if (err)
2133 			goto out;
2134 		if (rdev->sdma_fw->datasize != sdma_req_size) {
2135 			printk(KERN_ERR
2136 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2137 			       rdev->sdma_fw->datasize, fw_name);
2138 			err = -EINVAL;
2139 		}
2140 	} else {
2141 		err = radeon_ucode_validate(rdev->sdma_fw);
2142 		if (err) {
2143 			printk(KERN_ERR
2144 			       "cik_fw: validation failed for firmware \"%s\"\n",
2145 			       fw_name);
2146 			goto out;
2147 		} else {
2148 			new_fw++;
2149 		}
2150 	}
2151 
2152 	/* No SMC, MC ucode on APUs */
2153 	if (!(rdev->flags & RADEON_IS_IGP)) {
2154 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
2155 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2156 		if (err) {
2157 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
2158 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2159 			if (err) {
2160 				ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
2161 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2162 				if (err)
2163 					goto out;
2164 			}
2165 			if ((rdev->mc_fw->datasize != mc_req_size) &&
2166 			    (rdev->mc_fw->datasize != mc2_req_size)){
2167 				printk(KERN_ERR
2168 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2169 				       rdev->mc_fw->datasize, fw_name);
2170 				err = -EINVAL;
2171 			}
2172 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
2173 		} else {
2174 			err = radeon_ucode_validate(rdev->mc_fw);
2175 			if (err) {
2176 				printk(KERN_ERR
2177 				       "cik_fw: validation failed for firmware \"%s\"\n",
2178 				       fw_name);
2179 				goto out;
2180 			} else {
2181 				new_fw++;
2182 			}
2183 		}
2184 
2185 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
2186 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2187 		if (err) {
2188 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
2189 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2190 			if (err) {
2191 				printk(KERN_ERR
2192 				       "smc: error loading firmware \"%s\"\n",
2193 				       fw_name);
2194 				release_firmware(rdev->smc_fw);
2195 				rdev->smc_fw = NULL;
2196 				err = 0;
2197 			} else if (rdev->smc_fw->datasize != smc_req_size) {
2198 				printk(KERN_ERR
2199 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2200 				       rdev->smc_fw->datasize, fw_name);
2201 				err = -EINVAL;
2202 			}
2203 		} else {
2204 			err = radeon_ucode_validate(rdev->smc_fw);
2205 			if (err) {
2206 				printk(KERN_ERR
2207 				       "cik_fw: validation failed for firmware \"%s\"\n",
2208 				       fw_name);
2209 				goto out;
2210 			} else {
2211 				new_fw++;
2212 			}
2213 		}
2214 	}
2215 
2216 	if (new_fw == 0) {
2217 		rdev->new_fw = false;
2218 	} else if (new_fw < num_fw) {
2219 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2220 		err = -EINVAL;
2221 	} else {
2222 		rdev->new_fw = true;
2223 	}
2224 
2225 out:
2226 	if (err) {
2227 		if (err != -EINVAL)
2228 			printk(KERN_ERR
2229 			       "cik_cp: Failed to load firmware \"%s\"\n",
2230 			       fw_name);
2231 		release_firmware(rdev->pfp_fw);
2232 		rdev->pfp_fw = NULL;
2233 		release_firmware(rdev->me_fw);
2234 		rdev->me_fw = NULL;
2235 		release_firmware(rdev->ce_fw);
2236 		rdev->ce_fw = NULL;
2237 		release_firmware(rdev->mec_fw);
2238 		rdev->mec_fw = NULL;
2239 		release_firmware(rdev->mec2_fw);
2240 		rdev->mec2_fw = NULL;
2241 		release_firmware(rdev->rlc_fw);
2242 		rdev->rlc_fw = NULL;
2243 		release_firmware(rdev->sdma_fw);
2244 		rdev->sdma_fw = NULL;
2245 		release_firmware(rdev->mc_fw);
2246 		rdev->mc_fw = NULL;
2247 		release_firmware(rdev->smc_fw);
2248 		rdev->smc_fw = NULL;
2249 	}
2250 	return err;
2251 }
2252 
2253 /**
2254  * cik_fini_microcode - drop the firmwares image references
2255  *
2256  * @rdev: radeon_device pointer
2257  *
2258  * Drop the pfp, me, mec, mec2, rlc, sdma, mc, smc and ce firmware image references.
2259  * Called at driver shutdown.
2260  */
2261 static void cik_fini_microcode(struct radeon_device *rdev)
2262 {
2263 	release_firmware(rdev->pfp_fw);
2264 	rdev->pfp_fw = NULL;
2265 	release_firmware(rdev->me_fw);
2266 	rdev->me_fw = NULL;
2267 	release_firmware(rdev->ce_fw);
2268 	rdev->ce_fw = NULL;
2269 	release_firmware(rdev->mec_fw);
2270 	rdev->mec_fw = NULL;
2271 	release_firmware(rdev->mec2_fw);
2272 	rdev->mec2_fw = NULL;
2273 	release_firmware(rdev->rlc_fw);
2274 	rdev->rlc_fw = NULL;
2275 	release_firmware(rdev->sdma_fw);
2276 	rdev->sdma_fw = NULL;
2277 	release_firmware(rdev->mc_fw);
2278 	rdev->mc_fw = NULL;
2279 	release_firmware(rdev->smc_fw);
2280 	rdev->smc_fw = NULL;
2281 }
2282 
2283 /*
2284  * Core functions
2285  */
2286 /**
2287  * cik_tiling_mode_table_init - init the hw tiling table
2288  *
2289  * @rdev: radeon_device pointer
2290  *
2291  * Starting with SI, the tiling setup is done globally in a
2292  * set of 32 tiling modes.  Rather than selecting each set of
2293  * parameters per surface as on older asics, we just select
2294  * which index in the tiling table we want to use, and the
2295  * surface uses those parameters (CIK).
2296  */
2297 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2298 {
2299 	const u32 num_tile_mode_states = 32;
2300 	const u32 num_secondary_tile_mode_states = 16;
2301 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2302 	u32 num_pipe_configs;
2303 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2304 		rdev->config.cik.max_shader_engines;
2305 
2306 	switch (rdev->config.cik.mem_row_size_in_kb) {
2307 	case 1:
2308 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2309 		break;
2310 	case 2:
2311 	default:
2312 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2313 		break;
2314 	case 4:
2315 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2316 		break;
2317 	}
2318 
2319 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2320 	if (num_pipe_configs > 8)
2321 		num_pipe_configs = 16;
2322 
2323 	if (num_pipe_configs == 16) {
2324 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2325 			switch (reg_offset) {
2326 			case 0:
2327 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2329 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2331 				break;
2332 			case 1:
2333 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2335 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2337 				break;
2338 			case 2:
2339 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2341 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2343 				break;
2344 			case 3:
2345 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2346 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2347 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2348 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2349 				break;
2350 			case 4:
2351 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2352 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2353 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354 						 TILE_SPLIT(split_equal_to_row_size));
2355 				break;
2356 			case 5:
2357 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2358 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2360 				break;
2361 			case 6:
2362 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2363 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2364 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2365 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2366 				break;
2367 			case 7:
2368 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2369 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2370 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371 						 TILE_SPLIT(split_equal_to_row_size));
2372 				break;
2373 			case 8:
2374 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2375 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2376 				break;
2377 			case 9:
2378 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2379 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2381 				break;
2382 			case 10:
2383 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2384 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2385 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2387 				break;
2388 			case 11:
2389 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2390 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2391 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2392 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2393 				break;
2394 			case 12:
2395 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2396 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2397 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2399 				break;
2400 			case 13:
2401 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2402 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2404 				break;
2405 			case 14:
2406 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2408 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410 				break;
2411 			case 16:
2412 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2413 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2414 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2415 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416 				break;
2417 			case 17:
2418 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2419 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2420 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2421 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2422 				break;
2423 			case 27:
2424 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2425 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2427 				break;
2428 			case 28:
2429 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2430 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2431 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433 				break;
2434 			case 29:
2435 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2436 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2437 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2438 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2439 				break;
2440 			case 30:
2441 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2442 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2443 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2444 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2445 				break;
2446 			default:
2447 				gb_tile_moden = 0;
2448 				break;
2449 			}
2450 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2451 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2452 		}
2453 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2454 			switch (reg_offset) {
2455 			case 0:
2456 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2458 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2459 						 NUM_BANKS(ADDR_SURF_16_BANK));
2460 				break;
2461 			case 1:
2462 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2464 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2465 						 NUM_BANKS(ADDR_SURF_16_BANK));
2466 				break;
2467 			case 2:
2468 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471 						 NUM_BANKS(ADDR_SURF_16_BANK));
2472 				break;
2473 			case 3:
2474 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2476 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2477 						 NUM_BANKS(ADDR_SURF_16_BANK));
2478 				break;
2479 			case 4:
2480 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483 						 NUM_BANKS(ADDR_SURF_8_BANK));
2484 				break;
2485 			case 5:
2486 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2488 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2489 						 NUM_BANKS(ADDR_SURF_4_BANK));
2490 				break;
2491 			case 6:
2492 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2494 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2495 						 NUM_BANKS(ADDR_SURF_2_BANK));
2496 				break;
2497 			case 8:
2498 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2500 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2501 						 NUM_BANKS(ADDR_SURF_16_BANK));
2502 				break;
2503 			case 9:
2504 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2506 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2507 						 NUM_BANKS(ADDR_SURF_16_BANK));
2508 				break;
2509 			case 10:
2510 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2512 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2513 						 NUM_BANKS(ADDR_SURF_16_BANK));
2514 				break;
2515 			case 11:
2516 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2518 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2519 						 NUM_BANKS(ADDR_SURF_8_BANK));
2520 				break;
2521 			case 12:
2522 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2523 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2524 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2525 						 NUM_BANKS(ADDR_SURF_4_BANK));
2526 				break;
2527 			case 13:
2528 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2530 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2531 						 NUM_BANKS(ADDR_SURF_2_BANK));
2532 				break;
2533 			case 14:
2534 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2536 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2537 						 NUM_BANKS(ADDR_SURF_2_BANK));
2538 				break;
2539 			default:
2540 				gb_tile_moden = 0;
2541 				break;
2542 			}
2543 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2544 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2545 		}
2546 	} else if (num_pipe_configs == 8) {
2547 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2548 			switch (reg_offset) {
2549 			case 0:
2550 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2551 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2552 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2553 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2554 				break;
2555 			case 1:
2556 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2557 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2558 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2560 				break;
2561 			case 2:
2562 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2563 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2564 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2566 				break;
2567 			case 3:
2568 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2569 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2570 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2572 				break;
2573 			case 4:
2574 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2575 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2576 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577 						 TILE_SPLIT(split_equal_to_row_size));
2578 				break;
2579 			case 5:
2580 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2581 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2583 				break;
2584 			case 6:
2585 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2586 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2587 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2588 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2589 				break;
2590 			case 7:
2591 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2592 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2593 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2594 						 TILE_SPLIT(split_equal_to_row_size));
2595 				break;
2596 			case 8:
2597 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2598 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2599 				break;
2600 			case 9:
2601 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2602 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2604 				break;
2605 			case 10:
2606 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2607 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2608 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2609 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2610 				break;
2611 			case 11:
2612 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2613 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2614 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2615 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2616 				break;
2617 			case 12:
2618 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2619 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2620 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2621 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622 				break;
2623 			case 13:
2624 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2625 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2626 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2627 				break;
2628 			case 14:
2629 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2630 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2631 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2632 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2633 				break;
2634 			case 16:
2635 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2636 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2637 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2638 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2639 				break;
2640 			case 17:
2641 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2642 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2643 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2644 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2645 				break;
2646 			case 27:
2647 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2648 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2649 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2650 				break;
2651 			case 28:
2652 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2653 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2654 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2655 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2656 				break;
2657 			case 29:
2658 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2659 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2660 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2661 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2662 				break;
2663 			case 30:
2664 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2665 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2666 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2667 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2668 				break;
2669 			default:
2670 				gb_tile_moden = 0;
2671 				break;
2672 			}
2673 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2674 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2675 		}
2676 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2677 			switch (reg_offset) {
2678 			case 0:
2679 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2681 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2682 						 NUM_BANKS(ADDR_SURF_16_BANK));
2683 				break;
2684 			case 1:
2685 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2686 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2687 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2688 						 NUM_BANKS(ADDR_SURF_16_BANK));
2689 				break;
2690 			case 2:
2691 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2693 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2694 						 NUM_BANKS(ADDR_SURF_16_BANK));
2695 				break;
2696 			case 3:
2697 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2698 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2699 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2700 						 NUM_BANKS(ADDR_SURF_16_BANK));
2701 				break;
2702 			case 4:
2703 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2705 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2706 						 NUM_BANKS(ADDR_SURF_8_BANK));
2707 				break;
2708 			case 5:
2709 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2711 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2712 						 NUM_BANKS(ADDR_SURF_4_BANK));
2713 				break;
2714 			case 6:
2715 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2716 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2717 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2718 						 NUM_BANKS(ADDR_SURF_2_BANK));
2719 				break;
2720 			case 8:
2721 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2723 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2724 						 NUM_BANKS(ADDR_SURF_16_BANK));
2725 				break;
2726 			case 9:
2727 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2729 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2730 						 NUM_BANKS(ADDR_SURF_16_BANK));
2731 				break;
2732 			case 10:
2733 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2734 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2735 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2736 						 NUM_BANKS(ADDR_SURF_16_BANK));
2737 				break;
2738 			case 11:
2739 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2741 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2742 						 NUM_BANKS(ADDR_SURF_16_BANK));
2743 				break;
2744 			case 12:
2745 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2746 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2747 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2748 						 NUM_BANKS(ADDR_SURF_8_BANK));
2749 				break;
2750 			case 13:
2751 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2752 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2753 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2754 						 NUM_BANKS(ADDR_SURF_4_BANK));
2755 				break;
2756 			case 14:
2757 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2758 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2759 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2760 						 NUM_BANKS(ADDR_SURF_2_BANK));
2761 				break;
2762 			default:
2763 				gb_tile_moden = 0;
2764 				break;
2765 			}
2766 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2767 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2768 		}
2769 	} else if (num_pipe_configs == 4) {
2770 		if (num_rbs == 4) {
2771 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2772 				switch (reg_offset) {
2773 				case 0:
2774 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2775 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2776 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2778 					break;
2779 				case 1:
2780 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2781 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2782 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2784 					break;
2785 				case 2:
2786 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2787 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2788 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2790 					break;
2791 				case 3:
2792 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2793 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2794 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2796 					break;
2797 				case 4:
2798 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2799 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2800 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801 							 TILE_SPLIT(split_equal_to_row_size));
2802 					break;
2803 				case 5:
2804 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2805 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2806 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2807 					break;
2808 				case 6:
2809 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2810 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2811 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2812 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2813 					break;
2814 				case 7:
2815 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2816 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2817 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2818 							 TILE_SPLIT(split_equal_to_row_size));
2819 					break;
2820 				case 8:
2821 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2822 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2823 					break;
2824 				case 9:
2825 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2826 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2827 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2828 					break;
2829 				case 10:
2830 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2831 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2832 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2833 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2834 					break;
2835 				case 11:
2836 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2837 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2838 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2839 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2840 					break;
2841 				case 12:
2842 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2843 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2844 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2845 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2846 					break;
2847 				case 13:
2848 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2849 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2850 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2851 					break;
2852 				case 14:
2853 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2854 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2855 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2856 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2857 					break;
2858 				case 16:
2859 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2860 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2861 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2862 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2863 					break;
2864 				case 17:
2865 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2866 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2867 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2868 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2869 					break;
2870 				case 27:
2871 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2872 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2873 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2874 					break;
2875 				case 28:
2876 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2877 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2878 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2879 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2880 					break;
2881 				case 29:
2882 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2883 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2884 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2885 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2886 					break;
2887 				case 30:
2888 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2889 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2890 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2891 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2892 					break;
2893 				default:
2894 					gb_tile_moden = 0;
2895 					break;
2896 				}
2897 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2898 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2899 			}
2900 		} else if (num_rbs < 4) {
2901 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2902 				switch (reg_offset) {
2903 				case 0:
2904 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2907 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2908 					break;
2909 				case 1:
2910 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2912 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2913 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2914 					break;
2915 				case 2:
2916 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2918 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2919 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2920 					break;
2921 				case 3:
2922 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2923 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2924 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2925 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2926 					break;
2927 				case 4:
2928 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2929 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2930 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2931 							 TILE_SPLIT(split_equal_to_row_size));
2932 					break;
2933 				case 5:
2934 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2935 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2936 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2937 					break;
2938 				case 6:
2939 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2940 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2941 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2942 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2943 					break;
2944 				case 7:
2945 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2946 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2947 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2948 							 TILE_SPLIT(split_equal_to_row_size));
2949 					break;
2950 				case 8:
2951 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2952 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2953 					break;
2954 				case 9:
2955 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2956 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2957 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2958 					break;
2959 				case 10:
2960 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2961 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2962 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2963 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2964 					break;
2965 				case 11:
2966 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2967 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2968 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2969 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2970 					break;
2971 				case 12:
2972 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2973 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2974 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2975 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2976 					break;
2977 				case 13:
2978 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2979 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2980 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2981 					break;
2982 				case 14:
2983 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2984 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2985 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2986 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2987 					break;
2988 				case 16:
2989 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2990 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2991 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2992 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2993 					break;
2994 				case 17:
2995 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2996 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2997 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2998 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2999 					break;
3000 				case 27:
3001 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3002 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3003 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
3004 					break;
3005 				case 28:
3006 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3007 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3008 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3009 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3010 					break;
3011 				case 29:
3012 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3013 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3014 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3015 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3016 					break;
3017 				case 30:
3018 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3019 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3020 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3021 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3022 					break;
3023 				default:
3024 					gb_tile_moden = 0;
3025 					break;
3026 				}
3027 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3028 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3029 			}
3030 		}
3031 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3032 			switch (reg_offset) {
3033 			case 0:
3034 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3036 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3037 						 NUM_BANKS(ADDR_SURF_16_BANK));
3038 				break;
3039 			case 1:
3040 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3042 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3043 						 NUM_BANKS(ADDR_SURF_16_BANK));
3044 				break;
3045 			case 2:
3046 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3047 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3048 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3049 						 NUM_BANKS(ADDR_SURF_16_BANK));
3050 				break;
3051 			case 3:
3052 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3053 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3054 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3055 						 NUM_BANKS(ADDR_SURF_16_BANK));
3056 				break;
3057 			case 4:
3058 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3059 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3060 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3061 						 NUM_BANKS(ADDR_SURF_16_BANK));
3062 				break;
3063 			case 5:
3064 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3065 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3066 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3067 						 NUM_BANKS(ADDR_SURF_8_BANK));
3068 				break;
3069 			case 6:
3070 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3071 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3072 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3073 						 NUM_BANKS(ADDR_SURF_4_BANK));
3074 				break;
3075 			case 8:
3076 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3077 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3078 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3079 						 NUM_BANKS(ADDR_SURF_16_BANK));
3080 				break;
3081 			case 9:
3082 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3083 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3084 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3085 						 NUM_BANKS(ADDR_SURF_16_BANK));
3086 				break;
3087 			case 10:
3088 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3089 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3090 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3091 						 NUM_BANKS(ADDR_SURF_16_BANK));
3092 				break;
3093 			case 11:
3094 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3095 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3096 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3097 						 NUM_BANKS(ADDR_SURF_16_BANK));
3098 				break;
3099 			case 12:
3100 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3101 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3102 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3103 						 NUM_BANKS(ADDR_SURF_16_BANK));
3104 				break;
3105 			case 13:
3106 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3107 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3108 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3109 						 NUM_BANKS(ADDR_SURF_8_BANK));
3110 				break;
3111 			case 14:
3112 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3113 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3114 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3115 						 NUM_BANKS(ADDR_SURF_4_BANK));
3116 				break;
3117 			default:
3118 				gb_tile_moden = 0;
3119 				break;
3120 			}
3121 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3122 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3123 		}
3124 	} else if (num_pipe_configs == 2) {
3125 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3126 			switch (reg_offset) {
3127 			case 0:
3128 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3129 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3130 						 PIPE_CONFIG(ADDR_SURF_P2) |
3131 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3132 				break;
3133 			case 1:
3134 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3135 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3136 						 PIPE_CONFIG(ADDR_SURF_P2) |
3137 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3138 				break;
3139 			case 2:
3140 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3141 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3142 						 PIPE_CONFIG(ADDR_SURF_P2) |
3143 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3144 				break;
3145 			case 3:
3146 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3147 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3148 						 PIPE_CONFIG(ADDR_SURF_P2) |
3149 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3150 				break;
3151 			case 4:
3152 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3153 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3154 						 PIPE_CONFIG(ADDR_SURF_P2) |
3155 						 TILE_SPLIT(split_equal_to_row_size));
3156 				break;
3157 			case 5:
3158 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3159 						 PIPE_CONFIG(ADDR_SURF_P2) |
3160 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3161 				break;
3162 			case 6:
3163 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3164 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3165 						 PIPE_CONFIG(ADDR_SURF_P2) |
3166 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3167 				break;
3168 			case 7:
3169 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3170 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3171 						 PIPE_CONFIG(ADDR_SURF_P2) |
3172 						 TILE_SPLIT(split_equal_to_row_size));
3173 				break;
3174 			case 8:
3175 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3176 						PIPE_CONFIG(ADDR_SURF_P2);
3177 				break;
3178 			case 9:
3179 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3180 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3181 						 PIPE_CONFIG(ADDR_SURF_P2));
3182 				break;
3183 			case 10:
3184 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3185 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3186 						 PIPE_CONFIG(ADDR_SURF_P2) |
3187 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3188 				break;
3189 			case 11:
3190 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3191 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3192 						 PIPE_CONFIG(ADDR_SURF_P2) |
3193 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3194 				break;
3195 			case 12:
3196 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3197 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3198 						 PIPE_CONFIG(ADDR_SURF_P2) |
3199 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3200 				break;
3201 			case 13:
3202 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3203 						 PIPE_CONFIG(ADDR_SURF_P2) |
3204 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3205 				break;
3206 			case 14:
3207 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3208 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3209 						 PIPE_CONFIG(ADDR_SURF_P2) |
3210 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3211 				break;
3212 			case 16:
3213 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3214 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3215 						 PIPE_CONFIG(ADDR_SURF_P2) |
3216 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3217 				break;
3218 			case 17:
3219 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3220 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3221 						 PIPE_CONFIG(ADDR_SURF_P2) |
3222 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3223 				break;
3224 			case 27:
3225 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3226 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3227 						 PIPE_CONFIG(ADDR_SURF_P2));
3228 				break;
3229 			case 28:
3230 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3231 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3232 						 PIPE_CONFIG(ADDR_SURF_P2) |
3233 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3234 				break;
3235 			case 29:
3236 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3237 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3238 						 PIPE_CONFIG(ADDR_SURF_P2) |
3239 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3240 				break;
3241 			case 30:
3242 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3243 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3244 						 PIPE_CONFIG(ADDR_SURF_P2) |
3245 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3246 				break;
3247 			default:
3248 				gb_tile_moden = 0;
3249 				break;
3250 			}
3251 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3252 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3253 		}
3254 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3255 			switch (reg_offset) {
3256 			case 0:
3257 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3258 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3259 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3260 						 NUM_BANKS(ADDR_SURF_16_BANK));
3261 				break;
3262 			case 1:
3263 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3264 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3265 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3266 						 NUM_BANKS(ADDR_SURF_16_BANK));
3267 				break;
3268 			case 2:
3269 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3270 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3271 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3272 						 NUM_BANKS(ADDR_SURF_16_BANK));
3273 				break;
3274 			case 3:
3275 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3276 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3277 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3278 						 NUM_BANKS(ADDR_SURF_16_BANK));
3279 				break;
3280 			case 4:
3281 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3282 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3283 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3284 						 NUM_BANKS(ADDR_SURF_16_BANK));
3285 				break;
3286 			case 5:
3287 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3288 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3289 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3290 						 NUM_BANKS(ADDR_SURF_16_BANK));
3291 				break;
3292 			case 6:
3293 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3294 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3295 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3296 						 NUM_BANKS(ADDR_SURF_8_BANK));
3297 				break;
3298 			case 8:
3299 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3300 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3301 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3302 						 NUM_BANKS(ADDR_SURF_16_BANK));
3303 				break;
3304 			case 9:
3305 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3306 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3307 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3308 						 NUM_BANKS(ADDR_SURF_16_BANK));
3309 				break;
3310 			case 10:
3311 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3312 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3313 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3314 						 NUM_BANKS(ADDR_SURF_16_BANK));
3315 				break;
3316 			case 11:
3317 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3318 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3319 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3320 						 NUM_BANKS(ADDR_SURF_16_BANK));
3321 				break;
3322 			case 12:
3323 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3324 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3325 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3326 						 NUM_BANKS(ADDR_SURF_16_BANK));
3327 				break;
3328 			case 13:
3329 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3330 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3331 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3332 						 NUM_BANKS(ADDR_SURF_16_BANK));
3333 				break;
3334 			case 14:
3335 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3336 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3337 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3338 						 NUM_BANKS(ADDR_SURF_8_BANK));
3339 				break;
3340 			default:
3341 				gb_tile_moden = 0;
3342 				break;
3343 			}
3344 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3345 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3346 		}
3347 	} else
3348 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3349 }
3350 
3351 /**
3352  * cik_select_se_sh - select which SE, SH to address
3353  *
3354  * @rdev: radeon_device pointer
3355  * @se_num: shader engine to address
3356  * @sh_num: sh block to address
3357  *
3358  * Select which SE, SH combinations to address. Certain
3359  * registers are instanced per SE or SH.  0xffffffff means
3360  * broadcast to all SEs or SHs (CIK).
3361  */
3362 static void cik_select_se_sh(struct radeon_device *rdev,
3363 			     u32 se_num, u32 sh_num)
3364 {
3365 	u32 data = INSTANCE_BROADCAST_WRITES;
3366 
3367 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3368 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3369 	else if (se_num == 0xffffffff)
3370 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3371 	else if (sh_num == 0xffffffff)
3372 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3373 	else
3374 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3375 	WREG32(GRBM_GFX_INDEX, data);
3376 }
3377 
3378 /**
3379  * cik_create_bitmask - create a bitmask
3380  *
3381  * @bit_width: length of the mask
3382  *
3383  * create a variable length bit mask (CIK).
3384  * Returns the bitmask.
3385  */
3386 static u32 cik_create_bitmask(u32 bit_width)
3387 {
3388 	u32 i, mask = 0;
3389 
3390 	for (i = 0; i < bit_width; i++) {
3391 		mask <<= 1;
3392 		mask |= 1;
3393 	}
3394 	return mask;
3395 }
3396 
3397 /**
3398  * cik_get_rb_disabled - computes the mask of disabled RBs
3399  *
3400  * @rdev: radeon_device pointer
3401  * @max_rb_num: max RBs (render backends) for the asic
3402  * @se_num: number of SEs (shader engines) for the asic
3403  * @sh_per_se: number of SH blocks per SE for the asic
3404  *
3405  * Calculates the bitmask of disabled RBs (CIK).
3406  * Returns the disabled RB bitmask.
3407  */
3408 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3409 			      u32 max_rb_num_per_se,
3410 			      u32 sh_per_se)
3411 {
3412 	u32 data, mask;
3413 
3414 	data = RREG32(CC_RB_BACKEND_DISABLE);
3415 	if (data & 1)
3416 		data &= BACKEND_DISABLE_MASK;
3417 	else
3418 		data = 0;
3419 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3420 
3421 	data >>= BACKEND_DISABLE_SHIFT;
3422 
3423 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3424 
3425 	return data & mask;
3426 }
3427 
3428 /**
3429  * cik_setup_rb - setup the RBs on the asic
3430  *
3431  * @rdev: radeon_device pointer
3432  * @se_num: number of SEs (shader engines) for the asic
3433  * @sh_per_se: number of SH blocks per SE for the asic
3434  * @max_rb_num: max RBs (render backends) for the asic
3435  *
3436  * Configures per-SE/SH RB registers (CIK).
3437  */
3438 static void cik_setup_rb(struct radeon_device *rdev,
3439 			 u32 se_num, u32 sh_per_se,
3440 			 u32 max_rb_num_per_se)
3441 {
3442 	int i, j;
3443 	u32 data, mask;
3444 	u32 disabled_rbs = 0;
3445 	u32 enabled_rbs = 0;
3446 
3447 	mutex_lock(&rdev->grbm_idx_mutex);
3448 	for (i = 0; i < se_num; i++) {
3449 		for (j = 0; j < sh_per_se; j++) {
3450 			cik_select_se_sh(rdev, i, j);
3451 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3452 			if (rdev->family == CHIP_HAWAII)
3453 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3454 			else
3455 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3456 		}
3457 	}
3458 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3459 	mutex_unlock(&rdev->grbm_idx_mutex);
3460 
3461 	mask = 1;
3462 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3463 		if (!(disabled_rbs & mask))
3464 			enabled_rbs |= mask;
3465 		mask <<= 1;
3466 	}
3467 
3468 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3469 
3470 	mutex_lock(&rdev->grbm_idx_mutex);
3471 	for (i = 0; i < se_num; i++) {
3472 		cik_select_se_sh(rdev, i, 0xffffffff);
3473 		data = 0;
3474 		for (j = 0; j < sh_per_se; j++) {
3475 			switch (enabled_rbs & 3) {
3476 			case 0:
3477 				if (j == 0)
3478 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3479 				else
3480 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3481 				break;
3482 			case 1:
3483 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3484 				break;
3485 			case 2:
3486 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3487 				break;
3488 			case 3:
3489 			default:
3490 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3491 				break;
3492 			}
3493 			enabled_rbs >>= 2;
3494 		}
3495 		WREG32(PA_SC_RASTER_CONFIG, data);
3496 	}
3497 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3498 	mutex_unlock(&rdev->grbm_idx_mutex);
3499 }
3500 
3501 /**
3502  * cik_gpu_init - setup the 3D engine
3503  *
3504  * @rdev: radeon_device pointer
3505  *
3506  * Configures the 3D engine and tiling configuration
3507  * registers so that the 3D engine is usable.
3508  */
3509 static void cik_gpu_init(struct radeon_device *rdev)
3510 {
3511 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3512 	u32 mc_shared_chmap, mc_arb_ramcfg;
3513 	u32 hdp_host_path_cntl;
3514 	u32 tmp;
3515 	int i, j;
3516 
3517 	switch (rdev->family) {
3518 	case CHIP_BONAIRE:
3519 		rdev->config.cik.max_shader_engines = 2;
3520 		rdev->config.cik.max_tile_pipes = 4;
3521 		rdev->config.cik.max_cu_per_sh = 7;
3522 		rdev->config.cik.max_sh_per_se = 1;
3523 		rdev->config.cik.max_backends_per_se = 2;
3524 		rdev->config.cik.max_texture_channel_caches = 4;
3525 		rdev->config.cik.max_gprs = 256;
3526 		rdev->config.cik.max_gs_threads = 32;
3527 		rdev->config.cik.max_hw_contexts = 8;
3528 
3529 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3530 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3531 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3532 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3533 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3534 		break;
3535 	case CHIP_HAWAII:
3536 		rdev->config.cik.max_shader_engines = 4;
3537 		rdev->config.cik.max_tile_pipes = 16;
3538 		rdev->config.cik.max_cu_per_sh = 11;
3539 		rdev->config.cik.max_sh_per_se = 1;
3540 		rdev->config.cik.max_backends_per_se = 4;
3541 		rdev->config.cik.max_texture_channel_caches = 16;
3542 		rdev->config.cik.max_gprs = 256;
3543 		rdev->config.cik.max_gs_threads = 32;
3544 		rdev->config.cik.max_hw_contexts = 8;
3545 
3546 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3547 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3548 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3549 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3550 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3551 		break;
3552 	case CHIP_KAVERI:
3553 		rdev->config.cik.max_shader_engines = 1;
3554 		rdev->config.cik.max_tile_pipes = 4;
3555 		if ((rdev->pdev->device == 0x1304) ||
3556 		    (rdev->pdev->device == 0x1305) ||
3557 		    (rdev->pdev->device == 0x130C) ||
3558 		    (rdev->pdev->device == 0x130F) ||
3559 		    (rdev->pdev->device == 0x1310) ||
3560 		    (rdev->pdev->device == 0x1311) ||
3561 		    (rdev->pdev->device == 0x131C)) {
3562 			rdev->config.cik.max_cu_per_sh = 8;
3563 			rdev->config.cik.max_backends_per_se = 2;
3564 		} else if ((rdev->pdev->device == 0x1309) ||
3565 			   (rdev->pdev->device == 0x130A) ||
3566 			   (rdev->pdev->device == 0x130D) ||
3567 			   (rdev->pdev->device == 0x1313) ||
3568 			   (rdev->pdev->device == 0x131D)) {
3569 			rdev->config.cik.max_cu_per_sh = 6;
3570 			rdev->config.cik.max_backends_per_se = 2;
3571 		} else if ((rdev->pdev->device == 0x1306) ||
3572 			   (rdev->pdev->device == 0x1307) ||
3573 			   (rdev->pdev->device == 0x130B) ||
3574 			   (rdev->pdev->device == 0x130E) ||
3575 			   (rdev->pdev->device == 0x1315) ||
3576 			   (rdev->pdev->device == 0x1318) ||
3577 			   (rdev->pdev->device == 0x131B)) {
3578 			rdev->config.cik.max_cu_per_sh = 4;
3579 			rdev->config.cik.max_backends_per_se = 1;
3580 		} else {
3581 			rdev->config.cik.max_cu_per_sh = 3;
3582 			rdev->config.cik.max_backends_per_se = 1;
3583 		}
3584 		rdev->config.cik.max_sh_per_se = 1;
3585 		rdev->config.cik.max_texture_channel_caches = 4;
3586 		rdev->config.cik.max_gprs = 256;
3587 		rdev->config.cik.max_gs_threads = 16;
3588 		rdev->config.cik.max_hw_contexts = 8;
3589 
3590 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3591 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3592 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3593 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3594 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3595 		break;
3596 	case CHIP_KABINI:
3597 	case CHIP_MULLINS:
3598 	default:
3599 		rdev->config.cik.max_shader_engines = 1;
3600 		rdev->config.cik.max_tile_pipes = 2;
3601 		rdev->config.cik.max_cu_per_sh = 2;
3602 		rdev->config.cik.max_sh_per_se = 1;
3603 		rdev->config.cik.max_backends_per_se = 1;
3604 		rdev->config.cik.max_texture_channel_caches = 2;
3605 		rdev->config.cik.max_gprs = 256;
3606 		rdev->config.cik.max_gs_threads = 16;
3607 		rdev->config.cik.max_hw_contexts = 8;
3608 
3609 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3610 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3611 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3612 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3613 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3614 		break;
3615 	}
3616 
3617 	/* Initialize HDP */
3618 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3619 		WREG32((0x2c14 + j), 0x00000000);
3620 		WREG32((0x2c18 + j), 0x00000000);
3621 		WREG32((0x2c1c + j), 0x00000000);
3622 		WREG32((0x2c20 + j), 0x00000000);
3623 		WREG32((0x2c24 + j), 0x00000000);
3624 	}
3625 
3626 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3627 
3628 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3629 
3630 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3631 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3632 
3633 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3634 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3635 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3636 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3637 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3638 		rdev->config.cik.mem_row_size_in_kb = 4;
3639 	/* XXX use MC settings? */
3640 	rdev->config.cik.shader_engine_tile_size = 32;
3641 	rdev->config.cik.num_gpus = 1;
3642 	rdev->config.cik.multi_gpu_tile_size = 64;
3643 
3644 	/* fix up row size */
3645 	gb_addr_config &= ~ROW_SIZE_MASK;
3646 	switch (rdev->config.cik.mem_row_size_in_kb) {
3647 	case 1:
3648 	default:
3649 		gb_addr_config |= ROW_SIZE(0);
3650 		break;
3651 	case 2:
3652 		gb_addr_config |= ROW_SIZE(1);
3653 		break;
3654 	case 4:
3655 		gb_addr_config |= ROW_SIZE(2);
3656 		break;
3657 	}
3658 
3659 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3660 	 * not have bank info, so create a custom tiling dword.
3661 	 * bits 3:0   num_pipes
3662 	 * bits 7:4   num_banks
3663 	 * bits 11:8  group_size
3664 	 * bits 15:12 row_size
3665 	 */
3666 	rdev->config.cik.tile_config = 0;
3667 	switch (rdev->config.cik.num_tile_pipes) {
3668 	case 1:
3669 		rdev->config.cik.tile_config |= (0 << 0);
3670 		break;
3671 	case 2:
3672 		rdev->config.cik.tile_config |= (1 << 0);
3673 		break;
3674 	case 4:
3675 		rdev->config.cik.tile_config |= (2 << 0);
3676 		break;
3677 	case 8:
3678 	default:
3679 		/* XXX what about 12? */
3680 		rdev->config.cik.tile_config |= (3 << 0);
3681 		break;
3682 	}
3683 	rdev->config.cik.tile_config |=
3684 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3685 	rdev->config.cik.tile_config |=
3686 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3687 	rdev->config.cik.tile_config |=
3688 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3689 
3690 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3691 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3692 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3693 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3694 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3695 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3696 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3697 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3698 
3699 	cik_tiling_mode_table_init(rdev);
3700 
3701 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3702 		     rdev->config.cik.max_sh_per_se,
3703 		     rdev->config.cik.max_backends_per_se);
3704 
3705 	rdev->config.cik.active_cus = 0;
3706 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3707 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3708 			rdev->config.cik.active_cus +=
3709 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3710 		}
3711 	}
3712 
3713 	/* set HW defaults for 3D engine */
3714 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3715 
3716 	mutex_lock(&rdev->grbm_idx_mutex);
3717 	/*
3718 	 * making sure that the following register writes will be broadcasted
3719 	 * to all the shaders
3720 	 */
3721 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3722 	WREG32(SX_DEBUG_1, 0x20);
3723 
3724 	WREG32(TA_CNTL_AUX, 0x00010000);
3725 
3726 	tmp = RREG32(SPI_CONFIG_CNTL);
3727 	tmp |= 0x03000000;
3728 	WREG32(SPI_CONFIG_CNTL, tmp);
3729 
3730 	WREG32(SQ_CONFIG, 1);
3731 
3732 	WREG32(DB_DEBUG, 0);
3733 
3734 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3735 	tmp |= 0x00000400;
3736 	WREG32(DB_DEBUG2, tmp);
3737 
3738 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3739 	tmp |= 0x00020200;
3740 	WREG32(DB_DEBUG3, tmp);
3741 
3742 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3743 	tmp |= 0x00018208;
3744 	WREG32(CB_HW_CONTROL, tmp);
3745 
3746 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3747 
3748 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3749 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3750 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3751 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3752 
3753 	WREG32(VGT_NUM_INSTANCES, 1);
3754 
3755 	WREG32(CP_PERFMON_CNTL, 0);
3756 
3757 	WREG32(SQ_CONFIG, 0);
3758 
3759 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3760 					  FORCE_EOV_MAX_REZ_CNT(255)));
3761 
3762 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3763 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3764 
3765 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3766 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3767 
3768 	tmp = RREG32(HDP_MISC_CNTL);
3769 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3770 	WREG32(HDP_MISC_CNTL, tmp);
3771 
3772 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3773 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3774 
3775 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3776 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3777 	mutex_unlock(&rdev->grbm_idx_mutex);
3778 
3779 	udelay(50);
3780 }
3781 
3782 /*
3783  * GPU scratch registers helpers function.
3784  */
3785 /**
3786  * cik_scratch_init - setup driver info for CP scratch regs
3787  *
3788  * @rdev: radeon_device pointer
3789  *
3790  * Set up the number and offset of the CP scratch registers.
3791  * NOTE: use of CP scratch registers is a legacy inferface and
3792  * is not used by default on newer asics (r6xx+).  On newer asics,
3793  * memory buffers are used for fences rather than scratch regs.
3794  */
3795 static void cik_scratch_init(struct radeon_device *rdev)
3796 {
3797 	int i;
3798 
3799 	rdev->scratch.num_reg = 7;
3800 	rdev->scratch.reg_base = SCRATCH_REG0;
3801 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3802 		rdev->scratch.free[i] = true;
3803 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3804 	}
3805 }
3806 
3807 /**
3808  * cik_ring_test - basic gfx ring test
3809  *
3810  * @rdev: radeon_device pointer
3811  * @ring: radeon_ring structure holding ring information
3812  *
3813  * Allocate a scratch register and write to it using the gfx ring (CIK).
3814  * Provides a basic gfx ring test to verify that the ring is working.
3815  * Used by cik_cp_gfx_resume();
3816  * Returns 0 on success, error on failure.
3817  */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch reg with a known value so we can tell when the
	 * CP's write lands
	 */
	WREG32(scratch, 0xCAFEDEAD);
	/* 3 dwords: SET_UCONFIG_REG header + reg offset + value */
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* ask the CP to write 0xDEADBEEF into the scratch register */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	/* poll (up to usec_timeout microseconds) for the CP to process it */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3858 
3859 /**
3860  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3861  *
3862  * @rdev: radeon_device pointer
3863  * @ridx: radeon ring index
3864  *
3865  * Emits an hdp flush on the cp.
3866  */
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
				       int ridx)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 ref_and_mask;

	/* Pick the GPU_HDP_FLUSH_REQ/DONE bit corresponding to this ring:
	 * gfx uses CP0; compute rings use a CP2/CP6 base shifted by the
	 * ring's pipe, depending on which MEC (ring->me) owns it.
	 */
	switch (ring->idx) {
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
	default:
		switch (ring->me) {
		case 0:
			ref_and_mask = CP2 << ring->pipe;
			break;
		case 1:
			ref_and_mask = CP6 << ring->pipe;
			break;
		default:
			/* unknown ME: nothing sensible to emit */
			return;
		}
		break;
	case RADEON_RING_TYPE_GFX_INDEX:
		ref_and_mask = CP0;
		break;
	}

	/* WAIT_REG_MEM in write/wait/write mode: set the flush-request bit,
	 * then poll the done register until (done & mask) == ref.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
	radeon_ring_write(ring, ref_and_mask); /* reference value */
	radeon_ring_write(ring, ref_and_mask); /* compare mask */
	radeon_ring_write(ring, 0x20); /* poll interval */
}
3903 
3904 /**
3905  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3906  *
3907  * @rdev: radeon_device pointer
3908  * @fence: radeon fence object
3909  *
3910  * Emits a fence sequnce number on the gfx ring and flushes
3911  * GPU caches.
3912  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address of this ring's fence slot */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL(1) = write 32-bit seq, INT_SEL(0) = no interrupt for
	 * the dummy event
	 */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* INT_SEL(2): raise an interrupt once the seq write completes */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3944 
3945 /**
3946  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3947  *
3948  * @rdev: radeon_device pointer
3949  * @fence: radeon fence object
3950  *
3951  * Emits a fence sequnce number on the compute ring and flushes
3952  * GPU caches.
3953  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address of this ring's fence slot */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL(1) = write 32-bit seq, INT_SEL(2) = interrupt on write */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3972 
3973 /**
3974  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3975  *
3976  * @rdev: radeon_device pointer
3977  * @ring: radeon ring buffer object
3978  * @semaphore: radeon semaphore object
3979  * @emit_wait: Is this a sempahore wait?
3980  *
3981  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3982  * from running ahead of semaphore waits.
3983  */
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	/* MEM_SEMAPHORE: 64-bit semaphore address split low/high, with the
	 * signal/wait select packed into the high dword
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, lower_32_bits(addr));
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);

	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
		/* Prevent the PFP from running ahead of the semaphore wait */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}

	/* always succeeds on CIK */
	return true;
}
4004 
4005 /**
4006  * cik_copy_cpdma - copy pages using the CP DMA engine
4007  *
4008  * @rdev: radeon_device pointer
4009  * @src_offset: src GPU address
4010  * @dst_offset: dst GPU address
4011  * @num_gpu_pages: number of GPU pages to xfer
4012  * @resv: reservation object to sync to
4013  *
4014  * Copy GPU paging using the CP DMA engine (CIK+).
4015  * Used by the radeon ttm implementation to move pages if
4016  * registered as the asic copy callback.
4017  */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	/* one DMA_DATA packet can move at most 0x1fffff bytes, so split the
	 * copy into that many chunks
	 */
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per DMA_DATA packet plus 18 for sync/fence overhead */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* wait for any fences in the reservation object before copying */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* CP_SYNC on the final chunk so the CP waits for completion */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		/* roll back everything written since radeon_ring_lock() */
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	/* caller owns the returned fence reference */
	return fence;
}
4076 
4077 /*
4078  * IB stuff
4079  */
4080 /**
4081  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4082  *
4083  * @rdev: radeon_device pointer
4084  * @ib: radeon indirect buffer object
4085  *
4086  * Emits an DE (drawing engine) or CE (constant engine) IB
4087  * on the gfx ring.  IBs are usually generated by userspace
4088  * acceleration drivers and submitted to the kernel for
4089  * sheduling on the ring.  This function schedules the IB
4090  * on the gfx ring for execution by the GPU.
4091  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	/* VM id 0 is used for IBs without a VM attached */
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* next_rptr = wptr + 3 dwords for this packet
			 * + 4 dwords for the IB packet below
			 */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for WRITE_DATA + 4 for the IB packet */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* length in dwords plus the VM id in bits 31:24 */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
4135 
4136 /**
4137  * cik_ib_test - basic gfx ring IB test
4138  *
4139  * @rdev: radeon_device pointer
4140  * @ring: radeon_ring structure holding ring information
4141  *
4142  * Allocate an IB and execute it on the gfx ring (CIK).
4143  * Provides a basic gfx ring test to verify that IBs are working.
4144  * Returns 0 on success, error on failure.
4145  */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed with a known value so we can detect the IB's write */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* IB contains a single SET_UCONFIG_REG writing 0xDEADBEEF to the
	 * scratch register
	 */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	/* wait for the IB's fence, then poll the register in case the
	 * write is still in flight
	 */
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
4201 
4202 /*
4203  * CP.
4204  * On CIK, gfx and compute now have independant command processors.
4205  *
4206  * GFX
4207  * Gfx consists of a single ring and can process both gfx jobs and
4208  * compute jobs.  The gfx CP consists of three microengines (ME):
4209  * PFP - Pre-Fetch Parser
4210  * ME - Micro Engine
4211  * CE - Constant Engine
4212  * The PFP and ME make up what is considered the Drawing Engine (DE).
4213  * The CE is an asynchronous engine used for updating buffer desciptors
4214  * used by the DE so that they can be loaded into cache in parallel
4215  * while the DE is processing state update packets.
4216  *
4217  * Compute
4218  * The compute CP consists of two microengines (ME):
4219  * MEC1 - Compute MicroEngine 1
4220  * MEC2 - Compute MicroEngine 2
4221  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4222  * The queues are exposed to userspace and are programmed directly
4223  * by the compute runtime.
4224  */
4225 /**
4226  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4227  *
4228  * @rdev: radeon_device pointer
4229  * @enable: enable or disable the MEs
4230  *
4231  * Halts or unhalts the gfx MEs.
4232  */
4233 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4234 {
4235 	if (enable)
4236 		WREG32(CP_ME_CNTL, 0);
4237 	else {
4238 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4239 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4240 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4241 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4242 	}
4243 	udelay(50);
4244 }
4245 
4246 /**
4247  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4248  *
4249  * @rdev: radeon_device pointer
4250  *
4251  * Loads the gfx PFP, ME, and CE ucode.
4252  * Returns 0 for success, -EINVAL if the ucode is not available.
4253  */
4254 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4255 {
4256 	int i;
4257 
4258 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4259 		return -EINVAL;
4260 
4261 	cik_cp_gfx_enable(rdev, false);
4262 
4263 	if (rdev->new_fw) {
4264 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4265 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4266 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4267 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4268 		const struct gfx_firmware_header_v1_0 *me_hdr =
4269 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4270 		const __le32 *fw_data;
4271 		u32 fw_size;
4272 
4273 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4274 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4275 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4276 
4277 		/* PFP */
4278 		fw_data = (const __le32 *)
4279 			((const char *)rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4280 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4281 		WREG32(CP_PFP_UCODE_ADDR, 0);
4282 		for (i = 0; i < fw_size; i++)
4283 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4284 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4285 
4286 		/* CE */
4287 		fw_data = (const __le32 *)
4288 			((const char *)rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4289 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4290 		WREG32(CP_CE_UCODE_ADDR, 0);
4291 		for (i = 0; i < fw_size; i++)
4292 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4293 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4294 
4295 		/* ME */
4296 		fw_data = (const __be32 *)
4297 			((const char *)rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4298 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4299 		WREG32(CP_ME_RAM_WADDR, 0);
4300 		for (i = 0; i < fw_size; i++)
4301 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4302 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4303 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4304 	} else {
4305 		const __be32 *fw_data;
4306 
4307 		/* PFP */
4308 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4309 		WREG32(CP_PFP_UCODE_ADDR, 0);
4310 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4311 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4312 		WREG32(CP_PFP_UCODE_ADDR, 0);
4313 
4314 		/* CE */
4315 		fw_data = (const __be32 *)rdev->ce_fw->data;
4316 		WREG32(CP_CE_UCODE_ADDR, 0);
4317 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4318 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4319 		WREG32(CP_CE_UCODE_ADDR, 0);
4320 
4321 		/* ME */
4322 		fw_data = (const __be32 *)rdev->me_fw->data;
4323 		WREG32(CP_ME_RAM_WADDR, 0);
4324 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4325 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4326 		WREG32(CP_ME_RAM_WADDR, 0);
4327 	}
4328 
4329 	return 0;
4330 }
4331 
4332 /**
4333  * cik_cp_gfx_start - start the gfx ring
4334  *
4335  * @rdev: radeon_device pointer
4336  *
4337  * Enables the ring and loads the clear state context and other
4338  * packets required to init the ring.
4339  * Returns 0 for success, error for failure.
4340  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* clear-state dwords plus 17 dwords of fixed setup packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream out the golden clear-state register defaults */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4392 
4393 /**
4394  * cik_cp_gfx_fini - stop the gfx ring
4395  *
4396  * @rdev: radeon_device pointer
4397  *
4398  * Stop the gfx ring and tear down the driver ring
4399  * info.
4400  */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the gfx MEs before freeing the ring they may be reading */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
4406 
4407 /**
4408  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4409  *
4410  * @rdev: radeon_device pointer
4411  *
4412  * Program the location and size of the gfx ring buffer
4413  * and test it to make sure it's working.
4414  * Returns 0 for success, error for failure.
4415  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	/* register not written on Hawaii */
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size (log2 of the size in units of 8 dwords) */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address wether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	/* without writeback, tell the CP not to update the rptr in memory */
	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	/* final CNTL write drops RB_RPTR_WR_ENA again */
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* gfx CP is up; expose all of VRAM for copies again */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4482 
4483 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4484 		     struct radeon_ring *ring)
4485 {
4486 	u32 rptr;
4487 
4488 	if (rdev->wb.enabled)
4489 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4490 	else
4491 		rptr = RREG32(CP_RB0_RPTR);
4492 
4493 	return rptr;
4494 }
4495 
4496 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4497 		     struct radeon_ring *ring)
4498 {
4499 	u32 wptr;
4500 
4501 	wptr = RREG32(CP_RB0_WPTR);
4502 
4503 	return wptr;
4504 }
4505 
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back — presumably a posting read to push the WPTR write out
	 * to the GPU immediately (standard MMIO idiom); confirm against
	 * other radeon set_wptr implementations
	 */
	(void)RREG32(CP_RB0_WPTR);
}
4512 
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		/* fast path: read the writeback copy */
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		/* CP_HQD_* registers are per-queue; select this ring's
		 * me/pipe/queue via SRBM before reading, and restore the
		 * default selection afterwards.  srbm_mutex serializes the
		 * shared selection register.
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4530 
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		/* select this ring's me/pipe/queue via SRBM before touching
		 * the per-queue CP_HQD_* registers; restore defaults after
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}
4549 
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	/* update the writeback copy first, then ring the doorbell to
	 * notify the CP
	 */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4557 
4558 /**
4559  * cik_cp_compute_enable - enable/disable the compute CP MEs
4560  *
4561  * @rdev: radeon_device pointer
4562  * @enable: enable or disable the MEs
4563  *
4564  * Halts or unhalts the compute MEs.
4565  */
4566 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4567 {
4568 	if (enable)
4569 		WREG32(CP_MEC_CNTL, 0);
4570 	else {
4571 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4572 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4573 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4574 	}
4575 	udelay(50);
4576 }
4577 
4578 /**
4579  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4580  *
4581  * @rdev: radeon_device pointer
4582  *
4583  * Loads the compute MEC1&2 ucode.
4584  * Returns 0 for success, -EINVAL if the ucode is not available.
4585  */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the compute MEs while their ucode RAM is rewritten */
	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		/* new-style firmware: little-endian payload behind a header */
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			((const char *)rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 */
		if (rdev->family == CHIP_KAVERI) {
			/* only Kaveri has a second MEC with its own image */
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				((const char *)rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		/* legacy firmware: raw big-endian dword stream, fixed size */
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 — legacy firmware has no separate MEC2 image,
			 * so the MEC1 image is loaded into both engines
			 */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4648 
4649 /**
4650  * cik_cp_compute_start - start the compute queues
4651  *
4652  * @rdev: radeon_device pointer
4653  *
4654  * Enable the compute queues.
4655  * Returns 0 for success, error for failure.
4656  */
4657 static int cik_cp_compute_start(struct radeon_device *rdev)
4658 {
4659 	cik_cp_compute_enable(rdev, true);
4660 
4661 	return 0;
4662 }
4663 
4664 /**
4665  * cik_cp_compute_fini - stop the compute queues
4666  *
4667  * @rdev: radeon_device pointer
4668  *
4669  * Stop the compute queues and tear down the driver queue
4670  * info.
4671  */
4672 static void cik_cp_compute_fini(struct radeon_device *rdev)
4673 {
4674 	int i, idx, r;
4675 
4676 	cik_cp_compute_enable(rdev, false);
4677 
4678 	for (i = 0; i < 2; i++) {
4679 		if (i == 0)
4680 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4681 		else
4682 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4683 
4684 		if (rdev->ring[idx].mqd_obj) {
4685 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4686 			if (unlikely(r != 0))
4687 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4688 
4689 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4690 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4691 
4692 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4693 			rdev->ring[idx].mqd_obj = NULL;
4694 		}
4695 	}
4696 }
4697 
4698 static void cik_mec_fini(struct radeon_device *rdev)
4699 {
4700 	int r;
4701 
4702 	if (rdev->mec.hpd_eop_obj) {
4703 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4704 		if (unlikely(r != 0))
4705 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4706 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4707 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4708 
4709 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4710 		rdev->mec.hpd_eop_obj = NULL;
4711 	}
4712 }
4713 
4714 #define MEC_HPD_SIZE 2048
4715 
/*
 * cik_mec_init - allocate and map the MEC HPD EOP buffer
 *
 * Allocates (if needed), pins and zeroes the GTT buffer object that
 * holds the per-pipe HPD EOP areas, and records its GPU address in
 * rdev->mec.hpd_eop_gpu_addr.  Returns 0 on success, negative error
 * code on failure (cik_mec_fini() is called on the error paths).
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 * Nonetheless, we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 1;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		/* one doubled MEC_HPD_SIZE slot per pipe, in GTT */
		/* NOTE(review): the warning strings below say "HDP EOP" but the
		 * hardware block is HPD (see CP_HPD_EOP_* registers) - looks
		 * like a typo inherited from upstream; confirm before changing */
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4770 
/*
 * CPU-side image of the per-queue HQD (hardware queue descriptor)
 * register block as it is embedded in the MQD (see struct bonaire_mqd
 * below).  The field order mirrors the CP_MQD_ / CP_HQD_ register
 * layout consumed by the CP, so fields must not be reordered.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4809 
/*
 * Memory Queue Descriptor for Bonaire-class compute queues.  One of
 * these lives in a GTT buffer object per compute ring (allocated in
 * cik_cp_compute_resume()) and is handed to the CP via
 * CP_MQD_BASE_ADDR.  This is a hardware-consumed layout - do not
 * reorder or resize fields.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	/* HQD register snapshot restored by the CP on queue activation */
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4837 
4838 /**
4839  * cik_cp_compute_resume - setup the compute queue registers
4840  *
4841  * @rdev: radeon_device pointer
4842  *
4843  * Program the compute queues and test them to make sure they
4844  * are working.
4845  * Returns 0 for success, error for failure.
4846  */
4847 static int cik_cp_compute_resume(struct radeon_device *rdev)
4848 {
4849 	int r, i, j, idx;
4850 	u32 tmp;
4851 	bool use_doorbell = true;
4852 	u64 hqd_gpu_addr;
4853 	u64 mqd_gpu_addr;
4854 	u64 eop_gpu_addr;
4855 	u64 wb_gpu_addr;
4856 	u32 *buf;
4857 	struct bonaire_mqd *mqd;
4858 
4859 	r = cik_cp_compute_start(rdev);
4860 	if (r)
4861 		return r;
4862 
4863 	/* fix up chicken bits */
4864 	tmp = RREG32(CP_CPF_DEBUG);
4865 	tmp |= (1 << 23);
4866 	WREG32(CP_CPF_DEBUG, tmp);
4867 
4868 	/* init the pipes */
4869 	mutex_lock(&rdev->srbm_mutex);
4870 
4871 	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4872 
4873 	cik_srbm_select(rdev, 0, 0, 0, 0);
4874 
4875 	/* write the EOP addr */
4876 	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4877 	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4878 
4879 	/* set the VMID assigned */
4880 	WREG32(CP_HPD_EOP_VMID, 0);
4881 
4882 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4883 	tmp = RREG32(CP_HPD_EOP_CONTROL);
4884 	tmp &= ~EOP_SIZE_MASK;
4885 	tmp |= order_base_2(MEC_HPD_SIZE / 8);
4886 	WREG32(CP_HPD_EOP_CONTROL, tmp);
4887 
4888 	mutex_unlock(&rdev->srbm_mutex);
4889 
4890 	/* init the queues.  Just two for now. */
4891 	for (i = 0; i < 2; i++) {
4892 		if (i == 0)
4893 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4894 		else
4895 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4896 
4897 		if (rdev->ring[idx].mqd_obj == NULL) {
4898 			r = radeon_bo_create(rdev,
4899 					     sizeof(struct bonaire_mqd),
4900 					     PAGE_SIZE, true,
4901 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4902 					     NULL, &rdev->ring[idx].mqd_obj);
4903 			if (r) {
4904 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4905 				return r;
4906 			}
4907 		}
4908 
4909 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4910 		if (unlikely(r != 0)) {
4911 			cik_cp_compute_fini(rdev);
4912 			return r;
4913 		}
4914 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4915 				  &mqd_gpu_addr);
4916 		if (r) {
4917 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4918 			cik_cp_compute_fini(rdev);
4919 			return r;
4920 		}
4921 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4922 		if (r) {
4923 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4924 			cik_cp_compute_fini(rdev);
4925 			return r;
4926 		}
4927 
4928 		/* init the mqd struct */
4929 		memset(buf, 0, sizeof(struct bonaire_mqd));
4930 
4931 		mqd = (struct bonaire_mqd *)buf;
4932 		mqd->header = 0xC0310800;
4933 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4934 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4935 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4936 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4937 
4938 		mutex_lock(&rdev->srbm_mutex);
4939 		cik_srbm_select(rdev, rdev->ring[idx].me,
4940 				rdev->ring[idx].pipe,
4941 				rdev->ring[idx].queue, 0);
4942 
4943 		/* disable wptr polling */
4944 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4945 		tmp &= ~WPTR_POLL_EN;
4946 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4947 
4948 		/* enable doorbell? */
4949 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4950 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4951 		if (use_doorbell)
4952 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4953 		else
4954 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4955 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4956 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4957 
4958 		/* disable the queue if it's active */
4959 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4960 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4961 		mqd->queue_state.cp_hqd_pq_wptr= 0;
4962 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4963 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4964 			for (j = 0; j < rdev->usec_timeout; j++) {
4965 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4966 					break;
4967 				udelay(1);
4968 			}
4969 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4970 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4971 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4972 		}
4973 
4974 		/* set the pointer to the MQD */
4975 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4976 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4977 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4978 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4979 		/* set MQD vmid to 0 */
4980 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4981 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4982 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4983 
4984 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4985 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4986 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4987 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4988 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4989 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4990 
4991 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4992 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4993 		mqd->queue_state.cp_hqd_pq_control &=
4994 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4995 
4996 		mqd->queue_state.cp_hqd_pq_control |=
4997 			order_base_2(rdev->ring[idx].ring_size / 8);
4998 		mqd->queue_state.cp_hqd_pq_control |=
4999 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
5000 #ifdef __BIG_ENDIAN
5001 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
5002 #endif
5003 		mqd->queue_state.cp_hqd_pq_control &=
5004 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
5005 		mqd->queue_state.cp_hqd_pq_control |=
5006 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
5007 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
5008 
5009 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
5010 		if (i == 0)
5011 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
5012 		else
5013 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
5014 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
5015 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
5016 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
5017 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
5018 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
5019 
5020 		/* set the wb address wether it's enabled or not */
5021 		if (i == 0)
5022 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
5023 		else
5024 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
5025 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5026 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5027 			upper_32_bits(wb_gpu_addr) & 0xffff;
5028 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5029 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5030 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5031 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5032 
5033 		/* enable the doorbell if requested */
5034 		if (use_doorbell) {
5035 			mqd->queue_state.cp_hqd_pq_doorbell_control =
5036 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5037 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5038 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
5039 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5040 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5041 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
5042 				~(DOORBELL_SOURCE | DOORBELL_HIT);
5043 
5044 		} else {
5045 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5046 		}
5047 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5048 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
5049 
5050 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5051 		rdev->ring[idx].wptr = 0;
5052 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5053 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5054 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5055 
5056 		/* set the vmid for the queue */
5057 		mqd->queue_state.cp_hqd_vmid = 0;
5058 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5059 
5060 		/* activate the queue */
5061 		mqd->queue_state.cp_hqd_active = 1;
5062 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5063 
5064 		cik_srbm_select(rdev, 0, 0, 0, 0);
5065 		mutex_unlock(&rdev->srbm_mutex);
5066 
5067 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5068 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5069 
5070 		rdev->ring[idx].ready = true;
5071 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5072 		if (r)
5073 			rdev->ring[idx].ready = false;
5074 	}
5075 
5076 	return 0;
5077 }
5078 
/* Enable/disable both the gfx and compute command processors. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
5084 
/*
 * Load microcode for both the gfx and compute command processors.
 * Returns 0 on success or the first failing loader's error code.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r == 0)
		r = cik_cp_compute_load_microcode(rdev);
	return r;
}
5098 
/* Tear down both the gfx and compute command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
5104 
5105 static int cik_cp_resume(struct radeon_device *rdev)
5106 {
5107 	int r;
5108 
5109 	cik_enable_gui_idle_interrupt(rdev, false);
5110 
5111 	r = cik_cp_load_microcode(rdev);
5112 	if (r)
5113 		return r;
5114 
5115 	r = cik_cp_gfx_resume(rdev);
5116 	if (r)
5117 		return r;
5118 	r = cik_cp_compute_resume(rdev);
5119 	if (r)
5120 		return r;
5121 
5122 	cik_enable_gui_idle_interrupt(rdev, true);
5123 
5124 	return 0;
5125 }
5126 
/*
 * Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log.
 * Purely diagnostic; used before/after a GPU soft reset.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
5166 
5167 /**
5168  * cik_gpu_check_soft_reset - check which blocks are busy
5169  *
5170  * @rdev: radeon_device pointer
5171  *
5172  * Check which blocks are busy and return the relevant reset
5173  * mask to be used by cik_gpu_soft_reset().
5174  * Returns a mask of the blocks to be reset.
5175  */
5176 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5177 {
5178 	u32 reset_mask = 0;
5179 	u32 tmp;
5180 
5181 	/* GRBM_STATUS */
5182 	tmp = RREG32(GRBM_STATUS);
5183 	if (tmp & (PA_BUSY | SC_BUSY |
5184 		   BCI_BUSY | SX_BUSY |
5185 		   TA_BUSY | VGT_BUSY |
5186 		   DB_BUSY | CB_BUSY |
5187 		   GDS_BUSY | SPI_BUSY |
5188 		   IA_BUSY | IA_BUSY_NO_DMA))
5189 		reset_mask |= RADEON_RESET_GFX;
5190 
5191 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5192 		reset_mask |= RADEON_RESET_CP;
5193 
5194 	/* GRBM_STATUS2 */
5195 	tmp = RREG32(GRBM_STATUS2);
5196 	if (tmp & RLC_BUSY)
5197 		reset_mask |= RADEON_RESET_RLC;
5198 
5199 	/* SDMA0_STATUS_REG */
5200 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5201 	if (!(tmp & SDMA_IDLE))
5202 		reset_mask |= RADEON_RESET_DMA;
5203 
5204 	/* SDMA1_STATUS_REG */
5205 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5206 	if (!(tmp & SDMA_IDLE))
5207 		reset_mask |= RADEON_RESET_DMA1;
5208 
5209 	/* SRBM_STATUS2 */
5210 	tmp = RREG32(SRBM_STATUS2);
5211 	if (tmp & SDMA_BUSY)
5212 		reset_mask |= RADEON_RESET_DMA;
5213 
5214 	if (tmp & SDMA1_BUSY)
5215 		reset_mask |= RADEON_RESET_DMA1;
5216 
5217 	/* SRBM_STATUS */
5218 	tmp = RREG32(SRBM_STATUS);
5219 
5220 	if (tmp & IH_BUSY)
5221 		reset_mask |= RADEON_RESET_IH;
5222 
5223 	if (tmp & SEM_BUSY)
5224 		reset_mask |= RADEON_RESET_SEM;
5225 
5226 	if (tmp & GRBM_RQ_PENDING)
5227 		reset_mask |= RADEON_RESET_GRBM;
5228 
5229 	if (tmp & VMC_BUSY)
5230 		reset_mask |= RADEON_RESET_VMC;
5231 
5232 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5233 		   MCC_BUSY | MCD_BUSY))
5234 		reset_mask |= RADEON_RESET_MC;
5235 
5236 	if (evergreen_is_display_hung(rdev))
5237 		reset_mask |= RADEON_RESET_DISPLAY;
5238 
5239 	/* Skip MC reset as it's mostly likely not hung, just busy */
5240 	if (reset_mask & RADEON_RESET_MC) {
5241 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5242 		reset_mask &= ~RADEON_RESET_MC;
5243 	}
5244 
5245 	return reset_mask;
5246 }
5247 
5248 /**
5249  * cik_gpu_soft_reset - soft reset GPU
5250  *
5251  * @rdev: radeon_device pointer
5252  * @reset_mask: mask of which blocks to reset
5253  *
5254  * Soft reset the blocks specified in @reset_mask.
5255  */
5256 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5257 {
5258 	struct evergreen_mc_save save;
5259 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5260 	u32 tmp;
5261 
5262 	if (reset_mask == 0)
5263 		return;
5264 
5265 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5266 
5267 	cik_print_gpu_status_regs(rdev);
5268 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5269 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5270 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5271 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5272 
5273 	/* disable CG/PG */
5274 	cik_fini_pg(rdev);
5275 	cik_fini_cg(rdev);
5276 
5277 	/* stop the rlc */
5278 	cik_rlc_stop(rdev);
5279 
5280 	/* Disable GFX parsing/prefetching */
5281 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5282 
5283 	/* Disable MEC parsing/prefetching */
5284 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5285 
5286 	if (reset_mask & RADEON_RESET_DMA) {
5287 		/* sdma0 */
5288 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5289 		tmp |= SDMA_HALT;
5290 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5291 	}
5292 	if (reset_mask & RADEON_RESET_DMA1) {
5293 		/* sdma1 */
5294 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5295 		tmp |= SDMA_HALT;
5296 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5297 	}
5298 
5299 	evergreen_mc_stop(rdev, &save);
5300 	if (evergreen_mc_wait_for_idle(rdev)) {
5301 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5302 	}
5303 
5304 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5305 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5306 
5307 	if (reset_mask & RADEON_RESET_CP) {
5308 		grbm_soft_reset |= SOFT_RESET_CP;
5309 
5310 		srbm_soft_reset |= SOFT_RESET_GRBM;
5311 	}
5312 
5313 	if (reset_mask & RADEON_RESET_DMA)
5314 		srbm_soft_reset |= SOFT_RESET_SDMA;
5315 
5316 	if (reset_mask & RADEON_RESET_DMA1)
5317 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5318 
5319 	if (reset_mask & RADEON_RESET_DISPLAY)
5320 		srbm_soft_reset |= SOFT_RESET_DC;
5321 
5322 	if (reset_mask & RADEON_RESET_RLC)
5323 		grbm_soft_reset |= SOFT_RESET_RLC;
5324 
5325 	if (reset_mask & RADEON_RESET_SEM)
5326 		srbm_soft_reset |= SOFT_RESET_SEM;
5327 
5328 	if (reset_mask & RADEON_RESET_IH)
5329 		srbm_soft_reset |= SOFT_RESET_IH;
5330 
5331 	if (reset_mask & RADEON_RESET_GRBM)
5332 		srbm_soft_reset |= SOFT_RESET_GRBM;
5333 
5334 	if (reset_mask & RADEON_RESET_VMC)
5335 		srbm_soft_reset |= SOFT_RESET_VMC;
5336 
5337 	if (!(rdev->flags & RADEON_IS_IGP)) {
5338 		if (reset_mask & RADEON_RESET_MC)
5339 			srbm_soft_reset |= SOFT_RESET_MC;
5340 	}
5341 
5342 	if (grbm_soft_reset) {
5343 		tmp = RREG32(GRBM_SOFT_RESET);
5344 		tmp |= grbm_soft_reset;
5345 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5346 		WREG32(GRBM_SOFT_RESET, tmp);
5347 		tmp = RREG32(GRBM_SOFT_RESET);
5348 
5349 		udelay(50);
5350 
5351 		tmp &= ~grbm_soft_reset;
5352 		WREG32(GRBM_SOFT_RESET, tmp);
5353 		tmp = RREG32(GRBM_SOFT_RESET);
5354 	}
5355 
5356 	if (srbm_soft_reset) {
5357 		tmp = RREG32(SRBM_SOFT_RESET);
5358 		tmp |= srbm_soft_reset;
5359 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5360 		WREG32(SRBM_SOFT_RESET, tmp);
5361 		tmp = RREG32(SRBM_SOFT_RESET);
5362 
5363 		udelay(50);
5364 
5365 		tmp &= ~srbm_soft_reset;
5366 		WREG32(SRBM_SOFT_RESET, tmp);
5367 		tmp = RREG32(SRBM_SOFT_RESET);
5368 	}
5369 
5370 	/* Wait a little for things to settle down */
5371 	udelay(50);
5372 
5373 	evergreen_mc_resume(rdev, &save);
5374 	udelay(50);
5375 
5376 	cik_print_gpu_status_regs(rdev);
5377 }
5378 
/* GMCON registers saved across a Kaveri (IGP) pci config reset. */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5384 
/*
 * Save the GMCON registers that a pci config reset clobbers on IGPs,
 * then disable the render engine auto-execute and stutter features so
 * the reset proceeds cleanly.  Paired with kv_restore_regs_for_reset().
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5396 
5397 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5398 				      struct kv_reset_save_regs *save)
5399 {
5400 	int i;
5401 
5402 	WREG32(GMCON_PGFSM_WRITE, 0);
5403 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5404 
5405 	for (i = 0; i < 5; i++)
5406 		WREG32(GMCON_PGFSM_WRITE, 0);
5407 
5408 	WREG32(GMCON_PGFSM_WRITE, 0);
5409 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5410 
5411 	for (i = 0; i < 5; i++)
5412 		WREG32(GMCON_PGFSM_WRITE, 0);
5413 
5414 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5415 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5416 
5417 	for (i = 0; i < 5; i++)
5418 		WREG32(GMCON_PGFSM_WRITE, 0);
5419 
5420 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5421 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5422 
5423 	for (i = 0; i < 5; i++)
5424 		WREG32(GMCON_PGFSM_WRITE, 0);
5425 
5426 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5427 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5428 
5429 	for (i = 0; i < 5; i++)
5430 		WREG32(GMCON_PGFSM_WRITE, 0);
5431 
5432 	WREG32(GMCON_PGFSM_WRITE, 0);
5433 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5434 
5435 	for (i = 0; i < 5; i++)
5436 		WREG32(GMCON_PGFSM_WRITE, 0);
5437 
5438 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5439 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5440 
5441 	for (i = 0; i < 5; i++)
5442 		WREG32(GMCON_PGFSM_WRITE, 0);
5443 
5444 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5445 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5446 
5447 	for (i = 0; i < 5; i++)
5448 		WREG32(GMCON_PGFSM_WRITE, 0);
5449 
5450 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5451 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5452 
5453 	for (i = 0; i < 5; i++)
5454 		WREG32(GMCON_PGFSM_WRITE, 0);
5455 
5456 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5457 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5458 
5459 	for (i = 0; i < 5; i++)
5460 		WREG32(GMCON_PGFSM_WRITE, 0);
5461 
5462 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5463 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5464 
5465 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5466 	WREG32(GMCON_MISC, save->gmcon_misc);
5467 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5468 }
5469 
/*
 * cik_gpu_pci_config_reset - full-asic reset via PCI config space
 *
 * Heavier hammer than cik_gpu_soft_reset(): halts every engine,
 * quiesces the MC, disables bus mastering and triggers a pci config
 * reset, then waits for the asic to respond again.  On IGPs the
 * GMCON state is saved/restored around the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* GMCON state does not survive the reset on IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	/* DragonFly-specific busmaster disable (Linux uses pci_clear_master) */
	pci_disable_busmaster(rdev->pdev->dev.bsddev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads as
	 * all-ones while the asic is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5532 
5533 /**
5534  * cik_asic_reset - soft reset GPU
5535  *
5536  * @rdev: radeon_device pointer
5537  *
5538  * Look up which blocks are hung and attempt
5539  * to reset them.
5540  * Returns 0 for success.
5541  */
5542 int cik_asic_reset(struct radeon_device *rdev)
5543 {
5544 	u32 reset_mask;
5545 
5546 	reset_mask = cik_gpu_check_soft_reset(rdev);
5547 
5548 	if (reset_mask)
5549 		r600_set_bios_scratch_engine_hung(rdev, true);
5550 
5551 	/* try soft reset */
5552 	cik_gpu_soft_reset(rdev, reset_mask);
5553 
5554 	reset_mask = cik_gpu_check_soft_reset(rdev);
5555 
5556 	/* try pci config reset */
5557 	if (reset_mask && radeon_hard_reset)
5558 		cik_gpu_pci_config_reset(rdev);
5559 
5560 	reset_mask = cik_gpu_check_soft_reset(rdev);
5561 
5562 	if (!reset_mask)
5563 		r600_set_bios_scratch_engine_hung(rdev, false);
5564 
5565 	return 0;
5566 }
5567 
5568 /**
5569  * cik_gfx_is_lockup - check if the 3D engine is locked up
5570  *
5571  * @rdev: radeon_device pointer
5572  * @ring: radeon_ring structure holding ring information
5573  *
5574  * Check if the 3D engine is locked up (CIK).
5575  * Returns true if the engine is locked, false if not.
5576  */
5577 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5578 {
5579 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5580 
5581 	if (!(reset_mask & (RADEON_RESET_GFX |
5582 			    RADEON_RESET_COMPUTE |
5583 			    RADEON_RESET_CP))) {
5584 		radeon_ring_lockup_update(rdev, ring);
5585 		return false;
5586 	}
5587 	return radeon_ring_test_lockup(rdev, ring);
5588 }
5589 
5590 /* MC */
5591 /**
5592  * cik_mc_program - program the GPU memory controller
5593  *
5594  * @rdev: radeon_device pointer
5595  *
5596  * Set the location of vram, gart, and AGP in the GPU's
5597  * physical address space (CIK).
5598  */
5599 static void cik_mc_program(struct radeon_device *rdev)
5600 {
5601 	struct evergreen_mc_save save;
5602 	u32 tmp;
5603 	int i, j;
5604 
5605 	/* Initialize HDP */
5606 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5607 		WREG32((0x2c14 + j), 0x00000000);
5608 		WREG32((0x2c18 + j), 0x00000000);
5609 		WREG32((0x2c1c + j), 0x00000000);
5610 		WREG32((0x2c20 + j), 0x00000000);
5611 		WREG32((0x2c24 + j), 0x00000000);
5612 	}
5613 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5614 
5615 	evergreen_mc_stop(rdev, &save);
5616 	if (radeon_mc_wait_for_idle(rdev)) {
5617 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5618 	}
5619 	/* Lockout access through VGA aperture*/
5620 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5621 	/* Update configuration */
5622 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5623 	       rdev->mc.vram_start >> 12);
5624 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5625 	       rdev->mc.vram_end >> 12);
5626 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5627 	       rdev->vram_scratch.gpu_addr >> 12);
5628 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5629 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5630 	WREG32(MC_VM_FB_LOCATION, tmp);
5631 	/* XXX double check these! */
5632 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5633 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5634 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5635 	WREG32(MC_VM_AGP_BASE, 0);
5636 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5637 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5638 	if (radeon_mc_wait_for_idle(rdev)) {
5639 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5640 	}
5641 	evergreen_mc_resume(rdev, &save);
5642 	/* we need to own VRAM, so turn off the VGA renderer here
5643 	 * to stop it overwriting our objects */
5644 	rv515_vga_render_disable(rdev);
5645 }
5646 
5647 /**
5648  * cik_mc_init - initialize the memory controller driver params
5649  *
5650  * @rdev: radeon_device pointer
5651  *
5652  * Look up the amount of vram, vram width, and decide how to place
5653  * vram and gart within the GPU's physical address space (CIK).
5654  * Returns 0 for success.
5655  */
5656 static int cik_mc_init(struct radeon_device *rdev)
5657 {
5658 	u32 tmp;
5659 	int chansize, numchan;
5660 
5661 	/* Get VRAM informations */
5662 	rdev->mc.vram_is_ddr = true;
5663 	tmp = RREG32(MC_ARB_RAMCFG);
5664 	if (tmp & CHANSIZE_MASK) {
5665 		chansize = 64;
5666 	} else {
5667 		chansize = 32;
5668 	}
5669 	tmp = RREG32(MC_SHARED_CHMAP);
5670 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5671 	case 0:
5672 	default:
5673 		numchan = 1;
5674 		break;
5675 	case 1:
5676 		numchan = 2;
5677 		break;
5678 	case 2:
5679 		numchan = 4;
5680 		break;
5681 	case 3:
5682 		numchan = 8;
5683 		break;
5684 	case 4:
5685 		numchan = 3;
5686 		break;
5687 	case 5:
5688 		numchan = 6;
5689 		break;
5690 	case 6:
5691 		numchan = 10;
5692 		break;
5693 	case 7:
5694 		numchan = 12;
5695 		break;
5696 	case 8:
5697 		numchan = 16;
5698 		break;
5699 	}
5700 	rdev->mc.vram_width = numchan * chansize;
5701 	/* Could aper size report 0 ? */
5702 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5703 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5704 	/* size in MB on si */
5705 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5706 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5707 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5708 	si_vram_gtt_location(rdev, &rdev->mc);
5709 	radeon_update_bandwidth_info(rdev);
5710 
5711 	return 0;
5712 }
5713 
5714 /*
5715  * GART
5716  * VMID 0 is the physical GPU addresses as used by the kernel.
5717  * VMIDs 1-15 are used for userspace clients and are handled
5718  * by the radeon vm/hsa code.
5719  */
5720 /**
5721  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5722  *
5723  * @rdev: radeon_device pointer
5724  *
5725  * Flush the TLB for the VMID 0 page table (CIK).
5726  */
5727 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5728 {
5729 	/* flush hdp cache */
5730 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5731 
5732 	/* bits 0-15 are the VM contexts0-15 */
5733 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5734 }
5735 
5736 /**
5737  * cik_pcie_gart_enable - gart enable
5738  *
5739  * @rdev: radeon_device pointer
5740  *
5741  * This sets up the TLBs, programs the page tables for VMID0,
5742  * sets up the hw for VMIDs 1-15 which are allocated on
5743  * demand, and sets up the global locations for the LDS, GDS,
5744  * and GPUVM for FSA64 clients (CIK).
5745  * Returns 0 for success, errors for failure.
5746  */
5747 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5748 {
5749 	int r, i;
5750 
5751 	if (rdev->gart.robj == NULL) {
5752 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5753 		return -EINVAL;
5754 	}
5755 	r = radeon_gart_table_vram_pin(rdev);
5756 	if (r)
5757 		return r;
5758 	/* Setup TLB control */
5759 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5760 	       (0xA << 7) |
5761 	       ENABLE_L1_TLB |
5762 	       ENABLE_L1_FRAGMENT_PROCESSING |
5763 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5764 	       ENABLE_ADVANCED_DRIVER_MODEL |
5765 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5766 	/* Setup L2 cache */
5767 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5768 	       ENABLE_L2_FRAGMENT_PROCESSING |
5769 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5770 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5771 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5772 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5773 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5774 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5775 	       BANK_SELECT(4) |
5776 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5777 	/* setup context0 */
5778 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5779 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5780 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5781 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5782 			(u32)(rdev->dummy_page.addr >> 12));
5783 	WREG32(VM_CONTEXT0_CNTL2, 0);
5784 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5785 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5786 
5787 	WREG32(0x15D4, 0);
5788 	WREG32(0x15D8, 0);
5789 	WREG32(0x15DC, 0);
5790 
5791 	/* restore context1-15 */
5792 	/* set vm size, must be a multiple of 4 */
5793 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5794 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5795 	for (i = 1; i < 16; i++) {
5796 		if (i < 8)
5797 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5798 			       rdev->vm_manager.saved_table_addr[i]);
5799 		else
5800 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5801 			       rdev->vm_manager.saved_table_addr[i]);
5802 	}
5803 
5804 	/* enable context1-15 */
5805 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5806 	       (u32)(rdev->dummy_page.addr >> 12));
5807 	WREG32(VM_CONTEXT1_CNTL2, 4);
5808 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5809 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5810 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5811 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5812 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5813 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5814 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5815 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5816 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5817 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5818 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5819 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5820 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5821 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5822 
5823 	if (rdev->family == CHIP_KAVERI) {
5824 		u32 tmp = RREG32(CHUB_CONTROL);
5825 		tmp &= ~BYPASS_VM;
5826 		WREG32(CHUB_CONTROL, tmp);
5827 	}
5828 
5829 	/* XXX SH_MEM regs */
5830 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5831 	mutex_lock(&rdev->srbm_mutex);
5832 	for (i = 0; i < 16; i++) {
5833 		cik_srbm_select(rdev, 0, 0, 0, i);
5834 		/* CP and shaders */
5835 		WREG32(SH_MEM_CONFIG, 0);
5836 		WREG32(SH_MEM_APE1_BASE, 1);
5837 		WREG32(SH_MEM_APE1_LIMIT, 0);
5838 		WREG32(SH_MEM_BASES, 0);
5839 		/* SDMA GFX */
5840 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5841 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5842 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5843 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5844 		/* XXX SDMA RLC - todo */
5845 	}
5846 	cik_srbm_select(rdev, 0, 0, 0, 0);
5847 	mutex_unlock(&rdev->srbm_mutex);
5848 
5849 	cik_pcie_gart_tlb_flush(rdev);
5850 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5851 		 (unsigned)(rdev->mc.gtt_size >> 20),
5852 		 (unsigned long long)rdev->gart.table_addr);
5853 	rdev->gart.ready = true;
5854 	return 0;
5855 }
5856 
5857 /**
5858  * cik_pcie_gart_disable - gart disable
5859  *
5860  * @rdev: radeon_device pointer
5861  *
5862  * This disables all VM page table (CIK).
5863  */
5864 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5865 {
5866 	unsigned i;
5867 
5868 	for (i = 1; i < 16; ++i) {
5869 		uint32_t reg;
5870 		if (i < 8)
5871 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5872 		else
5873 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5874 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5875 	}
5876 
5877 	/* Disable all tables */
5878 	WREG32(VM_CONTEXT0_CNTL, 0);
5879 	WREG32(VM_CONTEXT1_CNTL, 0);
5880 	/* Setup TLB control */
5881 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5882 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5883 	/* Setup L2 cache */
5884 	WREG32(VM_L2_CNTL,
5885 	       ENABLE_L2_FRAGMENT_PROCESSING |
5886 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5887 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5888 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5889 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5890 	WREG32(VM_L2_CNTL2, 0);
5891 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5892 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5893 	radeon_gart_table_vram_unpin(rdev);
5894 }
5895 
5896 /**
5897  * cik_pcie_gart_fini - vm fini callback
5898  *
5899  * @rdev: radeon_device pointer
5900  *
5901  * Tears down the driver GART/VM setup (CIK).
5902  */
5903 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5904 {
5905 	cik_pcie_gart_disable(rdev);
5906 	radeon_gart_table_vram_free(rdev);
5907 	radeon_gart_fini(rdev);
5908 }
5909 
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0 (success).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5923 
5924 /*
5925  * vm
5926  * VMID 0 is the physical GPU addresses as used by the kernel.
5927  * VMIDs 1-15 are used for userspace clients and are handled
5928  * by the radeon vm/hsa code.
5929  */
5930 /**
5931  * cik_vm_init - cik vm init callback
5932  *
5933  * @rdev: radeon_device pointer
5934  *
5935  * Inits cik specific vm parameters (number of VMs, base of vram for
5936  * VMIDs 1-15) (CIK).
5937  * Returns 0 for success.
5938  */
5939 int cik_vm_init(struct radeon_device *rdev)
5940 {
5941 	/*
5942 	 * number of VMs
5943 	 * VMID 0 is reserved for System
5944 	 * radeon graphics/compute will use VMIDs 1-7
5945 	 * amdkfd will use VMIDs 8-15
5946 	 */
5947 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5948 	/* base offset of vram pages */
5949 	if (rdev->flags & RADEON_IS_IGP) {
5950 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5951 		tmp <<= 22;
5952 		rdev->vm_manager.vram_base_offset = tmp;
5953 	} else
5954 		rdev->vm_manager.vram_base_offset = 0;
5955 
5956 	return 0;
5957 }
5958 
5959 /**
5960  * cik_vm_fini - cik vm fini callback
5961  *
5962  * @rdev: radeon_device pointer
5963  *
5964  * Tear down any asic specific VM setup (CIK).
5965  */
5966 void cik_vm_fini(struct radeon_device *rdev)
5967 {
5968 }
5969 
5970 /**
5971  * cik_vm_decode_fault - print human readable fault info
5972  *
5973  * @rdev: radeon_device pointer
5974  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5975  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5976  *
5977  * Print human readable fault information (CIK).
5978  */
5979 static void cik_vm_decode_fault(struct radeon_device *rdev,
5980 				u32 status, u32 addr, u32 mc_client)
5981 {
5982 	u32 mc_id;
5983 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5984 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5985 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5986 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5987 
5988 	if (rdev->family == CHIP_HAWAII)
5989 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5990 	else
5991 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5992 
5993 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5994 	       protections, vmid, addr,
5995 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5996 	       block, mc_client, mc_id);
5997 }
5998 
5999 /**
6000  * cik_vm_flush - cik vm flush using the CP
6001  *
6002  * @rdev: radeon_device pointer
6003  *
6004  * Update the page table base and flush the VM TLB
6005  * using the CP (CIK).
6006  */
6007 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6008 		  unsigned vm_id, uint64_t pd_addr)
6009 {
6010 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6011 
6012 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6013 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6014 				 WRITE_DATA_DST_SEL(0)));
6015 	if (vm_id < 8) {
6016 		radeon_ring_write(ring,
6017 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6018 	} else {
6019 		radeon_ring_write(ring,
6020 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6021 	}
6022 	radeon_ring_write(ring, 0);
6023 	radeon_ring_write(ring, pd_addr >> 12);
6024 
6025 	/* update SH_MEM_* regs */
6026 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6027 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6028 				 WRITE_DATA_DST_SEL(0)));
6029 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6030 	radeon_ring_write(ring, 0);
6031 	radeon_ring_write(ring, VMID(vm_id));
6032 
6033 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6034 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6035 				 WRITE_DATA_DST_SEL(0)));
6036 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
6037 	radeon_ring_write(ring, 0);
6038 
6039 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6040 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6041 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6042 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6043 
6044 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6045 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6046 				 WRITE_DATA_DST_SEL(0)));
6047 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6048 	radeon_ring_write(ring, 0);
6049 	radeon_ring_write(ring, VMID(0));
6050 
6051 	/* HDP flush */
6052 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6053 
6054 	/* bits 0-15 are the VM contexts0-15 */
6055 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6056 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6057 				 WRITE_DATA_DST_SEL(0)));
6058 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6059 	radeon_ring_write(ring, 0);
6060 	radeon_ring_write(ring, 1 << vm_id);
6061 
6062 	/* wait for the invalidate to complete */
6063 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6064 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6065 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6066 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6067 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6068 	radeon_ring_write(ring, 0);
6069 	radeon_ring_write(ring, 0); /* ref */
6070 	radeon_ring_write(ring, 0); /* mask */
6071 	radeon_ring_write(ring, 0x20); /* poll interval */
6072 
6073 	/* compute doesn't have PFP */
6074 	if (usepfp) {
6075 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6076 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6077 		radeon_ring_write(ring, 0x0);
6078 	}
6079 }
6080 
6081 /*
6082  * RLC
6083  * The RLC is a multi-purpose microengine that handles a
6084  * variety of functions, the most important of which is
6085  * the interrupt controller.
6086  */
6087 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6088 					  bool enable)
6089 {
6090 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6091 
6092 	if (enable)
6093 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6094 	else
6095 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6096 	WREG32(CP_INT_CNTL_RING0, tmp);
6097 }
6098 
6099 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6100 {
6101 	u32 tmp;
6102 
6103 	tmp = RREG32(RLC_LB_CNTL);
6104 	if (enable)
6105 		tmp |= LOAD_BALANCE_ENABLE;
6106 	else
6107 		tmp &= ~LOAD_BALANCE_ENABLE;
6108 	WREG32(RLC_LB_CNTL, tmp);
6109 }
6110 
/* Poll until the RLC serdes masters (per-CU and non-CU) report idle,
 * bounded by rdev->usec_timeout per register.  Takes grbm_idx_mutex
 * because it iterates the SE/SH selection register. */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&rdev->grbm_idx_mutex);
	/* wait for each SE/SH's CU master to go idle */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast selection */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* then wait for the global (non-CU) masters */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
6137 
6138 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6139 {
6140 	u32 tmp;
6141 
6142 	tmp = RREG32(RLC_CNTL);
6143 	if (tmp != rlc)
6144 		WREG32(RLC_CNTL, rlc);
6145 }
6146 
/* Halt the RLC if it is running and wait for it to go idle.
 * Returns the original RLC_CNTL value so the caller can restore
 * it later via cik_update_rlc(). */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait (bounded) for the GPM block to drain */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
6170 
/* Request RLC safe mode and wait for the handshake: first until the
 * GFX power/clock status bits are set, then until the RLC clears the
 * REQ bit to acknowledge the message.  Both waits are bounded by
 * rdev->usec_timeout. */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
6191 
6192 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6193 {
6194 	u32 tmp;
6195 
6196 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6197 	WREG32(RLC_GPR_REG2, tmp);
6198 }
6199 
6200 /**
6201  * cik_rlc_stop - stop the RLC ME
6202  *
6203  * @rdev: radeon_device pointer
6204  *
6205  * Halt the RLC ME (MicroEngine) (CIK).
6206  */
6207 static void cik_rlc_stop(struct radeon_device *rdev)
6208 {
6209 	WREG32(RLC_CNTL, 0);
6210 
6211 	cik_enable_gui_idle_interrupt(rdev, false);
6212 
6213 	cik_wait_for_rlc_serdes(rdev);
6214 }
6215 
6216 /**
6217  * cik_rlc_start - start the RLC ME
6218  *
6219  * @rdev: radeon_device pointer
6220  *
6221  * Unhalt the RLC ME (MicroEngine) (CIK).
6222  */
6223 static void cik_rlc_start(struct radeon_device *rdev)
6224 {
6225 	WREG32(RLC_CNTL, RLC_ENABLE);
6226 
6227 	cik_enable_gui_idle_interrupt(rdev, true);
6228 
6229 	udelay(50);
6230 }
6231 
6232 /**
6233  * cik_rlc_resume - setup the RLC hw
6234  *
6235  * @rdev: radeon_device pointer
6236  *
6237  * Initialize the RLC registers, load the ucode,
6238  * and start the RLC (CIK).
6239  * Returns 0 for success, -EINVAL if the ucode is not available.
6240  */
6241 static int cik_rlc_resume(struct radeon_device *rdev)
6242 {
6243 	u32 i, size, tmp;
6244 
6245 	if (!rdev->rlc_fw)
6246 		return -EINVAL;
6247 
6248 	cik_rlc_stop(rdev);
6249 
6250 	/* disable CG */
6251 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6252 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6253 
6254 	si_rlc_reset(rdev);
6255 
6256 	cik_init_pg(rdev);
6257 
6258 	cik_init_cg(rdev);
6259 
6260 	WREG32(RLC_LB_CNTR_INIT, 0);
6261 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6262 
6263 	mutex_lock(&rdev->grbm_idx_mutex);
6264 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6265 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6266 	WREG32(RLC_LB_PARAMS, 0x00600408);
6267 	WREG32(RLC_LB_CNTL, 0x80000004);
6268 	mutex_unlock(&rdev->grbm_idx_mutex);
6269 
6270 	WREG32(RLC_MC_CNTL, 0);
6271 	WREG32(RLC_UCODE_CNTL, 0);
6272 
6273 	if (rdev->new_fw) {
6274 		const struct rlc_firmware_header_v1_0 *hdr =
6275 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6276 		const __le32 *fw_data = (const __le32 *)
6277 			((const char *)rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6278 
6279 		radeon_ucode_print_rlc_hdr(&hdr->header);
6280 
6281 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6282 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6283 		for (i = 0; i < size; i++)
6284 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6285 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6286 	} else {
6287 		const __be32 *fw_data;
6288 
6289 		switch (rdev->family) {
6290 		case CHIP_BONAIRE:
6291 		case CHIP_HAWAII:
6292 		default:
6293 			size = BONAIRE_RLC_UCODE_SIZE;
6294 			break;
6295 		case CHIP_KAVERI:
6296 			size = KV_RLC_UCODE_SIZE;
6297 			break;
6298 		case CHIP_KABINI:
6299 			size = KB_RLC_UCODE_SIZE;
6300 			break;
6301 		case CHIP_MULLINS:
6302 			size = ML_RLC_UCODE_SIZE;
6303 			break;
6304 		}
6305 
6306 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6307 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6308 		for (i = 0; i < size; i++)
6309 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6310 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6311 	}
6312 
6313 	/* XXX - find out what chips support lbpw */
6314 	cik_enable_lbpw(rdev, false);
6315 
6316 	if (rdev->family == CHIP_BONAIRE)
6317 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6318 
6319 	cik_rlc_start(rdev);
6320 
6321 	return 0;
6322 }
6323 
/* Enable/disable coarse-grain clock gating (CGCG/CGLS).  The RLC is
 * halted while the serdes override is programmed, then restored. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while reprogramming the serdes; tmp holds
		 * the previous RLC_CNTL for restore below */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* dummy reads to flush pending CB clock-gating transactions */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6361 
/* Enable/disable medium-grain clock gating (MGCG) and the related
 * memory light-sleep / CGTS options, gated on rdev->cg_flags.  The
 * RLC is halted around each serdes override write and restored. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* tmp holds the pre-halt RLC_CNTL for restore below */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* force RLC and CP memory light sleep off */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);
	}
}
6445 
/* MC/hub clock-gating control registers walked by cik_enable_mc_ls()
 * and cik_enable_mc_mgcg(); each exposes the MC_LS_ENABLE and
 * MC_CG_ENABLE bits toggled there. */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6458 
6459 static void cik_enable_mc_ls(struct radeon_device *rdev,
6460 			     bool enable)
6461 {
6462 	int i;
6463 	u32 orig, data;
6464 
6465 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6466 		orig = data = RREG32(mc_cg_registers[i]);
6467 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6468 			data |= MC_LS_ENABLE;
6469 		else
6470 			data &= ~MC_LS_ENABLE;
6471 		if (data != orig)
6472 			WREG32(mc_cg_registers[i], data);
6473 	}
6474 }
6475 
6476 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6477 			       bool enable)
6478 {
6479 	int i;
6480 	u32 orig, data;
6481 
6482 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6483 		orig = data = RREG32(mc_cg_registers[i]);
6484 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6485 			data |= MC_CG_ENABLE;
6486 		else
6487 			data &= ~MC_CG_ENABLE;
6488 		if (data != orig)
6489 			WREG32(mc_cg_registers[i], data);
6490 	}
6491 }
6492 
6493 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6494 				 bool enable)
6495 {
6496 	u32 orig, data;
6497 
6498 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6499 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6500 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6501 	} else {
6502 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6503 		data |= 0xff000000;
6504 		if (data != orig)
6505 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6506 
6507 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6508 		data |= 0xff000000;
6509 		if (data != orig)
6510 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6511 	}
6512 }
6513 
6514 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6515 				 bool enable)
6516 {
6517 	u32 orig, data;
6518 
6519 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6520 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6521 		data |= 0x100;
6522 		if (orig != data)
6523 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6524 
6525 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6526 		data |= 0x100;
6527 		if (orig != data)
6528 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6529 	} else {
6530 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6531 		data &= ~0x100;
6532 		if (orig != data)
6533 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6534 
6535 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6536 		data &= ~0x100;
6537 		if (orig != data)
6538 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6539 	}
6540 }
6541 
/* Enable/disable UVD medium-grain clock gating via the UVD context
 * registers and the DCM bit of UVD_CGC_CTRL. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): the value read here is immediately overwritten
		 * by the constant below — possibly only a read cycle is
		 * intended; confirm against the UVD register docs */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6567 
6568 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6569 			       bool enable)
6570 {
6571 	u32 orig, data;
6572 
6573 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6574 
6575 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6576 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6577 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6578 	else
6579 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6580 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6581 
6582 	if (orig != data)
6583 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6584 }
6585 
6586 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6587 				bool enable)
6588 {
6589 	u32 orig, data;
6590 
6591 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6592 
6593 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6594 		data &= ~CLOCK_GATING_DIS;
6595 	else
6596 		data |= CLOCK_GATING_DIS;
6597 
6598 	if (orig != data)
6599 		WREG32(HDP_HOST_PATH_CNTL, data);
6600 }
6601 
6602 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6603 			      bool enable)
6604 {
6605 	u32 orig, data;
6606 
6607 	orig = data = RREG32(HDP_MEM_POWER_LS);
6608 
6609 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6610 		data |= HDP_LS_ENABLE;
6611 	else
6612 		data &= ~HDP_LS_ENABLE;
6613 
6614 	if (orig != data)
6615 		WREG32(HDP_MEM_POWER_LS, data);
6616 }
6617 
/* Enable or disable clock gating for the hardware blocks selected by
 * the RADEON_CG_BLOCK_* bits in @block.  For GFX, MGCG must be enabled
 * before CGCG and disabled after it (see the "order matters" note). */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* on IGPs the MC is shared with the CPU; leave it alone */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6665 
/**
 * cik_init_cg - enable clock gating at driver init/resume
 *
 * @rdev: radeon_device pointer
 *
 * GFX clockgating is brought up first, then UVD internal clockgating
 * (on parts that have UVD), then the remaining blocks in one call.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6680 
/**
 * cik_fini_cg - disable clock gating at driver teardown/suspend
 *
 * @rdev: radeon_device pointer
 *
 * Mirror image of cik_init_cg(): the non-GFX blocks are disabled
 * first, GFX last.
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6691 
6692 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6693 					  bool enable)
6694 {
6695 	u32 data, orig;
6696 
6697 	orig = data = RREG32(RLC_PG_CNTL);
6698 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6699 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6700 	else
6701 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6702 	if (orig != data)
6703 		WREG32(RLC_PG_CNTL, data);
6704 }
6705 
6706 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6707 					  bool enable)
6708 {
6709 	u32 data, orig;
6710 
6711 	orig = data = RREG32(RLC_PG_CNTL);
6712 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6713 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6714 	else
6715 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6716 	if (orig != data)
6717 		WREG32(RLC_PG_CNTL, data);
6718 }
6719 
6720 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6721 {
6722 	u32 data, orig;
6723 
6724 	orig = data = RREG32(RLC_PG_CNTL);
6725 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6726 		data &= ~DISABLE_CP_PG;
6727 	else
6728 		data |= DISABLE_CP_PG;
6729 	if (orig != data)
6730 		WREG32(RLC_PG_CNTL, data);
6731 }
6732 
6733 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6734 {
6735 	u32 data, orig;
6736 
6737 	orig = data = RREG32(RLC_PG_CNTL);
6738 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6739 		data &= ~DISABLE_GDS_PG;
6740 	else
6741 		data |= DISABLE_GDS_PG;
6742 	if (orig != data)
6743 		WREG32(RLC_PG_CNTL, data);
6744 }
6745 
6746 #define CP_ME_TABLE_SIZE    96
6747 #define CP_ME_TABLE_OFFSET  2048
6748 #define CP_MEC_TABLE_OFFSET 4096
6749 
/**
 * cik_init_cp_pg_table - populate the RLC CP jump table buffer
 *
 * @rdev: radeon_device pointer
 *
 * Copies the CP jump tables out of the microcode images into
 * rdev->rlc.cp_table_ptr.  With new-style firmware (rdev->new_fw) the
 * table offset and size come from each image's gfx firmware header;
 * with legacy firmware, fixed offsets/sizes (CP_ME_TABLE_*,
 * CP_MEC_TABLE_OFFSET) are used.  No-op if the table buffer was not
 * mapped.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;	/* number of microcode images to copy */
	u32 bo_offset = 0;	/* running dword offset into the table BO */
	u32 table_offset, table_size;

	/* Kaveri additionally carries a MEC2 image */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			/* me index selects the firmware image:
			 * 0 = CE, 1 = PFP, 2 = ME, 3 = MEC, 4 = MEC2 */
			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					((const char *)rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					((const char *)rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					((const char *)rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					((const char *)rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					((const char *)rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			/* new firmware is little endian; keep LE in the BO */
			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			/* legacy firmware: fixed offsets; me 0-2 share
			 * CP_ME_TABLE_OFFSET, MEC uses its own offset */
			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			/* legacy firmware is big endian; convert to LE */
			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
6833 
/**
 * cik_enable_gfx_cgpg - enable/disable GFX coarse grain powergating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable gfx powergating
 *
 * Sets/clears GFX_PG_ENABLE in RLC_PG_CNTL and AUTO_PG_EN in
 * RLC_AUTO_PG_CTRL as a pair.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): the value read here is discarded.
		 * Presumably the read itself is wanted (e.g. to wake
		 * the block after PG is turned off) — confirm before
		 * removing. */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6863 
/**
 * cik_get_cu_active_bitmap - read the active CU bitmap for one SE/SH
 *
 * @rdev: radeon_device pointer
 * @se: shader engine index
 * @sh: shader array index
 *
 * Selects the given SE/SH via GRBM index (under grbm_idx_mutex),
 * combines CC_GC_SHADER_ARRAY_CONFIG and GC_USER_SHADER_ARRAY_CONFIG,
 * and returns a bitmap with one bit set per *active* CU (limited to
 * max_cu_per_sh bits).
 */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 mask = 0, tmp, tmp1;
	int i;

	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, se, sh);
	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	/* restore broadcast before dropping the lock */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* keep only the high half of the CC register, then merge in the
	 * user config and shift the disabled-CU bits down to bit 0 */
	tmp &= 0xffff0000;

	tmp |= tmp1;
	tmp >>= 16;

	/* build a mask of max_cu_per_sh low bits */
	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
		mask <<= 1;
		mask |= 1;
	}

	/* registers hold disabled CUs; invert to report active ones */
	return (~tmp) & mask;
}
6888 
6889 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6890 {
6891 	u32 i, j, k, active_cu_number = 0;
6892 	u32 mask, counter, cu_bitmap;
6893 	u32 tmp = 0;
6894 
6895 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6896 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6897 			mask = 1;
6898 			cu_bitmap = 0;
6899 			counter = 0;
6900 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6901 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6902 					if (counter < 2)
6903 						cu_bitmap |= mask;
6904 					counter ++;
6905 				}
6906 				mask <<= 1;
6907 			}
6908 
6909 			active_cu_number += counter;
6910 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6911 		}
6912 	}
6913 
6914 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6915 
6916 	tmp = RREG32(RLC_MAX_PG_CU);
6917 	tmp &= ~MAX_PU_CU_MASK;
6918 	tmp |= MAX_PU_CU(active_cu_number);
6919 	WREG32(RLC_MAX_PG_CU, tmp);
6920 }
6921 
6922 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6923 				       bool enable)
6924 {
6925 	u32 data, orig;
6926 
6927 	orig = data = RREG32(RLC_PG_CNTL);
6928 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6929 		data |= STATIC_PER_CU_PG_ENABLE;
6930 	else
6931 		data &= ~STATIC_PER_CU_PG_ENABLE;
6932 	if (orig != data)
6933 		WREG32(RLC_PG_CNTL, data);
6934 }
6935 
6936 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6937 					bool enable)
6938 {
6939 	u32 data, orig;
6940 
6941 	orig = data = RREG32(RLC_PG_CNTL);
6942 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6943 		data |= DYN_PER_CU_PG_ENABLE;
6944 	else
6945 		data &= ~DYN_PER_CU_PG_ENABLE;
6946 	if (orig != data)
6947 		WREG32(RLC_PG_CNTL, data);
6948 }
6949 
6950 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6951 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6952 
/**
 * cik_init_gfx_cgpg - one-time RLC setup for gfx powergating
 *
 * @rdev: radeon_device pointer
 *
 * Writes the clear state descriptor (GPU address + size) and the
 * save/restore register list into RLC scratch space, points the RLC
 * at the save/restore buffer and CP table, and programs the PG
 * timing parameters (idle poll count, PG delays, grbm sample
 * interval).
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear state descriptor: hi addr, lo addr, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero out all three descriptor dwords */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* buffers are addressed in units of 256 bytes (>> 8) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
7001 
/**
 * cik_update_gfx_pg - enable/disable all gfx powergating modes
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable powergating
 *
 * Toggles coarse grain, static medium grain and dynamic medium grain
 * gfx powergating together, in that order.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
7008 
7009 u32 cik_get_csb_size(struct radeon_device *rdev)
7010 {
7011 	u32 count = 0;
7012 	const struct cs_section_def *sect = NULL;
7013 	const struct cs_extent_def *ext = NULL;
7014 
7015 	if (rdev->rlc.cs_data == NULL)
7016 		return 0;
7017 
7018 	/* begin clear state */
7019 	count += 2;
7020 	/* context control state */
7021 	count += 3;
7022 
7023 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7024 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7025 			if (sect->id == SECT_CONTEXT)
7026 				count += 2 + ext->reg_count;
7027 			else
7028 				return 0;
7029 		}
7030 	}
7031 	/* pa_sc_raster_config/pa_sc_raster_config1 */
7032 	count += 4;
7033 	/* end clear state */
7034 	count += 2;
7035 	/* clear state */
7036 	count += 2;
7037 
7038 	return count;
7039 }
7040 
/**
 * cik_get_csb_buffer - build the clear state indirect buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (little-endian dwords)
 *
 * Emits the clear state packet stream whose length is computed by
 * cik_get_csb_size(): preamble begin, context control, one
 * SET_CONTEXT_REG packet per extent, the per-family
 * PA_SC_RASTER_CONFIG pair, preamble end, and a CLEAR_STATE packet.
 * The two functions must stay in sync.
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* context register index, relative to base */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only SECT_CONTEXT is supported; bail out */
				return;
			}
		}
	}

	/* per-family PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG1 values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
7105 
/**
 * cik_init_pg - enable powergating at driver init/resume
 *
 * @rdev: radeon_device pointer
 *
 * No-op when no PG flags are set.  Otherwise enables SMU clock
 * slowdown, sets up the RLC state and CP/GDS powergating (only when
 * gfx PG is supported), programs the always-on CU mask, and finally
 * turns gfx powergating on.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
7120 
/**
 * cik_fini_pg - disable powergating at driver teardown/suspend
 *
 * @rdev: radeon_device pointer
 *
 * Reverse of cik_init_pg(): gfx powergating is turned off first,
 * then CP/GDS powergating where gfx PG was supported.
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
7131 
7132 /*
7133  * Interrupts
7134  * Starting with r6xx, interrupts are handled via a ring buffer.
7135  * Ring buffers are areas of GPU accessible memory that the GPU
7136  * writes interrupt vectors into and the host reads vectors out of.
7137  * There is a rptr (read pointer) that determines where the
7138  * host is currently reading, and a wptr (write pointer)
7139  * which determines where the GPU has written.  When the
7140  * pointers are equal, the ring is idle.  When the GPU
7141  * writes vectors to the ring buffer, it increments the
7142  * wptr.  When there is an interrupt, the host then starts
7143  * fetching commands and processing them until the pointers are
7144  * equal again at which point it updates the rptr.
7145  */
7146 
7147 /**
7148  * cik_enable_interrupts - Enable the interrupt ring buffer
7149  *
7150  * @rdev: radeon_device pointer
7151  *
7152  * Enable the interrupt ring buffer (CIK).
7153  */
7154 static void cik_enable_interrupts(struct radeon_device *rdev)
7155 {
7156 	u32 ih_cntl = RREG32(IH_CNTL);
7157 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7158 
7159 	ih_cntl |= ENABLE_INTR;
7160 	ih_rb_cntl |= IH_RB_ENABLE;
7161 	WREG32(IH_CNTL, ih_cntl);
7162 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7163 	rdev->ih.enabled = true;
7164 }
7165 
7166 /**
7167  * cik_disable_interrupts - Disable the interrupt ring buffer
7168  *
7169  * @rdev: radeon_device pointer
7170  *
7171  * Disable the interrupt ring buffer (CIK).
7172  */
7173 static void cik_disable_interrupts(struct radeon_device *rdev)
7174 {
7175 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7176 	u32 ih_cntl = RREG32(IH_CNTL);
7177 
7178 	ih_rb_cntl &= ~IH_RB_ENABLE;
7179 	ih_cntl &= ~ENABLE_INTR;
7180 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7181 	WREG32(IH_CNTL, ih_cntl);
7182 	/* set rptr, wptr to 0 */
7183 	WREG32(IH_RB_RPTR, 0);
7184 	WREG32(IH_RB_WPTR, 0);
7185 	rdev->ih.enabled = false;
7186 	rdev->ih.rptr = 0;
7187 }
7188 
7189 /**
7190  * cik_disable_interrupt_state - Disable all interrupt sources
7191  *
7192  * @rdev: radeon_device pointer
7193  *
7194  * Clear all interrupt enable bits used by the driver (CIK).
7195  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only context busy/empty bits, drop the rest */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: clear enables but preserve the polarity bit */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
7263 
7264 /**
7265  * cik_irq_init - init and enable the interrupt ring
7266  *
7267  * @rdev: radeon_device pointer
7268  *
7269  * Allocate a ring buffer for the interrupt controller,
7270  * enable the RLC, disable interrupts, enable the IH
7271  * ring buffer and enable it (CIK).
 * Called at device load and resume.
7273  * Returns 0 for success, errors for failure.
7274  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* undo the ring allocation on failure */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size is encoded as log2 of the dword count */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	/* DragonFly/FreeBSD: ensure PCI bus mastering is on */
	pci_enable_busmaster(rdev->pdev->dev.bsddev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7345 
7346 /**
7347  * cik_irq_set - enable/disable interrupt sources
7348  *
7349  * @rdev: radeon_device pointer
7350  *
7351  * Enable interrupt sources on the GPU (vblanks, hpd,
7352  * etc.) (CIK).
7353  * Returns 0 for success, errors for failure.
7354  */
int cik_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_m1p0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		cik_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		cik_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current enables, keeping only the context
	 * busy/empty bits, then build up the requested sources below */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;

	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;

	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	/* compute rings: only ME1 PIPE0 is supported here */
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
		}
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
		}
	}

	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	/* vblank: enabled either for a vblank waiter or a pending pflip */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("cik_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("cik_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("cik_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("cik_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("cik_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("cik_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("cik_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("cik_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("cik_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("cik_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("cik_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("cik_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* commit the accumulated enables to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);

	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* pflip interrupts stay unconditionally masked-in per crtc */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	WREG32(DC_HPD1_INT_CONTROL, hpd1);
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
	WREG32(DC_HPD6_INT_CONTROL, hpd6);

	/* posting read */
	RREG32(SRBM_STATUS);

	return 0;
}
7546 
7547 /**
7548  * cik_irq_ack - ack interrupt sources
7549  *
7550  * @rdev: radeon_device pointer
7551  *
7552  * Ack interrupt sources on the GPU (vblanks, hpd,
7553  * etc.) (CIK).  Certain interrupts sources are sw
7554  * generated and do not require an explicit ack.
7555  */
7556 static inline void cik_irq_ack(struct radeon_device *rdev)
7557 {
7558 	u32 tmp;
7559 
7560 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7561 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7562 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7563 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7564 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7565 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7566 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7567 
7568 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7569 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7570 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7571 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7572 	if (rdev->num_crtc >= 4) {
7573 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7574 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7575 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7576 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7577 	}
7578 	if (rdev->num_crtc >= 6) {
7579 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7580 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7581 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7582 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7583 	}
7584 
7585 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7586 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7587 		       GRPH_PFLIP_INT_CLEAR);
7588 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7589 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7590 		       GRPH_PFLIP_INT_CLEAR);
7591 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7592 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7593 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7594 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7595 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7596 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7597 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7598 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7599 
7600 	if (rdev->num_crtc >= 4) {
7601 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7602 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7603 			       GRPH_PFLIP_INT_CLEAR);
7604 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7605 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7606 			       GRPH_PFLIP_INT_CLEAR);
7607 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7608 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7609 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7610 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7611 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7612 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7613 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7614 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7615 	}
7616 
7617 	if (rdev->num_crtc >= 6) {
7618 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7619 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7620 			       GRPH_PFLIP_INT_CLEAR);
7621 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7622 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7623 			       GRPH_PFLIP_INT_CLEAR);
7624 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7625 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7626 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7627 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7628 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7629 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7630 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7631 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7632 	}
7633 
7634 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7635 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7636 		tmp |= DC_HPDx_INT_ACK;
7637 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7638 	}
7639 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7640 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7641 		tmp |= DC_HPDx_INT_ACK;
7642 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7643 	}
7644 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7645 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7646 		tmp |= DC_HPDx_INT_ACK;
7647 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7648 	}
7649 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7650 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7651 		tmp |= DC_HPDx_INT_ACK;
7652 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7653 	}
7654 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7655 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7656 		tmp |= DC_HPDx_INT_ACK;
7657 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7658 	}
7659 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7660 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7661 		tmp |= DC_HPDx_INT_ACK;
7662 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7663 	}
7664 }
7665 
7666 /**
7667  * cik_irq_disable - disable interrupts
7668  *
7669  * @rdev: radeon_device pointer
7670  *
7671  * Disable interrupts on the hw (CIK).
7672  */
static void cik_irq_disable(struct radeon_device *rdev)
{
	/* mask all sources first so no new interrupts latch while we ack */
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* finally clear the per-source enable state in the hw */
	cik_disable_interrupt_state(rdev);
}
7681 
7682 /**
 * cik_irq_suspend - disable interrupts for suspend
7684  *
7685  * @rdev: radeon_device pointer
7686  *
7687  * Disable interrupts and stop the RLC (CIK).
7688  * Used for suspend.
7689  */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* mask and ack all interrupt sources */
	cik_irq_disable(rdev);
	/* then stop the RLC so nothing fires during suspend */
	cik_rlc_stop(rdev);
}
7695 
7696 /**
7697  * cik_irq_fini - tear down interrupt support
7698  *
7699  * @rdev: radeon_device pointer
7700  *
7701  * Disable interrupts on the hw and free the IH ring
7702  * buffer (CIK).
7703  * Used for driver unload.
7704  */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw before tearing down the IH ring buffer */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7710 
7711 /**
7712  * cik_get_ih_wptr - get the IH ring buffer wptr
7713  *
7714  * @rdev: radeon_device pointer
7715  *
7716  * Get the IH ring buffer wptr from either the register
7717  * or the writeback memory buffer (CIK).  Also check for
7718  * ring buffer overflow and deal with it.
7719  * Used by cik_irq_process().
7720  * Returns the value of the wptr.
7721  */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	/* prefer the writeback copy of the wptr (cheaper than an MMIO read) */
	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happen start parsing interrupt
		 * from the last not overwritten vector (wptr + 16). Hopefully
		 * this should allow us to catchup.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		/* clear the overflow flag in the hw so it can latch again */
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	/* wptr is a byte offset into the ring; keep it within the ring size */
	return (wptr & rdev->ih.ptr_mask);
}
7746 
7747 /*        CIK IV Ring
7748  * Each IV ring entry is 128 bits:
7749  * [7:0]    - interrupt source id
7750  * [31:8]   - reserved
7751  * [59:32]  - interrupt source data
7752  * [63:60]  - reserved
7753  * [71:64]  - RINGID
7754  *            CP:
7755  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7756  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7757  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7758  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7759  *            PIPE_ID - ME0 0=3D
7760  *                    - ME1&2 compute dispatcher (4 pipes each)
7761  *            SDMA:
7762  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7763  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7764  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7765  * [79:72]  - VMID
7766  * [95:80]  - PASID
7767  * [127:96] - reserved
7768  */
7769 /**
7770  * cik_irq_process - interrupt handler
7771  *
7772  * @rdev: radeon_device pointer
7773  *
7774  * Interrupt hander (CIK).  Walk the IH ring,
7775  * ack interrupts and schedule work to handle
7776  * interrupt events.
7777  * Returns irq process return code.
7778  */
7779 irqreturn_t cik_irq_process(struct radeon_device *rdev)
7780 {
7781 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7782 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7783 	u32 wptr;
7784 	u32 rptr;
7785 	u32 src_id, src_data, ring_id;
7786 	u8 me_id, pipe_id, queue_id;
7787 	u32 ring_index;
7788 	bool queue_hotplug = false;
7789 	bool queue_reset = false;
7790 	u32 addr, status, mc_client;
7791 	bool queue_thermal = false;
7792 
7793 	if (!rdev->ih.enabled || rdev->shutdown)
7794 		return IRQ_NONE;
7795 
7796 	wptr = cik_get_ih_wptr(rdev);
7797 
7798 restart_ih:
7799 	/* is somebody else already processing irqs? */
7800 	if (atomic_xchg(&rdev->ih.lock, 1))
7801 		return IRQ_NONE;
7802 
7803 	rptr = rdev->ih.rptr;
7804 	DRM_DEBUG_VBLANK("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7805 
7806 	/* Order reading of wptr vs. reading of IH ring data */
7807 	rmb();
7808 
7809 	/* display interrupts */
7810 	cik_irq_ack(rdev);
7811 
7812 	while (rptr != wptr) {
7813 		/* wptr/rptr are in bytes! */
7814 		ring_index = rptr / 4;
7815 
7816 #pragma GCC diagnostic push
7817 #pragma GCC diagnostic ignored "-Wcast-qual"
7818 		radeon_kfd_interrupt(rdev,
7819 				(const void *) &rdev->ih.ring[ring_index]);
7820 #pragma GCC diagnostic pop
7821 
7822 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7823 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7824 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7825 
7826 		switch (src_id) {
7827 		case 1: /* D1 vblank/vline */
7828 			switch (src_data) {
7829 			case 0: /* D1 vblank */
7830 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7831 					if (rdev->irq.crtc_vblank_int[0]) {
7832 						drm_handle_vblank(rdev->ddev, 0);
7833 						rdev->pm.vblank_sync = true;
7834 						wake_up(&rdev->irq.vblank_queue);
7835 					}
7836 					if (atomic_read(&rdev->irq.pflip[0]))
7837 						radeon_crtc_handle_vblank(rdev, 0);
7838 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7839 					DRM_DEBUG_VBLANK("IH: D1 vblank\n");
7840 				}
7841 				break;
7842 			case 1: /* D1 vline */
7843 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7844 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7845 					DRM_DEBUG_VBLANK("IH: D1 vline\n");
7846 				}
7847 				break;
7848 			default:
7849 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7850 				break;
7851 			}
7852 			break;
7853 		case 2: /* D2 vblank/vline */
7854 			switch (src_data) {
7855 			case 0: /* D2 vblank */
7856 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7857 					if (rdev->irq.crtc_vblank_int[1]) {
7858 						drm_handle_vblank(rdev->ddev, 1);
7859 						rdev->pm.vblank_sync = true;
7860 						wake_up(&rdev->irq.vblank_queue);
7861 					}
7862 					if (atomic_read(&rdev->irq.pflip[1]))
7863 						radeon_crtc_handle_vblank(rdev, 1);
7864 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7865 					DRM_DEBUG_VBLANK("IH: D2 vblank\n");
7866 				}
7867 				break;
7868 			case 1: /* D2 vline */
7869 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7870 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7871 					DRM_DEBUG_VBLANK("IH: D2 vline\n");
7872 				}
7873 				break;
7874 			default:
7875 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7876 				break;
7877 			}
7878 			break;
7879 		case 3: /* D3 vblank/vline */
7880 			switch (src_data) {
7881 			case 0: /* D3 vblank */
7882 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7883 					if (rdev->irq.crtc_vblank_int[2]) {
7884 						drm_handle_vblank(rdev->ddev, 2);
7885 						rdev->pm.vblank_sync = true;
7886 						wake_up(&rdev->irq.vblank_queue);
7887 					}
7888 					if (atomic_read(&rdev->irq.pflip[2]))
7889 						radeon_crtc_handle_vblank(rdev, 2);
7890 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7891 					DRM_DEBUG_VBLANK("IH: D3 vblank\n");
7892 				}
7893 				break;
7894 			case 1: /* D3 vline */
7895 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7896 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7897 					DRM_DEBUG_VBLANK("IH: D3 vline\n");
7898 				}
7899 				break;
7900 			default:
7901 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7902 				break;
7903 			}
7904 			break;
7905 		case 4: /* D4 vblank/vline */
7906 			switch (src_data) {
7907 			case 0: /* D4 vblank */
7908 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7909 					if (rdev->irq.crtc_vblank_int[3]) {
7910 						drm_handle_vblank(rdev->ddev, 3);
7911 						rdev->pm.vblank_sync = true;
7912 						wake_up(&rdev->irq.vblank_queue);
7913 					}
7914 					if (atomic_read(&rdev->irq.pflip[3]))
7915 						radeon_crtc_handle_vblank(rdev, 3);
7916 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7917 					DRM_DEBUG_VBLANK("IH: D4 vblank\n");
7918 				}
7919 				break;
7920 			case 1: /* D4 vline */
7921 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7922 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7923 					DRM_DEBUG_VBLANK("IH: D4 vline\n");
7924 				}
7925 				break;
7926 			default:
7927 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7928 				break;
7929 			}
7930 			break;
7931 		case 5: /* D5 vblank/vline */
7932 			switch (src_data) {
7933 			case 0: /* D5 vblank */
7934 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7935 					if (rdev->irq.crtc_vblank_int[4]) {
7936 						drm_handle_vblank(rdev->ddev, 4);
7937 						rdev->pm.vblank_sync = true;
7938 						wake_up(&rdev->irq.vblank_queue);
7939 					}
7940 					if (atomic_read(&rdev->irq.pflip[4]))
7941 						radeon_crtc_handle_vblank(rdev, 4);
7942 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7943 					DRM_DEBUG_VBLANK("IH: D5 vblank\n");
7944 				}
7945 				break;
7946 			case 1: /* D5 vline */
7947 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7948 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7949 					DRM_DEBUG_VBLANK("IH: D5 vline\n");
7950 				}
7951 				break;
7952 			default:
7953 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7954 				break;
7955 			}
7956 			break;
7957 		case 6: /* D6 vblank/vline */
7958 			switch (src_data) {
7959 			case 0: /* D6 vblank */
7960 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7961 					if (rdev->irq.crtc_vblank_int[5]) {
7962 						drm_handle_vblank(rdev->ddev, 5);
7963 						rdev->pm.vblank_sync = true;
7964 						wake_up(&rdev->irq.vblank_queue);
7965 					}
7966 					if (atomic_read(&rdev->irq.pflip[5]))
7967 						radeon_crtc_handle_vblank(rdev, 5);
7968 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7969 					DRM_DEBUG_VBLANK("IH: D6 vblank\n");
7970 				}
7971 				break;
7972 			case 1: /* D6 vline */
7973 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7974 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7975 					DRM_DEBUG_VBLANK("IH: D6 vline\n");
7976 				}
7977 				break;
7978 			default:
7979 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7980 				break;
7981 			}
7982 			break;
7983 		case 8: /* D1 page flip */
7984 		case 10: /* D2 page flip */
7985 		case 12: /* D3 page flip */
7986 		case 14: /* D4 page flip */
7987 		case 16: /* D5 page flip */
7988 		case 18: /* D6 page flip */
7989 			DRM_DEBUG_VBLANK("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7990 			if (radeon_use_pflipirq > 0)
7991 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7992 			break;
7993 		case 42: /* HPD hotplug */
7994 			switch (src_data) {
7995 			case 0:
7996 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7997 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7998 					queue_hotplug = true;
7999 					DRM_DEBUG("IH: HPD1\n");
8000 				}
8001 				break;
8002 			case 1:
8003 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
8004 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8005 					queue_hotplug = true;
8006 					DRM_DEBUG("IH: HPD2\n");
8007 				}
8008 				break;
8009 			case 2:
8010 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
8011 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8012 					queue_hotplug = true;
8013 					DRM_DEBUG("IH: HPD3\n");
8014 				}
8015 				break;
8016 			case 3:
8017 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8018 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8019 					queue_hotplug = true;
8020 					DRM_DEBUG("IH: HPD4\n");
8021 				}
8022 				break;
8023 			case 4:
8024 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8025 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8026 					queue_hotplug = true;
8027 					DRM_DEBUG("IH: HPD5\n");
8028 				}
8029 				break;
8030 			case 5:
8031 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8032 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8033 					queue_hotplug = true;
8034 					DRM_DEBUG("IH: HPD6\n");
8035 				}
8036 				break;
8037 			default:
8038 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8039 				break;
8040 			}
8041 			break;
8042 		case 124: /* UVD */
8043 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8044 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8045 			break;
8046 		case 146:
8047 		case 147:
8048 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8049 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8050 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8051 			/* reset addr and status */
8052 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8053 			if (addr == 0x0 && status == 0x0)
8054 				break;
8055 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8056 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8057 				addr);
8058 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8059 				status);
8060 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8061 			break;
8062 		case 167: /* VCE */
8063 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8064 			switch (src_data) {
8065 			case 0:
8066 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8067 				break;
8068 			case 1:
8069 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8070 				break;
8071 			default:
8072 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8073 				break;
8074 			}
8075 			break;
8076 		case 176: /* GFX RB CP_INT */
8077 		case 177: /* GFX IB CP_INT */
8078 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8079 			break;
8080 		case 181: /* CP EOP event */
8081 			DRM_DEBUG("IH: CP EOP\n");
8082 			/* XXX check the bitfield order! */
8083 			me_id = (ring_id & 0x60) >> 5;
8084 			pipe_id = (ring_id & 0x18) >> 3;
8085 			queue_id = (ring_id & 0x7) >> 0;
8086 			switch (me_id) {
8087 			case 0:
8088 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8089 				break;
8090 			case 1:
8091 			case 2:
8092 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8093 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8094 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8095 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8096 				break;
8097 			}
8098 			break;
8099 		case 184: /* CP Privileged reg access */
8100 			DRM_ERROR("Illegal register access in command stream\n");
8101 			/* XXX check the bitfield order! */
8102 			me_id = (ring_id & 0x60) >> 5;
8103 			pipe_id = (ring_id & 0x18) >> 3;
8104 			queue_id = (ring_id & 0x7) >> 0;
8105 			switch (me_id) {
8106 			case 0:
8107 				/* This results in a full GPU reset, but all we need to do is soft
8108 				 * reset the CP for gfx
8109 				 */
8110 				queue_reset = true;
8111 				break;
8112 			case 1:
8113 				/* XXX compute */
8114 				queue_reset = true;
8115 				break;
8116 			case 2:
8117 				/* XXX compute */
8118 				queue_reset = true;
8119 				break;
8120 			}
8121 			break;
8122 		case 185: /* CP Privileged inst */
8123 			DRM_ERROR("Illegal instruction in command stream\n");
8124 			/* XXX check the bitfield order! */
8125 			me_id = (ring_id & 0x60) >> 5;
8126 			pipe_id = (ring_id & 0x18) >> 3;
8127 			queue_id = (ring_id & 0x7) >> 0;
8128 			switch (me_id) {
8129 			case 0:
8130 				/* This results in a full GPU reset, but all we need to do is soft
8131 				 * reset the CP for gfx
8132 				 */
8133 				queue_reset = true;
8134 				break;
8135 			case 1:
8136 				/* XXX compute */
8137 				queue_reset = true;
8138 				break;
8139 			case 2:
8140 				/* XXX compute */
8141 				queue_reset = true;
8142 				break;
8143 			}
8144 			break;
8145 		case 224: /* SDMA trap event */
8146 			/* XXX check the bitfield order! */
8147 			me_id = (ring_id & 0x3) >> 0;
8148 			queue_id = (ring_id & 0xc) >> 2;
8149 			DRM_DEBUG("IH: SDMA trap\n");
8150 			switch (me_id) {
8151 			case 0:
8152 				switch (queue_id) {
8153 				case 0:
8154 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8155 					break;
8156 				case 1:
8157 					/* XXX compute */
8158 					break;
8159 				case 2:
8160 					/* XXX compute */
8161 					break;
8162 				}
8163 				break;
8164 			case 1:
8165 				switch (queue_id) {
8166 				case 0:
8167 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8168 					break;
8169 				case 1:
8170 					/* XXX compute */
8171 					break;
8172 				case 2:
8173 					/* XXX compute */
8174 					break;
8175 				}
8176 				break;
8177 			}
8178 			break;
8179 		case 230: /* thermal low to high */
8180 			DRM_DEBUG("IH: thermal low to high\n");
8181 			rdev->pm.dpm.thermal.high_to_low = false;
8182 			queue_thermal = true;
8183 			break;
8184 		case 231: /* thermal high to low */
8185 			DRM_DEBUG("IH: thermal high to low\n");
8186 			rdev->pm.dpm.thermal.high_to_low = true;
8187 			queue_thermal = true;
8188 			break;
8189 		case 233: /* GUI IDLE */
8190 			DRM_DEBUG("IH: GUI idle\n");
8191 			break;
8192 		case 241: /* SDMA Privileged inst */
8193 		case 247: /* SDMA Privileged inst */
8194 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8195 			/* XXX check the bitfield order! */
8196 			me_id = (ring_id & 0x3) >> 0;
8197 			queue_id = (ring_id & 0xc) >> 2;
8198 			switch (me_id) {
8199 			case 0:
8200 				switch (queue_id) {
8201 				case 0:
8202 					queue_reset = true;
8203 					break;
8204 				case 1:
8205 					/* XXX compute */
8206 					queue_reset = true;
8207 					break;
8208 				case 2:
8209 					/* XXX compute */
8210 					queue_reset = true;
8211 					break;
8212 				}
8213 				break;
8214 			case 1:
8215 				switch (queue_id) {
8216 				case 0:
8217 					queue_reset = true;
8218 					break;
8219 				case 1:
8220 					/* XXX compute */
8221 					queue_reset = true;
8222 					break;
8223 				case 2:
8224 					/* XXX compute */
8225 					queue_reset = true;
8226 					break;
8227 				}
8228 				break;
8229 			}
8230 			break;
8231 		default:
8232 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8233 			break;
8234 		}
8235 
8236 		/* wptr/rptr are in bytes! */
8237 		rptr += 16;
8238 		rptr &= rdev->ih.ptr_mask;
8239 		WREG32(IH_RB_RPTR, rptr);
8240 	}
8241 	if (queue_hotplug)
8242 		schedule_work(&rdev->hotplug_work);
8243 	if (queue_reset) {
8244 		rdev->needs_reset = true;
8245 		wake_up_all(&rdev->fence_queue);
8246 	}
8247 	if (queue_thermal)
8248 		schedule_work(&rdev->pm.dpm.thermal.work);
8249 	rdev->ih.rptr = rptr;
8250 	atomic_set(&rdev->ih.lock, 0);
8251 
8252 	/* make sure wptr hasn't changed while processing */
8253 	wptr = cik_get_ih_wptr(rdev);
8254 	if (wptr != rptr)
8255 		goto restart_ih;
8256 
8257 	return IRQ_HANDLED;
8258 }
8259 
8260 /*
8261  * startup/shutdown callbacks
8262  */
8263 /**
8264  * cik_startup - program the asic to a functional state
8265  *
8266  * @rdev: radeon_device pointer
8267  *
8268  * Programs the asic to a functional state (CIK).
8269  * Called by cik_init() and cik_resume().
8270  * Returns 0 for success, error for failure.
8271  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPUs need MC ucode loaded here unless DPM already did it */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* pick the save/restore register list matching the IGP family */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start fence processing on each ring before the rings are enabled */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE failures are non-fatal: the ring is simply disabled below */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	r = radeon_vce_resume(rdev);
	if (!r) {
		r = vce_v2_0_resume(rdev);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE1_INDEX);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE2_INDEX);
	}
	if (r) {
		dev_err(rdev->dev, "VCE init error (%d).\n", r);
		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* pick the nop packet: old Hawaii firmware cannot handle type-3 nops */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* UVD ring is only brought up if resume above succeeded (ring_size != 0) */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = -ENOENT;

	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	/* -ENOENT here means VCE rings were disabled; that is not an error */
	if (!r)
		r = vce_v1_0_init(rdev);
	else if (r != -ENOENT)
		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	r = radeon_kfd_resume(rdev);
	if (r)
		return r;

	return 0;
}
8517 
8518 /**
8519  * cik_resume - resume the asic to a functional state
8520  *
8521  * @rdev: radeon_device pointer
8522  *
8523  * Programs the asic to a functional state (CIK).
8524  * Called at resume.
8525  * Returns 0 for success, error for failure.
8526  */
8527 int cik_resume(struct radeon_device *rdev)
8528 {
8529 	int r;
8530 
8531 	/* post card */
8532 	atom_asic_init(rdev->mode_info.atom_context);
8533 
8534 	/* init golden registers */
8535 	cik_init_golden_registers(rdev);
8536 
8537 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8538 		radeon_pm_resume(rdev);
8539 
8540 	rdev->accel_working = true;
8541 	r = cik_startup(rdev);
8542 	if (r) {
8543 		DRM_ERROR("cik startup failed on resume\n");
8544 		rdev->accel_working = false;
8545 		return r;
8546 	}
8547 
8548 	return r;
8549 
8550 }
8551 
8552 /**
8553  * cik_suspend - suspend the asic
8554  *
8555  * @rdev: radeon_device pointer
8556  *
8557  * Bring the chip into a state suitable for suspend (CIK).
8558  * Called at suspend.
8559  * Returns 0 for success.
8560  */
8561 int cik_suspend(struct radeon_device *rdev)
8562 {
8563 	radeon_kfd_suspend(rdev);
8564 	radeon_pm_suspend(rdev);
8565 	dce6_audio_fini(rdev);
8566 	radeon_vm_manager_fini(rdev);
8567 	cik_cp_enable(rdev, false);
8568 	cik_sdma_enable(rdev, false);
8569 	uvd_v1_0_fini(rdev);
8570 	radeon_uvd_suspend(rdev);
8571 	radeon_vce_suspend(rdev);
8572 	cik_fini_pg(rdev);
8573 	cik_fini_cg(rdev);
8574 	cik_irq_suspend(rdev);
8575 	radeon_wb_disable(rdev);
8576 	cik_pcie_gart_disable(rdev);
8577 	return 0;
8578 }
8579 
/* Plan is to move initialization in that function and use
 * helper function so that radeon_device_init pretty much
 * do nothing more than calling asic specific function. This
 * should also allow to remove a bunch of callback function
 * like vram_info.
 */
/**
 * cik_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * Setup asic specific driver variables and program the hw
 * to a functional state (CIK).
 * Called at driver startup.
 * Returns 0 for success, errors for failure.
 */
int cik_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		/* NOTE(review): message mentions "cayman" but this is the CIK
		 * init path — looks like a copy/paste from ni.c; confirm
		 * against upstream before changing the string. */
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	cik_init_golden_registers(rdev);
	/* Initialize scratch registers */
	cik_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = cik_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* Load the microcode if it is not already cached.  IGPs (Kaveri/
	 * Kabini) have no dedicated vram, so no MC firmware is needed for
	 * them; discrete parts additionally require mc_fw. */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	}

	/* Initialize power management */
	radeon_pm_init(rdev);

	/* Allocate the gfx ring plus the two compute rings; the compute
	 * rings are driven via doorbells rather than register writes. */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
	if (r)
		return r;

	/* The two sDMA engine rings */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	/* UVD/VCE are optional: init failure just means the rings are
	 * not set up; cik_startup() skips rings with ring_size == 0. */
	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	r = radeon_vce_init(rdev);
	if (!r) {
		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);

		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	/* First full hw bring-up; on failure tear everything back down
	 * and run without acceleration rather than failing the load. */
	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cik_cp_fini(rdev);
		cik_sdma_fini(rdev);
		cik_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		cik_mec_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cik_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not suffient for advanced operations.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
8750 
8751 /**
8752  * cik_fini - asic specific driver and hw fini
8753  *
8754  * @rdev: radeon_device pointer
8755  *
8756  * Tear down the asic specific driver variables and program the hw
8757  * to an idle state (CIK).
8758  * Called at driver unload.
8759  */
8760 void cik_fini(struct radeon_device *rdev)
8761 {
8762 	radeon_pm_fini(rdev);
8763 	cik_cp_fini(rdev);
8764 	cik_sdma_fini(rdev);
8765 	cik_fini_pg(rdev);
8766 	cik_fini_cg(rdev);
8767 	cik_irq_fini(rdev);
8768 	sumo_rlc_fini(rdev);
8769 	cik_mec_fini(rdev);
8770 	radeon_wb_fini(rdev);
8771 	radeon_vm_manager_fini(rdev);
8772 	radeon_ib_pool_fini(rdev);
8773 	radeon_irq_kms_fini(rdev);
8774 	uvd_v1_0_fini(rdev);
8775 	radeon_uvd_fini(rdev);
8776 	radeon_vce_fini(rdev);
8777 	cik_pcie_gart_fini(rdev);
8778 	r600_vram_scratch_fini(rdev);
8779 	radeon_gem_fini(rdev);
8780 	radeon_fence_driver_fini(rdev);
8781 	radeon_bo_fini(rdev);
8782 	radeon_atombios_fini(rdev);
8783 	cik_fini_microcode(rdev);
8784 	kfree(rdev->bios);
8785 	rdev->bios = NULL;
8786 }
8787 
8788 void dce8_program_fmt(struct drm_encoder *encoder)
8789 {
8790 	struct drm_device *dev = encoder->dev;
8791 	struct radeon_device *rdev = dev->dev_private;
8792 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8793 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8794 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8795 	int bpc = 0;
8796 	u32 tmp = 0;
8797 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8798 
8799 	if (connector) {
8800 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8801 		bpc = radeon_get_monitor_bpc(connector);
8802 		dither = radeon_connector->dither;
8803 	}
8804 
8805 	/* LVDS/eDP FMT is set up by atom */
8806 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8807 		return;
8808 
8809 	/* not needed for analog */
8810 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8811 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8812 		return;
8813 
8814 	if (bpc == 0)
8815 		return;
8816 
8817 	switch (bpc) {
8818 	case 6:
8819 		if (dither == RADEON_FMT_DITHER_ENABLE)
8820 			/* XXX sort out optimal dither settings */
8821 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8822 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8823 		else
8824 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8825 		break;
8826 	case 8:
8827 		if (dither == RADEON_FMT_DITHER_ENABLE)
8828 			/* XXX sort out optimal dither settings */
8829 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8830 				FMT_RGB_RANDOM_ENABLE |
8831 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8832 		else
8833 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8834 		break;
8835 	case 10:
8836 		if (dither == RADEON_FMT_DITHER_ENABLE)
8837 			/* XXX sort out optimal dither settings */
8838 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8839 				FMT_RGB_RANDOM_ENABLE |
8840 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8841 		else
8842 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8843 		break;
8844 	default:
8845 		/* not needed */
8846 		break;
8847 	}
8848 
8849 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8850 }
8851 
/* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Setup up the line buffer allocation for
 * the selected display controller (CIK).
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode)
{
	/* tmp = LB partition config selector, buffer_alloc = DMIF buffers */
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controllers.
	 * There are 3 partitions per LB. Select the number of partitions
	 * to enable based on the display width.  For display widths larger
	 * than 4096, you need use to use 2 display controllers and combine
	 * them using the stereo blender.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (mode->crtc_hdisplay < 1920) {
			tmp = 1;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 2560) {
			tmp = 2;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 4096) {
			tmp = 0;
			/* IGPs have less memory bandwidth, give them fewer buffers */
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		} else {
			DRM_DEBUG_KMS("Mode too big for LB!\n");
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		}
	} else {
		/* crtc disabled: minimal config, no DMIF buffers */
		tmp = 1;
		buffer_alloc = 0;
	}

	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

	/* request the DMIF buffer allocation and poll for completion */
	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	/* translate the partition config back into a size in pixels */
	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 1:
			return 1920 * 2;
		case 2:
			return 2560 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
8926 
8927 /**
8928  * cik_get_number_of_dram_channels - get the number of dram channels
8929  *
8930  * @rdev: radeon_device pointer
8931  *
8932  * Look up the number of video ram channels (CIK).
8933  * Used for display watermark bandwidth calculations
8934  * Returns the number of dram channels
8935  */
8936 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8937 {
8938 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8939 
8940 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8941 	case 0:
8942 	default:
8943 		return 1;
8944 	case 1:
8945 		return 2;
8946 	case 2:
8947 		return 4;
8948 	case 3:
8949 		return 8;
8950 	case 4:
8951 		return 3;
8952 	case 5:
8953 		return 6;
8954 	case 6:
8955 		return 10;
8956 	case 7:
8957 		return 12;
8958 	case 8:
8959 		return 16;
8960 	}
8961 }
8962 
/* dce8_wm_params - inputs for the dce8 display watermark calculations.
 * Filled in by dce8_program_watermarks() and consumed by the
 * dce8_*_bandwidth*() and dce8_latency_watermark() helpers.
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8978 
8979 /**
8980  * dce8_dram_bandwidth - get the dram bandwidth
8981  *
8982  * @wm: watermark calculation data
8983  *
8984  * Calculate the raw dram bandwidth (CIK).
8985  * Used for display watermark bandwidth calculations
8986  * Returns the dram bandwidth in MBytes/s
8987  */
8988 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8989 {
8990 	/* Calculate raw DRAM Bandwidth */
8991 	fixed20_12 dram_efficiency; /* 0.7 */
8992 	fixed20_12 yclk, dram_channels, bandwidth;
8993 	fixed20_12 a;
8994 
8995 	a.full = dfixed_const(1000);
8996 	yclk.full = dfixed_const(wm->yclk);
8997 	yclk.full = dfixed_div(yclk, a);
8998 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8999 	a.full = dfixed_const(10);
9000 	dram_efficiency.full = dfixed_const(7);
9001 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9002 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9003 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9004 
9005 	return dfixed_trunc(bandwidth);
9006 }
9007 
9008 /**
9009  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9010  *
9011  * @wm: watermark calculation data
9012  *
9013  * Calculate the dram bandwidth used for display (CIK).
9014  * Used for display watermark bandwidth calculations
9015  * Returns the dram bandwidth for display in MBytes/s
9016  */
9017 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9018 {
9019 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9020 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9021 	fixed20_12 yclk, dram_channels, bandwidth;
9022 	fixed20_12 a;
9023 
9024 	a.full = dfixed_const(1000);
9025 	yclk.full = dfixed_const(wm->yclk);
9026 	yclk.full = dfixed_div(yclk, a);
9027 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9028 	a.full = dfixed_const(10);
9029 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9030 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9031 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9032 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9033 
9034 	return dfixed_trunc(bandwidth);
9035 }
9036 
9037 /**
9038  * dce8_data_return_bandwidth - get the data return bandwidth
9039  *
9040  * @wm: watermark calculation data
9041  *
9042  * Calculate the data return bandwidth used for display (CIK).
9043  * Used for display watermark bandwidth calculations
9044  * Returns the data return bandwidth in MBytes/s
9045  */
9046 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9047 {
9048 	/* Calculate the display Data return Bandwidth */
9049 	fixed20_12 return_efficiency; /* 0.8 */
9050 	fixed20_12 sclk, bandwidth;
9051 	fixed20_12 a;
9052 
9053 	a.full = dfixed_const(1000);
9054 	sclk.full = dfixed_const(wm->sclk);
9055 	sclk.full = dfixed_div(sclk, a);
9056 	a.full = dfixed_const(10);
9057 	return_efficiency.full = dfixed_const(8);
9058 	return_efficiency.full = dfixed_div(return_efficiency, a);
9059 	a.full = dfixed_const(32);
9060 	bandwidth.full = dfixed_mul(a, sclk);
9061 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9062 
9063 	return dfixed_trunc(bandwidth);
9064 }
9065 
9066 /**
9067  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9068  *
9069  * @wm: watermark calculation data
9070  *
9071  * Calculate the dmif bandwidth used for display (CIK).
9072  * Used for display watermark bandwidth calculations
9073  * Returns the dmif bandwidth in MBytes/s
9074  */
9075 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9076 {
9077 	/* Calculate the DMIF Request Bandwidth */
9078 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9079 	fixed20_12 disp_clk, bandwidth;
9080 	fixed20_12 a, b;
9081 
9082 	a.full = dfixed_const(1000);
9083 	disp_clk.full = dfixed_const(wm->disp_clk);
9084 	disp_clk.full = dfixed_div(disp_clk, a);
9085 	a.full = dfixed_const(32);
9086 	b.full = dfixed_mul(a, disp_clk);
9087 
9088 	a.full = dfixed_const(10);
9089 	disp_clk_request_efficiency.full = dfixed_const(8);
9090 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9091 
9092 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9093 
9094 	return dfixed_trunc(bandwidth);
9095 }
9096 
9097 /**
9098  * dce8_available_bandwidth - get the min available bandwidth
9099  *
9100  * @wm: watermark calculation data
9101  *
9102  * Calculate the min available bandwidth used for display (CIK).
9103  * Used for display watermark bandwidth calculations
9104  * Returns the min available bandwidth in MBytes/s
9105  */
9106 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9107 {
9108 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9109 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9110 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9111 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9112 
9113 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9114 }
9115 
9116 /**
9117  * dce8_average_bandwidth - get the average available bandwidth
9118  *
9119  * @wm: watermark calculation data
9120  *
9121  * Calculate the average available bandwidth used for display (CIK).
9122  * Used for display watermark bandwidth calculations
9123  * Returns the average available bandwidth in MBytes/s
9124  */
9125 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9126 {
9127 	/* Calculate the display mode Average Bandwidth
9128 	 * DisplayMode should contain the source and destination dimensions,
9129 	 * timing, etc.
9130 	 */
9131 	fixed20_12 bpp;
9132 	fixed20_12 line_time;
9133 	fixed20_12 src_width;
9134 	fixed20_12 bandwidth;
9135 	fixed20_12 a;
9136 
9137 	a.full = dfixed_const(1000);
9138 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9139 	line_time.full = dfixed_div(line_time, a);
9140 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9141 	src_width.full = dfixed_const(wm->src_width);
9142 	bandwidth.full = dfixed_mul(src_width, bpp);
9143 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9144 	bandwidth.full = dfixed_div(bandwidth, line_time);
9145 
9146 	return dfixed_trunc(bandwidth);
9147 }
9148 
9149 /**
9150  * dce8_latency_watermark - get the latency watermark
9151  *
9152  * @wm: watermark calculation data
9153  *
9154  * Calculate the latency watermark (CIK).
9155  * Used for display watermark bandwidth calculations
9156  * Returns the latency watermark in ns
9157  */
9158 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9159 {
9160 	/* First calculate the latency in ns */
9161 	u32 mc_latency = 2000; /* 2000 ns. */
9162 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9163 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9164 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9165 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9166 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9167 		(wm->num_heads * cursor_line_pair_return_time);
9168 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9169 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9170 	u32 tmp, dmif_size = 12288;
9171 	fixed20_12 a, b, c;
9172 
9173 	if (wm->num_heads == 0)
9174 		return 0;
9175 
9176 	a.full = dfixed_const(2);
9177 	b.full = dfixed_const(1);
9178 	if ((wm->vsc.full > a.full) ||
9179 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9180 	    (wm->vtaps >= 5) ||
9181 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9182 		max_src_lines_per_dst_line = 4;
9183 	else
9184 		max_src_lines_per_dst_line = 2;
9185 
9186 	a.full = dfixed_const(available_bandwidth);
9187 	b.full = dfixed_const(wm->num_heads);
9188 	a.full = dfixed_div(a, b);
9189 
9190 	b.full = dfixed_const(mc_latency + 512);
9191 	c.full = dfixed_const(wm->disp_clk);
9192 	b.full = dfixed_div(b, c);
9193 
9194 	c.full = dfixed_const(dmif_size);
9195 	b.full = dfixed_div(c, b);
9196 
9197 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9198 
9199 	b.full = dfixed_const(1000);
9200 	c.full = dfixed_const(wm->disp_clk);
9201 	b.full = dfixed_div(c, b);
9202 	c.full = dfixed_const(wm->bytes_per_pixel);
9203 	b.full = dfixed_mul(b, c);
9204 
9205 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9206 
9207 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9208 	b.full = dfixed_const(1000);
9209 	c.full = dfixed_const(lb_fill_bw);
9210 	b.full = dfixed_div(c, b);
9211 	a.full = dfixed_div(a, b);
9212 	line_fill_time = dfixed_trunc(a);
9213 
9214 	if (line_fill_time < wm->active_time)
9215 		return latency;
9216 	else
9217 		return latency + (line_fill_time - wm->active_time);
9218 
9219 }
9220 
9221 /**
9222  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9223  * average and available dram bandwidth
9224  *
9225  * @wm: watermark calculation data
9226  *
9227  * Check if the display average bandwidth fits in the display
9228  * dram bandwidth (CIK).
9229  * Used for display watermark bandwidth calculations
9230  * Returns true if the display fits, false if not.
9231  */
9232 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9233 {
9234 	if (dce8_average_bandwidth(wm) <=
9235 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9236 		return true;
9237 	else
9238 		return false;
9239 }
9240 
9241 /**
9242  * dce8_average_bandwidth_vs_available_bandwidth - check
9243  * average and available bandwidth
9244  *
9245  * @wm: watermark calculation data
9246  *
9247  * Check if the display average bandwidth fits in the display
9248  * available bandwidth (CIK).
9249  * Used for display watermark bandwidth calculations
9250  * Returns true if the display fits, false if not.
9251  */
9252 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9253 {
9254 	if (dce8_average_bandwidth(wm) <=
9255 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9256 		return true;
9257 	else
9258 		return false;
9259 }
9260 
9261 /**
9262  * dce8_check_latency_hiding - check latency hiding
9263  *
9264  * @wm: watermark calculation data
9265  *
9266  * Check latency hiding (CIK).
9267  * Used for display watermark bandwidth calculations
9268  * Returns true if the display fits, false if not.
9269  */
9270 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9271 {
9272 	u32 lb_partitions = wm->lb_size / wm->src_width;
9273 	u32 line_time = wm->active_time + wm->blank_time;
9274 	u32 latency_tolerant_lines;
9275 	u32 latency_hiding;
9276 	fixed20_12 a;
9277 
9278 	a.full = dfixed_const(1);
9279 	if (wm->vsc.full > a.full)
9280 		latency_tolerant_lines = 1;
9281 	else {
9282 		if (lb_partitions <= (wm->vtaps + 1))
9283 			latency_tolerant_lines = 1;
9284 		else
9285 			latency_tolerant_lines = 2;
9286 	}
9287 
9288 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9289 
9290 	if (dce8_latency_watermark(wm) <= latency_hiding)
9291 		return true;
9292 	else
9293 		return false;
9294 }
9295 
9296 /**
9297  * dce8_program_watermarks - program display watermarks
9298  *
9299  * @rdev: radeon_device pointer
9300  * @radeon_crtc: the selected display controller
9301  * @lb_size: line buffer size
9302  * @num_heads: number of display controllers in use
9303  *
9304  * Calculate and program the display watermarks for the
9305  * selected display controller (CIK).
9306  */
9307 static void dce8_program_watermarks(struct radeon_device *rdev,
9308 				    struct radeon_crtc *radeon_crtc,
9309 				    u32 lb_size, u32 num_heads)
9310 {
9311 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9312 	struct dce8_wm_params wm_low, wm_high;
9313 	u32 pixel_period;
9314 	u32 line_time = 0;
9315 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9316 	u32 tmp, wm_mask;
9317 
9318 	if (radeon_crtc->base.enabled && num_heads && mode) {
9319 		pixel_period = 1000000 / (u32)mode->clock;
9320 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9321 
9322 		/* watermark for high clocks */
9323 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9324 		    rdev->pm.dpm_enabled) {
9325 			wm_high.yclk =
9326 				radeon_dpm_get_mclk(rdev, false) * 10;
9327 			wm_high.sclk =
9328 				radeon_dpm_get_sclk(rdev, false) * 10;
9329 		} else {
9330 			wm_high.yclk = rdev->pm.current_mclk * 10;
9331 			wm_high.sclk = rdev->pm.current_sclk * 10;
9332 		}
9333 
9334 		wm_high.disp_clk = mode->clock;
9335 		wm_high.src_width = mode->crtc_hdisplay;
9336 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9337 		wm_high.blank_time = line_time - wm_high.active_time;
9338 		wm_high.interlaced = false;
9339 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9340 			wm_high.interlaced = true;
9341 		wm_high.vsc = radeon_crtc->vsc;
9342 		wm_high.vtaps = 1;
9343 		if (radeon_crtc->rmx_type != RMX_OFF)
9344 			wm_high.vtaps = 2;
9345 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9346 		wm_high.lb_size = lb_size;
9347 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9348 		wm_high.num_heads = num_heads;
9349 
9350 		/* set for high clocks */
9351 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9352 
9353 		/* possibly force display priority to high */
9354 		/* should really do this at mode validation time... */
9355 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9356 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9357 		    !dce8_check_latency_hiding(&wm_high) ||
9358 		    (rdev->disp_priority == 2)) {
9359 			DRM_DEBUG_KMS("force priority to high\n");
9360 		}
9361 
9362 		/* watermark for low clocks */
9363 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9364 		    rdev->pm.dpm_enabled) {
9365 			wm_low.yclk =
9366 				radeon_dpm_get_mclk(rdev, true) * 10;
9367 			wm_low.sclk =
9368 				radeon_dpm_get_sclk(rdev, true) * 10;
9369 		} else {
9370 			wm_low.yclk = rdev->pm.current_mclk * 10;
9371 			wm_low.sclk = rdev->pm.current_sclk * 10;
9372 		}
9373 
9374 		wm_low.disp_clk = mode->clock;
9375 		wm_low.src_width = mode->crtc_hdisplay;
9376 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9377 		wm_low.blank_time = line_time - wm_low.active_time;
9378 		wm_low.interlaced = false;
9379 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9380 			wm_low.interlaced = true;
9381 		wm_low.vsc = radeon_crtc->vsc;
9382 		wm_low.vtaps = 1;
9383 		if (radeon_crtc->rmx_type != RMX_OFF)
9384 			wm_low.vtaps = 2;
9385 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9386 		wm_low.lb_size = lb_size;
9387 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9388 		wm_low.num_heads = num_heads;
9389 
9390 		/* set for low clocks */
9391 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9392 
9393 		/* possibly force display priority to high */
9394 		/* should really do this at mode validation time... */
9395 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9396 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9397 		    !dce8_check_latency_hiding(&wm_low) ||
9398 		    (rdev->disp_priority == 2)) {
9399 			DRM_DEBUG_KMS("force priority to high\n");
9400 		}
9401 	}
9402 
9403 	/* select wm A */
9404 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9405 	tmp = wm_mask;
9406 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9407 	tmp |= LATENCY_WATERMARK_MASK(1);
9408 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9409 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9410 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9411 		LATENCY_HIGH_WATERMARK(line_time)));
9412 	/* select wm B */
9413 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9414 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9415 	tmp |= LATENCY_WATERMARK_MASK(2);
9416 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9417 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9418 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9419 		LATENCY_HIGH_WATERMARK(line_time)));
9420 	/* restore original selection */
9421 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9422 
9423 	/* save values for DPM */
9424 	radeon_crtc->line_time = line_time;
9425 	radeon_crtc->wm_high = latency_watermark_a;
9426 	radeon_crtc->wm_low = latency_watermark_b;
9427 }
9428 
9429 /**
9430  * dce8_bandwidth_update - program display watermarks
9431  *
9432  * @rdev: radeon_device pointer
9433  *
9434  * Calculate and program the display watermarks and line
9435  * buffer allocation (CIK).
9436  */
9437 void dce8_bandwidth_update(struct radeon_device *rdev)
9438 {
9439 	struct drm_display_mode *mode = NULL;
9440 	u32 num_heads = 0, lb_size;
9441 	int i;
9442 
9443 	if (!rdev->mode_info.mode_config_initialized)
9444 		return;
9445 
9446 	radeon_update_display_priority(rdev);
9447 
9448 	for (i = 0; i < rdev->num_crtc; i++) {
9449 		if (rdev->mode_info.crtcs[i]->base.enabled)
9450 			num_heads++;
9451 	}
9452 	for (i = 0; i < rdev->num_crtc; i++) {
9453 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9454 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9455 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9456 	}
9457 }
9458 
9459 /**
9460  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9461  *
9462  * @rdev: radeon_device pointer
9463  *
9464  * Fetches a GPU clock counter snapshot (SI).
9465  * Returns the 64 bit clock counter snapshot.
9466  */
9467 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9468 {
9469 	uint64_t clock;
9470 
9471 	mutex_lock(&rdev->gpu_clock_mutex);
9472 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9473 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9474 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9475 	mutex_unlock(&rdev->gpu_clock_mutex);
9476 	return clock;
9477 }
9478 
/* cik_set_uvd_clock - program one UVD clock (vclk or dclk)
 *
 * @rdev: radeon_device pointer
 * @clock: requested clock frequency
 * @cntl_reg: SMC control register for this clock
 * @status_reg: SMC status register for this clock
 *
 * Looks up the divider for @clock via atombios, programs the post
 * divider and waits for the clock to report as stable.
 * Returns 0 on success, -ETIMEDOUT if the clock never stabilizes,
 * or the atombios error.
 */
static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
                              u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	/* program the post divider, leaving the other control bits alone */
	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

	/* wait up to ~1s (100 * 10ms) for the clock status bit */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}
9506 
9507 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9508 {
9509 	int r = 0;
9510 
9511 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9512 	if (r)
9513 		return r;
9514 
9515 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9516 	return r;
9517 }
9518 
9519 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9520 {
9521 	int r, i;
9522 	struct atom_clock_dividers dividers;
9523 	u32 tmp;
9524 
9525 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9526 					   ecclk, false, &dividers);
9527 	if (r)
9528 		return r;
9529 
9530 	for (i = 0; i < 100; i++) {
9531 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9532 			break;
9533 		mdelay(10);
9534 	}
9535 	if (i == 100)
9536 		return -ETIMEDOUT;
9537 
9538 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9539 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9540 	tmp |= dividers.post_divider;
9541 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9542 
9543 	for (i = 0; i < 100; i++) {
9544 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9545 			break;
9546 		mdelay(10);
9547 	}
9548 	if (i == 100)
9549 		return -ETIMEDOUT;
9550 
9551 	return 0;
9552 }
9553 
/**
 * cik_pcie_gen3_enable - retrain the PCIe link to gen2/gen3 speeds
 *
 * @rdev: radeon_device pointer
 *
 * Switches the PCIe link of a discrete CIK GPU to the fastest data rate
 * (gen2 or gen3) that both the GPU and the root port support, redoing
 * link equalization for gen3 when needed.  Silently returns for IGPs,
 * non-PCIE parts, when disabled via radeon.pcie_gen2=0, or when the
 * link already runs at the best supported rate.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* user asked to stay at gen1 */
	if (radeon_pcie_gen2 == 0)
		return;

	/* IGPs have no external PCIe link to retrain */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* mask of link speeds the platform supports */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to gain if only gen1 (2.5GT/s) is available */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* PCIe capability offsets in config space (DragonFly-specific lookup) */
	bridge_pos = pci_get_pciecap_ptr(root->dev.bsddev);
	if (!bridge_pos)
		return;

	gpu_pos = pci_get_pciecap_ptr(rdev->pdev->dev.bsddev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save LNKCTL of both ends so the HAWD bit can be restored later */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			/* disable HW autonomous width changes during retraining */
			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back to its maximum detected width first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries; stop early once the
			 * device reports a pending transaction */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* snapshot LNKCTL/LNKCTL2 so selected bits can be restored below */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore the saved bit 4 and bits 11:9 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed (LNKCTL2 bits 3:0) on the GPU */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the SW-initiated speed change */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9710 
/**
 * cik_program_aspm - configure PCIe ASPM (L0s/L1) power savings
 *
 * @rdev: radeon_device pointer
 *
 * Programs the PCIe link-controller and SMC clock registers for Active
 * State Power Management on discrete CIK parts.  Skipped for IGPs,
 * non-PCIE parts, or when disabled via the radeon.aspm module parameter.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* policy knobs; currently everything is left enabled (all false) */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the transmitted N_FTS value to 0x24 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* set up the L0s/L1 inactivity timers; the write is deferred to
	 * either the !disable_l1 branch below or the else branch */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PLLs to power down in the L1/txs2 states (both PIFs) */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
#ifdef zMN_TODO
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
#else
				/* CLKPM capability probing not ported to DragonFly yet */
				clk_req_support = false;
#endif
			} else {
				clk_req_support = false;
			}

			/* NOTE(review): with zMN_TODO undefined, clk_req_support
			 * is always false above, so this branch is never taken */
			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: commit only the L0s/PMI settings from above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the PCIe block */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			/* NOTE(review): presumably lane reversal on both ends —
			 * turn the L0s inactivity timer back off in that case */
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
9862