xref: /dflybsd-src/sys/dev/drm/radeon/cik.c (revision 9ebbd47df7abd81e0803cf228d15b3c372ad85db)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include "cikd.h"
30 #include "atom.h"
31 #include "cik_blit_shaders.h"
32 #include "radeon_ucode.h"
33 #include "clearstate_ci.h"
34 
35 #define PCI_EXP_LNKCTL PCIER_LINKCTRL /* 16 */
36 #define PCI_EXP_LNKCTL2 48
37 #define PCI_EXP_LNKCTL_HAWD PCIEM_LNKCTL_HAWD /* 0x0200 */
38 #define PCI_EXP_DEVSTA PCIER_DEVSTS /* 10 */
39 #define PCI_EXP_DEVSTA_TRPND 0x0020
40 #define PCI_EXP_LNKCAP_CLKPM 0x00040000
41 
42 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
47 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
48 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
49 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
50 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
51 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
52 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
53 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
54 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
55 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
56 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
57 MODULE_FIRMWARE("radeon/KABINI_me.bin");
58 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
59 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
60 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
61 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
62 
63 static void cik_rlc_stop(struct radeon_device *rdev);
64 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
65 static void cik_program_aspm(struct radeon_device *rdev);
66 static void cik_init_pg(struct radeon_device *rdev);
67 static void cik_init_cg(struct radeon_device *rdev);
68 static void cik_fini_pg(struct radeon_device *rdev);
69 static void cik_fini_cg(struct radeon_device *rdev);
70 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
71 					  bool enable);
72 
73 /* get temperature in millidegrees */
74 int ci_get_temp(struct radeon_device *rdev)
75 {
76 	u32 temp;
77 	int actual_temp = 0;
78 
79 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
80 		CTF_TEMP_SHIFT;
81 
82 	if (temp & 0x200)
83 		actual_temp = 255;
84 	else
85 		actual_temp = temp & 0x1ff;
86 
87 	actual_temp = actual_temp * 1000;
88 
89 	return actual_temp;
90 }
91 
92 /* get temperature in millidegrees */
93 int kv_get_temp(struct radeon_device *rdev)
94 {
95 	u32 temp;
96 	int actual_temp = 0;
97 
98 	temp = RREG32_SMC(0xC0300E0C);
99 
100 	if (temp)
101 		actual_temp = (temp / 8) - 49;
102 	else
103 		actual_temp = 0;
104 
105 	actual_temp = actual_temp * 1000;
106 
107 	return actual_temp;
108 }
109 
110 /*
111  * Indirect registers accessor
112  */
/*
 * Read a PCIE port register through the PCIE_INDEX/PCIE_DATA indirect
 * access pair. The intermediate read of PCIE_INDEX is a posting read
 * that makes sure the index write has landed before touching DATA —
 * do not reorder these accesses.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* posting read: flush the index write */
	r = RREG32(PCIE_DATA);
	return r;
}
122 
/*
 * Write a PCIE port register through the PCIE_INDEX/PCIE_DATA indirect
 * access pair. Both posting reads are required: the first ensures the
 * index write landed before the data write, the second flushes the data
 * write itself. Do not reorder these accesses.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* posting read: flush the index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* posting read: flush the data write */
}
130 
/*
 * RLC save/restore register list for Spectre (Kaveri) parts.
 * Layout appears to be pairs of ((broadcast/SE-select << 16) |
 * (register byte offset >> 2)) followed by a zero value slot, with a
 * bare small constant (0x3, 0x5) introducing a sub-list of that many
 * entries. NOTE(review): format inferred from the table layout —
 * confirm against the RLC init code that consumes it.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
577 
/*
 * RLC save/restore register list for Kalindi (Kabini) parts.
 * Same apparent layout as the Spectre list above: pairs of
 * ((broadcast/SE-select << 16) | (register byte offset >> 2)) followed
 * by a zero value slot, with a bare small constant (0x3, 0x5)
 * introducing a sub-list of that many entries. NOTE(review): format
 * inferred from the table layout — confirm against the consuming
 * RLC init code.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
902 
/*
 * Golden (recommended power-on) SPM settings for Bonaire.
 * Triples of {register offset, and-mask, value} — NOTE(review): format
 * inferred from the matching golden tables below; confirm against the
 * register-sequence programming helper that consumes them.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
907 
/*
 * Golden register settings shared by all Bonaire variants.
 * Triples of {register offset, and-mask, value}.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
915 
/*
 * Golden register settings for Bonaire.
 * Triples of {register offset, and-mask, value}.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
960 
/*
 * Medium-grain / coarse-grain clockgating init values for Bonaire.
 * Triples of {register offset, and-mask, value}.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1046 
/*
 * Golden (recommended power-on) SPM settings for Spectre (Kaveri).
 * Triples of {register offset, and-mask, value}.
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1051 
/*
 * Golden register settings shared by all Spectre (Kaveri) variants.
 * Triples of {register offset, and-mask, value}.
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1059 
1060 static const u32 spectre_golden_registers[] =
1061 {
1062 	0x3c000, 0xffff1fff, 0x96940200,
1063 	0x3c00c, 0xffff0001, 0xff000000,
1064 	0x3c200, 0xfffc0fff, 0x00000100,
1065 	0x6ed8, 0x00010101, 0x00010000,
1066 	0x9834, 0xf00fffff, 0x00000400,
1067 	0x9838, 0xfffffffc, 0x00020200,
1068 	0x5bb0, 0x000000f0, 0x00000070,
1069 	0x5bc0, 0xf0311fff, 0x80300000,
1070 	0x98f8, 0x73773777, 0x12010001,
1071 	0x9b7c, 0x00ff0000, 0x00fc0000,
1072 	0x2f48, 0x73773777, 0x12010001,
1073 	0x8a14, 0xf000003f, 0x00000007,
1074 	0x8b24, 0xffffffff, 0x00ffffff,
1075 	0x28350, 0x3f3f3fff, 0x00000082,
1076 	0x28355, 0x0000003f, 0x00000000,
1077 	0x3e78, 0x00000001, 0x00000002,
1078 	0x913c, 0xffff03df, 0x00000004,
1079 	0xc768, 0x00000008, 0x00000008,
1080 	0x8c00, 0x000008ff, 0x00000800,
1081 	0x9508, 0x00010000, 0x00010000,
1082 	0xac0c, 0xffffffff, 0x54763210,
1083 	0x214f8, 0x01ff01ff, 0x00000002,
1084 	0x21498, 0x007ff800, 0x00200000,
1085 	0x2015c, 0xffffffff, 0x00000f40,
1086 	0x30934, 0xffffffff, 0x00000001
1087 };
1088 
/* Spectre medium-grain / coarse-grain clock-gating init sequence
 * ({offset, mask, value} triples, programmed via
 * radeon_program_register_sequence()).  Values come from AMD and are
 * intentionally not symbolic. */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1179 
/* Kalindi (Kabini GFX) SPM "golden" register override ({offset, mask, value}
 * triples, programmed via radeon_program_register_sequence()). */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1184 
/* Kalindi common "golden" register settings ({offset, mask, value} triples,
 * programmed via radeon_program_register_sequence()). */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1192 
/* Kalindi per-ASIC "golden" register settings ({offset, mask, value}
 * triples, programmed via radeon_program_register_sequence()). */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1226 
/* Kalindi medium-grain / coarse-grain clock-gating init sequence
 * ({offset, mask, value} triples, programmed via
 * radeon_program_register_sequence()). */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1285 
1286 static void cik_init_golden_registers(struct radeon_device *rdev)
1287 {
1288 	switch (rdev->family) {
1289 	case CHIP_BONAIRE:
1290 		radeon_program_register_sequence(rdev,
1291 						 bonaire_mgcg_cgcg_init,
1292 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1293 		radeon_program_register_sequence(rdev,
1294 						 bonaire_golden_registers,
1295 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1296 		radeon_program_register_sequence(rdev,
1297 						 bonaire_golden_common_registers,
1298 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1299 		radeon_program_register_sequence(rdev,
1300 						 bonaire_golden_spm_registers,
1301 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1302 		break;
1303 	case CHIP_KABINI:
1304 		radeon_program_register_sequence(rdev,
1305 						 kalindi_mgcg_cgcg_init,
1306 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1307 		radeon_program_register_sequence(rdev,
1308 						 kalindi_golden_registers,
1309 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1310 		radeon_program_register_sequence(rdev,
1311 						 kalindi_golden_common_registers,
1312 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1313 		radeon_program_register_sequence(rdev,
1314 						 kalindi_golden_spm_registers,
1315 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1316 		break;
1317 	case CHIP_KAVERI:
1318 		radeon_program_register_sequence(rdev,
1319 						 spectre_mgcg_cgcg_init,
1320 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1321 		radeon_program_register_sequence(rdev,
1322 						 spectre_golden_registers,
1323 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1324 		radeon_program_register_sequence(rdev,
1325 						 spectre_golden_common_registers,
1326 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1327 		radeon_program_register_sequence(rdev,
1328 						 spectre_golden_spm_registers,
1329 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1330 		break;
1331 	default:
1332 		break;
1333 	}
1334 }
1335 
1336 /**
1337  * cik_get_xclk - get the xclk
1338  *
1339  * @rdev: radeon_device pointer
1340  *
1341  * Returns the reference clock used by the gfx engine
1342  * (CIK).
1343  */
1344 u32 cik_get_xclk(struct radeon_device *rdev)
1345 {
1346         u32 reference_clock = rdev->clock.spll.reference_freq;
1347 
1348 	if (rdev->flags & RADEON_IS_IGP) {
1349 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1350 			return reference_clock / 2;
1351 	} else {
1352 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1353 			return reference_clock / 4;
1354 	}
1355 	return reference_clock;
1356 }
1357 
1358 /**
1359  * cik_mm_rdoorbell - read a doorbell dword
1360  *
1361  * @rdev: radeon_device pointer
1362  * @offset: byte offset into the aperture
1363  *
1364  * Returns the value in the doorbell aperture at the
1365  * requested offset (CIK).
1366  */
1367 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1368 {
1369 	if (offset < rdev->doorbell.size) {
1370 		return readl(((uint8_t __iomem *)rdev->doorbell.ptr) + offset);
1371 	} else {
1372 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1373 		return 0;
1374 	}
1375 }
1376 
1377 /**
1378  * cik_mm_wdoorbell - write a doorbell dword
1379  *
1380  * @rdev: radeon_device pointer
1381  * @offset: byte offset into the aperture
1382  * @v: value to write
1383  *
1384  * Writes @v to the doorbell aperture at the
1385  * requested offset (CIK).
1386  */
1387 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1388 {
1389 	if (offset < rdev->doorbell.size) {
1390 		writel(v, ((uint8_t __iomem *)rdev->doorbell.ptr) + offset);
1391 	} else {
1392 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1393 	}
1394 }
1395 
#define BONAIRE_IO_MC_REGS_SIZE 36

/* MC (memory controller) IO debug register setup for Bonaire, written as
 * {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs by
 * ci_mc_load_microcode() before the MC ucode is uploaded. */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1437 
1438 /**
1439  * cik_srbm_select - select specific register instances
1440  *
1441  * @rdev: radeon_device pointer
1442  * @me: selected ME (micro engine)
1443  * @pipe: pipe
1444  * @queue: queue
1445  * @vmid: VMID
1446  *
1447  * Switches the currently active registers instances.  Some
1448  * registers are instanced per VMID, others are instanced per
1449  * me/pipe/queue combination.
1450  */
1451 static void cik_srbm_select(struct radeon_device *rdev,
1452 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1453 {
1454 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1455 			     MEID(me & 0x3) |
1456 			     VMID(vmid & 0xf) |
1457 			     QUEUEID(queue & 0x7));
1458 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1459 }
1460 
1461 /* ucode loading */
1462 /**
1463  * ci_mc_load_microcode - load MC ucode into the hw
1464  *
1465  * @rdev: radeon_device pointer
1466  *
1467  * Load the GDDR MC ucode into the hw (CIK).
1468  * Returns 0 on success, error on failure.
1469  */
1470 static int ci_mc_load_microcode(struct radeon_device *rdev)
1471 {
1472 	const __be32 *fw_data;
1473 	u32 running, blackout = 0;
1474 	u32 *io_mc_regs;
1475 	int i, ucode_size, regs_size;
1476 
1477 	if (!rdev->mc_fw)
1478 		return -EINVAL;
1479 
1480 	switch (rdev->family) {
1481 	case CHIP_BONAIRE:
1482 	default:
1483 		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1484 		ucode_size = CIK_MC_UCODE_SIZE;
1485 		regs_size = BONAIRE_IO_MC_REGS_SIZE;
1486 		break;
1487 	}
1488 
1489 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1490 
1491 	if (running == 0) {
1492 		if (running) {
1493 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1494 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1495 		}
1496 
1497 		/* reset the engine and set to writable */
1498 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1499 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1500 
1501 		/* load mc io regs */
1502 		for (i = 0; i < regs_size; i++) {
1503 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1504 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1505 		}
1506 		/* load the MC ucode */
1507 		fw_data = (const __be32 *)rdev->mc_fw->data;
1508 		for (i = 0; i < ucode_size; i++)
1509 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1510 
1511 		/* put the engine back into the active state */
1512 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1513 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1514 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1515 
1516 		/* wait for training to complete */
1517 		for (i = 0; i < rdev->usec_timeout; i++) {
1518 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1519 				break;
1520 			udelay(1);
1521 		}
1522 		for (i = 0; i < rdev->usec_timeout; i++) {
1523 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1524 				break;
1525 			udelay(1);
1526 		}
1527 
1528 		if (running)
1529 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1530 	}
1531 
1532 	return 0;
1533 }
1534 
1535 /**
1536  * cik_init_microcode - load ucode images from disk
1537  *
1538  * @rdev: radeon_device pointer
1539  *
1540  * Use the firmware interface to load the ucode images into
1541  * the driver (not loaded into hw).
1542  * Returns 0 on success, error on failure.
1543  */
1544 static int cik_init_microcode(struct radeon_device *rdev)
1545 {
1546 	const char *chip_name;
1547 	size_t pfp_req_size, me_req_size, ce_req_size,
1548 		mec_req_size, rlc_req_size, mc_req_size,
1549 		sdma_req_size, smc_req_size;
1550 	char fw_name[30];
1551 	int err;
1552 
1553 	DRM_DEBUG("\n");
1554 
1555 	switch (rdev->family) {
1556 	case CHIP_BONAIRE:
1557 		chip_name = "BONAIRE";
1558 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1559 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1560 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1561 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1562 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1563 		mc_req_size = CIK_MC_UCODE_SIZE * 4;
1564 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1565 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1566 		break;
1567 	case CHIP_KAVERI:
1568 		chip_name = "KAVERI";
1569 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1570 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1571 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1572 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1573 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1574 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1575 		break;
1576 	case CHIP_KABINI:
1577 		chip_name = "KABINI";
1578 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1579 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1580 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1581 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1582 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1583 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1584 		break;
1585 	default: BUG();
1586 	}
1587 
1588 	DRM_INFO("Loading %s Microcode\n", chip_name);
1589 
1590 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
1591 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1592 	if (err)
1593 		goto out;
1594 	if (rdev->pfp_fw->datasize != pfp_req_size) {
1595 		printk(KERN_ERR
1596 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1597 		       rdev->pfp_fw->datasize, fw_name);
1598 		err = -EINVAL;
1599 		goto out;
1600 	}
1601 
1602 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
1603 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1604 	if (err)
1605 		goto out;
1606 	if (rdev->me_fw->datasize != me_req_size) {
1607 		printk(KERN_ERR
1608 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1609 		       rdev->me_fw->datasize, fw_name);
1610 		err = -EINVAL;
1611 	}
1612 
1613 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
1614 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1615 	if (err)
1616 		goto out;
1617 	if (rdev->ce_fw->datasize != ce_req_size) {
1618 		printk(KERN_ERR
1619 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1620 		       rdev->ce_fw->datasize, fw_name);
1621 		err = -EINVAL;
1622 	}
1623 
1624 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
1625 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1626 	if (err)
1627 		goto out;
1628 	if (rdev->mec_fw->datasize != mec_req_size) {
1629 		printk(KERN_ERR
1630 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1631 		       rdev->mec_fw->datasize, fw_name);
1632 		err = -EINVAL;
1633 	}
1634 
1635 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
1636 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1637 	if (err)
1638 		goto out;
1639 	if (rdev->rlc_fw->datasize != rlc_req_size) {
1640 		printk(KERN_ERR
1641 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1642 		       rdev->rlc_fw->datasize, fw_name);
1643 		err = -EINVAL;
1644 	}
1645 
1646 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
1647 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1648 	if (err)
1649 		goto out;
1650 	if (rdev->sdma_fw->datasize != sdma_req_size) {
1651 		printk(KERN_ERR
1652 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1653 		       rdev->sdma_fw->datasize, fw_name);
1654 		err = -EINVAL;
1655 	}
1656 
1657 	/* No SMC, MC ucode on APUs */
1658 	if (!(rdev->flags & RADEON_IS_IGP)) {
1659 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
1660 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1661 		if (err)
1662 			goto out;
1663 		if (rdev->mc_fw->datasize != mc_req_size) {
1664 			printk(KERN_ERR
1665 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1666 			       rdev->mc_fw->datasize, fw_name);
1667 			err = -EINVAL;
1668 		}
1669 
1670 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
1671 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1672 		if (err) {
1673 			printk(KERN_ERR
1674 			       "smc: error loading firmware \"%s\"\n",
1675 			       fw_name);
1676 			release_firmware(rdev->smc_fw);
1677 			rdev->smc_fw = NULL;
1678 			err = 0;
1679 		} else if (rdev->smc_fw->datasize != smc_req_size) {
1680 			printk(KERN_ERR
1681 			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1682 			       rdev->smc_fw->datasize, fw_name);
1683 			err = -EINVAL;
1684 		}
1685 	}
1686 
1687 out:
1688 	if (err) {
1689 		if (err != -EINVAL)
1690 			printk(KERN_ERR
1691 			       "cik_cp: Failed to load firmware \"%s\"\n",
1692 			       fw_name);
1693 		release_firmware(rdev->pfp_fw);
1694 		rdev->pfp_fw = NULL;
1695 		release_firmware(rdev->me_fw);
1696 		rdev->me_fw = NULL;
1697 		release_firmware(rdev->ce_fw);
1698 		rdev->ce_fw = NULL;
1699 		release_firmware(rdev->mec_fw);
1700 		rdev->mec_fw = NULL;
1701 		release_firmware(rdev->rlc_fw);
1702 		rdev->rlc_fw = NULL;
1703 		release_firmware(rdev->sdma_fw);
1704 		rdev->sdma_fw = NULL;
1705 		release_firmware(rdev->mc_fw);
1706 		rdev->mc_fw = NULL;
1707 		release_firmware(rdev->smc_fw);
1708 		rdev->smc_fw = NULL;
1709 	}
1710 	return err;
1711 }
1712 
1713 /*
1714  * Core functions
1715  */
1716 /**
1717  * cik_tiling_mode_table_init - init the hw tiling table
1718  *
1719  * @rdev: radeon_device pointer
1720  *
1721  * Starting with SI, the tiling setup is done globally in a
1722  * set of 32 tiling modes.  Rather than selecting each set of
1723  * parameters per surface as on older asics, we just select
1724  * which index in the tiling table we want to use, and the
1725  * surface uses those parameters (CIK).
1726  */
1727 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1728 {
1729 	const u32 num_tile_mode_states = 32;
1730 	const u32 num_secondary_tile_mode_states = 16;
1731 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1732 	u32 num_pipe_configs;
1733 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
1734 		rdev->config.cik.max_shader_engines;
1735 
1736 	switch (rdev->config.cik.mem_row_size_in_kb) {
1737 	case 1:
1738 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1739 		break;
1740 	case 2:
1741 	default:
1742 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1743 		break;
1744 	case 4:
1745 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1746 		break;
1747 	}
1748 
1749 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
1750 	if (num_pipe_configs > 8)
1751 		num_pipe_configs = 8; /* ??? */
1752 
1753 	if (num_pipe_configs == 8) {
1754 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1755 			switch (reg_offset) {
1756 			case 0:
1757 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1758 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1759 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1760 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1761 				break;
1762 			case 1:
1763 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1764 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1765 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1766 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1767 				break;
1768 			case 2:
1769 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1770 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1771 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1772 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1773 				break;
1774 			case 3:
1775 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1776 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1777 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1778 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1779 				break;
1780 			case 4:
1781 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1782 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1783 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1784 						 TILE_SPLIT(split_equal_to_row_size));
1785 				break;
1786 			case 5:
1787 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1788 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1789 				break;
1790 			case 6:
1791 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1792 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1793 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1794 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1795 				break;
1796 			case 7:
1797 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1798 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1799 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1800 						 TILE_SPLIT(split_equal_to_row_size));
1801 				break;
1802 			case 8:
1803 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1804 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1805 				break;
1806 			case 9:
1807 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1808 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1809 				break;
1810 			case 10:
1811 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1812 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1813 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1814 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1815 				break;
1816 			case 11:
1817 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1818 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1819 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1820 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1821 				break;
1822 			case 12:
1823 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1824 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1825 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1826 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1827 				break;
1828 			case 13:
1829 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1830 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1831 				break;
1832 			case 14:
1833 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1834 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1835 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1836 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1837 				break;
1838 			case 16:
1839 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1840 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1841 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1842 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1843 				break;
1844 			case 17:
1845 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1846 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1847 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1848 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1849 				break;
1850 			case 27:
1851 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1852 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1853 				break;
1854 			case 28:
1855 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1856 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1857 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1858 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1859 				break;
1860 			case 29:
1861 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1862 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1863 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1864 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1865 				break;
1866 			case 30:
1867 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1868 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1869 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1870 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1871 				break;
1872 			default:
1873 				gb_tile_moden = 0;
1874 				break;
1875 			}
1876 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1877 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1878 		}
1879 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1880 			switch (reg_offset) {
1881 			case 0:
1882 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1883 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1884 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1885 						 NUM_BANKS(ADDR_SURF_16_BANK));
1886 				break;
1887 			case 1:
1888 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1889 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1890 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1891 						 NUM_BANKS(ADDR_SURF_16_BANK));
1892 				break;
1893 			case 2:
1894 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1895 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1896 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1897 						 NUM_BANKS(ADDR_SURF_16_BANK));
1898 				break;
1899 			case 3:
1900 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1901 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1902 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1903 						 NUM_BANKS(ADDR_SURF_16_BANK));
1904 				break;
1905 			case 4:
1906 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1907 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1908 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1909 						 NUM_BANKS(ADDR_SURF_8_BANK));
1910 				break;
1911 			case 5:
1912 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1913 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1914 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1915 						 NUM_BANKS(ADDR_SURF_4_BANK));
1916 				break;
1917 			case 6:
1918 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1919 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1920 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1921 						 NUM_BANKS(ADDR_SURF_2_BANK));
1922 				break;
1923 			case 8:
1924 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1925 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1926 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1927 						 NUM_BANKS(ADDR_SURF_16_BANK));
1928 				break;
1929 			case 9:
1930 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1931 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1932 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1933 						 NUM_BANKS(ADDR_SURF_16_BANK));
1934 				break;
1935 			case 10:
1936 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1937 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1938 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1939 						 NUM_BANKS(ADDR_SURF_16_BANK));
1940 				break;
1941 			case 11:
1942 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1943 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1944 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1945 						 NUM_BANKS(ADDR_SURF_16_BANK));
1946 				break;
1947 			case 12:
1948 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1949 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1950 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1951 						 NUM_BANKS(ADDR_SURF_8_BANK));
1952 				break;
1953 			case 13:
1954 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1955 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1956 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1957 						 NUM_BANKS(ADDR_SURF_4_BANK));
1958 				break;
1959 			case 14:
1960 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1961 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1962 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1963 						 NUM_BANKS(ADDR_SURF_2_BANK));
1964 				break;
1965 			default:
1966 				gb_tile_moden = 0;
1967 				break;
1968 			}
1969 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1970 		}
1971 	} else if (num_pipe_configs == 4) {
1972 		if (num_rbs == 4) {
1973 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1974 				switch (reg_offset) {
1975 				case 0:
1976 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1977 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1978 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1979 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1980 					break;
1981 				case 1:
1982 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1983 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1984 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1985 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1986 					break;
1987 				case 2:
1988 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1989 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1990 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1991 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1992 					break;
1993 				case 3:
1994 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1995 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1996 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1997 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1998 					break;
1999 				case 4:
2000 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2001 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2002 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2003 							 TILE_SPLIT(split_equal_to_row_size));
2004 					break;
2005 				case 5:
2006 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2007 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2008 					break;
2009 				case 6:
2010 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2011 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2012 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2013 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2014 					break;
2015 				case 7:
2016 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2017 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2018 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2019 							 TILE_SPLIT(split_equal_to_row_size));
2020 					break;
2021 				case 8:
2022 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2023 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2024 					break;
2025 				case 9:
2026 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2027 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2028 					break;
2029 				case 10:
2030 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2031 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2032 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2033 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2034 					break;
2035 				case 11:
2036 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2037 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2038 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2039 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2040 					break;
2041 				case 12:
2042 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2043 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2044 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2045 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2046 					break;
2047 				case 13:
2048 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2049 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2050 					break;
2051 				case 14:
2052 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2053 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2054 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2055 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2056 					break;
2057 				case 16:
2058 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2059 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2060 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2061 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2062 					break;
2063 				case 17:
2064 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2065 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2066 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2067 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2068 					break;
2069 				case 27:
2070 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2071 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2072 					break;
2073 				case 28:
2074 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2075 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2076 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2077 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2078 					break;
2079 				case 29:
2080 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2081 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2082 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2083 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2084 					break;
2085 				case 30:
2086 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2087 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2088 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2089 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2090 					break;
2091 				default:
2092 					gb_tile_moden = 0;
2093 					break;
2094 				}
2095 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2096 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2097 			}
2098 		} else if (num_rbs < 4) {
2099 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2100 				switch (reg_offset) {
2101 				case 0:
2102 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2103 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2104 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2105 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2106 					break;
2107 				case 1:
2108 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2109 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2110 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2111 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2112 					break;
2113 				case 2:
2114 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2115 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2116 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2117 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2118 					break;
2119 				case 3:
2120 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2121 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2122 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2123 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2124 					break;
2125 				case 4:
2126 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2127 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2128 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2129 							 TILE_SPLIT(split_equal_to_row_size));
2130 					break;
2131 				case 5:
2132 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2133 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2134 					break;
2135 				case 6:
2136 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2137 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2138 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2139 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2140 					break;
2141 				case 7:
2142 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2143 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2144 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2145 							 TILE_SPLIT(split_equal_to_row_size));
2146 					break;
2147 				case 8:
2148 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2149 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2150 					break;
2151 				case 9:
2152 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2153 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2154 					break;
2155 				case 10:
2156 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2157 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2158 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2159 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2160 					break;
2161 				case 11:
2162 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2163 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2164 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2165 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2166 					break;
2167 				case 12:
2168 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2169 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2170 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2171 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2172 					break;
2173 				case 13:
2174 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2175 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2176 					break;
2177 				case 14:
2178 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2179 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2180 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2181 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2182 					break;
2183 				case 16:
2184 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2185 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2186 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2187 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2188 					break;
2189 				case 17:
2190 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2191 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2192 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2193 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2194 					break;
2195 				case 27:
2196 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2197 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2198 					break;
2199 				case 28:
2200 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2201 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2202 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2203 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2204 					break;
2205 				case 29:
2206 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2207 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2208 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2209 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2210 					break;
2211 				case 30:
2212 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2213 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2214 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2215 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2216 					break;
2217 				default:
2218 					gb_tile_moden = 0;
2219 					break;
2220 				}
2221 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2222 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2223 			}
2224 		}
2225 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2226 			switch (reg_offset) {
2227 			case 0:
2228 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2229 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2230 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2231 						 NUM_BANKS(ADDR_SURF_16_BANK));
2232 				break;
2233 			case 1:
2234 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2235 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2236 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2237 						 NUM_BANKS(ADDR_SURF_16_BANK));
2238 				break;
2239 			case 2:
2240 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2241 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2242 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2243 						 NUM_BANKS(ADDR_SURF_16_BANK));
2244 				break;
2245 			case 3:
2246 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2247 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2248 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2249 						 NUM_BANKS(ADDR_SURF_16_BANK));
2250 				break;
2251 			case 4:
2252 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2253 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2254 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2255 						 NUM_BANKS(ADDR_SURF_16_BANK));
2256 				break;
2257 			case 5:
2258 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2259 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2260 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2261 						 NUM_BANKS(ADDR_SURF_8_BANK));
2262 				break;
2263 			case 6:
2264 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2265 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2266 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2267 						 NUM_BANKS(ADDR_SURF_4_BANK));
2268 				break;
2269 			case 8:
2270 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2271 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2272 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2273 						 NUM_BANKS(ADDR_SURF_16_BANK));
2274 				break;
2275 			case 9:
2276 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2277 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2278 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2279 						 NUM_BANKS(ADDR_SURF_16_BANK));
2280 				break;
2281 			case 10:
2282 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2283 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2284 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2285 						 NUM_BANKS(ADDR_SURF_16_BANK));
2286 				break;
2287 			case 11:
2288 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2289 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2290 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2291 						 NUM_BANKS(ADDR_SURF_16_BANK));
2292 				break;
2293 			case 12:
2294 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2296 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2297 						 NUM_BANKS(ADDR_SURF_16_BANK));
2298 				break;
2299 			case 13:
2300 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2301 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2302 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2303 						 NUM_BANKS(ADDR_SURF_8_BANK));
2304 				break;
2305 			case 14:
2306 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2307 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2308 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2309 						 NUM_BANKS(ADDR_SURF_4_BANK));
2310 				break;
2311 			default:
2312 				gb_tile_moden = 0;
2313 				break;
2314 			}
2315 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2316 		}
2317 	} else if (num_pipe_configs == 2) {
2318 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2319 			switch (reg_offset) {
2320 			case 0:
2321 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2323 						 PIPE_CONFIG(ADDR_SURF_P2) |
2324 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2325 				break;
2326 			case 1:
2327 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2329 						 PIPE_CONFIG(ADDR_SURF_P2) |
2330 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2331 				break;
2332 			case 2:
2333 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2335 						 PIPE_CONFIG(ADDR_SURF_P2) |
2336 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2337 				break;
2338 			case 3:
2339 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2341 						 PIPE_CONFIG(ADDR_SURF_P2) |
2342 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2343 				break;
2344 			case 4:
2345 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2346 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2347 						 PIPE_CONFIG(ADDR_SURF_P2) |
2348 						 TILE_SPLIT(split_equal_to_row_size));
2349 				break;
2350 			case 5:
2351 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2352 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2353 				break;
2354 			case 6:
2355 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2356 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2357 						 PIPE_CONFIG(ADDR_SURF_P2) |
2358 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2359 				break;
2360 			case 7:
2361 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2362 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2363 						 PIPE_CONFIG(ADDR_SURF_P2) |
2364 						 TILE_SPLIT(split_equal_to_row_size));
2365 				break;
2366 			case 8:
2367 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2368 				break;
2369 			case 9:
2370 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2371 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2372 				break;
2373 			case 10:
2374 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2376 						 PIPE_CONFIG(ADDR_SURF_P2) |
2377 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2378 				break;
2379 			case 11:
2380 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2381 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2382 						 PIPE_CONFIG(ADDR_SURF_P2) |
2383 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2384 				break;
2385 			case 12:
2386 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2387 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2388 						 PIPE_CONFIG(ADDR_SURF_P2) |
2389 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2390 				break;
2391 			case 13:
2392 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2393 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2394 				break;
2395 			case 14:
2396 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2397 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2398 						 PIPE_CONFIG(ADDR_SURF_P2) |
2399 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2400 				break;
2401 			case 16:
2402 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2403 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2404 						 PIPE_CONFIG(ADDR_SURF_P2) |
2405 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2406 				break;
2407 			case 17:
2408 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2409 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2410 						 PIPE_CONFIG(ADDR_SURF_P2) |
2411 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2412 				break;
2413 			case 27:
2414 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2415 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2416 				break;
2417 			case 28:
2418 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2419 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2420 						 PIPE_CONFIG(ADDR_SURF_P2) |
2421 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2422 				break;
2423 			case 29:
2424 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2426 						 PIPE_CONFIG(ADDR_SURF_P2) |
2427 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2428 				break;
2429 			case 30:
2430 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2431 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432 						 PIPE_CONFIG(ADDR_SURF_P2) |
2433 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 				break;
2435 			default:
2436 				gb_tile_moden = 0;
2437 				break;
2438 			}
2439 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2440 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2441 		}
2442 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2443 			switch (reg_offset) {
2444 			case 0:
2445 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2446 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2448 						 NUM_BANKS(ADDR_SURF_16_BANK));
2449 				break;
2450 			case 1:
2451 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2452 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2453 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2454 						 NUM_BANKS(ADDR_SURF_16_BANK));
2455 				break;
2456 			case 2:
2457 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2459 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2460 						 NUM_BANKS(ADDR_SURF_16_BANK));
2461 				break;
2462 			case 3:
2463 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2464 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2465 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2466 						 NUM_BANKS(ADDR_SURF_16_BANK));
2467 				break;
2468 			case 4:
2469 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2472 						 NUM_BANKS(ADDR_SURF_16_BANK));
2473 				break;
2474 			case 5:
2475 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2476 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2477 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2478 						 NUM_BANKS(ADDR_SURF_16_BANK));
2479 				break;
2480 			case 6:
2481 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2484 						 NUM_BANKS(ADDR_SURF_8_BANK));
2485 				break;
2486 			case 8:
2487 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2488 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2489 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2490 						 NUM_BANKS(ADDR_SURF_16_BANK));
2491 				break;
2492 			case 9:
2493 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2494 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2495 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2496 						 NUM_BANKS(ADDR_SURF_16_BANK));
2497 				break;
2498 			case 10:
2499 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2500 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2501 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2502 						 NUM_BANKS(ADDR_SURF_16_BANK));
2503 				break;
2504 			case 11:
2505 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2506 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2507 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2508 						 NUM_BANKS(ADDR_SURF_16_BANK));
2509 				break;
2510 			case 12:
2511 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2513 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2514 						 NUM_BANKS(ADDR_SURF_16_BANK));
2515 				break;
2516 			case 13:
2517 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2519 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2520 						 NUM_BANKS(ADDR_SURF_16_BANK));
2521 				break;
2522 			case 14:
2523 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2526 						 NUM_BANKS(ADDR_SURF_8_BANK));
2527 				break;
2528 			default:
2529 				gb_tile_moden = 0;
2530 				break;
2531 			}
2532 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2533 		}
2534 	} else
2535 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2536 }
2537 
2538 /**
2539  * cik_select_se_sh - select which SE, SH to address
2540  *
2541  * @rdev: radeon_device pointer
2542  * @se_num: shader engine to address
2543  * @sh_num: sh block to address
2544  *
2545  * Select which SE, SH combinations to address. Certain
2546  * registers are instanced per SE or SH.  0xffffffff means
2547  * broadcast to all SEs or SHs (CIK).
2548  */
2549 static void cik_select_se_sh(struct radeon_device *rdev,
2550 			     u32 se_num, u32 sh_num)
2551 {
2552 	u32 data = INSTANCE_BROADCAST_WRITES;
2553 
2554 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2555 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2556 	else if (se_num == 0xffffffff)
2557 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2558 	else if (sh_num == 0xffffffff)
2559 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2560 	else
2561 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2562 	WREG32(GRBM_GFX_INDEX, data);
2563 }
2564 
2565 /**
2566  * cik_create_bitmask - create a bitmask
2567  *
2568  * @bit_width: length of the mask
2569  *
2570  * create a variable length bit mask (CIK).
2571  * Returns the bitmask.
2572  */
2573 static u32 cik_create_bitmask(u32 bit_width)
2574 {
2575 	u32 i, mask = 0;
2576 
2577 	for (i = 0; i < bit_width; i++) {
2578 		mask <<= 1;
2579 		mask |= 1;
2580 	}
2581 	return mask;
2582 }
2583 
/**
 * cik_get_rb_disabled - compute the per-SH disabled RB bitmask
 *
 * @rdev: radeon_device pointer
 * @max_rb_num: max RBs (render backends) for the asic
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
2595 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2596 			      u32 max_rb_num, u32 se_num,
2597 			      u32 sh_per_se)
2598 {
2599 	u32 data, mask;
2600 
2601 	data = RREG32(CC_RB_BACKEND_DISABLE);
2602 	if (data & 1)
2603 		data &= BACKEND_DISABLE_MASK;
2604 	else
2605 		data = 0;
2606 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2607 
2608 	data >>= BACKEND_DISABLE_SHIFT;
2609 
2610 	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2611 
2612 	return data & mask;
2613 }
2614 
2615 /**
2616  * cik_setup_rb - setup the RBs on the asic
2617  *
2618  * @rdev: radeon_device pointer
2619  * @se_num: number of SEs (shader engines) for the asic
2620  * @sh_per_se: number of SH blocks per SE for the asic
2621  * @max_rb_num: max RBs (render backends) for the asic
2622  *
2623  * Configures per-SE/SH RB registers (CIK).
2624  */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	/* packed bitmask: CIK_RB_BITMAP_WIDTH_PER_SH bits per SE/SH pair */
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Pass 1: select each SE/SH pair in turn and gather its disabled-RB
	 * bits into one packed mask covering the whole chip.
	 */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast addressing so subsequent writes hit all instances */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert: set a bit in enabled_rbs for every RB not marked disabled. */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Pass 2: program PA_SC_RASTER_CONFIG per SE (SH broadcast within the
	 * SE), picking an RB_MAP value from the 2 enabled-RB bits consumed for
	 * each SH.  The specific RB_MAP_0/2/3 choices per bit pattern follow
	 * the hardware raster mapping tables — NOTE(review): mapping constants
	 * taken as-is; confirm against the CIK register spec if touched.
	 */
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* leave the chip in broadcast mode */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2672 
2673 /**
2674  * cik_gpu_init - setup the 3D engine
2675  *
2676  * @rdev: radeon_device pointer
2677  *
2678  * Configures the 3D engine and tiling configuration
2679  * registers so that the 3D engine is usable.
2680  */
2681 static void cik_gpu_init(struct radeon_device *rdev)
2682 {
2683 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2684 	u32 mc_shared_chmap, mc_arb_ramcfg;
2685 	u32 hdp_host_path_cntl;
2686 	u32 tmp;
2687 	int i, j;
2688 
2689 	switch (rdev->family) {
2690 	case CHIP_BONAIRE:
2691 		rdev->config.cik.max_shader_engines = 2;
2692 		rdev->config.cik.max_tile_pipes = 4;
2693 		rdev->config.cik.max_cu_per_sh = 7;
2694 		rdev->config.cik.max_sh_per_se = 1;
2695 		rdev->config.cik.max_backends_per_se = 2;
2696 		rdev->config.cik.max_texture_channel_caches = 4;
2697 		rdev->config.cik.max_gprs = 256;
2698 		rdev->config.cik.max_gs_threads = 32;
2699 		rdev->config.cik.max_hw_contexts = 8;
2700 
2701 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2702 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2703 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2704 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2705 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2706 		break;
2707 	case CHIP_KAVERI:
2708 		rdev->config.cik.max_shader_engines = 1;
2709 		rdev->config.cik.max_tile_pipes = 4;
2710 		if ((rdev->ddev->pci_device == 0x1304) ||
2711 		    (rdev->ddev->pci_device == 0x1305) ||
2712 		    (rdev->ddev->pci_device == 0x130C) ||
2713 		    (rdev->ddev->pci_device == 0x130F) ||
2714 		    (rdev->ddev->pci_device == 0x1310) ||
2715 		    (rdev->ddev->pci_device == 0x1311) ||
2716 		    (rdev->ddev->pci_device == 0x131C)) {
2717 			rdev->config.cik.max_cu_per_sh = 8;
2718 			rdev->config.cik.max_backends_per_se = 2;
2719 		} else if ((rdev->ddev->pci_device == 0x1309) ||
2720 			   (rdev->ddev->pci_device == 0x130A) ||
2721 			   (rdev->ddev->pci_device == 0x130D) ||
2722 			   (rdev->ddev->pci_device == 0x1313) ||
2723 			   (rdev->ddev->pci_device == 0x131D)) {
2724 			rdev->config.cik.max_cu_per_sh = 6;
2725 			rdev->config.cik.max_backends_per_se = 2;
2726 		} else if ((rdev->ddev->pci_device == 0x1306) ||
2727 			   (rdev->ddev->pci_device == 0x1307) ||
2728 			   (rdev->ddev->pci_device == 0x130B) ||
2729 			   (rdev->ddev->pci_device == 0x130E) ||
2730 			   (rdev->ddev->pci_device == 0x1315) ||
2731 			   (rdev->ddev->pci_device == 0x131B)) {
2732 			rdev->config.cik.max_cu_per_sh = 4;
2733 			rdev->config.cik.max_backends_per_se = 1;
2734 		} else {
2735 			rdev->config.cik.max_cu_per_sh = 3;
2736 			rdev->config.cik.max_backends_per_se = 1;
2737 		}
2738 		rdev->config.cik.max_sh_per_se = 1;
2739 		rdev->config.cik.max_texture_channel_caches = 4;
2740 		rdev->config.cik.max_gprs = 256;
2741 		rdev->config.cik.max_gs_threads = 16;
2742 		rdev->config.cik.max_hw_contexts = 8;
2743 
2744 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2745 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2746 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2747 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2748 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2749 		break;
2750 	case CHIP_KABINI:
2751 	default:
2752 		rdev->config.cik.max_shader_engines = 1;
2753 		rdev->config.cik.max_tile_pipes = 2;
2754 		rdev->config.cik.max_cu_per_sh = 2;
2755 		rdev->config.cik.max_sh_per_se = 1;
2756 		rdev->config.cik.max_backends_per_se = 1;
2757 		rdev->config.cik.max_texture_channel_caches = 2;
2758 		rdev->config.cik.max_gprs = 256;
2759 		rdev->config.cik.max_gs_threads = 16;
2760 		rdev->config.cik.max_hw_contexts = 8;
2761 
2762 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2763 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2764 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2765 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2766 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2767 		break;
2768 	}
2769 
2770 	/* Initialize HDP */
2771 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2772 		WREG32((0x2c14 + j), 0x00000000);
2773 		WREG32((0x2c18 + j), 0x00000000);
2774 		WREG32((0x2c1c + j), 0x00000000);
2775 		WREG32((0x2c20 + j), 0x00000000);
2776 		WREG32((0x2c24 + j), 0x00000000);
2777 	}
2778 
2779 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2780 
2781 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2782 
2783 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2784 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2785 
2786 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2787 	rdev->config.cik.mem_max_burst_length_bytes = 256;
2788 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2789 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2790 	if (rdev->config.cik.mem_row_size_in_kb > 4)
2791 		rdev->config.cik.mem_row_size_in_kb = 4;
2792 	/* XXX use MC settings? */
2793 	rdev->config.cik.shader_engine_tile_size = 32;
2794 	rdev->config.cik.num_gpus = 1;
2795 	rdev->config.cik.multi_gpu_tile_size = 64;
2796 
2797 	/* fix up row size */
2798 	gb_addr_config &= ~ROW_SIZE_MASK;
2799 	switch (rdev->config.cik.mem_row_size_in_kb) {
2800 	case 1:
2801 	default:
2802 		gb_addr_config |= ROW_SIZE(0);
2803 		break;
2804 	case 2:
2805 		gb_addr_config |= ROW_SIZE(1);
2806 		break;
2807 	case 4:
2808 		gb_addr_config |= ROW_SIZE(2);
2809 		break;
2810 	}
2811 
2812 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
2813 	 * not have bank info, so create a custom tiling dword.
2814 	 * bits 3:0   num_pipes
2815 	 * bits 7:4   num_banks
2816 	 * bits 11:8  group_size
2817 	 * bits 15:12 row_size
2818 	 */
2819 	rdev->config.cik.tile_config = 0;
2820 	switch (rdev->config.cik.num_tile_pipes) {
2821 	case 1:
2822 		rdev->config.cik.tile_config |= (0 << 0);
2823 		break;
2824 	case 2:
2825 		rdev->config.cik.tile_config |= (1 << 0);
2826 		break;
2827 	case 4:
2828 		rdev->config.cik.tile_config |= (2 << 0);
2829 		break;
2830 	case 8:
2831 	default:
2832 		/* XXX what about 12? */
2833 		rdev->config.cik.tile_config |= (3 << 0);
2834 		break;
2835 	}
2836 	rdev->config.cik.tile_config |=
2837 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
2838 	rdev->config.cik.tile_config |=
2839 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2840 	rdev->config.cik.tile_config |=
2841 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
2842 
2843 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
2844 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2845 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
2846 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2847 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2848 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2849 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2850 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2851 
2852 	cik_tiling_mode_table_init(rdev);
2853 
2854 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2855 		     rdev->config.cik.max_sh_per_se,
2856 		     rdev->config.cik.max_backends_per_se);
2857 
2858 	/* set HW defaults for 3D engine */
2859 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2860 
2861 	WREG32(SX_DEBUG_1, 0x20);
2862 
2863 	WREG32(TA_CNTL_AUX, 0x00010000);
2864 
2865 	tmp = RREG32(SPI_CONFIG_CNTL);
2866 	tmp |= 0x03000000;
2867 	WREG32(SPI_CONFIG_CNTL, tmp);
2868 
2869 	WREG32(SQ_CONFIG, 1);
2870 
2871 	WREG32(DB_DEBUG, 0);
2872 
2873 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2874 	tmp |= 0x00000400;
2875 	WREG32(DB_DEBUG2, tmp);
2876 
2877 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2878 	tmp |= 0x00020200;
2879 	WREG32(DB_DEBUG3, tmp);
2880 
2881 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2882 	tmp |= 0x00018208;
2883 	WREG32(CB_HW_CONTROL, tmp);
2884 
2885 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2886 
2887 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2888 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2889 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2890 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2891 
2892 	WREG32(VGT_NUM_INSTANCES, 1);
2893 
2894 	WREG32(CP_PERFMON_CNTL, 0);
2895 
2896 	WREG32(SQ_CONFIG, 0);
2897 
2898 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2899 					  FORCE_EOV_MAX_REZ_CNT(255)));
2900 
2901 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2902 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
2903 
2904 	WREG32(VGT_GS_VERTEX_REUSE, 16);
2905 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2906 
2907 	tmp = RREG32(HDP_MISC_CNTL);
2908 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2909 	WREG32(HDP_MISC_CNTL, tmp);
2910 
2911 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2912 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2913 
2914 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2915 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2916 
2917 	udelay(50);
2918 }
2919 
2920 /*
2921  * GPU scratch registers helpers function.
2922  */
2923 /**
2924  * cik_scratch_init - setup driver info for CP scratch regs
2925  *
2926  * @rdev: radeon_device pointer
2927  *
2928  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
2930  * is not used by default on newer asics (r6xx+).  On newer asics,
2931  * memory buffers are used for fences rather than scratch regs.
2932  */
2933 static void cik_scratch_init(struct radeon_device *rdev)
2934 {
2935 	int i;
2936 
2937 	rdev->scratch.num_reg = 7;
2938 	rdev->scratch.reg_base = SCRATCH_REG0;
2939 	for (i = 0; i < rdev->scratch.num_reg; i++) {
2940 		rdev->scratch.free[i] = true;
2941 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2942 	}
2943 }
2944 
2945 /**
2946  * cik_ring_test - basic gfx ring test
2947  *
2948  * @rdev: radeon_device pointer
2949  * @ring: radeon_ring structure holding ring information
2950  *
2951  * Allocate a scratch register and write to it using the gfx ring (CIK).
2952  * Provides a basic gfx ring test to verify that the ring is working.
2953  * Used by cik_cp_gfx_resume();
2954  * Returns 0 on success, error on failure.
2955  */
2956 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2957 {
2958 	uint32_t scratch;
2959 	uint32_t tmp = 0;
2960 	unsigned i;
2961 	int r;
2962 
2963 	r = radeon_scratch_get(rdev, &scratch);
2964 	if (r) {
2965 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2966 		return r;
2967 	}
2968 	WREG32(scratch, 0xCAFEDEAD);
2969 	r = radeon_ring_lock(rdev, ring, 3);
2970 	if (r) {
2971 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2972 		radeon_scratch_free(rdev, scratch);
2973 		return r;
2974 	}
2975 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2976 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2977 	radeon_ring_write(ring, 0xDEADBEEF);
2978 	radeon_ring_unlock_commit(rdev, ring);
2979 
2980 	for (i = 0; i < rdev->usec_timeout; i++) {
2981 		tmp = RREG32(scratch);
2982 		if (tmp == 0xDEADBEEF)
2983 			break;
2984 		DRM_UDELAY(1);
2985 	}
2986 	if (i < rdev->usec_timeout) {
2987 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2988 	} else {
2989 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2990 			  ring->idx, scratch, tmp);
2991 		r = -EINVAL;
2992 	}
2993 	radeon_scratch_free(rdev, scratch);
2994 	return r;
2995 }
2996 
2997 /**
2998  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2999  *
3000  * @rdev: radeon_device pointer
3001  * @fence: radeon fence object
3002  *
 * Emits a fence sequence number on the gfx ring and flushes
3004  * GPU caches.
3005  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address where the fence sequence number is written back */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* low 32 bits of the fence address (dword aligned) */
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* address high bits + data/interrupt select fields */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);	/* value written at addr */
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	/* register destination, as a dword offset */
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
3033 
3034 /**
3035  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3036  *
3037  * @rdev: radeon_device pointer
3038  * @fence: radeon fence object
3039  *
 * Emits a fence sequence number on the compute ring and flushes
3041  * GPU caches.
3042  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address where the fence sequence number is written back */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	/* note: compute queues use RELEASE_MEM instead of the gfx ring's
	 * EVENT_WRITE_EOP; the address/select dword ordering differs too
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	/* low 32 bits of the fence address (dword aligned), then high bits */
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);	/* value written at addr */
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	/* register destination, as a dword offset */
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
3071 
3072 void cik_semaphore_ring_emit(struct radeon_device *rdev,
3073 			     struct radeon_ring *ring,
3074 			     struct radeon_semaphore *semaphore,
3075 			     bool emit_wait)
3076 {
3077 	uint64_t addr = semaphore->gpu_addr;
3078 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3079 
3080 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3081 	radeon_ring_write(ring, addr & 0xffffffff);
3082 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3083 }
3084 
3085 /*
3086  * IB stuff
3087  */
3088 /**
3089  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3090  *
3091  * @rdev: radeon_device pointer
3092  * @ib: radeon indirect buffer object
3093  *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
3099  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this write + 4 for the IB packet below */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this write + 4 for the IB packet below */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords, plus the vm id it runs under (0 if no vm) */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3143 
3144 /**
3145  * cik_ib_test - basic gfx ring IB test
3146  *
3147  * @rdev: radeon_device pointer
3148  * @ring: radeon_ring structure holding ring information
3149  *
3150  * Allocate an IB and execute it on the gfx ring (CIK).
3151  * Provides a basic gfx ring test to verify that IBs are working.
3152  * Returns 0 on success, error on failure.
3153  */
3154 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3155 {
3156 	struct radeon_ib ib;
3157 	uint32_t scratch;
3158 	uint32_t tmp = 0;
3159 	unsigned i;
3160 	int r;
3161 
3162 	r = radeon_scratch_get(rdev, &scratch);
3163 	if (r) {
3164 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3165 		return r;
3166 	}
3167 	WREG32(scratch, 0xCAFEDEAD);
3168 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3169 	if (r) {
3170 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3171 		radeon_scratch_free(rdev, scratch);
3172 		return r;
3173 	}
3174 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3175 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3176 	ib.ptr[2] = 0xDEADBEEF;
3177 	ib.length_dw = 3;
3178 	r = radeon_ib_schedule(rdev, &ib, NULL);
3179 	if (r) {
3180 		radeon_scratch_free(rdev, scratch);
3181 		radeon_ib_free(rdev, &ib);
3182 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3183 		return r;
3184 	}
3185 	r = radeon_fence_wait(ib.fence, false);
3186 	if (r) {
3187 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3188 		radeon_scratch_free(rdev, scratch);
3189 		radeon_ib_free(rdev, &ib);
3190 		return r;
3191 	}
3192 	for (i = 0; i < rdev->usec_timeout; i++) {
3193 		tmp = RREG32(scratch);
3194 		if (tmp == 0xDEADBEEF)
3195 			break;
3196 		DRM_UDELAY(1);
3197 	}
3198 	if (i < rdev->usec_timeout) {
3199 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3200 	} else {
3201 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3202 			  scratch, tmp);
3203 		r = -EINVAL;
3204 	}
3205 	radeon_scratch_free(rdev, scratch);
3206 	radeon_ib_free(rdev, &ib);
3207 	return r;
3208 }
3209 
3210 /*
3211  * CP.
 * On CIK, gfx and compute now have independent command processors.
3213  *
3214  * GFX
3215  * Gfx consists of a single ring and can process both gfx jobs and
3216  * compute jobs.  The gfx CP consists of three microengines (ME):
3217  * PFP - Pre-Fetch Parser
3218  * ME - Micro Engine
3219  * CE - Constant Engine
3220  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3222  * used by the DE so that they can be loaded into cache in parallel
3223  * while the DE is processing state update packets.
3224  *
3225  * Compute
3226  * The compute CP consists of two microengines (ME):
3227  * MEC1 - Compute MicroEngine 1
3228  * MEC2 - Compute MicroEngine 2
3229  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3230  * The queues are exposed to userspace and are programmed directly
3231  * by the compute runtime.
3232  */
3233 /**
3234  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3235  *
3236  * @rdev: radeon_device pointer
3237  * @enable: enable or disable the MEs
3238  *
3239  * Halts or unhalts the gfx MEs.
3240  */
3241 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3242 {
3243 	if (enable)
3244 		WREG32(CP_ME_CNTL, 0);
3245 	else {
3246 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3247 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3248 	}
3249 	udelay(50);
3250 }
3251 
3252 /**
3253  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3254  *
3255  * @rdev: radeon_device pointer
3256  *
3257  * Loads the gfx PFP, ME, and CE ucode.
3258  * Returns 0 for success, -EINVAL if the ucode is not available.
3259  */
3260 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3261 {
3262 	const __be32 *fw_data;
3263 	int i;
3264 
3265 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3266 		return -EINVAL;
3267 
3268 	cik_cp_gfx_enable(rdev, false);
3269 
3270 	/* PFP */
3271 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3272 	WREG32(CP_PFP_UCODE_ADDR, 0);
3273 	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3274 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3275 	WREG32(CP_PFP_UCODE_ADDR, 0);
3276 
3277 	/* CE */
3278 	fw_data = (const __be32 *)rdev->ce_fw->data;
3279 	WREG32(CP_CE_UCODE_ADDR, 0);
3280 	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3281 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3282 	WREG32(CP_CE_UCODE_ADDR, 0);
3283 
3284 	/* ME */
3285 	fw_data = (const __be32 *)rdev->me_fw->data;
3286 	WREG32(CP_ME_RAM_WADDR, 0);
3287 	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3288 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3289 	WREG32(CP_ME_RAM_WADDR, 0);
3290 
3291 	WREG32(CP_PFP_UCODE_ADDR, 0);
3292 	WREG32(CP_CE_UCODE_ADDR, 0);
3293 	WREG32(CP_ME_RAM_WADDR, 0);
3294 	WREG32(CP_ME_RAM_RADDR, 0);
3295 	return 0;
3296 }
3297 
3298 /**
3299  * cik_cp_gfx_start - start the gfx ring
3300  *
3301  * @rdev: radeon_device pointer
3302  *
3303  * Enables the ring and loads the clear state context and other
3304  * packets required to init the ring.
3305  * Returns 0 for success, error for failure.
3306  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* 17 = number of dwords emitted below in addition to the
	 * cik_default_state table
	 */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* emit the golden register state for this asic */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}
3358 
3359 /**
3360  * cik_cp_gfx_fini - stop the gfx ring
3361  *
3362  * @rdev: radeon_device pointer
3363  *
3364  * Stop the gfx ring and tear down the driver ring
3365  * info.
3366  */
3367 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3368 {
3369 	cik_cp_gfx_enable(rdev, false);
3370 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3371 }
3372 
3373 /**
3374  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3375  *
3376  * @rdev: radeon_device pointer
3377  *
3378  * Program the location and size of the gfx ring buffer
3379  * and test it to make sure it's working.
3380  * Returns 0 for success, error for failure.
3381  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size (log2 of the size in 8-dword units) */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	/* clear RB_RPTR_WR_ENA again by rewriting tmp */
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}
	return 0;
}
3445 
3446 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3447 			      struct radeon_ring *ring)
3448 {
3449 	u32 rptr;
3450 
3451 
3452 
3453 	if (rdev->wb.enabled) {
3454 		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3455 	} else {
3456 		spin_lock(&rdev->srbm_mutex);
3457 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3458 		rptr = RREG32(CP_HQD_PQ_RPTR);
3459 		cik_srbm_select(rdev, 0, 0, 0, 0);
3460 		spin_unlock(&rdev->srbm_mutex);
3461 	}
3462 
3463 	return rptr;
3464 }
3465 
3466 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3467 			      struct radeon_ring *ring)
3468 {
3469 	u32 wptr;
3470 
3471 	if (rdev->wb.enabled) {
3472 		wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3473 	} else {
3474 		spin_lock(&rdev->srbm_mutex);
3475 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3476 		wptr = RREG32(CP_HQD_PQ_WPTR);
3477 		cik_srbm_select(rdev, 0, 0, 0, 0);
3478 		spin_unlock(&rdev->srbm_mutex);
3479 	}
3480 
3481 	return wptr;
3482 }
3483 
3484 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3485 			       struct radeon_ring *ring)
3486 {
3487 	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
3488 	WDOORBELL32(ring->doorbell_offset, ring->wptr);
3489 }
3490 
3491 /**
3492  * cik_cp_compute_enable - enable/disable the compute CP MEs
3493  *
3494  * @rdev: radeon_device pointer
3495  * @enable: enable or disable the MEs
3496  *
3497  * Halts or unhalts the compute MEs.
3498  */
3499 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3500 {
3501 	if (enable)
3502 		WREG32(CP_MEC_CNTL, 0);
3503 	else
3504 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3505 	udelay(50);
3506 }
3507 
3508 /**
3509  * cik_cp_compute_load_microcode - load the compute CP ME ucode
3510  *
3511  * @rdev: radeon_device pointer
3512  *
3513  * Loads the compute MEC1&2 ucode.
3514  * Returns 0 for success, -EINVAL if the ucode is not available.
3515  */
3516 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3517 {
3518 	const __be32 *fw_data;
3519 	int i;
3520 
3521 	if (!rdev->mec_fw)
3522 		return -EINVAL;
3523 
3524 	cik_cp_compute_enable(rdev, false);
3525 
3526 	/* MEC1 */
3527 	fw_data = (const __be32 *)rdev->mec_fw->data;
3528 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3529 	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3530 		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3531 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3532 
3533 	if (rdev->family == CHIP_KAVERI) {
3534 		/* MEC2 */
3535 		fw_data = (const __be32 *)rdev->mec_fw->data;
3536 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3537 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3538 			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3539 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3540 	}
3541 
3542 	return 0;
3543 }
3544 
3545 /**
3546  * cik_cp_compute_start - start the compute queues
3547  *
3548  * @rdev: radeon_device pointer
3549  *
3550  * Enable the compute queues.
3551  * Returns 0 for success, error for failure.
3552  */
3553 static int cik_cp_compute_start(struct radeon_device *rdev)
3554 {
3555 	cik_cp_compute_enable(rdev, true);
3556 
3557 	return 0;
3558 }
3559 
3560 /**
3561  * cik_cp_compute_fini - stop the compute queues
3562  *
3563  * @rdev: radeon_device pointer
3564  *
3565  * Stop the compute queues and tear down the driver queue
3566  * info.
3567  */
3568 static void cik_cp_compute_fini(struct radeon_device *rdev)
3569 {
3570 	int i, idx, r;
3571 
3572 	cik_cp_compute_enable(rdev, false);
3573 
3574 	for (i = 0; i < 2; i++) {
3575 		if (i == 0)
3576 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
3577 		else
3578 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
3579 
3580 		if (rdev->ring[idx].mqd_obj) {
3581 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3582 			if (unlikely(r != 0))
3583 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3584 
3585 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3586 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3587 
3588 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3589 			rdev->ring[idx].mqd_obj = NULL;
3590 		}
3591 	}
3592 }
3593 
3594 static void cik_mec_fini(struct radeon_device *rdev)
3595 {
3596 	int r;
3597 
3598 	if (rdev->mec.hpd_eop_obj) {
3599 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3600 		if (unlikely(r != 0))
3601 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3602 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3603 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3604 
3605 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3606 		rdev->mec.hpd_eop_obj = NULL;
3607 	}
3608 }
3609 
3610 #define MEC_HPD_SIZE 2048
3611 
3612 static int cik_mec_init(struct radeon_device *rdev)
3613 {
3614 	int r;
3615 	u32 *hpd;
3616 
3617 	/*
3618 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3619 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3620 	 */
3621 	if (rdev->family == CHIP_KAVERI)
3622 		rdev->mec.num_mec = 2;
3623 	else
3624 		rdev->mec.num_mec = 1;
3625 	rdev->mec.num_pipe = 4;
3626 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3627 
3628 	if (rdev->mec.hpd_eop_obj == NULL) {
3629 		r = radeon_bo_create(rdev,
3630 				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3631 				     PAGE_SIZE, true,
3632 				     RADEON_GEM_DOMAIN_GTT, NULL,
3633 				     &rdev->mec.hpd_eop_obj);
3634 		if (r) {
3635 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
3636 			return r;
3637 		}
3638 	}
3639 
3640 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3641 	if (unlikely(r != 0)) {
3642 		cik_mec_fini(rdev);
3643 		return r;
3644 	}
3645 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3646 			  &rdev->mec.hpd_eop_gpu_addr);
3647 	if (r) {
3648 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
3649 		cik_mec_fini(rdev);
3650 		return r;
3651 	}
3652 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3653 	if (r) {
3654 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
3655 		cik_mec_fini(rdev);
3656 		return r;
3657 	}
3658 
3659 	/* clear memory.  Not sure if this is required or not */
3660 	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3661 
3662 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3663 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3664 
3665 	return 0;
3666 }
3667 
/* Snapshot of the CP hardware queue descriptor (HQD) register set, one
 * u32 per register, embedded in struct bonaire_mqd below.  The values
 * staged here are written to the corresponding CP_HQD_* / CP_MQD_*
 * registers when a compute queue is initialized.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
3706 
/* Memory queue descriptor (MQD) for a Bonaire-class compute queue.
 * One MQD bo of this layout is allocated per compute ring in
 * cik_cp_compute_resume(); the embedded queue_state holds the HQD
 * register image for that queue.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
3734 
3735 /**
3736  * cik_cp_compute_resume - setup the compute queue registers
3737  *
3738  * @rdev: radeon_device pointer
3739  *
3740  * Program the compute queues and test them to make sure they
3741  * are working.
3742  * Returns 0 for success, error for failure.
3743  */
3744 static int cik_cp_compute_resume(struct radeon_device *rdev)
3745 {
3746 	int r, i, idx;
3747 	u32 tmp;
3748 	bool use_doorbell = true;
3749 	u64 hqd_gpu_addr;
3750 	u64 mqd_gpu_addr;
3751 	u64 eop_gpu_addr;
3752 	u64 wb_gpu_addr;
3753 	u32 *buf;
3754 	struct bonaire_mqd *mqd;
3755 
3756 	r = cik_cp_compute_start(rdev);
3757 	if (r)
3758 		return r;
3759 
3760 	/* fix up chicken bits */
3761 	tmp = RREG32(CP_CPF_DEBUG);
3762 	tmp |= (1 << 23);
3763 	WREG32(CP_CPF_DEBUG, tmp);
3764 
3765 	/* init the pipes */
3766 	spin_lock(&rdev->srbm_mutex);
3767 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3768 		int me = (i < 4) ? 1 : 2;
3769 		int pipe = (i < 4) ? i : (i - 4);
3770 
3771 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3772 
3773 		cik_srbm_select(rdev, me, pipe, 0, 0);
3774 
3775 		/* write the EOP addr */
3776 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3777 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3778 
3779 		/* set the VMID assigned */
3780 		WREG32(CP_HPD_EOP_VMID, 0);
3781 
3782 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3783 		tmp = RREG32(CP_HPD_EOP_CONTROL);
3784 		tmp &= ~EOP_SIZE_MASK;
3785 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
3786 		WREG32(CP_HPD_EOP_CONTROL, tmp);
3787 	}
3788 	cik_srbm_select(rdev, 0, 0, 0, 0);
3789 	spin_unlock(&rdev->srbm_mutex);
3790 
3791 	/* init the queues.  Just two for now. */
3792 	for (i = 0; i < 2; i++) {
3793 		if (i == 0)
3794 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
3795 		else
3796 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
3797 
3798 		if (rdev->ring[idx].mqd_obj == NULL) {
3799 			r = radeon_bo_create(rdev,
3800 					     sizeof(struct bonaire_mqd),
3801 					     PAGE_SIZE, true,
3802 					     RADEON_GEM_DOMAIN_GTT, NULL,
3803 					     &rdev->ring[idx].mqd_obj);
3804 			if (r) {
3805 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3806 				return r;
3807 			}
3808 		}
3809 
3810 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3811 		if (unlikely(r != 0)) {
3812 			cik_cp_compute_fini(rdev);
3813 			return r;
3814 		}
3815 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3816 				  &mqd_gpu_addr);
3817 		if (r) {
3818 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3819 			cik_cp_compute_fini(rdev);
3820 			return r;
3821 		}
3822 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3823 		if (r) {
3824 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3825 			cik_cp_compute_fini(rdev);
3826 			return r;
3827 		}
3828 
3829 		/* doorbell offset */
3830 		rdev->ring[idx].doorbell_offset =
3831 			(rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3832 
3833 		/* init the mqd struct */
3834 		memset(buf, 0, sizeof(struct bonaire_mqd));
3835 
3836 		mqd = (struct bonaire_mqd *)buf;
3837 		mqd->header = 0xC0310800;
3838 		mqd->static_thread_mgmt01[0] = 0xffffffff;
3839 		mqd->static_thread_mgmt01[1] = 0xffffffff;
3840 		mqd->static_thread_mgmt23[0] = 0xffffffff;
3841 		mqd->static_thread_mgmt23[1] = 0xffffffff;
3842 
3843 		spin_lock(&rdev->srbm_mutex);
3844 		cik_srbm_select(rdev, rdev->ring[idx].me,
3845 				rdev->ring[idx].pipe,
3846 				rdev->ring[idx].queue, 0);
3847 
3848 		/* disable wptr polling */
3849 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3850 		tmp &= ~WPTR_POLL_EN;
3851 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3852 
3853 		/* enable doorbell? */
3854 		mqd->queue_state.cp_hqd_pq_doorbell_control =
3855 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3856 		if (use_doorbell)
3857 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3858 		else
3859 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3860 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3861 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
3862 
3863 		/* disable the queue if it's active */
3864 		mqd->queue_state.cp_hqd_dequeue_request = 0;
3865 		mqd->queue_state.cp_hqd_pq_rptr = 0;
3866 		mqd->queue_state.cp_hqd_pq_wptr= 0;
3867 		if (RREG32(CP_HQD_ACTIVE) & 1) {
3868 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3869 			for (i = 0; i < rdev->usec_timeout; i++) {
3870 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
3871 					break;
3872 				udelay(1);
3873 			}
3874 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3875 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3876 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3877 		}
3878 
3879 		/* set the pointer to the MQD */
3880 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3881 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3882 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3883 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3884 		/* set MQD vmid to 0 */
3885 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3886 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3887 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3888 
3889 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3890 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3891 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3892 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3893 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3894 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3895 
3896 		/* set up the HQD, this is similar to CP_RB0_CNTL */
3897 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3898 		mqd->queue_state.cp_hqd_pq_control &=
3899 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3900 
3901 		mqd->queue_state.cp_hqd_pq_control |=
3902 			order_base_2(rdev->ring[idx].ring_size / 8);
3903 		mqd->queue_state.cp_hqd_pq_control |=
3904 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
3905 #ifdef __BIG_ENDIAN
3906 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3907 #endif
3908 		mqd->queue_state.cp_hqd_pq_control &=
3909 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3910 		mqd->queue_state.cp_hqd_pq_control |=
3911 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3912 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3913 
3914 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3915 		if (i == 0)
3916 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3917 		else
3918 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3919 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3920 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3921 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3922 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3923 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3924 
3925 		/* set the wb address wether it's enabled or not */
3926 		if (i == 0)
3927 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3928 		else
3929 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3930 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3931 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3932 			upper_32_bits(wb_gpu_addr) & 0xffff;
3933 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3934 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3935 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3936 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3937 
3938 		/* enable the doorbell if requested */
3939 		if (use_doorbell) {
3940 			mqd->queue_state.cp_hqd_pq_doorbell_control =
3941 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3942 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3943 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
3944 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3945 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3946 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
3947 				~(DOORBELL_SOURCE | DOORBELL_HIT);
3948 
3949 		} else {
3950 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3951 		}
3952 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3953 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
3954 
3955 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3956 		rdev->ring[idx].wptr = 0;
3957 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3958 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3959 		rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3960 		mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3961 
3962 		/* set the vmid for the queue */
3963 		mqd->queue_state.cp_hqd_vmid = 0;
3964 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3965 
3966 		/* activate the queue */
3967 		mqd->queue_state.cp_hqd_active = 1;
3968 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3969 
3970 		cik_srbm_select(rdev, 0, 0, 0, 0);
3971 		spin_unlock(&rdev->srbm_mutex);
3972 
3973 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3974 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3975 
3976 		rdev->ring[idx].ready = true;
3977 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3978 		if (r)
3979 			rdev->ring[idx].ready = false;
3980 	}
3981 
3982 	return 0;
3983 }
3984 
/**
 * cik_cp_enable - enable/disable both command processors
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable, false to disable
 *
 * Convenience wrapper that applies the same enable state to the
 * gfx CP and the compute CP (CIK).
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
3990 
/**
 * cik_cp_load_microcode - load the ucode for both command processors
 *
 * @rdev: radeon_device pointer
 *
 * Load the gfx CP microcode first, then the compute CP microcode
 * if that succeeded (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret;

	/* the compute ucode is only loaded if the gfx load worked */
	ret = cik_cp_gfx_load_microcode(rdev);
	if (ret == 0)
		ret = cik_cp_compute_load_microcode(rdev);

	return ret;
}
4004 
/**
 * cik_cp_fini - tear down both command processors
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the gfx CP and then the compute CP state (CIK).
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4010 
4011 static int cik_cp_resume(struct radeon_device *rdev)
4012 {
4013 	int r;
4014 
4015 	cik_enable_gui_idle_interrupt(rdev, false);
4016 
4017 	r = cik_cp_load_microcode(rdev);
4018 	if (r)
4019 		return r;
4020 
4021 	r = cik_cp_gfx_resume(rdev);
4022 	if (r)
4023 		return r;
4024 	r = cik_cp_compute_resume(rdev);
4025 	if (r)
4026 		return r;
4027 
4028 	cik_enable_gui_idle_interrupt(rdev, true);
4029 
4030 	return 0;
4031 }
4032 
/**
 * cik_print_gpu_status_regs - dump GPU status registers
 *
 * @rdev: radeon_device pointer
 *
 * Dump the GRBM/SRBM, SDMA and CP status registers to the kernel
 * log as an aid for diagnosing GPU hangs/resets (CIK).
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	/* both SDMA engines share a register layout at different offsets */
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4072 
4073 /**
4074  * cik_gpu_check_soft_reset - check which blocks are busy
4075  *
4076  * @rdev: radeon_device pointer
4077  *
4078  * Check which blocks are busy and return the relevant reset
4079  * mask to be used by cik_gpu_soft_reset().
4080  * Returns a mask of the blocks to be reset.
4081  */
4082 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4083 {
4084 	u32 reset_mask = 0;
4085 	u32 tmp;
4086 
4087 	/* GRBM_STATUS */
4088 	tmp = RREG32(GRBM_STATUS);
4089 	if (tmp & (PA_BUSY | SC_BUSY |
4090 		   BCI_BUSY | SX_BUSY |
4091 		   TA_BUSY | VGT_BUSY |
4092 		   DB_BUSY | CB_BUSY |
4093 		   GDS_BUSY | SPI_BUSY |
4094 		   IA_BUSY | IA_BUSY_NO_DMA))
4095 		reset_mask |= RADEON_RESET_GFX;
4096 
4097 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4098 		reset_mask |= RADEON_RESET_CP;
4099 
4100 	/* GRBM_STATUS2 */
4101 	tmp = RREG32(GRBM_STATUS2);
4102 	if (tmp & RLC_BUSY)
4103 		reset_mask |= RADEON_RESET_RLC;
4104 
4105 	/* SDMA0_STATUS_REG */
4106 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4107 	if (!(tmp & SDMA_IDLE))
4108 		reset_mask |= RADEON_RESET_DMA;
4109 
4110 	/* SDMA1_STATUS_REG */
4111 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4112 	if (!(tmp & SDMA_IDLE))
4113 		reset_mask |= RADEON_RESET_DMA1;
4114 
4115 	/* SRBM_STATUS2 */
4116 	tmp = RREG32(SRBM_STATUS2);
4117 	if (tmp & SDMA_BUSY)
4118 		reset_mask |= RADEON_RESET_DMA;
4119 
4120 	if (tmp & SDMA1_BUSY)
4121 		reset_mask |= RADEON_RESET_DMA1;
4122 
4123 	/* SRBM_STATUS */
4124 	tmp = RREG32(SRBM_STATUS);
4125 
4126 	if (tmp & IH_BUSY)
4127 		reset_mask |= RADEON_RESET_IH;
4128 
4129 	if (tmp & SEM_BUSY)
4130 		reset_mask |= RADEON_RESET_SEM;
4131 
4132 	if (tmp & GRBM_RQ_PENDING)
4133 		reset_mask |= RADEON_RESET_GRBM;
4134 
4135 	if (tmp & VMC_BUSY)
4136 		reset_mask |= RADEON_RESET_VMC;
4137 
4138 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4139 		   MCC_BUSY | MCD_BUSY))
4140 		reset_mask |= RADEON_RESET_MC;
4141 
4142 	if (evergreen_is_display_hung(rdev))
4143 		reset_mask |= RADEON_RESET_DISPLAY;
4144 
4145 	/* Skip MC reset as it's mostly likely not hung, just busy */
4146 	if (reset_mask & RADEON_RESET_MC) {
4147 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4148 		reset_mask &= ~RADEON_RESET_MC;
4149 	}
4150 
4151 	return reset_mask;
4152 }
4153 
4154 /**
4155  * cik_gpu_soft_reset - soft reset GPU
4156  *
4157  * @rdev: radeon_device pointer
4158  * @reset_mask: mask of which blocks to reset
4159  *
4160  * Soft reset the blocks specified in @reset_mask.
4161  */
4162 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4163 {
4164 	struct evergreen_mc_save save;
4165 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4166 	u32 tmp;
4167 
4168 	if (reset_mask == 0)
4169 		return;
4170 
4171 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4172 
4173 	cik_print_gpu_status_regs(rdev);
4174 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4175 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4176 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4177 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4178 
4179 	/* disable CG/PG */
4180 	cik_fini_pg(rdev);
4181 	cik_fini_cg(rdev);
4182 
4183 	/* stop the rlc */
4184 	cik_rlc_stop(rdev);
4185 
4186 	/* Disable GFX parsing/prefetching */
4187 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4188 
4189 	/* Disable MEC parsing/prefetching */
4190 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4191 
4192 	if (reset_mask & RADEON_RESET_DMA) {
4193 		/* sdma0 */
4194 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4195 		tmp |= SDMA_HALT;
4196 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4197 	}
4198 	if (reset_mask & RADEON_RESET_DMA1) {
4199 		/* sdma1 */
4200 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4201 		tmp |= SDMA_HALT;
4202 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4203 	}
4204 
4205 	evergreen_mc_stop(rdev, &save);
4206 	if (evergreen_mc_wait_for_idle(rdev)) {
4207 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4208 	}
4209 
4210 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4211 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4212 
4213 	if (reset_mask & RADEON_RESET_CP) {
4214 		grbm_soft_reset |= SOFT_RESET_CP;
4215 
4216 		srbm_soft_reset |= SOFT_RESET_GRBM;
4217 	}
4218 
4219 	if (reset_mask & RADEON_RESET_DMA)
4220 		srbm_soft_reset |= SOFT_RESET_SDMA;
4221 
4222 	if (reset_mask & RADEON_RESET_DMA1)
4223 		srbm_soft_reset |= SOFT_RESET_SDMA1;
4224 
4225 	if (reset_mask & RADEON_RESET_DISPLAY)
4226 		srbm_soft_reset |= SOFT_RESET_DC;
4227 
4228 	if (reset_mask & RADEON_RESET_RLC)
4229 		grbm_soft_reset |= SOFT_RESET_RLC;
4230 
4231 	if (reset_mask & RADEON_RESET_SEM)
4232 		srbm_soft_reset |= SOFT_RESET_SEM;
4233 
4234 	if (reset_mask & RADEON_RESET_IH)
4235 		srbm_soft_reset |= SOFT_RESET_IH;
4236 
4237 	if (reset_mask & RADEON_RESET_GRBM)
4238 		srbm_soft_reset |= SOFT_RESET_GRBM;
4239 
4240 	if (reset_mask & RADEON_RESET_VMC)
4241 		srbm_soft_reset |= SOFT_RESET_VMC;
4242 
4243 	if (!(rdev->flags & RADEON_IS_IGP)) {
4244 		if (reset_mask & RADEON_RESET_MC)
4245 			srbm_soft_reset |= SOFT_RESET_MC;
4246 	}
4247 
4248 	if (grbm_soft_reset) {
4249 		tmp = RREG32(GRBM_SOFT_RESET);
4250 		tmp |= grbm_soft_reset;
4251 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4252 		WREG32(GRBM_SOFT_RESET, tmp);
4253 		tmp = RREG32(GRBM_SOFT_RESET);
4254 
4255 		udelay(50);
4256 
4257 		tmp &= ~grbm_soft_reset;
4258 		WREG32(GRBM_SOFT_RESET, tmp);
4259 		tmp = RREG32(GRBM_SOFT_RESET);
4260 	}
4261 
4262 	if (srbm_soft_reset) {
4263 		tmp = RREG32(SRBM_SOFT_RESET);
4264 		tmp |= srbm_soft_reset;
4265 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4266 		WREG32(SRBM_SOFT_RESET, tmp);
4267 		tmp = RREG32(SRBM_SOFT_RESET);
4268 
4269 		udelay(50);
4270 
4271 		tmp &= ~srbm_soft_reset;
4272 		WREG32(SRBM_SOFT_RESET, tmp);
4273 		tmp = RREG32(SRBM_SOFT_RESET);
4274 	}
4275 
4276 	/* Wait a little for things to settle down */
4277 	udelay(50);
4278 
4279 	evergreen_mc_resume(rdev, &save);
4280 	udelay(50);
4281 
4282 	cik_print_gpu_status_regs(rdev);
4283 }
4284 
4285 /**
4286  * cik_asic_reset - soft reset GPU
4287  *
4288  * @rdev: radeon_device pointer
4289  *
4290  * Look up which blocks are hung and attempt
4291  * to reset them.
4292  * Returns 0 for success.
4293  */
4294 int cik_asic_reset(struct radeon_device *rdev)
4295 {
4296 	u32 reset_mask;
4297 
4298 	reset_mask = cik_gpu_check_soft_reset(rdev);
4299 
4300 	if (reset_mask)
4301 		r600_set_bios_scratch_engine_hung(rdev, true);
4302 
4303 	cik_gpu_soft_reset(rdev, reset_mask);
4304 
4305 	reset_mask = cik_gpu_check_soft_reset(rdev);
4306 
4307 	if (!reset_mask)
4308 		r600_set_bios_scratch_engine_hung(rdev, false);
4309 
4310 	return 0;
4311 }
4312 
4313 /**
4314  * cik_gfx_is_lockup - check if the 3D engine is locked up
4315  *
4316  * @rdev: radeon_device pointer
4317  * @ring: radeon_ring structure holding ring information
4318  *
4319  * Check if the 3D engine is locked up (CIK).
4320  * Returns true if the engine is locked, false if not.
4321  */
4322 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4323 {
4324 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4325 
4326 	if (!(reset_mask & (RADEON_RESET_GFX |
4327 			    RADEON_RESET_COMPUTE |
4328 			    RADEON_RESET_CP))) {
4329 		radeon_ring_lockup_update(ring);
4330 		return false;
4331 	}
4332 	/* force CP activities */
4333 	radeon_ring_force_activity(rdev, ring);
4334 	return radeon_ring_test_lockup(rdev, ring);
4335 }
4336 
4337 /* MC */
4338 /**
4339  * cik_mc_program - program the GPU memory controller
4340  *
4341  * @rdev: radeon_device pointer
4342  *
4343  * Set the location of vram, gart, and AGP in the GPU's
4344  * physical address space (CIK).
4345  */
4346 static void cik_mc_program(struct radeon_device *rdev)
4347 {
4348 	struct evergreen_mc_save save;
4349 	u32 tmp;
4350 	int i, j;
4351 
4352 	/* Initialize HDP */
4353 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4354 		WREG32((0x2c14 + j), 0x00000000);
4355 		WREG32((0x2c18 + j), 0x00000000);
4356 		WREG32((0x2c1c + j), 0x00000000);
4357 		WREG32((0x2c20 + j), 0x00000000);
4358 		WREG32((0x2c24 + j), 0x00000000);
4359 	}
4360 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4361 
4362 	evergreen_mc_stop(rdev, &save);
4363 	if (radeon_mc_wait_for_idle(rdev)) {
4364 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4365 	}
4366 	/* Lockout access through VGA aperture*/
4367 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4368 	/* Update configuration */
4369 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4370 	       rdev->mc.vram_start >> 12);
4371 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4372 	       rdev->mc.vram_end >> 12);
4373 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4374 	       rdev->vram_scratch.gpu_addr >> 12);
4375 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4376 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4377 	WREG32(MC_VM_FB_LOCATION, tmp);
4378 	/* XXX double check these! */
4379 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4380 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4381 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4382 	WREG32(MC_VM_AGP_BASE, 0);
4383 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4384 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4385 	if (radeon_mc_wait_for_idle(rdev)) {
4386 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4387 	}
4388 	evergreen_mc_resume(rdev, &save);
4389 	/* we need to own VRAM, so turn off the VGA renderer here
4390 	 * to stop it overwriting our objects */
4391 	rv515_vga_render_disable(rdev);
4392 }
4393 
4394 /**
4395  * cik_mc_init - initialize the memory controller driver params
4396  *
4397  * @rdev: radeon_device pointer
4398  *
4399  * Look up the amount of vram, vram width, and decide how to place
4400  * vram and gart within the GPU's physical address space (CIK).
4401  * Returns 0 for success.
4402  */
4403 static int cik_mc_init(struct radeon_device *rdev)
4404 {
4405 	u32 tmp;
4406 	int chansize, numchan;
4407 
4408 	/* Get VRAM informations */
4409 	rdev->mc.vram_is_ddr = true;
4410 	tmp = RREG32(MC_ARB_RAMCFG);
4411 	if (tmp & CHANSIZE_MASK) {
4412 		chansize = 64;
4413 	} else {
4414 		chansize = 32;
4415 	}
4416 	tmp = RREG32(MC_SHARED_CHMAP);
4417 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4418 	case 0:
4419 	default:
4420 		numchan = 1;
4421 		break;
4422 	case 1:
4423 		numchan = 2;
4424 		break;
4425 	case 2:
4426 		numchan = 4;
4427 		break;
4428 	case 3:
4429 		numchan = 8;
4430 		break;
4431 	case 4:
4432 		numchan = 3;
4433 		break;
4434 	case 5:
4435 		numchan = 6;
4436 		break;
4437 	case 6:
4438 		numchan = 10;
4439 		break;
4440 	case 7:
4441 		numchan = 12;
4442 		break;
4443 	case 8:
4444 		numchan = 16;
4445 		break;
4446 	}
4447 	rdev->mc.vram_width = numchan * chansize;
4448 	/* Could aper size report 0 ? */
4449 	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
4450 	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
4451 	/* size in MB on si */
4452 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4453 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4454 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4455 	si_vram_gtt_location(rdev, &rdev->mc);
4456 	radeon_update_bandwidth_info(rdev);
4457 
4458 	return 0;
4459 }
4460 
4461 /*
4462  * GART
4463  * VMID 0 is the physical GPU addresses as used by the kernel.
4464  * VMIDs 1-15 are used for userspace clients and are handled
4465  * by the radeon vm/hsa code.
4466  */
4467 /**
4468  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4469  *
4470  * @rdev: radeon_device pointer
4471  *
4472  * Flush the TLB for the VMID 0 page table (CIK).
4473  */
4474 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4475 {
4476 	/* flush hdp cache */
4477 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4478 
4479 	/* bits 0-15 are the VM contexts0-15 */
4480 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
4481 }
4482 
4483 /**
4484  * cik_pcie_gart_enable - gart enable
4485  *
4486  * @rdev: radeon_device pointer
4487  *
4488  * This sets up the TLBs, programs the page tables for VMID0,
4489  * sets up the hw for VMIDs 1-15 which are allocated on
4490  * demand, and sets up the global locations for the LDS, GDS,
4491  * and GPUVM for FSA64 clients (CIK).
4492  * Returns 0 for success, errors for failure.
4493  */
4494 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4495 {
4496 	int r, i;
4497 
4498 	if (rdev->gart.robj == NULL) {
4499 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4500 		return -EINVAL;
4501 	}
4502 	r = radeon_gart_table_vram_pin(rdev);
4503 	if (r)
4504 		return r;
4505 	radeon_gart_restore(rdev);
4506 	/* Setup TLB control */
4507 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4508 	       (0xA << 7) |
4509 	       ENABLE_L1_TLB |
4510 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4511 	       ENABLE_ADVANCED_DRIVER_MODEL |
4512 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4513 	/* Setup L2 cache */
4514 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4515 	       ENABLE_L2_FRAGMENT_PROCESSING |
4516 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4517 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4518 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4519 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4520 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4521 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4522 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4523 	/* setup context0 */
4524 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4525 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4526 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4527 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4528 			(u32)(rdev->dummy_page.addr >> 12));
4529 	WREG32(VM_CONTEXT0_CNTL2, 0);
4530 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4531 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4532 
4533 	WREG32(0x15D4, 0);
4534 	WREG32(0x15D8, 0);
4535 	WREG32(0x15DC, 0);
4536 
4537 	/* empty context1-15 */
4538 	/* FIXME start with 4G, once using 2 level pt switch to full
4539 	 * vm size space
4540 	 */
4541 	/* set vm size, must be a multiple of 4 */
4542 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4543 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4544 	for (i = 1; i < 16; i++) {
4545 		if (i < 8)
4546 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4547 			       rdev->gart.table_addr >> 12);
4548 		else
4549 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4550 			       rdev->gart.table_addr >> 12);
4551 	}
4552 
4553 	/* enable context1-15 */
4554 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4555 	       (u32)(rdev->dummy_page.addr >> 12));
4556 	WREG32(VM_CONTEXT1_CNTL2, 4);
4557 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4558 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4559 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4560 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4561 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4562 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4563 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4564 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4565 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4566 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4567 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4568 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4569 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4570 
4571 	/* TC cache setup ??? */
4572 	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4573 	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4574 	WREG32(TC_CFG_L1_STORE_POLICY, 0);
4575 
4576 	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4577 	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4578 	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4579 	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4580 	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4581 
4582 	WREG32(TC_CFG_L1_VOLATILE, 0);
4583 	WREG32(TC_CFG_L2_VOLATILE, 0);
4584 
4585 	if (rdev->family == CHIP_KAVERI) {
4586 		u32 tmp = RREG32(CHUB_CONTROL);
4587 		tmp &= ~BYPASS_VM;
4588 		WREG32(CHUB_CONTROL, tmp);
4589 	}
4590 
4591 	/* XXX SH_MEM regs */
4592 	/* where to put LDS, scratch, GPUVM in FSA64 space */
4593 	spin_lock(&rdev->srbm_mutex);
4594 	for (i = 0; i < 16; i++) {
4595 		cik_srbm_select(rdev, 0, 0, 0, i);
4596 		/* CP and shaders */
4597 		WREG32(SH_MEM_CONFIG, 0);
4598 		WREG32(SH_MEM_APE1_BASE, 1);
4599 		WREG32(SH_MEM_APE1_LIMIT, 0);
4600 		WREG32(SH_MEM_BASES, 0);
4601 		/* SDMA GFX */
4602 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4603 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4604 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4605 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4606 		/* XXX SDMA RLC - todo */
4607 	}
4608 	cik_srbm_select(rdev, 0, 0, 0, 0);
4609 	spin_unlock(&rdev->srbm_mutex);
4610 
4611 	cik_pcie_gart_tlb_flush(rdev);
4612 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4613 		 (unsigned)(rdev->mc.gtt_size >> 20),
4614 		 (unsigned long long)rdev->gart.table_addr);
4615 	rdev->gart.ready = true;
4616 	return 0;
4617 }
4618 
4619 /**
4620  * cik_pcie_gart_disable - gart disable
4621  *
4622  * @rdev: radeon_device pointer
4623  *
4624  * This disables all VM page table (CIK).
4625  */
4626 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4627 {
4628 	/* Disable all tables */
4629 	WREG32(VM_CONTEXT0_CNTL, 0);
4630 	WREG32(VM_CONTEXT1_CNTL, 0);
4631 	/* Setup TLB control */
4632 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4633 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4634 	/* Setup L2 cache */
4635 	WREG32(VM_L2_CNTL,
4636 	       ENABLE_L2_FRAGMENT_PROCESSING |
4637 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4638 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4639 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4640 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4641 	WREG32(VM_L2_CNTL2, 0);
4642 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4643 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4644 	radeon_gart_table_vram_unpin(rdev);
4645 }
4646 
4647 /**
4648  * cik_pcie_gart_fini - vm fini callback
4649  *
4650  * @rdev: radeon_device pointer
4651  *
4652  * Tears down the driver GART/VM setup (CIK).
4653  */
4654 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4655 {
4656 	cik_pcie_gart_disable(rdev);
4657 	radeon_gart_table_vram_free(rdev);
4658 	radeon_gart_fini(rdev);
4659 }
4660 
4661 /* vm parser */
4662 /**
4663  * cik_ib_parse - vm ib_parse callback
4664  *
4665  * @rdev: radeon_device pointer
4666  * @ib: indirect buffer pointer
4667  *
4668  * CIK uses hw IB checking so this is a nop (CIK).
4669  */
4670 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4671 {
4672 	return 0;
4673 }
4674 
4675 /*
4676  * vm
4677  * VMID 0 is the physical GPU addresses as used by the kernel.
4678  * VMIDs 1-15 are used for userspace clients and are handled
4679  * by the radeon vm/hsa code.
4680  */
4681 /**
4682  * cik_vm_init - cik vm init callback
4683  *
4684  * @rdev: radeon_device pointer
4685  *
4686  * Inits cik specific vm parameters (number of VMs, base of vram for
4687  * VMIDs 1-15) (CIK).
4688  * Returns 0 for success.
4689  */
4690 int cik_vm_init(struct radeon_device *rdev)
4691 {
4692 	/* number of VMs */
4693 	rdev->vm_manager.nvm = 16;
4694 	/* base offset of vram pages */
4695 	if (rdev->flags & RADEON_IS_IGP) {
4696 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
4697 		tmp <<= 22;
4698 		rdev->vm_manager.vram_base_offset = tmp;
4699 	} else
4700 		rdev->vm_manager.vram_base_offset = 0;
4701 
4702 	return 0;
4703 }
4704 
4705 /**
4706  * cik_vm_fini - cik vm fini callback
4707  *
4708  * @rdev: radeon_device pointer
4709  *
4710  * Tear down any asic specific VM setup (CIK).
4711  */
4712 void cik_vm_fini(struct radeon_device *rdev)
4713 {
4714 }
4715 
4716 /**
4717  * cik_vm_decode_fault - print human readable fault info
4718  *
4719  * @rdev: radeon_device pointer
4720  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4721  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4722  *
4723  * Print human readable fault information (CIK).
4724  */
4725 static void cik_vm_decode_fault(struct radeon_device *rdev,
4726 				u32 status, u32 addr, u32 mc_client)
4727 {
4728 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4729 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4730 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4731 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
4732 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
4733 
4734 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
4735 	       protections, vmid, addr,
4736 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4737 	       block, mc_client, mc_id);
4738 }
4739 
4740 /**
4741  * cik_vm_flush - cik vm flush using the CP
4742  *
4743  * @rdev: radeon_device pointer
4744  *
4745  * Update the page table base and flush the VM TLB
4746  * using the CP (CIK).
4747  */
4748 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4749 {
4750 	struct radeon_ring *ring = &rdev->ring[ridx];
4751 
4752 	if (vm == NULL)
4753 		return;
4754 
4755 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4756 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4757 				 WRITE_DATA_DST_SEL(0)));
4758 	if (vm->id < 8) {
4759 		radeon_ring_write(ring,
4760 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4761 	} else {
4762 		radeon_ring_write(ring,
4763 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4764 	}
4765 	radeon_ring_write(ring, 0);
4766 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4767 
4768 	/* update SH_MEM_* regs */
4769 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4770 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4771 				 WRITE_DATA_DST_SEL(0)));
4772 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4773 	radeon_ring_write(ring, 0);
4774 	radeon_ring_write(ring, VMID(vm->id));
4775 
4776 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4777 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4778 				 WRITE_DATA_DST_SEL(0)));
4779 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
4780 	radeon_ring_write(ring, 0);
4781 
4782 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4783 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4784 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4785 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4786 
4787 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4788 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4789 				 WRITE_DATA_DST_SEL(0)));
4790 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4791 	radeon_ring_write(ring, 0);
4792 	radeon_ring_write(ring, VMID(0));
4793 
4794 	/* HDP flush */
4795 	/* We should be using the WAIT_REG_MEM packet here like in
4796 	 * cik_fence_ring_emit(), but it causes the CP to hang in this
4797 	 * context...
4798 	 */
4799 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4800 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4801 				 WRITE_DATA_DST_SEL(0)));
4802 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4803 	radeon_ring_write(ring, 0);
4804 	radeon_ring_write(ring, 0);
4805 
4806 	/* bits 0-15 are the VM contexts0-15 */
4807 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4808 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4809 				 WRITE_DATA_DST_SEL(0)));
4810 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4811 	radeon_ring_write(ring, 0);
4812 	radeon_ring_write(ring, 1 << vm->id);
4813 
4814 	/* compute doesn't have PFP */
4815 	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4816 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4817 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4818 		radeon_ring_write(ring, 0x0);
4819 	}
4820 }
4821 
4822 /**
4823  * cik_vm_set_page - update the page tables using sDMA
4824  *
4825  * @rdev: radeon_device pointer
4826  * @ib: indirect buffer to fill with commands
4827  * @pe: addr of the page entry
4828  * @addr: dst addr to write into pe
4829  * @count: number of page entries to update
4830  * @incr: increase next addr by incr bytes
4831  * @flags: access flags
4832  *
4833  * Update the page tables using CP or sDMA (CIK).
4834  */
4835 void cik_vm_set_page(struct radeon_device *rdev,
4836 		     struct radeon_ib *ib,
4837 		     uint64_t pe,
4838 		     uint64_t addr, unsigned count,
4839 		     uint32_t incr, uint32_t flags)
4840 {
4841 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4842 	uint64_t value;
4843 	unsigned ndw;
4844 
4845 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4846 		/* CP */
4847 		while (count) {
4848 			ndw = 2 + count * 2;
4849 			if (ndw > 0x3FFE)
4850 				ndw = 0x3FFE;
4851 
4852 			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4853 			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4854 						    WRITE_DATA_DST_SEL(1));
4855 			ib->ptr[ib->length_dw++] = pe;
4856 			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4857 			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4858 				if (flags & RADEON_VM_PAGE_SYSTEM) {
4859 					value = radeon_vm_map_gart(rdev, addr);
4860 					value &= 0xFFFFFFFFFFFFF000ULL;
4861 				} else if (flags & RADEON_VM_PAGE_VALID) {
4862 					value = addr;
4863 				} else {
4864 					value = 0;
4865 				}
4866 				addr += incr;
4867 				value |= r600_flags;
4868 				ib->ptr[ib->length_dw++] = value;
4869 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4870 			}
4871 		}
4872 	} else {
4873 		/* DMA */
4874 		cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
4875 	}
4876 }
4877 
4878 /*
4879  * RLC
4880  * The RLC is a multi-purpose microengine that handles a
4881  * variety of functions, the most important of which is
4882  * the interrupt controller.
4883  */
4884 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4885 					  bool enable)
4886 {
4887 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
4888 
4889 	if (enable)
4890 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4891 	else
4892 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4893 	WREG32(CP_INT_CNTL_RING0, tmp);
4894 }
4895 
4896 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4897 {
4898 	u32 tmp;
4899 
4900 	tmp = RREG32(RLC_LB_CNTL);
4901 	if (enable)
4902 		tmp |= LOAD_BALANCE_ENABLE;
4903 	else
4904 		tmp &= ~LOAD_BALANCE_ENABLE;
4905 	WREG32(RLC_LB_CNTL, tmp);
4906 }
4907 
/* Poll (bounded by usec_timeout per register) until the RLC serdes units
 * report idle: first each CU master on every SE/SH pair, then the
 * non-CU masters.  Timeouts are silent; this is a best-effort wait.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast SE/SH selection */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
4932 
4933 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4934 {
4935 	u32 tmp;
4936 
4937 	tmp = RREG32(RLC_CNTL);
4938 	if (tmp != rlc)
4939 		WREG32(RLC_CNTL, rlc);
4940 }
4941 
/* Disable the RLC if it is running and wait for the RLC GPM and serdes
 * units to go idle.  Returns the original RLC_CNTL value so callers can
 * restore it later via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait (bounded) for the RLC GPM to go idle */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
4965 
/* Request RLC safe mode via RLC_GPR_REG2 and wait (bounded) first for the
 * GFX power/clock status bits to assert and then for the RLC to
 * acknowledge the request by clearing REQ.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to ack (REQ cleared) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
4986 
4987 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
4988 {
4989 	u32 tmp;
4990 
4991 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
4992 	WREG32(RLC_GPR_REG2, tmp);
4993 }
4994 
4995 /**
4996  * cik_rlc_stop - stop the RLC ME
4997  *
4998  * @rdev: radeon_device pointer
4999  *
5000  * Halt the RLC ME (MicroEngine) (CIK).
5001  */
5002 static void cik_rlc_stop(struct radeon_device *rdev)
5003 {
5004 	WREG32(RLC_CNTL, 0);
5005 
5006 	cik_enable_gui_idle_interrupt(rdev, false);
5007 
5008 	cik_wait_for_rlc_serdes(rdev);
5009 }
5010 
5011 /**
5012  * cik_rlc_start - start the RLC ME
5013  *
5014  * @rdev: radeon_device pointer
5015  *
5016  * Unhalt the RLC ME (MicroEngine) (CIK).
5017  */
5018 static void cik_rlc_start(struct radeon_device *rdev)
5019 {
5020 	WREG32(RLC_CNTL, RLC_ENABLE);
5021 
5022 	cik_enable_gui_idle_interrupt(rdev, true);
5023 
5024 	udelay(50);
5025 }
5026 
5027 /**
5028  * cik_rlc_resume - setup the RLC hw
5029  *
5030  * @rdev: radeon_device pointer
5031  *
5032  * Initialize the RLC registers, load the ucode,
5033  * and start the RLC (CIK).
5034  * Returns 0 for success, -EINVAL if the ucode is not available.
5035  */
5036 static int cik_rlc_resume(struct radeon_device *rdev)
5037 {
5038 	u32 i, size, tmp;
5039 	const __be32 *fw_data;
5040 
5041 	if (!rdev->rlc_fw)
5042 		return -EINVAL;
5043 
5044 	switch (rdev->family) {
5045 	case CHIP_BONAIRE:
5046 	default:
5047 		size = BONAIRE_RLC_UCODE_SIZE;
5048 		break;
5049 	case CHIP_KAVERI:
5050 		size = KV_RLC_UCODE_SIZE;
5051 		break;
5052 	case CHIP_KABINI:
5053 		size = KB_RLC_UCODE_SIZE;
5054 		break;
5055 	}
5056 
5057 	cik_rlc_stop(rdev);
5058 
5059 	/* disable CG */
5060 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5061 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5062 
5063 	si_rlc_reset(rdev);
5064 
5065 	cik_init_pg(rdev);
5066 
5067 	cik_init_cg(rdev);
5068 
5069 	WREG32(RLC_LB_CNTR_INIT, 0);
5070 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5071 
5072 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5073 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5074 	WREG32(RLC_LB_PARAMS, 0x00600408);
5075 	WREG32(RLC_LB_CNTL, 0x80000004);
5076 
5077 	WREG32(RLC_MC_CNTL, 0);
5078 	WREG32(RLC_UCODE_CNTL, 0);
5079 
5080 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5081 		WREG32(RLC_GPM_UCODE_ADDR, 0);
5082 	for (i = 0; i < size; i++)
5083 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5084 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5085 
5086 	/* XXX - find out what chips support lbpw */
5087 	cik_enable_lbpw(rdev, false);
5088 
5089 	if (rdev->family == CHIP_BONAIRE)
5090 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5091 
5092 	cik_rlc_start(rdev);
5093 
5094 	return 0;
5095 }
5096 
/* Enable/disable coarse grain clock gating (CGCG/CGLS).  The halt ->
 * serdes program -> restore sequence below is order sensitive.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while programming the serdes, then restore */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* repeated reads of CB_CGTT_SCLK_CTRL; presumably a
		 * settle/flush sequence — TODO confirm against upstream
		 */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* avoid a redundant register write when nothing changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5132 
/* Enable/disable medium grain clock gating (MGCG) and the related
 * CP/RLC memory light sleep and CGTS options.  Statement order matters:
 * the RLC must be halted around the serdes writes in both branches.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* clear the MGCG override bit (bit 1) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* set the MGCG override bit */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC and CP memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
5211 
/* Memory-controller registers whose clock gating (MC_CG_ENABLE) and
 * light sleep (MC_LS_ENABLE) bits are toggled uniformly by
 * cik_enable_mc_mgcg() and cik_enable_mc_ls().
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5224 
5225 static void cik_enable_mc_ls(struct radeon_device *rdev,
5226 			     bool enable)
5227 {
5228 	int i;
5229 	u32 orig, data;
5230 
5231 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5232 		orig = data = RREG32(mc_cg_registers[i]);
5233 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5234 			data |= MC_LS_ENABLE;
5235 		else
5236 			data &= ~MC_LS_ENABLE;
5237 		if (data != orig)
5238 			WREG32(mc_cg_registers[i], data);
5239 	}
5240 }
5241 
5242 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5243 			       bool enable)
5244 {
5245 	int i;
5246 	u32 orig, data;
5247 
5248 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5249 		orig = data = RREG32(mc_cg_registers[i]);
5250 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5251 			data |= MC_CG_ENABLE;
5252 		else
5253 			data &= ~MC_CG_ENABLE;
5254 		if (data != orig)
5255 			WREG32(mc_cg_registers[i], data);
5256 	}
5257 }
5258 
5259 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5260 				 bool enable)
5261 {
5262 	u32 orig, data;
5263 
5264 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5265 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5266 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5267 	} else {
5268 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5269 		data |= 0xff000000;
5270 		if (data != orig)
5271 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5272 
5273 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5274 		data |= 0xff000000;
5275 		if (data != orig)
5276 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5277 	}
5278 }
5279 
5280 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5281 				 bool enable)
5282 {
5283 	u32 orig, data;
5284 
5285 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5286 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5287 		data |= 0x100;
5288 		if (orig != data)
5289 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5290 
5291 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5292 		data |= 0x100;
5293 		if (orig != data)
5294 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5295 	} else {
5296 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5297 		data &= ~0x100;
5298 		if (orig != data)
5299 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5300 
5301 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5302 		data &= ~0x100;
5303 		if (orig != data)
5304 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5305 	}
5306 }
5307 
/* Enable/disable UVD medium grain clock gating via the UVD context
 * registers and UVD_CGC_CTRL's DCM bit.
 */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the value read above is discarded and the
		 * full 0xfff field written unconditionally; looks deliberate
		 * (mirrors the ~0xfff clear below) but confirm upstream.
		 */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
5333 
5334 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5335 			       bool enable)
5336 {
5337 	u32 orig, data;
5338 
5339 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5340 
5341 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5342 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5343 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5344 	else
5345 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5346 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5347 
5348 	if (orig != data)
5349 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
5350 }
5351 
5352 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5353 				bool enable)
5354 {
5355 	u32 orig, data;
5356 
5357 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5358 
5359 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5360 		data &= ~CLOCK_GATING_DIS;
5361 	else
5362 		data |= CLOCK_GATING_DIS;
5363 
5364 	if (orig != data)
5365 		WREG32(HDP_HOST_PATH_CNTL, data);
5366 }
5367 
5368 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5369 			      bool enable)
5370 {
5371 	u32 orig, data;
5372 
5373 	orig = data = RREG32(HDP_MEM_POWER_LS);
5374 
5375 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5376 		data |= HDP_LS_ENABLE;
5377 	else
5378 		data &= ~HDP_LS_ENABLE;
5379 
5380 	if (orig != data)
5381 		WREG32(HDP_MEM_POWER_LS, data);
5382 }
5383 
/* Enable or disable clock gating for the blocks selected in @block
 * (a mask of RADEON_CG_BLOCK_* flags).  For GFX the MGCG/CGCG order
 * differs between enable and disable — see the comment below.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating is only touched on discrete parts */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}
}
5427 
/* Turn on clock gating for all supported blocks: GFX first, then the
 * UVD internal CG, then the remaining blocks in one call.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
5442 
/* Turn off clock gating in the reverse order of cik_init_cg():
 * non-GFX blocks first, GFX last.
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
5453 
5454 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5455 					  bool enable)
5456 {
5457 	u32 data, orig;
5458 
5459 	orig = data = RREG32(RLC_PG_CNTL);
5460 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5461 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5462 	else
5463 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5464 	if (orig != data)
5465 		WREG32(RLC_PG_CNTL, data);
5466 }
5467 
5468 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5469 					  bool enable)
5470 {
5471 	u32 data, orig;
5472 
5473 	orig = data = RREG32(RLC_PG_CNTL);
5474 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5475 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5476 	else
5477 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5478 	if (orig != data)
5479 		WREG32(RLC_PG_CNTL, data);
5480 }
5481 
5482 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5483 {
5484 	u32 data, orig;
5485 
5486 	orig = data = RREG32(RLC_PG_CNTL);
5487 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
5488 		data &= ~DISABLE_CP_PG;
5489 	else
5490 		data |= DISABLE_CP_PG;
5491 	if (orig != data)
5492 		WREG32(RLC_PG_CNTL, data);
5493 }
5494 
5495 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5496 {
5497 	u32 data, orig;
5498 
5499 	orig = data = RREG32(RLC_PG_CNTL);
5500 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
5501 		data &= ~DISABLE_GDS_PG;
5502 	else
5503 		data |= DISABLE_GDS_PG;
5504 	if (orig != data)
5505 		WREG32(RLC_PG_CNTL, data);
5506 }
5507 
/* dword size of each per-ME table and the offsets (in big-endian dwords)
 * of the tables inside the CE/PFP/ME and MEC firmware images
 */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

/* Copy the per-microengine power-gating tables out of the CP firmware
 * images into the RLC's CP table buffer (CE, PFP, ME, then the MEC(s);
 * KAVERI has one extra ME).  Byte-swaps from firmware big-endian.
 * No-op if the table buffer has not been mapped.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset;

	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			fw_data = (const __be32 *)rdev->ce_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 1) {
			fw_data = (const __be32 *)rdev->pfp_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 2) {
			fw_data = (const __be32 *)rdev->me_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else {
			/* me == 3 (and 4 on KAVERI): MEC firmware */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			table_offset = CP_MEC_TABLE_OFFSET;
		}

		for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
			dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
		}
		bo_offset += CP_ME_TABLE_SIZE;
	}
}
5549 
/* Enable/disable GFX coarse grain power gating (GFX_PG_ENABLE plus the
 * RLC auto power-gating control).
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* result discarded; presumably a read to flush/settle the
		 * disable — TODO confirm against upstream radeon
		 */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
5579 
5580 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5581 {
5582 	u32 mask = 0, tmp, tmp1;
5583 	int i;
5584 
5585 	cik_select_se_sh(rdev, se, sh);
5586 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5587 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5588 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5589 
5590 	tmp &= 0xffff0000;
5591 
5592 	tmp |= tmp1;
5593 	tmp >>= 16;
5594 
5595 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
5596 		mask <<= 1;
5597 		mask |= 1;
5598 	}
5599 
5600 	return (~tmp) & mask;
5601 }
5602 
5603 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5604 {
5605 	u32 i, j, k, active_cu_number = 0;
5606 	u32 mask, counter, cu_bitmap;
5607 	u32 tmp = 0;
5608 
5609 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5610 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5611 			mask = 1;
5612 			cu_bitmap = 0;
5613 			counter = 0;
5614 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
5615 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5616 					if (counter < 2)
5617 						cu_bitmap |= mask;
5618 					counter ++;
5619 				}
5620 				mask <<= 1;
5621 			}
5622 
5623 			active_cu_number += counter;
5624 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5625 		}
5626 	}
5627 
5628 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5629 
5630 	tmp = RREG32(RLC_MAX_PG_CU);
5631 	tmp &= ~MAX_PU_CU_MASK;
5632 	tmp |= MAX_PU_CU(active_cu_number);
5633 	WREG32(RLC_MAX_PG_CU, tmp);
5634 }
5635 
5636 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5637 				       bool enable)
5638 {
5639 	u32 data, orig;
5640 
5641 	orig = data = RREG32(RLC_PG_CNTL);
5642 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5643 		data |= STATIC_PER_CU_PG_ENABLE;
5644 	else
5645 		data &= ~STATIC_PER_CU_PG_ENABLE;
5646 	if (orig != data)
5647 		WREG32(RLC_PG_CNTL, data);
5648 }
5649 
5650 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5651 					bool enable)
5652 {
5653 	u32 data, orig;
5654 
5655 	orig = data = RREG32(RLC_PG_CNTL);
5656 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5657 		data |= DYN_PER_CU_PG_ENABLE;
5658 	else
5659 		data &= ~DYN_PER_CU_PG_ENABLE;
5660 	if (orig != data)
5661 		WREG32(RLC_PG_CNTL, data);
5662 }
5663 
/* offsets into the RLC GPM scratch space for the save/restore list and
 * the clear state descriptor
 */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/* Program the RLC scratch space with the clear-state descriptor and the
 * register save/restore list, point the RLC at the save/restore and CP
 * table buffers, and set up the power-gating delay/threshold registers.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* descriptor: clear-state gpu addr (hi, lo) and size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero out the three descriptor words */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
5715 
/* Apply or remove all three GFX power-gating modes (CGPG before the
 * static/dynamic per-CU modes).
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
5722 
5723 u32 cik_get_csb_size(struct radeon_device *rdev)
5724 {
5725 	u32 count = 0;
5726 	const struct cs_section_def *sect = NULL;
5727 	const struct cs_extent_def *ext = NULL;
5728 
5729 	if (rdev->rlc.cs_data == NULL)
5730 		return 0;
5731 
5732 	/* begin clear state */
5733 	count += 2;
5734 	/* context control state */
5735 	count += 3;
5736 
5737 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5738 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5739 			if (sect->id == SECT_CONTEXT)
5740 				count += 2 + ext->reg_count;
5741 			else
5742 				return 0;
5743 		}
5744 	}
5745 	/* pa_sc_raster_config/pa_sc_raster_config1 */
5746 	count += 4;
5747 	/* end clear state */
5748 	count += 2;
5749 	/* clear state */
5750 	count += 2;
5751 
5752 	return count;
5753 }
5754 
/* Fill @buffer with the clear state packet stream.  The layout must
 * mirror cik_get_csb_size() dword for dword; keep the two functions in
 * sync when modifying either.
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;

	/* context control */
	buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
	buffer[count++] = 0x80000000;
	buffer[count++] = 0x80000000;

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
				buffer[count++] = ext->reg_index - 0xa000;
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = ext->extent[i];
			} else {
				/* only context sections are supported;
				 * cik_get_csb_size() returns 0 in this case
				 */
				return;
			}
		}
	}

	/* pa_sc_raster_config/pa_sc_raster_config1, per family */
	buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = 0x16000012;
		buffer[count++] = 0x00000000;
		break;
	case CHIP_KAVERI:
		buffer[count++] = 0x00000000; /* XXX */
		buffer[count++] = 0x00000000;
		break;
	case CHIP_KABINI:
		buffer[count++] = 0x00000000; /* XXX */
		buffer[count++] = 0x00000000;
		break;
	default:
		buffer[count++] = 0x00000000;
		buffer[count++] = 0x00000000;
		break;
	}

	/* end clear state */
	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;

	/* clear state */
	buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
	buffer[count++] = 0;
}
5813 
/* Enable the supported power-gating features; a no-op when pg_flags
 * is zero.  GFX CGPG setup must precede CP/GDS PG enables.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
5828 
/* Disable power gating in the reverse order of cik_init_pg().  The
 * clock-slowdown settings are deliberately left in place.
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
5839 
5840 /*
5841  * Interrupts
5842  * Starting with r6xx, interrupts are handled via a ring buffer.
5843  * Ring buffers are areas of GPU accessible memory that the GPU
5844  * writes interrupt vectors into and the host reads vectors out of.
5845  * There is a rptr (read pointer) that determines where the
5846  * host is currently reading, and a wptr (write pointer)
5847  * which determines where the GPU has written.  When the
5848  * pointers are equal, the ring is idle.  When the GPU
5849  * writes vectors to the ring buffer, it increments the
5850  * wptr.  When there is an interrupt, the host then starts
5851  * fetching commands and processing them until the pointers are
5852  * equal again at which point it updates the rptr.
5853  */
5854 
5855 /**
5856  * cik_enable_interrupts - Enable the interrupt ring buffer
5857  *
5858  * @rdev: radeon_device pointer
5859  *
5860  * Enable the interrupt ring buffer (CIK).
5861  */
5862 static void cik_enable_interrupts(struct radeon_device *rdev)
5863 {
5864 	u32 ih_cntl = RREG32(IH_CNTL);
5865 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5866 
5867 	ih_cntl |= ENABLE_INTR;
5868 	ih_rb_cntl |= IH_RB_ENABLE;
5869 	WREG32(IH_CNTL, ih_cntl);
5870 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5871 	rdev->ih.enabled = true;
5872 }
5873 
5874 /**
5875  * cik_disable_interrupts - Disable the interrupt ring buffer
5876  *
5877  * @rdev: radeon_device pointer
5878  *
5879  * Disable the interrupt ring buffer (CIK).
5880  */
5881 static void cik_disable_interrupts(struct radeon_device *rdev)
5882 {
5883 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5884 	u32 ih_cntl = RREG32(IH_CNTL);
5885 
5886 	ih_rb_cntl &= ~IH_RB_ENABLE;
5887 	ih_cntl &= ~ENABLE_INTR;
5888 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5889 	WREG32(IH_CNTL, ih_cntl);
5890 	/* set rptr, wptr to 0 */
5891 	WREG32(IH_RB_RPTR, 0);
5892 	WREG32(IH_RB_WPTR, 0);
5893 	rdev->ih.enabled = false;
5894 	rdev->ih.rptr = 0;
5895 }
5896 
5897 /**
5898  * cik_disable_interrupt_state - Disable all interrupt sources
5899  *
5900  * @rdev: radeon_device pointer
5901  *
5902  * Clear all interrupt enable bits used by the driver (CIK).
5903  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty bits, drop every
	 * other interrupt enable on ring 0 */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: clear the trap enable on both SDMA engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: fully mask all 8 MEC pipes (ME1/ME2 x 4 pipes) */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. — mask per-crtc display interrupts; only
	 * touch crtcs 2-5 when the asic actually has them */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: clear the HPD interrupt enables but preserve
	 * the configured interrupt polarity bit on each pad */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
5958 
5959 /**
5960  * cik_irq_init - init and enable the interrupt ring
5961  *
5962  * @rdev: radeon_device pointer
5963  *
5964  * Allocate a ring buffer for the interrupt controller,
5965  * enable the RLC, disable interrupts, enable the IH
5966  * ring buffer and enable it (CIK).
 * Called at device load and resume.
5968  * Returns 0 for success, errors for failure.
5969  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs while we reprogram the IH */
	cik_disable_interrupts(rdev);

	/* init rlc; the RLC must be running for interrupt delivery */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	/* ring base is a 256-byte aligned MC address */
	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_enable_busmaster(rdev->dev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
6040 
6041 /**
6042  * cik_irq_set - enable/disable interrupt sources
6043  *
6044  * @rdev: radeon_device pointer
6045  *
6046  * Enable interrupt sources on the GPU (vblanks, hpd,
6047  * etc.) (CIK).
6048  * Returns 0 for success, errors for failure.
6049  */
6050 int cik_irq_set(struct radeon_device *rdev)
6051 {
6052 	u32 cp_int_cntl;
6053 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6054 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6055 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6056 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6057 	u32 grbm_int_cntl = 0;
6058 	u32 dma_cntl, dma_cntl1;
6059 	u32 thermal_int;
6060 
6061 	if (!rdev->irq.installed) {
6062 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6063 		return -EINVAL;
6064 	}
6065 	/* don't enable anything if the ih is disabled */
6066 	if (!rdev->ih.enabled) {
6067 		cik_disable_interrupts(rdev);
6068 		/* force the active interrupt state to all disabled */
6069 		cik_disable_interrupt_state(rdev);
6070 		return 0;
6071 	}
6072 
6073 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6074 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6075 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6076 
6077 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6078 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6079 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6080 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6081 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6082 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6083 
6084 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6085 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6086 
6087 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6088 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6089 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6090 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6091 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6092 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6093 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6094 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6095 
6096 	if (rdev->flags & RADEON_IS_IGP)
6097 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6098 			~(THERM_INTH_MASK | THERM_INTL_MASK);
6099 	else
6100 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6101 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6102 
6103 	/* enable CP interrupts on all rings */
6104 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6105 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
6106 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6107 	}
6108 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6109 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6110 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6111 		if (ring->me == 1) {
6112 			switch (ring->pipe) {
6113 			case 0:
6114 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6115 				break;
6116 			case 1:
6117 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6118 				break;
6119 			case 2:
6120 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6121 				break;
6122 			case 3:
6123 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6124 				break;
6125 			default:
6126 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6127 				break;
6128 			}
6129 		} else if (ring->me == 2) {
6130 			switch (ring->pipe) {
6131 			case 0:
6132 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6133 				break;
6134 			case 1:
6135 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6136 				break;
6137 			case 2:
6138 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6139 				break;
6140 			case 3:
6141 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6142 				break;
6143 			default:
6144 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6145 				break;
6146 			}
6147 		} else {
6148 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6149 		}
6150 	}
6151 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6152 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6153 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6154 		if (ring->me == 1) {
6155 			switch (ring->pipe) {
6156 			case 0:
6157 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6158 				break;
6159 			case 1:
6160 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6161 				break;
6162 			case 2:
6163 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6164 				break;
6165 			case 3:
6166 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6167 				break;
6168 			default:
6169 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6170 				break;
6171 			}
6172 		} else if (ring->me == 2) {
6173 			switch (ring->pipe) {
6174 			case 0:
6175 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6176 				break;
6177 			case 1:
6178 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6179 				break;
6180 			case 2:
6181 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6182 				break;
6183 			case 3:
6184 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6185 				break;
6186 			default:
6187 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6188 				break;
6189 			}
6190 		} else {
6191 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6192 		}
6193 	}
6194 
6195 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6196 		DRM_DEBUG("cik_irq_set: sw int dma\n");
6197 		dma_cntl |= TRAP_ENABLE;
6198 	}
6199 
6200 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6201 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
6202 		dma_cntl1 |= TRAP_ENABLE;
6203 	}
6204 
6205 	if (rdev->irq.crtc_vblank_int[0] ||
6206 	    atomic_read(&rdev->irq.pflip[0])) {
6207 		DRM_DEBUG("cik_irq_set: vblank 0\n");
6208 		crtc1 |= VBLANK_INTERRUPT_MASK;
6209 	}
6210 	if (rdev->irq.crtc_vblank_int[1] ||
6211 	    atomic_read(&rdev->irq.pflip[1])) {
6212 		DRM_DEBUG("cik_irq_set: vblank 1\n");
6213 		crtc2 |= VBLANK_INTERRUPT_MASK;
6214 	}
6215 	if (rdev->irq.crtc_vblank_int[2] ||
6216 	    atomic_read(&rdev->irq.pflip[2])) {
6217 		DRM_DEBUG("cik_irq_set: vblank 2\n");
6218 		crtc3 |= VBLANK_INTERRUPT_MASK;
6219 	}
6220 	if (rdev->irq.crtc_vblank_int[3] ||
6221 	    atomic_read(&rdev->irq.pflip[3])) {
6222 		DRM_DEBUG("cik_irq_set: vblank 3\n");
6223 		crtc4 |= VBLANK_INTERRUPT_MASK;
6224 	}
6225 	if (rdev->irq.crtc_vblank_int[4] ||
6226 	    atomic_read(&rdev->irq.pflip[4])) {
6227 		DRM_DEBUG("cik_irq_set: vblank 4\n");
6228 		crtc5 |= VBLANK_INTERRUPT_MASK;
6229 	}
6230 	if (rdev->irq.crtc_vblank_int[5] ||
6231 	    atomic_read(&rdev->irq.pflip[5])) {
6232 		DRM_DEBUG("cik_irq_set: vblank 5\n");
6233 		crtc6 |= VBLANK_INTERRUPT_MASK;
6234 	}
6235 	if (rdev->irq.hpd[0]) {
6236 		DRM_DEBUG("cik_irq_set: hpd 1\n");
6237 		hpd1 |= DC_HPDx_INT_EN;
6238 	}
6239 	if (rdev->irq.hpd[1]) {
6240 		DRM_DEBUG("cik_irq_set: hpd 2\n");
6241 		hpd2 |= DC_HPDx_INT_EN;
6242 	}
6243 	if (rdev->irq.hpd[2]) {
6244 		DRM_DEBUG("cik_irq_set: hpd 3\n");
6245 		hpd3 |= DC_HPDx_INT_EN;
6246 	}
6247 	if (rdev->irq.hpd[3]) {
6248 		DRM_DEBUG("cik_irq_set: hpd 4\n");
6249 		hpd4 |= DC_HPDx_INT_EN;
6250 	}
6251 	if (rdev->irq.hpd[4]) {
6252 		DRM_DEBUG("cik_irq_set: hpd 5\n");
6253 		hpd5 |= DC_HPDx_INT_EN;
6254 	}
6255 	if (rdev->irq.hpd[5]) {
6256 		DRM_DEBUG("cik_irq_set: hpd 6\n");
6257 		hpd6 |= DC_HPDx_INT_EN;
6258 	}
6259 
6260 	if (rdev->irq.dpm_thermal) {
6261 		DRM_DEBUG("dpm thermal\n");
6262 		if (rdev->flags & RADEON_IS_IGP)
6263 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6264 		else
6265 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6266 	}
6267 
6268 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6269 
6270 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6271 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6272 
6273 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6274 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6275 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6276 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6277 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6278 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6279 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6280 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6281 
6282 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6283 
6284 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6285 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6286 	if (rdev->num_crtc >= 4) {
6287 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6288 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6289 	}
6290 	if (rdev->num_crtc >= 6) {
6291 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6292 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6293 	}
6294 
6295 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
6296 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
6297 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
6298 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
6299 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
6300 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
6301 
6302 	if (rdev->flags & RADEON_IS_IGP)
6303 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6304 	else
6305 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
6306 
6307 	return 0;
6308 }
6309 
6310 /**
6311  * cik_irq_ack - ack interrupt sources
6312  *
6313  * @rdev: radeon_device pointer
6314  *
6315  * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupt sources are sw
6317  * generated and do not require an explicit ack.
6318  */
6319 static inline void cik_irq_ack(struct radeon_device *rdev)
6320 {
6321 	u32 tmp;
6322 
6323 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6324 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6325 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6326 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6327 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6328 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6329 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6330 
6331 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6332 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6333 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6334 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6335 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6336 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6337 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6338 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6339 
6340 	if (rdev->num_crtc >= 4) {
6341 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6342 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6343 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6344 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6345 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6346 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6347 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6348 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6349 	}
6350 
6351 	if (rdev->num_crtc >= 6) {
6352 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6353 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6354 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6355 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6356 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6357 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6358 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6359 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6360 	}
6361 
6362 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6363 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6364 		tmp |= DC_HPDx_INT_ACK;
6365 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6366 	}
6367 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6368 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6369 		tmp |= DC_HPDx_INT_ACK;
6370 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6371 	}
6372 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6373 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6374 		tmp |= DC_HPDx_INT_ACK;
6375 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6376 	}
6377 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6378 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6379 		tmp |= DC_HPDx_INT_ACK;
6380 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6381 	}
6382 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6383 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6384 		tmp |= DC_HPDx_INT_ACK;
6385 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6386 	}
6387 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6388 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6389 		tmp |= DC_HPDx_INT_ACK;
6390 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6391 	}
6392 }
6393 
6394 /**
6395  * cik_irq_disable - disable interrupts
6396  *
6397  * @rdev: radeon_device pointer
6398  *
6399  * Disable interrupts on the hw (CIK).
6400  */
static void cik_irq_disable(struct radeon_device *rdev)
{
	/* stop the IH ring first so no new vectors arrive */
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* then mask every individual interrupt source */
	cik_disable_interrupt_state(rdev);
}
6409 
6410 /**
 * cik_irq_suspend - disable interrupts for suspend
6412  *
6413  * @rdev: radeon_device pointer
6414  *
6415  * Disable interrupts and stop the RLC (CIK).
6416  * Used for suspend.
6417  */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* disable all interrupt sources, then halt the RLC for suspend */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
6423 
6424 /**
6425  * cik_irq_fini - tear down interrupt support
6426  *
6427  * @rdev: radeon_device pointer
6428  *
6429  * Disable interrupts on the hw and free the IH ring
6430  * buffer (CIK).
6431  * Used for driver unload.
6432  */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hardware before freeing the IH ring buffer */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6438 
6439 /**
6440  * cik_get_ih_wptr - get the IH ring buffer wptr
6441  *
6442  * @rdev: radeon_device pointer
6443  *
6444  * Get the IH ring buffer wptr from either the register
6445  * or the writeback memory buffer (CIK).  Also check for
6446  * ring buffer overflow and deal with it.
6447  * Used by cik_irq_process().
6448  * Returns the value of the wptr.
6449  */
6450 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6451 {
6452 	u32 wptr, tmp;
6453 
6454 	if (rdev->wb.enabled)
6455 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6456 	else
6457 		wptr = RREG32(IH_RB_WPTR);
6458 
6459 	if (wptr & RB_OVERFLOW) {
6460 		/* When a ring buffer overflow happen start parsing interrupt
6461 		 * from the last not overwritten vector (wptr + 16). Hopefully
6462 		 * this should allow us to catchup.
6463 		 */
6464 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6465 			wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6466 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6467 		tmp = RREG32(IH_RB_CNTL);
6468 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6469 		WREG32(IH_RB_CNTL, tmp);
6470 	}
6471 	return (wptr & rdev->ih.ptr_mask);
6472 }
6473 
6474 /*        CIK IV Ring
6475  * Each IV ring entry is 128 bits:
6476  * [7:0]    - interrupt source id
6477  * [31:8]   - reserved
6478  * [59:32]  - interrupt source data
6479  * [63:60]  - reserved
6480  * [71:64]  - RINGID
6481  *            CP:
6482  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6483  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6484  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6485  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6486  *            PIPE_ID - ME0 0=3D
6487  *                    - ME1&2 compute dispatcher (4 pipes each)
6488  *            SDMA:
6489  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
6490  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
6491  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6492  * [79:72]  - VMID
6493  * [95:80]  - PASID
6494  * [127:96] - reserved
6495  */
6496 /**
6497  * cik_irq_process - interrupt handler
6498  *
6499  * @rdev: radeon_device pointer
6500  *
 * Interrupt handler (CIK).  Walk the IH ring,
6502  * ack interrupts and schedule work to handle
6503  * interrupt events.
6504  * Returns irq process return code.
6505  */
6506 irqreturn_t cik_irq_process(struct radeon_device *rdev)
6507 {
6508 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6509 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6510 	u32 wptr;
6511 	u32 rptr;
6512 	u32 src_id, src_data, ring_id;
6513 	u8 me_id, pipe_id, queue_id;
6514 	u32 ring_index;
6515 	bool queue_hotplug = false;
6516 	bool queue_reset = false;
6517 	u32 addr, status, mc_client;
6518 	bool queue_thermal = false;
6519 
6520 	if (!rdev->ih.enabled || rdev->shutdown)
6521 		return IRQ_NONE;
6522 
6523 	wptr = cik_get_ih_wptr(rdev);
6524 
6525 restart_ih:
6526 	/* is somebody else already processing irqs? */
6527 	if (atomic_xchg(&rdev->ih.lock, 1))
6528 		return IRQ_NONE;
6529 
6530 	rptr = rdev->ih.rptr;
6531 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6532 
6533 	/* Order reading of wptr vs. reading of IH ring data */
6534 	rmb();
6535 
6536 	/* display interrupts */
6537 	cik_irq_ack(rdev);
6538 
6539 	while (rptr != wptr) {
6540 		/* wptr/rptr are in bytes! */
6541 		ring_index = rptr / 4;
6542 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6543 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6544 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6545 
6546 		switch (src_id) {
6547 		case 1: /* D1 vblank/vline */
6548 			switch (src_data) {
6549 			case 0: /* D1 vblank */
6550 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6551 					if (rdev->irq.crtc_vblank_int[0]) {
6552 						drm_handle_vblank(rdev->ddev, 0);
6553 						rdev->pm.vblank_sync = true;
6554 						wake_up(&rdev->irq.vblank_queue);
6555 					}
6556 					if (atomic_read(&rdev->irq.pflip[0]))
6557 						radeon_crtc_handle_flip(rdev, 0);
6558 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6559 					DRM_DEBUG("IH: D1 vblank\n");
6560 				}
6561 				break;
6562 			case 1: /* D1 vline */
6563 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6564 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6565 					DRM_DEBUG("IH: D1 vline\n");
6566 				}
6567 				break;
6568 			default:
6569 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6570 				break;
6571 			}
6572 			break;
6573 		case 2: /* D2 vblank/vline */
6574 			switch (src_data) {
6575 			case 0: /* D2 vblank */
6576 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6577 					if (rdev->irq.crtc_vblank_int[1]) {
6578 						drm_handle_vblank(rdev->ddev, 1);
6579 						rdev->pm.vblank_sync = true;
6580 						wake_up(&rdev->irq.vblank_queue);
6581 					}
6582 					if (atomic_read(&rdev->irq.pflip[1]))
6583 						radeon_crtc_handle_flip(rdev, 1);
6584 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6585 					DRM_DEBUG("IH: D2 vblank\n");
6586 				}
6587 				break;
6588 			case 1: /* D2 vline */
6589 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6590 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6591 					DRM_DEBUG("IH: D2 vline\n");
6592 				}
6593 				break;
6594 			default:
6595 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6596 				break;
6597 			}
6598 			break;
6599 		case 3: /* D3 vblank/vline */
6600 			switch (src_data) {
6601 			case 0: /* D3 vblank */
6602 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6603 					if (rdev->irq.crtc_vblank_int[2]) {
6604 						drm_handle_vblank(rdev->ddev, 2);
6605 						rdev->pm.vblank_sync = true;
6606 						wake_up(&rdev->irq.vblank_queue);
6607 					}
6608 					if (atomic_read(&rdev->irq.pflip[2]))
6609 						radeon_crtc_handle_flip(rdev, 2);
6610 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6611 					DRM_DEBUG("IH: D3 vblank\n");
6612 				}
6613 				break;
6614 			case 1: /* D3 vline */
6615 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6616 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6617 					DRM_DEBUG("IH: D3 vline\n");
6618 				}
6619 				break;
6620 			default:
6621 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6622 				break;
6623 			}
6624 			break;
6625 		case 4: /* D4 vblank/vline */
6626 			switch (src_data) {
6627 			case 0: /* D4 vblank */
6628 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6629 					if (rdev->irq.crtc_vblank_int[3]) {
6630 						drm_handle_vblank(rdev->ddev, 3);
6631 						rdev->pm.vblank_sync = true;
6632 						wake_up(&rdev->irq.vblank_queue);
6633 					}
6634 					if (atomic_read(&rdev->irq.pflip[3]))
6635 						radeon_crtc_handle_flip(rdev, 3);
6636 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6637 					DRM_DEBUG("IH: D4 vblank\n");
6638 				}
6639 				break;
6640 			case 1: /* D4 vline */
6641 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6642 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6643 					DRM_DEBUG("IH: D4 vline\n");
6644 				}
6645 				break;
6646 			default:
6647 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6648 				break;
6649 			}
6650 			break;
6651 		case 5: /* D5 vblank/vline */
6652 			switch (src_data) {
6653 			case 0: /* D5 vblank */
6654 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6655 					if (rdev->irq.crtc_vblank_int[4]) {
6656 						drm_handle_vblank(rdev->ddev, 4);
6657 						rdev->pm.vblank_sync = true;
6658 						wake_up(&rdev->irq.vblank_queue);
6659 					}
6660 					if (atomic_read(&rdev->irq.pflip[4]))
6661 						radeon_crtc_handle_flip(rdev, 4);
6662 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6663 					DRM_DEBUG("IH: D5 vblank\n");
6664 				}
6665 				break;
6666 			case 1: /* D5 vline */
6667 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6668 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6669 					DRM_DEBUG("IH: D5 vline\n");
6670 				}
6671 				break;
6672 			default:
6673 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6674 				break;
6675 			}
6676 			break;
6677 		case 6: /* D6 vblank/vline */
6678 			switch (src_data) {
6679 			case 0: /* D6 vblank */
6680 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6681 					if (rdev->irq.crtc_vblank_int[5]) {
6682 						drm_handle_vblank(rdev->ddev, 5);
6683 						rdev->pm.vblank_sync = true;
6684 						wake_up(&rdev->irq.vblank_queue);
6685 					}
6686 					if (atomic_read(&rdev->irq.pflip[5]))
6687 						radeon_crtc_handle_flip(rdev, 5);
6688 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6689 					DRM_DEBUG("IH: D6 vblank\n");
6690 				}
6691 				break;
6692 			case 1: /* D6 vline */
6693 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6694 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6695 					DRM_DEBUG("IH: D6 vline\n");
6696 				}
6697 				break;
6698 			default:
6699 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6700 				break;
6701 			}
6702 			break;
6703 		case 42: /* HPD hotplug */
6704 			switch (src_data) {
6705 			case 0:
6706 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6707 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6708 					queue_hotplug = true;
6709 					DRM_DEBUG("IH: HPD1\n");
6710 				}
6711 				break;
6712 			case 1:
6713 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6714 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6715 					queue_hotplug = true;
6716 					DRM_DEBUG("IH: HPD2\n");
6717 				}
6718 				break;
6719 			case 2:
6720 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6721 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6722 					queue_hotplug = true;
6723 					DRM_DEBUG("IH: HPD3\n");
6724 				}
6725 				break;
6726 			case 3:
6727 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6728 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6729 					queue_hotplug = true;
6730 					DRM_DEBUG("IH: HPD4\n");
6731 				}
6732 				break;
6733 			case 4:
6734 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6735 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6736 					queue_hotplug = true;
6737 					DRM_DEBUG("IH: HPD5\n");
6738 				}
6739 				break;
6740 			case 5:
6741 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6742 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6743 					queue_hotplug = true;
6744 					DRM_DEBUG("IH: HPD6\n");
6745 				}
6746 				break;
6747 			default:
6748 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6749 				break;
6750 			}
6751 			break;
6752 		case 124: /* UVD */
6753 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6754 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6755 			break;
6756 		case 146:
6757 		case 147:
6758 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6759 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6760 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
6761 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6762 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6763 				addr);
6764 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6765 				status);
6766 			cik_vm_decode_fault(rdev, status, addr, mc_client);
6767 			/* reset addr and status */
6768 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6769 			break;
6770 		case 176: /* GFX RB CP_INT */
6771 		case 177: /* GFX IB CP_INT */
6772 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6773 			break;
6774 		case 181: /* CP EOP event */
6775 			DRM_DEBUG("IH: CP EOP\n");
6776 			/* XXX check the bitfield order! */
6777 			me_id = (ring_id & 0x60) >> 5;
6778 			pipe_id = (ring_id & 0x18) >> 3;
6779 			queue_id = (ring_id & 0x7) >> 0;
6780 			switch (me_id) {
6781 			case 0:
6782 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6783 				break;
6784 			case 1:
6785 			case 2:
6786 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
6787 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6788 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
6789 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6790 				break;
6791 			}
6792 			break;
6793 		case 184: /* CP Privileged reg access */
6794 			DRM_ERROR("Illegal register access in command stream\n");
6795 			/* XXX check the bitfield order! */
6796 			me_id = (ring_id & 0x60) >> 5;
6797 			pipe_id = (ring_id & 0x18) >> 3;
6798 			queue_id = (ring_id & 0x7) >> 0;
6799 			switch (me_id) {
6800 			case 0:
6801 				/* This results in a full GPU reset, but all we need to do is soft
6802 				 * reset the CP for gfx
6803 				 */
6804 				queue_reset = true;
6805 				break;
6806 			case 1:
6807 				/* XXX compute */
6808 				queue_reset = true;
6809 				break;
6810 			case 2:
6811 				/* XXX compute */
6812 				queue_reset = true;
6813 				break;
6814 			}
6815 			break;
6816 		case 185: /* CP Privileged inst */
6817 			DRM_ERROR("Illegal instruction in command stream\n");
6818 			/* XXX check the bitfield order! */
6819 			me_id = (ring_id & 0x60) >> 5;
6820 			pipe_id = (ring_id & 0x18) >> 3;
6821 			queue_id = (ring_id & 0x7) >> 0;
6822 			switch (me_id) {
6823 			case 0:
6824 				/* This results in a full GPU reset, but all we need to do is soft
6825 				 * reset the CP for gfx
6826 				 */
6827 				queue_reset = true;
6828 				break;
6829 			case 1:
6830 				/* XXX compute */
6831 				queue_reset = true;
6832 				break;
6833 			case 2:
6834 				/* XXX compute */
6835 				queue_reset = true;
6836 				break;
6837 			}
6838 			break;
6839 		case 224: /* SDMA trap event */
6840 			/* XXX check the bitfield order! */
6841 			me_id = (ring_id & 0x3) >> 0;
6842 			queue_id = (ring_id & 0xc) >> 2;
6843 			DRM_DEBUG("IH: SDMA trap\n");
6844 			switch (me_id) {
6845 			case 0:
6846 				switch (queue_id) {
6847 				case 0:
6848 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6849 					break;
6850 				case 1:
6851 					/* XXX compute */
6852 					break;
6853 				case 2:
6854 					/* XXX compute */
6855 					break;
6856 				}
6857 				break;
6858 			case 1:
6859 				switch (queue_id) {
6860 				case 0:
6861 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6862 					break;
6863 				case 1:
6864 					/* XXX compute */
6865 					break;
6866 				case 2:
6867 					/* XXX compute */
6868 					break;
6869 				}
6870 				break;
6871 			}
6872 			break;
6873 		case 230: /* thermal low to high */
6874 			DRM_DEBUG("IH: thermal low to high\n");
6875 			rdev->pm.dpm.thermal.high_to_low = false;
6876 			queue_thermal = true;
6877 			break;
6878 		case 231: /* thermal high to low */
6879 			DRM_DEBUG("IH: thermal high to low\n");
6880 			rdev->pm.dpm.thermal.high_to_low = true;
6881 			queue_thermal = true;
6882 			break;
6883 		case 233: /* GUI IDLE */
6884 			DRM_DEBUG("IH: GUI idle\n");
6885 			break;
6886 		case 241: /* SDMA Privileged inst */
6887 		case 247: /* SDMA Privileged inst */
6888 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
6889 			/* XXX check the bitfield order! */
6890 			me_id = (ring_id & 0x3) >> 0;
6891 			queue_id = (ring_id & 0xc) >> 2;
6892 			switch (me_id) {
6893 			case 0:
6894 				switch (queue_id) {
6895 				case 0:
6896 					queue_reset = true;
6897 					break;
6898 				case 1:
6899 					/* XXX compute */
6900 					queue_reset = true;
6901 					break;
6902 				case 2:
6903 					/* XXX compute */
6904 					queue_reset = true;
6905 					break;
6906 				}
6907 				break;
6908 			case 1:
6909 				switch (queue_id) {
6910 				case 0:
6911 					queue_reset = true;
6912 					break;
6913 				case 1:
6914 					/* XXX compute */
6915 					queue_reset = true;
6916 					break;
6917 				case 2:
6918 					/* XXX compute */
6919 					queue_reset = true;
6920 					break;
6921 				}
6922 				break;
6923 			}
6924 			break;
6925 		default:
6926 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6927 			break;
6928 		}
6929 
6930 		/* wptr/rptr are in bytes! */
6931 		rptr += 16;
6932 		rptr &= rdev->ih.ptr_mask;
6933 	}
6934 	if (queue_hotplug)
6935 		taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
6936 	if (queue_reset)
6937 		taskqueue_enqueue(rdev->tq, &rdev->reset_work);
6938 	if (queue_thermal)
6939 		taskqueue_enqueue(rdev->tq, &rdev->pm.dpm.thermal.work);
6940 	rdev->ih.rptr = rptr;
6941 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
6942 	atomic_set(&rdev->ih.lock, 0);
6943 
6944 	/* make sure wptr hasn't changed while processing */
6945 	wptr = cik_get_ih_wptr(rdev);
6946 	if (wptr != rptr)
6947 		goto restart_ih;
6948 
6949 	return IRQ_HANDLED;
6950 }
6951 
6952 /*
6953  * startup/shutdown callbacks
6954  */
6955 /**
6956  * cik_startup - program the asic to a functional state
6957  *
6958  * @rdev: radeon_device pointer
6959  *
6960  * Programs the asic to a functional state (CIK).
6961  * Called by cik_init() and cik_resume().
6962  * Returns 0 for success, error for failure.
6963  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* Load microcode if not already resident.  IGPs have no separate
	 * memory-controller firmware; discrete parts additionally need
	 * mc_fw and must upload it before the MC is usable.
	 */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* Kaveri (Spectre) and Kabini (Kalindi) use different RLC
		 * save/restore register lists.
		 */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on every ring that will be used */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: on any failure just disable its ring and
	 * continue without it rather than failing startup.
	 */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* ring_size is zero when UVD resume failed above */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
7189 
7190 /**
7191  * cik_resume - resume the asic to a functional state
7192  *
7193  * @rdev: radeon_device pointer
7194  *
7195  * Programs the asic to a functional state (CIK).
7196  * Called at resume.
7197  * Returns 0 for success, error for failure.
7198  */
7199 int cik_resume(struct radeon_device *rdev)
7200 {
7201 	int r;
7202 
7203 	/* post card */
7204 	atom_asic_init(rdev->mode_info.atom_context);
7205 
7206 	/* init golden registers */
7207 	cik_init_golden_registers(rdev);
7208 
7209 	rdev->accel_working = true;
7210 	r = cik_startup(rdev);
7211 	if (r) {
7212 		DRM_ERROR("cik startup failed on resume\n");
7213 		rdev->accel_working = false;
7214 		return r;
7215 	}
7216 
7217 	return r;
7218 
7219 }
7220 
7221 /**
7222  * cik_suspend - suspend the asic
7223  *
7224  * @rdev: radeon_device pointer
7225  *
7226  * Bring the chip into a state suitable for suspend (CIK).
7227  * Called at suspend.
7228  * Returns 0 for success.
7229  */
int cik_suspend(struct radeon_device *rdev)
{
	/* Tear down in dependency order: stop ring users first. */
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the command processors and SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	/* shut down UVD */
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	/* disable powergating/clockgating before stopping interrupts */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	/* GART last: nothing may touch GPU VM afterwards */
	cik_pcie_gart_disable(rdev);
	return 0;
}
7245 
7246 /* Plan is to move initialization in that function and use
7247  * helper function so that radeon_device_init pretty much
7248  * do nothing more than calling asic specific function. This
7249  * should also allow to remove a bunch of callback function
7250  * like vram_info.
7251  */
7252 /**
7253  * cik_init - asic specific driver and hw init
7254  *
7255  * @rdev: radeon_device pointer
7256  *
7257  * Setup asic specific driver variables and program the hw
7258  * to a functional state (CIK).
7259  * Called at driver startup.
7260  * Returns 0 for success, errors for failure.
7261  */
7262 int cik_init(struct radeon_device *rdev)
7263 {
7264 	struct radeon_ring *ring;
7265 	int r;
7266 
7267 	/* Read BIOS */
7268 	if (!radeon_get_bios(rdev)) {
7269 		if (ASIC_IS_AVIVO(rdev))
7270 			return -EINVAL;
7271 	}
7272 	/* Must be an ATOMBIOS */
7273 	if (!rdev->is_atom_bios) {
7274 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7275 		return -EINVAL;
7276 	}
7277 	r = radeon_atombios_init(rdev);
7278 	if (r)
7279 		return r;
7280 
7281 	/* Post card if necessary */
7282 	if (!radeon_card_posted(rdev)) {
7283 		if (!rdev->bios) {
7284 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7285 			return -EINVAL;
7286 		}
7287 		DRM_INFO("GPU not posted. posting now...\n");
7288 		atom_asic_init(rdev->mode_info.atom_context);
7289 	}
7290 	/* init golden registers */
7291 	cik_init_golden_registers(rdev);
7292 	/* Initialize scratch registers */
7293 	cik_scratch_init(rdev);
7294 	/* Initialize surface registers */
7295 	radeon_surface_init(rdev);
7296 	/* Initialize clocks */
7297 	radeon_get_clock_info(rdev->ddev);
7298 
7299 	/* Fence driver */
7300 	r = radeon_fence_driver_init(rdev);
7301 	if (r)
7302 		return r;
7303 
7304 	/* initialize memory controller */
7305 	r = cik_mc_init(rdev);
7306 	if (r)
7307 		return r;
7308 	/* Memory manager */
7309 	r = radeon_bo_init(rdev);
7310 	if (r)
7311 		return r;
7312 
7313 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7314 	ring->ring_obj = NULL;
7315 	r600_ring_init(rdev, ring, 1024 * 1024);
7316 
7317 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7318 	ring->ring_obj = NULL;
7319 	r600_ring_init(rdev, ring, 1024 * 1024);
7320 	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7321 	if (r)
7322 		return r;
7323 
7324 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7325 	ring->ring_obj = NULL;
7326 	r600_ring_init(rdev, ring, 1024 * 1024);
7327 	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7328 	if (r)
7329 		return r;
7330 
7331 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7332 	ring->ring_obj = NULL;
7333 	r600_ring_init(rdev, ring, 256 * 1024);
7334 
7335 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7336 	ring->ring_obj = NULL;
7337 	r600_ring_init(rdev, ring, 256 * 1024);
7338 
7339 	r = radeon_uvd_init(rdev);
7340 	if (!r) {
7341 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7342 		ring->ring_obj = NULL;
7343 		r600_ring_init(rdev, ring, 4096);
7344 	}
7345 
7346 	rdev->ih.ring_obj = NULL;
7347 	r600_ih_ring_init(rdev, 64 * 1024);
7348 
7349 	r = r600_pcie_gart_init(rdev);
7350 	if (r)
7351 		return r;
7352 
7353 	rdev->accel_working = true;
7354 	r = cik_startup(rdev);
7355 	if (r) {
7356 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7357 		cik_cp_fini(rdev);
7358 		cik_sdma_fini(rdev);
7359 		cik_irq_fini(rdev);
7360 		sumo_rlc_fini(rdev);
7361 		cik_mec_fini(rdev);
7362 		radeon_wb_fini(rdev);
7363 		radeon_ib_pool_fini(rdev);
7364 		radeon_vm_manager_fini(rdev);
7365 		radeon_irq_kms_fini(rdev);
7366 		cik_pcie_gart_fini(rdev);
7367 		rdev->accel_working = false;
7368 	}
7369 
7370 	/* Don't start up if the MC ucode is missing.
7371 	 * The default clocks and voltages before the MC ucode
7372 	 * is loaded are not suffient for advanced operations.
7373 	 */
7374 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7375 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7376 		return -EINVAL;
7377 	}
7378 
7379 	return 0;
7380 }
7381 
7382 /**
7383  * cik_fini - asic specific driver and hw fini
7384  *
7385  * @rdev: radeon_device pointer
7386  *
7387  * Tear down the asic specific driver variables and program the hw
7388  * to an idle state (CIK).
7389  * Called at driver unload.
7390  */
void cik_fini(struct radeon_device *rdev)
{
	/* stop the command processors and SDMA engines first */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	/* disable powergating/clockgating before tearing down IRQs */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	/* release GART and the remaining driver state */
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;	/* avoid dangling pointer after free */
}
7415 
7416 /* display watermark setup */
7417 /**
7418  * dce8_line_buffer_adjust - Set up the line buffer
7419  *
7420  * @rdev: radeon_device pointer
7421  * @radeon_crtc: the selected display controller
7422  * @mode: the current display mode on the selected display
7423  * controller
7424  *
7425  * Setup up the line buffer allocation for
7426  * the selected display controller (CIK).
7427  * Returns the line buffer size in pixels.
7428  */
7429 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7430 				   struct radeon_crtc *radeon_crtc,
7431 				   struct drm_display_mode *mode)
7432 {
7433 	u32 tmp, buffer_alloc, i;
7434 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
7435 	/*
7436 	 * Line Buffer Setup
7437 	 * There are 6 line buffers, one for each display controllers.
7438 	 * There are 3 partitions per LB. Select the number of partitions
7439 	 * to enable based on the display width.  For display widths larger
7440 	 * than 4096, you need use to use 2 display controllers and combine
7441 	 * them using the stereo blender.
7442 	 */
7443 	if (radeon_crtc->base.enabled && mode) {
7444 		if (mode->crtc_hdisplay < 1920) {
7445 			tmp = 1;
7446 			buffer_alloc = 2;
7447 		} else if (mode->crtc_hdisplay < 2560) {
7448 			tmp = 2;
7449 			buffer_alloc = 2;
7450 		} else if (mode->crtc_hdisplay < 4096) {
7451 			tmp = 0;
7452 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7453 		} else {
7454 			DRM_DEBUG_KMS("Mode too big for LB!\n");
7455 			tmp = 0;
7456 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7457 		}
7458 	} else {
7459 		tmp = 1;
7460 		buffer_alloc = 0;
7461 	}
7462 
7463 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7464 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7465 
7466 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
7467 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
7468 	for (i = 0; i < rdev->usec_timeout; i++) {
7469 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
7470 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
7471 			break;
7472 		udelay(1);
7473 	}
7474 
7475 	if (radeon_crtc->base.enabled && mode) {
7476 		switch (tmp) {
7477 		case 0:
7478 		default:
7479 			return 4096 * 2;
7480 		case 1:
7481 			return 1920 * 2;
7482 		case 2:
7483 			return 2560 * 2;
7484 		}
7485 	}
7486 
7487 	/* controller not enabled, so no lb used */
7488 	return 0;
7489 }
7490 
7491 /**
7492  * cik_get_number_of_dram_channels - get the number of dram channels
7493  *
7494  * @rdev: radeon_device pointer
7495  *
7496  * Look up the number of video ram channels (CIK).
7497  * Used for display watermark bandwidth calculations
7498  * Returns the number of dram channels
7499  */
7500 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7501 {
7502 	u32 tmp = RREG32(MC_SHARED_CHMAP);
7503 
7504 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7505 	case 0:
7506 	default:
7507 		return 1;
7508 	case 1:
7509 		return 2;
7510 	case 2:
7511 		return 4;
7512 	case 3:
7513 		return 8;
7514 	case 4:
7515 		return 3;
7516 	case 5:
7517 		return 6;
7518 	case 6:
7519 		return 10;
7520 	case 7:
7521 		return 12;
7522 	case 8:
7523 		return 16;
7524 	}
7525 }
7526 
/* Input parameters for the DCE8 display watermark calculations below. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
7542 
7543 /**
7544  * dce8_dram_bandwidth - get the dram bandwidth
7545  *
7546  * @wm: watermark calculation data
7547  *
7548  * Calculate the raw dram bandwidth (CIK).
7549  * Used for display watermark bandwidth calculations
7550  * Returns the dram bandwidth in MBytes/s
7551  */
7552 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7553 {
7554 	/* Calculate raw DRAM Bandwidth */
7555 	fixed20_12 dram_efficiency; /* 0.7 */
7556 	fixed20_12 yclk, dram_channels, bandwidth;
7557 	fixed20_12 a;
7558 
7559 	a.full = dfixed_const(1000);
7560 	yclk.full = dfixed_const(wm->yclk);
7561 	yclk.full = dfixed_div(yclk, a);
7562 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
7563 	a.full = dfixed_const(10);
7564 	dram_efficiency.full = dfixed_const(7);
7565 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
7566 	bandwidth.full = dfixed_mul(dram_channels, yclk);
7567 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7568 
7569 	return dfixed_trunc(bandwidth);
7570 }
7571 
7572 /**
7573  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7574  *
7575  * @wm: watermark calculation data
7576  *
7577  * Calculate the dram bandwidth used for display (CIK).
7578  * Used for display watermark bandwidth calculations
7579  * Returns the dram bandwidth for display in MBytes/s
7580  */
7581 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7582 {
7583 	/* Calculate DRAM Bandwidth and the part allocated to display. */
7584 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7585 	fixed20_12 yclk, dram_channels, bandwidth;
7586 	fixed20_12 a;
7587 
7588 	a.full = dfixed_const(1000);
7589 	yclk.full = dfixed_const(wm->yclk);
7590 	yclk.full = dfixed_div(yclk, a);
7591 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
7592 	a.full = dfixed_const(10);
7593 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
7594 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7595 	bandwidth.full = dfixed_mul(dram_channels, yclk);
7596 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7597 
7598 	return dfixed_trunc(bandwidth);
7599 }
7600 
7601 /**
7602  * dce8_data_return_bandwidth - get the data return bandwidth
7603  *
7604  * @wm: watermark calculation data
7605  *
7606  * Calculate the data return bandwidth used for display (CIK).
7607  * Used for display watermark bandwidth calculations
7608  * Returns the data return bandwidth in MBytes/s
7609  */
7610 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7611 {
7612 	/* Calculate the display Data return Bandwidth */
7613 	fixed20_12 return_efficiency; /* 0.8 */
7614 	fixed20_12 sclk, bandwidth;
7615 	fixed20_12 a;
7616 
7617 	a.full = dfixed_const(1000);
7618 	sclk.full = dfixed_const(wm->sclk);
7619 	sclk.full = dfixed_div(sclk, a);
7620 	a.full = dfixed_const(10);
7621 	return_efficiency.full = dfixed_const(8);
7622 	return_efficiency.full = dfixed_div(return_efficiency, a);
7623 	a.full = dfixed_const(32);
7624 	bandwidth.full = dfixed_mul(a, sclk);
7625 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7626 
7627 	return dfixed_trunc(bandwidth);
7628 }
7629 
7630 /**
7631  * dce8_dmif_request_bandwidth - get the dmif bandwidth
7632  *
7633  * @wm: watermark calculation data
7634  *
7635  * Calculate the dmif bandwidth used for display (CIK).
7636  * Used for display watermark bandwidth calculations
7637  * Returns the dmif bandwidth in MBytes/s
7638  */
7639 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7640 {
7641 	/* Calculate the DMIF Request Bandwidth */
7642 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7643 	fixed20_12 disp_clk, bandwidth;
7644 	fixed20_12 a, b;
7645 
7646 	a.full = dfixed_const(1000);
7647 	disp_clk.full = dfixed_const(wm->disp_clk);
7648 	disp_clk.full = dfixed_div(disp_clk, a);
7649 	a.full = dfixed_const(32);
7650 	b.full = dfixed_mul(a, disp_clk);
7651 
7652 	a.full = dfixed_const(10);
7653 	disp_clk_request_efficiency.full = dfixed_const(8);
7654 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7655 
7656 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7657 
7658 	return dfixed_trunc(bandwidth);
7659 }
7660 
7661 /**
7662  * dce8_available_bandwidth - get the min available bandwidth
7663  *
7664  * @wm: watermark calculation data
7665  *
7666  * Calculate the min available bandwidth used for display (CIK).
7667  * Used for display watermark bandwidth calculations
7668  * Returns the min available bandwidth in MBytes/s
7669  */
7670 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
7671 {
7672 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
7673 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
7674 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
7675 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
7676 
7677 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
7678 }
7679 
7680 /**
7681  * dce8_average_bandwidth - get the average available bandwidth
7682  *
7683  * @wm: watermark calculation data
7684  *
7685  * Calculate the average available bandwidth used for display (CIK).
7686  * Used for display watermark bandwidth calculations
7687  * Returns the average available bandwidth in MBytes/s
7688  */
7689 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
7690 {
7691 	/* Calculate the display mode Average Bandwidth
7692 	 * DisplayMode should contain the source and destination dimensions,
7693 	 * timing, etc.
7694 	 */
7695 	fixed20_12 bpp;
7696 	fixed20_12 line_time;
7697 	fixed20_12 src_width;
7698 	fixed20_12 bandwidth;
7699 	fixed20_12 a;
7700 
7701 	a.full = dfixed_const(1000);
7702 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
7703 	line_time.full = dfixed_div(line_time, a);
7704 	bpp.full = dfixed_const(wm->bytes_per_pixel);
7705 	src_width.full = dfixed_const(wm->src_width);
7706 	bandwidth.full = dfixed_mul(src_width, bpp);
7707 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
7708 	bandwidth.full = dfixed_div(bandwidth, line_time);
7709 
7710 	return dfixed_trunc(bandwidth);
7711 }
7712 
7713 /**
7714  * dce8_latency_watermark - get the latency watermark
7715  *
7716  * @wm: watermark calculation data
7717  *
7718  * Calculate the latency watermark (CIK).
7719  * Used for display watermark bandwidth calculations
7720  * Returns the latency watermark in ns
7721  */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* time to return a worst-case 512 byte * 8 chunk request, in ns */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	/* time to return a cursor line pair (128 bytes * 4), in ns */
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* worst-case wait on the other heads' outstanding requests */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads: nothing to wait for */
	if (wm->num_heads == 0)
		return 0;

	/* how many source lines feed one destination line?  Depends on the
	 * downscale ratio (vsc), scaler tap count and interlacing.
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's even share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size * disp_clk / (mc_latency + 512): rate at which the
	 * DMIF buffer can be drained over the latency window
	 */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk(MHz) * bytes_per_pixel: peak line buffer fill rate */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* effective fill bandwidth is the smaller of share and peak rate */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill the line buffer for one destination line, in ns */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line buffer can't be filled within the active time, the
	 * watermark must also cover the shortfall
	 */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
7784 
7785 /**
7786  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
7787  * average and available dram bandwidth
7788  *
7789  * @wm: watermark calculation data
7790  *
7791  * Check if the display average bandwidth fits in the display
7792  * dram bandwidth (CIK).
7793  * Used for display watermark bandwidth calculations
7794  * Returns true if the display fits, false if not.
7795  */
7796 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7797 {
7798 	if (dce8_average_bandwidth(wm) <=
7799 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
7800 		return true;
7801 	else
7802 		return false;
7803 }
7804 
7805 /**
7806  * dce8_average_bandwidth_vs_available_bandwidth - check
7807  * average and available bandwidth
7808  *
7809  * @wm: watermark calculation data
7810  *
7811  * Check if the display average bandwidth fits in the display
7812  * available bandwidth (CIK).
7813  * Used for display watermark bandwidth calculations
7814  * Returns true if the display fits, false if not.
7815  */
7816 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
7817 {
7818 	if (dce8_average_bandwidth(wm) <=
7819 	    (dce8_available_bandwidth(wm) / wm->num_heads))
7820 		return true;
7821 	else
7822 		return false;
7823 }
7824 
7825 /**
7826  * dce8_check_latency_hiding - check latency hiding
7827  *
7828  * @wm: watermark calculation data
7829  *
7830  * Check latency hiding (CIK).
7831  * Used for display watermark bandwidth calculations
7832  * Returns true if the display fits, false if not.
7833  */
7834 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
7835 {
7836 	u32 lb_partitions = wm->lb_size / wm->src_width;
7837 	u32 line_time = wm->active_time + wm->blank_time;
7838 	u32 latency_tolerant_lines;
7839 	u32 latency_hiding;
7840 	fixed20_12 a;
7841 
7842 	a.full = dfixed_const(1);
7843 	if (wm->vsc.full > a.full)
7844 		latency_tolerant_lines = 1;
7845 	else {
7846 		if (lb_partitions <= (wm->vtaps + 1))
7847 			latency_tolerant_lines = 1;
7848 		else
7849 			latency_tolerant_lines = 2;
7850 	}
7851 
7852 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
7853 
7854 	if (dce8_latency_watermark(wm) <= latency_hiding)
7855 		return true;
7856 	else
7857 		return false;
7858 }
7859 
7860 /**
7861  * dce8_program_watermarks - program display watermarks
7862  *
7863  * @rdev: radeon_device pointer
7864  * @radeon_crtc: the selected display controller
7865  * @lb_size: line buffer size
7866  * @num_heads: number of display controllers in use
7867  *
7868  * Calculate and program the display watermarks for the
7869  * selected display controller (CIK).
7870  */
7871 static void dce8_program_watermarks(struct radeon_device *rdev,
7872 				    struct radeon_crtc *radeon_crtc,
7873 				    u32 lb_size, u32 num_heads)
7874 {
7875 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
7876 	struct dce8_wm_params wm_low, wm_high;
7877 	u32 pixel_period;
7878 	u32 line_time = 0;
7879 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
7880 	u32 tmp, wm_mask;
7881 
7882 	if (radeon_crtc->base.enabled && num_heads && mode) {
7883 		pixel_period = 1000000 / (u32)mode->clock;
7884 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
7885 
7886 		/* watermark for high clocks */
7887 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7888 		    rdev->pm.dpm_enabled) {
7889 			wm_high.yclk =
7890 				radeon_dpm_get_mclk(rdev, false) * 10;
7891 			wm_high.sclk =
7892 				radeon_dpm_get_sclk(rdev, false) * 10;
7893 		} else {
7894 			wm_high.yclk = rdev->pm.current_mclk * 10;
7895 			wm_high.sclk = rdev->pm.current_sclk * 10;
7896 		}
7897 
7898 		wm_high.disp_clk = mode->clock;
7899 		wm_high.src_width = mode->crtc_hdisplay;
7900 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
7901 		wm_high.blank_time = line_time - wm_high.active_time;
7902 		wm_high.interlaced = false;
7903 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7904 			wm_high.interlaced = true;
7905 		wm_high.vsc = radeon_crtc->vsc;
7906 		wm_high.vtaps = 1;
7907 		if (radeon_crtc->rmx_type != RMX_OFF)
7908 			wm_high.vtaps = 2;
7909 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
7910 		wm_high.lb_size = lb_size;
7911 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
7912 		wm_high.num_heads = num_heads;
7913 
7914 		/* set for high clocks */
7915 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
7916 
7917 		/* possibly force display priority to high */
7918 		/* should really do this at mode validation time... */
7919 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
7920 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
7921 		    !dce8_check_latency_hiding(&wm_high) ||
7922 		    (rdev->disp_priority == 2)) {
7923 			DRM_DEBUG_KMS("force priority to high\n");
7924 		}
7925 
7926 		/* watermark for low clocks */
7927 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7928 		    rdev->pm.dpm_enabled) {
7929 			wm_low.yclk =
7930 				radeon_dpm_get_mclk(rdev, true) * 10;
7931 			wm_low.sclk =
7932 				radeon_dpm_get_sclk(rdev, true) * 10;
7933 		} else {
7934 			wm_low.yclk = rdev->pm.current_mclk * 10;
7935 			wm_low.sclk = rdev->pm.current_sclk * 10;
7936 		}
7937 
7938 		wm_low.disp_clk = mode->clock;
7939 		wm_low.src_width = mode->crtc_hdisplay;
7940 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
7941 		wm_low.blank_time = line_time - wm_low.active_time;
7942 		wm_low.interlaced = false;
7943 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7944 			wm_low.interlaced = true;
7945 		wm_low.vsc = radeon_crtc->vsc;
7946 		wm_low.vtaps = 1;
7947 		if (radeon_crtc->rmx_type != RMX_OFF)
7948 			wm_low.vtaps = 2;
7949 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
7950 		wm_low.lb_size = lb_size;
7951 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
7952 		wm_low.num_heads = num_heads;
7953 
7954 		/* set for low clocks */
7955 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
7956 
7957 		/* possibly force display priority to high */
7958 		/* should really do this at mode validation time... */
7959 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
7960 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
7961 		    !dce8_check_latency_hiding(&wm_low) ||
7962 		    (rdev->disp_priority == 2)) {
7963 			DRM_DEBUG_KMS("force priority to high\n");
7964 		}
7965 	}
7966 
7967 	/* select wm A */
7968 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7969 	tmp = wm_mask;
7970 	tmp &= ~LATENCY_WATERMARK_MASK(3);
7971 	tmp |= LATENCY_WATERMARK_MASK(1);
7972 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7973 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7974 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
7975 		LATENCY_HIGH_WATERMARK(line_time)));
7976 	/* select wm B */
7977 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7978 	tmp &= ~LATENCY_WATERMARK_MASK(3);
7979 	tmp |= LATENCY_WATERMARK_MASK(2);
7980 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7981 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7982 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
7983 		LATENCY_HIGH_WATERMARK(line_time)));
7984 	/* restore original selection */
7985 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
7986 
7987 	/* save values for DPM */
7988 	radeon_crtc->line_time = line_time;
7989 	radeon_crtc->wm_high = latency_watermark_a;
7990 	radeon_crtc->wm_low = latency_watermark_b;
7991 }
7992 
7993 /**
7994  * dce8_bandwidth_update - program display watermarks
7995  *
7996  * @rdev: radeon_device pointer
7997  *
7998  * Calculate and program the display watermarks and line
7999  * buffer allocation (CIK).
8000  */
8001 void dce8_bandwidth_update(struct radeon_device *rdev)
8002 {
8003 	struct drm_display_mode *mode = NULL;
8004 	u32 num_heads = 0, lb_size;
8005 	int i;
8006 
8007 	radeon_update_display_priority(rdev);
8008 
8009 	for (i = 0; i < rdev->num_crtc; i++) {
8010 		if (rdev->mode_info.crtcs[i]->base.enabled)
8011 			num_heads++;
8012 	}
8013 	for (i = 0; i < rdev->num_crtc; i++) {
8014 		mode = &rdev->mode_info.crtcs[i]->base.mode;
8015 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8016 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8017 	}
8018 }
8019 
8020 /**
8021  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8022  *
8023  * @rdev: radeon_device pointer
8024  *
8025  * Fetches a GPU clock counter snapshot (SI).
8026  * Returns the 64 bit clock counter snapshot.
8027  */
8028 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8029 {
8030 	uint64_t clock;
8031 
8032 	spin_lock(&rdev->gpu_clock_mutex);
8033 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8034 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8035 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8036 	spin_unlock(&rdev->gpu_clock_mutex);
8037 	return clock;
8038 }
8039 
8040 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8041                               u32 cntl_reg, u32 status_reg)
8042 {
8043 	int r, i;
8044 	struct atom_clock_dividers dividers;
8045 	uint32_t tmp;
8046 
8047 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8048 					   clock, false, &dividers);
8049 	if (r)
8050 		return r;
8051 
8052 	tmp = RREG32_SMC(cntl_reg);
8053 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8054 	tmp |= dividers.post_divider;
8055 	WREG32_SMC(cntl_reg, tmp);
8056 
8057 	for (i = 0; i < 100; i++) {
8058 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
8059 			break;
8060 		mdelay(10);
8061 	}
8062 	if (i == 100)
8063 		return -ETIMEDOUT;
8064 
8065 	return 0;
8066 }
8067 
8068 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8069 {
8070 	int r = 0;
8071 
8072 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8073 	if (r)
8074 		return r;
8075 
8076 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8077 	return r;
8078 }
8079 
8080 static struct pci_dev dev_to_pcidev(device_t dev)
8081 {
8082     struct pci_dev pdev;
8083     pdev.dev = dev;
8084     return pdev;
8085 }
8086 
/**
 * cik_pcie_gen3_enable - try to raise the PCIE link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * Checks what speeds the platform supports, performs the gen3
 * retraining/equalization handshake with the upstream bridge when
 * needed, then requests the new target link speed and waits for the
 * speed change to complete.  Bails out silently for IGPs, non-PCIE
 * parts, or when disabled via radeon.pcie_gen2=0.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
#if 0
	struct pci_dev *root = rdev->pdev->bus->self;
#else
	/* DragonFly: the upstream bridge is the parent newbus device */
	device_t root = device_get_parent(rdev->dev);
#endif
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;
	struct pci_dev root_pdev = dev_to_pcidev(root);
	struct pci_dev pdev = dev_to_pcidev(rdev->dev);

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if neither gen2 nor gen3 is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current link data rate: 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* locate the PCIE capability in both config spaces; both are
	 * required for the handshake below */
	bridge_pos = pci_get_pciecap_ptr(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_get_pciecap_ptr(rdev->dev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the current link control values so they
			 * can be restored after each retry */
			pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			/* set hardware autonomous width disable on both ends */
			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* if the link trained below the detected width,
			 * renegotiate up to the maximum if supported */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* snapshot link control 1/2 on both ends */
				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the controller, then redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				/* restore only the HAWD bit from the saved values */
				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* carry over only bit 4 and bits 9-11 from the
				 * saved link control 2 values */
				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				/* release the quiesce for the next attempt */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed (low 4 bits of link control 2) */
	pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for the hw to ack it */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
8249 
/**
 * cik_program_aspm - program PCIE ASPM (active state power management)
 *
 * @rdev: radeon_device pointer
 *
 * Configures L0s/L1 inactivity timers and, when PLL power-down in L1
 * is allowed, the PIF PLL power-state and clock-selection registers.
 * All register updates follow a read-modify-write pattern and only
 * touch the hardware when the value actually changes.  Does nothing
 * on IGPs, non-PCIE parts, or when disabled via radeon.aspm=0.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the number of fast training sequences transmitted */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* configure the L0s/L1 inactivity thresholds */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PIF PLLs to power down in the off/TXS2 states */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
#ifdef zMN_TODO
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
#else
				/* bridge LNKCAP probe not ported to DragonFly;
				 * assume CLKREQ is unsupported */
				clk_req_support = false;
#endif
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: just commit the LC_CNTL value built above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			/* on a reversed lane link, drop the L0s inactivity
			 * timer again */
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
8401