xref: /dflybsd-src/sys/dev/drm/radeon/si.c (revision 9ebbd47df7abd81e0803cf228d15b3c372ad85db)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include <uapi_drm/radeon_drm.h>
30 #include "sid.h"
31 #include "atom.h"
32 #include "si_blit_shaders.h"
33 #include "clearstate_si.h"
34 #include "radeon_ucode.h"
35 
36 
/*
 * Compatibility shims: map the Linux PCI Express capability-register names
 * used by the imported radeon code onto DragonFly's PCIER_*/PCIEM_* constants
 * (the numeric comments give the DragonFly values).  The bare numeric
 * definitions below are constants the host headers do not provide; they match
 * the values in Linux's include/uapi/linux/pci_regs.h.
 */
#define PCI_EXP_LNKCTL PCIER_LINKCTRL /* 16 */
#define PCI_EXP_LNKCTL2 48		/* Link Control 2 register offset */
#define PCI_EXP_LNKCTL_HAWD PCIEM_LNKCTL_HAWD /* 0x0200 */
#define PCI_EXP_DEVSTA PCIER_DEVSTS /* 10 */
#define PCI_EXP_DEVSTA_TRPND 0x0020	/* Device Status: transactions pending */
#define PCI_EXP_LNKCAP_CLKPM 0x00040000	/* Link Capabilities: clock power mgmt */
43 
/*
 * Firmware images required by the Southern Islands ASIC families handled by
 * this file (PFP/ME/CE command processors, MC memory controller, RLC, SMC).
 */
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");
MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
74 
/* Forward declarations for helpers defined later in this file. */
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);
82 
83 static const u32 verde_rlc_save_restore_register_list[] =
84 {
85 	(0x8000 << 16) | (0x98f4 >> 2),
86 	0x00000000,
87 	(0x8040 << 16) | (0x98f4 >> 2),
88 	0x00000000,
89 	(0x8000 << 16) | (0xe80 >> 2),
90 	0x00000000,
91 	(0x8040 << 16) | (0xe80 >> 2),
92 	0x00000000,
93 	(0x8000 << 16) | (0x89bc >> 2),
94 	0x00000000,
95 	(0x8040 << 16) | (0x89bc >> 2),
96 	0x00000000,
97 	(0x8000 << 16) | (0x8c1c >> 2),
98 	0x00000000,
99 	(0x8040 << 16) | (0x8c1c >> 2),
100 	0x00000000,
101 	(0x9c00 << 16) | (0x98f0 >> 2),
102 	0x00000000,
103 	(0x9c00 << 16) | (0xe7c >> 2),
104 	0x00000000,
105 	(0x8000 << 16) | (0x9148 >> 2),
106 	0x00000000,
107 	(0x8040 << 16) | (0x9148 >> 2),
108 	0x00000000,
109 	(0x9c00 << 16) | (0x9150 >> 2),
110 	0x00000000,
111 	(0x9c00 << 16) | (0x897c >> 2),
112 	0x00000000,
113 	(0x9c00 << 16) | (0x8d8c >> 2),
114 	0x00000000,
115 	(0x9c00 << 16) | (0xac54 >> 2),
116 	0X00000000,
117 	0x3,
118 	(0x9c00 << 16) | (0x98f8 >> 2),
119 	0x00000000,
120 	(0x9c00 << 16) | (0x9910 >> 2),
121 	0x00000000,
122 	(0x9c00 << 16) | (0x9914 >> 2),
123 	0x00000000,
124 	(0x9c00 << 16) | (0x9918 >> 2),
125 	0x00000000,
126 	(0x9c00 << 16) | (0x991c >> 2),
127 	0x00000000,
128 	(0x9c00 << 16) | (0x9920 >> 2),
129 	0x00000000,
130 	(0x9c00 << 16) | (0x9924 >> 2),
131 	0x00000000,
132 	(0x9c00 << 16) | (0x9928 >> 2),
133 	0x00000000,
134 	(0x9c00 << 16) | (0x992c >> 2),
135 	0x00000000,
136 	(0x9c00 << 16) | (0x9930 >> 2),
137 	0x00000000,
138 	(0x9c00 << 16) | (0x9934 >> 2),
139 	0x00000000,
140 	(0x9c00 << 16) | (0x9938 >> 2),
141 	0x00000000,
142 	(0x9c00 << 16) | (0x993c >> 2),
143 	0x00000000,
144 	(0x9c00 << 16) | (0x9940 >> 2),
145 	0x00000000,
146 	(0x9c00 << 16) | (0x9944 >> 2),
147 	0x00000000,
148 	(0x9c00 << 16) | (0x9948 >> 2),
149 	0x00000000,
150 	(0x9c00 << 16) | (0x994c >> 2),
151 	0x00000000,
152 	(0x9c00 << 16) | (0x9950 >> 2),
153 	0x00000000,
154 	(0x9c00 << 16) | (0x9954 >> 2),
155 	0x00000000,
156 	(0x9c00 << 16) | (0x9958 >> 2),
157 	0x00000000,
158 	(0x9c00 << 16) | (0x995c >> 2),
159 	0x00000000,
160 	(0x9c00 << 16) | (0x9960 >> 2),
161 	0x00000000,
162 	(0x9c00 << 16) | (0x9964 >> 2),
163 	0x00000000,
164 	(0x9c00 << 16) | (0x9968 >> 2),
165 	0x00000000,
166 	(0x9c00 << 16) | (0x996c >> 2),
167 	0x00000000,
168 	(0x9c00 << 16) | (0x9970 >> 2),
169 	0x00000000,
170 	(0x9c00 << 16) | (0x9974 >> 2),
171 	0x00000000,
172 	(0x9c00 << 16) | (0x9978 >> 2),
173 	0x00000000,
174 	(0x9c00 << 16) | (0x997c >> 2),
175 	0x00000000,
176 	(0x9c00 << 16) | (0x9980 >> 2),
177 	0x00000000,
178 	(0x9c00 << 16) | (0x9984 >> 2),
179 	0x00000000,
180 	(0x9c00 << 16) | (0x9988 >> 2),
181 	0x00000000,
182 	(0x9c00 << 16) | (0x998c >> 2),
183 	0x00000000,
184 	(0x9c00 << 16) | (0x8c00 >> 2),
185 	0x00000000,
186 	(0x9c00 << 16) | (0x8c14 >> 2),
187 	0x00000000,
188 	(0x9c00 << 16) | (0x8c04 >> 2),
189 	0x00000000,
190 	(0x9c00 << 16) | (0x8c08 >> 2),
191 	0x00000000,
192 	(0x8000 << 16) | (0x9b7c >> 2),
193 	0x00000000,
194 	(0x8040 << 16) | (0x9b7c >> 2),
195 	0x00000000,
196 	(0x8000 << 16) | (0xe84 >> 2),
197 	0x00000000,
198 	(0x8040 << 16) | (0xe84 >> 2),
199 	0x00000000,
200 	(0x8000 << 16) | (0x89c0 >> 2),
201 	0x00000000,
202 	(0x8040 << 16) | (0x89c0 >> 2),
203 	0x00000000,
204 	(0x8000 << 16) | (0x914c >> 2),
205 	0x00000000,
206 	(0x8040 << 16) | (0x914c >> 2),
207 	0x00000000,
208 	(0x8000 << 16) | (0x8c20 >> 2),
209 	0x00000000,
210 	(0x8040 << 16) | (0x8c20 >> 2),
211 	0x00000000,
212 	(0x8000 << 16) | (0x9354 >> 2),
213 	0x00000000,
214 	(0x8040 << 16) | (0x9354 >> 2),
215 	0x00000000,
216 	(0x9c00 << 16) | (0x9060 >> 2),
217 	0x00000000,
218 	(0x9c00 << 16) | (0x9364 >> 2),
219 	0x00000000,
220 	(0x9c00 << 16) | (0x9100 >> 2),
221 	0x00000000,
222 	(0x9c00 << 16) | (0x913c >> 2),
223 	0x00000000,
224 	(0x8000 << 16) | (0x90e0 >> 2),
225 	0x00000000,
226 	(0x8000 << 16) | (0x90e4 >> 2),
227 	0x00000000,
228 	(0x8000 << 16) | (0x90e8 >> 2),
229 	0x00000000,
230 	(0x8040 << 16) | (0x90e0 >> 2),
231 	0x00000000,
232 	(0x8040 << 16) | (0x90e4 >> 2),
233 	0x00000000,
234 	(0x8040 << 16) | (0x90e8 >> 2),
235 	0x00000000,
236 	(0x9c00 << 16) | (0x8bcc >> 2),
237 	0x00000000,
238 	(0x9c00 << 16) | (0x8b24 >> 2),
239 	0x00000000,
240 	(0x9c00 << 16) | (0x88c4 >> 2),
241 	0x00000000,
242 	(0x9c00 << 16) | (0x8e50 >> 2),
243 	0x00000000,
244 	(0x9c00 << 16) | (0x8c0c >> 2),
245 	0x00000000,
246 	(0x9c00 << 16) | (0x8e58 >> 2),
247 	0x00000000,
248 	(0x9c00 << 16) | (0x8e5c >> 2),
249 	0x00000000,
250 	(0x9c00 << 16) | (0x9508 >> 2),
251 	0x00000000,
252 	(0x9c00 << 16) | (0x950c >> 2),
253 	0x00000000,
254 	(0x9c00 << 16) | (0x9494 >> 2),
255 	0x00000000,
256 	(0x9c00 << 16) | (0xac0c >> 2),
257 	0x00000000,
258 	(0x9c00 << 16) | (0xac10 >> 2),
259 	0x00000000,
260 	(0x9c00 << 16) | (0xac14 >> 2),
261 	0x00000000,
262 	(0x9c00 << 16) | (0xae00 >> 2),
263 	0x00000000,
264 	(0x9c00 << 16) | (0xac08 >> 2),
265 	0x00000000,
266 	(0x9c00 << 16) | (0x88d4 >> 2),
267 	0x00000000,
268 	(0x9c00 << 16) | (0x88c8 >> 2),
269 	0x00000000,
270 	(0x9c00 << 16) | (0x88cc >> 2),
271 	0x00000000,
272 	(0x9c00 << 16) | (0x89b0 >> 2),
273 	0x00000000,
274 	(0x9c00 << 16) | (0x8b10 >> 2),
275 	0x00000000,
276 	(0x9c00 << 16) | (0x8a14 >> 2),
277 	0x00000000,
278 	(0x9c00 << 16) | (0x9830 >> 2),
279 	0x00000000,
280 	(0x9c00 << 16) | (0x9834 >> 2),
281 	0x00000000,
282 	(0x9c00 << 16) | (0x9838 >> 2),
283 	0x00000000,
284 	(0x9c00 << 16) | (0x9a10 >> 2),
285 	0x00000000,
286 	(0x8000 << 16) | (0x9870 >> 2),
287 	0x00000000,
288 	(0x8000 << 16) | (0x9874 >> 2),
289 	0x00000000,
290 	(0x8001 << 16) | (0x9870 >> 2),
291 	0x00000000,
292 	(0x8001 << 16) | (0x9874 >> 2),
293 	0x00000000,
294 	(0x8040 << 16) | (0x9870 >> 2),
295 	0x00000000,
296 	(0x8040 << 16) | (0x9874 >> 2),
297 	0x00000000,
298 	(0x8041 << 16) | (0x9870 >> 2),
299 	0x00000000,
300 	(0x8041 << 16) | (0x9874 >> 2),
301 	0x00000000,
302 	0x00000000
303 };
304 
/*
 * Tahiti golden RLC register settings.  Rows are {reg, mask, value} triples;
 * presumably applied via the driver's register-sequence programmer (defined
 * outside this chunk) — TODO confirm at the call site.
 */
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};
314 
/*
 * Tahiti golden register settings.  Rows are {reg, mask, value} triples;
 * presumably applied via the driver's register-sequence programmer (defined
 * outside this chunk) — TODO confirm at the call site.
 */
static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};
349 
/* Second Tahiti golden-register table ({reg, mask, value} triple). */
static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};
354 
/* Pitcairn golden RLC register settings ({reg, mask, value} triples). */
static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};
363 
/* Pitcairn golden register settings ({reg, mask, value} triples). */
static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
394 
/* Verde golden RLC register settings ({reg, mask, value} triples). */
static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};
403 
/*
 * Verde golden register settings ({reg, mask, value} triples).
 * NOTE(review): several rows are duplicated verbatim (0xd030, 0xd830, 0x2ae4,
 * 0x240c, 0x8a14, 0x28350, 0x9100, 0x8e88, 0x8e84, 0xac14, 0xac10, 0xac0c,
 * 0x88d4).  This matches the upstream Linux driver of this vintage; kept as-is
 * rather than deduplicated.
 */
static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
459 
/* Oland golden RLC register settings ({reg, mask, value} triples). */
static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};
468 
/* Oland golden register settings ({reg, mask, value} triples). */
static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
499 
/* Hainan golden register settings ({reg, mask, value} triples). */
static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
528 
/* Second Hainan golden-register table ({reg, mask, value} triple). */
static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};
533 
/*
 * Tahiti medium-grain / coarse-grain clock-gating (MGCG/CGCG) init sequence.
 * Rows are {reg, mask, value} triples; presumably applied via the driver's
 * register-sequence programmer during clock-gating setup — TODO confirm at
 * the call site.
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
663 
/*
 * Pitcairn MGCG/CGCG clock-gating init sequence ({reg, mask, value} triples);
 * presumably applied via the driver's register-sequence programmer — TODO
 * confirm at the call site.
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
761 
/*
 * Verde MGCG/CGCG clock-gating init sequence ({reg, mask, value} triples);
 * presumably applied via the driver's register-sequence programmer — TODO
 * confirm at the call site.
 */
static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
861 
/*
 * Oland MGCG/CGCG clock-gating init sequence ({reg, mask, value} triples);
 * presumably applied via the driver's register-sequence programmer — TODO
 * confirm at the call site.
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
941 
/*
 * Hainan MGCG/CGCG (clock gating) golden-register table.
 * Flat triples of { register offset, writable-bit mask, value }; consumed
 * by radeon_program_register_sequence() from si_init_golden_registers().
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1018 
1019 static u32 verde_pg_init[] =
1020 {
1021 	0x353c, 0xffffffff, 0x40000,
1022 	0x3538, 0xffffffff, 0x200010ff,
1023 	0x353c, 0xffffffff, 0x0,
1024 	0x353c, 0xffffffff, 0x0,
1025 	0x353c, 0xffffffff, 0x0,
1026 	0x353c, 0xffffffff, 0x0,
1027 	0x353c, 0xffffffff, 0x0,
1028 	0x353c, 0xffffffff, 0x7007,
1029 	0x3538, 0xffffffff, 0x300010ff,
1030 	0x353c, 0xffffffff, 0x0,
1031 	0x353c, 0xffffffff, 0x0,
1032 	0x353c, 0xffffffff, 0x0,
1033 	0x353c, 0xffffffff, 0x0,
1034 	0x353c, 0xffffffff, 0x0,
1035 	0x353c, 0xffffffff, 0x400000,
1036 	0x3538, 0xffffffff, 0x100010ff,
1037 	0x353c, 0xffffffff, 0x0,
1038 	0x353c, 0xffffffff, 0x0,
1039 	0x353c, 0xffffffff, 0x0,
1040 	0x353c, 0xffffffff, 0x0,
1041 	0x353c, 0xffffffff, 0x0,
1042 	0x353c, 0xffffffff, 0x120200,
1043 	0x3538, 0xffffffff, 0x500010ff,
1044 	0x353c, 0xffffffff, 0x0,
1045 	0x353c, 0xffffffff, 0x0,
1046 	0x353c, 0xffffffff, 0x0,
1047 	0x353c, 0xffffffff, 0x0,
1048 	0x353c, 0xffffffff, 0x0,
1049 	0x353c, 0xffffffff, 0x1e1e16,
1050 	0x3538, 0xffffffff, 0x600010ff,
1051 	0x353c, 0xffffffff, 0x0,
1052 	0x353c, 0xffffffff, 0x0,
1053 	0x353c, 0xffffffff, 0x0,
1054 	0x353c, 0xffffffff, 0x0,
1055 	0x353c, 0xffffffff, 0x0,
1056 	0x353c, 0xffffffff, 0x171f1e,
1057 	0x3538, 0xffffffff, 0x700010ff,
1058 	0x353c, 0xffffffff, 0x0,
1059 	0x353c, 0xffffffff, 0x0,
1060 	0x353c, 0xffffffff, 0x0,
1061 	0x353c, 0xffffffff, 0x0,
1062 	0x353c, 0xffffffff, 0x0,
1063 	0x353c, 0xffffffff, 0x0,
1064 	0x3538, 0xffffffff, 0x9ff,
1065 	0x3500, 0xffffffff, 0x0,
1066 	0x3504, 0xffffffff, 0x10000800,
1067 	0x3504, 0xffffffff, 0xf,
1068 	0x3504, 0xffffffff, 0xf,
1069 	0x3500, 0xffffffff, 0x4,
1070 	0x3504, 0xffffffff, 0x1000051e,
1071 	0x3504, 0xffffffff, 0xffff,
1072 	0x3504, 0xffffffff, 0xffff,
1073 	0x3500, 0xffffffff, 0x8,
1074 	0x3504, 0xffffffff, 0x80500,
1075 	0x3500, 0xffffffff, 0x12,
1076 	0x3504, 0xffffffff, 0x9050c,
1077 	0x3500, 0xffffffff, 0x1d,
1078 	0x3504, 0xffffffff, 0xb052c,
1079 	0x3500, 0xffffffff, 0x2a,
1080 	0x3504, 0xffffffff, 0x1053e,
1081 	0x3500, 0xffffffff, 0x2d,
1082 	0x3504, 0xffffffff, 0x10546,
1083 	0x3500, 0xffffffff, 0x30,
1084 	0x3504, 0xffffffff, 0xa054e,
1085 	0x3500, 0xffffffff, 0x3c,
1086 	0x3504, 0xffffffff, 0x1055f,
1087 	0x3500, 0xffffffff, 0x3f,
1088 	0x3504, 0xffffffff, 0x10567,
1089 	0x3500, 0xffffffff, 0x42,
1090 	0x3504, 0xffffffff, 0x1056f,
1091 	0x3500, 0xffffffff, 0x45,
1092 	0x3504, 0xffffffff, 0x10572,
1093 	0x3500, 0xffffffff, 0x48,
1094 	0x3504, 0xffffffff, 0x20575,
1095 	0x3500, 0xffffffff, 0x4c,
1096 	0x3504, 0xffffffff, 0x190801,
1097 	0x3500, 0xffffffff, 0x67,
1098 	0x3504, 0xffffffff, 0x1082a,
1099 	0x3500, 0xffffffff, 0x6a,
1100 	0x3504, 0xffffffff, 0x1b082d,
1101 	0x3500, 0xffffffff, 0x87,
1102 	0x3504, 0xffffffff, 0x310851,
1103 	0x3500, 0xffffffff, 0xba,
1104 	0x3504, 0xffffffff, 0x891,
1105 	0x3500, 0xffffffff, 0xbc,
1106 	0x3504, 0xffffffff, 0x893,
1107 	0x3500, 0xffffffff, 0xbe,
1108 	0x3504, 0xffffffff, 0x20895,
1109 	0x3500, 0xffffffff, 0xc2,
1110 	0x3504, 0xffffffff, 0x20899,
1111 	0x3500, 0xffffffff, 0xc6,
1112 	0x3504, 0xffffffff, 0x2089d,
1113 	0x3500, 0xffffffff, 0xca,
1114 	0x3504, 0xffffffff, 0x8a1,
1115 	0x3500, 0xffffffff, 0xcc,
1116 	0x3504, 0xffffffff, 0x8a3,
1117 	0x3500, 0xffffffff, 0xce,
1118 	0x3504, 0xffffffff, 0x308a5,
1119 	0x3500, 0xffffffff, 0xd3,
1120 	0x3504, 0xffffffff, 0x6d08cd,
1121 	0x3500, 0xffffffff, 0x142,
1122 	0x3504, 0xffffffff, 0x2000095a,
1123 	0x3504, 0xffffffff, 0x1,
1124 	0x3500, 0xffffffff, 0x144,
1125 	0x3504, 0xffffffff, 0x301f095b,
1126 	0x3500, 0xffffffff, 0x165,
1127 	0x3504, 0xffffffff, 0xc094d,
1128 	0x3500, 0xffffffff, 0x173,
1129 	0x3504, 0xffffffff, 0xf096d,
1130 	0x3500, 0xffffffff, 0x184,
1131 	0x3504, 0xffffffff, 0x15097f,
1132 	0x3500, 0xffffffff, 0x19b,
1133 	0x3504, 0xffffffff, 0xc0998,
1134 	0x3500, 0xffffffff, 0x1a9,
1135 	0x3504, 0xffffffff, 0x409a7,
1136 	0x3500, 0xffffffff, 0x1af,
1137 	0x3504, 0xffffffff, 0xcdc,
1138 	0x3500, 0xffffffff, 0x1b1,
1139 	0x3504, 0xffffffff, 0x800,
1140 	0x3508, 0xffffffff, 0x6c9b2000,
1141 	0x3510, 0xfc00, 0x2000,
1142 	0x3544, 0xffffffff, 0xfc0,
1143 	0x28d4, 0x00000100, 0x100
1144 };
1145 
1146 static void si_init_golden_registers(struct radeon_device *rdev)
1147 {
1148 	switch (rdev->family) {
1149 	case CHIP_TAHITI:
1150 		radeon_program_register_sequence(rdev,
1151 						 tahiti_golden_registers,
1152 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1153 		radeon_program_register_sequence(rdev,
1154 						 tahiti_golden_rlc_registers,
1155 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1156 		radeon_program_register_sequence(rdev,
1157 						 tahiti_mgcg_cgcg_init,
1158 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1159 		radeon_program_register_sequence(rdev,
1160 						 tahiti_golden_registers2,
1161 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1162 		break;
1163 	case CHIP_PITCAIRN:
1164 		radeon_program_register_sequence(rdev,
1165 						 pitcairn_golden_registers,
1166 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1167 		radeon_program_register_sequence(rdev,
1168 						 pitcairn_golden_rlc_registers,
1169 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1170 		radeon_program_register_sequence(rdev,
1171 						 pitcairn_mgcg_cgcg_init,
1172 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1173 		break;
1174 	case CHIP_VERDE:
1175 		radeon_program_register_sequence(rdev,
1176 						 verde_golden_registers,
1177 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1178 		radeon_program_register_sequence(rdev,
1179 						 verde_golden_rlc_registers,
1180 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1181 		radeon_program_register_sequence(rdev,
1182 						 verde_mgcg_cgcg_init,
1183 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1184 		radeon_program_register_sequence(rdev,
1185 						 verde_pg_init,
1186 						 (const u32)ARRAY_SIZE(verde_pg_init));
1187 		break;
1188 	case CHIP_OLAND:
1189 		radeon_program_register_sequence(rdev,
1190 						 oland_golden_registers,
1191 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1192 		radeon_program_register_sequence(rdev,
1193 						 oland_golden_rlc_registers,
1194 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1195 		radeon_program_register_sequence(rdev,
1196 						 oland_mgcg_cgcg_init,
1197 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1198 		break;
1199 	case CHIP_HAINAN:
1200 		radeon_program_register_sequence(rdev,
1201 						 hainan_golden_registers,
1202 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1203 		radeon_program_register_sequence(rdev,
1204 						 hainan_golden_registers2,
1205 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1206 		radeon_program_register_sequence(rdev,
1207 						 hainan_mgcg_cgcg_init,
1208 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1209 		break;
1210 	default:
1211 		break;
1212 	}
1213 }
1214 
1215 #define PCIE_BUS_CLK                10000
1216 #define TCLK                        (PCIE_BUS_CLK / 10)
1217 
1218 /**
1219  * si_get_xclk - get the xclk
1220  *
1221  * @rdev: radeon_device pointer
1222  *
1223  * Returns the reference clock used by the gfx engine
1224  * (SI).
1225  */
1226 u32 si_get_xclk(struct radeon_device *rdev)
1227 {
1228         u32 reference_clock = rdev->clock.spll.reference_freq;
1229 	u32 tmp;
1230 
1231 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1232 	if (tmp & MUX_TCLK_TO_XCLK)
1233 		return TCLK;
1234 
1235 	tmp = RREG32(CG_CLKPIN_CNTL);
1236 	if (tmp & XTALIN_DIVIDE)
1237 		return reference_clock / 4;
1238 
1239 	return reference_clock;
1240 }
1241 
1242 /* get temperature in millidegrees */
1243 int si_get_temp(struct radeon_device *rdev)
1244 {
1245 	u32 temp;
1246 	int actual_temp = 0;
1247 
1248 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1249 		CTF_TEMP_SHIFT;
1250 
1251 	if (temp & 0x200)
1252 		actual_temp = 255;
1253 	else
1254 		actual_temp = temp & 0x1ff;
1255 
1256 	actual_temp = (actual_temp * 1000);
1257 
1258 	return actual_temp;
1259 }
1260 
1261 #define TAHITI_IO_MC_REGS_SIZE 36
1262 
/*
 * Tahiti MC sequencer IO debug settings: {index, data} pairs written to
 * MC_SEQ_IO_DEBUG_INDEX/_DATA by si_mc_load_microcode() before the MC
 * ucode is uploaded.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1301 
/*
 * Pitcairn MC sequencer IO debug settings: {index, data} pairs written to
 * MC_SEQ_IO_DEBUG_INDEX/_DATA by si_mc_load_microcode() before the MC
 * ucode is uploaded.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1340 
/*
 * Verde MC sequencer IO debug settings: {index, data} pairs written to
 * MC_SEQ_IO_DEBUG_INDEX/_DATA by si_mc_load_microcode() before the MC
 * ucode is uploaded.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1379 
/*
 * Oland MC sequencer IO debug settings: {index, data} pairs written to
 * MC_SEQ_IO_DEBUG_INDEX/_DATA by si_mc_load_microcode() before the MC
 * ucode is uploaded.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1418 
/*
 * Hainan MC sequencer IO debug settings: {index, data} pairs written to
 * MC_SEQ_IO_DEBUG_INDEX/_DATA by si_mc_load_microcode() before the MC
 * ucode is uploaded.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1457 
1458 /* ucode loading */
1459 static int si_mc_load_microcode(struct radeon_device *rdev)
1460 {
1461 	const __be32 *fw_data;
1462 	u32 running, blackout = 0;
1463 	u32 *io_mc_regs;
1464 	int i, ucode_size, regs_size;
1465 
1466 	if (!rdev->mc_fw)
1467 		return -EINVAL;
1468 
1469 	switch (rdev->family) {
1470 	case CHIP_TAHITI:
1471 		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1472 		ucode_size = SI_MC_UCODE_SIZE;
1473 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1474 		break;
1475 	case CHIP_PITCAIRN:
1476 		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1477 		ucode_size = SI_MC_UCODE_SIZE;
1478 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1479 		break;
1480 	case CHIP_VERDE:
1481 	default:
1482 		io_mc_regs = (u32 *)&verde_io_mc_regs;
1483 		ucode_size = SI_MC_UCODE_SIZE;
1484 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1485 		break;
1486 	case CHIP_OLAND:
1487 		io_mc_regs = (u32 *)&oland_io_mc_regs;
1488 		ucode_size = OLAND_MC_UCODE_SIZE;
1489 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1490 		break;
1491 	case CHIP_HAINAN:
1492 		io_mc_regs = (u32 *)&hainan_io_mc_regs;
1493 		ucode_size = OLAND_MC_UCODE_SIZE;
1494 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1495 		break;
1496 	}
1497 
1498 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1499 
1500 	if (running == 0) {
1501 		if (running) {
1502 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1503 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1504 		}
1505 
1506 		/* reset the engine and set to writable */
1507 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1508 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1509 
1510 		/* load mc io regs */
1511 		for (i = 0; i < regs_size; i++) {
1512 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1513 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1514 		}
1515 		/* load the MC ucode */
1516 		fw_data = (const __be32 *)rdev->mc_fw->data;
1517 		for (i = 0; i < ucode_size; i++)
1518 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1519 
1520 		/* put the engine back into the active state */
1521 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1522 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1523 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1524 
1525 		/* wait for training to complete */
1526 		for (i = 0; i < rdev->usec_timeout; i++) {
1527 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1528 				break;
1529 			udelay(1);
1530 		}
1531 		for (i = 0; i < rdev->usec_timeout; i++) {
1532 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1533 				break;
1534 			udelay(1);
1535 		}
1536 
1537 		if (running)
1538 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1539 	}
1540 
1541 	return 0;
1542 }
1543 
1544 static int si_init_microcode(struct radeon_device *rdev)
1545 {
1546 	const char *chip_name;
1547 	const char *rlc_chip_name;
1548 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1549 	size_t smc_req_size;
1550 	char fw_name[30];
1551 	int err;
1552 
1553 	DRM_DEBUG("\n");
1554 
1555 	switch (rdev->family) {
1556 	case CHIP_TAHITI:
1557 		chip_name = "TAHITI";
1558 		rlc_chip_name = "TAHITI";
1559 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1560 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1561 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1562 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1563 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1564 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1565 		break;
1566 	case CHIP_PITCAIRN:
1567 		chip_name = "PITCAIRN";
1568 		rlc_chip_name = "PITCAIRN";
1569 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1570 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1571 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1572 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1573 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1574 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1575 		break;
1576 	case CHIP_VERDE:
1577 		chip_name = "VERDE";
1578 		rlc_chip_name = "VERDE";
1579 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1580 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1581 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1582 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1583 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1584 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1585 		break;
1586 	case CHIP_OLAND:
1587 		chip_name = "OLAND";
1588 		rlc_chip_name = "OLAND";
1589 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1590 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1591 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1592 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1593 		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1594 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1595 		break;
1596 	case CHIP_HAINAN:
1597 		chip_name = "HAINAN";
1598 		rlc_chip_name = "HAINAN";
1599 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1600 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1601 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1602 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1603 		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1604 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1605 		break;
1606 	default: BUG();
1607 	}
1608 
1609 	DRM_INFO("Loading %s Microcode\n", chip_name);
1610 
1611 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
1612 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1613 	if (err)
1614 		goto out;
1615 	if (rdev->pfp_fw->datasize != pfp_req_size) {
1616 		printk(KERN_ERR
1617 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1618 		       rdev->pfp_fw->datasize, fw_name);
1619 		err = -EINVAL;
1620 		goto out;
1621 	}
1622 
1623 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
1624 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1625 	if (err)
1626 		goto out;
1627 	if (rdev->me_fw->datasize != me_req_size) {
1628 		printk(KERN_ERR
1629 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1630 		       rdev->me_fw->datasize, fw_name);
1631 		err = -EINVAL;
1632 	}
1633 
1634 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
1635 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1636 	if (err)
1637 		goto out;
1638 	if (rdev->ce_fw->datasize != ce_req_size) {
1639 		printk(KERN_ERR
1640 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1641 		       rdev->ce_fw->datasize, fw_name);
1642 		err = -EINVAL;
1643 	}
1644 
1645 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", rlc_chip_name);
1646 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1647 	if (err)
1648 		goto out;
1649 	if (rdev->rlc_fw->datasize != rlc_req_size) {
1650 		printk(KERN_ERR
1651 		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1652 		       rdev->rlc_fw->datasize, fw_name);
1653 		err = -EINVAL;
1654 	}
1655 
1656 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
1657 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1658 	if (err)
1659 		goto out;
1660 	if (rdev->mc_fw->datasize != mc_req_size) {
1661 		printk(KERN_ERR
1662 		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1663 		       rdev->mc_fw->datasize, fw_name);
1664 		err = -EINVAL;
1665 	}
1666 
1667 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
1668 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1669 	if (err) {
1670 		printk(KERN_ERR
1671 		       "smc: error loading firmware \"%s\"\n",
1672 		       fw_name);
1673 		release_firmware(rdev->smc_fw);
1674 		rdev->smc_fw = NULL;
1675 		err = 0;
1676 	} else if (rdev->smc_fw->datasize != smc_req_size) {
1677 		printk(KERN_ERR
1678 		       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1679 		       rdev->smc_fw->datasize, fw_name);
1680 		err = -EINVAL;
1681 	}
1682 
1683 out:
1684 	if (err) {
1685 		if (err != -EINVAL)
1686 			printk(KERN_ERR
1687 			       "si_cp: Failed to load firmware \"%s\"\n",
1688 			       fw_name);
1689 		release_firmware(rdev->pfp_fw);
1690 		rdev->pfp_fw = NULL;
1691 		release_firmware(rdev->me_fw);
1692 		rdev->me_fw = NULL;
1693 		release_firmware(rdev->ce_fw);
1694 		rdev->ce_fw = NULL;
1695 		release_firmware(rdev->rlc_fw);
1696 		rdev->rlc_fw = NULL;
1697 		release_firmware(rdev->mc_fw);
1698 		rdev->mc_fw = NULL;
1699 		release_firmware(rdev->smc_fw);
1700 		rdev->smc_fw = NULL;
1701 	}
1702 	return err;
1703 }
1704 
1705 /**
1706  * si_fini_microcode - drop the firmwares image references
1707  *
1708  * @rdev: radeon_device pointer
1709  *
1710  * Drop the pfp, me, rlc, mc and ce firmware image references.
1711  * Called at driver shutdown.
1712  */
1713 static void si_fini_microcode(struct radeon_device *rdev)
1714 {
1715 	release_firmware(rdev->pfp_fw);
1716 	rdev->pfp_fw = NULL;
1717 	release_firmware(rdev->me_fw);
1718 	rdev->me_fw = NULL;
1719 	release_firmware(rdev->rlc_fw);
1720 	rdev->rlc_fw = NULL;
1721 	release_firmware(rdev->mc_fw);
1722 	rdev->mc_fw = NULL;
1723 	release_firmware(rdev->smc_fw);
1724 	rdev->smc_fw = NULL;
1725 	release_firmware(rdev->ce_fw);
1726 	rdev->ce_fw = NULL;
1727 }
1728 
/* watermark setup */
/**
 * dce6_line_buffer_adjust - set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display controller
 * @other_mode: the display mode of the other display controller that
 *              shares this crtc's line buffer
 *
 * Programs DC_LB_MEMORY_SPLIT and the DMIF buffer allocation for the
 * selected crtc, then waits (bounded by rdev->usec_timeout) for the
 * allocation to be acknowledged.  Returns the resulting line buffer
 * size for this crtc (0 when the crtc is disabled).
 */
static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode,
				   struct drm_display_mode *other_mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 3 line buffers, each one shared by 2 display controllers.
	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
	 * the display controllers.  The paritioning is done via one of four
	 * preset allocations specified in bits 21:20:
	 *  0 - half lb
	 *  2 - whole lb, other crtc must be disabled
	 */
	/* this can get tricky if we have two large displays on a paired group
	 * of crtcs.  Ideally for multiple large displays we'd assign them to
	 * non-linked crtcs for maximum line buffer allocation.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (other_mode) {
			tmp = 0; /* 1/2 */
			buffer_alloc = 1;
		} else {
			tmp = 2; /* whole */
			buffer_alloc = 2;
		}
	} else {
		/* crtc disabled: no line buffer and no DMIF buffers needed */
		tmp = 0;
		buffer_alloc = 0;
	}

	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
	       DC_LB_MEMORY_CONFIG(tmp));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	/* poll until the hardware reports the DMIF allocation completed */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 2:
			return 8192 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
1788 
1789 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1790 {
1791 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1792 
1793 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1794 	case 0:
1795 	default:
1796 		return 1;
1797 	case 1:
1798 		return 2;
1799 	case 2:
1800 		return 4;
1801 	case 3:
1802 		return 8;
1803 	case 4:
1804 		return 3;
1805 	case 5:
1806 		return 6;
1807 	case 6:
1808 		return 10;
1809 	case 7:
1810 		return 12;
1811 	case 8:
1812 		return 16;
1813 	}
1814 }
1815 
/* Inputs for the DCE6 display watermark calculations below. */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1831 
1832 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1833 {
1834 	/* Calculate raw DRAM Bandwidth */
1835 	fixed20_12 dram_efficiency; /* 0.7 */
1836 	fixed20_12 yclk, dram_channels, bandwidth;
1837 	fixed20_12 a;
1838 
1839 	a.full = dfixed_const(1000);
1840 	yclk.full = dfixed_const(wm->yclk);
1841 	yclk.full = dfixed_div(yclk, a);
1842 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1843 	a.full = dfixed_const(10);
1844 	dram_efficiency.full = dfixed_const(7);
1845 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
1846 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1847 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1848 
1849 	return dfixed_trunc(bandwidth);
1850 }
1851 
1852 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1853 {
1854 	/* Calculate DRAM Bandwidth and the part allocated to display. */
1855 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1856 	fixed20_12 yclk, dram_channels, bandwidth;
1857 	fixed20_12 a;
1858 
1859 	a.full = dfixed_const(1000);
1860 	yclk.full = dfixed_const(wm->yclk);
1861 	yclk.full = dfixed_div(yclk, a);
1862 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1863 	a.full = dfixed_const(10);
1864 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1865 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1866 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1867 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1868 
1869 	return dfixed_trunc(bandwidth);
1870 }
1871 
1872 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1873 {
1874 	/* Calculate the display Data return Bandwidth */
1875 	fixed20_12 return_efficiency; /* 0.8 */
1876 	fixed20_12 sclk, bandwidth;
1877 	fixed20_12 a;
1878 
1879 	a.full = dfixed_const(1000);
1880 	sclk.full = dfixed_const(wm->sclk);
1881 	sclk.full = dfixed_div(sclk, a);
1882 	a.full = dfixed_const(10);
1883 	return_efficiency.full = dfixed_const(8);
1884 	return_efficiency.full = dfixed_div(return_efficiency, a);
1885 	a.full = dfixed_const(32);
1886 	bandwidth.full = dfixed_mul(a, sclk);
1887 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1888 
1889 	return dfixed_trunc(bandwidth);
1890 }
1891 
1892 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1893 {
1894 	return 32;
1895 }
1896 
1897 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1898 {
1899 	/* Calculate the DMIF Request Bandwidth */
1900 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1901 	fixed20_12 disp_clk, sclk, bandwidth;
1902 	fixed20_12 a, b1, b2;
1903 	u32 min_bandwidth;
1904 
1905 	a.full = dfixed_const(1000);
1906 	disp_clk.full = dfixed_const(wm->disp_clk);
1907 	disp_clk.full = dfixed_div(disp_clk, a);
1908 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1909 	b1.full = dfixed_mul(a, disp_clk);
1910 
1911 	a.full = dfixed_const(1000);
1912 	sclk.full = dfixed_const(wm->sclk);
1913 	sclk.full = dfixed_div(sclk, a);
1914 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1915 	b2.full = dfixed_mul(a, sclk);
1916 
1917 	a.full = dfixed_const(10);
1918 	disp_clk_request_efficiency.full = dfixed_const(8);
1919 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1920 
1921 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1922 
1923 	a.full = dfixed_const(min_bandwidth);
1924 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1925 
1926 	return dfixed_trunc(bandwidth);
1927 }
1928 
1929 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1930 {
1931 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1932 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1933 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1934 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1935 
1936 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1937 }
1938 
1939 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1940 {
1941 	/* Calculate the display mode Average Bandwidth
1942 	 * DisplayMode should contain the source and destination dimensions,
1943 	 * timing, etc.
1944 	 */
1945 	fixed20_12 bpp;
1946 	fixed20_12 line_time;
1947 	fixed20_12 src_width;
1948 	fixed20_12 bandwidth;
1949 	fixed20_12 a;
1950 
1951 	a.full = dfixed_const(1000);
1952 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1953 	line_time.full = dfixed_div(line_time, a);
1954 	bpp.full = dfixed_const(wm->bytes_per_pixel);
1955 	src_width.full = dfixed_const(wm->src_width);
1956 	bandwidth.full = dfixed_mul(src_width, bpp);
1957 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1958 	bandwidth.full = dfixed_div(bandwidth, line_time);
1959 
1960 	return dfixed_trunc(bandwidth);
1961 }
1962 
/* Worst-case latency (in ns) the display must tolerate before its line
 * buffer refills: memory-controller latency, the time other heads'
 * outstanding requests occupy the return path, and dc pipe latency.
 * If the line buffer cannot refill within the active portion of a
 * line, the shortfall is added on top.  Returns 0 when no heads are
 * active (avoids a division by zero below).
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* time to return one worst-case chunk / one cursor line pair
	 * at the available rate (the *1000 keeps units in ns)
	 */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* requests from all the other heads can be queued ahead of ours */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling, many vtaps, or interlace with 2x scaling
	 * means up to 4 source lines feed each destination line;
	 * otherwise at most 2
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif-limited fill rate: dmif_size over the round-trip
	 * time (mc latency + 512) / disp_clk
	 */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = pixel-rate-limited line buffer fill: disp_clk/1000 * bpp */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* effective line buffer fill bandwidth is the lowest limit */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to refill the worst-case number of source lines at that rate */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the refill fits inside the active period, plain latency is
	 * enough; otherwise the overflow adds to the watermark
	 */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2025 
2026 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2027 {
2028 	if (dce6_average_bandwidth(wm) <=
2029 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2030 		return true;
2031 	else
2032 		return false;
2033 };
2034 
2035 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2036 {
2037 	if (dce6_average_bandwidth(wm) <=
2038 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2039 		return true;
2040 	else
2041 		return false;
2042 };
2043 
2044 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2045 {
2046 	u32 lb_partitions = wm->lb_size / wm->src_width;
2047 	u32 line_time = wm->active_time + wm->blank_time;
2048 	u32 latency_tolerant_lines;
2049 	u32 latency_hiding;
2050 	fixed20_12 a;
2051 
2052 	a.full = dfixed_const(1);
2053 	if (wm->vsc.full > a.full)
2054 		latency_tolerant_lines = 1;
2055 	else {
2056 		if (lb_partitions <= (wm->vtaps + 1))
2057 			latency_tolerant_lines = 1;
2058 		else
2059 			latency_tolerant_lines = 2;
2060 	}
2061 
2062 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2063 
2064 	if (dce6_latency_watermark(wm) <= latency_hiding)
2065 		return true;
2066 	else
2067 		return false;
2068 }
2069 
/* Compute and program the DCE6 display watermarks for one crtc:
 * watermark A for the high (max) clocks, watermark B for the low (min)
 * clocks, plus the priority marks that can force display requests to
 * high priority when bandwidth is tight.  Results are also cached on
 * the crtc for later use by DPM.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* pixel period from the mode clock (clock presumably in
		 * kHz, making pixel_period ns per pixel)
		 */
		pixel_period = 1000000 / (u32)mode->clock;
		/* clamp to the 16-bit LATENCY_HIGH_WATERMARK field */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			/* false = query the max dpm clock levels */
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		/* NOTE(review): line_time was clamped to 65535 above; if
		 * active_time exceeds that clamp this subtraction wraps —
		 * confirm against upstream fixes
		 */
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			/* true = query the min dpm clock levels */
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: latency_watermark_a * pixel rate * hsc,
		 * scaled into units of 16 pixels
		 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same computation with watermark B */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2229 
/* Recompute display priorities and reprogram the watermarks for every
 * crtc.  The line buffer is shared between each pair of crtcs, so they
 * are processed two at a time with each other's mode passed in for the
 * line-buffer split decision.
 */
void dce6_bandwidth_update(struct radeon_device *rdev)
{
	struct drm_display_mode *mode0 = NULL;
	struct drm_display_mode *mode1 = NULL;
	u32 num_heads = 0, lb_size;
	int i;

	radeon_update_display_priority(rdev);

	/* count the active heads first; the watermark math divides by it */
	for (i = 0; i < rdev->num_crtc; i++) {
		if (rdev->mode_info.crtcs[i]->base.enabled)
			num_heads++;
	}
	/* NOTE(review): the i+1 accesses assume num_crtc is even —
	 * confirm this holds for every supported asic
	 */
	for (i = 0; i < rdev->num_crtc; i += 2) {
		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
	}
}
2252 
2253 /*
2254  * Core functions
2255  */
2256 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2257 {
2258 	const u32 num_tile_mode_states = 32;
2259 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2260 
2261 	switch (rdev->config.si.mem_row_size_in_kb) {
2262 	case 1:
2263 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2264 		break;
2265 	case 2:
2266 	default:
2267 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2268 		break;
2269 	case 4:
2270 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2271 		break;
2272 	}
2273 
2274 	if ((rdev->family == CHIP_TAHITI) ||
2275 	    (rdev->family == CHIP_PITCAIRN)) {
2276 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2277 			switch (reg_offset) {
2278 			case 0:  /* non-AA compressed depth or any compressed stencil */
2279 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2281 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2282 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2283 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2284 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2285 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2286 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2287 				break;
2288 			case 1:  /* 2xAA/4xAA compressed depth only */
2289 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2290 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2291 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2292 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2293 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2294 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2296 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2297 				break;
2298 			case 2:  /* 8xAA compressed depth only */
2299 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2300 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2301 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2302 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2303 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2304 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2305 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2306 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2307 				break;
2308 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2309 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2310 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2311 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2312 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2313 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2314 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2315 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2316 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2317 				break;
2318 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2319 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2320 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2321 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2322 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2323 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2324 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2325 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2326 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2327 				break;
2328 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2329 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2330 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2331 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2332 						 TILE_SPLIT(split_equal_to_row_size) |
2333 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2334 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2336 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2337 				break;
2338 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2339 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2341 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2342 						 TILE_SPLIT(split_equal_to_row_size) |
2343 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2344 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2345 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2346 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2347 				break;
2348 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2349 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2350 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2351 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2352 						 TILE_SPLIT(split_equal_to_row_size) |
2353 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2354 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2355 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2356 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2357 				break;
2358 			case 8:  /* 1D and 1D Array Surfaces */
2359 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2360 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2361 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2362 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2363 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2364 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2366 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2367 				break;
2368 			case 9:  /* Displayable maps. */
2369 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2370 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2371 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2372 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2373 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2374 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2376 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2377 				break;
2378 			case 10:  /* Display 8bpp. */
2379 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2380 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2381 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2382 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2383 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2384 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2386 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2387 				break;
2388 			case 11:  /* Display 16bpp. */
2389 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2391 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2392 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2393 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2394 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2395 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2396 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2397 				break;
2398 			case 12:  /* Display 32bpp. */
2399 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2401 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2402 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2403 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2404 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2406 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2407 				break;
2408 			case 13:  /* Thin. */
2409 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2410 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2411 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2412 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2413 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2414 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2416 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2417 				break;
2418 			case 14:  /* Thin 8 bpp. */
2419 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2420 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2421 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2422 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2423 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2424 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2425 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2426 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2427 				break;
2428 			case 15:  /* Thin 16 bpp. */
2429 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2430 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2431 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2432 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2433 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2434 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2436 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2437 				break;
2438 			case 16:  /* Thin 32 bpp. */
2439 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2440 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2441 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2442 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2443 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2444 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2446 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2447 				break;
2448 			case 17:  /* Thin 64 bpp. */
2449 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2450 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2451 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2452 						 TILE_SPLIT(split_equal_to_row_size) |
2453 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2454 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2456 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2457 				break;
2458 			case 21:  /* 8 bpp PRT. */
2459 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2460 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2461 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2462 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2463 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2464 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2465 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2466 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2467 				break;
2468 			case 22:  /* 16 bpp PRT */
2469 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2470 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2471 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2472 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2473 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2474 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2476 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2477 				break;
2478 			case 23:  /* 32 bpp PRT */
2479 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2481 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2482 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2483 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2484 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2486 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2487 				break;
2488 			case 24:  /* 64 bpp PRT */
2489 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2490 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2491 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2492 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2493 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2494 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2496 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2497 				break;
2498 			case 25:  /* 128 bpp PRT */
2499 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2500 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2501 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2502 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2503 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2504 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2507 				break;
2508 			default:
2509 				gb_tile_moden = 0;
2510 				break;
2511 			}
2512 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2513 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2514 		}
2515 	} else if ((rdev->family == CHIP_VERDE) ||
2516 		   (rdev->family == CHIP_OLAND) ||
2517 		   (rdev->family == CHIP_HAINAN)) {
2518 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2519 			switch (reg_offset) {
2520 			case 0:  /* non-AA compressed depth or any compressed stencil */
2521 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2523 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2524 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2525 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2526 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2528 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2529 				break;
2530 			case 1:  /* 2xAA/4xAA compressed depth only */
2531 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2532 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2533 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2534 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2535 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2536 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2538 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2539 				break;
2540 			case 2:  /* 8xAA compressed depth only */
2541 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2542 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2543 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2544 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2545 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2546 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2548 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2549 				break;
2550 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2551 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2553 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2554 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2555 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2556 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2558 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2559 				break;
2560 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2561 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2562 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2563 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2564 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2565 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2566 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2568 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2569 				break;
2570 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2571 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2572 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2573 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2574 						 TILE_SPLIT(split_equal_to_row_size) |
2575 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2576 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2577 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2578 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2579 				break;
2580 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2581 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2582 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2583 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2584 						 TILE_SPLIT(split_equal_to_row_size) |
2585 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2586 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2587 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2588 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2589 				break;
2590 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2591 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2592 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2593 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2594 						 TILE_SPLIT(split_equal_to_row_size) |
2595 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2596 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2598 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2599 				break;
2600 			case 8:  /* 1D and 1D Array Surfaces */
2601 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2602 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2603 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2604 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2605 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2606 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2608 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2609 				break;
2610 			case 9:  /* Displayable maps. */
2611 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2612 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2613 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2614 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2615 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2616 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2618 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2619 				break;
2620 			case 10:  /* Display 8bpp. */
2621 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2623 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2624 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2625 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2626 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2628 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2629 				break;
2630 			case 11:  /* Display 16bpp. */
2631 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2632 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2633 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2634 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2635 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2636 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2638 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2639 				break;
2640 			case 12:  /* Display 32bpp. */
2641 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2643 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2644 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2645 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2646 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2648 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2649 				break;
2650 			case 13:  /* Thin. */
2651 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2652 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2653 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2654 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2655 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2656 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2657 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2658 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2659 				break;
2660 			case 14:  /* Thin 8 bpp. */
2661 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2663 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2664 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2665 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2666 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2668 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2669 				break;
2670 			case 15:  /* Thin 16 bpp. */
2671 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2672 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2673 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2674 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2675 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2676 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2677 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2678 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2679 				break;
2680 			case 16:  /* Thin 32 bpp. */
2681 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2683 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2684 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2685 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2686 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2687 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2688 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2689 				break;
2690 			case 17:  /* Thin 64 bpp. */
2691 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2692 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2693 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2694 						 TILE_SPLIT(split_equal_to_row_size) |
2695 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2696 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2698 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2699 				break;
2700 			case 21:  /* 8 bpp PRT. */
2701 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2702 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2703 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2704 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2705 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2706 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2707 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2708 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2709 				break;
2710 			case 22:  /* 16 bpp PRT */
2711 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2712 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2713 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2714 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2715 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2716 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2717 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2718 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2719 				break;
2720 			case 23:  /* 32 bpp PRT */
2721 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2722 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2723 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2724 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2725 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2726 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2729 				break;
2730 			case 24:  /* 64 bpp PRT */
2731 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2732 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2733 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2734 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2735 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2736 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2738 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2739 				break;
2740 			case 25:  /* 128 bpp PRT */
2741 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2743 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2744 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2745 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2746 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2747 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2748 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2749 				break;
2750 			default:
2751 				gb_tile_moden = 0;
2752 				break;
2753 			}
2754 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2755 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2756 		}
2757 	} else
2758 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2759 }
2760 
2761 static void si_select_se_sh(struct radeon_device *rdev,
2762 			    u32 se_num, u32 sh_num)
2763 {
2764 	u32 data = INSTANCE_BROADCAST_WRITES;
2765 
2766 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2767 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2768 	else if (se_num == 0xffffffff)
2769 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2770 	else if (sh_num == 0xffffffff)
2771 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2772 	else
2773 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2774 	WREG32(GRBM_GFX_INDEX, data);
2775 }
2776 
2777 static u32 si_create_bitmask(u32 bit_width)
2778 {
2779 	u32 i, mask = 0;
2780 
2781 	for (i = 0; i < bit_width; i++) {
2782 		mask <<= 1;
2783 		mask |= 1;
2784 	}
2785 	return mask;
2786 }
2787 
2788 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2789 {
2790 	u32 data, mask;
2791 
2792 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2793 	if (data & 1)
2794 		data &= INACTIVE_CUS_MASK;
2795 	else
2796 		data = 0;
2797 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2798 
2799 	data >>= INACTIVE_CUS_SHIFT;
2800 
2801 	mask = si_create_bitmask(cu_per_sh);
2802 
2803 	return ~data & mask;
2804 }
2805 
/*
 * si_setup_spi - adjust SPI static thread management per shader array
 * @rdev: radeon device
 * @se_num: number of shader engines
 * @sh_per_se: number of shader arrays per engine
 * @cu_per_sh: maximum CUs per shader array
 *
 * For each SE/SH pair, clears the bit of the first active CU found in
 * SPI_STATIC_THREAD_MGMT_3 and writes the register back, then restores
 * broadcast addressing.
 */
static void si_setup_spi(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 cu_per_sh)
{
	int i, j, k;
	u32 data, mask, active_cu;

	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			/* target this specific SE/SH for the reads below */
			si_select_se_sh(rdev, i, j);
			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
			active_cu = si_get_cu_enabled(rdev, cu_per_sh);

			/* Scan for an active CU; stop after clearing one. */
			mask = 1;
			for (k = 0; k < 16; k++) {
				/* NOTE(review): shifting by k (not 1) only ever
				 * tests bit positions 0,1,3,6,10,15 — this
				 * matches upstream Linux but looks like a
				 * latent quirk; in practice CU 0 is usually
				 * active so the loop breaks at k=0.  Confirm
				 * against AMD docs before changing.
				 */
				mask <<= k;
				if (active_cu & mask) {
					data &= ~mask;
					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
					break;
				}
			}
		}
	}
	/* back to broadcast so later register writes hit every instance */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2832 
2833 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2834 			      u32 max_rb_num, u32 se_num,
2835 			      u32 sh_per_se)
2836 {
2837 	u32 data, mask;
2838 
2839 	data = RREG32(CC_RB_BACKEND_DISABLE);
2840 	if (data & 1)
2841 		data &= BACKEND_DISABLE_MASK;
2842 	else
2843 		data = 0;
2844 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2845 
2846 	data >>= BACKEND_DISABLE_SHIFT;
2847 
2848 	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2849 
2850 	return data & mask;
2851 }
2852 
/*
 * si_setup_rb - program the rasterizer's render-backend mapping
 * @rdev: radeon device
 * @se_num: number of shader engines
 * @sh_per_se: number of shader arrays per engine
 * @max_rb_num: total number of render backends
 *
 * Gathers the disabled-RB bitmap across all SE/SH pairs, derives the
 * set of enabled RBs, and writes a PA_SC_RASTER_CONFIG mapping for
 * each shader engine based on which RBs in each pair are alive.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* collect per-array disable bits into one packed bitmap */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: every RB not disabled is enabled */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* per SE, pick an RB_MAP value from each pair of enable bits */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* NOTE: MAP_3 for case 2 and MAP_2 for case 3
				 * matches upstream — not a transposition.
				 */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2900 
/*
 * si_gpu_init - one-time GFX engine configuration for SI parts
 * @rdev: radeon device
 *
 * Fills rdev->config.si with per-ASIC shader-core limits, derives and
 * programs the global address/tiling configuration, initializes the
 * tiling-mode table and the RB/SPI backends, and applies HW defaults
 * for the 3D engine.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-ASIC limits and the "golden" GB_ADDR_CONFIG value;
	 * unknown SI parts fall back to the Verde settings (default:)
	 */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* sample MC config; mc_shared_chmap is read but currently unused */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive the DRAM row size (in KB, capped at 4) from NOOFCOLS */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* broadcast the address config to every block that consumes it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);


	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no modification; write-back only */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* zero all CB perf counter selects */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the writes above settle before continuing */
	udelay(50);
}
3157 
3158 /*
3159  * GPU scratch registers helpers function.
3160  */
3161 static void si_scratch_init(struct radeon_device *rdev)
3162 {
3163 	int i;
3164 
3165 	rdev->scratch.num_reg = 7;
3166 	rdev->scratch.reg_base = SCRATCH_REG0;
3167 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3168 		rdev->scratch.free[i] = true;
3169 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3170 	}
3171 }
3172 
/*
 * si_fence_ring_emit - emit a fence on a CP ring
 * @rdev: radeon device
 * @fence: fence to emit
 *
 * Emits a SURFACE_SYNC that flushes the CP read caches over GART,
 * then an EVENT_WRITE_EOP packet that writes fence->seq to the fence
 * driver's GPU address and raises an interrupt.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* DATA_SEL(1)/INT_SEL(2): write 32-bit data and send the
	 * interrupt — see the SI PM4 packet spec for the encoding
	 */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3199 
3200 /*
3201  * IB stuff
3202  */
/*
 * si_ring_ib_execute - schedule an indirect buffer on a CP ring
 * @rdev: radeon device
 * @ib: IB to execute
 *
 * Emits the PM4 packets that make the CP fetch and run @ib.  Const
 * IBs get a SWITCH_BUFFER packet first and are dispatched with
 * INDIRECT_BUFFER_CONST; normal IBs optionally record the post-IB
 * read pointer (scratch register or write-back memory) and flush the
 * read caches for the IB's VM id afterwards.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this scratch write + 4 for the IB
			 * packet + 8 for the cache flush emitted below
			 */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* WRITE_DATA is 5 dwords instead of 3 */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			/* NOTE(review): (1 << 8) selects the write destination
			 * — presumably memory; confirm against the PM4 spec
			 */
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* the IB dispatch packet itself: header, addr lo/hi, size|vmid */
	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3259 
3260 /*
3261  * CP.
3262  */
3263 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3264 {
3265 	if (enable)
3266 		WREG32(CP_ME_CNTL, 0);
3267 	else {
3268 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3269 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3270 		WREG32(SCRATCH_UMSK, 0);
3271 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3272 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3273 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3274 	}
3275 	udelay(50);
3276 }
3277 
3278 static int si_cp_load_microcode(struct radeon_device *rdev)
3279 {
3280 	const __be32 *fw_data;
3281 	int i;
3282 
3283 	if (!rdev->me_fw || !rdev->pfp_fw)
3284 		return -EINVAL;
3285 
3286 	si_cp_enable(rdev, false);
3287 
3288 	/* PFP */
3289 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3290 	WREG32(CP_PFP_UCODE_ADDR, 0);
3291 	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3292 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3293 	WREG32(CP_PFP_UCODE_ADDR, 0);
3294 
3295 	/* CE */
3296 	fw_data = (const __be32 *)rdev->ce_fw->data;
3297 	WREG32(CP_CE_UCODE_ADDR, 0);
3298 	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3299 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3300 	WREG32(CP_CE_UCODE_ADDR, 0);
3301 
3302 	/* ME */
3303 	fw_data = (const __be32 *)rdev->me_fw->data;
3304 	WREG32(CP_ME_RAM_WADDR, 0);
3305 	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3306 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3307 	WREG32(CP_ME_RAM_WADDR, 0);
3308 
3309 	WREG32(CP_PFP_UCODE_ADDR, 0);
3310 	WREG32(CP_CE_UCODE_ADDR, 0);
3311 	WREG32(CP_ME_RAM_WADDR, 0);
3312 	WREG32(CP_ME_RAM_RADDR, 0);
3313 	return 0;
3314 }
3315 
3316 static int si_cp_start(struct radeon_device *rdev)
3317 {
3318 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3319 	int r, i;
3320 
3321 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3322 	if (r) {
3323 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3324 		return r;
3325 	}
3326 	/* init the CP */
3327 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3328 	radeon_ring_write(ring, 0x1);
3329 	radeon_ring_write(ring, 0x0);
3330 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3331 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3332 	radeon_ring_write(ring, 0);
3333 	radeon_ring_write(ring, 0);
3334 
3335 	/* init the CE partitions */
3336 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3337 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3338 	radeon_ring_write(ring, 0xc000);
3339 	radeon_ring_write(ring, 0xe000);
3340 	radeon_ring_unlock_commit(rdev, ring);
3341 
3342 	si_cp_enable(rdev, true);
3343 
3344 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3345 	if (r) {
3346 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3347 		return r;
3348 	}
3349 
3350 	/* setup clear context state */
3351 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3352 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3353 
3354 	for (i = 0; i < si_default_size; i++)
3355 		radeon_ring_write(ring, si_default_state[i]);
3356 
3357 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3358 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3359 
3360 	/* set clear context state */
3361 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3362 	radeon_ring_write(ring, 0);
3363 
3364 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3365 	radeon_ring_write(ring, 0x00000316);
3366 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3367 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3368 
3369 	radeon_ring_unlock_commit(rdev, ring);
3370 
3371 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3372 		ring = &rdev->ring[i];
3373 		r = radeon_ring_lock(rdev, ring, 2);
3374 
3375 		/* clear the compute context state */
3376 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3377 		radeon_ring_write(ring, 0);
3378 
3379 		radeon_ring_unlock_commit(rdev, ring);
3380 	}
3381 
3382 	return 0;
3383 }
3384 
3385 static void si_cp_fini(struct radeon_device *rdev)
3386 {
3387 	struct radeon_ring *ring;
3388 	si_cp_enable(rdev, false);
3389 
3390 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3391 	radeon_ring_fini(rdev, ring);
3392 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3393 
3394 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3395 	radeon_ring_fini(rdev, ring);
3396 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3397 
3398 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3399 	radeon_ring_fini(rdev, ring);
3400 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3401 }
3402 
/*
 * si_cp_resume - bring up the three CP ring buffers
 * @rdev: radeon device
 *
 * Programs ring buffer 0 (gfx) and compute rings 1 and 2 — size,
 * read/write pointers and write-back addresses — then starts the CP
 * via si_cp_start() and ring-tests all three.  A gfx ring test
 * failure marks every ring not ready and aborts; a compute ring test
 * failure only marks that ring not ready.
 *
 * Returns 0 on success, negative error code if the gfx ring fails.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch write-back: enable the full mask, or disable rptr
	 * updates entirely (RB_NO_UPDATE) when write-back is off
	 */
	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx failure is fatal for all rings */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
3531 
/*
 * si_gpu_check_soft_reset - probe which GPU engines appear hung
 * @rdev: radeon device
 *
 * Samples the GRBM/SRBM/DMA/VM status registers and translates the
 * busy bits into a RADEON_RESET_* mask.  A busy MC is deliberately
 * dropped from the result since it is most likely just busy, not
 * hung.  Returns the reset mask (0 means everything looks idle).
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3612 
/**
 * si_gpu_soft_reset - soft reset the blocks selected in reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* flags (from si_gpu_check_soft_reset())
 *
 * Halts the CP and the selected DMA engines, stops the MC, then pulses
 * the matching GRBM/SRBM soft-reset bits and resumes the MC.  The
 * register write order and udelay()s follow the hardware programming
 * sequence and must not be reordered.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	/* let in-flight work drain before stopping the MC */
	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the RADEON_RESET_* mask into GRBM/SRBM reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* assert the reset bits, hold for 50us, then deassert; the
	 * read-back after each write flushes the posted write */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3744 
3745 int si_asic_reset(struct radeon_device *rdev)
3746 {
3747 	u32 reset_mask;
3748 
3749 	reset_mask = si_gpu_check_soft_reset(rdev);
3750 
3751 	if (reset_mask)
3752 		r600_set_bios_scratch_engine_hung(rdev, true);
3753 
3754 	si_gpu_soft_reset(rdev, reset_mask);
3755 
3756 	reset_mask = si_gpu_check_soft_reset(rdev);
3757 
3758 	if (!reset_mask)
3759 		r600_set_bios_scratch_engine_hung(rdev, false);
3760 
3761 	return 0;
3762 }
3763 
3764 /**
3765  * si_gfx_is_lockup - Check if the GFX engine is locked up
3766  *
3767  * @rdev: radeon_device pointer
3768  * @ring: radeon_ring structure holding ring information
3769  *
3770  * Check if the GFX engine is locked up.
3771  * Returns true if the engine appears to be locked up, false if not.
3772  */
3773 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3774 {
3775 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
3776 
3777 	if (!(reset_mask & (RADEON_RESET_GFX |
3778 			    RADEON_RESET_COMPUTE |
3779 			    RADEON_RESET_CP))) {
3780 		radeon_ring_lockup_update(ring);
3781 		return false;
3782 	}
3783 	/* force CP activities */
3784 	radeon_ring_force_activity(rdev, ring);
3785 	return radeon_ring_test_lockup(rdev, ring);
3786 }
3787 
3788 /* MC */
/**
 * si_mc_program - program the memory controller aperture registers
 *
 * @rdev: radeon_device pointer
 *
 * Clears the HDP tiling registers, stops the MC, programs the system
 * aperture / FB location / HDP nonsurface registers, then resumes the
 * MC and disables the VGA renderer.  Sequence order matters: the MC
 * must be idle while the apertures are reprogrammed.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs top (bits 31:16) and base (bits 15:0),
	 * both in units of 16MB (addr >> 24) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP is unused on SI: program an empty aperture (BOT > TOP) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3839 
3840 void si_vram_gtt_location(struct radeon_device *rdev,
3841 			  struct radeon_mc *mc)
3842 {
3843 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
3844 		/* leave room for at least 1024M GTT */
3845 		dev_warn(rdev->dev, "limiting VRAM\n");
3846 		mc->real_vram_size = 0xFFC0000000ULL;
3847 		mc->mc_vram_size = 0xFFC0000000ULL;
3848 	}
3849 	radeon_vram_location(rdev, &rdev->mc, 0);
3850 	rdev->mc.gtt_base_align = 0;
3851 	radeon_gtt_location(rdev, mc);
3852 }
3853 
3854 static int si_mc_init(struct radeon_device *rdev)
3855 {
3856 	u32 tmp;
3857 	int chansize, numchan;
3858 
3859 	/* Get VRAM informations */
3860 	rdev->mc.vram_is_ddr = true;
3861 	tmp = RREG32(MC_ARB_RAMCFG);
3862 	if (tmp & CHANSIZE_OVERRIDE) {
3863 		chansize = 16;
3864 	} else if (tmp & CHANSIZE_MASK) {
3865 		chansize = 64;
3866 	} else {
3867 		chansize = 32;
3868 	}
3869 	tmp = RREG32(MC_SHARED_CHMAP);
3870 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3871 	case 0:
3872 	default:
3873 		numchan = 1;
3874 		break;
3875 	case 1:
3876 		numchan = 2;
3877 		break;
3878 	case 2:
3879 		numchan = 4;
3880 		break;
3881 	case 3:
3882 		numchan = 8;
3883 		break;
3884 	case 4:
3885 		numchan = 3;
3886 		break;
3887 	case 5:
3888 		numchan = 6;
3889 		break;
3890 	case 6:
3891 		numchan = 10;
3892 		break;
3893 	case 7:
3894 		numchan = 12;
3895 		break;
3896 	case 8:
3897 		numchan = 16;
3898 		break;
3899 	}
3900 	rdev->mc.vram_width = numchan * chansize;
3901 	/* Could aper size report 0 ? */
3902 	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
3903 	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
3904 	/* size in MB on si */
3905 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3906 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3907 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
3908 	si_vram_gtt_location(rdev, &rdev->mc);
3909 	radeon_update_bandwidth_info(rdev);
3910 
3911 	return 0;
3912 }
3913 
3914 /*
3915  * GART
3916  */
/**
 * si_pcie_gart_tlb_flush - flush the VM TLBs
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache, then invalidates the TLB for VM context 0
 * (the system/GART context).
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3925 
/**
 * si_pcie_gart_enable - set up the PCIE GART and VM contexts
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache,
 * sets up context 0 for system/GART mappings, and enables contexts
 * 1-15 for per-process VMs with fault reporting.  Returns 0 on
 * success or a negative error code if the table cannot be pinned.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* out-of-range context0 accesses are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers, cleared as in the
	 * reference driver — purpose unknown from here */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4010 
/**
 * si_pcie_gart_disable - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, drops the L1 TLB and L2 cache enables
 * (while keeping system-aperture pass-through so CPU access still
 * works), and unpins the GART table from VRAM.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4029 
/**
 * si_pcie_gart_fini - final GART teardown
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware, frees the page table VRAM object and
 * releases the GART bookkeeping structures.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4036 
4037 /* vm parser */
4038 static bool si_vm_reg_valid(u32 reg)
4039 {
4040 	/* context regs are fine */
4041 	if (reg >= 0x28000)
4042 		return true;
4043 
4044 	/* check config regs */
4045 	switch (reg) {
4046 	case GRBM_GFX_INDEX:
4047 	case CP_STRMOUT_CNTL:
4048 	case VGT_VTX_VECT_EJECT_REG:
4049 	case VGT_CACHE_INVALIDATION:
4050 	case VGT_ESGS_RING_SIZE:
4051 	case VGT_GSVS_RING_SIZE:
4052 	case VGT_GS_VERTEX_REUSE:
4053 	case VGT_PRIMITIVE_TYPE:
4054 	case VGT_INDEX_TYPE:
4055 	case VGT_NUM_INDICES:
4056 	case VGT_NUM_INSTANCES:
4057 	case VGT_TF_RING_SIZE:
4058 	case VGT_HS_OFFCHIP_PARAM:
4059 	case VGT_TF_MEMORY_BASE:
4060 	case PA_CL_ENHANCE:
4061 	case PA_SU_LINE_STIPPLE_VALUE:
4062 	case PA_SC_LINE_STIPPLE_STATE:
4063 	case PA_SC_ENHANCE:
4064 	case SQC_CACHES:
4065 	case SPI_STATIC_THREAD_MGMT_1:
4066 	case SPI_STATIC_THREAD_MGMT_2:
4067 	case SPI_STATIC_THREAD_MGMT_3:
4068 	case SPI_PS_MAX_WAVE_ID:
4069 	case SPI_CONFIG_CNTL:
4070 	case SPI_CONFIG_CNTL_1:
4071 	case TA_CNTL_AUX:
4072 		return true;
4073 	default:
4074 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4075 		return false;
4076 	}
4077 }
4078 
/**
 * si_vm_packet3_ce_check - validate a PACKET3 on the constant engine
 *
 * @rdev: radeon_device pointer (unused here, kept for parser symmetry)
 * @ib: indirect buffer dwords
 * @pkt: decoded packet header
 *
 * Only the opcodes listed are permitted in a CE IB.  Returns 0 if the
 * opcode is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_ce_check(struct radeon_device *rdev,
				  u32 *ib, struct radeon_cs_packet *pkt)
{
	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_SET_CE_DE_COUNTERS:
	case PACKET3_LOAD_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM_OFFSET:
	case PACKET3_DUMP_CONST_RAM:
	case PACKET3_INCREMENT_CE_COUNTER:
	case PACKET3_WAIT_ON_DE_COUNTER:
	case PACKET3_CE_WRITE:
		break;
	default:
		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4100 
4101 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4102 {
4103 	u32 start_reg, reg, i;
4104 	u32 command = ib[idx + 4];
4105 	u32 info = ib[idx + 1];
4106 	u32 idx_value = ib[idx];
4107 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4108 		/* src address space is register */
4109 		if (((info & 0x60000000) >> 29) == 0) {
4110 			start_reg = idx_value << 2;
4111 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4112 				reg = start_reg;
4113 				if (!si_vm_reg_valid(reg)) {
4114 					DRM_ERROR("CP DMA Bad SRC register\n");
4115 					return -EINVAL;
4116 				}
4117 			} else {
4118 				for (i = 0; i < (command & 0x1fffff); i++) {
4119 					reg = start_reg + (4 * i);
4120 					if (!si_vm_reg_valid(reg)) {
4121 						DRM_ERROR("CP DMA Bad SRC register\n");
4122 						return -EINVAL;
4123 					}
4124 				}
4125 			}
4126 		}
4127 	}
4128 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4129 		/* dst address space is register */
4130 		if (((info & 0x00300000) >> 20) == 0) {
4131 			start_reg = ib[idx + 2];
4132 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4133 				reg = start_reg;
4134 				if (!si_vm_reg_valid(reg)) {
4135 					DRM_ERROR("CP DMA Bad DST register\n");
4136 					return -EINVAL;
4137 				}
4138 			} else {
4139 				for (i = 0; i < (command & 0x1fffff); i++) {
4140 					reg = start_reg + (4 * i);
4141 				if (!si_vm_reg_valid(reg)) {
4142 						DRM_ERROR("CP DMA Bad DST register\n");
4143 						return -EINVAL;
4144 					}
4145 				}
4146 			}
4147 		}
4148 	}
4149 	return 0;
4150 }
4151 
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 on the GFX ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer dwords
 * @pkt: decoded packet header
 *
 * Opcodes in the first group are allowed unconditionally; opcodes that
 * can write registers are allowed only if every register they touch
 * passes si_vm_reg_valid().  Returns 0 if allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dest select 0 == register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dest select 0 == register */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* one-register write: repeated into start_reg */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space register? */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst is a register? */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4269 
/**
 * si_vm_packet3_compute_check - validate a PACKET3 on a compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer dwords
 * @pkt: decoded packet header
 *
 * Same scheme as si_vm_packet3_gfx_check() but with the compute
 * opcode whitelist (no draw packets).  Returns 0 if allowed,
 * -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dest select 0 == register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dest select 0 == register */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* one-register write: repeated into start_reg */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space register? */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst is a register? */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4357 
4358 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4359 {
4360 	int ret = 0;
4361 	u32 idx = 0;
4362 	struct radeon_cs_packet pkt;
4363 
4364 	do {
4365 		pkt.idx = idx;
4366 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4367 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4368 		pkt.one_reg_wr = 0;
4369 		switch (pkt.type) {
4370 		case RADEON_PACKET_TYPE0:
4371 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4372 			ret = -EINVAL;
4373 			break;
4374 		case RADEON_PACKET_TYPE2:
4375 			idx += 1;
4376 			break;
4377 		case RADEON_PACKET_TYPE3:
4378 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4379 			if (ib->is_const_ib)
4380 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4381 			else {
4382 				switch (ib->ring) {
4383 				case RADEON_RING_TYPE_GFX_INDEX:
4384 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4385 					break;
4386 				case CAYMAN_RING_TYPE_CP1_INDEX:
4387 				case CAYMAN_RING_TYPE_CP2_INDEX:
4388 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4389 					break;
4390 				default:
4391 					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4392 					ret = -EINVAL;
4393 					break;
4394 				}
4395 			}
4396 			idx += pkt.count + 2;
4397 			break;
4398 		default:
4399 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4400 			ret = -EINVAL;
4401 			break;
4402 		}
4403 		if (ret)
4404 			break;
4405 	} while (idx < ib->length_dw);
4406 
4407 	return ret;
4408 }
4409 
4410 /*
4411  * vm
4412  */
/**
 * si_vm_init - initialize the VM manager parameters
 *
 * @rdev: radeon_device pointer
 *
 * SI supports 16 hardware VM contexts and has no extra VRAM base
 * offset.  Always returns 0.
 */
int si_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	rdev->vm_manager.vram_base_offset = 0;

	return 0;
}
4422 
/**
 * si_vm_fini - VM manager teardown
 *
 * @rdev: radeon_device pointer
 *
 * Nothing to tear down on SI; present to satisfy the asic interface.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4426 
4427 /**
4428  * si_vm_decode_fault - print human readable fault info
4429  *
4430  * @rdev: radeon_device pointer
4431  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4432  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4433  *
4434  * Print human readable fault information (SI).
4435  */
static void si_vm_decode_fault(struct radeon_device *rdev,
			       u32 status, u32 addr)
{
	/* unpack the fault status fields */
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block;

	/* map the memory client id to a human-readable block name;
	 * Tahiti uses a different id assignment than the other SI parts */
	if (rdev->family == CHIP_TAHITI) {
		switch (mc_id) {
		case 160:
		case 144:
		case 96:
		case 80:
		case 224:
		case 208:
		case 32:
		case 16:
			block = "CB";
			break;
		case 161:
		case 145:
		case 97:
		case 81:
		case 225:
		case 209:
		case 33:
		case 17:
			block = "CB_FMASK";
			break;
		case 162:
		case 146:
		case 98:
		case 82:
		case 226:
		case 210:
		case 34:
		case 18:
			block = "CB_CMASK";
			break;
		case 163:
		case 147:
		case 99:
		case 83:
		case 227:
		case 211:
		case 35:
		case 19:
			block = "CB_IMMED";
			break;
		case 164:
		case 148:
		case 100:
		case 84:
		case 228:
		case 212:
		case 36:
		case 20:
			block = "DB";
			break;
		case 165:
		case 149:
		case 101:
		case 85:
		case 229:
		case 213:
		case 37:
		case 21:
			block = "DB_HTILE";
			break;
		case 167:
		case 151:
		case 103:
		case 87:
		case 231:
		case 215:
		case 39:
		case 23:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 64:
		case 8:
		case 4:
		case 0:
		case 136:
		case 132:
		case 128:
		case 200:
		case 196:
		case 192:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
		case 190:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	} else {
		/* Pitcairn/Verde/Oland/Hainan client id layout */
		switch (mc_id) {
		case 32:
		case 16:
		case 96:
		case 80:
		case 160:
		case 144:
		case 224:
		case 208:
			block = "CB";
			break;
		case 33:
		case 17:
		case 97:
		case 81:
		case 161:
		case 145:
		case 225:
		case 209:
			block = "CB_FMASK";
			break;
		case 34:
		case 18:
		case 98:
		case 82:
		case 162:
		case 146:
		case 226:
		case 210:
			block = "CB_CMASK";
			break;
		case 35:
		case 19:
		case 99:
		case 83:
		case 163:
		case 147:
		case 227:
		case 211:
			block = "CB_IMMED";
			break;
		case 36:
		case 20:
		case 100:
		case 84:
		case 164:
		case 148:
		case 228:
		case 212:
			block = "DB";
			break;
		case 37:
		case 21:
		case 101:
		case 85:
		case 165:
		case 149:
		case 229:
		case 213:
			block = "DB_HTILE";
			break;
		case 39:
		case 23:
		case 103:
		case 87:
		case 167:
		case 151:
		case 231:
		case 215:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 8:
		case 4:
		case 136:
		case 132:
		case 200:
		case 196:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	}

	/* NOTE(review): mc_id is u32 but printed with %d — harmless for
	 * the small client ids seen here, but %u would be more precise */
	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}
4690 
4691 /**
4692  * si_vm_set_page - update the page tables using the CP
4693  *
4694  * @rdev: radeon_device pointer
4695  * @ib: indirect buffer to fill with commands
4696  * @pe: addr of the page entry
4697  * @addr: dst addr to write into pe
4698  * @count: number of page entries to update
4699  * @incr: increase next addr by incr bytes
4700  * @flags: access flags
4701  *
4702  * Update the page tables using the CP (SI).
4703  */
void si_vm_set_page(struct radeon_device *rdev,
		    struct radeon_ib *ib,
		    uint64_t pe,
		    uint64_t addr, unsigned count,
		    uint32_t incr, uint32_t flags)
{
	/* translate RADEON_VM_PAGE_* flags into hw PTE bits */
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		while (count) {
			/* each 64-bit PTE needs 2 dwords, plus 2 dwords of
			 * destination address; clamp to the max WRITE_DATA
			 * payload and emit further packets as needed */
			ndw = 2 + count * 2;
			if (ndw > 0x3FFE)
				ndw = 0x3FFE;

			/* WRITE_DATA header: ME engine, destination is memory */
			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
					WRITE_DATA_DST_SEL(1));
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					/* system page: look up the GART address,
					 * keep only the page-aligned part */
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					/* invalid entry */
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		si_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
	}
}
4745 
/**
 * si_vm_flush - flush the TLB for a VM via ring packets (SI)
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the ring to emit the flush on
 * @vm:   VM to flush; a NULL vm is a no-op
 *
 * Programs the page directory base for the VM's context, flushes the
 * HDP cache and then requests a TLB invalidate for that VM id.
 * The packet order below is deliberate; do not reorder the writes.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4788 
4789 /*
4790  *  Power and clock gating
4791  */
4792 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4793 {
4794 	int i;
4795 
4796 	for (i = 0; i < rdev->usec_timeout; i++) {
4797 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4798 			break;
4799 		udelay(1);
4800 	}
4801 
4802 	for (i = 0; i < rdev->usec_timeout; i++) {
4803 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4804 			break;
4805 		udelay(1);
4806 	}
4807 }
4808 
4809 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4810 					 bool enable)
4811 {
4812 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
4813 	u32 mask;
4814 	int i;
4815 
4816 	if (enable)
4817 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4818 	else
4819 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4820 	WREG32(CP_INT_CNTL_RING0, tmp);
4821 
4822 	if (!enable) {
4823 		/* read a gfx register */
4824 		tmp = RREG32(DB_DEPTH_INFO);
4825 
4826 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4827 		for (i = 0; i < rdev->usec_timeout; i++) {
4828 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4829 				break;
4830 			udelay(1);
4831 		}
4832 	}
4833 }
4834 
4835 static void si_set_uvd_dcm(struct radeon_device *rdev,
4836 			   bool sw_mode)
4837 {
4838 	u32 tmp, tmp2;
4839 
4840 	tmp = RREG32(UVD_CGC_CTRL);
4841 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4842 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
4843 
4844 	if (sw_mode) {
4845 		tmp &= ~0x7ffff800;
4846 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4847 	} else {
4848 		tmp |= 0x7ffff800;
4849 		tmp2 = 0;
4850 	}
4851 
4852 	WREG32(UVD_CGC_CTRL, tmp);
4853 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4854 }
4855 
/* Initialize UVD internal clock gating.  hw_mode is hard-coded to true,
 * so the DCM-disable path below is intentionally dead code; it is kept
 * so the mode can be flipped easily during bring-up/debugging. */
void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
	bool hw_mode = true;

	if (hw_mode) {
		/* hardware-controlled dynamic clock mode */
		si_set_uvd_dcm(rdev, false);
	} else {
		/* static clocks: clear the DCM bit entirely */
		u32 tmp = RREG32(UVD_CGC_CTRL);
		tmp &= ~DCM;
		WREG32(UVD_CGC_CTRL, tmp);
	}
}
4868 
4869 static u32 si_halt_rlc(struct radeon_device *rdev)
4870 {
4871 	u32 data, orig;
4872 
4873 	orig = data = RREG32(RLC_CNTL);
4874 
4875 	if (data & RLC_ENABLE) {
4876 		data &= ~RLC_ENABLE;
4877 		WREG32(RLC_CNTL, data);
4878 
4879 		si_wait_for_rlc_serdes(rdev);
4880 	}
4881 
4882 	return orig;
4883 }
4884 
4885 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4886 {
4887 	u32 tmp;
4888 
4889 	tmp = RREG32(RLC_CNTL);
4890 	if (tmp != rlc)
4891 		WREG32(RLC_CNTL, rlc);
4892 }
4893 
4894 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4895 {
4896 	u32 data, orig;
4897 
4898 	orig = data = RREG32(DMA_PG);
4899 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
4900 		data |= PG_CNTL_ENABLE;
4901 	else
4902 		data &= ~PG_CNTL_ENABLE;
4903 	if (orig != data)
4904 		WREG32(DMA_PG, data);
4905 }
4906 
4907 static void si_init_dma_pg(struct radeon_device *rdev)
4908 {
4909 	u32 tmp;
4910 
4911 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
4912 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4913 
4914 	for (tmp = 0; tmp < 5; tmp++)
4915 		WREG32(DMA_PGFSM_WRITE, 0);
4916 }
4917 
/* Enable/disable gfx coarse-grain power gating.  The write order is
 * deliberate: delay thresholds first, then GFX_PG_ENABLE, then auto
 * powergating last; on disable, auto powergating is cleared first. */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		/* power up/down delay thresholds */
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* read back a gfx register; value is unused — presumably a
		 * posting read to flush the disable (TODO confirm) */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
4942 
/* One-time gfx CGPG setup: program the RLC save/restore and clear-state
 * buffer addresses and the auto powergating idle threshold. */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 tmp;

	/* buffer addresses are programmed in 256-byte units (>> 8) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

	tmp = RREG32(RLC_PG_CNTL);
	tmp |= GFX_PG_SRC;
	WREG32(RLC_PG_CNTL, tmp);

	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	tmp = RREG32(RLC_AUTO_PG_CTRL);

	/* idle threshold before the RLC auto-powergates the gfx block */
	tmp &= ~GRBM_REG_SGIT_MASK;
	tmp |= GRBM_REG_SGIT(0x700);
	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
	WREG32(RLC_AUTO_PG_CTRL, tmp);
}
4962 
4963 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
4964 {
4965 	u32 mask = 0, tmp, tmp1;
4966 	int i;
4967 
4968 	si_select_se_sh(rdev, se, sh);
4969 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
4970 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
4971 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4972 
4973 	tmp &= 0xffff0000;
4974 
4975 	tmp |= tmp1;
4976 	tmp >>= 16;
4977 
4978 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
4979 		mask <<= 1;
4980 		mask |= 1;
4981 	}
4982 
4983 	return (~tmp) & mask;
4984 }
4985 
4986 static void si_init_ao_cu_mask(struct radeon_device *rdev)
4987 {
4988 	u32 i, j, k, active_cu_number = 0;
4989 	u32 mask, counter, cu_bitmap;
4990 	u32 tmp = 0;
4991 
4992 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
4993 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
4994 			mask = 1;
4995 			cu_bitmap = 0;
4996 			counter  = 0;
4997 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
4998 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
4999 					if (counter < 2)
5000 						cu_bitmap |= mask;
5001 					counter++;
5002 				}
5003 				mask <<= 1;
5004 			}
5005 
5006 			active_cu_number += counter;
5007 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5008 		}
5009 	}
5010 
5011 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5012 
5013 	tmp = RREG32(RLC_MAX_PG_CU);
5014 	tmp &= ~MAX_PU_CU_MASK;
5015 	tmp |= MAX_PU_CU(active_cu_number);
5016 	WREG32(RLC_MAX_PG_CU, tmp);
5017 }
5018 
/* Enable/disable gfx coarse-grain clock gating (CGCG) and clock
 * light-sleep (CGLS).  The RLC is halted while the serdes write masks
 * are reprogrammed; keep the statement order as-is. */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* stop the RLC while touching the serdes masks */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		/* restore the previous RLC enable state */
		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* repeated reads of CB_CGTT_SCLK_CTRL; values are unused —
		 * presumably posting reads before gating is cleared
		 * (TODO confirm against hw docs) */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5058 
/* Enable/disable gfx medium-grain clock gating (MGCG) and the optional
 * CP memory light-sleep.  The serdes mask programming requires the RLC
 * to be halted; statement order matters throughout. */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* CP memory light-sleep, if supported */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the low MGCG override bits */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		/* restore the previous RLC enable state */
		si_update_rlc(rdev, tmp);
	} else {
		/* set the override bits to force clocks on */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off CP memory light-sleep if it was on */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5114 
5115 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5116 			       bool enable)
5117 {
5118 	u32 orig, data, tmp;
5119 
5120 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5121 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5122 		tmp |= 0x3fff;
5123 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5124 
5125 		orig = data = RREG32(UVD_CGC_CTRL);
5126 		data |= DCM;
5127 		if (orig != data)
5128 			WREG32(UVD_CGC_CTRL, data);
5129 
5130 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5131 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5132 	} else {
5133 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5134 		tmp &= ~0x3fff;
5135 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5136 
5137 		orig = data = RREG32(UVD_CGC_CTRL);
5138 		data &= ~DCM;
5139 		if (orig != data)
5140 			WREG32(UVD_CGC_CTRL, data);
5141 
5142 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5143 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5144 	}
5145 }
5146 
/* MC/VM hub registers that all carry the shared MC_LS_ENABLE and
 * MC_CG_ENABLE bits; iterated by si_enable_mc_ls() and
 * si_enable_mc_mgcg() below. */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5159 
5160 static void si_enable_mc_ls(struct radeon_device *rdev,
5161 			    bool enable)
5162 {
5163 	int i;
5164 	u32 orig, data;
5165 
5166 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5167 		orig = data = RREG32(mc_cg_registers[i]);
5168 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5169 			data |= MC_LS_ENABLE;
5170 		else
5171 			data &= ~MC_LS_ENABLE;
5172 		if (data != orig)
5173 			WREG32(mc_cg_registers[i], data);
5174 	}
5175 }
5176 
5177 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5178 			       bool enable)
5179 {
5180 	int i;
5181 	u32 orig, data;
5182 
5183 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5184 		orig = data = RREG32(mc_cg_registers[i]);
5185 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5186 			data |= MC_CG_ENABLE;
5187 		else
5188 			data &= ~MC_CG_ENABLE;
5189 		if (data != orig)
5190 			WREG32(mc_cg_registers[i], data);
5191 	}
5192 }
5193 
5194 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5195 			       bool enable)
5196 {
5197 	u32 orig, data, offset;
5198 	int i;
5199 
5200 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5201 		for (i = 0; i < 2; i++) {
5202 			if (i == 0)
5203 				offset = DMA0_REGISTER_OFFSET;
5204 			else
5205 				offset = DMA1_REGISTER_OFFSET;
5206 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5207 			data &= ~MEM_POWER_OVERRIDE;
5208 			if (data != orig)
5209 				WREG32(DMA_POWER_CNTL + offset, data);
5210 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5211 		}
5212 	} else {
5213 		for (i = 0; i < 2; i++) {
5214 			if (i == 0)
5215 				offset = DMA0_REGISTER_OFFSET;
5216 			else
5217 				offset = DMA1_REGISTER_OFFSET;
5218 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5219 			data |= MEM_POWER_OVERRIDE;
5220 			if (data != orig)
5221 				WREG32(DMA_POWER_CNTL + offset, data);
5222 
5223 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5224 			data = 0xff000000;
5225 			if (data != orig)
5226 				WREG32(DMA_CLK_CTRL + offset, data);
5227 		}
5228 	}
5229 }
5230 
5231 static void si_enable_bif_mgls(struct radeon_device *rdev,
5232 			       bool enable)
5233 {
5234 	u32 orig, data;
5235 
5236 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5237 
5238 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5239 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5240 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5241 	else
5242 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5243 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5244 
5245 	if (orig != data)
5246 		WREG32_PCIE(PCIE_CNTL2, data);
5247 }
5248 
5249 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5250 			       bool enable)
5251 {
5252 	u32 orig, data;
5253 
5254 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5255 
5256 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5257 		data &= ~CLOCK_GATING_DIS;
5258 	else
5259 		data |= CLOCK_GATING_DIS;
5260 
5261 	if (orig != data)
5262 		WREG32(HDP_HOST_PATH_CNTL, data);
5263 }
5264 
5265 static void si_enable_hdp_ls(struct radeon_device *rdev,
5266 			     bool enable)
5267 {
5268 	u32 orig, data;
5269 
5270 	orig = data = RREG32(HDP_MEM_POWER_LS);
5271 
5272 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5273 		data |= HDP_LS_ENABLE;
5274 	else
5275 		data &= ~HDP_LS_ENABLE;
5276 
5277 	if (orig != data)
5278 		WREG32(HDP_MEM_POWER_LS, data);
5279 }
5280 
/* Enable or disable clock gating for the blocks selected in @block
 * (a mask of RADEON_CG_BLOCK_* flags).  For gfx, MGCG must be enabled
 * before CGCG, and CGCG must be disabled before MGCG. */
void si_update_cg(struct radeon_device *rdev,
		  u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		/* keep the gui idle interrupt masked around the change */
		si_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			si_enable_mgcg(rdev, true);
			si_enable_cgcg(rdev, true);
		} else {
			si_enable_cgcg(rdev, false);
			si_enable_mgcg(rdev, false);
		}
		si_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		si_enable_mc_mgcg(rdev, enable);
		si_enable_mc_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		si_enable_dma_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		si_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		/* UVD gating only applies when the asic has a UVD block */
		if (rdev->has_uvd) {
			si_enable_uvd_mgcg(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		si_enable_hdp_mgcg(rdev, enable);
		si_enable_hdp_ls(rdev, enable);
	}
}
5321 
5322 static void si_init_cg(struct radeon_device *rdev)
5323 {
5324 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5325 			    RADEON_CG_BLOCK_MC |
5326 			    RADEON_CG_BLOCK_SDMA |
5327 			    RADEON_CG_BLOCK_BIF |
5328 			    RADEON_CG_BLOCK_HDP), true);
5329 	if (rdev->has_uvd) {
5330 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5331 		si_init_uvd_internal_cg(rdev);
5332 	}
5333 }
5334 
5335 static void si_fini_cg(struct radeon_device *rdev)
5336 {
5337 	if (rdev->has_uvd) {
5338 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5339 	}
5340 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5341 			    RADEON_CG_BLOCK_MC |
5342 			    RADEON_CG_BLOCK_SDMA |
5343 			    RADEON_CG_BLOCK_BIF |
5344 			    RADEON_CG_BLOCK_HDP), false);
5345 }
5346 
5347 u32 si_get_csb_size(struct radeon_device *rdev)
5348 {
5349 	u32 count = 0;
5350 	const struct cs_section_def *sect = NULL;
5351 	const struct cs_extent_def *ext = NULL;
5352 
5353 	if (rdev->rlc.cs_data == NULL)
5354 		return 0;
5355 
5356 	/* begin clear state */
5357 	count += 2;
5358 	/* context control state */
5359 	count += 3;
5360 
5361 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5362 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5363 			if (sect->id == SECT_CONTEXT)
5364 				count += 2 + ext->reg_count;
5365 			else
5366 				return 0;
5367 		}
5368 	}
5369 	/* pa_sc_raster_config */
5370 	count += 3;
5371 	/* end clear state */
5372 	count += 2;
5373 	/* clear state */
5374 	count += 2;
5375 
5376 	return count;
5377 }
5378 
/* Fill @buffer with the clear-state packet stream.  The layout must
 * stay in lockstep with the dword counts in si_get_csb_size(). */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;

	/* context control */
	buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
	buffer[count++] = 0x80000000;
	buffer[count++] = 0x80000000;

	/* one SET_CONTEXT_REG packet per extent; bail out on any
	 * non-context section, mirroring si_get_csb_size() */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
				buffer[count++] = ext->reg_index - 0xa000;
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = ext->extent[i];
			} else {
				return;
			}
		}
	}

	/* per-family PA_SC_RASTER_CONFIG value */
	buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = 0x2a00126a;
		break;
	case CHIP_VERDE:
		buffer[count++] = 0x0000124a;
		break;
	case CHIP_OLAND:
		buffer[count++] = 0x00000082;
		break;
	case CHIP_HAINAN:
		buffer[count++] = 0x00000000;
		break;
	default:
		buffer[count++] = 0x00000000;
		break;
	}

	/* end clear state */
	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;

	/* clear state */
	buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
	buffer[count++] = 0;
}
5437 
5438 static void si_init_pg(struct radeon_device *rdev)
5439 {
5440 	if (rdev->pg_flags) {
5441 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5442 			si_init_dma_pg(rdev);
5443 		}
5444 		si_init_ao_cu_mask(rdev);
5445 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5446 			si_init_gfx_cgpg(rdev);
5447 		}
5448 		si_enable_dma_pg(rdev, true);
5449 		si_enable_gfx_cgpg(rdev, true);
5450 	} else {
5451 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5452 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5453 	}
5454 }
5455 
5456 static void si_fini_pg(struct radeon_device *rdev)
5457 {
5458 	if (rdev->pg_flags) {
5459 		si_enable_dma_pg(rdev, false);
5460 		si_enable_gfx_cgpg(rdev, false);
5461 	}
5462 }
5463 
5464 /*
5465  * RLC
5466  */
5467 void si_rlc_reset(struct radeon_device *rdev)
5468 {
5469 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5470 
5471 	tmp |= SOFT_RESET_RLC;
5472 	WREG32(GRBM_SOFT_RESET, tmp);
5473 	udelay(50);
5474 	tmp &= ~SOFT_RESET_RLC;
5475 	WREG32(GRBM_SOFT_RESET, tmp);
5476 	udelay(50);
5477 }
5478 
5479 static void si_rlc_stop(struct radeon_device *rdev)
5480 {
5481 	WREG32(RLC_CNTL, 0);
5482 
5483 	si_enable_gui_idle_interrupt(rdev, false);
5484 
5485 	si_wait_for_rlc_serdes(rdev);
5486 }
5487 
5488 static void si_rlc_start(struct radeon_device *rdev)
5489 {
5490 	WREG32(RLC_CNTL, RLC_ENABLE);
5491 
5492 	si_enable_gui_idle_interrupt(rdev, true);
5493 
5494 	udelay(50);
5495 }
5496 
5497 static bool si_lbpw_supported(struct radeon_device *rdev)
5498 {
5499 	u32 tmp;
5500 
5501 	/* Enable LBPW only for DDR3 */
5502 	tmp = RREG32(MC_SEQ_MISC0);
5503 	if ((tmp & 0xF0000000) == 0xB0000000)
5504 		return true;
5505 	return false;
5506 }
5507 
5508 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5509 {
5510 	u32 tmp;
5511 
5512 	tmp = RREG32(RLC_LB_CNTL);
5513 	if (enable)
5514 		tmp |= LOAD_BALANCE_ENABLE;
5515 	else
5516 		tmp &= ~LOAD_BALANCE_ENABLE;
5517 	WREG32(RLC_LB_CNTL, tmp);
5518 
5519 	if (!enable) {
5520 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5521 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5522 	}
5523 }
5524 
/* Bring up the RLC: stop and reset it, (re)initialize power and clock
 * gating, clear the run-list and load-balance state, upload the RLC
 * microcode word by word, and finally start the controller.
 * Returns 0 on success or -EINVAL when no RLC firmware is loaded. */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	/* clear run-list and load-balance registers */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* upload the big-endian microcode image one dword at a time */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5564 
5565 static void si_enable_interrupts(struct radeon_device *rdev)
5566 {
5567 	u32 ih_cntl = RREG32(IH_CNTL);
5568 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5569 
5570 	ih_cntl |= ENABLE_INTR;
5571 	ih_rb_cntl |= IH_RB_ENABLE;
5572 	WREG32(IH_CNTL, ih_cntl);
5573 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5574 	rdev->ih.enabled = true;
5575 }
5576 
5577 static void si_disable_interrupts(struct radeon_device *rdev)
5578 {
5579 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5580 	u32 ih_cntl = RREG32(IH_CNTL);
5581 
5582 	ih_rb_cntl &= ~IH_RB_ENABLE;
5583 	ih_cntl &= ~ENABLE_INTR;
5584 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5585 	WREG32(IH_CNTL, ih_cntl);
5586 	/* set rptr, wptr to 0 */
5587 	WREG32(IH_RB_RPTR, 0);
5588 	WREG32(IH_RB_WPTR, 0);
5589 	rdev->ih.enabled = false;
5590 	rdev->ih.rptr = 0;
5591 }
5592 
/* Mask every interrupt source the driver programs: CP rings, DMA
 * engines, GRBM, all CRTC vblank/pageflip sources present on this asic,
 * and (on asics with display) hotplug detect.  Polarity bits in the
 * HPD registers are preserved. */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* keep only the context busy/empty enables on ring 0 */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* mask the DMA trap interrupts on both engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* crtc interrupt masks, only for crtcs that exist */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip (grph) interrupt controls */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* display-less asics (e.g. Hainan) have no DAC/HPD blocks */
	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

		/* keep only the HPD polarity bit, clearing the enables */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5650 
/* Bring up the interrupt handler: allocate the IH ring, load the RLC,
 * program the IH ring buffer and control registers, mask all sources,
 * enable bus mastering and finally enable interrupts.
 * Returns 0 on success or a negative errno from ring allocation or
 * RLC resume (the ring is freed on RLC failure). */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_enable_busmaster(rdev->dev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
5721 
5722 int si_irq_set(struct radeon_device *rdev)
5723 {
5724 	u32 cp_int_cntl;
5725 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
5726 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5727 	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
5728 	u32 grbm_int_cntl = 0;
5729 	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
5730 	u32 dma_cntl, dma_cntl1;
5731 	u32 thermal_int = 0;
5732 
5733 	if (!rdev->irq.installed) {
5734 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5735 		return -EINVAL;
5736 	}
5737 	/* don't enable anything if the ih is disabled */
5738 	if (!rdev->ih.enabled) {
5739 		si_disable_interrupts(rdev);
5740 		/* force the active interrupt state to all disabled */
5741 		si_disable_interrupt_state(rdev);
5742 		return 0;
5743 	}
5744 
5745 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
5746 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5747 
5748 	if (!ASIC_IS_NODCE(rdev)) {
5749 		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5750 		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5751 		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5752 		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5753 		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5754 		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5755 	}
5756 
5757 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5758 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5759 
5760 	thermal_int = RREG32(CG_THERMAL_INT) &
5761 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5762 
5763 	/* enable CP interrupts on all rings */
5764 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5765 		DRM_DEBUG("si_irq_set: sw int gfx\n");
5766 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5767 	}
5768 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5769 		DRM_DEBUG("si_irq_set: sw int cp1\n");
5770 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
5771 	}
5772 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5773 		DRM_DEBUG("si_irq_set: sw int cp2\n");
5774 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
5775 	}
5776 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5777 		DRM_DEBUG("si_irq_set: sw int dma\n");
5778 		dma_cntl |= TRAP_ENABLE;
5779 	}
5780 
5781 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5782 		DRM_DEBUG("si_irq_set: sw int dma1\n");
5783 		dma_cntl1 |= TRAP_ENABLE;
5784 	}
5785 	if (rdev->irq.crtc_vblank_int[0] ||
5786 	    atomic_read(&rdev->irq.pflip[0])) {
5787 		DRM_DEBUG("si_irq_set: vblank 0\n");
5788 		crtc1 |= VBLANK_INT_MASK;
5789 	}
5790 	if (rdev->irq.crtc_vblank_int[1] ||
5791 	    atomic_read(&rdev->irq.pflip[1])) {
5792 		DRM_DEBUG("si_irq_set: vblank 1\n");
5793 		crtc2 |= VBLANK_INT_MASK;
5794 	}
5795 	if (rdev->irq.crtc_vblank_int[2] ||
5796 	    atomic_read(&rdev->irq.pflip[2])) {
5797 		DRM_DEBUG("si_irq_set: vblank 2\n");
5798 		crtc3 |= VBLANK_INT_MASK;
5799 	}
5800 	if (rdev->irq.crtc_vblank_int[3] ||
5801 	    atomic_read(&rdev->irq.pflip[3])) {
5802 		DRM_DEBUG("si_irq_set: vblank 3\n");
5803 		crtc4 |= VBLANK_INT_MASK;
5804 	}
5805 	if (rdev->irq.crtc_vblank_int[4] ||
5806 	    atomic_read(&rdev->irq.pflip[4])) {
5807 		DRM_DEBUG("si_irq_set: vblank 4\n");
5808 		crtc5 |= VBLANK_INT_MASK;
5809 	}
5810 	if (rdev->irq.crtc_vblank_int[5] ||
5811 	    atomic_read(&rdev->irq.pflip[5])) {
5812 		DRM_DEBUG("si_irq_set: vblank 5\n");
5813 		crtc6 |= VBLANK_INT_MASK;
5814 	}
5815 	if (rdev->irq.hpd[0]) {
5816 		DRM_DEBUG("si_irq_set: hpd 1\n");
5817 		hpd1 |= DC_HPDx_INT_EN;
5818 	}
5819 	if (rdev->irq.hpd[1]) {
5820 		DRM_DEBUG("si_irq_set: hpd 2\n");
5821 		hpd2 |= DC_HPDx_INT_EN;
5822 	}
5823 	if (rdev->irq.hpd[2]) {
5824 		DRM_DEBUG("si_irq_set: hpd 3\n");
5825 		hpd3 |= DC_HPDx_INT_EN;
5826 	}
5827 	if (rdev->irq.hpd[3]) {
5828 		DRM_DEBUG("si_irq_set: hpd 4\n");
5829 		hpd4 |= DC_HPDx_INT_EN;
5830 	}
5831 	if (rdev->irq.hpd[4]) {
5832 		DRM_DEBUG("si_irq_set: hpd 5\n");
5833 		hpd5 |= DC_HPDx_INT_EN;
5834 	}
5835 	if (rdev->irq.hpd[5]) {
5836 		DRM_DEBUG("si_irq_set: hpd 6\n");
5837 		hpd6 |= DC_HPDx_INT_EN;
5838 	}
5839 
5840 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5841 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
5842 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
5843 
5844 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
5845 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
5846 
5847 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5848 
5849 	if (rdev->irq.dpm_thermal) {
5850 		DRM_DEBUG("dpm thermal\n");
5851 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
5852 	}
5853 
5854 	if (rdev->num_crtc >= 2) {
5855 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5856 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5857 	}
5858 	if (rdev->num_crtc >= 4) {
5859 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5860 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5861 	}
5862 	if (rdev->num_crtc >= 6) {
5863 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5864 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5865 	}
5866 
5867 	if (rdev->num_crtc >= 2) {
5868 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
5869 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
5870 	}
5871 	if (rdev->num_crtc >= 4) {
5872 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
5873 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
5874 	}
5875 	if (rdev->num_crtc >= 6) {
5876 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
5877 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
5878 	}
5879 
5880 	if (!ASIC_IS_NODCE(rdev)) {
5881 		WREG32(DC_HPD1_INT_CONTROL, hpd1);
5882 		WREG32(DC_HPD2_INT_CONTROL, hpd2);
5883 		WREG32(DC_HPD3_INT_CONTROL, hpd3);
5884 		WREG32(DC_HPD4_INT_CONTROL, hpd4);
5885 		WREG32(DC_HPD5_INT_CONTROL, hpd5);
5886 		WREG32(DC_HPD6_INT_CONTROL, hpd6);
5887 	}
5888 
5889 	WREG32(CG_THERMAL_INT, thermal_int);
5890 
5891 	return 0;
5892 }
5893 
5894 static inline void si_irq_ack(struct radeon_device *rdev)
5895 {
5896 	u32 tmp;
5897 
5898 	if (ASIC_IS_NODCE(rdev))
5899 		return;
5900 
5901 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5902 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5903 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5904 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5905 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5906 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5907 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5908 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5909 	if (rdev->num_crtc >= 4) {
5910 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5911 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5912 	}
5913 	if (rdev->num_crtc >= 6) {
5914 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5915 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5916 	}
5917 
5918 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5919 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5920 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5921 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5922 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5923 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5924 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5925 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5926 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5927 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5928 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5929 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5930 
5931 	if (rdev->num_crtc >= 4) {
5932 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5933 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5934 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5935 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5936 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5937 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5938 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5939 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5940 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5941 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5942 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5943 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5944 	}
5945 
5946 	if (rdev->num_crtc >= 6) {
5947 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5948 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5949 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5950 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5951 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5952 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5953 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5954 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5955 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5956 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5957 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5958 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5959 	}
5960 
5961 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5962 		tmp = RREG32(DC_HPD1_INT_CONTROL);
5963 		tmp |= DC_HPDx_INT_ACK;
5964 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5965 	}
5966 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5967 		tmp = RREG32(DC_HPD2_INT_CONTROL);
5968 		tmp |= DC_HPDx_INT_ACK;
5969 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5970 	}
5971 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5972 		tmp = RREG32(DC_HPD3_INT_CONTROL);
5973 		tmp |= DC_HPDx_INT_ACK;
5974 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5975 	}
5976 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5977 		tmp = RREG32(DC_HPD4_INT_CONTROL);
5978 		tmp |= DC_HPDx_INT_ACK;
5979 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5980 	}
5981 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5982 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5983 		tmp |= DC_HPDx_INT_ACK;
5984 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5985 	}
5986 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5987 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5988 		tmp |= DC_HPDx_INT_ACK;
5989 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5990 	}
5991 }
5992 
/* Fully disable interrupt generation: master-disable the IH, let any
 * in-flight interrupt land, ack everything pending, then clear the
 * per-source enable state.  Call order matters. */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
6001 
/* Quiesce interrupts for suspend: disable/ack all sources, then stop
 * the RLC so no further interrupt traffic is generated. */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6007 
/* Tear down interrupt handling: quiesce the hardware first, then free
 * the IH ring buffer. */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6013 
6014 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6015 {
6016 	u32 wptr, tmp;
6017 
6018 	if (rdev->wb.enabled)
6019 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6020 	else
6021 		wptr = RREG32(IH_RB_WPTR);
6022 
6023 	if (wptr & RB_OVERFLOW) {
6024 		/* When a ring buffer overflow happen start parsing interrupt
6025 		 * from the last not overwritten vector (wptr + 16). Hopefully
6026 		 * this should allow us to catchup.
6027 		 */
6028 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6029 			wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6030 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6031 		tmp = RREG32(IH_RB_CNTL);
6032 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6033 		WREG32(IH_RB_CNTL, tmp);
6034 	}
6035 	return (wptr & rdev->ih.ptr_mask);
6036 }
6037 
6038 /*        SI IV Ring
6039  * Each IV ring entry is 128 bits:
6040  * [7:0]    - interrupt source id
6041  * [31:8]   - reserved
6042  * [59:32]  - interrupt source data
6043  * [63:60]  - reserved
6044  * [71:64]  - RINGID
6045  * [79:72]  - VMID
6046  * [127:80] - reserved
6047  */
/* si_irq_process - drain and dispatch the IH (interrupt handler) ring.
 *
 * Walks every 16-byte IV ring entry between rptr and wptr and dispatches
 * on the source id: vblank/vline per crtc (1-6), hotplug (42), VM
 * protection faults (146/147), CP fence interrupts (176-178, 181), DMA
 * traps (224, 244), thermal transitions (230/231) and GUI idle (233).
 * Hotplug and thermal work is deferred to taskqueue handlers.  Uses an
 * atomic lock so only one caller processes the ring at a time, and loops
 * back if new entries arrived while processing.
 * Returns IRQ_HANDLED when entries were processed, IRQ_NONE otherwise.
 */
irqreturn_t si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_thermal = false;
	u32 status, addr;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		/* decode the 128-bit IV entry: source id, source data, ring id */
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 146:
		case 147:
			/* VM protection fault: report the faulting address/status,
			 * then re-arm fault reporting in VM_CONTEXT1_CNTL2 */
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	/* deferred work: hotplug and thermal handling run from the taskqueue */
	if (queue_hotplug)
		taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
	if (queue_thermal && rdev->pm.dpm_enabled)
		taskqueue_enqueue(rdev->tq, &rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6371 
6372 /*
6373  * startup/shutdown callbacks
6374  */
/* si_startup - bring the GPU to a fully operational state.
 *
 * Shared by si_init() and si_resume().  Performs, in order: PCIe link
 * and ASPM setup, VRAM scratch and MC programming, microcode load, GART
 * enable, core init, RLC/writeback buffers, fence rings, optional UVD,
 * IH/interrupt setup, all CP and DMA rings, IB pool, VM manager and
 * audio.  Returns 0 on success or a negative error code; callers treat
 * any failure as "no acceleration".  The ordering is mandated by the
 * hardware bring-up sequence — do not reorder these calls.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* load microcode on first startup only; resume reuses the cached fw */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on every ring the ASIC exposes */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD failure is non-fatal: just disable its ring */
	if (rdev->has_uvd) {
		r = uvd_v2_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* UVD ring init is best-effort; errors only logged */
	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
					     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
					     RADEON_CP_PACKET2);
			if (!r)
				r = uvd_v1_0_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6566 
/* si_resume - resume callback: re-post the card via atombios, restore
 * golden register settings and rerun the full startup sequence.
 * Returns 0 on success; on failure acceleration is disabled. */
int si_resume(struct radeon_device *rdev)
{
	int r;

	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
	 * posting will perform necessary task to bring back GPU into good
	 * shape.
	 */
	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	si_init_golden_registers(rdev);

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		DRM_ERROR("si startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;

}
6592 
/* si_suspend - suspend callback: stop audio, the VM manager, the CP and
 * DMA engines, save UVD state, disable power/clock gating, quiesce
 * interrupts, disable writeback and tear down the GART.  Ordering is
 * the reverse of si_startup(); always returns 0. */
int si_suspend(struct radeon_device *rdev)
{
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6610 
6611 /* Plan is to move initialization in that function and use
6612  * helper function so that radeon_device_init pretty much
6613  * do nothing more than calling asic specific function. This
6614  * should also allow to remove a bunch of callback function
6615  * like vram_info.
6616  */
/* si_init - one-time driver initialization for SI ASICs.
 *
 * Reads and validates the (ATOM) BIOS, posts the card if needed,
 * initializes golden registers, scratch/surface registers, clocks, the
 * fence driver, the memory controller and buffer manager, sizes all CP
 * and DMA rings (plus UVD and the IH ring), sets up the GART and finally
 * runs si_startup().  A startup failure disables acceleration but is not
 * fatal; a missing MC ucode is.  Returns 0 on success or negative errno.
 */
int si_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	si_init_golden_registers(rdev);
	/* Initialize scratch registers */
	si_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = si_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* size the rings; they are allocated later in si_startup() */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	if (rdev->has_uvd) {
		r = radeon_uvd_init(rdev);
		if (!r) {
			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
			ring->ring_obj = NULL;
			r600_ring_init(rdev, ring, 4096);
		}
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		/* startup failure: unwind everything and fall back to no accel */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		si_cp_fini(rdev);
		cayman_dma_fini(rdev);
		si_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		si_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not suffient for advanced operations.
	 */
	if (!rdev->mc_fw) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
6731 
/**
 * si_fini - tear down everything brought up for an SI GPU
 *
 * @rdev: radeon_device pointer
 *
 * Shuts down the CP and DMA engines, disables powergating and
 * clockgating, tears down interrupts, the RLC, writeback, the VM
 * manager and IB pools, finalizes UVD on ASICs that have it, then
 * releases GART, VRAM scratch, GEM state, the fence driver, the
 * buffer manager, atombios data, the microcode images and finally
 * the cached BIOS copy.  Counterpart of the driver init path; the
 * call order mirrors bring-up in reverse and should not be changed.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* UVD is only brought up when the ASIC has the block */
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	si_fini_microcode(rdev);
	/* release the cached BIOS image and clear the stale pointer */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
6758 
6759 /**
6760  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6761  *
6762  * @rdev: radeon_device pointer
6763  *
6764  * Fetches a GPU clock counter snapshot (SI).
6765  * Returns the 64 bit clock counter snapshot.
6766  */
6767 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6768 {
6769 	uint64_t clock;
6770 
6771 	spin_lock(&rdev->gpu_clock_mutex);
6772 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6773 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6774 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6775 	spin_unlock(&rdev->gpu_clock_mutex);
6776 	return clock;
6777 }
6778 
/**
 * si_set_uvd_clocks - program the UPLL to generate the UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested video clock; 0 leaves the PLL bypassed and asleep
 * @dclk: requested decoder clock; 0 leaves the PLL bypassed and asleep
 *
 * Switches VCLK/DCLK onto the bypass clock, computes feedback and post
 * dividers via radeon_uvd_calc_upll_dividers(), reprograms and restarts
 * the UPLL, and finally switches VCLK/DCLK back to the PLL outputs.
 * The register write ordering below is a hardware programming sequence
 * and must not be rearranged.
 * Returns 0 on success, or a negative error code from the divider
 * calculation or the PLL control requests.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* divider limits: fb 16384..0x03FFFFFF, post 0..128, step 5 —
	 * frequency bounds 125000/250000 presumably in 10 kHz units;
	 * TODO confirm against radeon_uvd_calc_upll_dividers() */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* spare bit selects a PLL operating range depending on fb_div */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
6869 
6870 static struct pci_dev dev_to_pcidev(device_t dev)
6871 {
6872     struct pci_dev pdev;
6873     pdev.dev = dev;
6874     return pdev;
6875 }
6876 
/**
 * si_pcie_gen3_enable - try to raise the PCIe link speed to gen2/gen3
 *
 * @rdev: radeon_device pointer
 *
 * No-op for IGPs, non-PCIE ASICs, or when disabled with
 * radeon.pcie_gen2=0.  Queries the platform's supported speeds via
 * drm_pcie_get_speed_cap_mask(); if the link is not already at the best
 * supported rate, runs the gen3 retraining/equalization loop against
 * both the GPU and its upstream bridge, then requests a software
 * initiated speed change and polls for completion.  The config-space
 * read/modify/write ordering is a hardware sequence — do not reorder.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
#if 0
	struct pci_dev *root = rdev->pdev->bus->self;
#else
	device_t root = device_get_parent(rdev->dev);
#endif
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;
	/* by-value wrappers so the Linux-style config accessors work here */
	struct pci_dev root_pdev = dev_to_pcidev(root);
	struct pci_dev pdev = dev_to_pcidev(rdev->dev);

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* which link rates does the platform support? */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* locate the PCIe capability blocks of the bridge and the GPU */
	bridge_pos = pci_get_pciecap_ptr(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_get_pciecap_ptr(rdev->dev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save LNKCTL on both ends; the HAWD bit is restored below */
			pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link to the maximum detected width first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* equalization retry loop, at most 10 attempts */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then ask the LC to redo EQ */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 — restore saved bits 4 and 11:9
				 * (compliance-related fields; confirm exact
				 * meaning against the PCIe LNKCTL2 layout) */
				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed field (low 4 bits of LNKCTL2) */
	pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for the LC to clear the bit */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7039 
/**
 * si_program_aspm - program PCIe ASPM (active state power management)
 *
 * @rdev: radeon_device pointer
 *
 * Configures L0s/L1 inactivity timers, PLL powerdown behaviour while
 * the link is in L1, dynamic lane power states and the clock-request
 * path.  Skipped when ASPM is disabled via the radeon.aspm module
 * parameter or on non-PCIE parts.  The disable_* flags are hard-coded
 * false here, so the L0s/L1/plloff paths are always taken; on this
 * port CLKREQ detection is stubbed out (zMN_TODO), so clk_req_support
 * is always false and the CLKREQ register block is never programmed.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the number of fast training sequences to transmit */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* set up L0s/L1 inactivity timers; PMI_TO_L1_DIS is cleared
	 * again below when L1 is enabled */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PHY PLLs to power down in OFF/TXS2
			 * states, on both PIF blocks */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* clear PLL ramp-up times on everything but Oland/Hainan */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* LS2 exit time is only programmed for Oland/Hainan */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			/* CLKREQ detection is not ported yet, so this
			 * always resolves to "not supported" */
			if (!disable_clkreq) {
#ifdef zMN_TODO
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
#else
				clk_req_support = false;
#endif
			} else {
				clk_req_support = false;
			}

			/* dead code on this port until CLKREQ detection works */
			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: write back the timer setup computed above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	/* drop the L0s inactivity timer again if N_FTS is saturated and
	 * both directions report reversed lanes */
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7247