xref: /dflybsd-src/sys/dev/drm/radeon/si.c (revision 081e4509d0b74e3f878e66bfcd9c6a5d4555b09a)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include <drm/radeon_drm.h>
30 #include "sid.h"
31 #include "atom.h"
32 #include "si_blit_shaders.h"
33 #include "clearstate_si.h"
34 #include "radeon_ucode.h"
35 
36 
/*
 * Firmware images for the SI-family ASICs (Tahiti, Pitcairn, Verde,
 * Oland, Hainan).  For each ASIC two naming variants are declared:
 * the all-uppercase names and the lowercase names; only the uppercase
 * sets include an *_mc2.bin image.
 * NOTE(review): which variant is actually loaded is decided by code
 * outside this chunk -- confirm against the microcode-init routine.
 */
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");

MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
MODULE_FIRMWARE("radeon/tahiti_me.bin");
MODULE_FIRMWARE("radeon/tahiti_ce.bin");
MODULE_FIRMWARE("radeon/tahiti_mc.bin");
MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
MODULE_FIRMWARE("radeon/tahiti_smc.bin");

MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");

MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
MODULE_FIRMWARE("radeon/pitcairn_me.bin");
MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
MODULE_FIRMWARE("radeon/pitcairn_smc.bin");

MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");

MODULE_FIRMWARE("radeon/verde_pfp.bin");
MODULE_FIRMWARE("radeon/verde_me.bin");
MODULE_FIRMWARE("radeon/verde_ce.bin");
MODULE_FIRMWARE("radeon/verde_mc.bin");
MODULE_FIRMWARE("radeon/verde_rlc.bin");
MODULE_FIRMWARE("radeon/verde_smc.bin");

MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");

MODULE_FIRMWARE("radeon/oland_pfp.bin");
MODULE_FIRMWARE("radeon/oland_me.bin");
MODULE_FIRMWARE("radeon/oland_ce.bin");
MODULE_FIRMWARE("radeon/oland_mc.bin");
MODULE_FIRMWARE("radeon/oland_rlc.bin");
MODULE_FIRMWARE("radeon/oland_smc.bin");

MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

MODULE_FIRMWARE("radeon/hainan_pfp.bin");
MODULE_FIRMWARE("radeon/hainan_me.bin");
MODULE_FIRMWARE("radeon/hainan_ce.bin");
MODULE_FIRMWARE("radeon/hainan_mc.bin");
MODULE_FIRMWARE("radeon/hainan_rlc.bin");
MODULE_FIRMWARE("radeon/hainan_smc.bin");
111 
/*
 * Forward declarations for static helpers defined later in this file
 * (CU bitmap query, PCIe gen3/ASPM setup, power/clock-gating init and
 * teardown, and RLC control).
 */
static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
static void si_init_pg(struct radeon_device *rdev);
static void si_init_cg(struct radeon_device *rdev);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);
122 
123 static const u32 verde_rlc_save_restore_register_list[] =
124 {
125 	(0x8000 << 16) | (0x98f4 >> 2),
126 	0x00000000,
127 	(0x8040 << 16) | (0x98f4 >> 2),
128 	0x00000000,
129 	(0x8000 << 16) | (0xe80 >> 2),
130 	0x00000000,
131 	(0x8040 << 16) | (0xe80 >> 2),
132 	0x00000000,
133 	(0x8000 << 16) | (0x89bc >> 2),
134 	0x00000000,
135 	(0x8040 << 16) | (0x89bc >> 2),
136 	0x00000000,
137 	(0x8000 << 16) | (0x8c1c >> 2),
138 	0x00000000,
139 	(0x8040 << 16) | (0x8c1c >> 2),
140 	0x00000000,
141 	(0x9c00 << 16) | (0x98f0 >> 2),
142 	0x00000000,
143 	(0x9c00 << 16) | (0xe7c >> 2),
144 	0x00000000,
145 	(0x8000 << 16) | (0x9148 >> 2),
146 	0x00000000,
147 	(0x8040 << 16) | (0x9148 >> 2),
148 	0x00000000,
149 	(0x9c00 << 16) | (0x9150 >> 2),
150 	0x00000000,
151 	(0x9c00 << 16) | (0x897c >> 2),
152 	0x00000000,
153 	(0x9c00 << 16) | (0x8d8c >> 2),
154 	0x00000000,
155 	(0x9c00 << 16) | (0xac54 >> 2),
156 	0X00000000,
157 	0x3,
158 	(0x9c00 << 16) | (0x98f8 >> 2),
159 	0x00000000,
160 	(0x9c00 << 16) | (0x9910 >> 2),
161 	0x00000000,
162 	(0x9c00 << 16) | (0x9914 >> 2),
163 	0x00000000,
164 	(0x9c00 << 16) | (0x9918 >> 2),
165 	0x00000000,
166 	(0x9c00 << 16) | (0x991c >> 2),
167 	0x00000000,
168 	(0x9c00 << 16) | (0x9920 >> 2),
169 	0x00000000,
170 	(0x9c00 << 16) | (0x9924 >> 2),
171 	0x00000000,
172 	(0x9c00 << 16) | (0x9928 >> 2),
173 	0x00000000,
174 	(0x9c00 << 16) | (0x992c >> 2),
175 	0x00000000,
176 	(0x9c00 << 16) | (0x9930 >> 2),
177 	0x00000000,
178 	(0x9c00 << 16) | (0x9934 >> 2),
179 	0x00000000,
180 	(0x9c00 << 16) | (0x9938 >> 2),
181 	0x00000000,
182 	(0x9c00 << 16) | (0x993c >> 2),
183 	0x00000000,
184 	(0x9c00 << 16) | (0x9940 >> 2),
185 	0x00000000,
186 	(0x9c00 << 16) | (0x9944 >> 2),
187 	0x00000000,
188 	(0x9c00 << 16) | (0x9948 >> 2),
189 	0x00000000,
190 	(0x9c00 << 16) | (0x994c >> 2),
191 	0x00000000,
192 	(0x9c00 << 16) | (0x9950 >> 2),
193 	0x00000000,
194 	(0x9c00 << 16) | (0x9954 >> 2),
195 	0x00000000,
196 	(0x9c00 << 16) | (0x9958 >> 2),
197 	0x00000000,
198 	(0x9c00 << 16) | (0x995c >> 2),
199 	0x00000000,
200 	(0x9c00 << 16) | (0x9960 >> 2),
201 	0x00000000,
202 	(0x9c00 << 16) | (0x9964 >> 2),
203 	0x00000000,
204 	(0x9c00 << 16) | (0x9968 >> 2),
205 	0x00000000,
206 	(0x9c00 << 16) | (0x996c >> 2),
207 	0x00000000,
208 	(0x9c00 << 16) | (0x9970 >> 2),
209 	0x00000000,
210 	(0x9c00 << 16) | (0x9974 >> 2),
211 	0x00000000,
212 	(0x9c00 << 16) | (0x9978 >> 2),
213 	0x00000000,
214 	(0x9c00 << 16) | (0x997c >> 2),
215 	0x00000000,
216 	(0x9c00 << 16) | (0x9980 >> 2),
217 	0x00000000,
218 	(0x9c00 << 16) | (0x9984 >> 2),
219 	0x00000000,
220 	(0x9c00 << 16) | (0x9988 >> 2),
221 	0x00000000,
222 	(0x9c00 << 16) | (0x998c >> 2),
223 	0x00000000,
224 	(0x9c00 << 16) | (0x8c00 >> 2),
225 	0x00000000,
226 	(0x9c00 << 16) | (0x8c14 >> 2),
227 	0x00000000,
228 	(0x9c00 << 16) | (0x8c04 >> 2),
229 	0x00000000,
230 	(0x9c00 << 16) | (0x8c08 >> 2),
231 	0x00000000,
232 	(0x8000 << 16) | (0x9b7c >> 2),
233 	0x00000000,
234 	(0x8040 << 16) | (0x9b7c >> 2),
235 	0x00000000,
236 	(0x8000 << 16) | (0xe84 >> 2),
237 	0x00000000,
238 	(0x8040 << 16) | (0xe84 >> 2),
239 	0x00000000,
240 	(0x8000 << 16) | (0x89c0 >> 2),
241 	0x00000000,
242 	(0x8040 << 16) | (0x89c0 >> 2),
243 	0x00000000,
244 	(0x8000 << 16) | (0x914c >> 2),
245 	0x00000000,
246 	(0x8040 << 16) | (0x914c >> 2),
247 	0x00000000,
248 	(0x8000 << 16) | (0x8c20 >> 2),
249 	0x00000000,
250 	(0x8040 << 16) | (0x8c20 >> 2),
251 	0x00000000,
252 	(0x8000 << 16) | (0x9354 >> 2),
253 	0x00000000,
254 	(0x8040 << 16) | (0x9354 >> 2),
255 	0x00000000,
256 	(0x9c00 << 16) | (0x9060 >> 2),
257 	0x00000000,
258 	(0x9c00 << 16) | (0x9364 >> 2),
259 	0x00000000,
260 	(0x9c00 << 16) | (0x9100 >> 2),
261 	0x00000000,
262 	(0x9c00 << 16) | (0x913c >> 2),
263 	0x00000000,
264 	(0x8000 << 16) | (0x90e0 >> 2),
265 	0x00000000,
266 	(0x8000 << 16) | (0x90e4 >> 2),
267 	0x00000000,
268 	(0x8000 << 16) | (0x90e8 >> 2),
269 	0x00000000,
270 	(0x8040 << 16) | (0x90e0 >> 2),
271 	0x00000000,
272 	(0x8040 << 16) | (0x90e4 >> 2),
273 	0x00000000,
274 	(0x8040 << 16) | (0x90e8 >> 2),
275 	0x00000000,
276 	(0x9c00 << 16) | (0x8bcc >> 2),
277 	0x00000000,
278 	(0x9c00 << 16) | (0x8b24 >> 2),
279 	0x00000000,
280 	(0x9c00 << 16) | (0x88c4 >> 2),
281 	0x00000000,
282 	(0x9c00 << 16) | (0x8e50 >> 2),
283 	0x00000000,
284 	(0x9c00 << 16) | (0x8c0c >> 2),
285 	0x00000000,
286 	(0x9c00 << 16) | (0x8e58 >> 2),
287 	0x00000000,
288 	(0x9c00 << 16) | (0x8e5c >> 2),
289 	0x00000000,
290 	(0x9c00 << 16) | (0x9508 >> 2),
291 	0x00000000,
292 	(0x9c00 << 16) | (0x950c >> 2),
293 	0x00000000,
294 	(0x9c00 << 16) | (0x9494 >> 2),
295 	0x00000000,
296 	(0x9c00 << 16) | (0xac0c >> 2),
297 	0x00000000,
298 	(0x9c00 << 16) | (0xac10 >> 2),
299 	0x00000000,
300 	(0x9c00 << 16) | (0xac14 >> 2),
301 	0x00000000,
302 	(0x9c00 << 16) | (0xae00 >> 2),
303 	0x00000000,
304 	(0x9c00 << 16) | (0xac08 >> 2),
305 	0x00000000,
306 	(0x9c00 << 16) | (0x88d4 >> 2),
307 	0x00000000,
308 	(0x9c00 << 16) | (0x88c8 >> 2),
309 	0x00000000,
310 	(0x9c00 << 16) | (0x88cc >> 2),
311 	0x00000000,
312 	(0x9c00 << 16) | (0x89b0 >> 2),
313 	0x00000000,
314 	(0x9c00 << 16) | (0x8b10 >> 2),
315 	0x00000000,
316 	(0x9c00 << 16) | (0x8a14 >> 2),
317 	0x00000000,
318 	(0x9c00 << 16) | (0x9830 >> 2),
319 	0x00000000,
320 	(0x9c00 << 16) | (0x9834 >> 2),
321 	0x00000000,
322 	(0x9c00 << 16) | (0x9838 >> 2),
323 	0x00000000,
324 	(0x9c00 << 16) | (0x9a10 >> 2),
325 	0x00000000,
326 	(0x8000 << 16) | (0x9870 >> 2),
327 	0x00000000,
328 	(0x8000 << 16) | (0x9874 >> 2),
329 	0x00000000,
330 	(0x8001 << 16) | (0x9870 >> 2),
331 	0x00000000,
332 	(0x8001 << 16) | (0x9874 >> 2),
333 	0x00000000,
334 	(0x8040 << 16) | (0x9870 >> 2),
335 	0x00000000,
336 	(0x8040 << 16) | (0x9874 >> 2),
337 	0x00000000,
338 	(0x8041 << 16) | (0x9870 >> 2),
339 	0x00000000,
340 	(0x8041 << 16) | (0x9874 >> 2),
341 	0x00000000,
342 	0x00000000
343 };
344 
/*
 * "Golden" RLC register settings for Tahiti: triples of
 * { register offset, bitmask, value }.
 * NOTE(review): triple format and the applying helper
 * (radeon_program_register_sequence) are not visible in this chunk --
 * confirm against the golden-init code.
 */
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};
354 
/*
 * "Golden" register settings for Tahiti: triples of
 * { register offset, bitmask, value }.  Values are vendor-supplied;
 * do not edit individual entries.
 */
static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};
389 
/*
 * Second "golden" register set for Tahiti ({ offset, mask, value }),
 * kept separate from the first table; single entry.
 */
static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};
394 
/*
 * "Golden" RLC register settings for Pitcairn:
 * { register offset, bitmask, value } triples.
 */
static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};
403 
/*
 * "Golden" register settings for Pitcairn:
 * { register offset, bitmask, value } triples.  Vendor-supplied values.
 */
static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
434 
/*
 * "Golden" RLC register settings for Verde:
 * { register offset, bitmask, value } triples.
 */
static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};
443 
/*
 * "Golden" register settings for Verde:
 * { register offset, bitmask, value } triples.
 * Several entries appear two or three times (e.g. 0x2ae4, 0x240c,
 * 0x8a14) -- preserved as-is from the vendor table.
 * NOTE(review): whether the repetition is intentional per-instance
 * programming is not determinable from this chunk.
 */
static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
499 
/*
 * "Golden" RLC register settings for Oland:
 * { register offset, bitmask, value } triples.
 */
static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};
508 
/*
 * "Golden" register settings for Oland:
 * { register offset, bitmask, value } triples.  Vendor-supplied values.
 */
static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
539 
/*
 * "Golden" register settings for Hainan:
 * { register offset, bitmask, value } triples.  Vendor-supplied values.
 */
static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
568 
/*
 * Second "golden" register set for Hainan ({ offset, mask, value });
 * single entry.
 */
static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};
573 
/*
 * Clock-gating init sequence for Tahiti:
 * { register offset, bitmask, value } triples.
 * NOTE(review): the name suggests medium-grain/coarse-grain clock
 * gating (MGCG/CGCG); the consuming code is outside this chunk --
 * confirm against si_init_cg().  Values are vendor-supplied.
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
703 
/*
 * Clock-gating init sequence for Pitcairn:
 * { register offset, bitmask, value } triples.
 * NOTE(review): name suggests MGCG/CGCG programming; consumer not
 * visible in this chunk.  Values are vendor-supplied.
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
801 
/*
 * Clock-gating init sequence for Verde:
 * { register offset, bitmask, value } triples.
 * NOTE(review): name suggests MGCG/CGCG programming; consumer not
 * visible in this chunk.  Values are vendor-supplied.
 */
static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
901 
/*
 * Clock-gating init sequence for Oland:
 * { register offset, bitmask, value } triples.
 * NOTE(review): name suggests MGCG/CGCG programming; consumer not
 * visible in this chunk.  Values are vendor-supplied.
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
981 
/* Hainan medium/coarse grain clock gating (MGCG/CGCG) init sequence.
 * Same {register offset, AND mask, OR value} triple layout as the other
 * *_mgcg_cgcg_init tables; programmed from si_init_golden_registers().
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1058 
1059 static u32 verde_pg_init[] =
1060 {
1061 	0x353c, 0xffffffff, 0x40000,
1062 	0x3538, 0xffffffff, 0x200010ff,
1063 	0x353c, 0xffffffff, 0x0,
1064 	0x353c, 0xffffffff, 0x0,
1065 	0x353c, 0xffffffff, 0x0,
1066 	0x353c, 0xffffffff, 0x0,
1067 	0x353c, 0xffffffff, 0x0,
1068 	0x353c, 0xffffffff, 0x7007,
1069 	0x3538, 0xffffffff, 0x300010ff,
1070 	0x353c, 0xffffffff, 0x0,
1071 	0x353c, 0xffffffff, 0x0,
1072 	0x353c, 0xffffffff, 0x0,
1073 	0x353c, 0xffffffff, 0x0,
1074 	0x353c, 0xffffffff, 0x0,
1075 	0x353c, 0xffffffff, 0x400000,
1076 	0x3538, 0xffffffff, 0x100010ff,
1077 	0x353c, 0xffffffff, 0x0,
1078 	0x353c, 0xffffffff, 0x0,
1079 	0x353c, 0xffffffff, 0x0,
1080 	0x353c, 0xffffffff, 0x0,
1081 	0x353c, 0xffffffff, 0x0,
1082 	0x353c, 0xffffffff, 0x120200,
1083 	0x3538, 0xffffffff, 0x500010ff,
1084 	0x353c, 0xffffffff, 0x0,
1085 	0x353c, 0xffffffff, 0x0,
1086 	0x353c, 0xffffffff, 0x0,
1087 	0x353c, 0xffffffff, 0x0,
1088 	0x353c, 0xffffffff, 0x0,
1089 	0x353c, 0xffffffff, 0x1e1e16,
1090 	0x3538, 0xffffffff, 0x600010ff,
1091 	0x353c, 0xffffffff, 0x0,
1092 	0x353c, 0xffffffff, 0x0,
1093 	0x353c, 0xffffffff, 0x0,
1094 	0x353c, 0xffffffff, 0x0,
1095 	0x353c, 0xffffffff, 0x0,
1096 	0x353c, 0xffffffff, 0x171f1e,
1097 	0x3538, 0xffffffff, 0x700010ff,
1098 	0x353c, 0xffffffff, 0x0,
1099 	0x353c, 0xffffffff, 0x0,
1100 	0x353c, 0xffffffff, 0x0,
1101 	0x353c, 0xffffffff, 0x0,
1102 	0x353c, 0xffffffff, 0x0,
1103 	0x353c, 0xffffffff, 0x0,
1104 	0x3538, 0xffffffff, 0x9ff,
1105 	0x3500, 0xffffffff, 0x0,
1106 	0x3504, 0xffffffff, 0x10000800,
1107 	0x3504, 0xffffffff, 0xf,
1108 	0x3504, 0xffffffff, 0xf,
1109 	0x3500, 0xffffffff, 0x4,
1110 	0x3504, 0xffffffff, 0x1000051e,
1111 	0x3504, 0xffffffff, 0xffff,
1112 	0x3504, 0xffffffff, 0xffff,
1113 	0x3500, 0xffffffff, 0x8,
1114 	0x3504, 0xffffffff, 0x80500,
1115 	0x3500, 0xffffffff, 0x12,
1116 	0x3504, 0xffffffff, 0x9050c,
1117 	0x3500, 0xffffffff, 0x1d,
1118 	0x3504, 0xffffffff, 0xb052c,
1119 	0x3500, 0xffffffff, 0x2a,
1120 	0x3504, 0xffffffff, 0x1053e,
1121 	0x3500, 0xffffffff, 0x2d,
1122 	0x3504, 0xffffffff, 0x10546,
1123 	0x3500, 0xffffffff, 0x30,
1124 	0x3504, 0xffffffff, 0xa054e,
1125 	0x3500, 0xffffffff, 0x3c,
1126 	0x3504, 0xffffffff, 0x1055f,
1127 	0x3500, 0xffffffff, 0x3f,
1128 	0x3504, 0xffffffff, 0x10567,
1129 	0x3500, 0xffffffff, 0x42,
1130 	0x3504, 0xffffffff, 0x1056f,
1131 	0x3500, 0xffffffff, 0x45,
1132 	0x3504, 0xffffffff, 0x10572,
1133 	0x3500, 0xffffffff, 0x48,
1134 	0x3504, 0xffffffff, 0x20575,
1135 	0x3500, 0xffffffff, 0x4c,
1136 	0x3504, 0xffffffff, 0x190801,
1137 	0x3500, 0xffffffff, 0x67,
1138 	0x3504, 0xffffffff, 0x1082a,
1139 	0x3500, 0xffffffff, 0x6a,
1140 	0x3504, 0xffffffff, 0x1b082d,
1141 	0x3500, 0xffffffff, 0x87,
1142 	0x3504, 0xffffffff, 0x310851,
1143 	0x3500, 0xffffffff, 0xba,
1144 	0x3504, 0xffffffff, 0x891,
1145 	0x3500, 0xffffffff, 0xbc,
1146 	0x3504, 0xffffffff, 0x893,
1147 	0x3500, 0xffffffff, 0xbe,
1148 	0x3504, 0xffffffff, 0x20895,
1149 	0x3500, 0xffffffff, 0xc2,
1150 	0x3504, 0xffffffff, 0x20899,
1151 	0x3500, 0xffffffff, 0xc6,
1152 	0x3504, 0xffffffff, 0x2089d,
1153 	0x3500, 0xffffffff, 0xca,
1154 	0x3504, 0xffffffff, 0x8a1,
1155 	0x3500, 0xffffffff, 0xcc,
1156 	0x3504, 0xffffffff, 0x8a3,
1157 	0x3500, 0xffffffff, 0xce,
1158 	0x3504, 0xffffffff, 0x308a5,
1159 	0x3500, 0xffffffff, 0xd3,
1160 	0x3504, 0xffffffff, 0x6d08cd,
1161 	0x3500, 0xffffffff, 0x142,
1162 	0x3504, 0xffffffff, 0x2000095a,
1163 	0x3504, 0xffffffff, 0x1,
1164 	0x3500, 0xffffffff, 0x144,
1165 	0x3504, 0xffffffff, 0x301f095b,
1166 	0x3500, 0xffffffff, 0x165,
1167 	0x3504, 0xffffffff, 0xc094d,
1168 	0x3500, 0xffffffff, 0x173,
1169 	0x3504, 0xffffffff, 0xf096d,
1170 	0x3500, 0xffffffff, 0x184,
1171 	0x3504, 0xffffffff, 0x15097f,
1172 	0x3500, 0xffffffff, 0x19b,
1173 	0x3504, 0xffffffff, 0xc0998,
1174 	0x3500, 0xffffffff, 0x1a9,
1175 	0x3504, 0xffffffff, 0x409a7,
1176 	0x3500, 0xffffffff, 0x1af,
1177 	0x3504, 0xffffffff, 0xcdc,
1178 	0x3500, 0xffffffff, 0x1b1,
1179 	0x3504, 0xffffffff, 0x800,
1180 	0x3508, 0xffffffff, 0x6c9b2000,
1181 	0x3510, 0xfc00, 0x2000,
1182 	0x3544, 0xffffffff, 0xfc0,
1183 	0x28d4, 0x00000100, 0x100
1184 };
1185 
1186 static void si_init_golden_registers(struct radeon_device *rdev)
1187 {
1188 	switch (rdev->family) {
1189 	case CHIP_TAHITI:
1190 		radeon_program_register_sequence(rdev,
1191 						 tahiti_golden_registers,
1192 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1193 		radeon_program_register_sequence(rdev,
1194 						 tahiti_golden_rlc_registers,
1195 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1196 		radeon_program_register_sequence(rdev,
1197 						 tahiti_mgcg_cgcg_init,
1198 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1199 		radeon_program_register_sequence(rdev,
1200 						 tahiti_golden_registers2,
1201 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1202 		break;
1203 	case CHIP_PITCAIRN:
1204 		radeon_program_register_sequence(rdev,
1205 						 pitcairn_golden_registers,
1206 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1207 		radeon_program_register_sequence(rdev,
1208 						 pitcairn_golden_rlc_registers,
1209 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1210 		radeon_program_register_sequence(rdev,
1211 						 pitcairn_mgcg_cgcg_init,
1212 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1213 		break;
1214 	case CHIP_VERDE:
1215 		radeon_program_register_sequence(rdev,
1216 						 verde_golden_registers,
1217 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1218 		radeon_program_register_sequence(rdev,
1219 						 verde_golden_rlc_registers,
1220 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1221 		radeon_program_register_sequence(rdev,
1222 						 verde_mgcg_cgcg_init,
1223 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1224 		radeon_program_register_sequence(rdev,
1225 						 verde_pg_init,
1226 						 (const u32)ARRAY_SIZE(verde_pg_init));
1227 		break;
1228 	case CHIP_OLAND:
1229 		radeon_program_register_sequence(rdev,
1230 						 oland_golden_registers,
1231 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1232 		radeon_program_register_sequence(rdev,
1233 						 oland_golden_rlc_registers,
1234 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1235 		radeon_program_register_sequence(rdev,
1236 						 oland_mgcg_cgcg_init,
1237 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1238 		break;
1239 	case CHIP_HAINAN:
1240 		radeon_program_register_sequence(rdev,
1241 						 hainan_golden_registers,
1242 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1243 		radeon_program_register_sequence(rdev,
1244 						 hainan_golden_registers2,
1245 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1246 		radeon_program_register_sequence(rdev,
1247 						 hainan_mgcg_cgcg_init,
1248 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1249 		break;
1250 	default:
1251 		break;
1252 	}
1253 }
1254 
/* TCLK is derived from the PCIe bus clock; si_get_xclk() returns it as
 * the xclk when the MUX_TCLK_TO_XCLK bit is set in CG_CLKPIN_CNTL_2.
 */
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)
1257 
1258 /**
1259  * si_get_xclk - get the xclk
1260  *
1261  * @rdev: radeon_device pointer
1262  *
1263  * Returns the reference clock used by the gfx engine
1264  * (SI).
1265  */
1266 u32 si_get_xclk(struct radeon_device *rdev)
1267 {
1268         u32 reference_clock = rdev->clock.spll.reference_freq;
1269 	u32 tmp;
1270 
1271 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1272 	if (tmp & MUX_TCLK_TO_XCLK)
1273 		return TCLK;
1274 
1275 	tmp = RREG32(CG_CLKPIN_CNTL);
1276 	if (tmp & XTALIN_DIVIDE)
1277 		return reference_clock / 4;
1278 
1279 	return reference_clock;
1280 }
1281 
1282 /* get temperature in millidegrees */
1283 int si_get_temp(struct radeon_device *rdev)
1284 {
1285 	u32 temp;
1286 	int actual_temp = 0;
1287 
1288 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1289 		CTF_TEMP_SHIFT;
1290 
1291 	if (temp & 0x200)
1292 		actual_temp = 255;
1293 	else
1294 		actual_temp = temp & 0x1ff;
1295 
1296 	actual_temp = (actual_temp * 1000);
1297 
1298 	return actual_temp;
1299 }
1300 
1301 #define TAHITI_IO_MC_REGS_SIZE 36
1302 
/* Tahiti {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs written by
 * si_mc_load_microcode() before uploading legacy (non-new_fw) MC ucode.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1341 
/* Pitcairn MC io-debug pairs; identical to the Tahiti table except for
 * the final (0x9f) entry.  Used by si_mc_load_microcode() legacy path.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1380 
/* Verde MC io-debug pairs; identical to the Tahiti table except for the
 * final (0x9f) entry.  Used by si_mc_load_microcode() legacy path.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1419 
/* Oland MC io-debug pairs; identical to the Tahiti table except for the
 * final (0x9f) entry.  Used by si_mc_load_microcode() legacy path.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1458 
/* Hainan MC io-debug pairs; identical to the Tahiti table except for the
 * final (0x9f) entry.  Used by si_mc_load_microcode() legacy path.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1497 
1498 /* ucode loading */
1499 int si_mc_load_microcode(struct radeon_device *rdev)
1500 {
1501 	const __be32 *fw_data = NULL;
1502 	const __le32 *new_fw_data = NULL;
1503 	u32 running, blackout = 0;
1504 	u32 *io_mc_regs = NULL;
1505 	const __le32 *new_io_mc_regs = NULL;
1506 	int i, regs_size, ucode_size;
1507 
1508 	if (!rdev->mc_fw)
1509 		return -EINVAL;
1510 
1511 	if (rdev->new_fw) {
1512 		const struct mc_firmware_header_v1_0 *hdr =
1513 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1514 
1515 		radeon_ucode_print_mc_hdr(&hdr->header);
1516 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1517 		new_io_mc_regs = (const __le32 *)
1518 			((const char *)rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1519 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1520 		new_fw_data = (const __le32 *)
1521 			((const char *)rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1522 	} else {
1523 		ucode_size = rdev->mc_fw->datasize / 4;
1524 
1525 		switch (rdev->family) {
1526 		case CHIP_TAHITI:
1527 			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1528 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1529 			break;
1530 		case CHIP_PITCAIRN:
1531 			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1532 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1533 			break;
1534 		case CHIP_VERDE:
1535 		default:
1536 			io_mc_regs = (u32 *)&verde_io_mc_regs;
1537 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1538 			break;
1539 		case CHIP_OLAND:
1540 			io_mc_regs = (u32 *)&oland_io_mc_regs;
1541 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1542 			break;
1543 		case CHIP_HAINAN:
1544 			io_mc_regs = (u32 *)&hainan_io_mc_regs;
1545 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1546 			break;
1547 		}
1548 		fw_data = (const __be32 *)rdev->mc_fw->data;
1549 	}
1550 
1551 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1552 
1553 	if (running == 0) {
1554 		if (running) {
1555 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1556 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1557 		}
1558 
1559 		/* reset the engine and set to writable */
1560 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1561 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1562 
1563 		/* load mc io regs */
1564 		for (i = 0; i < regs_size; i++) {
1565 			if (rdev->new_fw) {
1566 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1567 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1568 			} else {
1569 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1570 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1571 			}
1572 		}
1573 		/* load the MC ucode */
1574 		for (i = 0; i < ucode_size; i++) {
1575 			if (rdev->new_fw)
1576 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1577 			else
1578 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1579 		}
1580 
1581 		/* put the engine back into the active state */
1582 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1583 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1584 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1585 
1586 		/* wait for training to complete */
1587 		for (i = 0; i < rdev->usec_timeout; i++) {
1588 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1589 				break;
1590 			udelay(1);
1591 		}
1592 		for (i = 0; i < rdev->usec_timeout; i++) {
1593 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1594 				break;
1595 			udelay(1);
1596 		}
1597 
1598 		if (running)
1599 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1600 	}
1601 
1602 	return 0;
1603 }
1604 
/**
 * si_init_microcode - load the SI firmware images
 *
 * @rdev: radeon_device pointer
 *
 * Requests the pfp, me, ce, rlc, mc and smc firmware images.  For each
 * image the new lowercase-named firmware is tried first (and validated);
 * on failure the legacy uppercase-named image is tried and only its size
 * is checked.  Mixing new and old images is rejected.  A missing smc
 * image is tolerated (DPM is simply unavailable).  On any hard failure
 * every image acquired so far is released.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int si_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	const char *new_chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
	size_t smc_req_size, mc2_req_size;
	char fw_name[30];
	int err;
	int new_fw = 0;	/* counts validated new-style images; must be 0 or 6 */

	DRM_DEBUG("\n");

	/* per-family firmware names and expected legacy image sizes */
	switch (rdev->family) {
	case CHIP_TAHITI:
		chip_name = "TAHITI";
		new_chip_name = "tahiti";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_PITCAIRN:
		chip_name = "PITCAIRN";
		new_chip_name = "pitcairn";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_VERDE:
		chip_name = "VERDE";
		new_chip_name = "verde";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_OLAND:
		chip_name = "OLAND";
		new_chip_name = "oland";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_HAINAN:
		chip_name = "HAINAN";
		new_chip_name = "hainan";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", new_chip_name);

	/* PFP (prefetch parser) */
	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err) {
		/* fall back to the legacy image; only the size is checked */
		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->pfp_fw->datasize != pfp_req_size) {
			printk(KERN_ERR
			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->pfp_fw->datasize, fw_name);
			err = -EINVAL;
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->pfp_fw);
		if (err) {
			printk(KERN_ERR
			       "si_cp: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* ME (micro engine) */
	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err) {
		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->me_fw->datasize != me_req_size) {
			printk(KERN_ERR
			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->me_fw->datasize, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->me_fw);
		if (err) {
			printk(KERN_ERR
			       "si_cp: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* CE (constant engine) */
	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err) {
		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->ce_fw->datasize != ce_req_size) {
			printk(KERN_ERR
			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->ce_fw->datasize, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->ce_fw);
		if (err) {
			printk(KERN_ERR
			       "si_cp: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* RLC (run list controller) */
	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err) {
		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->rlc_fw->datasize != rlc_req_size) {
			printk(KERN_ERR
			       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->rlc_fw->datasize, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->rlc_fw);
		if (err) {
			printk(KERN_ERR
			       "si_cp: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* MC (memory controller); legacy path prefers the mc2 image and
	 * accepts either the mc or mc2 size */
	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
	if (err) {
		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err) {
			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
			if (err)
				goto out;
		}
		if ((rdev->mc_fw->datasize != mc_req_size) &&
		    (rdev->mc_fw->datasize != mc2_req_size)) {
			printk(KERN_ERR
			       "si_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->datasize, fw_name);
			err = -EINVAL;
		}
		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
	} else {
		err = radeon_ucode_validate(rdev->mc_fw);
		if (err) {
			printk(KERN_ERR
			       "si_cp: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* SMC (smu); optional - a missing image only disables DPM */
	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
	if (err) {
		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			printk(KERN_ERR
			       "smc: error loading firmware \"%s\"\n",
			       fw_name);
			release_firmware(rdev->smc_fw);
			rdev->smc_fw = NULL;
			err = 0;
		} else if (rdev->smc_fw->datasize != smc_req_size) {
			printk(KERN_ERR
			       "si_smc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->smc_fw->datasize, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->smc_fw);
		if (err) {
			printk(KERN_ERR
			       "si_cp: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* either all six images are new-style, or all must be legacy */
	if (new_fw == 0) {
		rdev->new_fw = false;
	} else if (new_fw < 6) {
		printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
		err = -EINVAL;
	} else {
		rdev->new_fw = true;
	}
out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "si_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}
1867 
1868 /**
1869  * si_fini_microcode - drop the firmwares image references
1870  *
1871  * @rdev: radeon_device pointer
1872  *
1873  * Drop the pfp, me, rlc, mc and ce firmware image references.
1874  * Called at driver shutdown.
1875  */
1876 static void si_fini_microcode(struct radeon_device *rdev)
1877 {
1878 	release_firmware(rdev->pfp_fw);
1879 	rdev->pfp_fw = NULL;
1880 	release_firmware(rdev->me_fw);
1881 	rdev->me_fw = NULL;
1882 	release_firmware(rdev->rlc_fw);
1883 	rdev->rlc_fw = NULL;
1884 	release_firmware(rdev->mc_fw);
1885 	rdev->mc_fw = NULL;
1886 	release_firmware(rdev->smc_fw);
1887 	rdev->smc_fw = NULL;
1888 	release_firmware(rdev->ce_fw);
1889 	rdev->ce_fw = NULL;
1890 }
1891 
1892 /* watermark setup */
/**
 * dce6_line_buffer_adjust - partition the display line buffer for a crtc
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the crtc being configured
 * @mode: this crtc's mode (NULL if disabled)
 * @other_mode: the paired crtc's mode (NULL if that crtc is disabled)
 *
 * Selects a line-buffer split for the crtc, programs the DMIF buffer
 * allocation and waits for it to take effect.  Returns the number of
 * line-buffer entries granted to this crtc (0 if it is disabled).
 */
static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode,
				   struct drm_display_mode *other_mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 3 line buffers, each one shared by 2 display controllers.
	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
	 * the display controllers.  The partitioning is done via one of four
	 * preset allocations specified in bits 21:20:
	 *  0 - half lb
	 *  2 - whole lb, other crtc must be disabled
	 */
	/* this can get tricky if we have two large displays on a paired group
	 * of crtcs.  Ideally for multiple large displays we'd assign them to
	 * non-linked crtcs for maximum line buffer allocation.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (other_mode) {
			tmp = 0; /* 1/2 */
			buffer_alloc = 1;
		} else {
			tmp = 2; /* whole */
			buffer_alloc = 2;
		}
	} else {
		/* crtc disabled: no line buffer, no DMIF buffers */
		tmp = 0;
		buffer_alloc = 0;
	}

	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
	       DC_LB_MEMORY_CONFIG(tmp));

	/* program the DMIF buffer allocation and wait for the hw to ack it */
	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	/* translate the chosen split into a line-buffer entry count */
	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 2:
			return 8192 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
1951 
1952 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1953 {
1954 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1955 
1956 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1957 	case 0:
1958 	default:
1959 		return 1;
1960 	case 1:
1961 		return 2;
1962 	case 2:
1963 		return 4;
1964 	case 3:
1965 		return 8;
1966 	case 4:
1967 		return 3;
1968 	case 5:
1969 		return 6;
1970 	case 6:
1971 		return 10;
1972 	case 7:
1973 		return 12;
1974 	case 8:
1975 		return 16;
1976 	}
1977 }
1978 
/* inputs to the DCE6 display watermark calculations below */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1994 
1995 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1996 {
1997 	/* Calculate raw DRAM Bandwidth */
1998 	fixed20_12 dram_efficiency; /* 0.7 */
1999 	fixed20_12 yclk, dram_channels, bandwidth;
2000 	fixed20_12 a;
2001 
2002 	a.full = dfixed_const(1000);
2003 	yclk.full = dfixed_const(wm->yclk);
2004 	yclk.full = dfixed_div(yclk, a);
2005 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2006 	a.full = dfixed_const(10);
2007 	dram_efficiency.full = dfixed_const(7);
2008 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2009 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2010 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2011 
2012 	return dfixed_trunc(bandwidth);
2013 }
2014 
2015 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2016 {
2017 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2018 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2019 	fixed20_12 yclk, dram_channels, bandwidth;
2020 	fixed20_12 a;
2021 
2022 	a.full = dfixed_const(1000);
2023 	yclk.full = dfixed_const(wm->yclk);
2024 	yclk.full = dfixed_div(yclk, a);
2025 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2026 	a.full = dfixed_const(10);
2027 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2028 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2029 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2030 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2031 
2032 	return dfixed_trunc(bandwidth);
2033 }
2034 
2035 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2036 {
2037 	/* Calculate the display Data return Bandwidth */
2038 	fixed20_12 return_efficiency; /* 0.8 */
2039 	fixed20_12 sclk, bandwidth;
2040 	fixed20_12 a;
2041 
2042 	a.full = dfixed_const(1000);
2043 	sclk.full = dfixed_const(wm->sclk);
2044 	sclk.full = dfixed_div(sclk, a);
2045 	a.full = dfixed_const(10);
2046 	return_efficiency.full = dfixed_const(8);
2047 	return_efficiency.full = dfixed_div(return_efficiency, a);
2048 	a.full = dfixed_const(32);
2049 	bandwidth.full = dfixed_mul(a, sclk);
2050 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2051 
2052 	return dfixed_trunc(bandwidth);
2053 }
2054 
/* DMIF request size is a fixed 32 bytes on DCE6 */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
2059 
2060 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2061 {
2062 	/* Calculate the DMIF Request Bandwidth */
2063 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2064 	fixed20_12 disp_clk, sclk, bandwidth;
2065 	fixed20_12 a, b1, b2;
2066 	u32 min_bandwidth;
2067 
2068 	a.full = dfixed_const(1000);
2069 	disp_clk.full = dfixed_const(wm->disp_clk);
2070 	disp_clk.full = dfixed_div(disp_clk, a);
2071 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2072 	b1.full = dfixed_mul(a, disp_clk);
2073 
2074 	a.full = dfixed_const(1000);
2075 	sclk.full = dfixed_const(wm->sclk);
2076 	sclk.full = dfixed_div(sclk, a);
2077 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2078 	b2.full = dfixed_mul(a, sclk);
2079 
2080 	a.full = dfixed_const(10);
2081 	disp_clk_request_efficiency.full = dfixed_const(8);
2082 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2083 
2084 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2085 
2086 	a.full = dfixed_const(min_bandwidth);
2087 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2088 
2089 	return dfixed_trunc(bandwidth);
2090 }
2091 
2092 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2093 {
2094 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2095 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2096 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2097 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2098 
2099 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2100 }
2101 
2102 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2103 {
2104 	/* Calculate the display mode Average Bandwidth
2105 	 * DisplayMode should contain the source and destination dimensions,
2106 	 * timing, etc.
2107 	 */
2108 	fixed20_12 bpp;
2109 	fixed20_12 line_time;
2110 	fixed20_12 src_width;
2111 	fixed20_12 bandwidth;
2112 	fixed20_12 a;
2113 
2114 	a.full = dfixed_const(1000);
2115 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2116 	line_time.full = dfixed_div(line_time, a);
2117 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2118 	src_width.full = dfixed_const(wm->src_width);
2119 	bandwidth.full = dfixed_mul(src_width, bpp);
2120 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2121 	bandwidth.full = dfixed_div(bandwidth, line_time);
2122 
2123 	return dfixed_trunc(bandwidth);
2124 }
2125 
/* compute the latency watermark (in ns) for one head */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads -> nothing to hide */
	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling, many scaler taps or interlacing can need
	 * up to 4 source lines per destination line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk) */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk / 1000 * bytes_per_pixel: per-clock lb fill limit */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill one destination line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* pad the latency if a line takes longer to fill than to scan out */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2188 
2189 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2190 {
2191 	if (dce6_average_bandwidth(wm) <=
2192 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2193 		return true;
2194 	else
2195 		return false;
2196 };
2197 
2198 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2199 {
2200 	if (dce6_average_bandwidth(wm) <=
2201 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2202 		return true;
2203 	else
2204 		return false;
2205 };
2206 
2207 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2208 {
2209 	u32 lb_partitions = wm->lb_size / wm->src_width;
2210 	u32 line_time = wm->active_time + wm->blank_time;
2211 	u32 latency_tolerant_lines;
2212 	u32 latency_hiding;
2213 	fixed20_12 a;
2214 
2215 	a.full = dfixed_const(1);
2216 	if (wm->vsc.full > a.full)
2217 		latency_tolerant_lines = 1;
2218 	else {
2219 		if (lb_partitions <= (wm->vtaps + 1))
2220 			latency_tolerant_lines = 1;
2221 		else
2222 			latency_tolerant_lines = 2;
2223 	}
2224 
2225 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2226 
2227 	if (dce6_latency_watermark(wm) <= latency_hiding)
2228 		return true;
2229 	else
2230 		return false;
2231 }
2232 
/**
 * dce6_program_watermarks - program display watermarks for a crtc
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: crtc to program
 * @lb_size: line buffer size allocated to this crtc
 * @num_heads: number of active display heads
 *
 * Calculate latency watermarks for high and low clocks, program them
 * into the A/B watermark sets, and write the priority marks.  Saves
 * line time and watermarks on the crtc for later use by DPM.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* pixel period in ns; line time capped to the 16-bit register field */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a * clock / hsc, in 16-pixel units */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same computation with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2392 
2393 void dce6_bandwidth_update(struct radeon_device *rdev)
2394 {
2395 	struct drm_display_mode *mode0 = NULL;
2396 	struct drm_display_mode *mode1 = NULL;
2397 	u32 num_heads = 0, lb_size;
2398 	int i;
2399 
2400 	if (!rdev->mode_info.mode_config_initialized)
2401 		return;
2402 
2403 	radeon_update_display_priority(rdev);
2404 
2405 	for (i = 0; i < rdev->num_crtc; i++) {
2406 		if (rdev->mode_info.crtcs[i]->base.enabled)
2407 			num_heads++;
2408 	}
2409 	for (i = 0; i < rdev->num_crtc; i += 2) {
2410 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2411 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2412 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2413 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2414 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2415 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2416 	}
2417 }
2418 
2419 /*
2420  * Core functions
2421  */
2422 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2423 {
2424 	const u32 num_tile_mode_states = 32;
2425 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2426 
2427 	switch (rdev->config.si.mem_row_size_in_kb) {
2428 	case 1:
2429 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2430 		break;
2431 	case 2:
2432 	default:
2433 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2434 		break;
2435 	case 4:
2436 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2437 		break;
2438 	}
2439 
2440 	if ((rdev->family == CHIP_TAHITI) ||
2441 	    (rdev->family == CHIP_PITCAIRN)) {
2442 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2443 			switch (reg_offset) {
2444 			case 0:  /* non-AA compressed depth or any compressed stencil */
2445 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2446 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2447 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2448 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2449 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2450 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2452 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2453 				break;
2454 			case 1:  /* 2xAA/4xAA compressed depth only */
2455 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2456 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2457 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2458 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2459 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2460 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2462 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2463 				break;
2464 			case 2:  /* 8xAA compressed depth only */
2465 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2466 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2467 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2468 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2469 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2470 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2472 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2473 				break;
2474 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2475 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2476 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2477 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2478 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2479 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2480 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2482 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2483 				break;
2484 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2485 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2486 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2487 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2488 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2489 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2490 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2492 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2493 				break;
2494 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2495 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2496 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2497 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2498 						 TILE_SPLIT(split_equal_to_row_size) |
2499 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2500 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2502 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2503 				break;
2504 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2505 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2506 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2507 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2508 						 TILE_SPLIT(split_equal_to_row_size) |
2509 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2510 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2512 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2513 				break;
2514 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2515 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2517 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2518 						 TILE_SPLIT(split_equal_to_row_size) |
2519 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2520 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2523 				break;
2524 			case 8:  /* 1D and 1D Array Surfaces */
2525 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2526 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2527 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2528 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2529 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2530 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2532 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2533 				break;
2534 			case 9:  /* Displayable maps. */
2535 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2536 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2537 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2538 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2539 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2540 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2541 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2542 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2543 				break;
2544 			case 10:  /* Display 8bpp. */
2545 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2547 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2548 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2549 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2550 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2552 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2553 				break;
2554 			case 11:  /* Display 16bpp. */
2555 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2556 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2557 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2558 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2559 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2560 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2561 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2562 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2563 				break;
2564 			case 12:  /* Display 32bpp. */
2565 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2566 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2567 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2568 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2569 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2570 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2572 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2573 				break;
2574 			case 13:  /* Thin. */
2575 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2576 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2577 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2578 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2579 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2580 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2581 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2582 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2583 				break;
2584 			case 14:  /* Thin 8 bpp. */
2585 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2586 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2587 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2588 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2589 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2590 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2591 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2592 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2593 				break;
2594 			case 15:  /* Thin 16 bpp. */
2595 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2597 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2598 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2599 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2600 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2602 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2603 				break;
2604 			case 16:  /* Thin 32 bpp. */
2605 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2607 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2608 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2609 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2610 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2612 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2613 				break;
2614 			case 17:  /* Thin 64 bpp. */
2615 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2617 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2618 						 TILE_SPLIT(split_equal_to_row_size) |
2619 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2620 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2622 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2623 				break;
2624 			case 21:  /* 8 bpp PRT. */
2625 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2626 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2627 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2628 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2629 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2630 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2631 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2632 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2633 				break;
2634 			case 22:  /* 16 bpp PRT */
2635 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2636 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2637 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2638 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2639 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2640 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2642 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2643 				break;
2644 			case 23:  /* 32 bpp PRT */
2645 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2646 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2647 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2648 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2649 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2650 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2652 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2653 				break;
2654 			case 24:  /* 64 bpp PRT */
2655 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2656 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2657 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2658 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2659 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2660 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2662 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2663 				break;
2664 			case 25:  /* 128 bpp PRT */
2665 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2667 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2668 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2669 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2670 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2671 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2672 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2673 				break;
2674 			default:
2675 				gb_tile_moden = 0;
2676 				break;
2677 			}
2678 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2679 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2680 		}
2681 	} else if ((rdev->family == CHIP_VERDE) ||
2682 		   (rdev->family == CHIP_OLAND) ||
2683 		   (rdev->family == CHIP_HAINAN)) {
2684 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2685 			switch (reg_offset) {
2686 			case 0:  /* non-AA compressed depth or any compressed stencil */
2687 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2688 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2689 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2690 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2691 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2692 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2693 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2694 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2695 				break;
2696 			case 1:  /* 2xAA/4xAA compressed depth only */
2697 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2698 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2699 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2700 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2701 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2702 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2704 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2705 				break;
2706 			case 2:  /* 8xAA compressed depth only */
2707 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2709 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2710 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2711 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2712 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2713 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2714 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2715 				break;
2716 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2717 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2718 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2719 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2720 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2721 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2722 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2723 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2724 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2725 				break;
2726 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2727 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2728 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2729 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2731 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2732 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2734 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2735 				break;
2736 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2737 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2739 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740 						 TILE_SPLIT(split_equal_to_row_size) |
2741 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2742 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2743 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2744 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2745 				break;
2746 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2747 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2748 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2749 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2750 						 TILE_SPLIT(split_equal_to_row_size) |
2751 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2752 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2753 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2754 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2755 				break;
2756 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2757 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2758 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2759 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2760 						 TILE_SPLIT(split_equal_to_row_size) |
2761 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2762 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2764 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2765 				break;
2766 			case 8:  /* 1D and 1D Array Surfaces */
2767 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2768 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2769 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2770 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2771 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2772 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2773 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2774 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2775 				break;
2776 			case 9:  /* Displayable maps. */
2777 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2778 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2779 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2781 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2782 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2783 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2784 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2785 				break;
2786 			case 10:  /* Display 8bpp. */
2787 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2788 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2789 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2790 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2791 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2792 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2794 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2795 				break;
2796 			case 11:  /* Display 16bpp. */
2797 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2798 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2799 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2800 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2801 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2802 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2803 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2804 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2805 				break;
2806 			case 12:  /* Display 32bpp. */
2807 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2808 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2809 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2811 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2812 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2814 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2815 				break;
2816 			case 13:  /* Thin. */
2817 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2818 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2819 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2820 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2821 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2822 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2824 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2825 				break;
2826 			case 14:  /* Thin 8 bpp. */
2827 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2828 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2829 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2830 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2831 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2832 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2835 				break;
2836 			case 15:  /* Thin 16 bpp. */
2837 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2838 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2839 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2840 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2841 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2842 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2844 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2845 				break;
2846 			case 16:  /* Thin 32 bpp. */
2847 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2848 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2849 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2850 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2851 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2852 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2855 				break;
2856 			case 17:  /* Thin 64 bpp. */
2857 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2858 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2859 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2860 						 TILE_SPLIT(split_equal_to_row_size) |
2861 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2862 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2863 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2864 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2865 				break;
2866 			case 21:  /* 8 bpp PRT. */
2867 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2868 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2869 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2870 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2871 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2872 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2873 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2874 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2875 				break;
2876 			case 22:  /* 16 bpp PRT */
2877 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2879 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2880 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2881 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2882 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2883 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2884 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2885 				break;
2886 			case 23:  /* 32 bpp PRT */
2887 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2888 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2889 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2890 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2891 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2892 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2893 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2894 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2895 				break;
2896 			case 24:  /* 64 bpp PRT */
2897 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2899 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2900 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2901 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2902 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2903 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2904 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2905 				break;
2906 			case 25:  /* 128 bpp PRT */
2907 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2908 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2909 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2910 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2911 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2912 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2913 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2914 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2915 				break;
2916 			default:
2917 				gb_tile_moden = 0;
2918 				break;
2919 			}
2920 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2921 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2922 		}
2923 	} else
2924 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2925 }
2926 
2927 static void si_select_se_sh(struct radeon_device *rdev,
2928 			    u32 se_num, u32 sh_num)
2929 {
2930 	u32 data = INSTANCE_BROADCAST_WRITES;
2931 
2932 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2933 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2934 	else if (se_num == 0xffffffff)
2935 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2936 	else if (sh_num == 0xffffffff)
2937 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2938 	else
2939 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2940 	WREG32(GRBM_GFX_INDEX, data);
2941 }
2942 
2943 static u32 si_create_bitmask(u32 bit_width)
2944 {
2945 	u32 i, mask = 0;
2946 
2947 	for (i = 0; i < bit_width; i++) {
2948 		mask <<= 1;
2949 		mask |= 1;
2950 	}
2951 	return mask;
2952 }
2953 
2954 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2955 {
2956 	u32 data, mask;
2957 
2958 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2959 	if (data & 1)
2960 		data &= INACTIVE_CUS_MASK;
2961 	else
2962 		data = 0;
2963 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2964 
2965 	data >>= INACTIVE_CUS_SHIFT;
2966 
2967 	mask = si_create_bitmask(cu_per_sh);
2968 
2969 	return ~data & mask;
2970 }
2971 
2972 static void si_setup_spi(struct radeon_device *rdev,
2973 			 u32 se_num, u32 sh_per_se,
2974 			 u32 cu_per_sh)
2975 {
2976 	int i, j, k;
2977 	u32 data, mask, active_cu;
2978 
2979 	for (i = 0; i < se_num; i++) {
2980 		for (j = 0; j < sh_per_se; j++) {
2981 			si_select_se_sh(rdev, i, j);
2982 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2983 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2984 
2985 			mask = 1;
2986 			for (k = 0; k < 16; k++) {
2987 				mask <<= k;
2988 				if (active_cu & mask) {
2989 					data &= ~mask;
2990 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2991 					break;
2992 				}
2993 			}
2994 		}
2995 	}
2996 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2997 }
2998 
2999 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3000 			      u32 max_rb_num_per_se,
3001 			      u32 sh_per_se)
3002 {
3003 	u32 data, mask;
3004 
3005 	data = RREG32(CC_RB_BACKEND_DISABLE);
3006 	if (data & 1)
3007 		data &= BACKEND_DISABLE_MASK;
3008 	else
3009 		data = 0;
3010 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3011 
3012 	data >>= BACKEND_DISABLE_SHIFT;
3013 
3014 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3015 
3016 	return data & mask;
3017 }
3018 
/*
 * si_setup_rb - configure the render backends (RBs)
 *
 * Gathers the per-shader-array RB disable bits, derives the global
 * enabled-RB mask, caches it in rdev->config.si.backend_enable_mask
 * and programs PA_SC_RASTER_CONFIG on every shader engine to map
 * rasterizer output onto the RBs that are actually present.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* collect the disabled-RB bits from each SE/SH pair into one word */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* back to broadcast mode */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: a set bit now means the RB is enabled */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.si.backend_enable_mask = enabled_rbs;

	/* program raster config per SE; 2 bits of enabled_rbs consumed per SH */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:	/* only the first RB of the pair is usable */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:	/* only the second RB of the pair is usable */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:	/* both RBs usable */
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* back to broadcast mode */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3068 
/*
 * si_gpu_init - bring up the 3D engine
 *
 * Fills in the per-ASIC configuration limits, programs the tiling and
 * address configuration, sets up the render backends and SPI CU masks,
 * and writes the hardware defaults for the 3D engine.  The register
 * writes below follow the required init ordering; do not reorder them.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family shader engine / pipe / cache topology limits */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but not used below */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB, capped at 4) from the column count */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* broadcast the address config to every block that needs it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);

	/* count the active compute units across all shader arrays */
	rdev->config.si.active_cus = 0;
	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
			rdev->config.si.active_cus +=
				hweight32(si_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no modification; presumably a required
	 * readback/settle sequence — kept as-is */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* disable the CB perf counters */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the configuration settle before the CP is started */
	udelay(50);
}
3332 
3333 /*
3334  * GPU scratch registers helpers function.
3335  */
3336 static void si_scratch_init(struct radeon_device *rdev)
3337 {
3338 	int i;
3339 
3340 	rdev->scratch.num_reg = 7;
3341 	rdev->scratch.reg_base = SCRATCH_REG0;
3342 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3343 		rdev->scratch.free[i] = true;
3344 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3345 	}
3346 }
3347 
/*
 * si_fence_ring_emit - emit a fence on the gfx ring
 * @rdev: radeon_device pointer
 * @fence: fence to emit
 *
 * Flushes the GPU caches over the GART, then emits an EVENT_WRITE_EOP
 * packet that writes the fence sequence number to the fence address
 * and raises an interrupt.  The packet stream below is an exact
 * hardware protocol; do not reorder the writes.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF); /* CP_COHER_SIZE: full range */
	radeon_ring_write(ring, 0);          /* CP_COHER_BASE */
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, lower_32_bits(addr));
	/* DATA_SEL(1): write 32-bit seq; INT_SEL(2): interrupt after write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3374 
3375 /*
3376  * IB stuff
3377  */
/*
 * si_ring_ib_execute - schedule an indirect buffer on the gfx ring
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to execute
 *
 * Emits an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for const IBs)
 * packet pointing at the IB, preceded by a next_rptr update and, for
 * normal IBs, followed by a per-VMID GART cache flush.  The literal
 * "+ 3 + 4 + 8" / "+ 5 + 4 + 8" offsets are the dword counts of the
 * packets emitted after the update; they must track any packet changes
 * made below.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* save the post-IB rptr to a scratch register */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* no scratch reg: write the post-IB rptr to memory */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8)); /* write confirm */
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* the IB packet: header, address lo (with swap bits), address hi,
	 * then size plus the VMID in bits 31:24 */
	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, vm_id);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF); /* CP_COHER_SIZE: full range */
		radeon_ring_write(ring, 0);          /* CP_COHER_BASE */
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3434 
3435 /*
3436  * CP.
3437  */
3438 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3439 {
3440 	if (enable)
3441 		WREG32(CP_ME_CNTL, 0);
3442 	else {
3443 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3444 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3445 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3446 		WREG32(SCRATCH_UMSK, 0);
3447 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3448 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3449 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3450 	}
3451 	udelay(50);
3452 }
3453 
3454 static int si_cp_load_microcode(struct radeon_device *rdev)
3455 {
3456 	int i;
3457 
3458 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3459 		return -EINVAL;
3460 
3461 	si_cp_enable(rdev, false);
3462 
3463 	if (rdev->new_fw) {
3464 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3465 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3466 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3467 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3468 		const struct gfx_firmware_header_v1_0 *me_hdr =
3469 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3470 		const __le32 *fw_data;
3471 		u32 fw_size;
3472 
3473 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3474 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3475 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3476 
3477 		/* PFP */
3478 		fw_data = (const __le32 *)
3479 			((const char *)rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3480 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3481 		WREG32(CP_PFP_UCODE_ADDR, 0);
3482 		for (i = 0; i < fw_size; i++)
3483 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3484 		WREG32(CP_PFP_UCODE_ADDR, 0);
3485 
3486 		/* CE */
3487 		fw_data = (const __le32 *)
3488 			((const char *)rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3489 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3490 		WREG32(CP_CE_UCODE_ADDR, 0);
3491 		for (i = 0; i < fw_size; i++)
3492 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3493 		WREG32(CP_CE_UCODE_ADDR, 0);
3494 
3495 		/* ME */
3496 		fw_data = (const __be32 *)
3497 			((const char *)rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3498 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3499 		WREG32(CP_ME_RAM_WADDR, 0);
3500 		for (i = 0; i < fw_size; i++)
3501 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3502 		WREG32(CP_ME_RAM_WADDR, 0);
3503 	} else {
3504 		const __be32 *fw_data;
3505 
3506 		/* PFP */
3507 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3508 		WREG32(CP_PFP_UCODE_ADDR, 0);
3509 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3510 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3511 		WREG32(CP_PFP_UCODE_ADDR, 0);
3512 
3513 		/* CE */
3514 		fw_data = (const __be32 *)rdev->ce_fw->data;
3515 		WREG32(CP_CE_UCODE_ADDR, 0);
3516 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3517 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3518 		WREG32(CP_CE_UCODE_ADDR, 0);
3519 
3520 		/* ME */
3521 		fw_data = (const __be32 *)rdev->me_fw->data;
3522 		WREG32(CP_ME_RAM_WADDR, 0);
3523 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3524 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3525 		WREG32(CP_ME_RAM_WADDR, 0);
3526 	}
3527 
3528 	WREG32(CP_PFP_UCODE_ADDR, 0);
3529 	WREG32(CP_CE_UCODE_ADDR, 0);
3530 	WREG32(CP_ME_RAM_WADDR, 0);
3531 	WREG32(CP_ME_RAM_RADDR, 0);
3532 	return 0;
3533 }
3534 
3535 static int si_cp_start(struct radeon_device *rdev)
3536 {
3537 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3538 	int r, i;
3539 
3540 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3541 	if (r) {
3542 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3543 		return r;
3544 	}
3545 	/* init the CP */
3546 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3547 	radeon_ring_write(ring, 0x1);
3548 	radeon_ring_write(ring, 0x0);
3549 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3550 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3551 	radeon_ring_write(ring, 0);
3552 	radeon_ring_write(ring, 0);
3553 
3554 	/* init the CE partitions */
3555 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3556 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3557 	radeon_ring_write(ring, 0xc000);
3558 	radeon_ring_write(ring, 0xe000);
3559 	radeon_ring_unlock_commit(rdev, ring, false);
3560 
3561 	si_cp_enable(rdev, true);
3562 
3563 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3564 	if (r) {
3565 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3566 		return r;
3567 	}
3568 
3569 	/* setup clear context state */
3570 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3571 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3572 
3573 	for (i = 0; i < si_default_size; i++)
3574 		radeon_ring_write(ring, si_default_state[i]);
3575 
3576 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3577 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3578 
3579 	/* set clear context state */
3580 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3581 	radeon_ring_write(ring, 0);
3582 
3583 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3584 	radeon_ring_write(ring, 0x00000316);
3585 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3586 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3587 
3588 	radeon_ring_unlock_commit(rdev, ring, false);
3589 
3590 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3591 		ring = &rdev->ring[i];
3592 		r = radeon_ring_lock(rdev, ring, 2);
3593 
3594 		/* clear the compute context state */
3595 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3596 		radeon_ring_write(ring, 0);
3597 
3598 		radeon_ring_unlock_commit(rdev, ring, false);
3599 	}
3600 
3601 	return 0;
3602 }
3603 
3604 static void si_cp_fini(struct radeon_device *rdev)
3605 {
3606 	struct radeon_ring *ring;
3607 	si_cp_enable(rdev, false);
3608 
3609 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3610 	radeon_ring_fini(rdev, ring);
3611 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3612 
3613 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3614 	radeon_ring_fini(rdev, ring);
3615 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3616 
3617 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3618 	radeon_ring_fini(rdev, ring);
3619 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3620 }
3621 
/*
 * si_cp_resume - bring up the three CP ring buffers
 *
 * Programs ring-buffer size, read/write pointers, writeback addresses
 * and base addresses for CP_RB0 (gfx+compute), CP_RB1 and CP_RB2
 * (compute only), starts the CP via si_cp_start(), and ring-tests all
 * three rings.  The register sequence (CNTL with RB_RPTR_WR_ENA, then
 * WPTR, then final CNTL) is repeated once per ring.
 *
 * Returns 0 on success.  A GFX ring test failure is fatal and returned;
 * compute ring test failures only clear that ring's ready flag.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* no writeback: disable rptr updates for RB0 */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
3747 
/*
 * si_gpu_check_soft_reset - determine which GPU blocks look hung
 *
 * Reads the GRBM, SRBM, DMA and VM L2 status registers and builds a
 * mask of RADEON_RESET_* flags for every block that reports busy.
 * A busy MC is deliberately dropped from the mask at the end, since
 * it is most likely just busy rather than hung.
 *
 * Returns the reset mask (0 if no block appears hung).
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3828 
/*
 * si_gpu_soft_reset - soft-reset the GPU blocks named in reset_mask
 *
 * Sequence: dump fault/status registers, disable PG/CG and the RLC,
 * halt the CP and the requested DMA engines, stop the MC, then pulse
 * the per-block bits in GRBM_SOFT_RESET / SRBM_SOFT_RESET (set, wait,
 * clear), and finally resume the MC.  Each soft-reset register is read
 * back after writing to flush the write before the delay.
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* flags, typically from
 *              si_gpu_check_soft_reset(); no-op when 0.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3960 
/*
 * si_set_clk_bypass_mode - switch SCLK/MCLK to bypass clocks
 *
 * Enables SPLL bypass, requests the clock change via SPLL_CTLREQ_CHG
 * and polls SPLL_STATUS (up to rdev->usec_timeout us) for the change
 * to take effect, then deselects the MPLL as the MCLK source.
 * Used by si_gpu_pci_config_reset() before powering down the SPLL.
 */
static void si_set_clk_bypass_mode(struct radeon_device *rdev)
{
	u32 tmp, i;

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_BYPASS_EN;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp |= SPLL_CTLREQ_CHG;
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	/* wait for the request to be acknowledged */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
			break;
		udelay(1);
	}

	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	tmp = RREG32(MPLL_CNTL_MODE);
	tmp &= ~MPLL_MCLK_SEL;
	WREG32(MPLL_CNTL_MODE, tmp);
}
3987 
/*
 * si_spll_powerdown - put the SPLL into reset and sleep
 *
 * Takes software control of the SPLL, asserts SPLL_RESET and
 * SPLL_SLEEP, then returns control to the hardware.  Called by
 * si_gpu_pci_config_reset() after clocks are switched to bypass.
 */
static void si_spll_powerdown(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = RREG32(SPLL_CNTL_MODE);
	tmp |= SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_RESET;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_SLEEP;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(SPLL_CNTL_MODE);
	tmp &= ~SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);
}
4008 
/*
 * si_gpu_pci_config_reset - reset the GPU through PCI config space
 *
 * Heavier fallback than a soft reset: disables CG/PG, halts the CP and
 * both DMA engines, stops the RLC and MC, switches clocks to bypass,
 * powers down the SPLL, disables bus mastering, triggers the PCI
 * config reset, and polls CONFIG_MEMSIZE (up to rdev->usec_timeout us)
 * until the ASIC responds again.
 */
static void si_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
	/* dma0 */
	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	/* dma1 */
	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	si_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* set mclk/sclk to bypass */
	si_set_clk_bypass_mode(rdev);
	/* powerdown spll */
	si_spll_powerdown(rdev);
	/* disable BM */
	pci_disable_busmaster(rdev->pdev->dev.bsddev);
	/* reset */
	radeon_pci_config_reset(rdev);
	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}
}
4060 
4061 int si_asic_reset(struct radeon_device *rdev)
4062 {
4063 	u32 reset_mask;
4064 
4065 	reset_mask = si_gpu_check_soft_reset(rdev);
4066 
4067 	if (reset_mask)
4068 		r600_set_bios_scratch_engine_hung(rdev, true);
4069 
4070 	/* try soft reset */
4071 	si_gpu_soft_reset(rdev, reset_mask);
4072 
4073 	reset_mask = si_gpu_check_soft_reset(rdev);
4074 
4075 	/* try pci config reset */
4076 	if (reset_mask && radeon_hard_reset)
4077 		si_gpu_pci_config_reset(rdev);
4078 
4079 	reset_mask = si_gpu_check_soft_reset(rdev);
4080 
4081 	if (!reset_mask)
4082 		r600_set_bios_scratch_engine_hung(rdev, false);
4083 
4084 	return 0;
4085 }
4086 
4087 /**
4088  * si_gfx_is_lockup - Check if the GFX engine is locked up
4089  *
4090  * @rdev: radeon_device pointer
4091  * @ring: radeon_ring structure holding ring information
4092  *
4093  * Check if the GFX engine is locked up.
4094  * Returns true if the engine appears to be locked up, false if not.
4095  */
4096 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4097 {
4098 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4099 
4100 	if (!(reset_mask & (RADEON_RESET_GFX |
4101 			    RADEON_RESET_COMPUTE |
4102 			    RADEON_RESET_CP))) {
4103 		radeon_ring_lockup_update(rdev, ring);
4104 		return false;
4105 	}
4106 	return radeon_ring_test_lockup(rdev, ring);
4107 }
4108 
4109 /* MC */
/*
 * si_mc_program - program the memory controller's VRAM layout
 *
 * Clears the HDP tiling registers, stops the MC, locks out VGA
 * aperture access (on ASICs with display), programs the system
 * aperture, FB location, HDP nonsurface registers, and disables the
 * AGP aperture, then resumes the MC and disables the VGA renderer.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in the high 16 bits, start in the low 16 */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
4160 
4161 void si_vram_gtt_location(struct radeon_device *rdev,
4162 			  struct radeon_mc *mc)
4163 {
4164 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4165 		/* leave room for at least 1024M GTT */
4166 		dev_warn(rdev->dev, "limiting VRAM\n");
4167 		mc->real_vram_size = 0xFFC0000000ULL;
4168 		mc->mc_vram_size = 0xFFC0000000ULL;
4169 	}
4170 	radeon_vram_location(rdev, &rdev->mc, 0);
4171 	rdev->mc.gtt_base_align = 0;
4172 	radeon_gtt_location(rdev, mc);
4173 }
4174 
4175 static int si_mc_init(struct radeon_device *rdev)
4176 {
4177 	u32 tmp;
4178 	int chansize, numchan;
4179 
4180 	/* Get VRAM informations */
4181 	rdev->mc.vram_is_ddr = true;
4182 	tmp = RREG32(MC_ARB_RAMCFG);
4183 	if (tmp & CHANSIZE_OVERRIDE) {
4184 		chansize = 16;
4185 	} else if (tmp & CHANSIZE_MASK) {
4186 		chansize = 64;
4187 	} else {
4188 		chansize = 32;
4189 	}
4190 	tmp = RREG32(MC_SHARED_CHMAP);
4191 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4192 	case 0:
4193 	default:
4194 		numchan = 1;
4195 		break;
4196 	case 1:
4197 		numchan = 2;
4198 		break;
4199 	case 2:
4200 		numchan = 4;
4201 		break;
4202 	case 3:
4203 		numchan = 8;
4204 		break;
4205 	case 4:
4206 		numchan = 3;
4207 		break;
4208 	case 5:
4209 		numchan = 6;
4210 		break;
4211 	case 6:
4212 		numchan = 10;
4213 		break;
4214 	case 7:
4215 		numchan = 12;
4216 		break;
4217 	case 8:
4218 		numchan = 16;
4219 		break;
4220 	}
4221 	rdev->mc.vram_width = numchan * chansize;
4222 	/* Could aper size report 0 ? */
4223 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4224 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4225 	/* size in MB on si */
4226 	tmp = RREG32(CONFIG_MEMSIZE);
4227 	/* some boards may have garbage in the upper 16 bits */
4228 	if (tmp & 0xffff0000) {
4229 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4230 		if (tmp & 0xffff)
4231 			tmp &= 0xffff;
4232 	}
4233 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4234 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4235 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4236 	si_vram_gtt_location(rdev, &rdev->mc);
4237 	radeon_update_bandwidth_info(rdev);
4238 
4239 	return 0;
4240 }
4241 
4242 /*
4243  * GART
4244  */
/*
 * si_pcie_gart_tlb_flush - flush the HDP cache and VM context0 TLB
 * after GART page table updates.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
4253 
/*
 * si_pcie_gart_enable - set up the PCIE GART
 *
 * Pins the GART table in VRAM, configures the L1 TLB and L2 cache,
 * points VM context0 at the GART table (system/kernel mappings), and
 * configures contexts 1-15 for per-process VMs with protection faults
 * routed to the dummy page.  Flushes the TLB and marks the GART ready.
 *
 * Returns 0 on success, negative error code on failure
 * (no GART object, or table pin failure).
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		/* contexts 0-7 and 8-15 use separate register banks;
		 * restore the addresses saved by si_pcie_gart_disable() */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4341 
/*
 * si_pcie_gart_disable - tear down the PCIE GART
 *
 * Saves the per-VM page table base addresses (restored later by
 * si_pcie_gart_enable()), disables all VM contexts, reconfigures the
 * TLB/L2 for pass-through operation, and unpins the GART table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		/* contexts 0-7 and 8-15 live in separate register banks */
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4371 
/*
 * si_pcie_gart_fini - final GART teardown: disable the GART, free the
 * table in VRAM and release the GART structures.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4378 
4379 /* vm parser */
4380 static bool si_vm_reg_valid(u32 reg)
4381 {
4382 	/* context regs are fine */
4383 	if (reg >= 0x28000)
4384 		return true;
4385 
4386 	/* check config regs */
4387 	switch (reg) {
4388 	case GRBM_GFX_INDEX:
4389 	case CP_STRMOUT_CNTL:
4390 	case VGT_VTX_VECT_EJECT_REG:
4391 	case VGT_CACHE_INVALIDATION:
4392 	case VGT_ESGS_RING_SIZE:
4393 	case VGT_GSVS_RING_SIZE:
4394 	case VGT_GS_VERTEX_REUSE:
4395 	case VGT_PRIMITIVE_TYPE:
4396 	case VGT_INDEX_TYPE:
4397 	case VGT_NUM_INDICES:
4398 	case VGT_NUM_INSTANCES:
4399 	case VGT_TF_RING_SIZE:
4400 	case VGT_HS_OFFCHIP_PARAM:
4401 	case VGT_TF_MEMORY_BASE:
4402 	case PA_CL_ENHANCE:
4403 	case PA_SU_LINE_STIPPLE_VALUE:
4404 	case PA_SC_LINE_STIPPLE_STATE:
4405 	case PA_SC_ENHANCE:
4406 	case SQC_CACHES:
4407 	case SPI_STATIC_THREAD_MGMT_1:
4408 	case SPI_STATIC_THREAD_MGMT_2:
4409 	case SPI_STATIC_THREAD_MGMT_3:
4410 	case SPI_PS_MAX_WAVE_ID:
4411 	case SPI_CONFIG_CNTL:
4412 	case SPI_CONFIG_CNTL_1:
4413 	case TA_CNTL_AUX:
4414 		return true;
4415 	default:
4416 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4417 		return false;
4418 	}
4419 }
4420 
4421 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4422 				  u32 *ib, struct radeon_cs_packet *pkt)
4423 {
4424 	switch (pkt->opcode) {
4425 	case PACKET3_NOP:
4426 	case PACKET3_SET_BASE:
4427 	case PACKET3_SET_CE_DE_COUNTERS:
4428 	case PACKET3_LOAD_CONST_RAM:
4429 	case PACKET3_WRITE_CONST_RAM:
4430 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4431 	case PACKET3_DUMP_CONST_RAM:
4432 	case PACKET3_INCREMENT_CE_COUNTER:
4433 	case PACKET3_WAIT_ON_DE_COUNTER:
4434 	case PACKET3_CE_WRITE:
4435 		break;
4436 	default:
4437 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4438 		return -EINVAL;
4439 	}
4440 	return 0;
4441 }
4442 
4443 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4444 {
4445 	u32 start_reg, reg, i;
4446 	u32 command = ib[idx + 4];
4447 	u32 info = ib[idx + 1];
4448 	u32 idx_value = ib[idx];
4449 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4450 		/* src address space is register */
4451 		if (((info & 0x60000000) >> 29) == 0) {
4452 			start_reg = idx_value << 2;
4453 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4454 				reg = start_reg;
4455 				if (!si_vm_reg_valid(reg)) {
4456 					DRM_ERROR("CP DMA Bad SRC register\n");
4457 					return -EINVAL;
4458 				}
4459 			} else {
4460 				for (i = 0; i < (command & 0x1fffff); i++) {
4461 					reg = start_reg + (4 * i);
4462 					if (!si_vm_reg_valid(reg)) {
4463 						DRM_ERROR("CP DMA Bad SRC register\n");
4464 						return -EINVAL;
4465 					}
4466 				}
4467 			}
4468 		}
4469 	}
4470 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4471 		/* dst address space is register */
4472 		if (((info & 0x00300000) >> 20) == 0) {
4473 			start_reg = ib[idx + 2];
4474 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4475 				reg = start_reg;
4476 				if (!si_vm_reg_valid(reg)) {
4477 					DRM_ERROR("CP DMA Bad DST register\n");
4478 					return -EINVAL;
4479 				}
4480 			} else {
4481 				for (i = 0; i < (command & 0x1fffff); i++) {
4482 					reg = start_reg + (4 * i);
4483 				if (!si_vm_reg_valid(reg)) {
4484 						DRM_ERROR("CP DMA Bad DST register\n");
4485 						return -EINVAL;
4486 					}
4487 				}
4488 			}
4489 		}
4490 	}
4491 	return 0;
4492 }
4493 
/*
 * si_vm_packet3_gfx_check - validate a PACKET3 on the gfx ring
 *
 * Packets in the big whitelist are allowed as-is; packets that can
 * write arbitrary registers (COPY_DATA, WRITE_DATA, COND_WRITE,
 * COPY_DW, SET_CONFIG_REG, CP_DMA) have their target registers
 * checked against si_vm_reg_valid().
 *
 * Returns 0 if the packet is permitted, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dest is a register only when the dest-sel field is 0 */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel 0 means register writes; bit 16 = no increment */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* whole register range must stay inside the config window */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4611 
/**
 * si_vm_packet3_compute_check - validate a type-3 packet from a VM IB
 * (compute rings)
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword buffer
 * @pkt: decoded packet header (pkt->idx indexes the header dword in @ib)
 *
 * Mirrors si_vm_packet3_gfx_check() but with the smaller set of opcodes
 * the compute rings accept.  Returns 0 if the packet is acceptable,
 * -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;		/* first dword after the header */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* these opcodes need no register validation */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* destination select bits clear: target looks like a
		 * register — validate it */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;	/* dword offset -> byte offset */
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* single-register write */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* sequential write: validate every register
				 * in the payload */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		/* CP DMA has its own dedicated checker */
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4699 
/**
 * si_ib_parse - walk and validate every packet of a VM IB
 *
 * @rdev: radeon_device pointer
 * @ib: the IB to parse
 *
 * Iterates over the IB dword stream packet by packet.  Type-0 packets
 * are rejected (and the whole IB is dumped for debugging), type-2
 * packets are skipped, and type-3 packets are dispatched to the
 * CE/GFX/compute checker that matches the IB's ring.  Stops at the
 * first invalid packet.  Returns 0 on success, negative error code on
 * the first rejected packet.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0, i;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			/* type-0 register writes are never allowed from a
			 * VM IB; dump the IB with the offender marked */
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			for (i = 0; i < ib->length_dw; i++) {
				if (i == idx)
					printk("\t0x%08x <---\n", ib->ptr[i]);
				else
					printk("\t0x%08x\n", ib->ptr[i]);
			}
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* type-2 packets are single-dword padding */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* header + body dwords (count is body length - 1) */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}
4757 
4758 /*
4759  * vm
4760  */
4761 int si_vm_init(struct radeon_device *rdev)
4762 {
4763 	/* number of VMs */
4764 	rdev->vm_manager.nvm = 16;
4765 	/* base offset of vram pages */
4766 	rdev->vm_manager.vram_base_offset = 0;
4767 
4768 	return 0;
4769 }
4770 
/**
 * si_vm_fini - tear down VM manager state
 *
 * @rdev: radeon_device pointer
 *
 * Nothing to do on SI; si_vm_init() allocates no resources.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4774 
4775 /**
4776  * si_vm_decode_fault - print human readable fault info
4777  *
4778  * @rdev: radeon_device pointer
4779  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4780  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4781  *
4782  * Print human readable fault information (SI).
4783  */
4784 static void si_vm_decode_fault(struct radeon_device *rdev,
4785 			       u32 status, u32 addr)
4786 {
4787 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4788 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4789 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4790 	char *block;
4791 
4792 	if (rdev->family == CHIP_TAHITI) {
4793 		switch (mc_id) {
4794 		case 160:
4795 		case 144:
4796 		case 96:
4797 		case 80:
4798 		case 224:
4799 		case 208:
4800 		case 32:
4801 		case 16:
4802 			block = "CB";
4803 			break;
4804 		case 161:
4805 		case 145:
4806 		case 97:
4807 		case 81:
4808 		case 225:
4809 		case 209:
4810 		case 33:
4811 		case 17:
4812 			block = "CB_FMASK";
4813 			break;
4814 		case 162:
4815 		case 146:
4816 		case 98:
4817 		case 82:
4818 		case 226:
4819 		case 210:
4820 		case 34:
4821 		case 18:
4822 			block = "CB_CMASK";
4823 			break;
4824 		case 163:
4825 		case 147:
4826 		case 99:
4827 		case 83:
4828 		case 227:
4829 		case 211:
4830 		case 35:
4831 		case 19:
4832 			block = "CB_IMMED";
4833 			break;
4834 		case 164:
4835 		case 148:
4836 		case 100:
4837 		case 84:
4838 		case 228:
4839 		case 212:
4840 		case 36:
4841 		case 20:
4842 			block = "DB";
4843 			break;
4844 		case 165:
4845 		case 149:
4846 		case 101:
4847 		case 85:
4848 		case 229:
4849 		case 213:
4850 		case 37:
4851 		case 21:
4852 			block = "DB_HTILE";
4853 			break;
4854 		case 167:
4855 		case 151:
4856 		case 103:
4857 		case 87:
4858 		case 231:
4859 		case 215:
4860 		case 39:
4861 		case 23:
4862 			block = "DB_STEN";
4863 			break;
4864 		case 72:
4865 		case 68:
4866 		case 64:
4867 		case 8:
4868 		case 4:
4869 		case 0:
4870 		case 136:
4871 		case 132:
4872 		case 128:
4873 		case 200:
4874 		case 196:
4875 		case 192:
4876 			block = "TC";
4877 			break;
4878 		case 112:
4879 		case 48:
4880 			block = "CP";
4881 			break;
4882 		case 49:
4883 		case 177:
4884 		case 50:
4885 		case 178:
4886 			block = "SH";
4887 			break;
4888 		case 53:
4889 		case 190:
4890 			block = "VGT";
4891 			break;
4892 		case 117:
4893 			block = "IH";
4894 			break;
4895 		case 51:
4896 		case 115:
4897 			block = "RLC";
4898 			break;
4899 		case 119:
4900 		case 183:
4901 			block = "DMA0";
4902 			break;
4903 		case 61:
4904 			block = "DMA1";
4905 			break;
4906 		case 248:
4907 		case 120:
4908 			block = "HDP";
4909 			break;
4910 		default:
4911 			block = "unknown";
4912 			break;
4913 		}
4914 	} else {
4915 		switch (mc_id) {
4916 		case 32:
4917 		case 16:
4918 		case 96:
4919 		case 80:
4920 		case 160:
4921 		case 144:
4922 		case 224:
4923 		case 208:
4924 			block = "CB";
4925 			break;
4926 		case 33:
4927 		case 17:
4928 		case 97:
4929 		case 81:
4930 		case 161:
4931 		case 145:
4932 		case 225:
4933 		case 209:
4934 			block = "CB_FMASK";
4935 			break;
4936 		case 34:
4937 		case 18:
4938 		case 98:
4939 		case 82:
4940 		case 162:
4941 		case 146:
4942 		case 226:
4943 		case 210:
4944 			block = "CB_CMASK";
4945 			break;
4946 		case 35:
4947 		case 19:
4948 		case 99:
4949 		case 83:
4950 		case 163:
4951 		case 147:
4952 		case 227:
4953 		case 211:
4954 			block = "CB_IMMED";
4955 			break;
4956 		case 36:
4957 		case 20:
4958 		case 100:
4959 		case 84:
4960 		case 164:
4961 		case 148:
4962 		case 228:
4963 		case 212:
4964 			block = "DB";
4965 			break;
4966 		case 37:
4967 		case 21:
4968 		case 101:
4969 		case 85:
4970 		case 165:
4971 		case 149:
4972 		case 229:
4973 		case 213:
4974 			block = "DB_HTILE";
4975 			break;
4976 		case 39:
4977 		case 23:
4978 		case 103:
4979 		case 87:
4980 		case 167:
4981 		case 151:
4982 		case 231:
4983 		case 215:
4984 			block = "DB_STEN";
4985 			break;
4986 		case 72:
4987 		case 68:
4988 		case 8:
4989 		case 4:
4990 		case 136:
4991 		case 132:
4992 		case 200:
4993 		case 196:
4994 			block = "TC";
4995 			break;
4996 		case 112:
4997 		case 48:
4998 			block = "CP";
4999 			break;
5000 		case 49:
5001 		case 177:
5002 		case 50:
5003 		case 178:
5004 			block = "SH";
5005 			break;
5006 		case 53:
5007 			block = "VGT";
5008 			break;
5009 		case 117:
5010 			block = "IH";
5011 			break;
5012 		case 51:
5013 		case 115:
5014 			block = "RLC";
5015 			break;
5016 		case 119:
5017 		case 183:
5018 			block = "DMA0";
5019 			break;
5020 		case 61:
5021 			block = "DMA1";
5022 			break;
5023 		case 248:
5024 		case 120:
5025 			block = "HDP";
5026 			break;
5027 		default:
5028 			block = "unknown";
5029 			break;
5030 		}
5031 	}
5032 
5033 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5034 	       protections, vmid, addr,
5035 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5036 	       block, mc_id);
5037 }
5038 
/**
 * si_vm_flush - flush the VM TLB via the ring
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush on
 * @vm_id: VM context id (0-15)
 * @pd_addr: new page directory base address
 *
 * Emits the packet sequence that updates the page table base for
 * @vm_id, flushes the HDP cache, requests a TLB invalidate and waits
 * for it to complete, then syncs PFP to ME.  The packet order here is
 * deliberate; do not reorder.
 */
void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		 unsigned vm_id, uint64_t pd_addr)
{
	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
5087 
5088 /*
5089  *  Power and clock gating
5090  */
5091 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5092 {
5093 	int i;
5094 
5095 	for (i = 0; i < rdev->usec_timeout; i++) {
5096 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5097 			break;
5098 		udelay(1);
5099 	}
5100 
5101 	for (i = 0; i < rdev->usec_timeout; i++) {
5102 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5103 			break;
5104 		udelay(1);
5105 	}
5106 }
5107 
/**
 * si_enable_gui_idle_interrupt - toggle the gfx context busy/empty irqs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the interrupts
 *
 * When disabling, also waits (bounded by usec_timeout) for the RLC to
 * report only GFX_CLOCK_STATUS | GFX_POWER_STATUS within the watched
 * status bits.
 */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);
	u32 mask;
	int i;

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	if (!enable) {
		/* read a gfx register */
		tmp = RREG32(DB_DEPTH_INFO);

		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
				break;
			udelay(1);
		}
	}
}
5133 
/**
 * si_set_uvd_dcm - configure UVD dynamic clock mode
 *
 * @rdev: radeon_device pointer
 * @sw_mode: true for software-driven divider control, false for
 *           hardware mode
 *
 * Enables DCM with fixed CG_DT/CLK_OD divider settings, then programs
 * the sw/hw control split in UVD_CGC_CTRL and UVD_CGC_CTRL2.
 */
static void si_set_uvd_dcm(struct radeon_device *rdev,
			   bool sw_mode)
{
	u32 tmp, tmp2;

	tmp = RREG32(UVD_CGC_CTRL);
	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
	tmp |= DCM | CG_DT(1) | CLK_OD(4);

	if (sw_mode) {
		/* sw mode: clear the hw-control field, enable dynamic
		 * or/rr with divider id 7 */
		tmp &= ~0x7ffff800;
		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
	} else {
		/* hw mode: set the hw-control field, no sw overrides */
		tmp |= 0x7ffff800;
		tmp2 = 0;
	}

	WREG32(UVD_CGC_CTRL, tmp);
	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
}
5154 
5155 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5156 {
5157 	bool hw_mode = true;
5158 
5159 	if (hw_mode) {
5160 		si_set_uvd_dcm(rdev, false);
5161 	} else {
5162 		u32 tmp = RREG32(UVD_CGC_CTRL);
5163 		tmp &= ~DCM;
5164 		WREG32(UVD_CGC_CTRL, tmp);
5165 	}
5166 }
5167 
/**
 * si_halt_rlc - disable the RLC if it is currently enabled
 *
 * @rdev: radeon_device pointer
 *
 * Returns the RLC_CNTL value from before the halt so the caller can
 * restore it later with si_update_rlc().
 */
static u32 si_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* let pending serdes transactions drain before returning */
		si_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
5183 
5184 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5185 {
5186 	u32 tmp;
5187 
5188 	tmp = RREG32(RLC_CNTL);
5189 	if (tmp != rlc)
5190 		WREG32(RLC_CNTL, rlc);
5191 }
5192 
5193 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5194 {
5195 	u32 data, orig;
5196 
5197 	orig = data = RREG32(DMA_PG);
5198 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5199 		data |= PG_CNTL_ENABLE;
5200 	else
5201 		data &= ~PG_CNTL_ENABLE;
5202 	if (orig != data)
5203 		WREG32(DMA_PG, data);
5204 }
5205 
/**
 * si_init_dma_pg - initialize the DMA power gating state machine
 *
 * @rdev: radeon_device pointer
 *
 * Programs the DMA PGFSM with a fixed init sequence (the magic values
 * are undocumented here; NOTE(review): presumably from AMD's init
 * sequence — confirm against the register spec).
 */
static void si_init_dma_pg(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(DMA_PGFSM_WRITE,  0x00002000);
	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);

	/* tmp is just a loop counter here, not a register value */
	for (tmp = 0; tmp < 5; tmp++)
		WREG32(DMA_PGFSM_WRITE, 0);
}
5216 
/**
 * si_enable_gfx_cgpg - toggle gfx clock/power gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable
 *
 * When enabling (and the asic supports GFX PG), programs the RLC
 * power-up/down delays, sets GFX_PG_ENABLE and turns on automatic
 * power gating.  When disabling, only auto power gating is cleared.
 */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		/* power up/down/ttile/misc delay thresholds */
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* result discarded — the read itself appears intentional
		 * (NOTE(review): presumably a posting/flush read; confirm) */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
5241 
/**
 * si_init_gfx_cgpg - set up gfx power gating state
 *
 * @rdev: radeon_device pointer
 *
 * Points the RLC at the save/restore and clear-state buffers and
 * programs the auto power-gating idle threshold.
 */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

	/* save/restore content comes from the buffer above */
	tmp = RREG32(RLC_PG_CNTL);
	tmp |= GFX_PG_SRC;
	WREG32(RLC_PG_CNTL, tmp);

	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	tmp = RREG32(RLC_AUTO_PG_CTRL);

	/* idle threshold before auto power gating kicks in */
	tmp &= ~GRBM_REG_SGIT_MASK;
	tmp |= GRBM_REG_SGIT(0x700);
	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
	WREG32(RLC_AUTO_PG_CTRL, tmp);
}
5261 
5262 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5263 {
5264 	u32 mask = 0, tmp, tmp1;
5265 	int i;
5266 
5267 	si_select_se_sh(rdev, se, sh);
5268 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5269 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5270 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5271 
5272 	tmp &= 0xffff0000;
5273 
5274 	tmp |= tmp1;
5275 	tmp >>= 16;
5276 
5277 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5278 		mask <<= 1;
5279 		mask |= 1;
5280 	}
5281 
5282 	return (~tmp) & mask;
5283 }
5284 
5285 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5286 {
5287 	u32 i, j, k, active_cu_number = 0;
5288 	u32 mask, counter, cu_bitmap;
5289 	u32 tmp = 0;
5290 
5291 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5292 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5293 			mask = 1;
5294 			cu_bitmap = 0;
5295 			counter  = 0;
5296 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5297 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5298 					if (counter < 2)
5299 						cu_bitmap |= mask;
5300 					counter++;
5301 				}
5302 				mask <<= 1;
5303 			}
5304 
5305 			active_cu_number += counter;
5306 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5307 		}
5308 	}
5309 
5310 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5311 
5312 	tmp = RREG32(RLC_MAX_PG_CU);
5313 	tmp &= ~MAX_PU_CU_MASK;
5314 	tmp |= MAX_PU_CU(active_cu_number);
5315 	WREG32(RLC_MAX_PG_CU, tmp);
5316 }
5317 
/**
 * si_enable_cgcg - toggle coarse-grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable
 *
 * When enabling, halts the RLC, pushes the serdes write masks, waits
 * for the serdes to settle, restores the RLC and finally sets
 * CGCG_EN | CGLS_EN.  When disabling, clears those bits after a few
 * dummy CB_CGTT_SCLK_CTRL reads (NOTE(review): the repeated reads
 * appear to be a settling/flush idiom — confirm against the hw docs).
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5357 
/**
 * si_enable_mgcg - toggle medium-grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable
 *
 * Programs CGTS_SM_CTRL_REG, optional CP memory light sleep, and the
 * RLC MGCG override, then pushes the new settings through the RLC
 * serdes (halting and restoring the RLC around the serdes write).
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* optionally enable CP memory light sleep */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the MGCG override bits (low 6) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* force the override bits back on */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5413 
/**
 * si_enable_uvd_mgcg - toggle UVD medium-grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable
 *
 * Drives the UVD memory gating bits, the DCM bit in UVD_CGC_CTRL and
 * the two SMC-side CGTT local registers (0 = gated, all-ones =
 * ungated).
 */
static void si_enable_uvd_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	u32 orig, data, tmp;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp |= 0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
	} else {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp &= ~0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
	}
}
5445 
/* Memory-controller clock gating control registers; walked by
 * si_enable_mc_ls() and si_enable_mc_mgcg() below. */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5458 
5459 static void si_enable_mc_ls(struct radeon_device *rdev,
5460 			    bool enable)
5461 {
5462 	int i;
5463 	u32 orig, data;
5464 
5465 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5466 		orig = data = RREG32(mc_cg_registers[i]);
5467 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5468 			data |= MC_LS_ENABLE;
5469 		else
5470 			data &= ~MC_LS_ENABLE;
5471 		if (data != orig)
5472 			WREG32(mc_cg_registers[i], data);
5473 	}
5474 }
5475 
5476 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5477 			       bool enable)
5478 {
5479 	int i;
5480 	u32 orig, data;
5481 
5482 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5483 		orig = data = RREG32(mc_cg_registers[i]);
5484 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5485 			data |= MC_CG_ENABLE;
5486 		else
5487 			data &= ~MC_CG_ENABLE;
5488 		if (data != orig)
5489 			WREG32(mc_cg_registers[i], data);
5490 	}
5491 }
5492 
5493 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5494 			       bool enable)
5495 {
5496 	u32 orig, data, offset;
5497 	int i;
5498 
5499 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5500 		for (i = 0; i < 2; i++) {
5501 			if (i == 0)
5502 				offset = DMA0_REGISTER_OFFSET;
5503 			else
5504 				offset = DMA1_REGISTER_OFFSET;
5505 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5506 			data &= ~MEM_POWER_OVERRIDE;
5507 			if (data != orig)
5508 				WREG32(DMA_POWER_CNTL + offset, data);
5509 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5510 		}
5511 	} else {
5512 		for (i = 0; i < 2; i++) {
5513 			if (i == 0)
5514 				offset = DMA0_REGISTER_OFFSET;
5515 			else
5516 				offset = DMA1_REGISTER_OFFSET;
5517 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5518 			data |= MEM_POWER_OVERRIDE;
5519 			if (data != orig)
5520 				WREG32(DMA_POWER_CNTL + offset, data);
5521 
5522 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5523 			data = 0xff000000;
5524 			if (data != orig)
5525 				WREG32(DMA_CLK_CTRL + offset, data);
5526 		}
5527 	}
5528 }
5529 
5530 static void si_enable_bif_mgls(struct radeon_device *rdev,
5531 			       bool enable)
5532 {
5533 	u32 orig, data;
5534 
5535 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5536 
5537 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5538 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5539 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5540 	else
5541 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5542 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5543 
5544 	if (orig != data)
5545 		WREG32_PCIE(PCIE_CNTL2, data);
5546 }
5547 
5548 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5549 			       bool enable)
5550 {
5551 	u32 orig, data;
5552 
5553 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5554 
5555 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5556 		data &= ~CLOCK_GATING_DIS;
5557 	else
5558 		data |= CLOCK_GATING_DIS;
5559 
5560 	if (orig != data)
5561 		WREG32(HDP_HOST_PATH_CNTL, data);
5562 }
5563 
5564 static void si_enable_hdp_ls(struct radeon_device *rdev,
5565 			     bool enable)
5566 {
5567 	u32 orig, data;
5568 
5569 	orig = data = RREG32(HDP_MEM_POWER_LS);
5570 
5571 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5572 		data |= HDP_LS_ENABLE;
5573 	else
5574 		data &= ~HDP_LS_ENABLE;
5575 
5576 	if (orig != data)
5577 		WREG32(HDP_MEM_POWER_LS, data);
5578 }
5579 
/**
 * si_update_cg - enable/disable clock gating for a set of blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable or disable gating
 *
 * For the GFX block the MGCG/CGCG calls must be made in opposite
 * orders for enable vs disable (see the comment below); the other
 * blocks are independent.
 */
static void si_update_cg(struct radeon_device *rdev,
			 u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		si_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			si_enable_mgcg(rdev, true);
			si_enable_cgcg(rdev, true);
		} else {
			si_enable_cgcg(rdev, false);
			si_enable_mgcg(rdev, false);
		}
		si_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		si_enable_mc_mgcg(rdev, enable);
		si_enable_mc_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		si_enable_dma_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		si_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		/* only meaningful on parts that actually have UVD */
		if (rdev->has_uvd) {
			si_enable_uvd_mgcg(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		si_enable_hdp_mgcg(rdev, enable);
		si_enable_hdp_ls(rdev, enable);
	}
}
5620 
5621 static void si_init_cg(struct radeon_device *rdev)
5622 {
5623 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5624 			    RADEON_CG_BLOCK_MC |
5625 			    RADEON_CG_BLOCK_SDMA |
5626 			    RADEON_CG_BLOCK_BIF |
5627 			    RADEON_CG_BLOCK_HDP), true);
5628 	if (rdev->has_uvd) {
5629 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5630 		si_init_uvd_internal_cg(rdev);
5631 	}
5632 }
5633 
5634 static void si_fini_cg(struct radeon_device *rdev)
5635 {
5636 	if (rdev->has_uvd) {
5637 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5638 	}
5639 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5640 			    RADEON_CG_BLOCK_MC |
5641 			    RADEON_CG_BLOCK_SDMA |
5642 			    RADEON_CG_BLOCK_BIF |
5643 			    RADEON_CG_BLOCK_HDP), false);
5644 }
5645 
5646 u32 si_get_csb_size(struct radeon_device *rdev)
5647 {
5648 	u32 count = 0;
5649 	const struct cs_section_def *sect = NULL;
5650 	const struct cs_extent_def *ext = NULL;
5651 
5652 	if (rdev->rlc.cs_data == NULL)
5653 		return 0;
5654 
5655 	/* begin clear state */
5656 	count += 2;
5657 	/* context control state */
5658 	count += 3;
5659 
5660 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5661 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5662 			if (sect->id == SECT_CONTEXT)
5663 				count += 2 + ext->reg_count;
5664 			else
5665 				return 0;
5666 		}
5667 	}
5668 	/* pa_sc_raster_config */
5669 	count += 3;
5670 	/* end clear state */
5671 	count += 2;
5672 	/* clear state */
5673 	count += 2;
5674 
5675 	return count;
5676 }
5677 
/**
 * si_get_csb_buffer - fill the clear-state indirect buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (little-endian dwords)
 *
 * Emits the clear-state preamble, the SECT_CONTEXT register extents
 * from rdev->rlc.cs_data, a per-family PA_SC_RASTER_CONFIG value, the
 * preamble end and a CLEAR_STATE packet.  The dword layout must match
 * the count returned by si_get_csb_size().
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* context register index is relative to 0xa000 */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only SECT_CONTEXT sections are supported */
				return;
			}
		}
	}

	/* per-family raster config */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = cpu_to_le32(0x2a00126a);
		break;
	case CHIP_VERDE:
		buffer[count++] = cpu_to_le32(0x0000124a);
		break;
	case CHIP_OLAND:
		buffer[count++] = cpu_to_le32(0x00000082);
		break;
	case CHIP_HAINAN:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
5737 
/**
 * si_init_pg - initialize power gating
 *
 * @rdev: radeon_device pointer
 *
 * Sets up SDMA and GFX power gating where supported; when GFX PG is
 * unavailable (or PG is entirely disabled) the RLC still needs the
 * save/restore and clear-state buffer addresses programmed.
 */
static void si_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
			si_init_dma_pg(rdev);
		}
		si_init_ao_cu_mask(rdev);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			si_init_gfx_cgpg(rdev);
		} else {
			/* no GFX PG: program the RLC buffers directly */
			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
		}
		si_enable_dma_pg(rdev, true);
		si_enable_gfx_cgpg(rdev, true);
	} else {
		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
	}
}
5758 
5759 static void si_fini_pg(struct radeon_device *rdev)
5760 {
5761 	if (rdev->pg_flags) {
5762 		si_enable_dma_pg(rdev, false);
5763 		si_enable_gfx_cgpg(rdev, false);
5764 	}
5765 }
5766 
5767 /*
5768  * RLC
5769  */
5770 void si_rlc_reset(struct radeon_device *rdev)
5771 {
5772 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5773 
5774 	tmp |= SOFT_RESET_RLC;
5775 	WREG32(GRBM_SOFT_RESET, tmp);
5776 	udelay(50);
5777 	tmp &= ~SOFT_RESET_RLC;
5778 	WREG32(GRBM_SOFT_RESET, tmp);
5779 	udelay(50);
5780 }
5781 
/*
 * si_rlc_stop - halt the RLC.
 * Clears RLC_CNTL, masks the GUI idle interrupt and then waits for the
 * RLC serdes to go idle before returning.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
	/* disable the RLC engine */
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5790 
/*
 * si_rlc_start - (re)enable the RLC.
 * Sets the enable bit, unmasks the GUI idle interrupt and gives the
 * engine 50us to come up.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	/* let the RLC spin up */
	udelay(50);
}
5799 
5800 static bool si_lbpw_supported(struct radeon_device *rdev)
5801 {
5802 	u32 tmp;
5803 
5804 	/* Enable LBPW only for DDR3 */
5805 	tmp = RREG32(MC_SEQ_MISC0);
5806 	if ((tmp & 0xF0000000) == 0xB0000000)
5807 		return true;
5808 	return false;
5809 }
5810 
5811 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5812 {
5813 	u32 tmp;
5814 
5815 	tmp = RREG32(RLC_LB_CNTL);
5816 	if (enable)
5817 		tmp |= LOAD_BALANCE_ENABLE;
5818 	else
5819 		tmp &= ~LOAD_BALANCE_ENABLE;
5820 	WREG32(RLC_LB_CNTL, tmp);
5821 
5822 	if (!enable) {
5823 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5824 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5825 	}
5826 }
5827 
/*
 * si_rlc_resume - reset the RLC and (re)load its microcode.
 *
 * Stops and soft-resets the RLC, programs power/clock gating state,
 * clears the ring and load-balancing registers, uploads the RLC ucode
 * one word at a time, then restarts the engine.
 *
 * Returns 0 on success, -EINVAL if no RLC firmware has been loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	/* clear ring/load-balancing state before the ucode upload */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware: size and offset come from the header,
		 * ucode words are little endian */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		const __le32 *fw_data = (const __le32 *)
			((const char *)rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		for (i = 0; i < fw_size; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
		}
	} else {
		/* legacy firmware: fixed size, big-endian words */
		const __be32 *fw_data =
			(const __be32 *)rdev->rlc_fw->data;
		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
		}
	}
	/* reset the upload address */
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5882 
5883 static void si_enable_interrupts(struct radeon_device *rdev)
5884 {
5885 	u32 ih_cntl = RREG32(IH_CNTL);
5886 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5887 
5888 	ih_cntl |= ENABLE_INTR;
5889 	ih_rb_cntl |= IH_RB_ENABLE;
5890 	WREG32(IH_CNTL, ih_cntl);
5891 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5892 	rdev->ih.enabled = true;
5893 }
5894 
5895 static void si_disable_interrupts(struct radeon_device *rdev)
5896 {
5897 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5898 	u32 ih_cntl = RREG32(IH_CNTL);
5899 
5900 	ih_rb_cntl &= ~IH_RB_ENABLE;
5901 	ih_cntl &= ~ENABLE_INTR;
5902 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5903 	WREG32(IH_CNTL, ih_cntl);
5904 	/* set rptr, wptr to 0 */
5905 	WREG32(IH_RB_RPTR, 0);
5906 	WREG32(IH_RB_WPTR, 0);
5907 	rdev->ih.enabled = false;
5908 	rdev->ih.rptr = 0;
5909 }
5910 
/*
 * si_disable_interrupt_state - mask every interrupt source.
 *
 * Clears the CP ring, DMA trap, GRBM, CRTC vblank/vline, pageflip and
 * (on boards with display hardware) hotplug interrupt enables, while
 * preserving bits that are not enables (CP context bits, HPD polarity).
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* keep only the context busy/empty bits; all enables off */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* mask the trap interrupt on both DMA engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* vblank/vline interrupts off on every populated CRTC pair */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip interrupts off */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

		/* keep the HPD polarity bits, drop the enables */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5968 
/*
 * si_irq_init - bring up the interrupt handler (IH) ring.
 *
 * Allocates the IH ring buffer, loads the RLC, programs the IH ring
 * base/size, write-back address and IH_CNTL defaults, forces all
 * interrupt sources off, enables PCI bus mastering and finally
 * enables the IH block.
 *
 * Returns 0 on success or a negative errno from ring allocation /
 * RLC resume.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size is encoded as log2 of the dword count */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_enable_busmaster(rdev->dev->bsddev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
6039 
/*
 * si_irq_set - program hardware interrupt enables from rdev->irq state.
 *
 * Rebuilds the enable masks for the CP rings, DMA engines, CRTC
 * vblanks, hotplug pins and the thermal interrupt from the software
 * interrupt state, then writes them all out.  Pageflip interrupts are
 * always left enabled on populated CRTCs.
 *
 * Returns 0 on success, -EINVAL if no IRQ handler has been installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current register state with all enables cleared */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);

	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank on a CRTC is wanted by either the vblank ioctl path or a
	 * pending pageflip */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* pageflip interrupts stay enabled on every populated CRTC */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	/* posting read */
	RREG32(SRBM_STATUS);

	return 0;
}
6219 
6220 static inline void si_irq_ack(struct radeon_device *rdev)
6221 {
6222 	u32 tmp;
6223 
6224 	if (ASIC_IS_NODCE(rdev))
6225 		return;
6226 
6227 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6228 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6229 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6230 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6231 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6232 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6233 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6234 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6235 	if (rdev->num_crtc >= 4) {
6236 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6237 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6238 	}
6239 	if (rdev->num_crtc >= 6) {
6240 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6241 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6242 	}
6243 
6244 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6245 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6246 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6247 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6248 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6249 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6250 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6251 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6252 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6253 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6254 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6255 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6256 
6257 	if (rdev->num_crtc >= 4) {
6258 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6259 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6260 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6261 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6262 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6263 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6264 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6265 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6266 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6267 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6268 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6269 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6270 	}
6271 
6272 	if (rdev->num_crtc >= 6) {
6273 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6274 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6275 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6276 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6277 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6278 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6279 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6280 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6281 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6282 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6283 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6284 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6285 	}
6286 
6287 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6288 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6289 		tmp |= DC_HPDx_INT_ACK;
6290 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6291 	}
6292 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6293 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6294 		tmp |= DC_HPDx_INT_ACK;
6295 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6296 	}
6297 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6298 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6299 		tmp |= DC_HPDx_INT_ACK;
6300 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6301 	}
6302 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6303 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6304 		tmp |= DC_HPDx_INT_ACK;
6305 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6306 	}
6307 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6308 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6309 		tmp |= DC_HPDx_INT_ACK;
6310 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6311 	}
6312 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6313 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6314 		tmp |= DC_HPDx_INT_ACK;
6315 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6316 	}
6317 }
6318 
/*
 * si_irq_disable - disable interrupt delivery and clean up pending state.
 * Disables the IH block, waits 1ms for in-flight interrupts to land,
 * acks anything pending, then masks every source.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
6327 
/* si_irq_suspend - quiesce interrupts and halt the RLC for suspend. */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6333 
/* si_irq_fini - suspend interrupt handling and free the IH ring. */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6339 
6340 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6341 {
6342 	u32 wptr, tmp;
6343 
6344 	if (rdev->wb.enabled)
6345 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6346 	else
6347 		wptr = RREG32(IH_RB_WPTR);
6348 
6349 	if (wptr & RB_OVERFLOW) {
6350 		wptr &= ~RB_OVERFLOW;
6351 		/* When a ring buffer overflow happen start parsing interrupt
6352 		 * from the last not overwritten vector (wptr + 16). Hopefully
6353 		 * this should allow us to catchup.
6354 		 */
6355 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6356 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6357 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6358 		tmp = RREG32(IH_RB_CNTL);
6359 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6360 		WREG32(IH_RB_CNTL, tmp);
6361 	}
6362 	return (wptr & rdev->ih.ptr_mask);
6363 }
6364 
6365 /*        SI IV Ring
6366  * Each IV ring entry is 128 bits:
6367  * [7:0]    - interrupt source id
6368  * [31:8]   - reserved
6369  * [59:32]  - interrupt source data
6370  * [63:60]  - reserved
6371  * [71:64]  - RINGID
6372  * [79:72]  - VMID
6373  * [127:80] - reserved
6374  */
6375 irqreturn_t si_irq_process(struct radeon_device *rdev)
6376 {
6377 	u32 wptr;
6378 	u32 rptr;
6379 	u32 src_id, src_data, ring_id;
6380 	u32 ring_index;
6381 	bool queue_hotplug = false;
6382 	bool queue_thermal = false;
6383 	u32 status, addr;
6384 
6385 	if (!rdev->ih.enabled || rdev->shutdown)
6386 		return IRQ_NONE;
6387 
6388 	wptr = si_get_ih_wptr(rdev);
6389 
6390 restart_ih:
6391 	/* is somebody else already processing irqs? */
6392 	if (atomic_xchg(&rdev->ih.lock, 1))
6393 		return IRQ_NONE;
6394 
6395 	rptr = rdev->ih.rptr;
6396 	DRM_DEBUG_VBLANK("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6397 
6398 	/* Order reading of wptr vs. reading of IH ring data */
6399 	rmb();
6400 
6401 	/* display interrupts */
6402 	si_irq_ack(rdev);
6403 
6404 	while (rptr != wptr) {
6405 		/* wptr/rptr are in bytes! */
6406 		ring_index = rptr / 4;
6407 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6408 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6409 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6410 
6411 		switch (src_id) {
6412 		case 1: /* D1 vblank/vline */
6413 			switch (src_data) {
6414 			case 0: /* D1 vblank */
6415 				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
6416 					if (rdev->irq.crtc_vblank_int[0]) {
6417 						drm_handle_vblank(rdev->ddev, 0);
6418 						rdev->pm.vblank_sync = true;
6419 						wake_up(&rdev->irq.vblank_queue);
6420 					}
6421 					if (atomic_read(&rdev->irq.pflip[0]))
6422 						radeon_crtc_handle_vblank(rdev, 0);
6423 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6424 					DRM_DEBUG_VBLANK("IH: D1 vblank\n");
6425 				}
6426 				break;
6427 			case 1: /* D1 vline */
6428 				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
6429 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6430 					DRM_DEBUG_VBLANK("IH: D1 vline\n");
6431 				}
6432 				break;
6433 			default:
6434 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6435 				break;
6436 			}
6437 			break;
6438 		case 2: /* D2 vblank/vline */
6439 			switch (src_data) {
6440 			case 0: /* D2 vblank */
6441 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6442 					if (rdev->irq.crtc_vblank_int[1]) {
6443 						drm_handle_vblank(rdev->ddev, 1);
6444 						rdev->pm.vblank_sync = true;
6445 						wake_up(&rdev->irq.vblank_queue);
6446 					}
6447 					if (atomic_read(&rdev->irq.pflip[1]))
6448 						radeon_crtc_handle_vblank(rdev, 1);
6449 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6450 					DRM_DEBUG_VBLANK("IH: D2 vblank\n");
6451 				}
6452 				break;
6453 			case 1: /* D2 vline */
6454 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6455 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6456 					DRM_DEBUG_VBLANK("IH: D2 vline\n");
6457 				}
6458 				break;
6459 			default:
6460 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6461 				break;
6462 			}
6463 			break;
6464 		case 3: /* D3 vblank/vline */
6465 			switch (src_data) {
6466 			case 0: /* D3 vblank */
6467 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6468 					if (rdev->irq.crtc_vblank_int[2]) {
6469 						drm_handle_vblank(rdev->ddev, 2);
6470 						rdev->pm.vblank_sync = true;
6471 						wake_up(&rdev->irq.vblank_queue);
6472 					}
6473 					if (atomic_read(&rdev->irq.pflip[2]))
6474 						radeon_crtc_handle_vblank(rdev, 2);
6475 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6476 					DRM_DEBUG_VBLANK("IH: D3 vblank\n");
6477 				}
6478 				break;
6479 			case 1: /* D3 vline */
6480 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6481 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6482 					DRM_DEBUG_VBLANK("IH: D3 vline\n");
6483 				}
6484 				break;
6485 			default:
6486 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6487 				break;
6488 			}
6489 			break;
6490 		case 4: /* D4 vblank/vline */
6491 			switch (src_data) {
6492 			case 0: /* D4 vblank */
6493 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6494 					if (rdev->irq.crtc_vblank_int[3]) {
6495 						drm_handle_vblank(rdev->ddev, 3);
6496 						rdev->pm.vblank_sync = true;
6497 						wake_up(&rdev->irq.vblank_queue);
6498 					}
6499 					if (atomic_read(&rdev->irq.pflip[3]))
6500 						radeon_crtc_handle_vblank(rdev, 3);
6501 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6502 					DRM_DEBUG_VBLANK("IH: D4 vblank\n");
6503 				}
6504 				break;
6505 			case 1: /* D4 vline */
6506 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6507 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6508 					DRM_DEBUG_VBLANK("IH: D4 vline\n");
6509 				}
6510 				break;
6511 			default:
6512 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6513 				break;
6514 			}
6515 			break;
6516 		case 5: /* D5 vblank/vline */
6517 			switch (src_data) {
6518 			case 0: /* D5 vblank */
6519 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6520 					if (rdev->irq.crtc_vblank_int[4]) {
6521 						drm_handle_vblank(rdev->ddev, 4);
6522 						rdev->pm.vblank_sync = true;
6523 						wake_up(&rdev->irq.vblank_queue);
6524 					}
6525 					if (atomic_read(&rdev->irq.pflip[4]))
6526 						radeon_crtc_handle_vblank(rdev, 4);
6527 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6528 					DRM_DEBUG_VBLANK("IH: D5 vblank\n");
6529 				}
6530 				break;
6531 			case 1: /* D5 vline */
6532 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6533 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6534 					DRM_DEBUG_VBLANK("IH: D5 vline\n");
6535 				}
6536 				break;
6537 			default:
6538 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6539 				break;
6540 			}
6541 			break;
6542 		case 6: /* D6 vblank/vline */
6543 			switch (src_data) {
6544 			case 0: /* D6 vblank */
6545 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6546 					if (rdev->irq.crtc_vblank_int[5]) {
6547 						drm_handle_vblank(rdev->ddev, 5);
6548 						rdev->pm.vblank_sync = true;
6549 						wake_up(&rdev->irq.vblank_queue);
6550 					}
6551 					if (atomic_read(&rdev->irq.pflip[5]))
6552 						radeon_crtc_handle_vblank(rdev, 5);
6553 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6554 					DRM_DEBUG_VBLANK("IH: D6 vblank\n");
6555 				}
6556 				break;
6557 			case 1: /* D6 vline */
6558 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6559 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6560 					DRM_DEBUG_VBLANK("IH: D6 vline\n");
6561 				}
6562 				break;
6563 			default:
6564 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6565 				break;
6566 			}
6567 			break;
6568 		case 8: /* D1 page flip */
6569 		case 10: /* D2 page flip */
6570 		case 12: /* D3 page flip */
6571 		case 14: /* D4 page flip */
6572 		case 16: /* D5 page flip */
6573 		case 18: /* D6 page flip */
6574 			DRM_DEBUG_VBLANK("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6575 			if (radeon_use_pflipirq > 0)
6576 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6577 			break;
6578 		case 42: /* HPD hotplug */
6579 			switch (src_data) {
6580 			case 0:
6581 				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6582 					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6583 					queue_hotplug = true;
6584 					DRM_DEBUG("IH: HPD1\n");
6585 				}
6586 				break;
6587 			case 1:
6588 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6589 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6590 					queue_hotplug = true;
6591 					DRM_DEBUG("IH: HPD2\n");
6592 				}
6593 				break;
6594 			case 2:
6595 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6596 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6597 					queue_hotplug = true;
6598 					DRM_DEBUG("IH: HPD3\n");
6599 				}
6600 				break;
6601 			case 3:
6602 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6603 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6604 					queue_hotplug = true;
6605 					DRM_DEBUG("IH: HPD4\n");
6606 				}
6607 				break;
6608 			case 4:
6609 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6610 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6611 					queue_hotplug = true;
6612 					DRM_DEBUG("IH: HPD5\n");
6613 				}
6614 				break;
6615 			case 5:
6616 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6617 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6618 					queue_hotplug = true;
6619 					DRM_DEBUG("IH: HPD6\n");
6620 				}
6621 				break;
6622 			default:
6623 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6624 				break;
6625 			}
6626 			break;
6627 		case 124: /* UVD */
6628 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6629 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6630 			break;
6631 		case 146:
6632 		case 147:
6633 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6634 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6635 			/* reset addr and status */
6636 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6637 			if (addr == 0x0 && status == 0x0)
6638 				break;
6639 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6640 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6641 				addr);
6642 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6643 				status);
6644 			si_vm_decode_fault(rdev, status, addr);
6645 			break;
6646 		case 176: /* RINGID0 CP_INT */
6647 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6648 			break;
6649 		case 177: /* RINGID1 CP_INT */
6650 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6651 			break;
6652 		case 178: /* RINGID2 CP_INT */
6653 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6654 			break;
6655 		case 181: /* CP EOP event */
6656 			DRM_DEBUG("IH: CP EOP\n");
6657 			switch (ring_id) {
6658 			case 0:
6659 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6660 				break;
6661 			case 1:
6662 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6663 				break;
6664 			case 2:
6665 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6666 				break;
6667 			}
6668 			break;
6669 		case 224: /* DMA trap event */
6670 			DRM_DEBUG("IH: DMA trap\n");
6671 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6672 			break;
6673 		case 230: /* thermal low to high */
6674 			DRM_DEBUG("IH: thermal low to high\n");
6675 			rdev->pm.dpm.thermal.high_to_low = false;
6676 			queue_thermal = true;
6677 			break;
6678 		case 231: /* thermal high to low */
6679 			DRM_DEBUG("IH: thermal high to low\n");
6680 			rdev->pm.dpm.thermal.high_to_low = true;
6681 			queue_thermal = true;
6682 			break;
6683 		case 233: /* GUI IDLE */
6684 			DRM_DEBUG("IH: GUI idle\n");
6685 			break;
6686 		case 244: /* DMA trap event */
6687 			DRM_DEBUG("IH: DMA1 trap\n");
6688 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6689 			break;
6690 		default:
6691 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6692 			break;
6693 		}
6694 
6695 		/* wptr/rptr are in bytes! */
6696 		rptr += 16;
6697 		rptr &= rdev->ih.ptr_mask;
6698 		WREG32(IH_RB_RPTR, rptr);
6699 	}
6700 	if (queue_hotplug)
6701 		schedule_work(&rdev->hotplug_work);
6702 	if (queue_thermal && rdev->pm.dpm_enabled)
6703 		schedule_work(&rdev->pm.dpm.thermal.work);
6704 	rdev->ih.rptr = rptr;
6705 	atomic_set(&rdev->ih.lock, 0);
6706 
6707 	/* make sure wptr hasn't changed while processing */
6708 	wptr = si_get_ih_wptr(rdev);
6709 	if (wptr != rptr)
6710 		goto restart_ih;
6711 
6712 	return IRQ_HANDLED;
6713 }
6714 
6715 /*
6716  * startup/shutdown callbacks
6717  */
/**
 * si_startup - program the hardware and start up all rings (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Common bring-up path shared by si_init() and si_resume().  Programs
 * the MC, loads microcode, enables the GART, sets up RLC/writeback
 * buffers, starts the per-ring fence drivers, installs and enables
 * interrupts, and finally brings up the GFX ring, both compute CP
 * rings, both DMA rings and (when present) the UVD ring.
 * The ordering of the steps below follows hardware bring-up
 * requirements; do not reorder.
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* NOTE(review): MC ucode load is skipped when DPM is enabled —
	 * presumably the DPM path has already loaded it; confirm against
	 * the pm init code. */
	if (!rdev->pm.dpm_enabled) {
		r = si_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on every ring we intend to use */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD failure is non-fatal: zeroing ring_size makes the ring init
	 * further down skip the UVD ring entirely. */
	if (rdev->has_uvd) {
		r = uvd_v2_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	/* bring up the rings; sizes were chosen in si_init() */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* UVD ring only if uvd_v2_2_resume() above succeeded */
	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
					     RADEON_CP_PACKET2);
			if (!r)
				r = uvd_v1_0_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6894 
6895 int si_resume(struct radeon_device *rdev)
6896 {
6897 	int r;
6898 
6899 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6900 	 * posting will perform necessary task to bring back GPU into good
6901 	 * shape.
6902 	 */
6903 	/* post card */
6904 	atom_asic_init(rdev->mode_info.atom_context);
6905 
6906 	/* init golden registers */
6907 	si_init_golden_registers(rdev);
6908 
6909 	if (rdev->pm.pm_method == PM_METHOD_DPM)
6910 		radeon_pm_resume(rdev);
6911 
6912 	rdev->accel_working = true;
6913 	r = si_startup(rdev);
6914 	if (r) {
6915 		DRM_ERROR("si startup failed on resume\n");
6916 		rdev->accel_working = false;
6917 		return r;
6918 	}
6919 
6920 	return r;
6921 
6922 }
6923 
/**
 * si_suspend - quiesce the hardware before suspend (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Stops PM, audio, the VM manager, the CP and DMA engines, UVD,
 * powergating/clockgating and interrupts, then disables writeback and
 * the GART.  This is the inverse of si_startup(); the teardown order
 * matters and must not be changed.
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt command submission before touching anything else */
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6942 
6943 /* Plan is to move initialization in that function and use
6944  * helper function so that radeon_device_init pretty much
6945  * do nothing more than calling asic specific function. This
6946  * should also allow to remove a bunch of callback function
6947  * like vram_info.
6948  */
6949 int si_init(struct radeon_device *rdev)
6950 {
6951 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6952 	int r;
6953 
6954 	/* Read BIOS */
6955 	if (!radeon_get_bios(rdev)) {
6956 		if (ASIC_IS_AVIVO(rdev))
6957 			return -EINVAL;
6958 	}
6959 	/* Must be an ATOMBIOS */
6960 	if (!rdev->is_atom_bios) {
6961 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6962 		return -EINVAL;
6963 	}
6964 	r = radeon_atombios_init(rdev);
6965 	if (r)
6966 		return r;
6967 
6968 	/* Post card if necessary */
6969 	if (!radeon_card_posted(rdev)) {
6970 		if (!rdev->bios) {
6971 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6972 			return -EINVAL;
6973 		}
6974 		DRM_INFO("GPU not posted. posting now...\n");
6975 		atom_asic_init(rdev->mode_info.atom_context);
6976 	}
6977 	/* init golden registers */
6978 	si_init_golden_registers(rdev);
6979 	/* Initialize scratch registers */
6980 	si_scratch_init(rdev);
6981 	/* Initialize surface registers */
6982 	radeon_surface_init(rdev);
6983 	/* Initialize clocks */
6984 	radeon_get_clock_info(rdev->ddev);
6985 
6986 	/* Fence driver */
6987 	r = radeon_fence_driver_init(rdev);
6988 	if (r)
6989 		return r;
6990 
6991 	/* initialize memory controller */
6992 	r = si_mc_init(rdev);
6993 	if (r)
6994 		return r;
6995 	/* Memory manager */
6996 	r = radeon_bo_init(rdev);
6997 	if (r)
6998 		return r;
6999 
7000 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7001 	    !rdev->rlc_fw || !rdev->mc_fw) {
7002 		r = si_init_microcode(rdev);
7003 		if (r) {
7004 			DRM_ERROR("Failed to load firmware!\n");
7005 			return r;
7006 		}
7007 	}
7008 
7009 	/* Initialize power management */
7010 	radeon_pm_init(rdev);
7011 
7012 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7013 	ring->ring_obj = NULL;
7014 	r600_ring_init(rdev, ring, 1024 * 1024);
7015 
7016 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7017 	ring->ring_obj = NULL;
7018 	r600_ring_init(rdev, ring, 1024 * 1024);
7019 
7020 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7021 	ring->ring_obj = NULL;
7022 	r600_ring_init(rdev, ring, 1024 * 1024);
7023 
7024 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7025 	ring->ring_obj = NULL;
7026 	r600_ring_init(rdev, ring, 64 * 1024);
7027 
7028 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7029 	ring->ring_obj = NULL;
7030 	r600_ring_init(rdev, ring, 64 * 1024);
7031 
7032 	if (rdev->has_uvd) {
7033 		r = radeon_uvd_init(rdev);
7034 		if (!r) {
7035 			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7036 			ring->ring_obj = NULL;
7037 			r600_ring_init(rdev, ring, 4096);
7038 		}
7039 	}
7040 
7041 	rdev->ih.ring_obj = NULL;
7042 	r600_ih_ring_init(rdev, 64 * 1024);
7043 
7044 	r = r600_pcie_gart_init(rdev);
7045 	if (r)
7046 		return r;
7047 
7048 	rdev->accel_working = true;
7049 	r = si_startup(rdev);
7050 	if (r) {
7051 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7052 		si_cp_fini(rdev);
7053 		cayman_dma_fini(rdev);
7054 		si_irq_fini(rdev);
7055 		sumo_rlc_fini(rdev);
7056 		radeon_wb_fini(rdev);
7057 		radeon_ib_pool_fini(rdev);
7058 		radeon_vm_manager_fini(rdev);
7059 		radeon_irq_kms_fini(rdev);
7060 		si_pcie_gart_fini(rdev);
7061 		rdev->accel_working = false;
7062 	}
7063 
7064 	/* Don't start up if the MC ucode is missing.
7065 	 * The default clocks and voltages before the MC ucode
7066 	 * is loaded are not suffient for advanced operations.
7067 	 */
7068 	if (!rdev->mc_fw) {
7069 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7070 		return -EINVAL;
7071 	}
7072 
7073 	return 0;
7074 }
7075 
/**
 * si_fini - final driver teardown for SI parts
 *
 * @rdev: radeon_device pointer
 *
 * Tears down everything set up by si_init()/si_startup(): power
 * management, engines, interrupts, memory management structures, the
 * GART and the BIOS copy.  The teardown order mirrors (and must stay
 * the inverse of) the initialization order.
 */
void si_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	si_fini_microcode(rdev);
	/* free the BIOS copy taken in radeon_get_bios() */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
7103 
7104 /**
7105  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7106  *
7107  * @rdev: radeon_device pointer
7108  *
7109  * Fetches a GPU clock counter snapshot (SI).
7110  * Returns the 64 bit clock counter snapshot.
7111  */
7112 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7113 {
7114 	uint64_t clock;
7115 
7116 	mutex_lock(&rdev->gpu_clock_mutex);
7117 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7118 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7119 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7120 	mutex_unlock(&rdev->gpu_clock_mutex);
7121 	return clock;
7122 }
7123 
/**
 * si_set_uvd_clocks - reprogram the UVD PLL for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD VCLK (0 leaves the PLL in bypass mode)
 * @dclk: requested UVD DCLK (0 leaves the PLL in bypass mode)
 *
 * Routes vclk/dclk to the bypass clock, puts the UPLL into bypass,
 * computes the dividers, relocks the PLL and finally switches the
 * clock selection back to the PLL outputs.  The register write order
 * and the delays below implement the PLL programming sequence; do not
 * reorder.  Clock units follow radeon_uvd_calc_upll_dividers().
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode */
		return 0;
	}

	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* disable sleep mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* NOTE(review): ISPARE9 selection depends on the fb_div magnitude;
	 * threshold taken as-is from the original programming sequence */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
7212 
/**
 * si_pcie_gen3_enable - attempt to raise the PCIe link to gen2/gen3
 *
 * @rdev: radeon_device pointer
 *
 * Checks the platform's speed capability mask, optionally re-runs link
 * equalization for gen3 (toggling LC_SET_QUIESCE/LC_REDO_EQ while
 * preserving bridge and GPU LNKCTL/LNKCTL2 settings), writes the
 * target link speed into LNKCTL2 and triggers a software-initiated
 * speed change, then polls for its completion.  Disabled entirely by
 * the radeon.pcie_gen2=0 module parameter, and a no-op for IGP or
 * non-PCIE parts.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* nothing to do if the link already runs at the best supported rate */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* locate the PCIe capability on both ends of the link
	 * (DragonFly-specific helper; Linux uses pci_pcie_cap()) */
	bridge_pos = pci_get_pciecap_ptr(root->dev.bsddev);
	if (!bridge_pos)
		return;

	gpu_pos = pci_get_pciecap_ptr(rdev->pdev->dev.bsddev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			/* widen the link first if it is running below its
			 * detected maximum width */
			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* equalization retry loop: quiesce the link, redo EQ,
			 * then restore the saved LNKCTL/LNKCTL2 bits */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* LNKCTL2 bits 3:0 = target link speed */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7369 
/**
 * si_program_aspm - configure PCIe Active State Power Management
 *
 * @rdev: radeon_device pointer
 *
 * Programs L0s/L1 inactivity timers, PLL power-down behavior in L1,
 * dynamic lane power states and (when CLKREQ# is usable) the clock
 * source selections for the various internal clocks.  Disabled by the
 * radeon.aspm=0 module parameter; a no-op on non-PCIE parts.  The
 * disable_* locals act as compile-time policy switches and are all
 * false here.  Note that CLKREQ support detection is stubbed out on
 * this platform (the zMN_TODO block), so clk_req_support is always
 * false and the clock reprogramming branch never runs.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the transmitted N_FTS value */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* L0s/L1 inactivity configuration */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PHY PLLs to power down in L1/TXS2 */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* clear the PLL ramp-up times on everything except
			 * Oland/Hainan */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			/* CLKREQ# detection is not ported yet (zMN_TODO), so
			 * clk_req_support stays false on this platform */
			if (!disable_clkreq) {
#ifdef zMN_TODO
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
#else
				clk_req_support = false;
#endif
			} else {
				clk_req_support = false;
			}

			/* dead on this platform until the stub above is ported */
			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: just commit the L0s/PMI settings built above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	/* disable the L0s inactivity timer again if N_FTS is saturated
	 * and the link is reversed in both directions */
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7577